<!-- ### Load Culture -->

In [3]:
import pandas as pd
import glob

paths = glob.glob('df_indicators_hdi/*')
paths = [x for x in paths if 'regression_table' in x]


# Function to add stars based on p-value
def add_stars(row, model, var):
    if row['Unnamed: 0'] in ['Intercept', f'{var} Coef']:
        p_value = df.loc[df['Unnamed: 0'] == f'{var} p-value', model].values[0]
        coef = row[model].split(" ")[0]  # Extract coefficient value
        se = row[model].split(" ")[1]   # Extract standard error (in parentheses)
        stars = ''
        if p_value < 0.001:
            stars = '***'
        elif p_value < 0.01:
            stars = '**'
        elif p_value < 0.05:
            stars = '*'
        return f"{coef} {se}{stars}"
    return row[model]



final = []

for x in paths:
    df = pd.read_csv(x)
    final.append(df)


data = pd.concat([x for x in final])
data = data.reset_index(drop=True)
data = data.rename(columns = {'Unnamed: 0':'measure'})
list(set(data['measure']))


data.to_csv('other_data/data_hdi_regression_table.csv')

In [4]:

def format_results(row):
    coef = f"{row['Coef']:.3f}"
    se = f"({row['Coef_SE']:.3f})"
    ci = row['Coef_CI']
    t_value = f"[{row['Coef_t-value']:.2f}]"
    return f"{coef} {se} {ci} {t_value}"



# Adding significance stars based on p-values
def add_stars(row):
    p_value = row['Coef_p-value']
    if p_value < 0.001:
        return f"{row['Coef']}***"
    elif p_value < 0.01:
        return f"{row['Coef']}**"
    elif p_value < 0.05:
        return f"{row['Coef']}*"
    else:
        return f"{row['Coef']}"

In [5]:


measures_to_keep = ['Coef_p-value',
 'Correlation',
 'Coef',
 'R-squared',
 'Coef_t-value',
 'Coef_CI',
 'Coef_SE',
 'N']


data = data[data['measure'].isin(measures_to_keep)]

table = data.pivot(index='Variable', columns='measure', values='OLS Without FE')
# Function to format the data rows, expecting a pandas Series for each row
table['Coef_p-value'] = table['Coef_p-value'].astype(float)


# Apply the function to create a new column with stars
table['Coef'] = table.apply(add_stars, axis=1)
table['Coef'] = table['Coef'] +  " ("+table['Coef_SE'] + ")" +  " ["+table['Coef_t-value'] + "]"
table = table[['Coef', 'Coef_CI','Correlation', 'N']]
table['type'] = 'OLS Without FE'
table = table.rename(columns = {'Coef':'CPI', 'Coef_CI':'CI'})
table

measure,CPI,CI,Correlation,N,type
Variable,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Gender_equality_num,0.01** (0.00) [3.42],"(0.01, 0.02)",0.36,78,OLS Without FE
gdp,0.07*** (0.01) [12.59],"(0.06, 0.08)",0.49,508,OLS Without FE
height,0.00** (0.00) [2.94],"(0.00, 0.00)",0.22,168,OLS Without FE
homicide_rate,-1.02 (0.60) [-1.70],"(-2.21, 0.18)",-0.2,72,OLS Without FE
infant_mortality,0.10* (0.04) [2.44],"(0.02, 0.19)",0.35,44,OLS Without FE
life_expectancy,0.05* (0.02) [2.64],"(0.01, 0.09)",0.3,75,OLS Without FE
numeracy,0.22*** (0.04) [6.16],"(0.15, 0.29)",0.41,185,OLS Without FE
wellbeing,2.64*** (0.42) [6.32],"(1.81, 3.47)",0.53,105,OLS Without FE


In [6]:
table_1 = data.pivot(index='Variable', columns='measure', values='MixedLM With FE')
# Function to format the data rows, expecting a pandas Series for each row
table_1['Coef_p-value'] = table_1['Coef_p-value'].astype(float)


# Apply the function to create a new column with stars
table_1['Coef'] = table_1.apply(add_stars, axis=1)
table_1['Coef'] = table_1['Coef'] +  " ("+table_1['Coef_SE'] + ")" +  " ["+table_1['Coef_t-value'] + "]"
table_1 = table_1[['Coef', 'Coef_CI','Correlation', 'N']]
table_1['type'] = 'MixedLM With FE'
table_1 = table_1.rename(columns = {'Coef':'CPI', 'Coef_CI':'CI'})

table_1

measure,CPI,CI,Correlation,N,type
Variable,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Gender_equality_num,0.01 (0.01) [1.47],"(-0.00, 0.02)",0.16,78,MixedLM With FE
gdp,0.06*** (0.00) [15.40],"(0.05, 0.07)",0.46,508,MixedLM With FE
height,0.00 (0.00) [0.32],"(-0.00, 0.00)",-0.13,168,MixedLM With FE
homicide_rate,-0.28 (0.32) [-0.86],"(-0.92, 0.36)",-0.38,72,MixedLM With FE
infant_mortality,-0.13 (0.07) [-1.84],"(-0.27, 0.01)",-0.41,44,MixedLM With FE
life_expectancy,0.11*** (0.03) [3.91],"(0.05, 0.16)",0.76,75,MixedLM With FE
numeracy,0.21*** (0.02) [9.12],"(0.17, 0.26)",0.68,185,MixedLM With FE
wellbeing,2.05** (0.69) [2.99],"(0.70, 3.39)",0.28,105,MixedLM With FE


In [7]:
final_table = pd.concat([table, table_1])

#### regression table

In [8]:
paths = glob.glob('df_indicators_hdi/data_for_regression/*')
paths

['df_indicators_hdi/data_for_regression/cpm.csv',
 'df_indicators_hdi/data_for_regression/numeracy.csv',
 'df_indicators_hdi/data_for_regression/homicide_rate.csv',
 'df_indicators_hdi/data_for_regression/infant_mortality.csv',
 'df_indicators_hdi/data_for_regression/Gender_equality_num.csv',
 'df_indicators_hdi/data_for_regression/height.csv',
 'df_indicators_hdi/data_for_regression/life_expectancy.csv',
 'df_indicators_hdi/data_for_regression/gdp.csv',
 'df_indicators_hdi/data_for_regression/wellbeing.csv']

In [9]:
df_1 = pd.read_csv('df_indicators_hdi/data_for_regression/numeracy.csv', index_col = [0])
df_1 = df_1[['region_name', 'year', 'numeracy_weighted']]

df_2 = pd.read_csv('df_indicators_hdi/data_for_regression/homicide_rate.csv', index_col = [0])
df_2 = df_2[['region_name', 'year', 'homicide_rate_weighted']]

df_3 = pd.read_csv('df_indicators_hdi/data_for_regression/infant_mortality.csv', index_col = [0])
df_3 = df_3[['region_name', 'year', 'infant_mortality_weighted']]

df_4 = pd.read_csv('df_indicators_hdi/data_for_regression/Gender_equality_num.csv', index_col = [0])
df_4 = df_4[['region_name', 'year', 'Gender_equality_num_weighted']]

df_5 = pd.read_csv('df_indicators_hdi/data_for_regression/height.csv', index_col = [0])
df_5 = df_5[['region_name', 'year', 'height_weighted']]

df_6 = pd.read_csv('df_indicators_hdi/data_for_regression/life_expectancy.csv', index_col = [0])
df_6 = df_6[['region_name', 'year', 'life_expectancy_weighted']]

#df_7 = pd.read_csv('df_indicators_hdi/data_for_regression/gdp.csv', index_col = [0])
#df_7 = df_7[['region_name', 'year', 'numeracy_weighted']]
df_8 = pd.read_csv('df_indicators_hdi/data_for_regression/wellbeing.csv', index_col = [0])
df_8 = df_8[['region_name', 'year', 'wellbeing_weighted']]

# df_gdp = pd.read_csv("../gdp_analysis/results/gdp_weighted_pop_fifty.csv")
df_gdp = pd.read_csv("other_data/gdp_weighted_pop_decade.csv")
df_gdp = df_gdp.rename(columns={"region": "region_name"})
df_gdp = df_gdp.rename(columns = {'decade':'year'})

df_cpm = pd.read_csv('df_indicators_hdi/data_for_regression/cpm.csv', index_col = [0])
df_cpm = df_cpm[['region_name', 'decade', 'N_est']]
df_cpm = df_cpm.rename(columns = {'decade':'year'})


In [10]:



# Merge all dataframes on 'region_name' and 'year' using an outer join
merged_df = df_1.merge(df_2, on=['region_name', 'year'], how='outer')
merged_df = merged_df.merge(df_3, on=['region_name', 'year'], how='outer')
merged_df = merged_df.merge(df_4, on=['region_name', 'year'], how='outer')
merged_df = merged_df.merge(df_5, on=['region_name', 'year'], how='outer')
merged_df = merged_df.merge(df_6, on=['region_name', 'year'], how='outer')
merged_df = merged_df.merge(df_8, on=['region_name', 'year'], how='outer')
merged_df = merged_df.merge(df_gdp, on=['region_name', 'year'], how='outer')
merged_df = merged_df.merge(df_cpm, on=['region_name', 'year'], how='outer')


merged_df = merged_df.sort_values(['region_name', 'year'])
merged_df = merged_df.reset_index(drop=True)
merged_df



Unnamed: 0,region_name,year,numeracy_weighted,homicide_rate_weighted,infant_mortality_weighted,Gender_equality_num_weighted,height_weighted,life_expectancy_weighted,wellbeing_weighted,gdp_pc_weighted,pop,N_est
0,Arabic world,-610,,,,,,,,,,5.529170
1,Arabic world,-400,,,,,,,,,,2.611448
2,Arabic world,-360,,,,,,,,,,14.825990
3,Arabic world,-330,,,,,,,,,,6.061846
4,Arabic world,-310,,,,,,,,,,6.022250
...,...,...,...,...,...,...,...,...,...,...,...,...
2582,United Kingdom,1860,97.4465,1.6,137.5,120.324804,166.60,40.862273,1.679138,5028.818182,28901.909091,15480.038693
2583,United Kingdom,1870,97.3455,1.6,136.3,121.799031,167.15,41.663889,1.786446,5695.222222,31388.444444,11764.644742
2584,United Kingdom,1880,97.7782,1.3,130.7,121.119752,167.95,42.720833,2.015722,6045.818182,34527.636364,18735.300446
2585,United Kingdom,1890,,,,,,,,6611.666667,37526.666667,


In [11]:
import numpy as np
merged_df['region_category'] = np.nan



category_1 = ['Italy',
 'United Kingdom',
 'Japan',
 'Nordic countries',
 'Persian world',
 'Balkans',
 'Spain',
 'Latin World',
 'Low countries',
 'Korea',
 'Portugal',
 'German world',
 'France',
 'Chinese world',
 'Indian world',
 'East Slavic',
 'Central Europe',
 'Greek World',
 'Arabic world']


merged_df['region_category'][merged_df['region_name'].isin(category_1)] = 1
merged_df['region_category'][~merged_df['region_name'].isin(category_1)] = 0

merged_df.to_csv('df_indicators_hdi/data_hdi_metrics.csv')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_df['region_category'][merged_df['region_name'].isin(category_1)] = 1
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_df['region_category'][~merged_df['region_name'].isin(category_1)] = 0


In [13]:
merged_df

Unnamed: 0,region_name,year,numeracy_weighted,homicide_rate_weighted,infant_mortality_weighted,Gender_equality_num_weighted,height_weighted,life_expectancy_weighted,wellbeing_weighted,gdp_pc_weighted,pop,N_est,region_category
0,Arabic world,-610,,,,,,,,,,5.529170,1.0
1,Arabic world,-400,,,,,,,,,,2.611448,1.0
2,Arabic world,-360,,,,,,,,,,14.825990,1.0
3,Arabic world,-330,,,,,,,,,,6.061846,1.0
4,Arabic world,-310,,,,,,,,,,6.022250,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2582,United Kingdom,1860,97.4465,1.6,137.5,120.324804,166.60,40.862273,1.679138,5028.818182,28901.909091,15480.038693,1.0
2583,United Kingdom,1870,97.3455,1.6,136.3,121.799031,167.15,41.663889,1.786446,5695.222222,31388.444444,11764.644742,1.0
2584,United Kingdom,1880,97.7782,1.3,130.7,121.119752,167.95,42.720833,2.015722,6045.818182,34527.636364,18735.300446,1.0
2585,United Kingdom,1890,,,,,,,,6611.666667,37526.666667,,1.0


In [14]:
list(set(merged_df.region_name))

['German world',
 'Central Europe',
 'Northern China',
 'Spain',
 'Indian world',
 'Portugal',
 'Northern Japan',
 'Japan',
 'United Kingdom',
 'Persian world',
 'Balkans',
 'Southwestern Europe',
 'Latin World',
 'France',
 'East Slavic',
 'Korea',
 'Nordic countries',
 'Chinese world',
 'Northwestern Europe',
 'Greek World',
 'Eastern Europe',
 'Arabic world',
 'Low countries',
 'Italy',
 'Southern Japan',
 'Southern China']