<!-- ### Load Culture -->

In [102]:
import pandas as pd
import glob

# Collect every file in the indicator output folder whose path mentions
# 'regression_table'; everything else in that folder is ignored.
paths = [
    candidate
    for candidate in glob.glob('df_indicators_hdi/*')
    if 'regression_table' in candidate
]



# Function to add stars based on p-value
def add_stars(row, model, var, table=None):
    """Append significance stars to a "coef (se)" cell of a regression table.

    NOTE: a second, single-argument ``add_stars(row)`` is defined further down
    in this notebook; once that definition has run it replaces this one.

    Parameters
    ----------
    row : pandas.Series or mapping
        One row of the table. ``row['Unnamed: 0']`` is the row label and
        ``row[model]`` is the cell to format (text of the form "coef se").
    model : str
        Name of the column holding the model's results.
    var : str
        Variable name; the rows labelled ``f'{var} Coef'`` and
        ``f'{var} p-value'`` are the ones this function works with.
    table : pandas.DataFrame, optional
        Table in which the p-value row is looked up. When omitted, the
        notebook-level ``df`` is used, which is what the function always did
        before this parameter existed.

    Returns
    -------
    str or object
        For the 'Intercept' and ``f'{var} Coef'`` rows: "coef se" followed by
        '***' (p < 0.001), '**' (p < 0.01), '*' (p < 0.05) or nothing.
        For every other row: ``row[model]`` unchanged.

    Raises
    ------
    IndexError
        If the table has no ``f'{var} p-value'`` row, or the cell does not
        contain two space-separated tokens.
    """
    if row['Unnamed: 0'] not in ['Intercept', f'{var} Coef']:
        return row[model]

    if table is None:
        table = df  # fall back to the notebook-level frame (original behaviour)

    # The cell is split as text below, so the column holds strings; the p-value
    # must therefore be converted before it can be compared with a float.
    raw_p = table.loc[table['Unnamed: 0'] == f'{var} p-value', model].values[0]
    p_value = float(raw_p)

    parts = row[model].split(" ")
    coef = parts[0]  # coefficient value
    se = parts[1]    # standard error (in parentheses)

    # NOTE(review): the 'Intercept' row is starred with the p-value of `var`,
    # not with a p-value of its own — confirm this is intended.
    if p_value < 0.001:
        stars = '***'
    elif p_value < 0.01:
        stars = '**'
    elif p_value < 0.05:
        stars = '*'
    else:
        stars = ''
    return f"{coef} {se}{stars}"



# Read each regression-table CSV into its own frame.
final = []
for csv_path in paths:
    # `df` stays bound to the most recently read file after the loop; the
    # three-argument add_stars defined earlier in this notebook reads it.
    df = pd.read_csv(csv_path)
    final.append(df)

# Stack all tables, renumber the rows, and give the unnamed first CSV column
# (the statistic label) a proper name.
data = (
    pd.concat(final)
    .reset_index(drop=True)
    .rename(columns={'Unnamed: 0': 'measure'})
)
# Distinct statistic labels present in the combined table.
list(set(data['measure']))



def format_results(row):
    """Render one result row as "coef (se) ci [t]".

    Reads ``row['Coef']`` and ``row['Coef_SE']`` (formatted to three
    decimals), ``row['Coef_CI']`` (inserted as-is) and ``row['Coef_t-value']``
    (formatted to two decimals), and returns them joined by single spaces.
    """
    template = "{:.3f} ({:.3f}) {} [{:.2f}]"
    return template.format(
        row['Coef'],
        row['Coef_SE'],
        row['Coef_CI'],
        row['Coef_t-value'],
    )



# Adding significance stars based on p-values
def add_stars(row):
    """Return the row's coefficient as text, suffixed with significance stars.

    The suffix is chosen from ``row['Coef_p-value']``: '***' below 0.001,
    '**' below 0.01, '*' below 0.05, and no suffix otherwise. The coefficient
    itself is taken from ``row['Coef']``.
    """
    p_value = row['Coef_p-value']
    # Thresholds are checked from strictest to loosest; the first hit wins.
    for cutoff, marker in ((0.001, '***'), (0.01, '**'), (0.05, '*')):
        if p_value < cutoff:
            return f"{row['Coef']}{marker}"
    return f"{row['Coef']}"

In [106]:


# Statistics that feed the formatted table; rows of `data` carrying any other
# measure are dropped.
measures_to_keep = [
    'Coef_p-value',
    'Correlation',
    'Coef',
    'R-squared',
    'Coef_t-value',
    'Coef_CI',
    'Coef_SE',
    'N',
]

data = data[data['measure'].isin(measures_to_keep)]

# Reshape to one row per Variable and one column per measure, taking the
# values from the 'OLS Without FE' column.
table = data.pivot(index='Variable', columns='measure', values='OLS Without FE')
# add_stars compares the p-value against numeric thresholds, so cast it first.
table['Coef_p-value'] = table['Coef_p-value'].astype(float)

# Build the display cell: starred coefficient, then "(SE)" and "[t-value]".
table['Coef'] = table.apply(add_stars, axis=1)
table['Coef'] = table['Coef'] + " (" + table['Coef_SE'] + ")" + " [" + table['Coef_t-value'] + "]"
# .copy() turns the column subset into an independent frame, so adding the
# 'type' column below cannot trigger pandas' SettingWithCopyWarning.
table = table[['Coef', 'Coef_CI', 'Correlation', 'N']].copy()
table['type'] = 'OLS Without FE'
table = table.rename(columns={'Coef': 'CPI', 'Coef_CI': 'Confidence Intervals'})
table

measure,CPI,Confidence Intervals,Correlation,N,type
Variable,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Gender_equality_num,0.01** (0.00) [3.42],"(0.01, 0.02)",0.36,78,OLS Without FE
gdp,0.07*** (0.01) [12.59],"(0.06, 0.08)",0.49,508,OLS Without FE
height,0.00** (0.00) [2.94],"(0.00, 0.00)",0.22,168,OLS Without FE
homicide_rate,-1.02 (0.60) [-1.70],"(-2.21, 0.18)",-0.2,72,OLS Without FE
infant_mortality,0.10* (0.04) [2.44],"(0.02, 0.19)",0.35,44,OLS Without FE
life_expectancy,0.05* (0.02) [2.64],"(0.01, 0.09)",0.3,75,OLS Without FE
numeracy,0.22*** (0.04) [6.16],"(0.15, 0.29)",0.41,185,OLS Without FE
wellbeing,2.64*** (0.42) [6.32],"(1.81, 3.47)",0.53,105,OLS Without FE


In [104]:
# Reshape `data` (as left by the preceding cells) to one row per Variable and
# one column per measure, taking the values from the 'MixedLM With FE' column.
table = data.pivot(index='Variable', columns='measure', values='MixedLM With FE')
# add_stars compares the p-value against numeric thresholds, so cast it first.
table['Coef_p-value'] = table['Coef_p-value'].astype(float)

# Build the display cell: starred coefficient, then "(SE)" and "[t-value]".
table['Coef'] = table.apply(add_stars, axis=1)
table['Coef'] = table['Coef'] + " (" + table['Coef_SE'] + ")" + " [" + table['Coef_t-value'] + "]"
# .copy() turns the column subset into an independent frame, so adding the
# 'type' column below cannot trigger pandas' SettingWithCopyWarning.
table = table[['Coef', 'Coef_CI', 'Correlation', 'N']].copy()
table['type'] = 'MixedLM With FE'
table = table.rename(columns={'Coef': 'CPI', 'Coef_CI': 'Confidence Intervals'})

table

measure,CPI,Confidence Intervals,Correlation,N,type
Variable,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Gender_equality_num,0.01 (0.01) [1.47],"(-0.00, 0.02)",0.16,78,MixedLM With FE
gdp,0.06*** (0.00) [15.40],"(0.05, 0.07)",0.46,508,MixedLM With FE
height,0.00 (0.00) [0.32],"(-0.00, 0.00)",-0.13,168,MixedLM With FE
homicide_rate,-0.28 (0.32) [-0.86],"(-0.92, 0.36)",-0.38,72,MixedLM With FE
infant_mortality,-0.13 (0.07) [-1.84],"(-0.27, 0.01)",-0.41,44,MixedLM With FE
life_expectancy,0.11*** (0.03) [3.91],"(0.05, 0.16)",0.76,75,MixedLM With FE
numeracy,0.21*** (0.02) [9.12],"(0.17, 0.26)",0.68,185,MixedLM With FE
wellbeing,2.05** (0.69) [2.99],"(0.70, 3.39)",0.28,105,MixedLM With FE


In [105]:


#data.to_csv('db_extract/hdi_indicators.csv')