In [1]:
import pandas as pd
import numpy as np
from scipy import stats
import plotly
import plotly.graph_objs as go
import plotly.express as px
from plotly.offline import download_plotlyjs,init_notebook_mode,plot,iplot

In [2]:
def make_map(df, metrics):
    traces=[]

    for metric in metrics:
        traces.append(go.Choropleth(locations=df['Country'].tolist(),
                                    locationmode='country names',
                                    z=df[metric],
                                    colorscale='Portland',
                                    marker_line_color='darkgray',
                                    marker_line_width=0.5,
                                    #text= df.Country.tolist(),
                                    reversescale=True,
                                    colorbar = {'title':metric, 'len':200,'lenmode':'pixels'},
                                    visible=True if metric==metrics[0] else False)
                     )

    updatemenus = []

    buttons=[]
    for metric in metrics:
        # May also need colorbox title?
        buttons.append(dict(method='update',
                            label=metric,
                            args=[{'visible': metrics==metric}])
                      )

    dropdown = dict(buttons=buttons, direction='down',x = 0.01,xanchor = 'left',
                    y = 0.99,yanchor = 'bottom',font = dict(size=11))
    updatemenus=[dropdown] # If we want multiple dropdowns, add em to the list!
    layout = dict(updatemenus=updatemenus,
                  title='Title')

    col_map = go.Figure(data = traces,layout = layout)
    iplot(col_map)

In [3]:
def regional_ttest(df, metrics):
    regions = df.Region.unique()
    regions_short = ['Asia-Pac', 'Eur', 'ME & NA', 'Sub-Shra', 'Americas']
    scores = {}
    for col in metrics:
        scores_col = {}
        for i in range(len(regions)-1):
            for j in range(i+1,len(regions)):
                p = stats.ttest_ind(df.loc[df.Region==regions[i],col].dropna(),
                                    df.loc[df.Region==regions[j],col].dropna())[1]
                scores_col[regions_short[i]+' - '+regions_short[j]] = p
        scores[col] = scores_col
    return scores

In [27]:
hdi = pd.read_csv('human_dev_2018.csv',encoding='latin-1')
gender_dev = pd.read_csv('gender_dev_2018.csv',encoding='latin-1')
gender_ineq = pd.read_csv('gender_dev_2018.csv',encoding='latin-1')
econ = pd.read_csv('economic_freedom_2019.csv', encoding='latin-1')

df = hdi.merge(gender_dev,on='Country', suffixes=['_hdi','_gdev'])
df = df.merge(gender_ineq, on='Country',suffixes=['','_gineq'])
df = df.merge(econ, left_on='Country', right_on='Country Name',suffixes=['','_econ'])
df.replace('..',np.nan,inplace=True)

df.replace(',','', regex=True, inplace=True)
df.replace('\$','', regex=True, inplace=True)
df.replace('40.0 (2015 est.)', '40.0', inplace=True)
df.replace('6.1 CHF (2014 )', '6.1', inplace=True)
df.replace('2.1 (2016)', '2.1', inplace=True)
df.replace('38000 ppl.', '38000', inplace=True)
df.replace('139100 (2009 est.)', '139100', inplace=True)
df.replace('1700 (2015 est.)','1700', inplace=True)
df=df.astype({'Population (Millions)': 'float64','GDP (Billions, PPP)': 'float64',
              'GDP per Capita (PPP)': 'float64','Unemployment (%)': 'float64',
              'FDI Inflow (Millions)': 'float64', 'GDI': 'float64' })
df.rename(columns={'Country_x':'Country'}, inplace=True)

df['HDI_gender_diff'] = df['HDI_f'].astype('float') - df['HDI_m'].astype('float')
df['HDI_delta'] = df['HDI_rank'] - df['HDI_rank_2017']
df['Mean_sch_diff'] = df['Mean_sch_f'].astype('float') - df['Mean_sch_m'].astype('float')
df['Life_exp_diff'] = df['Life_exp_m'].astype('float') - df['Life_exp_f'].astype('float')

df

Unnamed: 0,HDI_rank,Country,HDI,Life_exp,Exp_sch,Mean_sch,GNI_percap,GNI_HDI_rank_diff,HDI_rank_2017,GDI,...,5 Year GDP Growth Rate (%),GDP per Capita (PPP),Unemployment (%),Inflation (%),FDI Inflow (Millions),Public Debt (% of GDP),HDI_gender_diff,HDI_delta,Mean_sch_diff,Life_exp_diff
0,167.0,Afghanistan,0.496,64.5,10.1,3.9,1746,1.0,170.0,0.723,...,2.9,1958.0,8.8,5.0,53.9,7.3,-0.157,-3.0,-4.1,-3.0
1,68.0,Albania,0.791,78.5,15.2,10.1,12300,20.0,69.0,0.971,...,2.5,12507.0,13.9,2.0,1119.1,71.2,-0.023,-1.0,-0.3,-3.4
2,81.0,Algeria,0.759,76.7,14.7,8.0,13639,0.0,81.0,0.865,...,3.1,15237.0,10.0,5.6,1203.0,25.8,-0.107,0.0,-0.6,-2.4
3,147.0,Angola,0.574,60.8,11.8,5.1,5555,-16.0,147.0,0.902,...,2.9,6753.0,8.2,31.7,-2254.5,65.3,-0.059,0.0,-2.4,-5.6
4,48.0,Argentina,0.830,76.5,17.6,10.6,17611,18.0,48.0,0.988,...,0.7,20876.0,8.7,25.7,11857.0,52.6,-0.010,0.0,0.2,-6.8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
175,96.0,Venezuela,0.726,72.1,12.8,10.3,9070,14.0,92.0,1.013,...,-7.8,12114.0,7.7,1087.5,-68.0,34.9,0.009,4.0,0.7,-7.7
176,,Vietnam,0.693,75.3,12.7,8.2,6220,10.0,118.0,1.003,...,6.2,6913.0,2.1,3.5,14100.0,58.2,0.001,,-0.6,-8.2
177,174.0,Yemen,0.463,66.1,8.7,3.2,1433,3.0,175.0,0.458,...,-16.1,1287.0,14.0,4.9,-269.9,141.0,-0.290,-1.0,-2.5,-3.4
178,141.0,Zambia,0.591,63.5,12.1,7.1,3582,7.0,144.0,0.949,...,4.0,3996.0,7.8,6.6,1091.2,62.2,-0.031,-3.0,-0.8,-5.9


In [28]:
to_get_regional = ['HDI','Government Integrity', 'Business Freedom', 'Labor Freedom',
             'GDI', 'HDI_gender_diff', 'GDP (Billions, PPP)', 'GDP Growth Rate (%)',
             'Unemployment (%)']
to_plot = np.array(['HDI','Life_exp','Mean_sch','GNI_percap','GDI','Labor Freedom',
                   'HDI_gender_diff', 'HDI_delta', 'Life_exp_diff',
                   'Mean_sch_diff'])

In [29]:
# Getting a bug with .transform so need to use a lambda instead
regional = df.groupby('Region')[to_get_regional].mean()
regional_col_names = [x+'_regional' for x in vals_todo]
df[regional_col_names]=df.apply(lambda x: regional.loc[x.Region, vals_todo],axis=1)

In [32]:
ttests = regional_ttest(df, vals_todo)
ttests['HDI']

{'Asia-Pac - Eur': 1.6279942883787921e-10,
 'Asia-Pac - ME & NA': 0.28352905783792265,
 'Asia-Pac - Sub-Shra': 1.0214557542922163e-10,
 'Asia-Pac - Americas': 0.16228641397039228,
 'Eur - ME & NA': 2.3850345037847713e-06,
 'Eur - Sub-Shra': 3.278473425954058e-32,
 'Eur - Americas': 6.039130316254105e-10,
 'ME & NA - Sub-Shra': 3.8722579415326696e-10,
 'ME & NA - Americas': 0.9685323015614629,
 'Sub-Shra - Americas': 2.1580654703787503e-15}

In [30]:
make_map(df, to_plot)