In [1]:
import pandas as pd
import numpy as np
from scipy import stats
import plotly
import plotly.graph_objs as go
import plotly.express as px
from plotly.offline import download_plotlyjs,init_notebook_mode,plot,iplot

In [2]:
def make_map(df, metrics):
    """Display an interactive choropleth world map with a metric dropdown.

    One ``go.Choropleth`` trace is created per entry of ``metrics``. Only
    traces for the first metric start visible; each dropdown button sends a
    per-trace visibility mask so that the selected metric is the one shown.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs a ``'Country'`` column (locations are matched with
        ``locationmode='country names'``) and one column per metric.
    metrics : sequence of str
        Column names to plot. Lists, tuples and NumPy arrays are all accepted.

    Returns
    -------
    None
        The figure is rendered with ``iplot`` as a side effect.
    """
    metric_names = list(metrics)
    countries = df['Country'].tolist()

    traces = [
        go.Choropleth(locations=countries,
                      locationmode='country names',
                      z=df[metric],
                      colorscale='Portland',
                      marker_line_color='darkgray',
                      marker_line_width=0.5,
                      reversescale=True,
                      colorbar={'title': metric, 'len': 200, 'lenmode': 'pixels'},
                      # Start with only the first metric showing
                      visible=bool(metric == metric_names[0]))
        for metric in metric_names
    ]

    # One button per metric. The mask is built element by element because
    # `metrics == metric` is only element-wise for NumPy arrays; for a plain
    # list it collapses to a single False and the button would hide everything.
    # May also need colorbox title?
    buttons = [
        dict(method='update',
             label=metric,
             args=[{'visible': [bool(other == metric) for other in metric_names]}])
        for metric in metric_names
    ]

    dropdown = dict(buttons=buttons, direction='down', x=0.01, xanchor='left',
                    y=0.99, yanchor='bottom', font=dict(size=11))
    updatemenus = [dropdown]  # If we want multiple dropdowns, add em to the list!
    layout = dict(updatemenus=updatemenus,
                  title='Economic and Social Freedoms')

    col_map = go.Figure(data=traces, layout=layout)
    iplot(col_map)

In [3]:
def regional_ttest(df, metrics, region_labels=None):
    """Run pairwise two-sample t-tests between regions for each metric.

    For every column in ``metrics`` and every unordered pair of regions found
    in ``df['Region']``, the non-null values of the two regions are compared
    with ``scipy.stats.ttest_ind`` and the p-value is stored.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs a ``'Region'`` column and every column named in ``metrics``.
    metrics : iterable of str
        Columns to test.
    region_labels : dict or sequence of str, optional
        Display labels used to build the result keys.

        * dict -- maps region name to label; regions missing from the dict
          are labelled with their own name.
        * sequence -- applied positionally, in the order regions first appear
          in ``df['Region']``.
        * ``None`` (default) -- the short labels
          ``['Asia-Pac', 'Eur', 'ME & NA', 'Sub-Shra', 'Americas']`` are
          applied positionally.

        When a sequence has fewer entries than there are regions, the region
        names themselves are used instead of raising ``IndexError``.
        Positional labels are only correct if the regions appear in the
        matching order; pass a dict to make the labelling order-independent.

    Returns
    -------
    dict
        ``{metric: {'<label_a> - <label_b>': p_value, ...}, ...}``
    """
    # Rows without a region cannot belong to any group, so leave NaN out
    regions = list(df['Region'].dropna().unique())

    if region_labels is None:
        region_labels = ['Asia-Pac', 'Eur', 'ME & NA', 'Sub-Shra', 'Americas']

    if isinstance(region_labels, dict):
        labels = [region_labels.get(region, str(region)) for region in regions]
    elif len(region_labels) >= len(regions):
        labels = list(region_labels)[:len(regions)]
    else:
        # Not enough positional labels: fall back to the real region names
        labels = [str(region) for region in regions]

    scores = {}
    for col in metrics:
        # Build each region's sample once instead of once per pair
        samples = [df.loc[df['Region'] == region, col].dropna() for region in regions]
        scores_col = {}
        for i in range(len(regions) - 1):
            for j in range(i + 1, len(regions)):
                p = stats.ttest_ind(samples[i], samples[j])[1]
                scores_col[labels[i] + ' - ' + labels[j]] = p
        scores[col] = scores_col
    return scores

In [4]:
# Count the distinct values in the 'Country' column.
# NOTE(review): `df` is only created by the data-loading cell further down, so
# on a fresh kernel this cell raises NameError; run it after that cell.
df['Country'].nunique()

NameError: name 'df' is not defined

In [None]:
# --- Load the raw tables (read with latin-1 encoding) ---
hdi = pd.read_csv('human_dev_2018.csv', encoding='latin-1')
gender_dev = pd.read_csv('gender_dev_2018.csv', encoding='latin-1')
# NOTE(review): this reads 'gender_dev_2018.csv' a second time, so gender_ineq
# holds the same data as gender_dev. Presumably a separate gender-inequality
# file was intended -- confirm the correct path before relying on '_gineq' columns.
gender_ineq = pd.read_csv('gender_dev_2018.csv', encoding='latin-1')
econ = pd.read_csv('economic_freedom_2019.csv', encoding='latin-1')

# --- Combine on country name ---
# merge() defaults to an inner join, so a country missing from (or spelled
# differently in) any one table is dropped from the result.
df = hdi.merge(gender_dev, on='Country', suffixes=['_hdi', '_gdev'])
df = df.merge(gender_ineq, on='Country', suffixes=['', '_gineq'])
df = df.merge(econ, left_on='Country', right_on='Country Name', suffixes=['', '_econ'])

# --- Clean string artefacts so numeric columns can be cast ---
# '..' marks a missing value in the source tables
df = df.replace('..', np.nan)
# Strip thousands separators and dollar signs from every string cell.
# (Raw string: '\$' in a normal literal is an invalid escape sequence.)
df = df.replace(r'[,$]', '', regex=True)

# Cells carrying a footnote or unit next to the number; keep just the number.
# These must run after the comma stripping above, which they assume.
footnote_fixes = {
    '40.0 (2015 est.)': '40.0',
    '6.1 CHF (2014 )': '6.1',
    '2.1 (2016)': '2.1',
    '38000 ppl.': '38000',
    '139100 (2009 est.)': '139100',
    '1700 (2015 est.)': '1700',
}
for raw_value, cleaned_value in footnote_fixes.items():
    df = df.replace(raw_value, cleaned_value)

df = df.astype({'Population (Millions)': 'float64', 'GDP (Billions, PPP)': 'float64',
                'GDP per Capita (PPP)': 'float64', 'Unemployment (%)': 'float64',
                'FDI Inflow (Millions)': 'float64', 'GDI': 'float64'})
df = df.rename(columns={'Country_x': 'Country'})

# --- Derived columns ---
df['HDI_gender_diff'] = df['HDI_f'].astype('float') - df['HDI_m'].astype('float')
df['HDI_delta'] = df['HDI_rank'] - df['HDI_rank_2017']
df['Mean_sch_diff'] = df['Mean_sch_f'].astype('float') - df['Mean_sch_m'].astype('float')
# NOTE(review): unlike the two gender differences above (female - male), this
# one is male - female -- confirm the sign convention is intentional.
df['Life_exp_diff'] = df['Life_exp_m'].astype('float') - df['Life_exp_f'].astype('float')

# Preview only the first rows instead of dumping the whole frame
df.head()

In [None]:
# Metrics for which a per-region average is computed
to_get_regional = [
    'HDI',
    'Government Integrity',
    'Business Freedom',
    'Labor Freedom',
    'GDI',
    'HDI_gender_diff',
    'GDP (Billions, PPP)',
    'GDP Growth Rate (%)',
    'Unemployment (%)',
]

In [None]:
# Attach each row's regional average as '<metric>_regional' columns.
# groupby().transform was giving a bug here, so the per-region means are
# computed once and then looked up through the 'Region' column. Mapping is
# vectorised (no Python call per row) and yields NaN, rather than KeyError,
# for rows whose Region is missing.
regional = df.groupby('Region')[to_get_regional].mean()
regional_col_names = [x+'_regional' for x in to_get_regional]
for source_col, regional_col in zip(to_get_regional, regional_col_names):
    df[regional_col] = df['Region'].map(regional[source_col])

In [None]:
# Columns offered in the map dropdown: country-level metrics followed by the
# regional-average columns. Kept as a NumPy array of column names.
to_plot = np.array([
    'HDI',
    'Life_exp',
    'Mean_sch',
    'GNI_percap',
    'GDI',
    'Labor Freedom',
    'HDI_gender_diff',
    'HDI_delta',
    'Life_exp_diff',
    'Mean_sch_diff',
    *regional_col_names,
])

In [None]:
# Pairwise regional t-test p-values for every regional metric; print the
# 'Government Integrity' results to four decimal places.
ttests = regional_ttest(df, to_get_regional)
for pair, p_value in ttests["Government Integrity"].items():
    print(f'{pair}: {p_value:.4f}')

In [None]:
# Render the choropleth with one dropdown entry per column in to_plot
make_map(df, metrics=to_plot)