In [None]:
'''
Sources:
HDI
- Human Development Index (HDI).csv:
http://hdr.undp.org/en/data
- human-development-index-escosura.csv:
https://ourworldindata.org/human-development-index

GINI
- economic-inequality-gini-index.csv:
https://ourworldindata.org/grapher/economic-inequality-gini-index

shapefile
- Countries_WGS84.shp:
https://hub.arcgis.com/datasets/a21fdb46d23e4ef896f31475217cbb08_1

https://www.naturalearthdata.com/downloads/10m-cultural-vectors/
'''

# GINI Index

In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np

In [2]:
df_map = gpd.read_file('./geo/Countries_WGS84/Countries_WGS84.shp')
df_map.columns = ['country_code', 'country', 'geometry']
df_map.drop(df_map.index[14], inplace=True) #Drop row corresponding to 'Antarctica'

In [3]:
df_gini = pd.read_csv('./data/GINI/economic-inequality-gini-index.csv')

In [4]:
df_gini_pivot = df_gini.pivot_table(index=['Entity', 'Code'], columns='Year', values='GINI index (World Bank estimate)').reset_index()
df_gini_pivot = df_gini_pivot.fillna(method='bfill', axis=1)
df_gini_pivot = df_gini_pivot.fillna(method='ffill', axis=1)
#df_gini_pivot.fillna(0, inplace=True)
df_gini_pivot

Year,Entity,Code,1979,1980,1981,1982,1983,1984,1985,1986,...,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015
0,Albania,ALB,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,...,30.0,30.0,30.0,29.0,29.0,29.0,29.0,29.0,29.0,29.0
1,Algeria,DZA,40.2,40.2,40.2,40.2,40.2,40.2,40.2,40.2,...,27.6,27.6,27.6,27.6,27.6,27.6,27.6,27.6,27.6,27.6
2,Angola,AGO,42.7,42.7,42.7,42.7,42.7,42.7,42.7,42.7,...,42.7,42.7,42.7,42.7,42.7,42.7,42.7,42.7,42.7,42.7
3,Argentina,ARG,40.8,40.8,42.8,42.8,42.8,42.8,42.8,42.8,...,48.3,47.4,46.3,45.3,44.5,43.6,42.5,42.3,42.7,42.7
4,Armenia,ARM,36.2,36.2,36.2,36.2,36.2,36.2,36.2,36.2,...,32.5,29.8,30.7,29.6,31.1,31.3,30.5,31.5,31.5,32.4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
158,Venezuela,VEN,55.6,55.6,55.6,53.5,53.5,53.5,53.5,53.5,...,46.9,46.9,46.9,46.9,46.9,46.9,46.9,46.9,46.9,46.9
159,Vietnam,VNM,35.7,35.7,35.7,35.7,35.7,35.7,35.7,35.7,...,35.8,35.6,35.6,39.3,39.3,35.7,35.7,34.8,34.8,34.8
160,Yemen,YEM,35.0,35.0,35.0,35.0,35.0,35.0,35.0,35.0,...,36.7,36.7,36.7,36.7,36.7,36.7,36.7,36.7,36.7,36.7
161,Zambia,ZMB,60.5,60.5,60.5,60.5,60.5,60.5,60.5,60.5,...,54.6,55.6,55.6,55.6,55.6,57.1,57.1,57.1,57.1,57.1


In [5]:
df_gini_pivot[df_gini_pivot['Entity'] == 'Denmark'] 

Year,Entity,Code,1979,1980,1981,1982,1983,1984,1985,1986,...,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015
37,Denmark,DNK,24.9,24.9,24.9,24.9,24.9,24.9,24.9,24.9,...,25.9,26.2,25.3,26.7,27.3,27.5,27.8,28.5,28.5,28.5


In [6]:
df_gini_pivot[df_gini_pivot['Entity'] == 'Brazil'] 

Year,Entity,Code,1979,1980,1981,1982,1983,1984,1985,1986,...,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015
17,Brazil,BRA,58.0,58.0,58.0,58.4,59.0,58.4,55.6,58.5,...,55.9,55.2,54.4,53.9,53.1,53.1,52.7,52.9,51.5,51.3


In [7]:
replace_values  = {
     'Myanmar':'Myanmar (Burma)',
     'Saint Lucia':'St. Lucia',
     'Tanzania':'Tanzania, United Republic of'
    }

df_gini_pivot.replace({'Entity': replace_values}, inplace=True)

In [8]:
YEAR = 2011
gpd_gini = pd.merge(df_map, df_gini_pivot[['Entity', YEAR]], left_on='country', right_on='Entity', how = 'left')
gpd_gini[[YEAR]] = gpd_gini[[YEAR]].fillna(0)

In [None]:
import json
from bokeh.io import output_notebook, show, output_file
from bokeh.models import LogColorMapper, LinearColorMapper, ColorBar, LogTicker, GeoJSONDataSource
from bokeh.palettes import brewer, d3, all_palettes
from bokeh.plotting import figure

In [None]:
geosource = GeoJSONDataSource(geojson = gpd_gini.to_json())

In [None]:
#Define a sequential multi-hue color palette.
palette = all_palettes['Viridis'][8]
#Reverse color order
palette = palette[::-1]

color_mapper = LinearColorMapper(palette=palette, low = 0, high = gpd_gini[YEAR].max())

TOOLS = "pan,wheel_zoom,reset,hover,save"

tick_labels = {'0': '0%', '5': '5%', '10':'10%', '15':'15%', '20':'20%', '25':'25%', '30':'30%','35':'35%', '40': '>40%'}

color_bar = ColorBar(color_mapper=color_mapper,
                     label_standoff=8,
                     width = 500,
                     height = 20,
                     border_line_color=None,
                     location = (0,0), 
                     orientation = 'horizontal')
                     #major_label_overrides = tick_labels)

p = figure(
    title='GINI Index, ' + str(YEAR), 
    tools=TOOLS,
    plot_height = 480,
    plot_width = 720,
    x_axis_location=None,
    y_axis_location=None,
    tooltips=[
        ("Country", "@country"), ("GINI", "@"+str(YEAR))
    ])
p.grid.grid_line_color = None
p.hover.point_policy = "follow_mouse"

p.patches('xs','ys', source=geosource,
          fill_color={'field': str(YEAR), 'transform': color_mapper},
          fill_alpha=0.8, line_color="black", line_width=0.2)

p.add_layout(color_bar, 'below')

output_notebook()

show(p)

# HDI 

In [9]:
df_hdi = pd.read_csv('./data/HDI/Human Development Index (HDI).csv', sep=',', skiprows=1)
df_hdi.dropna(axis=1, how='all', inplace=True) # drop empty columns
df_hdi.drop(df_hdi.index[189:], inplace=True) # drop last part of the table
df_hdi.replace('..', 0, inplace=True) # replace null values to zero
df_hdi.iloc[:,2:] = df_hdi.iloc[:,2:].astype(float)

In [10]:
replace_values  = {
     'Bahamas':'Bahamas, The',
     'Belarus':'Byelarus',
     'Bolivia (Plurinational State of)':'Bolivia',
     'Brunei Darussalam':'Brunei',
     'Cabo Verde':'Cape Verde',
     'Czechia':'Czech Republic',
     'Gambia':'Gambia, The',
     'Iran (Islamic Republic of)':'Iran',
     'Korea (Republic of)':'South Korea',
     'Micronesia (Federated States of)':'Federated States of Micronesia',
     'Moldova (Republic of)':'Moldova',
     'Myanmar':'Myanmar (Burma)',
     'North Macedonia':'Macedonia',
     'Palau':'Pacific Islands (Palau)',
     'Russian Federation':'Russia',
     'Saint Kitts and Nevis':'St. Kitts and Nevis',
     'Saint Lucia':'St. Lucia',
     'Saint Vincent and the Grenadines':'St. Vincent and the Grenadines',
     'Samoa':'Western Samoa',
     'Syrian Arab Republic':'Syria',
     'Tanzania (United Republic of)':'Tanzania, United Republic of',
     'Venezuela (Bolivarian Republic of)':'Venezuela',
     'Viet Nam':'Vietnam'
    }

df_hdi.replace({'Country': replace_values}, inplace=True)

In [11]:
df_hdi[df_hdi['Country'] == 'Denmark'] 

Unnamed: 0,HDI Rank (2018),Country,1990,1991,1992,1993,1994,1995,1996,1997,...,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
46,11,Denmark,0.799,0.804,0.809,0.815,0.824,0.831,0.837,0.843,...,0.906,0.91,0.922,0.924,0.926,0.928,0.926,0.928,0.929,0.93


In [12]:
df_hdi[df_hdi['Country'] == 'Brazil'] 

Unnamed: 0,HDI Rank (2018),Country,1990,1991,1992,1993,1994,1995,1996,1997,...,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
23,79,Brazil,0.613,0.62,0.626,0.634,0.642,0.651,0.657,0.664,...,0.718,0.726,0.73,0.734,0.752,0.755,0.755,0.757,0.76,0.761


In [13]:
YEAR = str(YEAR)
gpd_hdi = pd.merge(df_map, df_hdi[['Country', YEAR]], left_on='country', right_on='Country', how = 'left')
gpd_hdi[[YEAR]] = gpd_hdi[[YEAR]].fillna(0)

In [None]:
geosource = GeoJSONDataSource(geojson = gpd_hdi.to_json())

In [None]:
#Define a sequential multi-hue color palette.
palette = all_palettes['Viridis'][8]
#Reverse color order 
#palette = palette[::-1]

color_mapper = LinearColorMapper(palette=palette, low = 0, high = 1)

TOOLS = "pan,wheel_zoom,reset,hover,save"

tick_labels = {'0': '0%', '5': '5%', '10':'10%', '15':'15%', '20':'20%', '25':'25%', '30':'30%','35':'35%', '40': '>40%'}

color_bar = ColorBar(color_mapper=color_mapper,
                     label_standoff=8,
                     width = 500,
                     height = 20,
                     border_line_color=None,
                     location = (0,0), 
                     orientation = 'horizontal')
                     #major_label_overrides = tick_labels)

p = figure(
    title='Human Development Index, ' + YEAR, 
    tools=TOOLS,
    plot_height = 480,
    plot_width = 720,
    x_axis_location=None,
    y_axis_location=None,
    tooltips=[
        ("Country", "@country"), ("HDI", "@"+YEAR)
    ])
p.grid.grid_line_color = None
p.hover.point_policy = "follow_mouse"

p.patches('xs','ys', source=geosource,
          fill_color={'field': YEAR, 'transform': color_mapper},
          fill_alpha=0.8, line_color="black", line_width=0.1)

p.add_layout(color_bar, 'below')

output_notebook()

show(p)

# Normalising Data 

In [14]:
YEAR = int(YEAR)

In [15]:
# GINI normalized
df_gini_pivot['norm_'+str(YEAR)] = ((df_gini_pivot[YEAR]-df_gini_pivot[YEAR].min())/(df_gini_pivot[YEAR].max()-df_gini_pivot[YEAR].min()))

In [16]:
# Inverted GINI
df_gini_pivot['inv_'+str(YEAR)] = df_gini_pivot.loc[:,'norm_'+str(YEAR)].apply(lambda x : 1-x)

In [17]:
# HDI normalized
df_hdi['norm_'+str(YEAR)]=((df_hdi[str(YEAR)]-df_hdi[str(YEAR)].min())/(df_hdi[str(YEAR)].max()-df_hdi[str(YEAR)].min()))

# Correlation

In [18]:
df_corr = pd.merge(df_gini_pivot[['Entity', 'inv_'+str(YEAR)]], df_hdi[['Country', 'norm_'+str(YEAR)]], left_on='Entity', right_on='Country')
df_corr['corr'] = df_corr['inv_'+str(YEAR)] - df_corr['norm_'+str(YEAR)]

In [29]:
df_corr.sort_values('corr').head(60)

Unnamed: 0,Entity,inv_2011,Country,norm_2011,corr
120,South Africa,0.0,South Africa,0.703075,-0.703075
15,Botswana,0.07455,Botswana,0.716861,-0.642311
126,Suriname,0.1491,Suriname,0.750795,-0.601695
93,Namibia,0.061697,Namibia,0.637328,-0.575631
27,Colombia,0.236504,Colombia,0.779427,-0.542924
101,Panama,0.298201,Panama,0.81018,-0.51198
16,Brazil,0.264781,Brazil,0.774125,-0.509344
55,Honduras,0.154242,Honduras,0.637328,-0.483086
10,Belize,0.25964,Belize,0.741251,-0.481611
103,Paraguay,0.277635,Paraguay,0.73913,-0.461495


In [30]:
df_corr.sort_values('corr').tail(60)

Unnamed: 0,Entity,inv_2011,Country,norm_2011,corr
99,Norway,0.979434,Norway,1.0,-0.0205656
57,Iceland,0.940874,Iceland,0.955461,-0.0145873
35,Djibouti,0.470437,Djibouti,0.478261,-0.00782385
89,Montenegro,0.838046,Montenegro,0.845175,-0.0071287
67,Jordan,0.763496,Jordan,0.768823,-0.00532676
14,Bosnia and Herzegovina,0.760925,Bosnia and Herzegovina,0.765642,-0.00471612
79,Madagascar,0.532134,Madagascar,0.534464,-0.0023308
150,Zimbabwe,0.51928,Zimbabwe,0.519618,-0.000338034
98,Nigeria,0.524422,Nigeria,0.52386,0.000561573
88,Mongolia,0.758355,Mongolia,0.753977,0.00437809


In [None]:
gpd_corr = pd.merge(df_map, df_corr[['Country', 'corr']], left_on='country', right_on='Country', how = 'left')
gpd_corr[['corr']] = gpd_corr[['corr']].fillna(0)

In [None]:
geosource = GeoJSONDataSource(geojson = gpd_corr.to_json())

In [None]:
#Define a sequential multi-hue color palette.
palette = all_palettes['Viridis'][8]
#Reverse color order 
#palette = palette[::-1]

color_mapper = LinearColorMapper(palette=palette, low = -1, high = 1)

TOOLS = "pan,wheel_zoom,reset,hover,save"

tick_labels = {'0': '0%', '5': '5%', '10':'10%', '15':'15%', '20':'20%', '25':'25%', '30':'30%','35':'35%', '40': '>40%'}

color_bar = ColorBar(color_mapper=color_mapper,
                     label_standoff=8,
                     width = 500,
                     height = 20,
                     border_line_color=None,
                     location = (0,0), 
                     orientation = 'horizontal')
                     #major_label_overrides = tick_labels)

p = figure(
    title='GINI Index vs Human Development Index, ' + str(YEAR), 
    tools=TOOLS,
    plot_height = 480,
    plot_width = 720,
    x_axis_location=None,
    y_axis_location=None,
    tooltips=[
        ("Country", "@country"), ("Correl", "@corr")
    ])
p.grid.grid_line_color = None
p.hover.point_policy = "follow_mouse"

p.patches('xs','ys', source=geosource,
          fill_color={'field': 'corr', 'transform': color_mapper},
          fill_alpha=0.8, line_color="black", line_width=0.1)

p.add_layout(color_bar, 'below')

output_notebook()

show(p)