In [1]:
import pandas as pd
import numpy as np
from pandas_datareader import wb
import pycountry

import altair as alt

import json

import wbgapi as wb

### WORLD BANK GDP PER CAPITA (PPP)

#### COUNTRIES

In [2]:
# Former USSR countries
# One (name, region, independence date written as D.MM.YYYY) row per republic.
_fsu_rows = [
    ('Russia', 'Eastern Europe', '12.12.1991'),
    ('Ukraine', 'Eastern Europe', '24.08.1991'),
    ('Belarus', 'Eastern Europe', '25.08.1991'),
    ('Lithuania', 'Baltic states', '11.03.1990'),
    ('Latvia', 'Baltic states', '4.05.1990'),
    ('Estonia', 'Baltic states', '8.05.1990'),
    ('Moldova', 'Eastern Europe', '27.08.1991'),

    ('Uzbekistan', 'Central Asia', '31.08.1991'),
    ('Kazakhstan', 'Central Asia', '16.12.1991'),
    ('Kyrgyzstan', 'Central Asia', '31.08.1991'),
    ('Tajikistan', 'Central Asia', '9.09.1991'),
    ('Turkmenistan', 'Central Asia', '27.10.1991'),

    ('Georgia', 'South Caucasus', '9.04.1991'),
    ('Azerbaijan', 'South Caucasus', '30.08.1991'),
    ('Armenia', 'South Caucasus', '23.08.1990'),
]

# Expand the rows into the record dicts the rest of the notebook works with
fsu_countries = [
    {'name': name, 'region': region, 'independence': independence}
    for name, region, independence in _fsu_rows
]

In [3]:
# Former Warsaw Treaty/Pact countries, excluding East Germany
# One (name, region, date as D.MM.YYYY) row per country; the date is stored
# under the same 'independence' key used for the former-USSR records.
_fwt_rows = [
    ('Bulgaria', 'Eastern Europe', '15.11.1990'),
    ('Slovakia', 'Eastern Europe', '1.01.1993'),
    ('Czech Republic', 'Eastern Europe', '1.01.1993'),
    ('Hungary', 'Eastern Europe', '23.10.1989'),
    ('Poland', 'Eastern Europe', '29.12.1989'),
    ('Romania', 'Eastern Europe', '25.12.1989'),
]

fwt_countries = [
    {'name': name, 'region': region, 'independence': independence}
    for name, region, independence in _fwt_rows
]

In [4]:
# Add ISO 3 codes
# Every record gets the alpha-3 code of its best fuzzy match plus the label of
# the bloc list it came from.
for bloc_label, bloc_records in (('FSU', fsu_countries), ('FWT', fwt_countries)):
    for rec in bloc_records:
        best_match = pycountry.countries.search_fuzzy(rec['name'])[0]
        rec['iso'] = best_match.alpha_3
        rec['group'] = bloc_label

In [5]:
# Index every country record (both blocs) by its ISO alpha-3 code
iso_codes = dict((rec['iso'], rec) for rec in fsu_countries + fwt_countries)

In [6]:
iso_codes

{'RUS': {'name': 'Russia',
  'region': 'Eastern Europe',
  'independence': '12.12.1991',
  'iso': 'RUS',
  'group': 'FSU'},
 'UKR': {'name': 'Ukraine',
  'region': 'Eastern Europe',
  'independence': '24.08.1991',
  'iso': 'UKR',
  'group': 'FSU'},
 'BLR': {'name': 'Belarus',
  'region': 'Eastern Europe',
  'independence': '25.08.1991',
  'iso': 'BLR',
  'group': 'FSU'},
 'LTU': {'name': 'Lithuania',
  'region': 'Baltic states',
  'independence': '11.03.1990',
  'iso': 'LTU',
  'group': 'FSU'},
 'LVA': {'name': 'Latvia',
  'region': 'Baltic states',
  'independence': '4.05.1990',
  'iso': 'LVA',
  'group': 'FSU'},
 'EST': {'name': 'Estonia',
  'region': 'Baltic states',
  'independence': '8.05.1990',
  'iso': 'EST',
  'group': 'FSU'},
 'MDA': {'name': 'Moldova',
  'region': 'Eastern Europe',
  'independence': '27.08.1991',
  'iso': 'MDA',
  'group': 'FSU'},
 'UZB': {'name': 'Uzbekistan',
  'region': 'Central Asia',
  'independence': '31.08.1991',
  'iso': 'UZB',
  'group': 'FSU'},
 'KA

In [7]:
# Countries/territories outside the two blocs that are kept on the maps.
# Each row is (name, id); the id is compared against the 'id' property of the
# map features. Croatia and Bosnia and Herzegovina are listed twice, exactly as
# in the original list.
_neighbour_rows = [
    ("Norway", "425"),
    ("Finland", "434"),
    ("Sweden", "455"),
    ("Germany", "439"),
    ("Austria", "317"),
    ("Slovenia", "246"),
    ("Croatia", "296"),
    ("Bosnia and Herzegovina", "293"),
    ("Serbia and Montenegro", "332"),
    #("Serbia and Montenegro", "353"),
    ("North Macedonia", "256"),
    ("Greece", "358"),
    ("Turkey", "493"),
    ("Iran", "553"),
    ("Islamic State of Afghanistan", "480"),
    ("People's Republic of China", "507"),
    ("Mongolia", "549"),
    ("Syria", "382"),
    ("Ba'athist Iraq", "450"),
    ("Kuwait", "237"),
    ("Bahrain", "161"),
    ("Qatar", "226"),
    ("United Arab Emirates", "308"),
    ("Oman", "423"),
    ("South Yemen", "415"),
    ("Yemen Arab Republic", "363"),
    ("Yemen", "448"),
    ("Socialist Federal Republic of Yugoslavia", "405"),
    ("West Germany", "404"),
    ("Democratic Republic of Afghanistan", "482"),
    ("Pakistan", "502"),
    ("Saudi Arabia", "568"),
    ("Lebanon", "219"),
    ("Jordan", "321"),
    ("Israel", "263"),
    ("Turkish Republic of Northern Cyprus", "197"),
    ("Cyprus", "210"),
    ("Albania", "265"),
    ("People's Socialist Republic of Albania", "264"),
    ("Bosnia and Herzegovina", "293"),
    ("Croatia", "296"),
    ("Netherlands", "274"),
    ("Belgium", "267"),
    ("Denmark", "281"),
    ("Egypt", "516"),
    ("France", "465"),
    ("Italy", "421"),
    ("Switzerland", "280"),
    ("Luxembourg", "189"),
    ("India", "581"),
    ("Transnistria", "201"),
    ("Sudan", "577"),
    ("Libyan Arab Jamahiriya", "556"),
    ("Ethiopia", "541"),
    ("United Kingdom", "402"),
]

neighbours = [{"name": name, "id": feature_id} for name, feature_id in _neighbour_rows]

In [8]:
# Look-up of neighbour records by map feature id (for repeated ids the last record wins)
neighbours_id = dict((rec['id'], rec) for rec in neighbours)

In [9]:
#neighbours_id

#### 1988 / 1993 MAPS

#### 1993

In [161]:
# Load the 1993 map (a JSON document with a top-level 'features' list).
# NOTE(review): the variable is called map_1991 although the file is
# map-1993.json; the following cells rely on this name, so it is kept.
with open('data/map-1993.json', 'r') as f:
    map_1991 = json.load(f)

In [162]:
# Keep only bloc members and their neighbours from the 1993 map, tagging every
# kept feature with 'iso', 'group' and 'region' properties.
features_filt = []
for rec in map_1991['features']:
    props = rec["properties"]

    # Reset for every feature: when the fuzzy lookup fails, the neighbour
    # branch below must record "no code" rather than the code resolved for a
    # previous feature (or raise NameError if the very first lookup fails).
    iso = None
    try:
        iso = pycountry.countries.search_fuzzy(props['name'])[0].alpha_3
    except LookupError:
        # Name could not be resolved to a country
        props['iso'] = None
    else:
        if iso in iso_codes:
            bloc_rec = iso_codes[iso]
            props['iso'] = iso
            # Baltic states get their own group label on the map
            props['group'] = 'FSU-B' if bloc_rec['region'] == 'Baltic states' else bloc_rec['group']
            props['region'] = bloc_rec['region']
            features_filt.append(rec)

    # Neighbours are matched by map feature id, independently of the name lookup
    if props['id'] in neighbours_id:
        props['iso'] = iso
        props['group'] = 'NGHBR'
        props['region'] = 'Neighbour'
        features_filt.append(rec)

In [163]:
len(features_filt)

67

In [164]:
# Swap the map's feature list for the filtered/tagged one before it is written out
map_1991['features'] = features_filt

In [165]:
# Write the reduced 1993 map to the app data folder
with open('data/app/map-1993-s.json', 'w') as f:
    json.dump(map_1991, f)

#### 1988

In [166]:
# Load the 1988 map (its 'features' list is filtered in a later cell)
with open('data/map-1988.json', 'r') as f:
    map_1988 = json.load(f)

In [167]:
# Bloc members as they appear on the 1988 map. 'id' is compared against the
# 'id' property of the map features; 'group' and 'region' are copied onto the
# matching feature. The USSR is a single entry here, and East Germany
# (German Democratic Republic) is included.
soviet_block = [
    {"name":"Soviet Union","id":"590", "group": "FSU", 'region': 'Eastern Europe'},
    {"name":"German Democratic Republic","id":"336", "group": "FWT", 'region': 'Eastern Europe'},
    {'name': 'Bulgaria', "id":"339", 'region': 'Eastern Europe', "group": "FWT"},
    {"name": "Czechoslovakia","id":"355", 'region': 'Eastern Europe', "group": "FWT"},
    {'name': 'Hungary', "id":"325", 'region': 'Eastern Europe', "group": "FWT"},
    {'name': 'Poland', "id":"424", 'region': 'Eastern Europe', "group": "FWT"},
    {'name': 'Romania', "id":"397", 'region': 'Eastern Europe', "group": "FWT"},
]

In [168]:
# Look-up of 1988 bloc records by map feature id
soviet_block_id = dict((rec['id'], rec) for rec in soviet_block)

In [169]:
soviet_block_id

{'590': {'name': 'Soviet Union',
  'id': '590',
  'group': 'FSU',
  'region': 'Eastern Europe'},
 '336': {'name': 'German Democratic Republic',
  'id': '336',
  'group': 'FWT',
  'region': 'Eastern Europe'},
 '339': {'name': 'Bulgaria',
  'id': '339',
  'region': 'Eastern Europe',
  'group': 'FWT'},
 '355': {'name': 'Czechoslovakia',
  'id': '355',
  'region': 'Eastern Europe',
  'group': 'FWT'},
 '325': {'name': 'Hungary',
  'id': '325',
  'region': 'Eastern Europe',
  'group': 'FWT'},
 '424': {'name': 'Poland',
  'id': '424',
  'region': 'Eastern Europe',
  'group': 'FWT'},
 '397': {'name': 'Romania',
  'id': '397',
  'region': 'Eastern Europe',
  'group': 'FWT'}}

In [170]:
# Keep only bloc members and their neighbours from the 1988 map, tagging every
# kept feature with 'group' and 'region' properties.
features_filt = []
for rec in map_1988['features']:
    props = rec["properties"]
    cid = props['id']
    if cid == "590":
        print('Soviet')

    bloc_rec = soviet_block_id.get(cid)
    if bloc_rec is not None:
        props['group'] = bloc_rec['group']
        props['region'] = bloc_rec['region']

        if cid == '325':
            # replace Hungary name with the one from the bloc list
            props['name'] = bloc_rec['name']
        elif cid == '336':
            # replace East Germany
            props['name'] = 'East Germany'

        features_filt.append(rec)

    if cid in neighbours_id:
        props['group'] = 'NGHBR'
        props['region'] = 'Neighbour'
        features_filt.append(rec)

map_1988['features'] = features_filt

Soviet


In [171]:
len(features_filt)

48

In [172]:
# Write the reduced 1988 map to the app data folder
with open('data/app/map-1988-s.json', 'w') as f:
    json.dump(map_1988, f)

#### DOWNLOAD DATA

In [9]:
# NOTE: rebinds iso_codes (until now the dict keyed by ISO code) to a plain
# list of the ISO codes of all bloc countries.
iso_codes = [rec['iso'] for rec in fsu_countries + fwt_countries]

# Identifier columns plus one column per year, 1990 through 2020
use_cols = ['Country Name', 'Country Code', 'Indicator Code']
use_cols = use_cols + [str(year) for year in range(1990, 2021)]

In [10]:
# GDP per capita growth (annual %)
gdp_pc_grw = pd.read_csv(
    'data/API_NY.GDP.PCAP.KD.ZG_DS2_en_csv_v2_3634329.csv',
    sep=',', skiprows=4, usecols=use_cols
)

# Keep the bloc countries plus three extra codes (presumably World Bank
# regional aggregates - TODO confirm)
grw_keep_codes = iso_codes + ['EUU', 'ECS', 'ECA']
gdp_pc_grw = gdp_pc_grw[gdp_pc_grw['Country Code'].isin(grw_keep_codes)]

In [11]:
# GDP per capita, PPP (constant 2017 international $)
gdp_pc_ppp = pd.read_csv(
    'data/API_NY.GDP.PCAP.PP.KD_DS2_en_csv_v2_3682444.csv',
    sep=',', skiprows=4, usecols=use_cols
)

# Keep the bloc countries plus three extra codes (presumably World Bank
# regional aggregates - TODO confirm)
ppp_keep_codes = iso_codes + ['EUU', 'ECS', 'ECA']
gdp_pc_ppp = gdp_pc_ppp[gdp_pc_ppp['Country Code'].isin(ppp_keep_codes)]

In [12]:
# Population, total
pop = pd.read_csv(
    'data/API_SP.POP.TOTL_DS2_en_csv_v2_3628828.csv',
    sep=',', skiprows=4, usecols=use_cols
)

# Keep the bloc countries plus three extra codes (presumably World Bank
# regional aggregates - TODO confirm)
pop_keep_codes = iso_codes + ['EUU', 'ECS', 'ECA']
pop = pop[pop['Country Code'].isin(pop_keep_codes)]

In [13]:
pop.head()

Unnamed: 0,Country Name,Country Code,Indicator Code,1990,1991,1992,1993,1994,1995,1996,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
10,Armenia,ARM,SP.POP.TOTL,3538164.0,3505249.0,3442820.0,3363111.0,3283664.0,3217349.0,3168213.0,...,2876536.0,2884239.0,2897593.0,2912403.0,2925559.0,2936147.0,2944789.0,2951741.0,2957728.0,2963234.0
15,Azerbaijan,AZE,SP.POP.TOTL,7175200.0,7271300.0,7382050.0,7494800.0,7596550.0,7684850.0,7763000.0,...,9173082.0,9295784.0,9416801.0,9535079.0,9649341.0,9757812.0,9854033.0,9939771.0,10024283.0,10093121.0
21,Bulgaria,BGR,SP.POP.TOTL,8718289.0,8632367.0,8540164.0,8472313.0,8443591.0,8406067.0,8362826.0,...,7348328.0,7305888.0,7265115.0,7223938.0,7177991.0,7127822.0,7075947.0,7025037.0,6975761.0,6934015.0
25,Belarus,BLR,SP.POP.TOTL,10189348.0,10194050.0,10216470.0,10239050.0,10226955.0,10193831.0,10159569.0,...,9461643.0,9446836.0,9443211.0,9448515.0,9461076.0,9469379.0,9458989.0,9438785.0,9419758.0,9379952.0
54,Czech Republic,CZE,SP.POP.TOTL,10333355.0,10308578.0,10319123.0,10329855.0,10333587.0,10327253.0,10315241.0,...,10496088.0,10510785.0,10514272.0,10525347.0,10546059.0,10566332.0,10594438.0,10629928.0,10671870.0,10698896.0


In [14]:
# Country attributes as a frame, with column names matching the indicator files
country_column_labels = {
    'name': 'Country Name',
    'region': 'Region',
    'iso': 'Country Code',
    'group': 'Group',
}
countries_df = pd.DataFrame(fwt_countries + fsu_countries)
countries_df = countries_df.rename(columns=country_column_labels)

In [15]:
# Merge with countries
# A left join keeps codes that are not in countries_df; their Group/Region stay NaN.
country_attrs = countries_df[['Group', 'Country Code', 'Region']]
pop = pop.merge(country_attrs, on='Country Code', how='left')
gdp_pc_ppp = gdp_pc_ppp.merge(country_attrs, on='Country Code', how='left')
gdp_pc_grw = gdp_pc_grw.merge(country_attrs, on='Country Code', how='left')

In [16]:
# Wide to long format
# Every column that is not an identifier (i.e. the year columns) becomes a
# (Year, Value) pair.
melt_id_vars = [
    'Country Name',
    'Country Code',
    'Indicator Code',
    'Group',
    'Region',
]

pop = pop.melt(id_vars=melt_id_vars, var_name='Year', value_name='Value')
gdp_pc_ppp = gdp_pc_ppp.melt(id_vars=melt_id_vars, var_name='Year', value_name='Value')
gdp_pc_grw = gdp_pc_grw.melt(id_vars=melt_id_vars, var_name='Year', value_name='Value')

In [17]:
# Convert the melted year labels (strings) to integers.
# Plain column assignment replaces the column, so the numeric dtype sticks;
# `df.loc[:, 'Year'] = ...` writes into the existing object column on
# pandas >= 2.0 and would leave the dtype as object.
pop['Year'] = pd.to_numeric(pop['Year'])
gdp_pc_ppp['Year'] = pd.to_numeric(gdp_pc_ppp['Year'])
gdp_pc_grw['Year'] = pd.to_numeric(gdp_pc_grw['Year'])

### NAs

In [18]:
gdp_pc_ppp.loc[
    gdp_pc_ppp.Value.isna()
]

Unnamed: 0,Country Name,Country Code,Indicator Code,Group,Region,Year,Value
7,Estonia,EST,NY.GDP.PCAP.PP.KD,FSU,Baltic states,1990,
10,Hungary,HUN,NY.GDP.PCAP.PP.KD,FWT,Eastern Europe,1990,
13,Lithuania,LTU,NY.GDP.PCAP.PP.KD,FSU,Baltic states,1990,
14,Latvia,LVA,NY.GDP.PCAP.PP.KD,FSU,Baltic states,1990,
15,Moldova,MDA,NY.GDP.PCAP.PP.KD,FSU,Eastern Europe,1990,
19,Slovak Republic,SVK,NY.GDP.PCAP.PP.KD,FWT,Eastern Europe,1990,
31,Estonia,EST,NY.GDP.PCAP.PP.KD,FSU,Baltic states,1991,
37,Lithuania,LTU,NY.GDP.PCAP.PP.KD,FSU,Baltic states,1991,
38,Latvia,LVA,NY.GDP.PCAP.PP.KD,FSU,Baltic states,1991,
39,Moldova,MDA,NY.GDP.PCAP.PP.KD,FSU,Eastern Europe,1991,


In [19]:
gdp_pc_ppp.loc[
    gdp_pc_ppp.Value.isna(),
    'Country Name'
].value_counts()

Estonia            5
Lithuania          5
Latvia             5
Moldova            5
Slovak Republic    2
Hungary            1
Turkmenistan       1
Name: Country Name, dtype: int64

#### IMPUTE NAs WITH GAPMINDER DATA

In [20]:
# Gapminder GDP per capita: only the identifier columns and the years read here
gm_columns = ['geo', 'Country Name', '1990', '1991', '1992', '1993', '1994', '2020']
gdp_pc_ppp_gm = pd.read_csv(
    'data/GM-GDP per capita - Dataset - v27 - data-GDP-per-capita-in-columns.csv',
    sep=',', skiprows=3, usecols=gm_columns
)

In [21]:
# Gapminder rows for the countries that have at least one missing World Bank
# value, reshaped to long format (one row per country and year).
countries_with_gaps = gdp_pc_ppp.loc[gdp_pc_ppp.Value.isna(), 'Country Name']

fill_df = gdp_pc_ppp_gm.loc[
    gdp_pc_ppp_gm['Country Name'].isin(countries_with_gaps)
].melt(id_vars = ['Country Name', 'geo'], var_name = 'Year', value_name = 'Value')

# Plain column assignment so the integer dtype sticks; `.loc[:, 'Year'] = ...`
# writes into the existing object column on pandas >= 2.0.
fill_df['Year'] = pd.to_numeric(fill_df['Year'])

In [22]:
fill_df.dtypes

Country Name     object
geo              object
Year              int64
Value           float64
dtype: object

In [23]:
# Attach the Gapminder value as 'Value_fill' next to each World Bank value
fill_values = fill_df[['Country Name', 'Year', 'Value']]
gdp_pc_ppp = gdp_pc_ppp.merge(
    fill_values, on=['Country Name', 'Year'], how='left', suffixes=['', '_fill']
)

In [24]:
# Use the Gapminder value wherever the World Bank value is missing
value_missing = gdp_pc_ppp.Value.isna()
gdp_pc_ppp.loc[value_missing, 'Value'] = gdp_pc_ppp.loc[value_missing, 'Value_fill']

In [25]:
# Remove the helper column. Reassigning (instead of inplace=True) with
# errors='ignore' keeps the cell re-runnable: a second run is a no-op rather
# than a KeyError.
gdp_pc_ppp = gdp_pc_ppp.drop(columns='Value_fill', errors='ignore')

In [26]:
gdp_pc_ppp.loc[
    gdp_pc_ppp.Value.isna()
]

Unnamed: 0,Country Name,Country Code,Indicator Code,Group,Region,Year,Value


### EDA

#### GDP PER CAPITA (PPP)

In [27]:
# Endpoint rows (1991 and 2020) of GDP per capita, Baltic states as their own group
df = gdp_pc_ppp[gdp_pc_ppp.Year.isin([1991, 2020])].copy()
df.loc[df.Region == 'Baltic states', 'Group'] = 'FSU-B'


def _gdp_pct_change(country_rows):
    """Percent change in Value from a country's first to its second row by year."""
    ordered = country_rows.sort_values(by='Year')
    return ordered.loc[:, ['Value']].pct_change().iloc[1] * 100


# Add Pct Change and Labels
pct_change = (
    df.groupby('Country Code')
    .apply(_gdp_pct_change)
    .rename(columns={'Value': 'PctChange'})
)
df = df.merge(pct_change, on='Country Code')

# Default label uses the down arrow; strictly positive changes get the up arrow
pct_text = df['PctChange'].round(2).astype(str) + '%'
df['Label'] = df['Country Name'] + ' ↓' + pct_text
df.loc[df.PctChange > 0, 'Label'] = df['Country Name'] + ' ↑+' + pct_text

x = 'Year:O'
y = 'Value'
detail = 'Country Code'


def _gdp_highlight_color(highlighted_group):
    """Colour encoding painting one group red, the other groups grey, 'null' dark."""
    groups = ['FWT', 'FSU', 'FSU-B']
    palette = ['#D50000' if group == highlighted_group else '#E0E0E0' for group in groups]
    return alt.Color(
        'Group',
        scale=alt.Scale(domain=groups + ['null'], range=palette + ['#212121']),
        legend=alt.Legend(orient="top")
    )


color_fwt = _gdp_highlight_color('FWT')
color_fsu = _gdp_highlight_color('FSU')
color_fsu_b = _gdp_highlight_color('FSU-B')

# Only the 2020 point carries a text label
text = alt.condition(alt.datum.Year == 2020, 'Label', alt.value(' '))

width = 250
height = 550


def _gdp_slope_panel(data, hidden_codes, color):
    """Return (base chart, layered line + circle + label chart) for one panel."""
    visible = data.loc[~data['Country Code'].isin(hidden_codes)]
    base = alt.Chart(visible).encode(x=x, y=y, color=color, detail=detail, text=text)
    layered = alt.layer(
        base.mark_line(),
        base.mark_circle(),
        # dx nudges the label to the right so it does not sit on top of the point
        base.mark_text(align='left', baseline='middle', dx=15)
    ).properties(width=width, height=height)
    return base, layered


# NOTE(review): the exclusion lists are matched against 'Country Code', so the
# group labels in them ('FSU', 'FSU-B', 'FWT') presumably never match a row and
# only the aggregate codes take effect - confirm intent.
# FWT
base_fwt, slope_fwt = _gdp_slope_panel(df, ['FSU', 'FSU-B', 'ECS', 'ECA'], color_fwt)
# FSU
base_fsu, slope_fsu = _gdp_slope_panel(df, ['FWT', 'FSU-B', 'EUU', 'ECA'], color_fsu)
# FSU - B
base_fsu_b, slope_fsu_b = _gdp_slope_panel(df, ['FWT', 'FSU', 'ECS', 'ECA'], color_fsu_b)

alt.hconcat(
    slope_fsu, slope_fsu_b, slope_fwt
).configure_view(
    strokeWidth=0
).configure_axis(
    grid=False
).resolve_scale(color='independent', y='shared')

In [28]:
x = 'Country Code'
y = 'Value'
color = 'PeriodChg'
detail = 'Country Code'

# 1990 and 2020 values per country
chart_df = gdp_pc_ppp[gdp_pc_ppp.Year.isin([1990, 2020])].copy()


def _rose_over_period(country_rows):
    """True when Value in the second row by year exceeds Value in the first."""
    ordered = country_rows.sort_values(by='Year')
    return ordered.loc[:, ['Value']].diff().iloc[1] > 0


change = (
    chart_df.groupby('Country Code')
    .apply(_rose_over_period)
    .rename(columns={'Value': 'PeriodChg'})
)
chart_df = chart_df.merge(change, on='Country Code')

width = 250
height = 500

# NOTE(review): df is rebound here but not used by the charts below
df = gdp_pc_ppp.loc[
    ~gdp_pc_ppp.Group.isin(['FSU']) & gdp_pc_ppp.Year.isin([1990, 2020])
].copy()

shared_encoding = dict(x=x, y=y, color=color, detail=detail)
panel_size = dict(width=width, height=height)

# Everything except the FSU group
base = alt.Chart(chart_df[~chart_df.Group.isin(['FSU'])]).encode(**shared_encoding)
fwt_slope = alt.layer(base.mark_line(), base.mark_circle()).properties(**panel_size)

# Everything except the FWT group
base = alt.Chart(chart_df[~chart_df.Group.isin(['FWT'])]).encode(**shared_encoding)
fsu_slope = alt.layer(base.mark_line(), base.mark_circle()).properties(**panel_size)

alt.hconcat(fwt_slope, fsu_slope)

#### SAVE DATA FOR APP

In [32]:
# App export: GDP per capita at 1991 and 2020, Baltic states as their own group
app_df = gdp_pc_ppp[gdp_pc_ppp.Year.isin([1991, 2020])].copy()
app_df.loc[app_df.Region == 'Baltic states', 'Group'] = 'FSU-B'


def _app_gdp_pct_change(country_rows):
    """Percent change in Value from a country's first to its second row by year."""
    ordered = country_rows.sort_values(by='Year')
    return ordered.loc[:, ['Value']].pct_change().iloc[1] * 100


# Add Pct Change (same value repeated on both of a country's rows)
pct_change = (
    app_df.groupby('Country Code')
    .apply(_app_gdp_pct_change)
    .rename(columns={'Value': 'PctChange'})
)
app_df = app_df.merge(pct_change, on='Country Code')

# Short indicator label used by the app instead of the World Bank code
app_df['Indicator Code'] = 'GDP_PPC'

In [33]:
app_df

Unnamed: 0,Country Name,Country Code,Indicator Code,Group,Region,Year,Value,PctChange
0,Armenia,ARM,GDP_PPC,FSU,South Caucasus,1991,4616.944854,173.338594
1,Armenia,ARM,GDP_PPC,FSU,South Caucasus,2020,12619.892154,173.338594
2,Azerbaijan,AZE,GDP_PPC,FSU,South Caucasus,1991,7463.628859,83.915491
3,Azerbaijan,AZE,GDP_PPC,FSU,South Caucasus,2020,13726.769663,83.915491
4,Bulgaria,BGR,GDP_PPC,FWT,Eastern Europe,1991,11565.373838,93.50083
5,Bulgaria,BGR,GDP_PPC,FWT,Eastern Europe,2020,22379.094313,93.50083
6,Belarus,BLR,GDP_PPC,FSU,Eastern Europe,1991,8784.377365,118.419303
7,Belarus,BLR,GDP_PPC,FSU,Eastern Europe,2020,19186.775815,118.419303
8,Czech Republic,CZE,GDP_PPC,FWT,Eastern Europe,1991,20895.879761,84.282897
9,Czech Republic,CZE,GDP_PPC,FWT,Eastern Europe,2020,38507.532644,84.282897


In [34]:
# camelCase column names for the app
gdp_app_column_names = {
    'Country Name': 'countryName',
    'Country Code': 'countryCode',
    'Indicator Code': 'indicatorCode',
    'Group': 'group',
    'Region': 'region',
    'Year': 'year',
    'Value': 'value',
    'PctChange': 'pctChange',
}
app_df = app_df.rename(columns=gdp_app_column_names)

In [35]:
#app_df.to_json('data/app/GDPPPC.json', orient='records')

#### DEMOGRAPHICS - POPULATION

In [29]:
# Endpoint rows (1991 and 2020) of total population, Baltic states as their own group
df = pop[pop.Year.isin([1991, 2020])].copy()
df.loc[df.Region == 'Baltic states', 'Group'] = 'FSU-B'

# Add Pct Change and Labels
pct_change = (
    df.groupby('Country Code')
    .apply(lambda tx: tx.sort_values(by='Year')[['Value']].pct_change().iloc[1] * 100)
    .rename(columns={'Value': 'PctChange'})
)
df = df.merge(pct_change, on='Country Code')

# Default label uses the down arrow; strictly positive changes get the up arrow
pop_pct_text = df['PctChange'].round(2).astype(str) + '%'
df['Label'] = df['Country Name'] + ' ↓' + pop_pct_text
df.loc[df.PctChange > 0, 'Label'] = df['Country Name'] + ' ↑+' + pop_pct_text

x = 'Year:O'
y = alt.Y('Value', scale=alt.Scale(type="log"))
detail = 'Country Code'

# Only the 2020 point carries a text label
text = alt.condition(alt.datum.Year == 2020, 'Label', alt.value(' '))

width = 250
height = 550

group_domain = ['FWT', 'FSU', 'FSU-B', 'null']

# (highlighted group, codes excluded from that panel).
# NOTE(review): the exclusions are matched against 'Country Code', so the group
# labels in them presumably never match a row and only the aggregate codes take
# effect - confirm intent.
panel_specs = [
    ('FWT', ['FSU', 'FSU-B', 'ECS', 'ECA']),
    ('FSU', ['FWT', 'FSU-B', 'EUU', 'ECA']),
    ('FSU-B', ['FWT', 'FSU', 'ECS', 'ECA']),
]

colors, bases, slopes = {}, {}, {}
for highlighted, hidden_codes in panel_specs:
    # Highlighted group in red, other groups grey, 'null' dark
    palette = [
        '#212121' if group == 'null' else '#D50000' if group == highlighted else '#E0E0E0'
        for group in group_domain
    ]
    colors[highlighted] = alt.Color(
        'Group',
        scale=alt.Scale(domain=list(group_domain), range=palette),
        legend=alt.Legend(orient="top")
    )
    bases[highlighted] = alt.Chart(
        df.loc[~df['Country Code'].isin(hidden_codes)]
    ).encode(x=x, y=y, color=colors[highlighted], detail=detail, text=text)
    slopes[highlighted] = alt.layer(
        bases[highlighted].mark_line(),
        bases[highlighted].mark_circle(),
        # dx nudges the label to the right so it does not sit on top of the point
        bases[highlighted].mark_text(align='left', baseline='middle', dx=15)
    ).properties(width=width, height=height)

# Keep the per-panel names available
color_fwt, color_fsu, color_fsu_b = colors['FWT'], colors['FSU'], colors['FSU-B']
base_fwt, base_fsu, base_fsu_b = bases['FWT'], bases['FSU'], bases['FSU-B']
slope_fwt, slope_fsu, slope_fsu_b = slopes['FWT'], slopes['FSU'], slopes['FSU-B']

alt.hconcat(
    slope_fsu, slope_fsu_b, slope_fwt
).configure_view(
    strokeWidth=0
).configure_axis(
    grid=False
).resolve_scale(color='independent', y='shared')

#### DEMOGRAPHICS - POPULATION % CHANGE 1991 - 2020

In [267]:
sort = alt.EncodingSortField('PctChange', op='min', order='descending')


def _pop_change_encoding():
    """Fresh x / y / colour encodings shared by the bar and the circle layer."""
    return dict(
        x=alt.X('PctChange'),
        y=alt.Y('Country Code', sort=sort),
        color=alt.Color(
            'Group',
            scale=alt.Scale(
                domain=['FSU', 'FSU-B', 'FWT'],
                range=['#800000', '#D3543F', '#FFB495']
            )
        )
    )


# Thin bar with a circle at its end
pop_chg_bars = alt.Chart().mark_bar(size=1).encode(**_pop_change_encoding())
pop_chg_circ = alt.Chart().mark_circle(size=60).encode(**_pop_change_encoding())

# One row per country (the 2020 one), rows without a group left out
pop_chg_rows = df[(df.Year == 2020) & df.Group.notna()]

alt.layer(
    pop_chg_bars, pop_chg_circ, data=pop_chg_rows
).properties(
    width=250, height=250
)

#### SAVE POPULATION DATA FOR APP

In [36]:
# App export: population at 1991 and 2020, Baltic states as their own group
app_df = pop[pop.Year.isin([1991, 2020])].copy()
app_df.loc[app_df.Region == 'Baltic states', 'Group'] = 'FSU-B'


def _app_pop_pct_change(country_rows):
    """Percent change in Value from a country's first to its second row by year."""
    ordered = country_rows.sort_values(by='Year')
    return ordered.loc[:, ['Value']].pct_change().iloc[1] * 100


# Add Pct Change (same value repeated on both of a country's rows)
pct_change = (
    app_df.groupby('Country Code')
    .apply(_app_pop_pct_change)
    .rename(columns={'Value': 'PctChange'})
)
app_df = app_df.merge(pct_change, on='Country Code')

# Short indicator label used by the app instead of the World Bank code
app_df['Indicator Code'] = 'POP'

In [37]:
# camelCase column names for the app
pop_app_column_names = {
    'Country Name': 'countryName',
    'Country Code': 'countryCode',
    'Indicator Code': 'indicatorCode',
    'Group': 'group',
    'Region': 'region',
    'Year': 'year',
    'Value': 'value',
    'PctChange': 'pctChange',
}
app_df = app_df.rename(columns=pop_app_column_names)

In [39]:
app_df

Unnamed: 0,countryName,countryCode,indicatorCode,group,region,year,value,pctChange
0,Armenia,ARM,POP,FSU,South Caucasus,1991,3505249.0,-15.462953
1,Armenia,ARM,POP,FSU,South Caucasus,2020,2963234.0,-15.462953
2,Azerbaijan,AZE,POP,FSU,South Caucasus,1991,7271300.0,38.807655
3,Azerbaijan,AZE,POP,FSU,South Caucasus,2020,10093121.0,38.807655
4,Bulgaria,BGR,POP,FWT,Eastern Europe,1991,8632367.0,-19.674233
5,Bulgaria,BGR,POP,FWT,Eastern Europe,2020,6934015.0,-19.674233
6,Belarus,BLR,POP,FSU,Eastern Europe,1991,10194050.0,-7.986011
7,Belarus,BLR,POP,FSU,Eastern Europe,2020,9379952.0,-7.986011
8,Czech Republic,CZE,POP,FWT,Eastern Europe,1991,10308578.0,3.786342
9,Czech Republic,CZE,POP,FWT,Eastern Europe,2020,10698896.0,3.786342


In [41]:
#app_df.to_json('data/app/POP.json', orient='records')

### DEMOGRAPHICS - LIFE EXPECTANCY vs MORTALITY RATE (WORLD BANK DATA API)

In [58]:
#iso_codes

In [125]:
"""
SP.DYN.LE00.IN - Life expectancy at birth, total (years)

Life expectancy at birth indicates the number of years a newborn infant
would live if prevailing patterns of mortality at the time of its birth were to stay the same throughout its life.

SP.DYN.CDRT.IN - Death rate, crude (per 1,000 people)

Crude death rate indicates the number of deaths occurring during the year, per 1,000 population estimated at midyear.
"""

# Fetch both indicators for the four listed years.
# `wb` is wbgapi here: in the import cell `import wbgapi as wb` comes after, and
# therefore replaces, `from pandas_datareader import wb`.
# NOTE(review): labels=True / skipAggs=True presumably add a country-name
# column and leave out aggregate regions - confirm against the wbgapi docs.
dem_df = wb.data.DataFrame(['SP.DYN.LE00.IN', 'SP.DYN.CDRT.IN'], time=[1991, 2010, 2019, 2020], labels=True, skipAggs=True)

In [126]:
# Flatten the index, align column names with the rest of the notebook, then
# attach Group / Region (NaN for countries outside the two blocs)
dem_column_names = {'Country': 'Country Name', 'economy': 'Country Code'}
dem = dem_df.reset_index().rename(columns=dem_column_names)

dem_country_attrs = countries_df.drop(columns=['independence', 'Country Name'])
dem = dem.merge(dem_country_attrs, how='left', on='Country Code')

In [127]:
# Transform values from wide to long
# The columns starting with 'YR' collapse into a Year / Value pair per
# (country, series); the 'Series' label column is dropped afterwards.
dem = pd.wide_to_long(dem, 'YR', i=['Country Code', 'series'], j='Year')
dem = dem.reset_index().rename(columns={'YR': 'Value'})
dem = dem.drop(columns='Series')

In [128]:
dem.head()

Unnamed: 0,Country Code,series,Year,Group,Country Name,Region,Value
0,ABW,SP.DYN.CDRT.IN,1991,,Aruba,,7.054
1,ABW,SP.DYN.CDRT.IN,2010,,Aruba,,7.918
2,ABW,SP.DYN.CDRT.IN,2019,,Aruba,,9.205
3,ABW,SP.DYN.CDRT.IN,2020,,Aruba,,9.362
4,ABW,SP.DYN.LE00.IN,1991,,Aruba,,73.509


In [129]:
# One row per country and year with one column per indicator series.
# NOTE(review): aggfunc='sum' presumably yields 0 where a value is missing,
# which would explain the replace(0, NaN) in the scatter cells - confirm.
dem_series_wide = dem.pivot_table(
    index=['Country Code', 'Year'], columns='series', values='Value', aggfunc='sum'
).reset_index()

# Group label per country (last entry within each country's rows)
dem_group_by_country = (
    dem.loc[:, ['Country Code', 'Group']].groupby('Country Code').last().reset_index()
)

dem = pd.merge(dem_series_wide, dem_group_by_country, on='Country Code', how='left')
#dem.pivot_table(index=['Country Code', 'Year'], columns='series', values='Value', aggfunc='sum').reset_index()

In [130]:
dem.head()

Unnamed: 0,Country Code,Year,SP.DYN.CDRT.IN,SP.DYN.LE00.IN,Group
0,ABW,1991,7.054,73.509,
1,ABW,2010,7.918,75.017,
2,ABW,2019,9.205,76.293,
3,ABW,2020,9.362,76.434,
4,AFG,1991,14.783,50.999,


In [131]:
# Readable names for the two indicator columns
dem_series_labels = {
    'SP.DYN.LE00.IN': 'Life Expectancy',
    'SP.DYN.CDRT.IN': 'Mortality',
}
dem = dem.rename(columns=dem_series_labels)

In [132]:
# Life expectancy vs crude death rate in 1991 (zeros treated as missing)
dem_1991 = dem[dem.Year == 1991].replace(0, np.nan)

alt.Chart(dem_1991).mark_circle(size=70).encode(
    x=alt.X('Life Expectancy:Q', scale=alt.Scale(domain=[20, 90])),
    y=alt.Y('Mortality:Q', scale=alt.Scale(domain=[0, 40])),
    color='Group',
    tooltip=['Country Code']
)

In [134]:
# Life expectancy vs crude death rate in 2019 (zeros treated as missing)
dem_2019 = dem[dem.Year == 2019].replace(0, np.nan)

alt.Chart(dem_2019).mark_circle(size=70).encode(
    x=alt.X('Life Expectancy:Q', scale=alt.Scale(domain=[20, 90])),
    y=alt.Y('Mortality:Q', scale=alt.Scale(domain=[0, 40])),
    color='Group',
    tooltip=['Country Code']
)

In [156]:
# Life expectancy vs crude death rate in 2010 (zeros treated as missing);
# this panel uses tighter axis domains than the 1991 / 2019 ones
dem_2010 = dem[dem.Year == 2010].replace(0, np.nan)

alt.Chart(dem_2010).mark_circle(size=70).encode(
    x=alt.X('Life Expectancy:Q', scale=alt.Scale(domain=[40, 90])),
    y=alt.Y('Mortality:Q', scale=alt.Scale(domain=[0, 25])),
    color='Group',
    tooltip=['Country Code']
)

In [153]:
# 2020 life expectancy of the bloc countries, longest bar first
le_2020_rows = dem[(dem.Year == 2020) & dem['Country Code'].isin(iso_codes)]

alt.Chart(le_2020_rows).mark_bar(size=10).encode(
    x='Life Expectancy',
    y=alt.Y('Country Code', sort='-x'),
    color='Group'
).properties(height=300)

In [154]:
# 2020 crude death rate of the bloc countries, longest bar first
mort_2020_rows = dem[(dem.Year == 2020) & dem['Country Code'].isin(iso_codes)]

alt.Chart(mort_2020_rows).mark_bar(size=10).encode(
    x='Mortality',
    y=alt.Y('Country Code', sort='-x'),
    color='Group'
).properties(height=300)

In [173]:
# 1991 and 2020 rows of the bloc countries, Baltic states as their own group
df_le_mort = dem[dem.Year.isin([1991, 2020]) & dem['Country Code'].isin(iso_codes)].copy()
df_le_mort.loc[df_le_mort['Country Code'].isin(['LTU', 'EST', 'LVA']), 'Group'] = 'FSU-B'


def _le_mort_pct_change(frame, value_col, out_col):
    """Per-country percent change of `value_col` between the first and second row by year."""
    def _change(country_rows):
        ordered = country_rows.sort_values(by='Year')
        return ordered.loc[:, [value_col]].pct_change().iloc[1] * 100

    subset = frame.loc[:, ['Country Code', 'Year', value_col]]
    return subset.groupby('Country Code').apply(_change).rename(columns={value_col: out_col})


# Add Pct Change columns
pct_change_le = _le_mort_pct_change(df_le_mort, 'Life Expectancy', 'Pct Change LE')
pct_change_mort = _le_mort_pct_change(df_le_mort, 'Mortality', 'Pct Change MORT')

df_le_mort = df_le_mort.merge(pct_change_le, on='Country Code')
df_le_mort = df_le_mort.merge(pct_change_mort, on='Country Code')

In [175]:
# Percent change in life expectancy per country (one row per country: the 2020 one)
le_change_rows = df_le_mort[df_le_mort.Year == 2020]

alt.Chart(le_change_rows).mark_bar(size=10).encode(
    x='Pct Change LE',
    y=alt.Y('Country Code', sort='-x'),
    color='Group'
).properties(height=300)

In [176]:
# Percent change in crude death rate per country (one row per country: the 2020 one)
mort_change_rows = df_le_mort[df_le_mort.Year == 2020]

alt.Chart(mort_change_rows).mark_bar(size=10).encode(
    x='Pct Change MORT',
    y=alt.Y('Country Code', sort='-x'),
    color='Group'
).properties(height=300)

#### LIFE EXPECTANCY - TIME SERIES

In [213]:
# Life expectancy at birth for the bloc countries, every year from 1991 through 2020
# (`wb` is wbgapi; iso_codes is the list of ISO alpha-3 codes at this point)
le_ts_df = wb.data.DataFrame('SP.DYN.LE00.IN', iso_codes, time=range(1991, 2021), labels=True, skipAggs=True)

In [214]:
# Flatten the index and align column names with the rest of the notebook.
# NOTE(review): the cell that builds `dem` renames an 'economy' column after
# reset_index(), while this one renames 'index' - confirm which name
# reset_index() actually yields here.
le_ts = le_ts_df.reset_index().rename(columns={'index': 'Country Code', 'Country': 'Country Name'})
# Columns starting with 'YR' collapse into a Year / Value pair per country
le_ts = pd.wide_to_long(le_ts, 'YR', i='Country Code', j='Year').reset_index().rename(columns={'YR': 'Value'})
# Attach Group / Region from the country table
le_ts = le_ts.merge(countries_df.drop(['independence', 'Country Name'], axis=1), how='left', on='Country Code')

In [215]:
# Baltic states get their own group label
le_ts_is_baltic = le_ts['Country Code'].isin(['LTU', 'LVA', 'EST'])
le_ts.loc[le_ts_is_baltic, 'Group'] = 'FSU-B'

In [231]:
# One line per country, one facet column per group
le_group_color = alt.Color(
    'Group',
    scale=alt.Scale(
        domain=['FSU', 'FSU-B', 'FWT'],
        range=['#800000', '#D3543F', '#FFB495']
    )
)

le_lines = alt.Chart(le_ts).mark_line().encode(
    x=alt.X('Year:Q', axis=alt.Axis(tickCount=5, grid=False)),
    y=alt.Y('Value', scale=alt.Scale(domain=[55, 85], nice=False)),
    detail='Country Name',
    color=le_group_color
)

le_lines.properties(width=250, height=250).facet(column='Group:N')

#### HUMAN DEVELOPMENT INDEX

In [50]:
# Human Development Index table: a 'country' column plus one column per year
hdi_gm = pd.read_csv('data/hdi_human_development_index.csv', sep=',')

In [None]:
def get_iso(name):
    """Return the ISO alpha-3 code that best matches a country name.

    Parameters
    ----------
    name : str
        Country name as it appears in the source data (e.g. 'Armenia').
        Non-string values (None, NaN) and blank strings are treated as
        "no country".

    Returns
    -------
    str or None
        ``alpha_3`` of the first result returned by
        ``pycountry.countries.search_fuzzy``, or ``None`` when ``name`` is
        not a non-blank string or the search finds no match.
    """
    # Missing values coming out of pandas (None / float NaN) and blank strings
    # cannot be looked up; answer None instead of handing them to pycountry.
    if not isinstance(name, str) or not name.strip():
        return None

    try:
        return pycountry.countries.search_fuzzy(name)[0].alpha_3
    except LookupError:
        # Raised by the fuzzy search when nothing matches.
        return None

In [51]:
# Derive an ISO alpha-3 'Country Code' column from the country names.
hdi_gm['Country Code'] = hdi_gm['country'].apply(get_iso)

In [52]:
# Right join: keep every row of countries_df, with HDI values where available.
hdi_gm = hdi_gm.merge(
    countries_df[['Group', 'Country Code', 'Region']],
    how='right',
    on='Country Code',
)

In [53]:
# Display the merged HDI table.
hdi_gm

Unnamed: 0,country,1990,1991,1992,1993,1994,1995,1996,1997,1998,...,2013,2014,2015,2016,2017,2018,2019,Country Code,Group,Region
0,Armenia,0.654,0.648,0.618,0.617,0.621,0.627,0.634,0.645,0.657,...,0.762,0.764,0.768,0.766,0.769,0.771,0.776,ARM,FSU,South Caucasus
1,Azerbaijan,,,,,,0.604,0.605,0.61,0.619,...,0.735,0.74,0.744,0.751,0.754,0.754,0.756,AZE,FSU,South Caucasus
2,Bulgaria,0.708,0.705,0.705,0.704,0.705,0.71,0.718,0.713,0.718,...,0.801,0.806,0.809,0.811,0.811,0.813,0.816,BGR,FWT,Eastern Europe
3,Belarus,,,,,,0.66,0.664,0.67,0.674,...,0.811,0.814,0.814,0.815,0.819,0.823,0.823,BLR,FSU,Eastern Europe
4,Czech Republic,0.738,0.733,0.736,0.743,0.752,0.761,0.776,0.782,0.783,...,0.882,0.888,0.891,0.893,0.896,0.898,0.9,CZE,FWT,Eastern Europe
5,Estonia,0.735,0.73,0.722,0.715,0.72,0.729,0.74,0.754,0.766,...,0.869,0.871,0.877,0.881,0.885,0.889,0.892,EST,FSU,Baltic states
6,Georgia,,,,,,,,,,...,0.775,0.783,0.79,0.792,0.799,0.805,0.812,GEO,FSU,South Caucasus
7,Hungary,0.708,0.706,0.711,0.724,0.737,0.746,0.75,0.751,0.758,...,0.839,0.838,0.842,0.844,0.846,0.85,0.854,HUN,FWT,Eastern Europe
8,Kazakhstan,0.69,0.684,0.682,0.675,0.667,0.664,0.666,0.669,0.672,...,0.791,0.798,0.806,0.808,0.815,0.819,0.825,KAZ,FSU,Central Asia
9,Kyrgyz Republic,0.64,0.634,0.625,0.612,0.595,0.589,0.594,0.601,0.606,...,0.68,0.686,0.69,0.691,0.694,0.696,0.697,KGZ,FSU,Central Asia


In [56]:
# Transposed view (years as rows) of the HDI series for three countries.
hdi_gm[hdi_gm['country'].isin(['Georgia', 'Turkmenistan', 'Uzbekistan'])].transpose()

Unnamed: 0,6,18,20
country,Georgia,Turkmenistan,Uzbekistan
1990,,,
1991,,,
1992,,,
1993,,,
1994,,,
1995,,,
1996,,,
1997,,,
1998,,,


### AUGMENTED HUMAN DEVELOPMENT INDEX

In [62]:
# Load the AHDI table (country plus six selected year columns), derive ISO
# alpha-3 codes from the country names, then right-join onto countries_df so
# that every row of countries_df is kept.
ahdi = pd.read_csv(
    'data/AHDI.csv',
    usecols=['country', '1990', '1995', '2000', '2005', '2010', '2015'],
)
ahdi['Country Code'] = ahdi['country'].apply(get_iso)
ahdi = ahdi.merge(
    countries_df[['Group', 'Country Code', 'Region']],
    how='right',
    on='Country Code',
)

In [63]:
# Display the merged AHDI table.
ahdi

Unnamed: 0,country,1990,1995,2000,2005,2010,2015,Country Code,Group,Region
0,Belarus,0.432,0.419,0.317,0.32,0.348,0.38,BLR,FSU,Eastern Europe
1,Bulgaria,0.412,0.49,0.512,0.544,0.555,0.551,BGR,FWT,Eastern Europe
2,Estonia,0.483,0.537,0.593,0.625,0.648,0.709,EST,FSU,Baltic states
3,Hungary,0.469,0.52,0.544,0.575,0.567,0.574,HUN,FWT,Eastern Europe
4,Latvia,0.473,0.499,0.528,0.561,0.573,0.623,LVA,FSU,Baltic states
5,Lithuania,0.526,0.508,0.544,0.567,0.604,0.66,LTU,FSU,Baltic states
6,Moldova,0.395,0.395,0.408,0.405,0.449,0.51,MDA,FSU,Eastern Europe
7,Poland,0.409,0.492,0.525,0.554,0.582,0.661,POL,FWT,Eastern Europe
8,Romania,0.324,0.417,0.433,0.467,0.498,0.538,ROU,FWT,Eastern Europe
9,Russia,0.316,0.377,0.358,0.352,0.385,0.373,RUS,FSU,Eastern Europe


### NOT ONLY GDP - UNEMPLOYMENT - WORLD BANK ILO ESTIMATE

In [36]:
# Unemployment, total (% of total labor force) (modeled ILO estimate)
# The first 4 lines of the file are skipped before the header row is read.
uem_rate = pd.read_csv(
    'data/API_SL.UEM.TOTL.ZS_DS2_en_csv_v2_3629004.csv',
    skiprows=4,
    usecols=use_cols,
)

# Keep the selected countries plus the EUU / ECS / ECA rows.
uem_codes = iso_codes + ['EUU', 'ECS', 'ECA']
uem_rate = uem_rate[uem_rate['Country Code'].isin(uem_codes)]

In [37]:
# Preview the first rows of the filtered unemployment table (still wide: one column per year).
uem_rate.head()

Unnamed: 0,Country Name,Country Code,Indicator Code,1990,1991,1992,1993,1994,1995,1996,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
10,Armenia,ARM,SL.UEM.TOTL.ZS,,1.6,1.8,5.3,6.6,6.7,9.3,...,18.440001,17.299999,16.18,17.5,18.26,17.620001,17.700001,18.969999,18.299999,21.205999
15,Azerbaijan,AZE,SL.UEM.TOTL.ZS,,0.9,1.8,4.5,6.3,7.2,8.1,...,5.42,5.19,4.97,4.91,4.96,5.0,5.0,4.9,4.85,6.463
21,Bulgaria,BGR,SL.UEM.TOTL.ZS,,11.1,15.3,16.4,12.8,11.1,12.5,...,11.26,12.27,12.94,11.42,9.14,7.57,6.16,5.21,4.23,5.12
25,Belarus,BLR,SL.UEM.TOTL.ZS,,0.6,3.1,8.5,12.8,17.700001,24.4,...,6.166,6.051,6.006,5.994,5.84,5.84,5.65,4.76,4.16,4.768
54,Czech Republic,CZE,SL.UEM.TOTL.ZS,,2.27,2.6,4.32,4.3,4.02,3.89,...,6.71,6.98,6.95,6.11,5.05,3.95,2.89,2.24,2.01,2.55


In [38]:
# Wide -> long: the year columns become a 'Year' column with values in 'Value'.
uem_rate = pd.melt(
    uem_rate,
    id_vars=['Country Name', 'Country Code', 'Indicator Code'],
    var_name='Year',
    value_name='Value',
)

In [39]:
# Rows with no unemployment value.
uem_rate[uem_rate['Value'].isna()]

Unnamed: 0,Country Name,Country Code,Indicator Code,Year,Value
0,Armenia,ARM,SL.UEM.TOTL.ZS,1990,
1,Azerbaijan,AZE,SL.UEM.TOTL.ZS,1990,
2,Bulgaria,BGR,SL.UEM.TOTL.ZS,1990,
3,Belarus,BLR,SL.UEM.TOTL.ZS,1990,
4,Czech Republic,CZE,SL.UEM.TOTL.ZS,1990,
5,Europe & Central Asia (excluding high income),ECA,SL.UEM.TOTL.ZS,1990,
6,Europe & Central Asia,ECS,SL.UEM.TOTL.ZS,1990,
7,Estonia,EST,SL.UEM.TOTL.ZS,1990,
8,European Union,EUU,SL.UEM.TOTL.ZS,1990,
9,Georgia,GEO,SL.UEM.TOTL.ZS,1990,


### Life expectancy at birth, total (years) - World Bank

https://data.worldbank.org/indicator/SP.DYN.LE00.IN

In [40]:
# Life expectancy at birth (SP.DYN.LE00.IN) from the local World Bank CSV.
# The first 4 lines of the file are skipped before the header row is read.
life_exp = pd.read_csv(
    'data/API_SP.DYN.LE00.IN_DS2_en_csv_v2_3630764.csv',
    skiprows=4,
    usecols=use_cols,
)

# Keep the selected countries plus the EUU / ECS / ECA rows.
life_exp_codes = iso_codes + ['EUU', 'ECS', 'ECA']
life_exp = life_exp[life_exp['Country Code'].isin(life_exp_codes)]

In [41]:
# Wide -> long: the year columns become a 'Year' column with values in 'Value'.
life_exp = pd.melt(
    life_exp,
    id_vars=['Country Name', 'Country Code', 'Indicator Code'],
    var_name='Year',
    value_name='Value',
)

In [42]:
# Rows with no life-expectancy value.
life_exp[life_exp['Value'].isna()]

Unnamed: 0,Country Name,Country Code,Indicator Code,Year,Value
720,Armenia,ARM,SP.DYN.LE00.IN,2020,
721,Azerbaijan,AZE,SP.DYN.LE00.IN,2020,
722,Bulgaria,BGR,SP.DYN.LE00.IN,2020,
723,Belarus,BLR,SP.DYN.LE00.IN,2020,
724,Czech Republic,CZE,SP.DYN.LE00.IN,2020,
725,Europe & Central Asia (excluding high income),ECA,SP.DYN.LE00.IN,2020,
726,Europe & Central Asia,ECS,SP.DYN.LE00.IN,2020,
727,Estonia,EST,SP.DYN.LE00.IN,2020,
728,European Union,EUU,SP.DYN.LE00.IN,2020,
729,Georgia,GEO,SP.DYN.LE00.IN,2020,


### High-technology exports (% of manufactured exports) - GP

### Current health expenditure per capita, PPP (current international $)

In [173]:
# https://data.worldbank.org/indicator/SH.XPD.CHEX.PP.CD

### Physicians (per 1,000 people)

In [174]:
# https://data.worldbank.org/indicator/SH.MED.PHYS.ZS

### GOVERNANCE INDICATORS

In [43]:
# Load the governance indicators table from the local CSV.
gov_ind = pd.read_csv('data/governance_indicators_data.csv')

In [44]:
# Preview the first rows of the raw governance table (all countries, one row per series).
gov_ind.head()

Unnamed: 0,Country Name,Country Code,Series Name,Series Code,1996 [YR1996],1998 [YR1998],2000 [YR2000],2002 [YR2002],2003 [YR2003],2004 [YR2004],...,2011 [YR2011],2012 [YR2012],2013 [YR2013],2014 [YR2014],2015 [YR2015],2016 [YR2016],2017 [YR2017],2018 [YR2018],2019 [YR2019],2020 [YR2020]
0,Afghanistan,AFG,Control of Corruption: Estimate,CC.EST,-1.291705,-1.180848,-1.29538,-1.263366,-1.351042,-1.345281,...,-1.579174,-1.419741,-1.43651,-1.354829,-1.342216,-1.526172,-1.515626,-1.487624,-1.400733,-1.475405
1,Afghanistan,AFG,Control of Corruption: Number of Sources,CC.NO.SRC,2.0,2.0,2.0,2.0,3.0,5.0,...,9.0,10.0,11.0,11.0,11.0,10.0,10.0,10.0,10.0,9.0
2,Afghanistan,AFG,Control of Corruption: Percentile Rank,CC.PER.RNK,4.301075,9.793815,5.076142,5.050505,5.050505,5.853659,...,0.9478673,2.369668,1.895735,5.288462,6.25,3.365385,3.846154,4.807693,6.730769,5.288462
3,Afghanistan,AFG,"Control of Corruption: Percentile Rank, Lower ...",CC.PER.RNK.LOWER,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.4807692,1.442308,0.0,0.0,0.0,1.923077,0.0
4,Afghanistan,AFG,"Control of Corruption: Percentile Rank, Upper ...",CC.PER.RNK.UPPER,27.41936,31.4433,29.44162,31.81818,18.18182,14.14634,...,5.687204,11.84834,9.952606,12.5,12.5,9.615385,9.615385,10.09615,11.53846,11.05769


In [45]:
# Restrict the governance table to the selected countries.
gov_ind = gov_ind[gov_ind['Country Code'].isin(iso_codes)]

In [46]:
# Row count per country code after filtering.
gov_ind['Country Code'].value_counts()

KAZ    36
MDA    36
CZE    36
ARM    36
POL    36
SVK    36
RUS    36
TKM    36
GEO    36
EST    36
UKR    36
LVA    36
BGR    36
UZB    36
BLR    36
ROU    36
TJK    36
LTU    36
HUN    36
KGZ    36
AZE    36
Name: Country Code, dtype: int64

In [47]:
# Row count per series name, ordered alphabetically by series name.
gov_ind['Series Name'].value_counts().sort_index()

Control of Corruption: Estimate                                                                                   21
Control of Corruption: Number of Sources                                                                          21
Control of Corruption: Percentile Rank                                                                            21
Control of Corruption: Percentile Rank, Lower Bound of 90% Confidence Interval                                    21
Control of Corruption: Percentile Rank, Upper Bound of 90% Confidence Interval                                    21
Control of Corruption: Standard Error                                                                             21
Government Effectiveness: Estimate                                                                                21
Government Effectiveness: Number of Sources                                                                       21
Government Effectiveness: Percentile Rank                       

### HAPPINESS SCORE

In [48]:
# Load the happiness-score table from the local CSV
# (the file name suggests World Happiness Report data for 2019-2021 — confirm).
whr_index = pd.read_csv('data/WHR/whr-index-2019-2021.csv')

In [49]:
# Preview the first rows of the happiness-score table.
whr_index.head()

Unnamed: 0,Country,Happiness score,Year
0,Finland,7.769,2019
1,Denmark,7.6,2019
2,Norway,7.554,2019
3,Iceland,7.494,2019
4,Netherlands,7.488,2019
