In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import folium
import seaborn as sns
# Switch matplotlib over to seaborn's default theme for every figure below.
sns.set()

# Location of the world-map JSON that the folium choropleths receive as `geo_data`.
world_map = os.path.join('./data/', 'world_map.json')

# Import data and remove unnecessary columns

## Import data

In [2]:
# GDP per capita table; unlike the other files it is read with a ';' separator.
gdp_per_person = pd.read_csv('./data/economic_features/GDP_per_capita.csv', sep=';')
# Inflation (CPI) and debt extracts for the single years 2015 and 2017
# (going by the file names), later used for the bar plots and maps.
cpi_2015 = pd.read_csv('./data/economic_features/CPI_2015.csv')
cpi_2017 = pd.read_csv('./data/economic_features/CPI_2017.csv')
debt_2015 = pd.read_csv('./data/economic_features/DEBT_2015.csv')
debt_2017 = pd.read_csv('./data/economic_features/DEBT_2017.csv')
# Multi-year CPI and debt series (2000-2019 going by the file names),
# later combined with GDP into the panel data set.
cpi_2000_2019 = pd.read_csv('./data/economic_features/CPI_2000_2019.csv')
debt_2000_2019 = pd.read_csv('./data/economic_features/DEBT_2000_2019.csv')

**GDP per capita**: data is available for OECD countries between 1960 and 2019, with some missing values.

**CPI rate**: data is available for OECD countries between 2000 and 2019.

**DEBT**: data is available for OECD countries between 2000 and 2019.

## Dictionary with the country code of all OECD countries

In [3]:
# Three-letter country code -> display name for every OECD member used in
# this notebook. Insertion order matters: later cells iterate over the keys
# to decide the row order of the filtered tables.
oecd_countries = {
    'AUS': 'Australia',
    'AUT': 'Austria',
    'BEL': 'Belgium',
    'CAN': 'Canada',
    'CHL': 'Chile',
    'COL': 'Colombia',
    'CZE': 'Czech Republic',
    'DNK': 'Denmark',
    'EST': 'Estonia',
    'FIN': 'Finland',
    'FRA': 'France',
    'DEU': 'Germany',
    'GRC': 'Greece',
    'HUN': 'Hungary',
    'ISL': 'Iceland',
    'IRL': 'Ireland',
    'ISR': 'Israel',
    'ITA': 'Italy',
    'JPN': 'Japan',
    'KOR': 'Korea',
    'LVA': 'Latvia',
    'LTU': 'Lithuania',
    'LUX': 'Luxembourg',
    'MEX': 'Mexico',
    'NLD': 'Netherlands',
    'NZL': 'New Zealand',
    'NOR': 'Norway',
    'POL': 'Poland',
    'PRT': 'Portugal',
    'SVK': 'Slovakia',
    'SVN': 'Slovenia',
    'ESP': 'Spain',
    'SWE': 'Sweden',
    'CHE': 'Switzerland',
    'TUR': 'Turkey',
    'GBR': 'United Kingdom',
    'USA': 'United States',
}

## Preparation of the datasets

### Removal of unnecessary columns and countries

#### GDP per capita

In [13]:
# Keep only the OECD rows of the GDP table, in the key order of `oecd_countries`.
# The per-country slices are collected in a list and concatenated once:
# `DataFrame.append` was removed in pandas 2.0 and copied the whole frame on
# every call. As before, the original row labels are kept at this stage.
oecd_gdp_slices = [
    gdp_per_person[gdp_per_person['Country Code'] == code]
    for code in oecd_countries
]
gdp_per_person_oecd = pd.concat(oecd_gdp_slices)

# Single-year cross-sections (country code + value) for the statistics,
# bar plots and maps further down.
gdp_2015_oecd = gdp_per_person_oecd[['Country Code', '2015']]
gdp_2017_oecd = gdp_per_person_oecd[['Country Code', '2017']]

# Wide table reduced to the country code plus the remaining (year) columns,
# renumbered 0..n-1 so each row position identifies one country.
gdp_1960_2019_oecd_full = (
    gdp_per_person_oecd
    .drop(['Country Name', 'Indicator Name', 'Indicator Code'], axis=1)
    .reset_index(drop=True)
)

In [38]:
# Reshape the wide GDP table (one row per country, one column per year) into
# long format: one row per (Country, Time) observation with its GDP value.
#
# This replaces a cell-by-cell `DataFrame.append` loop (removed in pandas 2.0,
# quadratic in the number of rows) with a single `melt`.
country_column = gdp_1960_2019_oecd_full.columns[0]
gdp_1960_2019_oecd = (
    gdp_1960_2019_oecd_full
    # Keep the wide row label on every melted row so the original
    # country-by-country ordering can be restored below.
    .melt(id_vars=country_column, var_name='Time', value_name='GDP', ignore_index=False)
    # Stable sort: countries in their original order, years in column order.
    .sort_index(kind='mergesort')
    .rename(columns={country_column: 'Country'})
    # Drop the (country, year) pairs that have no GDP figure.
    .dropna()
    # Year labels arrive as column-name strings; store them as integers.
    .astype({'Time': int})
    .reset_index(drop=True)
)

In [41]:
# Preview the long-format GDP table (pandas abbreviates the middle rows).
gdp_1960_2019_oecd

Unnamed: 0,Country,Time,GDP
0,AUS,1960,1807.785710
1,AUS,1961,1874.732106
2,AUS,1962,1851.841851
3,AUS,1963,1964.150470
4,AUS,1964,2128.068355
...,...,...,...
1945,USA,2015,56839.381774
1946,USA,2016,57951.584082
1947,USA,2017,60062.222313
1948,USA,2018,62996.471285


#### Inflation rate 

In [7]:
# Single-year inflation cross-sections: country code and value only.
# NOTE(review): despite the `_oecd` suffix no country filter is applied here;
# presumably the CSV exports are already limited to OECD members - confirm.
cpi_2015_oecd = cpi_2015[['LOCATION', 'Value']]
cpi_2017_oecd = cpi_2017[['LOCATION', 'Value']]

# Harmonise the multi-year series to the (Country, Time, CPI) layout used by
# the panel. Renaming first and selecting second keeps this cell re-runnable:
# `rename` skips labels that are absent, so a second execution (when
# `cpi_2000_2019` already carries the new names) is a no-op rather than a
# KeyError on 'LOCATION'. It also avoids assigning `.columns` on a slice.
cpi_2000_2019 = (
    cpi_2000_2019
    .rename(columns={'LOCATION': 'Country', 'TIME': 'Time', 'Value': 'CPI'})
    .loc[:, ['Country', 'Time', 'CPI']]
)

In [8]:
# Preview the harmonised CPI series (pandas abbreviates the middle rows).
cpi_2000_2019

Unnamed: 0,Country,Time,CPI
0,AUS,2000,4.457435
1,AUS,2001,4.407135
2,AUS,2002,2.981575
3,AUS,2003,2.732596
4,AUS,2004,2.343255
...,...,...,...
755,LTU,2015,-0.884097
756,LTU,2016,0.905525
757,LTU,2017,3.722889
758,LTU,2018,2.697928


#### Debt

In [9]:
# Single-year debt cross-sections: country code and value only.
# NOTE(review): despite the `_oecd` suffix no country filter is applied here;
# presumably the CSV exports are already limited to OECD members - confirm.
debt_2015_oecd = debt_2015[['LOCATION', 'Value']]
debt_2017_oecd = debt_2017[['LOCATION', 'Value']]

# Harmonise the multi-year series to the (Country, Time, DEBT) layout used by
# the panel. Renaming first and selecting second keeps this cell re-runnable:
# `rename` skips labels that are absent, so a second execution (when
# `debt_2000_2019` already carries the new names) is a no-op rather than a
# KeyError on 'LOCATION'. It also avoids assigning `.columns` on a slice.
debt_2000_2019 = (
    debt_2000_2019
    .rename(columns={'LOCATION': 'Country', 'TIME': 'Time', 'Value': 'DEBT'})
    .loc[:, ['Country', 'Time', 'DEBT']]
)

In [10]:
# Preview the harmonised debt series (pandas abbreviates the middle rows).
debt_2000_2019

Unnamed: 0,Country,Time,DEBT
0,AUS,2000,41.14750
1,AUS,2001,40.40488
2,AUS,2002,38.67284
3,AUS,2003,35.66726
4,AUS,2004,32.31054
...,...,...,...
652,LTU,2015,53.33852
653,LTU,2016,50.88945
654,LTU,2017,47.00182
655,LTU,2018,40.67271


### Panel data set

In [53]:
# Build the panel: one row per (Country, Time) for which GDP, CPI and DEBT are
# all observed, restricted to the OECD codes and to the years 2000-2019.
#
# The original nested loop performed an inner join by hand, grew the frame
# with `DataFrame.append` (removed in pandas 2.0) and tested for NaN with
# `array == array` inside `and`, which raises as soon as a (Country, Time)
# key occurs twice. Filtered inner merges give the same rows directly.
years = list(range(2000, 2020))
oecd_codes = list(oecd_countries)


def _panel_slice(frame, value_column):
    """Return the usable (Country, Time, value) rows of one indicator table.

    Keeps rows whose country is an OECD code and whose year is in `years`,
    drops rows where the value is missing, keeps the first row of any
    duplicated (Country, Time) key, and stores the year as an integer so the
    merge keys have the same dtype in all three tables.
    """
    in_scope = frame['Country'].isin(oecd_codes) & frame['Time'].isin(years)
    return (
        frame.loc[in_scope, ['Country', 'Time', value_column]]
        .dropna(subset=[value_column])
        .drop_duplicates(subset=['Country', 'Time'])
        .astype({'Time': int})
    )


# Inner joins keep only the country-years present in all three indicators.
panel = (
    _panel_slice(gdp_1960_2019_oecd, 'GDP')
    .merge(_panel_slice(cpi_2000_2019, 'CPI'), on=['Country', 'Time'], how='inner')
    .merge(_panel_slice(debt_2000_2019, 'DEBT'), on=['Country', 'Time'], how='inner')
)

# Reproduce the loop's row order: countries in `oecd_countries` key order,
# years ascending within each country.
country_rank = {code: rank for rank, code in enumerate(oecd_codes)}
df_economic = (
    panel
    .assign(_rank=panel['Country'].map(country_rank))
    .sort_values(['_rank', 'Time'])
    .drop(columns='_rank')
    .set_index(['Country', 'Time'])
)

In [54]:
# Preview the panel, indexed by (Country, Time) (pandas abbreviates the middle rows).
df_economic

Unnamed: 0_level_0,Unnamed: 1_level_0,GDP,CPI,DEBT
Country,Time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AUS,2000,21679.247842,4.457435,41.14750
AUS,2001,19490.861110,4.407135,40.40488
AUS,2002,20082.483267,2.981575,38.67284
AUS,2003,23447.031001,2.732596,35.66726
AUS,2004,30430.676437,2.343255,32.31054
...,...,...,...,...
USA,2015,56839.381774,0.118627,136.43000
USA,2016,57951.584082,1.261583,138.11100
USA,2017,60062.222313,2.130110,134.67420
USA,2018,62996.471285,2.442583,136.17960


In [56]:
# Write the panel to a CSV file in the current working directory.
df_economic.to_csv('./economic_features.csv')

# Overall descriptive statistics

## GDP per capita

In [None]:
# Summary statistics of GDP per capita for the two reference years,
# printed as two blocks separated by blank lines.
gdp_2015_summary = gdp_2015_oecd['2015'].describe()
gdp_2017_summary = gdp_2017_oecd['2017'].describe()
print(
    f'Descriptive statistics for GDP per capita in 2015\n{gdp_2015_summary}',
    '\n',
    f'Descriptive statistics for GDP per capita in 2017\n{gdp_2017_summary}',
    sep='\n',
)

In [None]:
# Bar chart of 2015 GDP per capita, one bar per country, sorted ascending.
fig, ax = plt.subplots(figsize=(20, 10))
codes_by_gdp_2015 = gdp_2015_oecd.sort_values('2015')['Country Code']
sns.barplot(
    data=gdp_2015_oecd,
    x='Country Code',
    y='2015',
    order=codes_by_gdp_2015,
    color="C0",
    ax=ax,
)
ax.set_xlabel('Country Code', size=15)
ax.set_ylabel('GDP per capita', size=15)
ax.set_title('GDP per capita in OECD countries in 2015', size=20)

In [None]:
# Bar chart of 2017 GDP per capita, one bar per country, sorted ascending.
fig, ax = plt.subplots(figsize=(20, 10))
codes_by_gdp_2017 = gdp_2017_oecd.sort_values('2017')['Country Code']
sns.barplot(
    data=gdp_2017_oecd,
    x='Country Code',
    y='2017',
    order=codes_by_gdp_2017,
    color="C0",
    ax=ax,
)
ax.set_xlabel('Country Code', size=15)
ax.set_ylabel('GDP per capita', size=15)
ax.set_title('GDP per capita in OECD countries in 2017', size=20)

## Inflation rate measured by the CPI

In [None]:
# Summary statistics of the inflation rate for the two reference years,
# printed as two blocks separated by blank lines.
cpi_2015_summary = cpi_2015_oecd.Value.describe()
cpi_2017_summary = cpi_2017_oecd.Value.describe()
print(
    f'Descriptive statistics for inflation rate in 2015\n{cpi_2015_summary}',
    '\n',
    f'Descriptive statistics for inflation rate in 2017\n{cpi_2017_summary}',
    sep='\n',
)

In [None]:
# Bar chart of the 2015 inflation rate, one bar per country, sorted ascending.
fig, ax = plt.subplots(figsize=(20, 10))
codes_by_cpi_2015 = cpi_2015_oecd.sort_values('Value')['LOCATION']
sns.barplot(
    data=cpi_2015_oecd,
    x='LOCATION',
    y='Value',
    order=codes_by_cpi_2015,
    color="C0",
    ax=ax,
)
ax.set_xlabel('Country Code', size=15)
ax.set_ylabel('CPI', size=15)
ax.set_title('Inflation rate in OECD countries in 2015', size=20)

In [None]:
# Bar chart of the 2017 inflation rate, one bar per country, sorted ascending.
fig, ax = plt.subplots(figsize=(20, 10))
codes_by_cpi_2017 = cpi_2017_oecd.sort_values('Value')['LOCATION']
sns.barplot(
    data=cpi_2017_oecd,
    x='LOCATION',
    y='Value',
    order=codes_by_cpi_2017,
    color="C0",
    ax=ax,
)
ax.set_xlabel('Country Code', size=15)
ax.set_ylabel('CPI', size=15)
ax.set_title('Inflation rate in OECD countries in 2017', size=20)

## Debt in % of GDP

In [None]:
# Summary statistics of debt for the two reference years,
# printed as two blocks separated by blank lines.
debt_2015_summary = debt_2015_oecd.Value.describe()
debt_2017_summary = debt_2017_oecd.Value.describe()
print(
    f'Descriptive statistics for debt in 2015\n{debt_2015_summary}',
    '\n',
    f'Descriptive statistics for debt in 2017\n{debt_2017_summary}',
    sep='\n',
)

In [None]:
# Bar chart of 2015 debt, one bar per country, sorted ascending.
fig, ax = plt.subplots(figsize=(20, 10))
codes_by_debt_2015 = debt_2015_oecd.sort_values('Value')['LOCATION']
sns.barplot(
    data=debt_2015_oecd,
    x='LOCATION',
    y='Value',
    order=codes_by_debt_2015,
    color="C0",
    ax=ax,
)
ax.set_xlabel('Country Code', size=15)
ax.set_ylabel('Debt', size=15)
ax.set_title('Debt in % of GDP in OECD countries in 2015', size=20)

In [None]:
# Bar chart of 2017 debt, one bar per country, sorted ascending.
fig, ax = plt.subplots(figsize=(20, 10))
codes_by_debt_2017 = debt_2017_oecd.sort_values('Value')['LOCATION']
sns.barplot(
    data=debt_2017_oecd,
    x='LOCATION',
    y='Value',
    order=codes_by_debt_2017,
    color="C0",
    ax=ax,
)
ax.set_xlabel('Country Code', size=15)
ax.set_ylabel('Debt', size=15)
ax.set_title('Debt in % of GDP in OECD countries in 2017', size=20)

# World maps

## GDP per capita in 2015

In [None]:
# Choropleth of 2015 GDP per capita. Countries are matched to the map
# features through `feature.properties.adm0_a3`; features without a value
# are drawn in translucent black.
gdp_map_2015 = folium.Map(min_zoom=1, max_zoom=3)
gdp_2015_values = gdp_2015_oecd['2015']
folium.Choropleth(
    geo_data=world_map,
    name='choropleth',
    data=gdp_2015_oecd,
    columns=['Country Code', '2015'],
    key_on='feature.properties.adm0_a3',
    line_opacity=0.4,
    nan_fill_color="black",
    nan_fill_opacity=0.2,
    # Six evenly spaced edges = five equal-width classes over the observed range.
    bins=np.linspace(np.min(gdp_2015_values), np.max(gdp_2015_values), 6),
    highlight=True,
    legend_name='GDP per person for OECD countries in 2015 (in current US$)',
).add_to(gdp_map_2015)
# `add_to` returns the layer, not the map: end the cell with the map itself
# so the notebook actually renders it.
gdp_map_2015

## GDP per capita in 2017

In [None]:
# Choropleth of 2017 GDP per capita. Countries are matched to the map
# features through `feature.properties.adm0_a3`; features without a value
# are drawn in translucent black.
gdp_map_2017 = folium.Map(min_zoom=1, max_zoom=3)
gdp_2017_values = gdp_2017_oecd['2017']
folium.Choropleth(
    geo_data=world_map,
    name='choropleth',
    data=gdp_2017_oecd,
    columns=['Country Code', '2017'],
    key_on='feature.properties.adm0_a3',
    line_opacity=0.4,
    nan_fill_color="black",
    nan_fill_opacity=0.2,
    # Six evenly spaced edges = five equal-width classes over the observed range.
    bins=np.linspace(np.min(gdp_2017_values), np.max(gdp_2017_values), 6),
    highlight=True,
    legend_name='GDP per person for OECD countries in 2017 (in current US$)',
).add_to(gdp_map_2017)
# `add_to` returns the layer, not the map: end the cell with the map itself
# so the notebook actually renders it.
gdp_map_2017

## Inflation rate in 2015

In [None]:
# Choropleth of the 2015 inflation rate. Countries are matched to the map
# features through `feature.properties.adm0_a3`; features without a value
# are drawn in translucent black.
cpi_map_2015 = folium.Map(min_zoom=1, max_zoom=3)
cpi_2015_values = cpi_2015_oecd['Value']
folium.Choropleth(
    geo_data=world_map,
    name='choropleth',
    data=cpi_2015_oecd,
    columns=['LOCATION', 'Value'],
    key_on='feature.properties.adm0_a3',
    line_opacity=0.4,
    nan_fill_color="black",
    nan_fill_opacity=0.2,
    # Six evenly spaced edges = five equal-width classes over the observed range.
    bins=np.linspace(np.min(cpi_2015_values), np.max(cpi_2015_values), 6),
    highlight=True,
    # The legend used to carry the GDP unit "(in current US$)", which does
    # not apply to an inflation rate.
    legend_name='Inflation rate (CPI) for OECD countries in 2015',
).add_to(cpi_map_2015)
# `add_to` returns the layer, not the map: end the cell with the map itself
# so the notebook actually renders it.
cpi_map_2015

## Inflation rate in 2017

In [None]:
# Choropleth of the 2017 inflation rate. Countries are matched to the map
# features through `feature.properties.adm0_a3`; features without a value
# are drawn in translucent black.
cpi_map_2017 = folium.Map(min_zoom=1, max_zoom=3)
cpi_2017_values = cpi_2017_oecd['Value']
folium.Choropleth(
    geo_data=world_map,
    name='choropleth',
    data=cpi_2017_oecd,
    columns=['LOCATION', 'Value'],
    key_on='feature.properties.adm0_a3',
    line_opacity=0.4,
    nan_fill_color="black",
    nan_fill_opacity=0.2,
    # Six evenly spaced edges = five equal-width classes over the observed range.
    bins=np.linspace(np.min(cpi_2017_values), np.max(cpi_2017_values), 6),
    highlight=True,
    # The legend was copy-pasted from the GDP map; it now names the plotted variable.
    legend_name='Inflation rate (CPI) for OECD countries in 2017',
).add_to(cpi_map_2017)
# `add_to` returns the layer, not the map: end the cell with the map itself
# so the notebook actually renders it.
cpi_map_2017

## Debt in 2015

In [None]:
# Choropleth of 2015 debt. Countries are matched to the map features through
# `feature.properties.adm0_a3`; features without a value are drawn in
# translucent black.
debt_map_2015 = folium.Map(min_zoom=1, max_zoom=3)
debt_2015_values = debt_2015_oecd['Value']
folium.Choropleth(
    geo_data=world_map,
    name='choropleth',
    data=debt_2015_oecd,
    columns=['LOCATION', 'Value'],
    key_on='feature.properties.adm0_a3',
    line_opacity=0.4,
    nan_fill_color="black",
    nan_fill_opacity=0.2,
    # Six evenly spaced edges = five equal-width classes over the observed range.
    bins=np.linspace(np.min(debt_2015_values), np.max(debt_2015_values), 6),
    highlight=True,
    # The legend was copy-pasted from the 2017 GDP map; it now names the
    # plotted variable and year, with the unit taken from this section's heading.
    legend_name='Debt for OECD countries in 2015 (in % of GDP)',
).add_to(debt_map_2015)
# `add_to` returns the layer, not the map: end the cell with the map itself
# so the notebook actually renders it.
debt_map_2015

## Debt in 2017

In [None]:
# Choropleth of 2017 debt. Countries are matched to the map features through
# `feature.properties.adm0_a3`; features without a value are drawn in
# translucent black.
debt_map_2017 = folium.Map(min_zoom=1, max_zoom=3)
debt_2017_values = debt_2017_oecd['Value']
folium.Choropleth(
    geo_data=world_map,
    name='choropleth',
    data=debt_2017_oecd,
    columns=['LOCATION', 'Value'],
    key_on='feature.properties.adm0_a3',
    line_opacity=0.4,
    nan_fill_color="black",
    nan_fill_opacity=0.2,
    # Six evenly spaced edges = five equal-width classes over the observed range.
    bins=np.linspace(np.min(debt_2017_values), np.max(debt_2017_values), 6),
    highlight=True,
    # The legend was copy-pasted from the GDP map; it now names the plotted
    # variable, with the unit taken from this section's heading.
    legend_name='Debt for OECD countries in 2017 (in % of GDP)',
).add_to(debt_map_2017)
# `add_to` returns the layer, not the map: end the cell with the map itself
# so the notebook actually renders it.
debt_map_2017