In [1]:
import pandas as pd
from datetime import datetime
import country_converter as coco
import wbgapi as wb
import os

# Shared country-code converter; used further down to map ISO2 codes to ISO3
cc = coco.CountryConverter()

In [2]:
# Raw input folder: the RawData directory that sits next to the current
# working directory (i.e. <parent of cwd>/RawData/). The trailing slash is
# kept so file names can be appended directly.
path_rawdata = f"{os.path.normpath(f'{os.getcwd()}{os.sep}{os.pardir}')}/RawData/"

In [3]:
# Mapping from WDI series code (key) to the column label used in this
# notebook (value). Insertion order is the order the series are requested in.
varnames = {
    # population
    'SP.POP.TOTL': 'population',
    # GDP
    'NY.GDP.MKTP.CD': 'gdp_cur_dol',
    'NY.GDP.MKTP.KD': 'gdp_const_dol',
    'NY.GDP.MKTP.CN': 'gdp_cur_lcu',
    'NY.GDP.MKTP.KN': 'gdp_const_lcu',
    # consumption (NE.CON.TOTL.* series)
    'NE.CON.TOTL.CD': 'cons_cur_dol',
    'NE.CON.TOTL.KD': 'cons_const_dol',
    'NE.CON.TOTL.CN': 'cons_cur_lcu',
    'NE.CON.TOTL.KN': 'cons_const_lcu',
    # NOTE(review): this one is a NE.CON.PRVT.* code, unlike the TOTL series above
    'NE.CON.PRVT.PP.KD': 'cons_const_icu',
    # imports / exports
    'NE.IMP.GNFS.KD': 'imp_const_dol',
    'NE.EXP.GNFS.KD': 'exp_const_dol',
    'NE.IMP.GNFS.CD': 'imp_cur_dol',
    'NE.EXP.GNFS.CD': 'exp_cur_dol',
}

mydesc = [*varnames.values()]  # the notebook's column labels
myvals = [*varnames]           # the WDI series codes (the dictionary keys)

In [4]:
# Download every series listed in `myvals` for the years in range(1970, 2023)
# (upper bound exclusive). Fetching all countries takes a couple of minutes.
# To run it for a subset of countries, the call below can be replaced by
#wdi = wb.data.DataFrame(myvals,['FRA','DEU','GBR','ITA','USA','JPN','CAN','CHN','IND'],time=range(1970,2019), numericTimeKeys=True,columns='series').reset_index()
wdi = (
    wb.data.DataFrame(
        myvals,
        time=range(1970, 2023),
        numericTimeKeys=True,
        columns='series',
        skipAggs=True,
    )
    .reset_index()
    # key columns get the names used throughout the rest of the notebook
    .rename(columns={'economy': 'iso3', 'time': 'year'})
    # WDI series codes -> the more intuitive labels defined in `varnames`
    .rename(columns=varnames)
)

In [5]:
# [EMILY] drop every row carrying the code CHI, which doesn't match any country/region
wdi = wdi.drop(index=wdi.loc[wdi['iso3'].eq('CHI')].index)

In [6]:
# [EMILY] Taiwan is read separately from taiwan_haver.csv, tagged with the
# code TWN, and its four current-dollar columns are cast to float
taiwan = (
    pd.read_csv(path_rawdata+'taiwan_haver.csv')
    .assign(iso3='TWN')
    .astype({
        col: 'float'
        for col in ('exp_cur_dol', 'imp_cur_dol', 'gdp_cur_dol', 'cons_cur_dol')
    })
)

In [7]:
# [EMILY] fold the Taiwan rows into wdi. The join key is every Taiwan column
# and how='outer' keeps rows that appear in only one of the two frames; wdi
# columns that Taiwan lacks are left missing on the Taiwan rows.
wdi = wdi.merge(taiwan, how='outer', on=taiwan.columns.tolist())

In [8]:
# Get data about eurozone entry dates.
# The resulting `euro` frame has exactly two columns:
#   iso3      - ISO3 code converted from the first two characters of 'Code'
#   EntryDate - calendar year parsed from the day/month/year strings in 'Date'
euro = pd.read_csv(path_rawdata+'Euro_Yield_Dates.csv', encoding='latin-1')

# Only the year of entry is needed, so take it straight from the parsed dates
euro['EntryDate'] = pd.to_datetime(euro['Date'], format='%d/%m/%Y').dt.year

# The leading two characters of 'Code' are treated as an ISO2 country code
euro['iso3'] = cc.pandas_convert(series=euro['Code'].str[:2], to='ISO3')

# Keep only country code and entry year; every other column is discarded here,
# so no intermediate helper columns are created above
euro = euro[['iso3','EntryDate']]

In [9]:
# Build a eurozone aggregate (iso3 = 'EUR') from the member rows and append it to wdi.

# Attach the WDI rows to each eurozone member. The left join keeps members
# without any WDI rows (their 'year' is missing), hence the nullable Int64 dtype.
tmpeuro = pd.merge(left=euro, right=wdi, on='iso3', how='left')
tmpeuro['year'] = tmpeuro['year'].astype('Int64')

# Keep only the years from each member's entry year onwards
tmpeuro = tmpeuro[tmpeuro['year'] >= tmpeuro['EntryDate']]
tmpeuro = tmpeuro.drop('EntryDate', axis=1)

# Unpivot data from wide to long format
tmpeuro = pd.melt(tmpeuro, id_vars=['iso3','year'])

# Sum over all countries belonging to eurozone.
# min_count=1 keeps a year/variable with no observations at all as missing;
# the default (min_count=0) would report it as 0, e.g. a GDP of zero.
# NOTE(review): members with a missing value in a given year are still skipped
# by the sum, so the aggregate can cover fewer countries in those years.
tmpeuro = tmpeuro.groupby(['year','variable'], as_index=False)['value'].sum(min_count=1)

# Add new column iso3 with EUR value for all rows
tmpeuro['iso3'] = 'EUR'

# Reshape from long to wide format: one row per year, one column per variable
tmpeuro = (
    tmpeuro
    .pivot_table(index=['iso3','year'], columns='variable', values='value')
    .reset_index()
    .rename_axis(None, axis=1)
)

# Concatenate wdi and tmpeuro. EUR rows already present in wdi (e.g. from an
# earlier run of this cell) are dropped first so the aggregate is not duplicated.
wdi = pd.concat([wdi[wdi['iso3'] != 'EUR'], tmpeuro], ignore_index=True)

In [10]:
# Sort values. sort_values returns a new frame rather than sorting in place,
# so the result has to be assigned back; the index is then renumbered to
# follow the sorted order.
wdi = wdi.sort_values(by=['iso3','year']).reset_index(drop=True)

# Save output to the CleanData directory next to the current working directory
path_cleandata = os.path.normpath(os.getcwd()+os.sep+os.pardir)+'/CleanData/'
wdi.to_pickle(path_cleandata+'WDI.pkl')
