# In [1]:
import pandas as pd
from datetime import datetime
import country_converter as coco
import os

# Converter instance used further down to turn ISO2 country codes into ISO3.
cc = coco.CountryConverter()

# Raw inputs are read from '<parent of the current working directory>/RawData/'.
# To check consistency with Richmond's R code, use pwt81.
path_rawdata = os.path.normpath(os.getcwd() + os.sep + os.pardir) + '/RawData/'
pwtdt = pd.read_excel(path_rawdata + 'pwt81.xlsx', sheet_name='Data')
# TODO: once consistency with Richmond's R code is checked, read
# 'pwt1001.xlsx' (sheet 'Data') instead of 'pwt81.xlsx' above.

# Select relevant columns only, then rename countrycode -> iso3 and
# ccon -> totcons. rename() returns a new frame, which avoids mutating a
# column-selection result in place (source of SettingWithCopyWarning).
pwtdt = pwtdt[['countrycode', 'year', 'pop', 'ccon', 'cgdpo']].rename(
    columns={'countrycode': 'iso3', 'ccon': 'totcons'}
)

# Order observations by iso3 and year. sort_values() is not in-place, so the
# result has to be assigned back for the ordering to take effect.
pwtdt = pwtdt.sort_values(by=['iso3', 'year'])

# Add euro area aggregates
# From the CSV only two pieces of information are used: the first two
# characters of 'Code' (read as an ISO2 country code) and the year of 'Date'
# (parsed as dd/mm/YYYY), stored as the entry year.
euro = pd.read_csv(path_rawdata + 'Euro_Yield_Dates.csv', encoding='latin-1')
euro['EntryDate'] = pd.to_datetime(euro['Date'], format='%d/%m/%Y').dt.year
iso2 = euro['Code'].str[:2]
euro['iso3'] = cc.pandas_convert(series=iso2, to='ISO3')

# Keep only country code and entry year. Exact duplicate rows are dropped so
# that a country listed twice is not counted twice in the sums below.
euro = euro[['iso3', 'EntryDate']].drop_duplicates()

# Merge PWT data onto the euro members
tmpeuro = pd.merge(left=euro, right=pwtdt, on='iso3', how='left')
# The left merge leaves 'year' missing for codes with no PWT match, hence the
# nullable integer dtype; those rows do not survive the filter below.
tmpeuro['year'] = tmpeuro['year'].astype('Int64')
# Keep a country's observations only from its entry year onwards
tmpeuro = tmpeuro[tmpeuro['year'] >= tmpeuro['EntryDate']]
tmpeuro = tmpeuro.drop('EntryDate', axis=1)

# Unpivot data from wide to long format
tmpeuro = pd.melt(tmpeuro, id_vars=['iso3', 'year'])

# Sum over all member countries, per year and variable.
# NOTE(review): sum() skips missing values, so a member with a missing value
# contributes 0 to the aggregate -- confirm this matches the reference R code.
tmpeuro = tmpeuro.groupby(['year', 'variable'], as_index=False)['value'].sum()

# Label every aggregated row with the pseudo country code 'EUR'
tmpeuro['iso3'] = 'EUR'

# Reshape from long back to wide format (one column per variable)
tmpeuro = (
    tmpeuro.pivot_table(index=['iso3', 'year'], columns='variable', values='value')
    .reset_index()
    .rename_axis(None, axis=1)
)

# Append the EUR aggregate rows to the PWT country rows
pwtdt = pd.concat([pwtdt, tmpeuro], ignore_index=True)

# Last thing to do is create the cons column as totcons / pop
pwtdt['cons'] = pwtdt['totcons'] / pwtdt['pop']

# Sort values. sort_values() returns a new frame rather than sorting in place,
# so the result is assigned back; ignore_index=True gives the sorted frame a
# fresh 0..n-1 index.
pwtdt = pwtdt.sort_values(by=['iso3', 'year'], ignore_index=True)

# Save output to '<parent of the current working directory>/CleanData/PWT.pkl'
path_cleandata = os.path.normpath(os.getcwd() + os.sep + os.pardir) + '/CleanData/'
pwtdt.to_pickle(path_cleandata + 'PWT.pkl')


