In [2]:
import pandas as pd
import numpy as np
from datetime import datetime
import xlrd
import pickle
from pandas.tseries.offsets import MonthEnd
import calendar
import country_converter as coco
import os
from dotenv import dotenv_values, find_dotenv

# Look up the project's configuration (.env) file and read it as a dictionary
config = dotenv_values(find_dotenv())

# Directory prefixes for the cleaned and the raw data, taken from the
# "CLEANDATA" and "RAWDATA" entries of the configuration.
# The rest of the notebook builds file names by plain string concatenation
# (prefix + 'file name'), so each prefix has to end with a path separator.
# os.path.join(<dir>, '') appends the separator of the current platform
# instead of a hard-coded backslash, so the paths also work outside Windows.
path_cleandata = os.path.join(os.path.abspath(config["CLEANDATA"]), '')
newpath = os.path.join(os.path.abspath(config["RAWDATA"]), '')

# Import monthly CPI
CPI_temp1 = pd.read_excel(newpath+'CPI Monthly - Datastream - Raw Data.xlsx', sheet_name='CPI Monthly', skiprows=0)

# The 'Code' column carries the observation dates; parse them into 'Date'
CPI_temp1['Date'] = pd.to_datetime(CPI_temp1['Code'])

# Load in EU CPI (its date column is already called 'Date')
CPI_temp_EU = pd.read_excel(newpath+'CPI Monthly - Datastream - Raw Data.xlsx', sheet_name='CPI - Euro Monthly', skiprows=0)
CPI_temp_EU['Date'] = pd.to_datetime(CPI_temp_EU['Date'])

# The two sheets are joined on calendar year and month, not on the exact date.
# The helper key columns are removed again right after the merge.
CPI_temp1 = CPI_temp1.assign(
    year=CPI_temp1['Date'].dt.year,
    month=CPI_temp1['Date'].dt.month,
)
CPI_temp_EU = CPI_temp_EU.assign(
    year=CPI_temp_EU['Date'].dt.year,
    month=CPI_temp_EU['Date'].dt.month,
).drop(columns='Date')
CPI_temp1 = pd.merge(CPI_temp1, CPI_temp_EU, on=['year', 'month'], how='left')
CPI_temp1 = CPI_temp1.drop(columns=['year', 'month'])

# Replace original dates (15th of each month) by end-of-month dates.
# MonthEnd(0) rolls a date forward to the end of its own month and leaves a
# date that already is a month end untouched; MonthEnd(1) would move such a
# date into the following month.
CPI_temp1['EndOfMonth'] = CPI_temp1['Date'] + MonthEnd(0)
CPI_temp1 = CPI_temp1.drop(columns=['Code', 'Date'])

# Long format: one row per (EndOfMonth, variable) with the reading in 'value'
CPI_temp1 = pd.melt(CPI_temp1, id_vars=['EndOfMonth'])


# Import quarterly CPI
CPI_temp2_Q = pd.read_excel(newpath+'CPI Monthly - Datastream - Raw Data.xlsx', sheet_name='CPI Quarterly AUS NZ', skiprows=0)

# Build 'YYYYQn' labels from the dates stored in 'Code' and turn them into
# quarterly periods
quarter_dates = pd.to_datetime(CPI_temp2_Q['Code'])
quarter_labels = quarter_dates.dt.year.astype(str) + 'Q' + quarter_dates.dt.quarter.astype(str)
CPI_temp2_Q['Date'] = pd.PeriodIndex(quarter_labels, freq='Q')

# Index the table by period (required for the resampling below) and discard
# 'Code', whose date information now lives in the index
CPI_temp2_Q = CPI_temp2_Q.set_index('Date').drop(columns='Code')

# Upsample the quarterly series to monthly frequency and fill the new months
# by interpolation (pandas' default method); CPI_temp2_Q itself is left as is
# NOTE(review): confirm which month of each quarter ends up holding the
# quarterly observation after upsampling
CPI_temp2 = CPI_temp2_Q.resample('M').interpolate().reset_index()

# Last day of each month as a datetime64[ns] value with the time part set to
# midnight; the period column is no longer needed afterwards
CPI_temp2['EndOfMonth'] = CPI_temp2['Date'].dt.end_time.dt.normalize()
CPI_temp2 = CPI_temp2.drop(columns=['Date'])


In [3]:
# Import list of codes and countries: first sheet column -> 'Country',
# third sheet column -> 'variable' (the series code)
listcodes3 = pd.read_excel(newpath+'CPI Monthly - Datastream - Raw Data.xlsx', sheet_name='CPI Codes',header=None,usecols=[0, 2], names=[ 'Country','variable'],skiprows=1)
listcodes3['Country'] = listcodes3['Country'].str.upper()

# Upper-cased country name from the sheet -> spelling to use instead.
# The trailing blanks in some replacements are intentional and kept verbatim.
# NOTE(review): the replacements presumably mirror the spellings in
# IMF_codes.xls, which is merged on 'Country' later -- verify against that file.
country_renames = {
    'JORDAN': 'JORDANIA',
    'ESTONIA': 'ESTONIAN',
    'HONG KONG': 'CHINA HONG KONG',
    'KAZAKHSTAN': 'KAZAKHSTAN ',
    'KOREA': 'SOUTH KOREA',
    'PHILIPPINES': 'PHILIPPINE',
    'RUSSIAN FEDERATION': 'RUSSIA',
    'SINGAPORE': 'SINGAPORE ',
    'SLOVAKIA': 'SLOVAK REPUBLIC',
    'ZIMBABWE': 'ZIMBABWE ',
    'EMU 11/12/13/15/16 MEMBERS': 'EURO AREA',
}

# Rename on the 'Country' column by label and assign the result back.
# A positional lookup such as .loc[ind][1] addresses the second column
# ('variable'), and writing through chained indexing is not guaranteed to
# reach the DataFrame, so the renames are applied in one vectorised step.
listcodes3['Country'] = listcodes3['Country'].replace(country_renames)


In [4]:
# IMF country codes: attach an IMF code to every (Country, variable) pair
IMF_codes = pd.read_excel(newpath+'IMF_codes.xls',header=None,names=['Country', 'IMF Code'])
List_IMFcodes = listcodes3.merge(IMF_codes, how='left', on='Country')

# Monthly panel: match each series to its IMF code, discard rows with any
# missing entry, then reshape to one column per IMF code indexed by month end
CPI_M = (
    CPI_temp1
    .merge(List_IMFcodes, how='left', on='variable')
    .dropna()
    .astype({'IMF Code': int, 'value': float})
    .drop(columns=['variable', 'Country'])
    .pivot_table(index='EndOfMonth', columns='IMF Code', values='value')
)

# If using our data we need to add columns for Australia and New Zealand.
# They start out empty; 196 (New Zealand) is deliberately added before
# 193 (Australia).
for imf_code in (196, 193):
    CPI_M[imf_code] = np.nan

# Interpolated quarterly panel: same matching and reshaping as above
CPI_temp2 = (
    pd.melt(CPI_temp2, id_vars=['EndOfMonth'])
    .merge(List_IMFcodes, how='left', on='variable')
    .astype({"IMF Code": int})
    .drop(columns=['variable', 'Country'])
    .pivot_table(index='EndOfMonth', columns='IMF Code', values='value')
)

# Replace Australia and New Zealand with interpolated values from quarterly
# data (update() modifies CPI_M in place, aligned on index and columns)
CPI_M.update(CPI_temp2)

# Corrections: a reading of exactly 0 is treated as missing
CPI_M = CPI_M.replace(0, np.nan)

In [5]:
# Select subsamples
# NOTE: pickle.load can execute arbitrary code while unpickling; only load
# files produced by this project's own pipeline.
with open(path_cleandata+'BR_Spot_dM.pkl', 'rb') as f:
    BR_Spot_M = pickle.load(f)

# this is defined in the matlab file, although i'm not sure what it's used for
FX_Spot = BR_Spot_M

# because the BR column names are strings and we want to compare them
CPI_M.columns = CPI_M.columns.astype(str)

# Keep the CPI columns whose label also appears in BR_Spot_M. A boolean mask
# preserves CPI_M's own column order; going through set(...) & set(...) would
# return the same labels in an order that can change from one interpreter run
# to the next, making the saved file non-reproducible.
BR_CPI_M = CPI_M.loc[:, CPI_M.columns.isin(BR_Spot_M.columns)]

In [6]:
# Save: write the full CPI panel and the BR subsample as pickles into the
# clean-data directory
for file_stem, frame in (('DataStream_CPI_M', CPI_M), ('DataStream_BR_CPI_M', BR_CPI_M)):
    frame.to_pickle(path_cleandata + file_stem + '.pkl')
