In [2]:
# Standard library
import ast
import calendar
from datetime import datetime
import datetime  # keep AFTER the line above: later code calls datetime.datetime(...), so the name must be the module
import os as os
import pickle

# Third-party
import country_converter as coco
import numpy as np
import pandas as pd
import xlrd
from pandas.tseries.offsets import MonthEnd

In [3]:
# Input and output folders sit one level above the current working directory.
_parent_dir = os.path.normpath(os.getcwd() + os.sep + os.pardir)
path_rawdata = _parent_dir + '/RawData/'
path_cleandata = _parent_dir + '/CleanData/'

# Suffixes inserted into the pickle file names.
# Edit these to load and save different types of data sources.
# loadAppend = '_OurData_NScode'
# saveAppend = '_OurData_NScode'
loadAppend = ''
saveAppend = ''


def _read_clean_pickle(stem):
    """Read the pickle stored at path_cleandata + stem + loadAppend + '.pkl'."""
    return pd.read_pickle(path_cleandata + stem + loadAppend + '.pkl')


# Reuters forward and spot rates
Reuters_FX_Fwd_D = _read_clean_pickle('Reuters_FX_Fwd_D')
Reuters_FX_Spot_D = _read_clean_pickle('Reuters_FX_Spot_D')

# Barclays forward and spot rates
Barclays_FX_Fwd_D = _read_clean_pickle('Barclays_FX_Fwd_D')
Barclays_FX_Spot_D = _read_clean_pickle('Barclays_FX_Spot_D')

# The Barclays frames may carry their date column under the name "index";
# relabel it "Date" (a no-op when no such column exists).
# Original author's note: the cause of that column name still needs to be
# traced back to the Barclays cleaning step.
_index_to_date = {"index": "Date"}
Barclays_FX_Spot_D = Barclays_FX_Spot_D.rename(columns=_index_to_date)
Barclays_FX_Fwd_D = Barclays_FX_Fwd_D.rename(columns=_index_to_date)

# Keep only Barclays observations dated 1988-01-01 or later.
_barclays_start = pd.to_datetime('1/01/1988')
Barclays_FX_Spot_D = Barclays_FX_Spot_D.loc[Barclays_FX_Spot_D["Date"] >= _barclays_start]
Barclays_FX_Fwd_D = Barclays_FX_Fwd_D.loc[Barclays_FX_Fwd_D["Date"] >= _barclays_start]

In [4]:
# Trim the four series to a common end date and index them by 'Date'.
# Per the original notes, the Barclays spot/forward series end on 31/01/2009
# and the Reuters spot/forward series end on 01/01/2009.
# The end dates are taken from the spot series only; per the original notes
# the forward series end at the same times, so they are not recomputed.
# Series.max() is used instead of max(list(...)): it ignores NaT, whereas the
# Python built-in gives an order-dependent answer when NaT is present.
end_barclays = Barclays_FX_Spot_D['Date'].max()
end_reuters = Reuters_FX_Spot_D['Date'].max()
common_end = min([end_barclays, end_reuters]) # should give us 01/01/2009


def _trim_to_common_end(frame):
    """Return `frame` without the rows dated on/after `common_end`, indexed by 'Date'.

    Rows are selected with a boolean mask rather than dropped by index label,
    so a frame whose row labels are not unique cannot lose unrelated rows.
    The input frame is left untouched.
    """
    # NOTE(review): ">=" also removes the rows dated exactly `common_end`, so
    # the trimmed series end on the last date *before* it, while the original
    # comment says they "should be trimmed to end on 01/01/2009". The original
    # behaviour is preserved here -- confirm which one is intended.
    keep = ~(frame["Date"] >= common_end)
    return frame.loc[keep].set_index('Date')


all_data = [
    _trim_to_common_end(frame)
    for frame in (Barclays_FX_Spot_D, Reuters_FX_Spot_D, Barclays_FX_Fwd_D, Reuters_FX_Fwd_D)
]
# Rebind the names to the trimmed frames instead of mutating them in place.
Barclays_FX_Spot_D, Reuters_FX_Spot_D, Barclays_FX_Fwd_D, Reuters_FX_Fwd_D = all_data


In [5]:
# Find the countries (columns) that show up in both sources, only in Reuters,
# and only in Barclays. This is done separately for spot (_s) and forward (_f)
# data; the original author expects the two sets of lists to coincide.
# NOTE(review): nothing here actually checks that the _s and _f lists match.


def _split_countries(reuters_cols, barclays_cols):
    """Return (in_both, only_reuters, only_barclays) as order-preserving lists.

    `in_both` and `only_reuters` follow the Reuters column order and
    `only_barclays` follows the Barclays column order. The previous
    list(set & set) had a hash-dependent order, so the column order of the
    merged frames could change from one kernel session to the next.
    """
    reuters_cols = list(reuters_cols)
    barclays_cols = list(barclays_cols)
    reuters_set = set(reuters_cols)
    barclays_set = set(barclays_cols)
    # dict.fromkeys keeps first-seen order and drops repeats, like the set did.
    in_both = list(dict.fromkeys(c for c in reuters_cols if c in barclays_set))
    only_reuters = [c for c in reuters_cols if c not in barclays_set]
    only_barclays = [c for c in barclays_cols if c not in reuters_set]
    return in_both, only_reuters, only_barclays


countriesinboth_s, onlyreuterscountries_s, onlybarclayscountries_s = _split_countries(
    Reuters_FX_Spot_D.columns, Barclays_FX_Spot_D.columns
)
countriesinboth_f, onlyreuterscountries_f, onlybarclayscountries_f = _split_countries(
    Reuters_FX_Fwd_D.columns, Barclays_FX_Fwd_D.columns
)

# Last date (inclusive) on which Barclays data is used for countries present in both sources.
# mergedate = pd.to_datetime('1/01/1997') # for testing only
mergedate = pd.to_datetime('12/31/1996')

In [6]:
# Build the merged daily spot-rate frame.

# Countries quoted by only one source are carried over unchanged and attached
# as additional columns further down.
onlyB_Spot_data = Barclays_FX_Spot_D[onlybarclayscountries_s]
onlyR_Spot_data = Reuters_FX_Spot_D[onlyreuterscountries_s]

# Countries quoted by both sources: Barclays rows up to and including
# `mergedate`, Reuters rows strictly after it.
_barclays_rows_s = Barclays_FX_Spot_D.index <= mergedate
_reuters_rows_s = Reuters_FX_Spot_D.index > mergedate
BR_Spot_data_1 = Barclays_FX_Spot_D.loc[_barclays_rows_s, countriesinboth_s]
BR_Spot_data_2 = Reuters_FX_Spot_D.loc[_reuters_rows_s, countriesinboth_s]

# Stack the two periods, then attach the single-source columns on the date index.
# NOTE(review): DataFrame.join defaults to a left join, so dates that exist only
# in the single-source frames are not added to the result -- confirm this is intended.
_BR_Spot_common = pd.concat([BR_Spot_data_1, BR_Spot_data_2])
BR_Spot_D = _BR_Spot_common.join(onlyB_Spot_data).join(onlyR_Spot_data)

In [8]:
# Build the merged daily forward-rate frame.

# Countries quoted by only one source are carried over unchanged and attached
# as additional columns further down.
onlyB_Fwd_data = Barclays_FX_Fwd_D[onlybarclayscountries_f]
onlyR_Fwd_data = Reuters_FX_Fwd_D[onlyreuterscountries_f]

# Countries quoted by both sources: Barclays rows up to and including
# `mergedate`, Reuters rows strictly after it.
_barclays_rows_f = Barclays_FX_Fwd_D.index <= mergedate
_reuters_rows_f = Reuters_FX_Fwd_D.index > mergedate
BR_Fwd_data_1 = Barclays_FX_Fwd_D.loc[_barclays_rows_f, countriesinboth_f]
BR_Fwd_data_2 = Reuters_FX_Fwd_D.loc[_reuters_rows_f, countriesinboth_f]

# Stack the two periods, then attach the single-source columns on the date index.
# NOTE(review): DataFrame.join defaults to a left join, so dates that exist only
# in the single-source frames are not added to the result -- confirm this is intended.
_BR_Fwd_common = pd.concat([BR_Fwd_data_1, BR_Fwd_data_2])
BR_Fwd_D = _BR_Fwd_common.join(onlyB_Fwd_data).join(onlyR_Fwd_data)

In [9]:
# Create dictionaries out of the excel file with IMF codes.
# The sheet has no header row; its two columns are read as country name and IMF code.
IMF_codes = pd.read_excel(path_rawdata+'IMF_codes.xls',header=None,names=['Country', 'IMF Code'])
IMF_dict = IMF_codes.set_index('Country').to_dict()['IMF Code']      # country name -> IMF code
IMF_dict_inv = IMF_codes.set_index('IMF Code').to_dict()['Country']  # IMF code -> country name

# get list of countries (column labels of the merged spot data)
country_codes_spot = list(BR_Spot_D.columns)

# Using the spot columns only: per the original notes the forward data has the
# same set of countries, so this is not repeated for forward rates.
# The column labels are parsed into Python literals so that they can be used as
# keys of IMF_dict_inv. ast.literal_eval replaces eval(): it accepts the same
# literals but cannot execute arbitrary expressions read from a data file.
country_codes_num = [ast.literal_eval(i) for i in country_codes_spot]
# Create list of countries in merged dataset (to be saved)
List_names = [IMF_dict_inv[key] for key in country_codes_num]

In [10]:
# Extend sample to End-of-Month

def Extend_EndofMonth(data):
    """Extend a date-indexed frame to the last day of its final month.

    Daily rows are appended from the last existing index entry up to the end
    of that entry's calendar month. Each appended row repeats the row of the
    closest preceding index entry (forward fill on the index), so missing
    values in the last row are carried forward as missing.

    Parameters
    ----------
    data : pandas.DataFrame or pandas.Series
        Data indexed by dates in increasing order; the last index entry is
        taken as the end of the sample.

    Returns
    -------
    Same type as `data`
        A new object; `data` itself is not modified. When the last entry is
        already a month-end, no rows are added. An empty input is returned
        as an (empty) copy instead of raising IndexError.
    """
    # Nothing to extend: index[-1] below would raise on an empty index.
    if len(data.index) == 0:
        return data.copy()

    # find date of last entry
    last_entry = pd.Timestamp(data.index[-1])

    # Date to extend to: MonthEnd(0) rolls forward to the month-end and leaves
    # a date that already is a month-end unchanged.
    month_end = last_entry + pd.offsets.MonthEnd(0)

    # Daily dates from the last entry to the month-end ('D' is the documented
    # alias for calendar-day frequency).
    dates_to_add = pd.date_range(last_entry, month_end, freq='D')

    # Original index plus the additional dates.
    idx = data.index.union(dates_to_add)

    # Fill the new rows from the preceding existing row.
    extended = data.reindex(idx, method='ffill')
    return extended

In [11]:
# Extend both merged daily frames to the end of their final month.
BR_Spot_D, BR_Fwd_D = (
    Extend_EndofMonth(BR_Spot_D),
    Extend_EndofMonth(BR_Fwd_D),
)

In [12]:
# Make sure both daily frames are indexed by datetimes; the monthly
# conversion calls .strftime on the index.
for _daily_frame in (BR_Spot_D, BR_Fwd_D):
    _daily_frame.index = pd.to_datetime(_daily_frame.index, format='%Y-%m-%d')

In [13]:
# Convert the daily frames to monthly (end-of-month) frames.


def _collapse_to_month_end(daily):
    """Return one row per calendar month of `daily`, labelled with the month-end date.

    Rows are grouped by their 'YYYY-MM' label and GroupBy.last() is taken per
    column. Grouping on strings leaves a string index, so it is parsed back to
    datetimes and then rolled forward to the month-end.
    """
    month_labels = daily.index.strftime('%Y-%m')
    monthly = daily.groupby(month_labels).last()
    monthly.index = pd.to_datetime(monthly.index)
    monthly.index = monthly.index + pd.offsets.MonthEnd(0)
    return monthly


BR_Spot_M = _collapse_to_month_end(BR_Spot_D)
BR_Fwd_M = _collapse_to_month_end(BR_Fwd_D)

In [14]:
# Write the monthly frames to the clean-data folder as pickles.
for _frame, _stem in ((BR_Spot_M, 'BR_Spot_dM'), (BR_Fwd_M, 'BR_Fwd_dM')):
    _frame.to_pickle(path_cleandata + _stem + saveAppend + '.pkl')

# Write the list of country names of the merged dataset.
# NOTE(review): unlike the other outputs, this file name does not include
# saveAppend -- confirm that is intended.
_countries_path = path_cleandata + 'BR_Countries.pkl'
with open(_countries_path, 'wb') as f:
    pickle.dump(List_names, f)

# Write the daily frames to the clean-data folder as pickles.
for _frame, _stem in ((BR_Spot_D, 'BR_Spot_D'), (BR_Fwd_D, 'BR_Fwd_D')):
    _frame.to_pickle(path_cleandata + _stem + saveAppend + '.pkl')