In [2]:
import pandas as pd
import numpy as np
from datetime import datetime
import datetime
import xlrd
import pickle
from pandas.tseries.offsets import MonthEnd
import calendar
import country_converter as coco
import os
from dotenv import dotenv_values, find_dotenv

# Locate the .env configuration file and read it into a dictionary.
config = dotenv_values(find_dotenv())

# Resolve the data folders named by the CLEANDATA / RAWDATA keys.
# os.path.join(<dir>, '') appends the separator of the platform the notebook is
# running on. The previous hard-coded '\\' only produced valid paths on Windows;
# elsewhere the backslash became part of the file name and every load failed.
# The rest of the notebook builds file names by plain string concatenation, so
# both values must keep ending with a separator.
path_cleandata = os.path.join(os.path.abspath(config["CLEANDATA"]), '')
path_rawdata = os.path.join(os.path.abspath(config["RAWDATA"]), '')

           
# ----------------------------------------------------------------------
# Barclays 1M forward and spot rates: load, relabel columns, undo inverse
# quotes, and key every series by its IMF code.
# ----------------------------------------------------------------------
# Both sheets go through the same steps, so the steps live in two helpers
# that are called once per sheet instead of being written out twice.

RATES_WORKBOOK = 'Datastream & Barclays - Spots & 1M Forwards.xlsx'
CODES_WORKBOOK = 'Exchange Rates - Barclays.xlsx'
INVERSE_TAG = '(INVERSE)'
# Outlier country labels in the code sheets and the label each is replaced by.
COUNTRY_RELABELS = {'Euro': 'Euro Area', 'Hong Kong': 'CHINA HONG KONG'}


def _load_symbol_details(codes_sheet, symbols, keep_columns):
    """Read a symbol-description sheet and normalise its 'Country' labels.

    Parameters
    ----------
    codes_sheet : str
        Name of the sheet in CODES_WORKBOOK (read from ``path_rawdata``).
    symbols : list-like
        Symbols to keep; rows whose 'Symbol' is not among them are dropped.
    keep_columns : list of str
        Columns to retain. Must contain 'Symbol' and 'Country'; a KeyError is
        raised by pandas if any listed column is absent from the sheet.

    Returns
    -------
    pandas.DataFrame
        The retained rows/columns with the outlier labels in COUNTRY_RELABELS
        replaced and 'Country' converted to upper case.
    """
    details = pd.read_excel(path_rawdata + CODES_WORKBOOK, sheet_name=codes_sheet, skiprows=0)
    details = details[list(keep_columns)]
    # .copy() so the label edits below act on an independent frame.
    details = details[details['Symbol'].isin(symbols)].copy()
    details['Country'] = details['Country'].replace(COUNTRY_RELABELS).str.upper()
    return details


def _relabel_rate_columns(rates, details, imf_codes, drop_columns=(), rename_columns=None):
    """Turn symbol-labelled rate columns into IMF-code-labelled columns.

    Steps, in order: drop ``drop_columns``; rename symbols to country labels
    using ``details``; apply ``rename_columns``; for every column whose label
    contains '(INVERSE)', strip ' (INVERSE)' from the label and replace the
    series by its reciprocal; finally rename country labels to IMF codes
    (as strings). Labels with no match in a mapping are left unchanged.

    Parameters
    ----------
    rates : pandas.DataFrame
        Raw sheet as read from the rates workbook. Not modified.
    details : pandas.DataFrame
        Output of ``_load_symbol_details`` ('Symbol' and 'Country' columns).
    imf_codes : pandas.DataFrame
        Frame with 'Country' and 'IMF Code' columns.
    drop_columns : sequence of str, optional
        Columns removed before any renaming; KeyError if one is missing.
    rename_columns : dict, optional
        Extra label renames applied after the symbol -> country step.

    Returns
    -------
    pandas.DataFrame
        A new frame with relabelled (and, where flagged, inverted) columns.
    """
    if drop_columns:
        rates = rates.drop(columns=list(drop_columns))
    rates = rates.rename(columns=details.set_index('Symbol')['Country'])
    if rename_columns:
        rates = rates.rename(columns=rename_columns)

    # Snapshot the labels first: the frame is renamed while we walk over them.
    for label in list(rates.columns):
        if INVERSE_TAG in label:
            clean_label = label.replace(' ' + INVERSE_TAG, '')
            rates = rates.rename(columns={label: clean_label})
            # Plain column assignment replaces the series outright, so the
            # result is float even if the sheet was read as integers.
            rates[clean_label] = 1.0 / rates[clean_label]

    return rates.rename(columns=imf_codes.set_index('Country')['IMF Code'].astype('str'))


# Country name -> IMF code table shared by both sheets
IMF_codes = pd.read_excel(path_rawdata+'IMF_codes.xls',header=None,names=['Country', 'IMF Code'])

# --- 1M forwards ---
fwd_raw = pd.read_excel(path_rawdata + RATES_WORKBOOK, sheet_name='Barclays FX Fwd', skiprows=0)
fwd_details = _load_symbol_details(
    'Barclays FX Fwd Codes',
    fwd_raw.columns,
    ['Symbol', 'Country', 'From Currency', 'To Currency'],
)
# Drop duplicate Euro series (keep the one that only exists post Euro
# creation - so no synthetic Euro).
# NOTE(review): unlike the spot sheet, no 'Code' -> 'Date' rename is applied
# here, yet later cells use FX_Fwd.Date - presumably this sheet already has a
# 'Date' column; confirm against the workbook.
FX_Fwd_temp = _relabel_rate_columns(
    fwd_raw, fwd_details, IMF_codes, drop_columns=['BBDM.1F', 'BBXEU1F']
)

# --- spot ---
spot_raw = pd.read_excel(path_rawdata + RATES_WORKBOOK, sheet_name='Barclays Spot', skiprows=0)
spot_details = _load_symbol_details('Barclays Spot Codes', spot_raw.columns, ['Symbol', 'Country'])
FX_Spot_temp = _relabel_rate_columns(
    spot_raw, spot_details, IMF_codes, rename_columns={'Code': 'Date'}
)

# Names kept as before for the cells that follow.
colDetails = spot_details
path_codeNames = path_rawdata
# Every column after the first one; the following cell treats these labels as
# IMF codes.
country_codes = FX_Spot_temp.columns[1:]

# Use 1M Fwd only, as they are the only ones in Richmond's excel file (probably used 2M and 3M in earlier versions)
FX_Fwd = FX_Fwd_temp.copy()
FX_Spot = FX_Spot_temp.copy()


In [3]:
# Create dictionaries out of excel file with IMF codes:
#   IMF_dict     : country name -> IMF code
#   IMF_dict_inv : IMF code     -> country name
IMF_codes = pd.read_excel(path_codeNames+'IMF_codes.xls',header=None,names=['Country', 'IMF Code'])
IMF_dict = IMF_codes.set_index('Country').to_dict()['IMF Code']
IMF_dict_inv = IMF_codes.set_index('IMF Code').to_dict()['Country']

# The rate columns are labelled with the IMF codes rendered as text. Map each
# label back to the code it came from with an explicit lookup. The previous
# eval() call executed whatever text was in a spreadsheet-derived column label
# and failed with an unrelated NameError/SyntaxError on any unmapped column.
code_by_label = {str(code): code for code in IMF_dict.values()}
unmapped_labels = [label for label in country_codes if label not in code_by_label]
if unmapped_labels:
    raise KeyError('Columns without a matching IMF code: {}'.format(unmapped_labels))
country_codes_num = [code_by_label[label] for label in country_codes]

# Create list of countries in Barclays dataset (to be saved)
List_names = [IMF_dict_inv[key] for key in country_codes_num]

In [4]:
# ----------------------------------------------------------------------
# --------------------------- Corrections ------------------------------
# ----------------------------------------------------------------------
# For all the corrections below, need to change dates once use real data
# (the dates used with the fake data were: Belgium spot 1/5/1983, Belgium
# fwd 1/8/1983, Australia 1/11/1983, euro start 1/18/1983).
# ----------------------------------------------------------------------
# Restart from the untouched frames each time this cell runs. The Norway /
# New Zealand / Hong Kong fixes blank "the current min/max", so running the
# cell again on already-corrected frames would delete one more observation
# per run. On a fresh top-to-bottom run this is a no-op.
FX_Spot = FX_Spot_temp.copy()
FX_Fwd = FX_Fwd_temp.copy()

# Belgium - Series are stale starting in 12/19/1989 for future contracts
# and 1/1/1990 for spot rates
col_BE = IMF_dict['BELGIUM']
endBG_Spot = '1/1/1990'
endBG_Fwd = '12/19/1989'
FX_Spot.loc[FX_Spot.Date >= pd.to_datetime(endBG_Spot), str(col_BE)] = np.nan
FX_Fwd.loc[FX_Fwd.Date >= pd.to_datetime(endBG_Fwd), str(col_BE)] = np.nan

# Australia - Missing data for Fwd 1M on 10/1/2001 (and Fwd 3M on 12/1/1996)
col_AU = IMF_dict['AUSTRALIA']
outlier_AU = '10/1/2001'
FX_Fwd.loc[FX_Fwd.Date == pd.to_datetime(outlier_AU), str(col_AU)] = np.nan

# Single-observation outliers in the forward series: the row holding the
# column's extreme value is set to NaN. TODO: need to figure out why these
# observations are off.
# Norway - Replace min Fwd by NaN
col_NW = IMF_dict['NORWAY']
row_NW = FX_Fwd[str(col_NW)].idxmin()
FX_Fwd.at[row_NW, str(col_NW)] = np.nan

# New Zealand - Replace max Fwd by NaN
col_NZ = IMF_dict['NEW ZEALAND']
row_NZ = FX_Fwd[str(col_NZ)].idxmax()
FX_Fwd.at[row_NZ, str(col_NZ)] = np.nan

# Hong Kong - Replace max Fwd by NaN
col_HK = IMF_dict['CHINA HONG KONG']
row_HK = FX_Fwd[str(col_HK)].idxmax()
FX_Fwd.at[row_HK, str(col_HK)] = np.nan

# Euro area countries - Delete spot and forward values for
# Germany, France, Italy, Belgium and Netherlands from 1/1/1999 on
starteuro = '1/1/1999'
col_DE = IMF_dict['GERMANY']
col_FR = IMF_dict['FRANCE']
col_IT = IMF_dict['ITALY']
col_NL = IMF_dict['NETHERLANDS']
# Column labels are strings, hence the str() around each IMF code.
col_euro = [str(col_DE), str(col_FR), str(col_IT), str(col_BE), str(col_NL)]

FX_Spot.loc[FX_Spot.Date >= pd.to_datetime(starteuro), col_euro] = np.nan
FX_Fwd.loc[FX_Fwd.Date >= pd.to_datetime(starteuro), col_euro] = np.nan

# Corrected daily frames handed to the next step
Barclays_FX_Spot_D = FX_Spot.copy()
Barclays_FX_Fwd_D = FX_Fwd.copy()

In [5]:
# Extend sample to End-of-Month

def Extend_EndofMonth(data):
    """Extend a daily frame to the last calendar day of its final month.

    One row per calendar day is added after the last row, up to and including
    the month end of that last row's date. Each added row repeats the values
    of the last row. If the last row already sits on the month end, no row is
    added.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a 'Date' column holding datetime values in increasing
        order (the last row is taken as the latest date). The frame itself
        is left untouched.

    Returns
    -------
    pandas.DataFrame
        A new frame with a fresh integer index and the dates as the first
        column.
        NOTE(review): the union with the unnamed generated date range appears
        to drop the index name, so the date column likely comes back labelled
        'index' rather than 'Date' - unchanged from before; confirm what the
        readers of the saved pickles expect before renaming it.

    Raises
    ------
    KeyError
        If ``data`` has no 'Date' column.
    IndexError
        If ``data`` has no rows.
    """
    # set_index without inplace: the earlier inplace=True removed 'Date' from
    # the caller's frame, so calling the function twice on it raised KeyError.
    indexed = data.set_index('Date')

    # Midnight of the last calendar day in the month of the last entry.
    # Built with pandas only, so it does not depend on which of the two
    # `datetime` imports at the top of the notebook is in effect.
    last_entry = indexed.index[-1]
    month_end = pd.Timestamp(
        year=last_entry.year,
        month=last_entry.month,
        day=last_entry.days_in_month,
    )

    # Daily dates from the last entry through the month end (empty when the
    # last entry is already past midnight of the month end).
    dates_to_add = pd.date_range(last_entry, month_end, freq='D')

    # Reindex on old + new dates; new dates take the most recent prior row.
    idx = indexed.index.union(dates_to_add)
    extended = indexed.reindex(idx, method='ffill')
    return extended.reset_index()

# Pad both corrected daily frames (spot first, then forward) out to the end
# of their final month.
Barclays_FX_Spot_D_ex, Barclays_FX_Fwd_D_ex = (
    Extend_EndofMonth(daily_frame)
    for daily_frame in (Barclays_FX_Spot_D, Barclays_FX_Fwd_D)
)

In [7]:
# Persist the cleaned, month-end-extended daily frames as pickles in the
# clean-data folder (forward first, then spot).
for file_stem, cleaned_frame in (
    ('Barclays_FX_Fwd_D', Barclays_FX_Fwd_D_ex),
    ('Barclays_FX_Spot_D', Barclays_FX_Spot_D_ex),
):
    cleaned_frame.to_pickle(path_cleandata + file_stem + '.pkl')

# The list of country names is currently not written out; kept for reference:
# with open(path_cleandata+'Barclays_Countries.pkl', 'wb') as f:
#     pickle.dump(List_names, f)