In [2]:
import pandas as pd
import numpy as np
from datetime import datetime as dt
import country_converter as coco
import xlrd
import pickle
import os
import math
import time
from pandas.tseries.offsets import MonthEnd
import statsmodels.api as sm
from statsmodels.regression.rolling import RollingOLS
import matplotlib.pyplot as plt

from dotenv import dotenv_values, find_dotenv

# Look for the configuration (.env) file and read it into a dictionary
config = dotenv_values(dotenv_path=find_dotenv())

# Converter object from the country_converter package
cc = coco.CountryConverter()

In [5]:
# set path using the dictionary key for which one you want.
# os.path.join(<dir>, '') appends the path separator of the running OS, so the
# string concatenations below produce valid paths on every platform (the
# separator used to be hard-coded as the Windows backslash).
path_cleandata = os.path.join(os.path.abspath(config["CLEANDATA"]), '')
path_rawdata = os.path.join(os.path.abspath(config["RAWDATA"]), '')

# Suffixes appended to the loaded / saved file names (empty = default file set)
# loadAppend = '_OurData_NScode'
# saveAppend = '_OurData_NScode'
loadAppend = ''
saveAppend = ''

# Input: portfolio data
PortfolioData_load = path_cleandata + 'PortfolioData' + loadAppend + '.pkl'

# Outputs: factor series
Factors_FX_Portfolios_Cent = path_cleandata + 'Factors_FX_Portfolios_Cent' + saveAppend + '.pkl'
Factors_FX_Portfolios_Invoice = path_cleandata + 'Factors_FX_Portfolios_Invoice' + saveAppend + '.pkl'

# Outputs: portfolio excess returns for all sorts
PortfolioData_PortfolioRX_AllSorts_Cent = path_cleandata + 'PortfolioData_PortfolioRX_AllSorts_Cent' + saveAppend + '.pkl'
PortfolioData_PortfolioRX_AllSorts_Invoice = path_cleandata + 'PortfolioData_PortfolioRX_AllSorts_Invoice' + saveAppend + '.pkl'

# Outputs: bootstrap results
PortfolioData_AllSorts_BootStrap = path_cleandata + 'PortfolioData_AllSorts_BootStrap' + saveAppend + '.pkl'
PortfolioData_BootStrap_centSort = path_cleandata + 'PortfolioData_BootStrap_centSort' + saveAppend + '.pkl'
PortfolioData_BootStrap_InvoiceSort = path_cleandata + 'PortfolioData_BootStrap_InvoiceSort' + saveAppend + '.pkl'
PortfolioData_BootStrap_FSSort = path_cleandata + 'PortfolioData_BootStrap_FSSort' + saveAppend + '.pkl'
PortfolioData_BootStrap_UnconditionalSort = path_cleandata + 'PortfolioData_BootStrap_UnconditionalSort' + saveAppend + '.pkl'

# Input: full data set used for the invoicing-currency sorts
FullData_load = path_cleandata + 'FullData_TSICIO.pkl'

# Lookup table from country name (index) to ISO3 code
name_to_ISO = pd.read_csv(path_rawdata + 'Country_iso_codes.csv', usecols=['Name', 'iso3'], index_col='Name')

developedcountries = ['Australia','Belgium','Canada','Denmark','France',
                     'Germany','Italy','Japan','Netherlands','New Zealand',
                     'Norway','Sweden','Switzerland','United Kingdom']

# Create list of developed countries ISO codes
listdeveloped = name_to_ISO.loc[developedcountries].squeeze().tolist()

In [6]:
# Import the data
# NOTE: unpickling can execute arbitrary code; only load pickles produced by
# this project's own data pipeline.

# Sample years kept in both data sets (inclusive on both ends)
sample_years = list(range(1988, 2016 + 1))

# Richmond replication
portsub = pd.read_pickle(PortfolioData_load)
# .copy() detaches the filtered rows from the loaded frame, so that adding the
# 'cent' column below is not an assignment on a slice (SettingWithCopyWarning)
portsub = portsub[portsub['year'].isin(sample_years)].copy()
portsub['cent'] = portsub['cent_exp']

# Our work on invoicing currencies
portsubours = pd.read_pickle(FullData_load)
portsubours = portsubours[portsubours['year'].isin(sample_years)].copy()
portsubours['cent'] = portsubours['cent_exp']

# Create sub-dataframes with only developed country data (independent copies)
portsubdeveloped = portsub.loc[portsub['iso3'].isin(listdeveloped)].copy()
portsuboursdeveloped = portsubours.loc[portsubours['iso3'].isin(listdeveloped)].copy()

# Number of portfolios used by every sort below
numports = 4

# Number of rows in the Richmond replication sample
sampsize = len(portsub)

# Function taking dataseries as input, sorting them into bins, and returning the bin categories
def GetPortNums(ds,numports):
    """Assign each value of ``ds`` to one of ``numports`` quantile bins.

    Returns the integer bin codes produced by ``pd.qcut`` with ``labels=False``
    (codes start at 0). ``duplicates='drop'`` is not passed, so pandas raises
    if the quantile edges are not unique.
    """
    bin_codes = pd.qcut(ds, q=numports, labels=False)
    return bin_codes

# Function to annualize monthly mean
def mean1200(series):
    """Return the mean of ``series`` multiplied by 1200."""
    monthly_mean = np.mean(series)
    return monthly_mean * 1200

# Function to annualize monthly std
def std12(series):
    """Return ``np.std(series)`` scaled by sqrt(12) and then by 100."""
    monthly_std = np.std(series)
    return monthly_std * math.sqrt(12) * 100

# Function to compute Sharpe ratio
def SR12(series):
    """Return the ratio of the annualized mean to the annualized std of ``series``.

    The numerator is ``np.mean(series)*1200`` and the denominator is
    ``np.std(series)*sqrt(12)*100``.
    """
    annual_mean = np.mean(series) * 1200
    annual_std = np.std(series) * math.sqrt(12) * 100
    return annual_mean / annual_std

In [175]:
# ----------------------------------------
# --------| Sort on centrality |----------
# ----------------------------------------

# Define function computing means by portfolios
def getCentMeans(portsubin):
    """Sort observations into portfolios on lagged centrality and summarize them.

    Parameters
    ----------
    portsubin : pandas.DataFrame
        Panel containing at least the columns 'iso3', 'Date', 'year', 'cent',
        'logrx', 'forwardspread', 'realfwdspread' and 'consrs_pwt'.
        The frame is MODIFIED IN PLACE (sorted, lag columns added, rows with
        missing 'logrx'/'prevcent' dropped, 'portnum' added); pass a copy if
        the original must stay untouched.

    Returns
    -------
    portmeans : pandas.DataFrame
        One row per portfolio (1..numports and 'HML') with the time-series
        statistics computed by ``mean1200``/``std12`` and a 'sharperatio'.
    portbydate : pandas.DataFrame
        One row per (Date, portfolio) with the cross-sectional averages; the
        'HML' rows hold portfolio 1 minus portfolio ``numports``.

    Notes
    -----
    Uses the module-level ``numports``, ``GetPortNums``, ``mean1200`` and
    ``std12``.
    """
    portsubin.sort_values(by=['iso3', 'Date'], inplace=True)
    # Lags are taken within country, in the row order set by the sort above.
    # (shift(12) is meant as "last year's value" -- this presumes one row per
    # country and month without gaps; TODO confirm against the input data.)
    portsubin['prevcent'] = portsubin.groupby('iso3')['cent'].shift(12)
    # Not used further in this function; kept because it is added to the caller's frame
    portsubin['prevfwdspread'] = portsubin.groupby('iso3')['forwardspread'].shift(1)

    # We want to have risk premia info and the centrality variable to rank on
    portsubin.dropna(subset=['logrx','prevcent'], inplace=True)

    # sort on last year's centrality (add one since categories start at 0 otherwise)
    portsubin['portnum'] = (1 + portsubin.groupby('year',group_keys=False)['prevcent']
                            .apply(GetPortNums,numports=numports)
                           )

    # get averages within portfolio at each given date
    portbydate = ( portsubin.groupby(by=['Date','portnum'],as_index=False)
                  .agg({'logrx':'mean',
                        'forwardspread':'mean',
                        'realfwdspread':'mean',
                        'prevcent':['count','mean'],
                        'consrs_pwt':'mean'})
                 )
    # Flatten the two-level column index created by the list-valued aggregation
    portbydate.columns = ['Date','portnum',
                         'logrx', 'forwardspread','realfwdspread',
                          'numcountries','prevcent','conscor']

    # get a long-short ('HML') portfolio
    # reshape to long form with new 'measure' column
    longform = pd.melt(portbydate,id_vars=['Date','portnum'],var_name='measure')
    # reshape to wide form but this time with the portfolios in columns
    byportfolio = longform.pivot_table(index=['Date','measure'],columns='portnum',values='value').reset_index()\
            .rename_axis(None, axis=1)
    # Lowest-centrality portfolio minus highest-centrality portfolio, for every measure.
    # The top portfolio is numports (previously hard-coded as 4).
    byportfolio['HML'] = byportfolio[1] - byportfolio[numports]
    # reshape to long form again, with 'measure' column
    tmp = pd.melt(byportfolio, id_vars=['Date','measure'],var_name='portnum')
    # reshape to wide form again, with HML being another column in addition to the numbered portfolios
    portbydate = tmp.pivot_table(index=['Date','portnum'],columns='measure',values='value').reset_index()\
            .rename_axis(None, axis=1)

    # get time series statistics of the portfolios
    portmeans = ( portbydate.groupby(by=['portnum'],as_index=False)
               .agg({'prevcent':'mean',
                     'numcountries':'mean',
                     'logrx':[mean1200,std12],
                     'forwardspread':mean1200,
                     'realfwdspread':mean1200,
                     'conscor':'mean'
                    })
             )
    portmeans.columns = ['portnum', 'prevcent', 'numcountries',
                         'logrx', 'sdlogrx', 'forwardspread',
                         'realfwdspread','conscor']
    portmeans['sharperatio'] = portmeans['logrx']/portmeans['sdlogrx']

    return portmeans, portbydate
    
# getCentMeans modifies its argument, so it receives a copy of the sample
portsubin = portsub.copy()
portmeans_cent, portbydate_cent = getCentMeans(portsubin)
# The logrx of the 'HML' rows is the PMC factor
PMC_factor = (
    portbydate_cent
    .loc[portbydate_cent['portnum'] == 'HML', ['Date', 'logrx']]
    .rename(columns={'logrx': 'PMC'})
)

In [176]:
# portsubin = portsubours.copy()
# portsubin.sort_values(by=['iso3', 'Date'], inplace=True)
# portsubin['prevSumShares'] = portsubin.groupby('iso3')['SumShares'].shift(12)
# #portsubin['prevcent'] = portsubin.groupby('iso3')['cent'].shift(12)
# portsubin['prevfwdspread'] = portsubin.groupby('iso3')['forwardspread'].shift(1)
        
# # We want to have risk premia info and the invoicing variable to rank on
# portsubin.dropna(subset=['logrx','prevSumShares'], inplace=True)
# #portsubin.dropna(subset=['logrx','prevcent'], inplace=True)

# # Compute number of countries for which prevSumShares is available at given date
# portsubin['count_prevSumShares'] =  portsubin.groupby('Date')['prevSumShares'].transform('count')  
# # We want to have at least 4 countries per date (to be able to form 4 portfolios)
# portsubin = portsubin.loc[portsubin['count_prevSumShares'] >= 4]
    
# # sort on last year's invoicing concentration (add one since categories start at 0 otherwise)
# portsubin['portnum'] = (1 + portsubin.groupby('year',group_keys=False)['prevSumShares']
#                             .apply(GetPortNums,numports=numports)
#                            )

In [178]:
# ----------------------------------------
# ----| Sort on invoicing currencies |----
# ----------------------------------------

# Define function computing means by portfolios
def getInvoiceMeans(portsubin):
    """Sort observations into portfolios on lagged 'SumShares' and summarize them.

    Parameters
    ----------
    portsubin : pandas.DataFrame
        Panel containing at least the columns 'iso3', 'Date', 'year',
        'SumShares', 'logrx', 'forwardspread', 'realfwdspread' and
        'consrs_pwt'. The frame is MODIFIED IN PLACE up to the minimum-country
        filter (sorted, lag and count columns added, rows with missing
        'logrx'/'prevSumShares' dropped); the portfolio assignment itself is
        done on an internal copy.

    Returns
    -------
    portmeans : pandas.DataFrame
        One row per portfolio (1..numports and 'HML') with the time-series
        statistics computed by ``mean1200``/``std12`` and a 'sharperatio'.
    portbydate : pandas.DataFrame
        One row per (Date, portfolio) with the cross-sectional averages; the
        'HML' rows hold portfolio 1 minus portfolio ``numports``.

    Notes
    -----
    Uses the module-level ``numports``, ``GetPortNums``, ``mean1200`` and
    ``std12``.
    """
    portsubin.sort_values(by=['iso3', 'Date'], inplace=True)
    # Lags are taken within country, in the row order set by the sort above.
    # (shift(12) is meant as "last year's value" -- this presumes one row per
    # country and month without gaps; TODO confirm against the input data.)
    portsubin['prevSumShares'] = portsubin.groupby('iso3')['SumShares'].shift(12)
    # Not used further in this function; kept because it is added to the caller's frame
    portsubin['prevfwdspread'] = portsubin.groupby('iso3')['forwardspread'].shift(1)

    # We want to have risk premia info and the invoicing variable to rank on
    portsubin.dropna(subset=['logrx','prevSumShares'], inplace=True)

    # Compute number of countries for which prevSumShares is available at given date
    portsubin['count_prevSumShares'] =  portsubin.groupby('Date')['prevSumShares'].transform('count')
    # We want at least as many countries per date as there are portfolios
    # (numports; previously hard-coded as 4). From here on work on a copy.
    portsubin = portsubin.loc[portsubin['count_prevSumShares'] >= numports].copy()

    # sort on last year's invoicing concentration (add one since categories start at 0 otherwise)
    portsubin['portnum'] = (1 + portsubin.groupby('year',group_keys=False)['prevSumShares']
                            .apply(GetPortNums,numports=numports)
                           )

    # get averages within portfolio at each given date
    portbydate = ( portsubin.groupby(by=['Date','portnum'],as_index=False)
                  .agg({'logrx':'mean',
                        'forwardspread':'mean',
                        'realfwdspread':'mean',
                        'prevSumShares':['count','mean'],
                        'consrs_pwt':'mean'})
                 )
    # Flatten the two-level column index created by the list-valued aggregation
    portbydate.columns = ['Date','portnum',
                         'logrx', 'forwardspread','realfwdspread',
                          'numcountries','prevSumShares','conscor']

    # get a long-short ('HML') portfolio
    # reshape to long form with new 'measure' column
    longform = pd.melt(portbydate,id_vars=['Date','portnum'],var_name='measure')
    # reshape to wide form but this time with the portfolios in columns
    byportfolio = longform.pivot_table(index=['Date','measure'],columns='portnum',values='value').reset_index()\
            .rename_axis(None, axis=1)
    # Lowest-prevSumShares portfolio minus highest-prevSumShares portfolio, for every measure.
    # The top portfolio is numports (previously hard-coded as 4).
    byportfolio['HML'] = byportfolio[1] - byportfolio[numports]
    # reshape to long form again, with 'measure' column
    tmp = pd.melt(byportfolio, id_vars=['Date','measure'],var_name='portnum')
    # reshape to wide form again, with HML being another column in addition to the numbered portfolios
    portbydate = tmp.pivot_table(index=['Date','portnum'],columns='measure',values='value').reset_index()\
            .rename_axis(None, axis=1)

    # get time series statistics of the portfolios
    portmeans = ( portbydate.groupby(by=['portnum'],as_index=False)
               .agg({'prevSumShares':'mean',
                     'numcountries':'mean',
                     'logrx':[mean1200,std12],
                     'forwardspread':mean1200,
                     'realfwdspread':mean1200,
                     'conscor':'mean'
                    })
             )
    portmeans.columns = ['portnum', 'prevSumShares', 'numcountries',
                         'logrx', 'sdlogrx', 'forwardspread',
                         'realfwdspread','conscor']
    portmeans['sharperatio'] = portmeans['logrx']/portmeans['sdlogrx']

    return portmeans, portbydate
    
# getInvoiceMeans modifies its argument, so it receives a copy of the sample
portsubin = portsubours.copy()
portmeans_invoice, portbydate_invoice = getInvoiceMeans(portsubin)
# The logrx of the 'HML' rows is the DMC factor
DMC_factor = (
    portbydate_invoice
    .loc[portbydate_invoice['portnum'] == 'HML', ['Date', 'logrx']]
    .rename(columns={'logrx': 'DMC'})
)

In [180]:
# ----------------------------------------
# ------| Sort on forward spreads |-------
# ----------------------------------------

# Define function computing means by portfolios
def getFsMeans(portsubin):
    """Sort observations into portfolios on the lagged forward spread and summarize them.

    Parameters
    ----------
    portsubin : pandas.DataFrame
        Panel containing at least the columns 'iso3', 'Date', 'logrx',
        'forwardspread', 'consrs_pwt' and 'gdpshare'. The frame is MODIFIED
        IN PLACE (sorted, lag column added, rows with missing
        'logrx'/'prevfwdspread' dropped, 'portnum' added); pass a copy if the
        original must stay untouched.

    Returns
    -------
    portmeans : pandas.DataFrame
        One row per portfolio (1..numports and 'HML') with the time-series
        statistics computed by ``mean1200``/``std12`` and a 'sharperatio'.
    portbydate : pandas.DataFrame
        One row per (Date, portfolio) with the cross-sectional averages; the
        'HML' rows hold portfolio ``numports`` minus portfolio 1.

    Notes
    -----
    Uses the module-level ``numports``, ``GetPortNums``, ``mean1200`` and
    ``std12``.
    """
    # The lag below is positional within each country, so the rows must be in
    # date order first (the other sort functions already do this; without it
    # the result depends on the row order of the incoming frame).
    portsubin.sort_values(by=['iso3', 'Date'], inplace=True)

    # Lag the sorting variable so that it is feasible
    portsubin['prevfwdspread'] = portsubin.groupby('iso3')['forwardspread'].shift(1)

    # We want to have risk premia info and the variable to rank on
    portsubin.dropna(subset=['logrx','prevfwdspread'], inplace=True)

    # sort on last month's fwdspread (add one since categories start at 0 otherwise)
    portsubin['portnum'] = (1 + portsubin.groupby('Date',group_keys=False)['prevfwdspread']
                            .apply(GetPortNums,numports=numports)
                           )

    # get averages within portfolio at each given date
    portbydate = ( portsubin.groupby(by=['Date','portnum'],as_index=False)
                  .agg({'logrx':'mean',
                        'prevfwdspread':'mean',
                        'forwardspread':'mean',
                        'iso3':'count',
                        'consrs_pwt':'mean',
                        'gdpshare':'mean'})
                 )
    portbydate.columns = ['Date','portnum',
                         'logrx', 'prevfwdspread','fwdspread',
                          'numcountries','conscor','gdpshare']

    # get a high minus low portfolio
    # reshape to long form with new 'measure' column
    longform = pd.melt(portbydate,id_vars=['Date','portnum'],var_name='measure')
    # reshape to wide form but this time with the portfolios in columns
    byportfolio = longform.pivot_table(index=['Date','measure'],columns='portnum',values='value').reset_index()\
            .rename_axis(None, axis=1)
    # Highest-spread portfolio minus lowest-spread portfolio, for every measure.
    # The top portfolio is numports (previously hard-coded as 4).
    byportfolio['HML'] = byportfolio[numports] - byportfolio[1]
    # reshape to long form again, with 'measure' column
    tmp = pd.melt(byportfolio, id_vars=['Date','measure'],var_name='portnum')
    # reshape to wide form again, with HML being another column in addition to the numbered portfolios
    portbydate = tmp.pivot_table(index=['Date','portnum'],columns='measure',values='value').reset_index()\
            .rename_axis(None, axis=1)

    # get time series statistics of the portfolios
    portmeans = ( portbydate.groupby(by=['portnum'],as_index=False)
               .agg({'numcountries':'mean',
                     'logrx':[mean1200,std12],
                     'fwdspread':[mean1200,std12],
                     'prevfwdspread':[mean1200,std12],
                     'conscor':'mean',
                     'gdpshare':'mean'})
             )
    portmeans.columns = ['portnum', 'numcountries',
                         'logrx', 'sdlogrx',
                         'fwdspread', 'sdfwdspread',
                         'prevfwdspread', 'sdprevfwdspread',
                         'conscor','gdpshare']
    portmeans['sharperatio'] = portmeans['logrx']/portmeans['sdlogrx']

    return portmeans, portbydate

# getFsMeans modifies its argument, so it receives a copy of the sample
portsubin = portsub.copy()
portmeans_fs, portbydate_fs = getFsMeans(portsubin)
# The logrx of the 'HML' rows is the HML factor
HML_factor = (
    portbydate_fs
    .loc[portbydate_fs['portnum'] == 'HML', ['Date', 'logrx']]
    .rename(columns={'logrx': 'HML'})
)

In [181]:
# ----------------------------------------
# ---| Sort on unconditional spreads |----
# ----------------------------------------

# Define function computing means by portfolios
def getUnconditionalMeans(portsubin):
    """Sort observations into portfolios on 'meanforwardhalf' and summarize them.

    Parameters
    ----------
    portsubin : pandas.DataFrame
        Panel containing at least the columns 'Date', 'iso3', 'logrx',
        'forwardspread', 'meanforwardhalf', 'consrs_pwt' and 'gdpshare'.
        The frame is MODIFIED IN PLACE (rows with missing
        'logrx'/'meanforwardhalf' dropped, 'portnum' added); pass a copy if
        the original must stay untouched.

    Returns
    -------
    portmeans : pandas.DataFrame
        One row per portfolio (1..numports and 'HML') with the time-series
        statistics computed by ``mean1200``/``std12`` and a 'sharperatio'.
    portbydate : pandas.DataFrame
        One row per (Date, portfolio) with the cross-sectional averages; the
        'HML' rows hold portfolio ``numports`` minus portfolio 1. The sorting
        variable appears under the name 'meanforwardfirst'.

    Notes
    -----
    Uses the module-level ``numports``, ``GetPortNums``, ``mean1200`` and
    ``std12``.
    """
    # We want to have risk premia info and the variable to rank on
    portsubin.dropna(subset=['logrx','meanforwardhalf'], inplace=True)

    # sort on meanforwardhalf (add one since categories start at 0 otherwise)
    portsubin['portnum'] = (1 + portsubin.groupby('Date',group_keys=False)['meanforwardhalf']
                            .apply(GetPortNums,numports=numports)
                           )

    # get averages within portfolio at each given date
    portbydate = ( portsubin.groupby(by=['Date','portnum'],as_index=False)
                  .agg({'logrx':'mean',
                        'forwardspread':'mean',
                        'meanforwardhalf':'mean',
                        'iso3':'count',
                        'consrs_pwt':'mean',
                        'gdpshare':'mean'})
                 )
    # Note: 'meanforwardhalf' is renamed to 'meanforwardfirst' here
    portbydate.columns = ['Date','portnum',
                          'logrx', 'fwdspread','meanforwardfirst',
                          'numcountries','conscor','gdpshare']

    # get a high minus low portfolio
    # reshape to long form with new 'measure' column
    longform = pd.melt(portbydate,id_vars=['Date','portnum'],var_name='measure')
    # reshape to wide form but this time with the portfolios in columns
    byportfolio = longform.pivot_table(index=['Date','measure'],columns='portnum',values='value').reset_index()\
            .rename_axis(None, axis=1)
    # Highest-bin portfolio minus lowest-bin portfolio, for every measure.
    # The top portfolio is numports (previously hard-coded as 4).
    byportfolio['HML'] = byportfolio[numports] - byportfolio[1]
    # reshape to long form again, with 'measure' column
    tmp = pd.melt(byportfolio, id_vars=['Date','measure'],var_name='portnum')
    # reshape to wide form again, with HML being another column in addition to the numbered portfolios
    portbydate = tmp.pivot_table(index=['Date','portnum'],columns='measure',values='value').reset_index()\
            .rename_axis(None, axis=1)

    # get time series statistics of the portfolios
    portmeans = ( portbydate.groupby(by=['portnum'],as_index=False)
                   .agg({'numcountries':'mean',
                     'logrx':[mean1200,std12],
                     'fwdspread':mean1200,
                     'meanforwardfirst':mean1200,
                     'conscor':'mean',
                     'gdpshare':'mean'})
                )
    portmeans.columns = ['portnum', 'numcountries',
                         'logrx', 'sdlogrx',
                         'fwdspread','meanforwardfirst',
                         'conscor','gdpshare']
    portmeans['sharperatio'] = portmeans['logrx']/portmeans['sdlogrx']

    return portmeans, portbydate

portsinunconditional = portsub.copy()

# Per-country mean forward spread over the first half of the sample
# (years 1988-2001), leaving out 'TUR'; countries without a mean are dropped
firsthalfmean = (
    portsinunconditional.loc[
        portsinunconditional['year'].isin(list(range(1988, 2001 + 1)))
        & (portsinunconditional['iso3'] != 'TUR')
    ]
    .groupby(by=['iso3'], as_index=False)
    .agg({'forwardspread': 'mean'})
    .rename(columns={'forwardspread': 'meanforwardhalf'})
    .dropna(subset=['meanforwardhalf'])
)

# Attach the first-half mean to every observation of the same country
portsinunconditional = portsinunconditional.merge(firsthalfmean, how='left', on='iso3')

# Restrict to the second half of the sample (remove rows with year < 2002) and
# to rows where both the sorting variable and logrx are available
portsinunconditional = (
    portsinunconditional
    .loc[~(portsinunconditional['year'] < 2002)]
    .dropna(subset=['meanforwardhalf', 'logrx'])
    .copy()
)

# compute returns of unconditional high minus low (UHML)
portmeans_unconditional, portbydate_unconditional = getUnconditionalMeans(portsinunconditional)
UHML_factor = (
    portbydate_unconditional
    .loc[portbydate_unconditional['portnum'] == 'HML', ['Date', 'logrx']]
    .rename(columns={'logrx': 'UHML'})
)
                  

In [182]:
# ----------------------------------------
# --| Sort on unconditional centrality |--
# ----------------------------------------

# generate a ranking
portsinunconditionalcent = portsub.copy()
# Rank of 'cent' within each date; tied values share their average rank
portsinunconditionalcent['rank'] = portsinunconditionalcent.groupby(by='Date')['cent'].rank(method='average')
# Largest rank at each date. The aggregation is named by string: passing the
# builtin max to transform is deprecated in recent pandas versions.
portsinunconditionalcent['maxrank'] = portsinunconditionalcent.groupby(by='Date')['rank'].transform('max')
# Common scale for the ranks (rank/maxrank is multiplied by this value)
max_n = 39
# Division by 1200 is because we use the same function as for unconditional spreads (*1200 in function)
portsinunconditionalcent['rank'] = portsinunconditionalcent['rank']/portsinunconditionalcent['maxrank']*max_n\
                                    /1200

## calculate the mean of the rescaled rank for the first half of the sample
## (stored as 'meanforwardhalf' so that getUnconditionalMeans can be reused)
firsthalfmeancent = ( portsinunconditionalcent[(portsinunconditionalcent['year'].isin(list(range(1988,2001+1))))]
                                    .groupby(by=['iso3'],as_index=False)
                                    .agg({'rank':'mean'})
                                    .rename(columns={'rank':'meanforwardhalf'})
                    )

portsinunconditionalcent = pd.merge(left=portsinunconditionalcent,right=firsthalfmeancent,how='left',on='iso3')

# now limit to second half of sample
portsinunconditionalcent.drop(portsinunconditionalcent[portsinunconditionalcent['year']<2002].index,
                         inplace=True)
# drop observations with NaN on meanforwardhalf and logrx columns
portsinunconditionalcent.dropna(subset=['meanforwardhalf','logrx'], inplace=True)

portmeans_unconditionalcent, portbydate_unconditionalcent = getUnconditionalMeans(portsinunconditionalcent)
    

In [183]:
# Combine the factor series on 'Date' (outer joins keep every date present in any factor)
# Richmond replication: PMC + HML + UHML
all_factors = (
    PMC_factor
    .merge(HML_factor, how='outer', on='Date')
    .merge(UHML_factor, how='outer', on='Date')
)
# Our currency invoicing measure: DMC + HML + UHML
all_factorsours = (
    DMC_factor
    .merge(HML_factor, how='outer', on='Date')
    .merge(UHML_factor, how='outer', on='Date')
)

# Write both factor tables to the pickle paths defined at the top of the notebook
all_factors.to_pickle(Factors_FX_Portfolios_Cent)
all_factorsours.to_pickle(Factors_FX_Portfolios_Invoice)

In [184]:
# Export logrx by date and portfolio for every sort, tagged with the sort type
port_cols = ['Date', 'portnum', 'logrx']

# Pieces shared by both exports
tmp_ports_fs = portbydate_fs[port_cols].assign(type='fs')
tmp_ports_unconditional = portbydate_unconditional[port_cols].assign(type='unconditional')

# Richmond replication: centrality sort + forward-spread sort + unconditional sort
all_ports = pd.concat([
    portbydate_cent[port_cols].assign(type='cent'),
    tmp_ports_fs,
    tmp_ports_unconditional,
])

# Our currency invoicing measure: invoice sort + forward-spread sort + unconditional sort
all_portsinv = pd.concat([
    portbydate_invoice[port_cols].assign(type='invoice'),
    tmp_ports_fs,
    tmp_ports_unconditional,
])

# Write both tables to the pickle paths defined at the top of the notebook
all_ports.to_pickle(PortfolioData_PortfolioRX_AllSorts_Cent)
all_portsinv.to_pickle(PortfolioData_PortfolioRX_AllSorts_Invoice)

In [185]:
# Bootstrap everything
def getBootSample(datain,nreps=10000,nblock=240,varlist = ['forwardspread','realfwdspread','logrx'],random_state=None):
    """Bootstrap portfolio statistics by resampling dates with replacement.

    Each replication draws as many dates as ``datain`` contains (with
    replacement, all portfolios of a date drawn together) and computes, per
    portfolio, ``mean1200`` of every variable in ``varlist`` and ``SR12`` of
    'logrx'.

    Parameters
    ----------
    datain : pandas.DataFrame
        Long-form data with columns 'Date', 'portnum' and those in ``varlist``.
    nreps : int
        Number of bootstrap replications.
    nblock : int
        Currently unused (dates are drawn individually, not in blocks); kept
        so that existing calls remain valid.
    varlist : list of str
        Variables to summarize. Must contain 'logrx', which provides the
        'sharperatio' column.
    random_state : None, int, numpy RandomState or Generator, optional
        Source of randomness. ``None`` (default) uses NumPy's global random
        state, as before. An int seeds a private generator that is advanced
        across replications, making the whole bootstrap reproducible.

    Returns
    -------
    pandas.DataFrame
        One row per (replication, portfolio) with the columns
        ['portnum', *varlist, 'sharperatio'].

    Notes
    -----
    Uses the module-level ``mean1200`` and ``SR12``.
    """
    varlist = list(varlist)
    varlistboot = ['Date', 'portnum'] + varlist
    varlistoutboot = ['portnum'] + varlist + ['sharperatio']

    # One generator for all replications: passing the same int to every
    # .sample() call would repeat the identical draw nreps times.
    if isinstance(random_state, (int, np.integer)):
        rng = np.random.RandomState(random_state)
    else:
        rng = random_state

    tmp = datain[varlistboot]

    # Move to wide form so as to make each date a realization.
    # This does not depend on the replication, so it is built once.
    wideform = ( tmp.pivot_table(index=['Date'],columns=['portnum'],
                                 values=varlist)
                    .reset_index()
                    .rename_axis(None, axis=0)
               )
    ndates = len(wideform)

    # Collect the replications and concatenate once at the end (appending to a
    # DataFrame inside the loop copies all earlier rows on every iteration)
    replications = []
    for _ in range(nreps):

        # Sample with replacement
        mysample_wide = wideform.sample(n=ndates,replace=True,axis=0,random_state=rng)
        # Put sample back in long form
        mysample_long = pd.melt(mysample_wide,id_vars=['Date'],var_name=['measure','portnum'])
        grouped = mysample_long.groupby(by=['portnum','measure'],as_index=False)

        # Compute means by portfolios
        outmeans = grouped.agg({'value':mean1200})
        # Compute sharpe ratio by portfolio
        outSR = grouped.agg({'value':SR12})

        # Put means and SR in wide format
        outmeans_wide = outmeans.pivot_table(index='portnum',columns='measure',values='value').reset_index()\
            .rename_axis(None, axis=1)
        outSR_wide = ( outSR.pivot_table(index='portnum',columns='measure',values='value')
                        .reset_index()
                        .rename_axis(None, axis=1)
                        .rename(columns={'logrx':'sharperatio'})
                     )

        # Only the ratio computed on logrx is kept
        outSR_wide = outSR_wide[['portnum','sharperatio']]

        replications.append(pd.merge(left=outmeans_wide,right=outSR_wide,on='portnum',how='left'))

    if not replications:
        return pd.DataFrame(columns=varlistoutboot)

    # reindex fixes the column order to ['portnum', *varlist, 'sharperatio']
    return pd.concat(replications, ignore_index=True).reindex(columns=varlistoutboot)
        
    # leaving out runconscor block for now

# Seed NumPy's global random state so that the bootstrap draws -- and the
# standard errors computed from them -- are identical on every run.
# (DataFrame.sample uses this global state when it is not given a random_state.)
# The value itself is arbitrary.
bootstrap_seed = 20240101
np.random.seed(bootstrap_seed)

# Time the four bootstrap runs (this cell is slow: 10000 replications per sort)
start_time = time.time()
centbootsample = getBootSample(portbydate_cent)
invoicebootsample = getBootSample(portbydate_invoice)
FSbootsample = getBootSample(portbydate_fs,varlist = ['fwdspread','prevfwdspread','logrx'])
Unconditionalbootsample = getBootSample(portbydate_unconditional,varlist = ['fwdspread','logrx'])

end_time = time.time()
elapsed_time = end_time - start_time

# Elapsed wall-clock time in seconds
print(elapsed_time)

4059.089542865753


In [186]:
# Stack the four bootstrap samples, labelling each with its sort type

centbootsample['sorttype'] = 'cent'
invoicebootsample['sorttype'] = 'invoice'
FSbootsample['sorttype'] = 'fwdspread'
Unconditionalbootsample['sorttype'] = 'unconditional'

allboot = pd.concat(
    [centbootsample, invoicebootsample, FSbootsample, Unconditionalbootsample],
    axis=0,
    ignore_index=True,
)

# Bootstrap standard errors: std of every statistic across replications,
# per sort type and portfolio. All statistic columns get an '_se' suffix;
# the two key columns keep their original names.
stderrors = (
    allboot.groupby(by=['sorttype', 'portnum'], as_index=False)
    .agg('std')
    .add_suffix('_se')
    .rename(columns={'sorttype_se': 'sorttype', 'portnum_se': 'portnum'})
)

# Point estimates of each sort, labelled the same way as the bootstrap samples
tmplong = portmeans_cent.assign(sorttype='cent')
tmplonginv = portmeans_invoice.assign(sorttype='invoice')
tmplongfs = portmeans_fs.assign(sorttype='fwdspread')
tmplongunconditional = portmeans_unconditional.assign(sorttype='unconditional')

tmpall = pd.concat([tmplong, tmplonginv, tmplongfs, tmplongunconditional], axis=0, ignore_index=True)

# Attach the standard errors to the point estimates
portdataout = tmpall.merge(stderrors, how='left', on=['sorttype', 'portnum'])
# Put 'sorttype' first, leaving the other columns in their existing order
portdataout = portdataout[['sorttype'] + [col for col in portdataout.columns if col != 'sorttype']]


In [187]:
# Write the summary table and the four bootstrap samples to disk
# (target paths are the strings defined at the beginning of the program)
for _frame, _target in (
    (portdataout, PortfolioData_AllSorts_BootStrap),
    (centbootsample, PortfolioData_BootStrap_centSort),
    (invoicebootsample, PortfolioData_BootStrap_InvoiceSort),
    (FSbootsample, PortfolioData_BootStrap_FSSort),
    (Unconditionalbootsample, PortfolioData_BootStrap_UnconditionalSort),
):
    _frame.to_pickle(_target)

In [188]:
# Display the DMC factor (the 'HML' logrx series of the invoicing sort)
DMC_factor

Unnamed: 0,Date,DMC
4,1996-09-30,0.031964
9,1996-10-31,0.023415
14,1996-11-30,0.008308
19,1996-12-31,0.021772
24,1997-01-31,-0.023425
...,...,...
1144,2016-08-31,0.008149
1149,2016-09-30,-0.005317
1154,2016-10-31,-0.012533
1159,2016-11-30,0.015657
