# Subsample analysis

In [13]:
%matplotlib inline

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt

### Load and combine data

In [16]:
df_FamaBliss = pd.read_stata('/Users/dioscuroi/OneDrive - UNSW/Research Data/Bonds/Bond Yields from CRSP/Fama-Bliss Discount Bond Yields.dta')
df_m3 = pd.read_stata('/Users/dioscuroi/OneDrive - UNSW/Research Data/Bonds/Bond Yields from FRB/Treasury Bills/yield_tbills_monthly_end.dta')
df_pers_infl = pd.read_stata('/Users/dioscuroi/OneDrive - UNSW/Research Data/Bonds/Bond Yields from CRSP/risk_premium_factors.dta')

In [18]:
df_combined = df_FamaBliss.merge(df_pers_infl[['date', 'pers_infl', 'tent']], on='date', how='inner')
df_combined = df_combined.merge(df_m3[['date', 'm3']], on='date', how='inner')

df_combined = df_combined.dropna()

df_combined.index = pd.PeriodIndex(df_combined['date'], freq='M')

del df_combined['date']

df_combined.head()

Unnamed: 0_level_0,const,yield1,yield2,yield3,yield4,yield5,pers_infl,tent,m3
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1968-01,1.0,5.47024,5.313072,5.403243,5.435728,5.391583,0.020368,0.001426,4.88
1968-02,1.0,5.528738,5.367476,5.57768,5.505597,5.38463,0.02073,0.00375,5.02
1968-03,1.0,5.685554,5.503867,5.684779,5.715086,5.539747,0.021086,0.008373,5.17
1968-04,1.0,5.909851,5.619697,5.854537,5.867024,5.68424,0.021435,0.008876,5.51
1968-05,1.0,6.056748,5.784714,5.787423,5.809674,5.636915,0.021789,0.001338,5.68


In [19]:
df_combined.tail()

Unnamed: 0_level_0,const,yield1,yield2,yield3,yield4,yield5,pers_infl,tent,m3
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2016-08,1.0,0.67632,0.819596,0.939961,1.128652,1.221504,0.018654,-0.001519,0.33
2016-09,1.0,0.6638,0.782954,0.896652,1.068444,1.175224,0.01868,-0.003147,0.28
2016-10,1.0,0.697406,0.859965,1.000571,1.205819,1.335289,0.018695,-0.00089,0.34
2016-11,1.0,0.889716,1.115353,1.401609,1.721781,1.879066,0.018711,0.009221,0.48
2016-12,1.0,0.858817,1.195558,1.451491,1.759157,1.948628,0.018744,0.008096,0.5


### load excess returns

In [20]:
df_exrets = pd.read_stata('/Users/dioscuroi/OneDrive - UNSW/Research Data/Bonds/Bond Yields from FRB/excess_returns.dta')

df_exrets.index = pd.PeriodIndex(df_exrets['date'], freq='M')

del df_exrets['date']

df_exrets.head(3)

Unnamed: 0_level_0,rf1m,exret_annual2,exret_annual3,exret_annual4,exret_annual5,exret_annual6,exret_annual7,exret_annual8,exret_annual9,exret_annual10,...,exret_monthly6,exret_monthly7,exret_monthly8,exret_monthly9,exret_monthly10,exret_monthly11,exret_monthly12,exret_monthly13,exret_monthly14,exret_monthly15
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1961-07,0.18,,,,,,,,,,...,-0.004323,-0.006508,,,,,,,,
1961-08,0.14,,,,,,,,,,...,-0.002179,-0.002025,,,,,,,,
1961-09,0.17,,,,,,,,,,...,0.00996,0.011699,,,,,,,,


## Subsample 1: prior to 1987

In [47]:
subsample = df_combined[:'1987-12'].copy()

pca = sm.PCA(subsample.loc[:,'yield1':'yield5'])

subsample = subsample.join(pca.factors)
subsample.tail(3)

Unnamed: 0_level_0,const,yield1,yield2,yield3,yield4,yield5,pers_infl,tent,m3,comp_0,comp_1,comp_2,comp_3,comp_4
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1987-10,1.0,6.713565,7.630636,8.023462,8.321124,8.367856,0.059198,0.03631,5.27,-0.012194,-0.082083,-0.096791,-0.03212,-0.035637
1987-11,1.0,6.882601,7.5546,7.96515,8.269432,8.45745,0.058906,0.030471,5.21,-0.011864,-0.074383,0.012874,0.009389,-0.010353
1987-12,1.0,7.242368,7.638648,7.887379,8.116063,8.30357,0.058611,0.019049,5.68,-0.011752,-0.036692,0.040628,0.030886,-0.015154


In [48]:
exog = subsample.loc[:,'comp_0':'comp_2']
# exog = exog.join(subsample['pers_infl'])
exog = sm.add_constant(exog)

ols = sm.OLS(subsample['m3'], exog, missing='drop')

results = ols.fit()

print(results.summary())

subsample['resid'] = results.resid

                            OLS Regression Results                            
Dep. Variable:                     m3   R-squared:                       0.974
Model:                            OLS   Adj. R-squared:                  0.973
Method:                 Least Squares   F-statistic:                     2892.
Date:                Mon, 06 Nov 2017   Prob (F-statistic):          9.81e-186
Time:                        22:00:47   Log-Likelihood:                -141.59
No. Observations:                 240   AIC:                             291.2
Df Residuals:                     236   BIC:                             305.1
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          7.3928      0.028    260.190      0.0

In [49]:
df_temp = subsample.join(df_exrets, how='outer')

ols = sm.OLS(df_temp['exret_annual3'].shift(-12), df_temp[['const', 'tent', 'resid']], missing='drop')

results = ols.fit()

print(results.summary())

                            OLS Regression Results                            
Dep. Variable:          exret_annual3   R-squared:                       0.316
Model:                            OLS   Adj. R-squared:                  0.310
Method:                 Least Squares   F-statistic:                     54.80
Date:                Mon, 06 Nov 2017   Prob (F-statistic):           2.74e-20
Time:                        22:00:49   Log-Likelihood:                 499.69
No. Observations:                 240   AIC:                            -993.4
Df Residuals:                     237   BIC:                            -982.9
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0106      0.002     -4.481      0.0

In [50]:
ols = sm.OLS(df_temp['exret_annual10'].shift(-12), df_temp[['const', 'tent', 'resid']], missing='drop')

results = ols.fit()

print(results.summary())

                            OLS Regression Results                            
Dep. Variable:         exret_annual10   R-squared:                       0.393
Model:                            OLS   Adj. R-squared:                  0.387
Method:                 Least Squares   F-statistic:                     62.76
Date:                Mon, 06 Nov 2017   Prob (F-statistic):           9.57e-22
Time:                        22:00:52   Log-Likelihood:                 177.30
No. Observations:                 197   AIC:                            -348.6
Df Residuals:                     194   BIC:                            -338.7
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0636      0.009     -7.174      0.0

## Subsample 2: post to 1988

In [59]:
subsample = df_combined['1988-01':].copy()

pca = sm.PCA(subsample.loc[:,'yield1':'yield5'])

subsample = subsample.join(pca.factors)
subsample.head(3)

Unnamed: 0_level_0,const,yield1,yield2,yield3,yield4,yield5,pers_infl,tent,m3,comp_0,comp_1,comp_2,comp_3,comp_4
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1988-01,1.0,6.718936,7.137876,7.371835,7.578492,7.677678,0.058336,0.019319,5.64,-0.071847,0.030278,-0.002691,0.021495,0.022988
1988-02,1.0,6.607727,7.003947,7.238081,7.413629,7.547151,0.058064,0.016604,5.62,-0.068914,0.026763,0.003609,0.0456,-0.005707
1988-03,1.0,6.849095,7.277961,7.551726,7.783437,7.972164,0.057815,0.019598,5.71,-0.076018,0.043661,0.034194,0.056285,0.002912


In [60]:
exog = subsample.loc[:,'comp_0':'comp_2']
# exog = exog.join(subsample['pers_infl'])
exog = sm.add_constant(exog)

ols = sm.OLS(subsample['m3'], exog, missing='drop')

results = ols.fit()

print(results.summary())

subsample['resid'] = results.resid

                            OLS Regression Results                            
Dep. Variable:                     m3   R-squared:                       0.992
Model:                            OLS   Adj. R-squared:                  0.992
Method:                 Least Squares   F-statistic:                 1.384e+04
Date:                Mon, 06 Nov 2017   Prob (F-statistic):               0.00
Time:                        22:02:43   Log-Likelihood:                 21.104
No. Observations:                 348   AIC:                            -34.21
Df Residuals:                     344   BIC:                            -18.80
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          3.1336      0.012    255.211      0.0

In [61]:
df_temp = subsample.join(df_exrets, how='outer')

ols = sm.OLS(df_temp['exret_annual3'].shift(-12), df_temp[['const', 'tent', 'resid']], missing='drop')

results = ols.fit()

print(results.summary())

                            OLS Regression Results                            
Dep. Variable:          exret_annual3   R-squared:                       0.085
Model:                            OLS   Adj. R-squared:                  0.079
Method:                 Least Squares   F-statistic:                     15.59
Date:                Mon, 06 Nov 2017   Prob (F-statistic):           3.34e-07
Time:                        22:02:44   Log-Likelihood:                 828.15
No. Observations:                 340   AIC:                            -1650.
Df Residuals:                     337   BIC:                            -1639.
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0092      0.002      6.143      0.0

In [62]:
ols = sm.OLS(df_temp['exret_annual10'].shift(-12), df_temp[['const', 'tent', 'resid']], missing='drop')

results = ols.fit()

print(results.summary())

                            OLS Regression Results                            
Dep. Variable:         exret_annual10   R-squared:                       0.194
Model:                            OLS   Adj. R-squared:                  0.189
Method:                 Least Squares   F-statistic:                     40.62
Date:                Mon, 06 Nov 2017   Prob (F-statistic):           1.57e-16
Time:                        22:02:48   Log-Likelihood:                 450.65
No. Observations:                 340   AIC:                            -895.3
Df Residuals:                     337   BIC:                            -883.8
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0213      0.005      4.672      0.0