# Subsample analysis

In [1]:
%matplotlib inline

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt

## Compute the principal components of yield curves

In [2]:
# Read the Fama-Bliss discount bond yield file into a DataFrame.
# NOTE(review): absolute, machine-specific path; consider a configurable data directory.
df_FamaBliss = pd.read_stata(
    '/Users/dioscuroi/OneDrive - UNSW/Research Data/Bonds/Bond Yields from CRSP/Fama-Bliss Discount Bond Yields.dta'
)

# PCA input: every column from 'yield1' through the last column of the frame.
yield_columns = df_FamaBliss.loc[:, 'yield1':]
pc = sm.PCA(yield_columns)

# Put the date column side by side with the factor columns up to and
# including 'comp_2'; the two pieces are aligned on the row index.
leading_factors = pc.factors.loc[:, :'comp_2']
df_pc = pd.concat([df_FamaBliss['date'], leading_factors], axis=1)

### Load and combine data

In [3]:
# NOTE(review): both paths are absolute and machine-specific; consider a
# configurable data directory.

# Treasury bill yields; the merge cell reads the 'date' and 'm3' columns.
# (Presumably 'm3' is the 3-month bill yield; confirm against the data file.)
df_m3 = pd.read_stata(
    '/Users/dioscuroi/OneDrive - UNSW/Research Data/Bonds/Bond Yields from FRB/Treasury Bills/yield_tbills_monthly_end.dta'
)

# Risk-premium factors; the merge cell reads 'date', 'pers_infl' and 'tent'.
df_pers_infl = pd.read_stata(
    '/Users/dioscuroi/OneDrive - UNSW/Research Data/Bonds/Bond Yields from CRSP/risk_premium_factors.dta'
)

In [4]:
# Inner-join the principal components with the risk-premium factors and the
# bill yield on 'date', then discard any row that still has a missing value.
df_combined = (
    df_pc
    .merge(df_pers_infl[['date', 'pers_infl', 'tent']], on='date', how='inner')
    .merge(df_m3[['date', 'm3']], on='date', how='inner')
    .dropna()
)

# Move 'date' out of the columns and into a monthly PeriodIndex.
df_combined.index = pd.PeriodIndex(df_combined.pop('date'), freq='M')

# Add the 'const' column used as the intercept in the regressions.
df_combined = sm.add_constant(df_combined)

df_combined.head()

Unnamed: 0_level_0,const,comp_0,comp_1,comp_2,pers_infl,tent,m3
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1968-01,1.0,0.002575,0.03195,-0.003944,0.020368,0.001426,4.88
1968-02,1.0,0.003395,0.033001,0.025993,0.02073,0.00375,5.02
1968-03,1.0,0.0052,0.031321,0.015275,0.021086,0.008373,5.17
1968-04,1.0,0.007093,0.033489,0.002866,0.021435,0.008876,5.51
1968-05,1.0,0.007391,0.045332,0.002235,0.021789,0.001338,5.68


In [5]:
# Show the last five rows to see where the merged sample ends.
df_combined.tail(5)

Unnamed: 0_level_0,const,comp_0,comp_1,comp_2,pers_infl,tent,m3
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2016-08,1.0,-0.049705,0.020409,0.020818,0.018654,-0.001519,0.33
2016-09,1.0,-0.050179,0.022442,0.017902,0.01868,-0.003147,0.28
2016-10,1.0,-0.048957,0.016064,0.013594,0.018695,-0.00089,0.34
2016-11,1.0,-0.044413,-0.003687,-0.004502,0.018711,0.009221,0.48
2016-12,1.0,-0.043919,-0.007269,0.007138,0.018744,0.008096,0.5


### Load excess returns

In [6]:
# Read the excess-return file.
# NOTE(review): absolute, machine-specific path; consider a configurable data directory.
df_exrets = pd.read_stata(
    '/Users/dioscuroi/OneDrive - UNSW/Research Data/Bonds/Bond Yields from FRB/excess_returns.dta'
)

# Remove 'date' from the columns and use it as a monthly PeriodIndex, so the
# frame can be joined to df_combined on the index.
df_exrets.index = pd.PeriodIndex(df_exrets.pop('date'), freq='M')

df_exrets.head(3)

Unnamed: 0_level_0,rf1m,exret_annual2,exret_annual3,exret_annual4,exret_annual5,exret_annual6,exret_annual7,exret_annual8,exret_annual9,exret_annual10,...,exret_monthly6,exret_monthly7,exret_monthly8,exret_monthly9,exret_monthly10,exret_monthly11,exret_monthly12,exret_monthly13,exret_monthly14,exret_monthly15
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1961-07,0.18,,,,,,,,,,...,-0.004323,-0.006508,,,,,,,,
1961-08,0.14,,,,,,,,,,...,-0.002179,-0.002025,,,,,,,,
1961-09,0.17,,,,,,,,,,...,0.00996,0.011699,,,,,,,,


## Subsample 1: through December 1987

In [7]:
# First subsample: every month up to and including 1987-12 (label slice on
# the monthly PeriodIndex). Copy so a column can be added without touching
# df_combined.
subsample = df_combined.loc[:'1987-12'].copy()

# First stage: regress m3 on all columns up to and including 'pers_infl'
# (const, comp_0..comp_2, pers_infl in df_combined's column order).
first_stage_exog = subsample.loc[:, :'pers_infl']
ols = sm.OLS(subsample['m3'], first_stage_exog, missing='drop')
results = ols.fit()

# Store the first-stage residual; the forecasting regressions use it as a regressor.
subsample['resid'] = results.resid

print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                     m3   R-squared:                       0.974
Model:                            OLS   Adj. R-squared:                  0.973
Method:                 Least Squares   F-statistic:                     2171.
Date:                Mon, 06 Nov 2017   Prob (F-statistic):          3.19e-184
Time:                        10:25:31   Log-Likelihood:                -140.99
No. Observations:                 240   AIC:                             292.0
Df Residuals:                     235   BIC:                             309.4
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          4.1819      0.128     32.586      0.0

In [8]:
# Outer-join the subsample with the excess returns on the monthly index.
# Months outside the subsample get NaN regressors and are dropped by
# missing='drop' below.
df_temp = subsample.join(df_exrets, how='outer')

# Lead the dependent variable by 12 rows, so the regressors in row t are
# paired with exret_annual3 from row t+12. The shift is positional.
# NOTE(review): this assumes the joined index has no missing months; confirm.
exret_lead12 = df_temp['exret_annual3'].shift(-12)
forecast_exog = df_temp[['const', 'tent', 'resid']]

# NOTE(review): if exret_annual3 is a 12-month return sampled monthly, the
# observations overlap and the default nonrobust standard errors are likely
# understated; consider a HAC covariance. Confirm with the author.
ols = sm.OLS(exret_lead12, forecast_exog, missing='drop')
results = ols.fit()

print(results.summary())

                            OLS Regression Results                            
Dep. Variable:          exret_annual3   R-squared:                       0.309
Model:                            OLS   Adj. R-squared:                  0.303
Method:                 Least Squares   F-statistic:                     53.03
Date:                Mon, 06 Nov 2017   Prob (F-statistic):           9.27e-20
Time:                        10:25:31   Log-Likelihood:                 498.46
No. Observations:                 240   AIC:                            -990.9
Df Residuals:                     237   BIC:                            -980.5
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0107      0.002     -4.476      0.0

In [9]:
# Same forecasting regression as for exret_annual3, with exret_annual10 as
# the dependent variable. Relies on df_temp built in the previous cell.
# The 12-row lead is positional.
exret_lead12 = df_temp['exret_annual10'].shift(-12)
forecast_exog = df_temp[['const', 'tent', 'resid']]

# NOTE(review): if exret_annual10 is a 12-month return sampled monthly, the
# default nonrobust standard errors are likely understated; consider HAC.
ols = sm.OLS(exret_lead12, forecast_exog, missing='drop')
results = ols.fit()

print(results.summary())

                            OLS Regression Results                            
Dep. Variable:         exret_annual10   R-squared:                       0.383
Model:                            OLS   Adj. R-squared:                  0.376
Method:                 Least Squares   F-statistic:                     60.16
Date:                Mon, 06 Nov 2017   Prob (F-statistic):           4.70e-21
Time:                        10:25:31   Log-Likelihood:                 175.68
No. Observations:                 197   AIC:                            -345.4
Df Residuals:                     194   BIC:                            -335.5
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0648      0.009     -7.231      0.0

## Subsample 2: January 1988 onward

In [10]:
# Second subsample: every month from 1988-01 onward (label slice on the
# monthly PeriodIndex). Copy so a column can be added without touching
# df_combined.
subsample = df_combined.loc['1988-01':].copy()

# First stage: regress m3 on all columns up to and including 'pers_infl'
# (const, comp_0..comp_2, pers_infl in df_combined's column order).
first_stage_exog = subsample.loc[:, :'pers_infl']
ols = sm.OLS(subsample['m3'], first_stage_exog, missing='drop')
results = ols.fit()

# Store the first-stage residual; the forecasting regressions use it as a regressor.
subsample['resid'] = results.resid

print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                     m3   R-squared:                       0.992
Model:                            OLS   Adj. R-squared:                  0.992
Method:                 Least Squares   F-statistic:                 1.029e+04
Date:                Mon, 06 Nov 2017   Prob (F-statistic):               0.00
Time:                        10:25:31   Log-Likelihood:                 20.131
No. Observations:                 348   AIC:                            -30.26
Df Residuals:                     343   BIC:                            -11.00
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          4.4236      0.088     50.089      0.0

In [11]:
# Outer-join the subsample with the excess returns on the monthly index.
# Months outside the subsample get NaN regressors and are dropped by
# missing='drop' below.
df_temp = subsample.join(df_exrets, how='outer')

# Lead the dependent variable by 12 rows, so the regressors in row t are
# paired with exret_annual3 from row t+12. The shift is positional.
# NOTE(review): this assumes the joined index has no missing months; confirm.
exret_lead12 = df_temp['exret_annual3'].shift(-12)
forecast_exog = df_temp[['const', 'tent', 'resid']]

# NOTE(review): if exret_annual3 is a 12-month return sampled monthly, the
# observations overlap and the default nonrobust standard errors are likely
# understated; consider a HAC covariance. Confirm with the author.
ols = sm.OLS(exret_lead12, forecast_exog, missing='drop')
results = ols.fit()

print(results.summary())

                            OLS Regression Results                            
Dep. Variable:          exret_annual3   R-squared:                       0.085
Model:                            OLS   Adj. R-squared:                  0.079
Method:                 Least Squares   F-statistic:                     15.60
Date:                Mon, 06 Nov 2017   Prob (F-statistic):           3.32e-07
Time:                        10:25:31   Log-Likelihood:                 828.15
No. Observations:                 340   AIC:                            -1650.
Df Residuals:                     337   BIC:                            -1639.
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0092      0.002      6.142      0.0

In [12]:
# Same forecasting regression as for exret_annual3, with exret_annual10 as
# the dependent variable. Relies on df_temp built in the previous cell.
# The 12-row lead is positional.
exret_lead12 = df_temp['exret_annual10'].shift(-12)
forecast_exog = df_temp[['const', 'tent', 'resid']]

# NOTE(review): if exret_annual10 is a 12-month return sampled monthly, the
# default nonrobust standard errors are likely understated; consider HAC.
ols = sm.OLS(exret_lead12, forecast_exog, missing='drop')
results = ols.fit()

print(results.summary())

                            OLS Regression Results                            
Dep. Variable:         exret_annual10   R-squared:                       0.195
Model:                            OLS   Adj. R-squared:                  0.190
Method:                 Least Squares   F-statistic:                     40.72
Date:                Mon, 06 Nov 2017   Prob (F-statistic):           1.45e-16
Time:                        10:25:31   Log-Likelihood:                 450.73
No. Observations:                 340   AIC:                            -895.5
Df Residuals:                     337   BIC:                            -884.0
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0213      0.005      4.675      0.0