In [1]:
import pandas as pd
import numpy as np 
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import math
from sklearn import linear_model
from sklearn.covariance import ShrunkCovariance 
import statsmodels.api as sm
from statsmodels.tsa.api import VAR 
import statsmodels.tsa.stattools 

# Excel workbook holding the yield data (presumably constant-maturity Treasury
# yields, judging by the file name -- confirm against the data source).
file = "CMT-all.xlsx"

#importing the data; the first column of the sheet becomes the index
data = pd.read_excel(file, index_col = 0)

# Tenor columns used by the analysis. The order matters: the weight vector
# passed to Weight_Solve is positional and follows this column order.
tenors = ['3M','2Y','5Y','7Y','10Y','30Y']

#creating sample 1 and sample 2
# `.loc` replaces `.ix`, which was deprecated in pandas 0.20 and removed in 1.0.
# Label slices on the index include both end points.
sample1 = data.loc['2013-01-01':'2014-12-31', tenors]
sample2 = data.loc['2015-01-01':'2016-12-31', tenors]

#finding the returns for sample 1 and sample 2
# (first differences; the first row of each frame is NaN by construction)
sample1_returns = sample1.diff()
sample2_returns = sample2.diff()


"""defining all the functions needed for the homework"""
def PCA_Function(sample):
    """Return the principal directions of the covariance matrix of `sample`.

    Parameters
    ----------
    sample : pandas.DataFrame
        Observations in rows, variables in columns. The covariance is taken
        with ``DataFrame.cov``.

    Returns
    -------
    pandas.DataFrame
        Square frame (default integer labels) whose columns are the
        eigenvectors of the covariance matrix, ordered from the largest
        eigenvalue to the smallest. Row ``k`` corresponds to the ``k``-th
        column of `sample`.
    """
    covariance_matrix = np.asarray(sample.cov()) #finding the covariance
    # eigh is meant for symmetric matrices and returns eigenvalues in
    # ascending order, with the matching eigenvectors as columns.
    eig_val, eig_vect = np.linalg.eigh(covariance_matrix)
    # Reverse the column order so the leading component comes first.
    return pd.DataFrame(eig_vect[:, ::-1])

def Weight_Solve(weight,PCAsample):
    """Solve for the unknown portfolio weights that neutralise the leading components.

    Parameters
    ----------
    weight : sequence
        One entry per instrument (same order as the rows of `PCAsample`).
        A string entry (e.g. ``'w'``) marks a weight to be solved for; a
        numeric entry (int or float) is a fixed weight; ``0`` means the
        instrument is not held.
    PCAsample : array-like, shape (n_instruments, n_components)
        Components stored along the columns, as returned by ``PCA_Function``.

    Returns
    -------
    numpy.ndarray, shape (n_instruments, 1)
        Least-squares solution. Entries at the positions of the unknown
        weights hold the solved values; the other entries are not
        constrained by any equation and come out as zero in the
        minimum-norm solution.

    Raises
    ------
    ValueError
        If `PCAsample` is not 2-D, if `weight` does not have one entry per
        instrument, or if there are more unknowns than components.
    TypeError
        If a weight is neither a string nor a real number.

    Notes
    -----
    With ``k`` unknown weights, the portfolio is made neutral to the first
    ``k`` components: for each such component ``i`` the equation is
    ``sum_unknown w_j * pc[i, j] = -sum_fixed w_j * pc[i, j]``.
    """
    # Rows are components, columns are instruments.
    components = np.asarray(PCAsample, dtype=float)
    if components.ndim != 2:
        raise ValueError("PCAsample must be two-dimensional")
    components = components.T
    n_components, n_instruments = components.shape

    if len(weight) != n_instruments:
        raise ValueError(
            "weight has %d entries but PCAsample describes %d instruments"
            % (len(weight), n_instruments)
        )

    unknown_idx = []
    fixed_terms = []
    for j, w in enumerate(weight):
        if isinstance(w, str):
            unknown_idx.append(j)
        elif isinstance(w, (int, float, np.integer, np.floating)):
            # Accept any real number, not only built-in ints, so that a fixed
            # weight such as -1.0 is not silently dropped.
            if w != 0:
                fixed_terms.append((j, w))
        else:
            raise TypeError("weight[%d] must be a string or a number, got %r" % (j, w))

    n_unknowns = len(unknown_idx)
    if n_unknowns > n_components:
        raise ValueError(
            "cannot solve for %d unknown weights with only %d components"
            % (n_unknowns, n_components)
        )

    # One equation per neutralised component; unused rows stay all-zero.
    A = np.zeros((n_components, n_instruments))
    B = np.zeros((n_components, 1))
    for i in range(n_unknowns):
        A[i, unknown_idx] = components[i, unknown_idx]
        B[i, 0] = -sum(w * components[i, j] for j, w in fixed_terms)

    # rcond=None selects numpy's current default cutoff explicitly and avoids
    # the FutureWarning emitted by numpy 1.14-1.x when rcond is omitted.
    return np.linalg.lstsq(A, B, rcond=None)[0]

def Box_Tiao(sample):
    """Eigen-decompose a covariance-normalised matrix built from a VAR(1) fit of `sample`.

    Steps performed:
      1. take the covariance matrix of `sample`;
      2. fit ``VAR(sample)`` with ``maxlags=1`` and read ``forecast_cov()``;
      3. form ``F' F`` from that matrix ``F``;
      4. with ``C`` the Cholesky factor of the covariance and ``K = inv(C)``,
         build ``K' (F' F) K``;
      5. return its eigenvalues / eigenvectors, largest eigenvalue first.

    Returns
    -------
    (eig_val, eig_vect)
        Eigenvalues in descending order and the matching eigenvectors as
        columns (an ``np.matrix``, since the inputs are ``np.matrix``).

    NOTE(review): ``forecast_cov()`` is presumably the forecast-error
    covariance rather than the covariance of the fitted values, and the
    usual whitening would be ``K M K'`` rather than ``K' M K`` -- confirm
    that this matches the intended Box-Tiao predictability measure.
    """
    level_cov = np.matrix(sample.cov())

    # AR(1)-type vector autoregression on the input series
    var1_fit = VAR(sample).fit(maxlags=1)
    forecast_cov = np.matrix(var1_fit.forecast_cov())
    forecast_cov_sq = forecast_cov.T * forecast_cov  # `*` is a matrix product for np.matrix

    chol_inv = np.linalg.inv(np.linalg.cholesky(level_cov))
    normalised = chol_inv.T * forecast_cov_sq * chol_inv

    # eigh yields ascending eigenvalues; reverse both outputs together
    values, vectors = np.linalg.eigh(normalised)
    return values[::-1], vectors[:, ::-1]

# Plain butterfly on the yield levels of each sample: 2Y - 5Y + 10Y
fly_sample1 = pd.Series(sample1['2Y'].sub(sample1['5Y']).add(sample1['10Y']))
fly_sample2 = pd.Series(sample2['2Y'].sub(sample2['5Y']).add(sample2['10Y']))

# Principal components of the sample-1 returns (one component per column)
PCA_sample1 = PCA_Function(sample1_returns)

# Positional weights over ['3M','2Y','5Y','7Y','10Y','30Y']:
# 'w' marks a weight to solve for, i.e. w1 * 2y - 5y + w2 * 10y
weights = [0, 'w',-1,0,'w',0]
neutral_weights = Weight_Solve(weights,PCA_sample1)

# Rows 1 and 4 of the solution line up with the 2Y and 10Y positions above
w1_text = str(neutral_weights[1])
w2_text = str(neutral_weights[4])
print("------The PCA neutral weights are------\n")
print("w1 = " + w1_text + "\nw2 = " + w2_text)

------The PCA neutral weights are------

w1 = [ 1.13693977]
w2 = [ 0.54779512]


In [2]:
# PCA-neutral butterfly on the levels of both samples, using the weights
# solved on sample 1 (rows 1 and 4 are the 2Y and 10Y entries)
w_2y, w_10y = neutral_weights[1], neutral_weights[4]
PCA_fly1 = pd.Series(w_2y*sample1['2Y'] - sample1['5Y'] + w_10y*sample1['10Y'])
PCA_fly2 = pd.Series(w_2y*sample2['2Y'] - sample2['5Y'] + w_10y*sample2['10Y'])

# Augmented Dickey-Fuller test (second argument is maxlag = 1) on every series
ADF_fly_sample1, ADF_PCA_fly1, ADF_fly_sample2, ADF_PCA_fly2 = [
    statsmodels.tsa.stattools.adfuller(series, 1)
    for series in (fly_sample1, PCA_fly1, fly_sample2, PCA_fly2)
]

# Report the raw adfuller result tuples, one header per series
adf_report = (
    ("\n------ADF test for FLY sample 1------\n", ADF_fly_sample1),
    ("\n------ADF test for PCA FLY sample 1------\n", ADF_PCA_fly1),
    ("\n------ADF test for FLY sample 2------\n", ADF_fly_sample2),
    ("\n------ADF test for PCA FLY sample 2------\n", ADF_PCA_fly2),
)
for header, adf_result in adf_report:
    print(header)
    print(adf_result)


------ADF test for FLY sample 1------

(-1.2346415775209645, 0.65851914425371905, 1, 498, {'1%': -3.4435494520411605, '5%': -2.8673612117611267, '10%': -2.5698704830567247}, -2331.1833807906446)

------ADF test for PCA FLY sample 1------

(-1.7035351930071734, 0.42929196572743877, 1, 498, {'1%': -3.4435494520411605, '5%': -2.8673612117611267, '10%': -2.5698704830567247}, -2760.8044047014723)

------ADF test for FLY sample 2------

(-1.3396832436200279, 0.61082740350275455, 1, 499, {'1%': -3.4435228622952065, '5%': -2.8673495105661462, '10%': -2.569864247011056}, -1966.4445844806642)

------ADF test for PCA FLY sample 2------

(-0.98557292534025775, 0.75848980916729025, 1, 499, {'1%': -3.4435228622952065, '5%': -2.8673495105661462, '10%': -2.569864247011056}, -2503.3148168679154)


In [3]:
#finding the coint fly using OLS (Best Cointegrated Vector)
# Regress the 5Y returns on the 2Y and 10Y returns (no intercept is added).
# The first row of the differenced frame is NaN, so it is dropped by position.
# `.iloc` / column selection replace `.ix`, which was removed in pandas 1.0.
# NOTE(review): this regression is run on first differences, not on levels --
# confirm that is the intended way to estimate the cointegrating weights.
sample1_returns_valid = sample1_returns.iloc[1:]
res= sm.OLS(sample1_returns_valid['5Y'], sample1_returns_valid[['2Y','10Y']]).fit()
print("\n-----Summary for Regression-----\n",res.summary())
print ("\n------Weights for COINT FLY ------\n ",res.params)


-----Summary for Regression-----
                             OLS Regression Results                            
Dep. Variable:                     5Y   R-squared:                       0.905
Model:                            OLS   Adj. R-squared:                  0.904
Method:                 Least Squares   F-statistic:                     2363.
Date:                Tue, 11 Apr 2017   Prob (F-statistic):          1.40e-254
Time:                        15:40:32   Log-Likelihood:                 1451.7
No. Observations:                 499   AIC:                            -2899.
Df Residuals:                     497   BIC:                            -2891.
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
2Y             0.

In [4]:
#COINT FLY portfolio
# Regression coefficients are looked up by regressor name; this replaces the
# positional `.ix[0]` / `.ix[1]` access (`.ix` was removed in pandas 1.0).
coint_w_2y = res.params['2Y']
coint_w_10y = res.params['10Y']
COINT_fly1=pd.Series(coint_w_2y*sample1['2Y'] - sample1['5Y'] + coint_w_10y*sample1['10Y'])
COINT_fly2=pd.Series(coint_w_2y*sample2['2Y'] - sample2['5Y'] + coint_w_10y*sample2['10Y'])

##Performing the ADF test for the COINT FLY (second argument is maxlag = 1)
ADF_COINT_fly1 = statsmodels.tsa.stattools.adfuller(COINT_fly1, 1)
ADF_COINT_fly2 = statsmodels.tsa.stattools.adfuller(COINT_fly2, 1)

print("\n------ADF test for COINT FLY sample 1------\n")
print(ADF_COINT_fly1)
print("\n------ADF test for COINT FLY sample 2------\n")
print(ADF_COINT_fly2)


------ADF test for COINT FLY sample 1------

(-0.30943568537451671, 0.92415728475935177, 1, 498, {'1%': -3.4435494520411605, '5%': -2.8673612117611267, '10%': -2.5698704830567247}, -2904.060496235817)

------ADF test for COINT FLY sample 2------

(-1.9841236310005035, 0.29357742476546916, 1, 499, {'1%': -3.4435228622952065, '5%': -2.8673495105661462, '10%': -2.569864247011056}, -2968.4634894800815)


In [5]:
# Run the Box_Tiao helper on the sample-1 levels; it returns the eigenvalues
# and eigenvectors, both ordered from the largest eigenvalue down.
BT_vals, BT_vectors = Box_Tiao(sample1)
print("\n-----Results from Box Tiao Method-----\n")
# Only the eigenvector matrix is printed; BT_vals is kept but not displayed here.
print(BT_vectors)

###Summary###
# The string below is the last expression of the cell, so the notebook echoes
# it as the cell's output in addition to the printed matrix above.
""" It is observed from ADF Test that the weights obtained by making the 
weighted FLY for sample 1 PC 1 and PC 2 neutral do not give us stationary
series for the sample 2. This means that we shouldn't use the same weights 
for trading on the 2nd sample. We should rebalance our position (Dynamic).
We didn't obtain any mean-reverting nature.

The same applies to the weights obtained from the best cointegrated vector."""


-----Results from Box Tiao Method-----

[[-0.10855543  0.5259525   0.81705832  0.19391968  0.07772336 -0.01896377]
 [-0.34807028  0.48918596 -0.16220437 -0.68661847 -0.37648382  0.00699468]
 [-0.30972121  0.11137707 -0.18658524  0.56757172 -0.51400193  0.52011367]
 [ 0.06410582 -0.50076746  0.45549971 -0.40332309 -0.1596393   0.59117526]
 [ 0.64073856  0.46984819 -0.21259155 -0.06551788  0.23871405  0.51207872]
 [-0.59705956  0.01186785 -0.13644135 -0.04301704  0.71102174  0.34257906]]


" It is observed from ADF Test that the weights obtained by making the \nweighted FLY for sample 1 PC 1 and PC 2 neutral do not give us stationary\nseries for the sample 2. This means that we shouldn't use the same weights \nfor trading on the 2nd sample. We should rebalance our position (Dynamic).\nWe didn't obtain any mean-reverting nature.\n\nThe same applies to the weights obtained from the best cointegrated vector."