In [1]:
import pandas as pd
import numpy as np
import scipy 

## Import and Clean Data

In [3]:
# Load the yield-curve rates CSV (expected next to the notebook) into a DataFrame
data = pd.read_csv('yield-curve-rates-2011-2020.csv')

print(data.head())      ## plain-text preview of the first rows
data.info()

       Date  1 Mo  2 Mo  3 Mo  6 Mo  1 Yr  2 Yr  3 Yr  5 Yr  7 Yr  10 Yr  \
0  12/31/20  0.08  0.08  0.09  0.09  0.10  0.13  0.17  0.36  0.65   0.93   
1  12/30/20  0.06  0.06  0.08  0.09  0.12  0.12  0.17  0.37  0.66   0.93   
2  12/29/20  0.08  0.09  0.10  0.12  0.11  0.12  0.17  0.37  0.66   0.94   
3  12/28/20  0.09  0.10  0.11  0.11  0.11  0.13  0.17  0.38  0.65   0.94   
4  12/24/20  0.09  0.09  0.09  0.09  0.10  0.13  0.17  0.37  0.66   0.94   

   20 Yr  30 Yr  
0   1.45   1.65  
1   1.46   1.66  
2   1.47   1.67  
3   1.46   1.67  
4   1.46   1.66  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2501 entries, 0 to 2500
Data columns (total 13 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Date    2501 non-null   object 
 1   1 Mo    2501 non-null   float64
 2   2 Mo    552 non-null    float64
 3   3 Mo    2501 non-null   float64
 4   6 Mo    2501 non-null   float64
 5   1 Yr    2501 non-null   float64
 6   2 Yr    2501 non-null   flo

In [4]:
# "2 Mo" is populated on only 552 of the 2501 rows, so discard that column entirely
data = data.drop(columns=["2 Mo"])

In [5]:
# Re-inspect the schema after dropping "2 Mo": every remaining column should be fully populated
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2501 entries, 0 to 2500
Data columns (total 12 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Date    2501 non-null   object 
 1   1 Mo    2501 non-null   float64
 2   3 Mo    2501 non-null   float64
 3   6 Mo    2501 non-null   float64
 4   1 Yr    2501 non-null   float64
 5   2 Yr    2501 non-null   float64
 6   3 Yr    2501 non-null   float64
 7   5 Yr    2501 non-null   float64
 8   7 Yr    2501 non-null   float64
 9   10 Yr   2501 non-null   float64
 10  20 Yr   2501 non-null   float64
 11  30 Yr   2501 non-null   float64
dtypes: float64(11), object(1)
memory usage: 234.6+ KB


In [6]:
# Move the 'Date' column into the row index and label that index "Year"
data = data.set_index('Date').rename_axis("Year")

# The last rows of the file hold the oldest observations
data.tail()

Unnamed: 0_level_0,1 Mo,3 Mo,6 Mo,1 Yr,2 Yr,3 Yr,5 Yr,7 Yr,10 Yr,20 Yr,30 Yr
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1/7/11,0.13,0.14,0.18,0.29,0.6,1.02,1.96,2.69,3.34,4.25,4.48
1/6/11,0.13,0.15,0.18,0.3,0.68,1.11,2.09,2.8,3.44,4.31,4.53
1/5/11,0.13,0.14,0.19,0.31,0.71,1.16,2.14,2.86,3.5,4.34,4.55
1/4/11,0.12,0.14,0.19,0.28,0.63,1.04,2.01,2.72,3.36,4.21,4.44
1/3/11,0.11,0.15,0.19,0.29,0.61,1.03,2.02,2.74,3.36,4.18,4.39


In [7]:
# The CSV is ordered newest-first; flip the row order so the series runs oldest -> newest
data = data.iloc[::-1]

data.head()

Unnamed: 0_level_0,1 Mo,3 Mo,6 Mo,1 Yr,2 Yr,3 Yr,5 Yr,7 Yr,10 Yr,20 Yr,30 Yr
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1/3/11,0.11,0.15,0.19,0.29,0.61,1.03,2.02,2.74,3.36,4.18,4.39
1/4/11,0.12,0.14,0.19,0.28,0.63,1.04,2.01,2.72,3.36,4.21,4.44
1/5/11,0.13,0.14,0.19,0.31,0.71,1.16,2.14,2.86,3.5,4.34,4.55
1/6/11,0.13,0.15,0.18,0.3,0.68,1.11,2.09,2.8,3.44,4.31,4.53
1/7/11,0.13,0.14,0.18,0.29,0.6,1.02,1.96,2.69,3.34,4.25,4.48


## Visualizing Data

In [8]:
import matplotlib.pyplot as plt
import plotly
import plotly.graph_objects as go

import plotly.express as px

# Column names of `data`, one per maturity, ordered from the shortest to the longest tenor
Maturities = ['1 Mo','3 Mo', '6 Mo', '1 Yr', '2 Yr', '3 Yr', '5 Yr', '7 Yr', '10 Yr', '20 Yr', '30 Yr']

# Wide-form line chart: one trace per maturity column, plotted against the frame's index.
# `labels` has to be a dict mapping column names to display names (a bare string cannot
# relabel anything). In wide-form mode plotly names the melted columns "value" (the
# y-axis, i.e. the yields) and "variable" (the legend, i.e. the maturities).
fig = px.line(
    data,
    x=data.index,
    y=Maturities,
    title='Plot of US Treasuries Yield Curve',
    labels={'value': 'Yield', 'variable': 'Maturity'},
)

fig.show()



In [9]:
import plotly.express as px

# Pairwise correlation matrix between the maturity columns
z = data.corr()

# Heatmap of the correlation matrix; cell values are not annotated (plotly's default)
fig = px.imshow(z)
fig.show()


### Standardize the Data

In [13]:
# Convert every column to z-scores: subtract the column mean, then divide by the
# column's sample standard deviation (pandas' default for .std())
data_std = data.sub(data.mean()).div(data.std())
data_std.head()

Unnamed: 0_level_0,1 Mo,3 Mo,6 Mo,1 Yr,2 Yr,3 Yr,5 Yr,7 Yr,10 Yr,20 Yr,30 Yr
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1/3/11,-0.576462,-0.55857,-0.587376,-0.559474,-0.410584,-0.116214,0.765967,1.387103,1.860388,2.262982,2.174857
1/4/11,-0.56374,-0.571038,-0.587376,-0.571659,-0.384841,-0.102567,0.750962,1.355741,1.860388,2.307439,2.24877
1/5/11,-0.551019,-0.571038,-0.587376,-0.535103,-0.281868,0.061196,0.946029,1.575278,2.079509,2.500084,2.41138
1/6/11,-0.551019,-0.55857,-0.59962,-0.547288,-0.320483,-0.007039,0.871003,1.481191,1.9856,2.455628,2.381814
1/7/11,-0.551019,-0.571038,-0.59962,-0.559474,-0.423456,-0.129861,0.675936,1.308697,1.829085,2.366714,2.307901


### PCA Decomposition

In [15]:
from sklearn.decomposition import PCA

# PCA estimator with default settings (no limit on the number of components)
pca = PCA()

# Fit on the z-scored yields; fit() returns the estimator itself, hence the "PCA()" output
pca.fit(data_std)


PCA()

In [16]:
# Fraction of total variance captured by each component (sums to 1), in decreasing order
print(pca.explained_variance_ratio_)
# Singular values associated with each component
print(pca.singular_values_)

[6.66964936e-01 3.06256070e-01 2.34143777e-02 2.45520394e-03
 3.47101217e-04 2.76957227e-04 1.17860939e-04 7.89541522e-05
 3.74811087e-05 3.40719723e-05 1.69859153e-05]
[135.43092605  91.7716837   25.37509382   8.21694033   3.08954422
   2.75976878   1.80032658   1.47351253   1.01524898   0.96797688
   0.68345641]


In [118]:
# NOTE(review): this cell repeats the previous one, but its saved output (execution
# count 118) does not match the numbers printed there, so it appears to come from a
# different run or input. Re-run the notebook top to bottom, or remove this cell.
print(pca.explained_variance_ratio_)
print(pca.singular_values_)

[7.07221777e-01 2.66967603e-01 2.23521729e-02 2.57478757e-03
 3.46092398e-04 2.53882072e-04 1.21710284e-04 6.78417629e-05
 4.31239865e-05 3.55757828e-05 1.54336060e-05]
[102.25884535  62.82787241  18.17954237   6.17012232   2.26213804
   1.9374883    1.34148774   1.00154769   0.79851398   0.72527099
   0.47770169]


In [23]:
# Same ratios as printed above, shown as the raw array that the bar chart below is built from
pca.explained_variance_ratio_

array([6.66964936e-01, 3.06256070e-01, 2.34143777e-02, 2.45520394e-03,
       3.47101217e-04, 2.76957227e-04, 1.17860939e-04, 7.89541522e-05,
       3.74811087e-05, 3.40719723e-05, 1.69859153e-05])

In [56]:
# explained_variance_ratio_ holds fractions in [0, 1]; scale them to percent so the bars
# agree with the "(%)" in the title, and name each bar after its component.
n_pcs = len(pca.explained_variance_ratio_)
explained_pct = pd.Series(
    pca.explained_variance_ratio_ * 100,
    index=[f"PC{k}" for k in range(1, n_pcs + 1)],
)

fig2 = px.bar(
    explained_pct,
    title=f"Variance Explained (%) by the {n_pcs} PCs",
    labels={"index": "Principal component", "value": "Variance explained (%)"},
)
fig2.update_layout(showlegend=False)  # single series, so a legend adds nothing

fig2.show()

In [25]:
# Loadings of the first principal component: one weight per input column
pca.components_[0]

array([0.32357177, 0.32845786, 0.33401293, 0.34103812, 0.35481717,
       0.36135982, 0.3459509 , 0.30616953, 0.24067278, 0.14275121,
       0.1065169 ])

In [26]:
# Wrap the loading matrix in a DataFrame: row i holds the weights of component i,
# column j corresponds to the j-th input column
pca_comp = pd.DataFrame(pca.components_)
pca_comp

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,0.323572,0.328458,0.334013,0.341038,0.354817,0.36136,0.345951,0.30617,0.240673,0.142751,0.106517
1,-0.235226,-0.230348,-0.222579,-0.20315,-0.137431,-0.052768,0.146218,0.289142,0.412706,0.493557,0.510115
2,-0.385098,-0.327909,-0.229613,-0.093203,0.153501,0.345592,0.43105,0.324915,-0.000851,-0.331296,-0.372341
3,0.479784,0.237177,-0.081964,-0.411079,-0.477681,-0.181241,0.25148,0.32205,0.091563,-0.031273,-0.317022
4,0.399652,-0.128396,-0.207376,-0.31687,0.082654,0.322941,0.128487,-0.138677,-0.290858,-0.343827,0.575068
5,0.234641,-0.044626,-0.346516,-0.243832,0.301169,0.338366,-0.103997,-0.329645,0.035876,0.541243,-0.380155
6,0.411605,-0.409849,-0.252311,0.269711,0.169731,-0.189838,-0.297217,0.103082,0.515136,-0.304043,-0.067691
7,0.224547,-0.313857,-0.154804,0.331298,0.091462,-0.295437,0.131577,0.3381,-0.621752,0.321275,-0.016888
8,0.157666,-0.545977,0.4245,0.163416,-0.446519,0.213808,0.268927,-0.35469,0.076167,0.122622,-0.078021
9,0.043453,-0.301278,0.582262,-0.507219,0.298084,-0.021302,-0.333904,0.317293,-0.068179,0.042419,-0.055862


In [55]:
# components_ stores one principal component per ROW, so the loading DataFrame is
# transposed in the next cell to get one COLUMN per component
pca.components_[0:3]  ### first three components, one per row

array([[ 0.32357177,  0.32845786,  0.33401293,  0.34103812,  0.35481717,
         0.36135982,  0.3459509 ,  0.30616953,  0.24067278,  0.14275121,
         0.1065169 ],
       [-0.23522621, -0.23034792, -0.22257924, -0.20314976, -0.13743106,
        -0.05276815,  0.14621751,  0.28914208,  0.4127061 ,  0.4935572 ,
         0.51011529],
       [-0.38509799, -0.3279094 , -0.22961319, -0.09320297,  0.15350071,
         0.34559186,  0.43105009,  0.32491535, -0.00085066, -0.331296  ,
        -0.37234101]])

In [27]:
# Flip the loading matrix so that each column is one principal component
pca_T = pca_comp.transpose()

In [28]:
# Keep the first three components (the first three columns of the transposed loadings).
# .copy() makes pcx an independent frame, so adding the "Maturity" column below no
# longer triggers pandas' SettingWithCopyWarning.
pcx = pca_T[pca_T.columns[0:3]].copy()

pcx.columns = ['PC1', 'PC2', 'PC3']
# Label each row with the maturity its loading belongs to (same order as the input columns)
pcx["Maturity"] = ['1 Mo','3 Mo', '6 Mo', '1 Yr', '2 Yr', '3 Yr', '5 Yr', '7 Yr', '10 Yr', '20 Yr', '30 Yr']
pcx



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,PC1,PC2,PC3,Maturity
0,0.323572,-0.235226,-0.385098,1 Mo
1,0.328458,-0.230348,-0.327909,3 Mo
2,0.334013,-0.222579,-0.229613,6 Mo
3,0.341038,-0.20315,-0.093203,1 Yr
4,0.354817,-0.137431,0.153501,2 Yr
5,0.36136,-0.052768,0.345592,3 Yr
6,0.345951,0.146218,0.43105,5 Yr
7,0.30617,0.289142,0.324915,7 Yr
8,0.240673,0.412706,-0.000851,10 Yr
9,0.142751,0.493557,-0.331296,20 Yr


In [31]:
# Index the loadings by maturity so that it becomes the x-axis of the plot below
pcx = pcx.set_index('Maturity')

In [33]:
## Plot the loadings (eigenvector entries) of PC1-PC3 across maturities



# One line per component column of pcx, with the maturity index on the x-axis
fig = px.line(pcx )
fig.update_yaxes(title_text='Loadings')

fig.show()


### PCA Projections

In [41]:
# Sanity check: loading matrix is (components x input columns)
pca_comp.shape

(11, 11)

In [42]:
# Sanity check: standardized data is (observations x input columns), so it can be
# multiplied by the transposed loading matrix
data_std.shape

(2501, 11)

In [45]:
# Project every standardized observation onto the principal axes:
# (observations x features) @ (features x components) -> one score column per component
principal_component = data_std @ pca.components_.transpose()
principal_component.head()

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,10
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1/3/11,0.747459,4.078419,-0.291246,0.135703,-0.09348,-0.005485,-0.02025,0.0516,-0.004699,-0.002887,-0.002574
1/4/11,0.756816,4.1249,-0.34116,0.090392,-0.046758,0.02126,-0.028705,0.053112,0.000319,0.007801,-0.010255
1/5/11,1.10137,4.452151,-0.246205,0.084845,-0.033699,0.059412,-0.01927,0.051916,-0.000451,0.005691,0.002237
1/6/11,0.972003,4.349432,-0.312949,0.077624,-0.025122,0.053978,-0.038435,0.06541,-0.008895,-0.007704,-0.005139
1/7/11,0.704302,4.150832,-0.448997,0.058399,-0.035389,0.036443,-0.039166,0.078163,0.007305,-0.004414,0.023671


In [46]:
# Sanity check: one score per observation and per component
principal_component.shape

(2501, 11)

In [54]:
### Plotting PC1: scores of the first component (column 0) over the sample


fig3 = px.line(principal_component[0], title = 'First Principal Component' )

fig3.show()


In [52]:
### Plotting PC2: scores of the second component (column 1) over the sample


fig4 = px.line(principal_component[1], title = 'Second Principal Component' )

fig4.show()


In [53]:

### Plotting PC3: scores of the third component (column 2) over the sample


fig5 = px.line(principal_component[2], title = 'Third Principal Component' )

fig5.show()

###  Correlation between PC2 and Slope of Actual Yield Curve

In [57]:
# Calculate the 10Y-2Y slope.
# assign() returns a new DataFrame, so `data` itself never gains a "slope" column.
# Wrapping with pd.DataFrame(data=data) does not guarantee an independent copy, and a
# leaked "slope" column would end up in the standardisation / PCA cells when they are re-run.
df_s = data.assign(slope=data['10 Yr'] - data['2 Yr'])
df_s.head()

Unnamed: 0_level_0,1 Mo,3 Mo,6 Mo,1 Yr,2 Yr,3 Yr,5 Yr,7 Yr,10 Yr,20 Yr,30 Yr,slope
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1/3/11,0.11,0.15,0.19,0.29,0.61,1.03,2.02,2.74,3.36,4.18,4.39,2.75
1/4/11,0.12,0.14,0.19,0.28,0.63,1.04,2.01,2.72,3.36,4.21,4.44,2.73
1/5/11,0.13,0.14,0.19,0.31,0.71,1.16,2.14,2.86,3.5,4.34,4.55,2.79
1/6/11,0.13,0.15,0.18,0.3,0.68,1.11,2.09,2.8,3.44,4.31,4.53,2.76
1/7/11,0.13,0.14,0.18,0.29,0.6,1.02,1.96,2.69,3.34,4.25,4.48,2.74


In [64]:
# Start a comparison table from the slope series (single column named 'slope')
pc2_slope = df_s['slope'].to_frame()

In [66]:
# Add the second component's scores (column 1 of the projection); pandas aligns them on the row index
pc2_slope["PC2"] = principal_component[1]

In [71]:
# Give the slope column a descriptive name for the plot legend
pc2_slope = pc2_slope.rename(columns = {'slope':'10Y-2Y Slope'})
pc2_slope.head()

Unnamed: 0_level_0,10Y-2Y Slope,PC2
Year,Unnamed: 1_level_1,Unnamed: 2_level_1
1/3/11,2.75,4.078419
1/4/11,2.73,4.1249
1/5/11,2.79,4.452151
1/6/11,2.76,4.349432
1/7/11,2.74,4.150832


In [72]:
# Overlay the two series on one axis. Note the scales differ: the slope is a difference
# of raw yields, while PC2 is a score computed from standardized data.
fig6 = px.line(pc2_slope, title = 'PC2 vs. Slope of Actual Yield Curve' )

fig6.show()

In [73]:
### Finding the Correlation between PC2 and Slope of Actual Yield Curve
# np.corrcoef returns the 2x2 correlation matrix; the off-diagonal entry is the
# Pearson correlation between the two series

np.corrcoef(pc2_slope["PC2"], pc2_slope["10Y-2Y Slope"])


array([[1.        , 0.89225468],
       [0.89225468, 1.        ]])