In [12]:
import pandas as pd
import numpy as np

## Data Cleaning

- Load Data
- Calculate yield curve slope and Y/Y changes in relevant columns


#### Data Definitions
- TNX: US 10y Treasury
- US_Corp: ML US Corporate Bond Total Return Index
- LIBOR: 3m LIBOR Rate
- BAA: Moody's long-term corporate bond yields index
- UNRATE: US seasonally-adjusted unemployment rate
- SPY: S\&P 500 Index
- IRX: US 3m treasury rate
- RGDP: US seasonally-adjusted Real GDP


In [134]:
#Missing Commodity Index Data
data = pd.read_csv('data/data.csv', index_col=0)
data.index = pd.to_datetime(data.index)

In [135]:
#Linear Interpolation Forward fill - for GDP data which is quarterly
#Could potentially want to only use quarterly data - set lin_interp = False
lin_interp = True

if lin_interp:
    data['RGDP'] = data['RGDP'].interpolate()
else:
    data = data.dropna(subset = ['RGDP'])

In [136]:
#Yield Curve Slope: 10y yields - 3m yields
data['YC_Slope'] =  data.TNX - data.IRX

In [137]:
#Fill in Y/Y changes
YY_cols = ['CPI', 'RGDP']
data[[x + '_YY' for x in YY_cols]] = data[YY_cols]/data[YY_cols].shift(1)

#Drop null rows
data = data.dropna()


In [98]:
data.query('TNX > 6.39 & US_Corp > 684')

Unnamed: 0_level_0,TNX,US_Corp,LIBOR,CPI,BAA,UNRATE,SPY,IRX,RGDP,YC_Slope,CPI_YY,RGDP_YY
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1994-03-01,6.774,684.651304,3.877378,155.3,8.13,6.5,44.59375,3.47,10280.997333,3.304,0.99678,0.995524
1995-02-01,7.21,694.397368,6.261719,159.4,8.85,5.4,49.015625,5.75,10554.129333,1.46,0.997491,0.999007
1995-03-01,7.189,709.248261,6.268682,159.9,8.7,5.4,50.109375,5.7,10564.614667,1.489,0.996873,0.999008
1995-04-01,7.046,720.167,6.236111,160.4,8.6,5.8,51.59375,5.69,10575.1,1.356,0.996883,0.999008
1995-07-01,6.429,764.0385,5.89788,161.4,8.04,5.7,56.15625,5.42,10665.06,1.009,0.998141,0.997188
1996-04-01,6.633,794.759546,5.493946,164.6,8.19,5.6,65.390625,5.01,10998.322,1.623,0.998785,0.994532
1996-05-01,6.844,792.749091,5.496652,165.0,8.3,5.6,66.875,5.04,11031.206667,1.804,0.997576,0.997019
1996-06-01,6.711,791.897143,5.569727,165.4,8.4,5.3,67.109375,5.03,11064.091333,1.681,0.997582,0.997028
1996-07-01,6.79,798.948182,5.640286,165.7,8.35,5.5,64.09375,5.18,11096.976,1.61,0.998189,0.997037
1996-08-01,6.936,813.626522,5.527344,166.0,8.18,5.1,65.328125,5.14,11135.385667,1.796,0.998193,0.996551
