In [211]:
import pandas as pd
import numpy as np
from scipy.spatial.distance import mahalanobis as maha

## Data Cleaning

- Load Data
- Calculate yield curve slope and Y/Y changes in relevant columns


#### Data Definitions
- TNX: US 10y Treasury
- US_Corp: ML US Corporate Bond Total Return Index
- LIBOR: 3m LIBOR Rate
- BAA: Moody's long-term corporate bond yields index
- UNRATE: US seasonally-adjusted unemployment rate
- SPY: S\&P 500 Index
- IRX: US 3m treasury rate
- RGDP: US seasonally-adjusted Real GDP


In [207]:
#Missing Commodity Index Data
data = pd.read_csv('data/data.csv', index_col=0)
data.index = pd.to_datetime(data.index)

In [208]:
#Linear Interpolation Forward fill - for GDP data which is quarterly
#Could potentially want to only use quarterly data - set lin_interp = False
lin_interp = False

if lin_interp:
    data['RGDP'] = data['RGDP'].interpolate()
else:
    data = data.dropna(subset = ['RGDP'])

In [209]:
#Yield Curve Slope: 10y yields - 3m yields
data['YC_Slope'] =  pd.eval('data.TNX - data.IRX')
#Credit Spread: long-term BAA (corp bonds) - 10y treasury rate
data['Cred_Spread'] = pd.eval('data.BAA - data.TNX')

In [210]:
#Fill in Y/Y changes
YY_cols = ['CPI', 'RGDP']
data[[x + '_Growth' for x in YY_cols]] = data[YY_cols]/data[YY_cols].shift(1) - 1

#Drop null rows
data = data.dropna()

#Subtract mean
# data = data - data.mean()

## Defining Scenarios

- Define a scenario with a boolean string. Make sure to use spaces between operators (for calculation of mahalanobis distances.
- Filtering on this scenario, we calculate the Mahalanobis distance
- We then convert scenario Mahalanobis distance into likelihood measure:
$$ e^{\frac{-d}{2}}$$
- Rescale probabilities to sum to 1

In [247]:
#Define scenario, use spaces between operators for ease of parsing.
scenario = 'RGDP_Growth < 0.01 & UNRATE >= 6'

#Get relevant variables from the scenario defined above.
scenario_vars = [v.split(' ')[0] for v in scenario.split(' & ')]

#Filter data on the above scenario
scen_data = data.query(scenario)[scenario_vars]

#Get the empirical mean & correlation matrix of scenario vars during specified scenario
v = scen_data.mean().values
scen_corr = scen_data.corr()

#Calculate mahalanobis distance, transform to likelihood measure
data['L_1'] = scen_data.apply(lambda x: np.exp(-maha(x ,v, scen_corr)/2), raw = True, axis = 1)

#Rescale for likelihood
data['Prob_1'] = data['L_1'].dropna()/data['L_1'].sum()
data['Prob_1'] = data['Prob_1'].fillna(0)
data.Prob_1

Date
1993-01-01    0.043120
1993-04-01    0.039028
1993-07-01    0.035311
1993-10-01    0.000000
1994-01-01    0.030403
1994-04-01    0.000000
1994-07-01    0.023672
1994-10-01    0.000000
1995-01-01    0.000000
1995-04-01    0.000000
1995-07-01    0.000000
1995-10-01    0.000000
1996-01-01    0.000000
1996-04-01    0.000000
1996-07-01    0.000000
1996-10-01    0.000000
1997-01-01    0.000000
1997-04-01    0.000000
1997-07-01    0.000000
1997-10-01    0.000000
1998-01-01    0.000000
1998-04-01    0.000000
1998-07-01    0.000000
1998-10-01    0.000000
1999-01-01    0.000000
1999-04-01    0.000000
1999-07-01    0.000000
1999-10-01    0.000000
2000-01-01    0.000000
2000-04-01    0.000000
                ...   
2012-04-01    0.054731
2012-07-01    0.054742
2012-10-01    0.055365
2013-01-01    0.060427
2013-04-01    0.050097
2013-07-01    0.043138
2013-10-01    0.041034
2014-01-01    0.030377
2014-04-01    0.000000
2014-07-01    0.000000
2014-10-01    0.000000
2015-01-01    0.000000
2015-0