In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from scipy import stats

import plotly as py
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

  import pandas.util.testing as tm


In [2]:
## load sp500 data
path_sp500 = "../data/market_data/SP500_HistoricalPrices.csv"
df_sp500 = pd.read_csv(path_sp500, usecols=["Date", " Close"])
df_sp500['Date'] = df_sp500['Date'].astype('datetime64[ns]')
df_sp500 = df_sp500.iloc[::-1]


## load aggregate measure data
df_measure = pd.read_excel("../data/market_data/aggregate_measure.xlsx", usecols=["ReleaseDate", "our_measure", "EndDate"])
df_measure["EndDate"] = pd.to_datetime(df_measure["EndDate"], format='%B/%d/%Y')
print("Mean of measure: ", df_measure["our_measure"].mean())
print("Std of measure: ", df_measure["our_measure"].std())
df_measure["delay"] = (df_measure["ReleaseDate"] - df_measure["EndDate"]).dt.days



## load treasury yield data  
# Sorce: https://home.treasury.gov/resource-center/data-chart-center/interest-rates/TextView?type=daily_treasury_yield_curve&field_tdr_date_value=all
df_treasury = pd.read_csv("../data/market_data/daily-treasury-rates.csv", usecols=["Date", "1 Yr", "10 Yr", "3 Mo"])
df_treasury['Date'] = df_treasury['Date'].astype('datetime64[ns]')
df_treasury['slope_10_1'] = df_treasury['10 Yr'] - df_treasury['1 Yr']
df_treasury['slope_10y_3m'] = df_treasury['10 Yr'] - df_treasury['3 Mo']
df_treasury = df_treasury.iloc[::-1]


Mean of measure:  -0.07036516881815903
Std of measure:  0.25285972934531076


In [8]:
######################## one day window analysis (% points) ########################

# calculate return of the day from closing price 
df_sp500["one_day_return"] = df_sp500[" Close"].pct_change(1)*100

df_merge = pd.merge(df_measure, df_sp500, left_on="ReleaseDate", right_on="Date", how="left")
#df_merge.to_csv("temp.csv")

# run OLS
var_x = df_merge["our_measure"].tolist()
var_x = sm.add_constant(var_x)

var_y = df_merge["one_day_return"].tolist()

result = sm.OLS(var_y, var_x).fit()
print(result.summary())


                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.003
Model:                            OLS   Adj. R-squared:                 -0.002
Method:                 Least Squares   F-statistic:                    0.5355
Date:                Tue, 19 Apr 2022   Prob (F-statistic):              0.465
Time:                        22:58:43   Log-Likelihood:                -310.25
No. Observations:                 200   AIC:                             624.5
Df Residuals:                     198   BIC:                             631.1
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0277      0.084      0.328      0.7

In [9]:
######################## one day window analysis (% points) Meeting End Date ########################
df_merge = pd.merge(df_measure, df_sp500, left_on="EndDate", right_on="Date", how="inner") # march 15, 2020 was holiday. 
#df_merge.to_csv("temp.csv")

# run OLS
var_x = df_merge["our_measure"].tolist()
var_x = sm.add_constant(var_x)

var_y = df_merge["one_day_return"].tolist()

result = sm.OLS(var_y, var_x).fit()
print(result.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.010
Model:                            OLS   Adj. R-squared:                  0.005
Method:                 Least Squares   F-statistic:                     2.009
Date:                Tue, 19 Apr 2022   Prob (F-statistic):              0.158
Time:                        22:58:49   Log-Likelihood:                -308.53
No. Observations:                 199   AIC:                             621.1
Df Residuals:                     197   BIC:                             627.6
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.2531      0.084      3.005      0.0

In [10]:
######################## intermeeting period analysis (Regression) ########################
df_merge = pd.merge(df_measure, df_sp500, left_on="ReleaseDate", right_on="Date", how="left")

df_merge["intermeeting_return"] = df_merge[" Close"].pct_change(1)*100
#df_merge.to_csv("temp.csv")

# run OLS
var_x = df_merge["our_measure"].tolist()[:-1]
var_x = sm.add_constant(var_x)

var_y = df_merge["intermeeting_return"].tolist()[1:]

result = sm.OLS(var_y, var_x).fit()
print(result.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.001
Model:                            OLS   Adj. R-squared:                 -0.004
Method:                 Least Squares   F-statistic:                    0.2552
Date:                Tue, 19 Apr 2022   Prob (F-statistic):              0.614
Time:                        23:25:55   Log-Likelihood:                -628.58
No. Observations:                 199   AIC:                             1261.
Df Residuals:                     197   BIC:                             1268.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.9988      0.421      2.373      0.0

In [11]:
######################## intermeeting period analysis (long-short portfolio) ########################
sp500_BAH_list = [100.0]
sp500_LSM_list = [100.0]
for i in range(177, len(var_x)):
    measure_value = df_merge["our_measure"].tolist()[i]
    pct_change = var_y[i]
    sp500_BAH_list.append(sp500_BAH_list[-1]*(1.0+(pct_change/100.0)))
    if measure_value <= 0.0: 
        sp500_LSM_list.append(sp500_LSM_list[-1]*(1.0+(pct_change/100.0)))
    else:
        sp500_LSM_list.append(sp500_LSM_list[-1]*(1.0-(pct_change/100.0)))

print(sp500_BAH_list[-1], sp500_LSM_list[-1])

141.85732290259222 157.623883223298


In [3]:
######################## Treasury Yield Analysis ########################
df_merge = pd.merge(df_measure, df_treasury, left_on="ReleaseDate", right_on="Date", how="left")
#df_merge.to_csv("temp.csv")
df_merge = df_merge.dropna()

# run OLS
var_x = df_merge["our_measure"].tolist()
var_x = sm.add_constant(var_x)

var_y = df_merge["slope_10_1"].tolist() #"1 Yr", "10 Yr", "slope_10_1", slope_10y_3m, 3 Mo

result = sm.OLS(var_y, var_x).fit()
print(result.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.230
Model:                            OLS   Adj. R-squared:                  0.226
Method:                 Least Squares   F-statistic:                     58.72
Date:                Thu, 02 Jun 2022   Prob (F-statistic):           8.06e-13
Time:                        23:31:28   Log-Likelihood:                -268.39
No. Observations:                 199   AIC:                             540.8
Df Residuals:                     197   BIC:                             547.4
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          1.2137      0.069     17.604      0.0

In [4]:
######################## Predict S&P returns from residual on 10Yr - 1Yr Treasury ########################
### get residual from regression
pred_y = result.predict(var_x)
residual_list = [(var_y[i]-pred_y[i]) for i in range(len(var_x)) ]

df_merge["slope_10_1_residual"] = pd.Series(residual_list)
df_measure_residual = df_merge[["ReleaseDate", "slope_10_1_residual"]]
df_measure_residual = df_measure_residual.dropna()

######################## one day window analysis (% points) ########################

# calculate return of the day from closing price 
df_sp500["one_day_return"] = df_sp500[" Close"].pct_change(1)*100

df_merge_sp = pd.merge(df_measure_residual, df_sp500, left_on="ReleaseDate", right_on="Date", how="left")
#df_merge.to_csv("temp.csv")

# run OLS
var_x = df_merge_sp["slope_10_1_residual"].tolist()
var_x = sm.add_constant(var_x)

var_y = df_merge_sp["one_day_return"].tolist()

result = sm.OLS(var_y, var_x).fit()
print(result.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.001
Model:                            OLS   Adj. R-squared:                 -0.004
Method:                 Least Squares   F-statistic:                    0.2078
Date:                Thu, 02 Jun 2022   Prob (F-statistic):              0.649
Time:                        23:32:03   Log-Likelihood:                -307.91
No. Observations:                 198   AIC:                             619.8
Df Residuals:                     196   BIC:                             626.4
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0371      0.082      0.453      0.6

In [17]:
df_merge_sp.to_csv("temp.csv", index=False)

# Beta for Different Maturities of Treasury

| Maturity | Alpha       | Beta       | 
| -------- | ----------- | ---------- | 
| 3 Month  | 2.5581***   | 6.1386***  | 
| 1 Year   | 2.7937***   | 6.3661***  | 
| 10 Year  | 4.0074***   | 4.3534***  | 
| 10Y-1Y   | -1.2137***  | -2.0127*** | 
| 10Y-3M   | 1.4493***   | -1.7851*** | 

In [13]:
######################## Comparison with CPI ########################
df_CPI = pd.read_csv("../data/market_data/CPIAUCSL.csv")
df_CPI['DATE'] = pd.to_datetime(df_CPI['DATE'], format='%d-%m-%Y')
df_CPI["CPI_change"] =  df_CPI["CPIAUCSL"].pct_change(12)*100
df_CPI = df_CPI[(df_CPI['DATE'] >= '1996-01-01') & (df_CPI['DATE'] <= '2021-12-31')]

# plot in plotly
# Create figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Add traces
fig.add_trace(
    go.Scatter(x=df_measure["ReleaseDate"], y=df_measure["our_measure"]*10, name="Our Measure"),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(x=df_CPI['DATE'], y=df_CPI["CPI_change"], name="CPI"),
    secondary_y=True,
)

# Add figure title
fig.update_layout(
    title_text="CPI vs Our Measure"
)

# Set x-axis title
fig.update_xaxes(title_text="Date")

# Set y-axes titles
fig.update_yaxes(title_text="Hawkish-Dovish Measure*10", secondary_y=False)
fig.update_yaxes(title_text="CPI Percentage Change", secondary_y=True)

fig.show()


In [14]:
######################## CPI correlation ########################
list_next_CPI_data = []
release_date_idx = 0
CPI_date_index = 0
while release_date_idx < len(list(df_measure["ReleaseDate"])):  
    release_date = df_measure.iloc[release_date_idx]["ReleaseDate"]
    if (df_CPI.iloc[CPI_date_index]['DATE'] < release_date):
        CPI_date_index = CPI_date_index + 1
    else:
        list_next_CPI_data.append(df_CPI.iloc[CPI_date_index]['CPI_change'])
        release_date_idx = release_date_idx + 1

# full sample
print("Full sample: ", stats.pearsonr(list(df_measure["our_measure"]), list_next_CPI_data))
print(np.mean(list(df_measure["delay"])))

# Greenspan: 0, 82
print("Greenspan: ", stats.pearsonr(list(df_measure["our_measure"])[0:82], list_next_CPI_data[0:82]))
print(np.mean(list(df_measure["delay"])[0:82]))

# Bernanke: 82, 145
print("Bernanke: ", stats.pearsonr(list(df_measure["our_measure"])[82:145], list_next_CPI_data[82:145]))
print(np.mean(list(df_measure["delay"])[82:145]))

# Yellen: 145, 177
print("Yellen: ", stats.pearsonr(list(df_measure["our_measure"])[145:177], list_next_CPI_data[145:177]))
print(np.mean(list(df_measure["delay"])[145:177]))

# Powell: 177, 200
print("Powell: ", stats.pearsonr(list(df_measure["our_measure"])[177:200], list_next_CPI_data[177:200]))
print(np.mean(list(df_measure["delay"])[177:200]))


Full sample:  (0.5278368322302096, 9.634862846121302e-16)
30.39
Greenspan:  (0.42319088270594657, 7.459177133827703e-05)
43.86585365853659
Bernanke:  (0.477341130059395, 7.631139724913931e-05)
20.96825396825397
Yellen:  (0.6581590933381282, 4.2351570750057156e-05)
21.0
Powell:  (0.7629765742517846, 2.2979711195544488e-05)
21.217391304347824


In [15]:
######################## Comparison with PPI ########################
df_PPI = pd.read_csv("../data/market_data/PPIACO.csv")
df_PPI['DATE'] = pd.to_datetime(df_PPI['DATE'], format='%Y-%m-%d')
df_PPI["PPI_change"] =  df_PPI["PPIACO"].pct_change(12)*100
df_PPI = df_PPI[(df_PPI['DATE'] >= '1996-01-01') & (df_PPI['DATE'] <= '2021-12-31')]

# plot in plotly
# Create figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Add traces
fig.add_trace(
    go.Scatter(x=df_measure["ReleaseDate"], y=df_measure["our_measure"]*10, name="Our Measure"),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(x=df_CPI['DATE'], y=df_PPI["PPI_change"], name="PPI"),
    secondary_y=True,
)

# Add figure title
fig.update_layout(
    title_text="PPI vs Our Measure"
)

# Set x-axis title
fig.update_xaxes(title_text="Date")

# Set y-axes titles
fig.update_yaxes(title_text="Hawkish-Dovish Measure*10", secondary_y=False)
fig.update_yaxes(title_text="PPI Percentage Change", secondary_y=True)

fig.show()


In [16]:
######################## PPI correlation ########################
list_next_PPI_data = []
release_date_idx = 0
PPI_date_index = 0
while release_date_idx < len(list(df_measure["ReleaseDate"])):  
    release_date = df_measure.iloc[release_date_idx]["ReleaseDate"]
    if (df_PPI.iloc[PPI_date_index]['DATE'] < release_date):
        PPI_date_index = PPI_date_index + 1
    else:
        list_next_PPI_data.append(df_PPI.iloc[PPI_date_index]['PPI_change'])
        release_date_idx = release_date_idx + 1

# full sample
print("Full sample: ", stats.pearsonr(list(df_measure["our_measure"]), list_next_PPI_data))
print(np.mean(list(df_measure["our_measure"])))

# Greenspan: 0, 82
print("Greenspan: ", stats.pearsonr(list(df_measure["our_measure"])[0:82], list_next_PPI_data[0:82]))
print(np.mean(list(df_measure["delay"])[0:82]))

# Bernanke: 82, 145
print("Bernanke: ", stats.pearsonr(list(df_measure["our_measure"])[82:145], list_next_PPI_data[82:145]))
print(np.mean(list(df_measure["delay"])[82:145]))

# Yellen: 145, 177
print("Yellen: ", stats.pearsonr(list(df_measure["our_measure"])[145:177], list_next_PPI_data[145:177]))
print(np.mean(list(df_measure["delay"])[145:177]))

# Powell: 177, 200
print("Powell: ", stats.pearsonr(list(df_measure["our_measure"])[177:200], list_next_PPI_data[177:200]))
print(np.mean(list(df_measure["delay"])[177:200]))


Full sample:  (0.4082512354522528, 1.9624932093120983e-09)
-0.07036516881815903
Greenspan:  (0.34747801154559316, 0.0013812875039265055)
43.86585365853659
Bernanke:  (0.3981904853034765, 0.0012291265992130835)
20.96825396825397
Yellen:  (0.658522600540456, 4.1798050514694006e-05)
21.0
Powell:  (0.7421542326640974, 5.029677343791348e-05)
21.217391304347824


In [25]:
######################## Recession Prediction ########################
# data source: https://fred.stlouisfed.org/series/USREC
df_REC = pd.read_csv("../data/market_data/USREC.csv")
df_REC['DATE'] = pd.to_datetime(df_REC['DATE'], format='%Y-%m-%d')
df_REC = df_REC[(df_REC['DATE'] >= '1996-01-01') & (df_REC['DATE'] <= '2021-12-31')]

list_next_REC_data = []
release_date_idx = 0
REC_date_index = 0
while release_date_idx < len(list(df_measure["ReleaseDate"])):  
    release_date = df_measure.iloc[release_date_idx]["ReleaseDate"]
    if (df_REC.iloc[REC_date_index]['DATE'] < release_date):
        REC_date_index = REC_date_index + 1
    else:
        list_next_REC_data.append(df_REC.iloc[REC_date_index]['USREC'])
        release_date_idx = release_date_idx + 1

### logistics regression
var_x = df_measure["our_measure"].tolist()
var_x = sm.add_constant(var_x)

var_y = list_next_REC_data
print(df_measure.shape)
print(len(var_x),len(var_y))

# building the model and fitting the data
log_reg = sm.Logit(var_y, var_x).fit()
print(log_reg.summary())

df_measure['yhat'] = log_reg.predict(var_x)

(200, 4)
200 200
Optimization terminated successfully.
         Current function value: 0.278908
         Iterations 7
                           Logit Regression Results                           
Dep. Variable:                      y   No. Observations:                  200
Model:                          Logit   Df Residuals:                      198
Method:                           MLE   Df Model:                            1
Date:                Tue, 19 Apr 2022   Pseudo R-squ.:                 0.07810
Time:                        23:47:40   Log-Likelihood:                -55.782
converged:                       True   LL-Null:                       -60.508
Covariance Type:            nonrobust   LLR p-value:                  0.002109
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -2.8176      0.363     -7.755      0.000      -3.530      -2.106
x1          

In [26]:
# plot in plotly
# Create figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Add traces
fig.add_trace(
    go.Scatter(x=df_measure["ReleaseDate"], y=df_measure['yhat'], name="Estimated Prob."),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(x=df_REC['DATE'], y=df_REC["USREC"], name="USREC"),
    secondary_y=True,
)

# Add figure title
fig.update_layout(
    title_text="Model implied probability of recession and recession indicator"
)

# Set x-axis title
fig.update_xaxes(title_text="Date")

# Set y-axes titles
fig.update_yaxes(title_text="Probability", secondary_y=False)
fig.update_yaxes(title_text="USREC", secondary_y=True)

fig.show()