In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from scipy import stats

  import pandas.util.testing as tm


# Load Data

In [2]:
## load sp500 data
path_sp500 = "../data/market_data/SP500_HistoricalPrices.csv"
df_sp500 = pd.read_csv(path_sp500, usecols=["Date", " Close"])
df_sp500['Date'] = df_sp500['Date'].astype('datetime64[ns]')
df_sp500 = df_sp500.iloc[::-1]

# calculate return of the day from closing price 
df_sp500["one_day_return"] = df_sp500[" Close"].pct_change(1)*100


## load CPI data  
df_CPI = pd.read_csv("../data/market_data/CPIAUCSL.csv")
df_CPI['DATE'] = pd.to_datetime(df_CPI['DATE'], format='%d-%m-%Y')
df_CPI["CPI_change"] =  df_CPI["CPIAUCSL"].pct_change(12)*100
df_CPI = df_CPI[(df_CPI['DATE'] >= '1996-01-01') & (df_CPI['DATE'] <= '2021-12-31')]


## load aggregate measure data for meeting minutes
df_measure = pd.read_excel("../data/market_data/aggregate_measure.xlsx", usecols=["ReleaseDate", "our_measure", "EndDate"])
df_measure["EndDate"] = pd.to_datetime(df_measure["EndDate"], format='%B/%d/%Y')
print("Mean of measure: ", df_measure["our_measure"].mean())
print("Std of measure: ", df_measure["our_measure"].std())
df_measure["delay"] = (df_measure["ReleaseDate"] - df_measure["EndDate"]).dt.days


## load aggregate measure data for speeches
df_measure_speeches = pd.read_excel("../data/market_data/aggregate_measure_speech.xlsx", usecols=["Date", "our_measure", "number_of_filtered_sent"])
df_measure_speeches['Date'] = df_measure_speeches['Date'].astype('datetime64[ns]')
df_measure_speeches = df_measure_speeches.loc[df_measure_speeches['number_of_filtered_sent'] > 10]
print("Mean of measure (Speeches): ", df_measure_speeches["our_measure"].mean())
print("Std of measure (Speeches): ", df_measure_speeches["our_measure"].std())


## load aggregate measure data for testimonies
df_measure_testimonies = pd.read_excel("../data/market_data/aggregate_measure_testimony.xlsx", usecols=["Date", "our_measure", "number_of_filtered_sent"])
df_measure_testimonies['Date'] = df_measure_testimonies['Date'].astype('datetime64[ns]')
df_measure_testimonies = df_measure_testimonies.loc[df_measure_testimonies['number_of_filtered_sent'] > 10]
print("Mean of measure (Testimonies): ", df_measure_testimonies["our_measure"].mean())
print("Std of measure (Testimonies): ", df_measure_testimonies["our_measure"].std())


Mean of measure:  -0.07036516881815903
Std of measure:  0.25285972934531076
Mean of measure (Speeches):  -0.16540441916008608
Std of measure (Speeches):  0.2649489325956476
Mean of measure (Testimonies):  -0.1728534318526911
Std of measure (Testimonies):  0.30086316687490094


In [4]:
######################## Predict S&P returns from residual on CPI  ########################
## For Meeting Minutes
date_type = "ReleaseDate"#"EndDate" 

list_next_CPI_data = []
release_date_idx = 0
CPI_date_index = 0
while release_date_idx < len(list(df_measure["ReleaseDate"])):  
    release_date = df_measure.iloc[release_date_idx]["ReleaseDate"]
    if (df_CPI.iloc[CPI_date_index]['DATE'] < release_date):
        CPI_date_index = CPI_date_index + 1
    else:
        list_next_CPI_data.append(df_CPI.iloc[CPI_date_index]['CPI_change'])
        release_date_idx = release_date_idx + 1

# run OLS
var_x = df_measure["our_measure"].tolist()
var_x = sm.add_constant(var_x)

var_y = list_next_CPI_data

result = sm.OLS(var_y, var_x).fit()
#print(result.summary())


### get residual from regression
pred_y = result.predict(var_x)
residual_list = [(var_y[i]-pred_y[i]) for i in range(len(var_x)) ]

df_measure["cpi_residual"] = pd.Series(residual_list)
df_measure_residual = df_measure[[date_type, "cpi_residual"]]
df_measure_residual = df_measure_residual.dropna()


######################## one day window analysis (% points) ########################
# calculate return of the day from closing price 
df_sp500["one_day_return"] = df_sp500[" Close"].pct_change(1)*100

df_merge_sp = pd.merge(df_measure_residual, df_sp500, left_on=date_type, right_on="Date", how="left")

# run OLS
var_x = df_merge_sp["cpi_residual"].tolist()
var_x = sm.add_constant(var_x)

var_y = df_merge_sp["one_day_return"].tolist()

result = sm.OLS(var_y, var_x).fit()
print(result.summary())


                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.279
Model:                            OLS   Adj. R-squared:                  0.275
Method:                 Least Squares   F-statistic:                     76.47
Date:                Fri, 03 Jun 2022   Prob (F-statistic):           9.63e-16
Time:                        00:18:09   Log-Likelihood:                -275.42
No. Observations:                 200   AIC:                             554.8
Df Residuals:                     198   BIC:                             561.4
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          2.3112      0.071     32.663      0.0

In [7]:
######################## Predict S&P returns from residual on 10Yr - 1Yr Treasury  ########################
## For Speeches
date_type = "Date"
list_next_CPI_data = []
release_date_idx = 0
CPI_date_index = 0
while release_date_idx < len(list(df_measure_speeches["Date"])):  
    release_date = df_measure_speeches.iloc[release_date_idx]["Date"]
    if (df_CPI.iloc[CPI_date_index]['DATE'] < release_date):
        CPI_date_index = CPI_date_index + 1
    else:
        list_next_CPI_data.append(df_CPI.iloc[CPI_date_index]['CPI_change'])
        release_date_idx = release_date_idx + 1

# run OLS
var_x = df_measure_speeches["our_measure"].tolist()
var_x = sm.add_constant(var_x)

var_y = list_next_CPI_data

result = sm.OLS(var_y, var_x).fit()
#print(result.summary())


### get residual from regression
pred_y = result.predict(var_x)
residual_list = [(var_y[i]-pred_y[i]) for i in range(len(var_x)) ]

df_measure_speeches["cpi_residual"] = pd.Series(residual_list)
df_measure_residual = df_measure_speeches[[date_type, "cpi_residual"]]
df_measure_residual = df_measure_residual.dropna()


######################## one day window analysis (% points) ########################
# calculate return of the day from closing price 
df_sp500["one_day_return"] = df_sp500[" Close"].pct_change(1)*100

df_merge_sp = pd.merge(df_measure_residual, df_sp500, left_on=date_type, right_on="Date", how="left")
df_merge_sp = df_merge_sp.dropna()

# run OLS
var_x = df_merge_sp["cpi_residual"].tolist()
var_x = sm.add_constant(var_x)

var_y = df_merge_sp["one_day_return"].tolist()

result = sm.OLS(var_y, var_x).fit()
print(result.summary())



                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.001
Model:                            OLS   Adj. R-squared:                 -0.001
Method:                 Least Squares   F-statistic:                    0.4394
Date:                Fri, 03 Jun 2022   Prob (F-statistic):              0.508
Time:                        00:21:50   Log-Likelihood:                -634.24
No. Observations:                 378   AIC:                             1272.
Df Residuals:                     376   BIC:                             1280.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0321      0.067     -0.480      0.6

In [8]:
######################## Predict S&P returns from residual on 10Yr - 1Yr Treasury  ########################
## For Testimonies
date_type = "Date"
list_next_CPI_data = []
release_date_idx = 0
CPI_date_index = 0
while release_date_idx < len(list(df_measure_testimonies["Date"])):  
    release_date = df_measure_testimonies.iloc[release_date_idx]["Date"]
    if (df_CPI.iloc[CPI_date_index]['DATE'] < release_date):
        CPI_date_index = CPI_date_index + 1
    else:
        list_next_CPI_data.append(df_CPI.iloc[CPI_date_index]['CPI_change'])
        release_date_idx = release_date_idx + 1

# run OLS
var_x = df_measure_testimonies["our_measure"].tolist()
var_x = sm.add_constant(var_x)

var_y = list_next_CPI_data

result = sm.OLS(var_y, var_x).fit()
#print(result.summary())


### get residual from regression
pred_y = result.predict(var_x)
residual_list = [(var_y[i]-pred_y[i]) for i in range(len(var_x)) ]

df_measure_testimonies["cpi_residual"] = pd.Series(residual_list)
df_measure_residual = df_measure_testimonies[[date_type, "cpi_residual"]]
df_measure_residual = df_measure_residual.dropna()


######################## one day window analysis (% points) ########################
# calculate return of the day from closing price 
df_sp500["one_day_return"] = df_sp500[" Close"].pct_change(1)*100

df_merge_sp = pd.merge(df_measure_residual, df_sp500, left_on=date_type, right_on="Date", how="left")
df_merge_sp = df_merge_sp.dropna()

# run OLS
var_x = df_merge_sp["cpi_residual"].tolist()
var_x = sm.add_constant(var_x)

var_y = df_merge_sp["one_day_return"].tolist()

result = sm.OLS(var_y, var_x).fit()
print(result.summary())



                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                      -0.000
Model:                            OLS   Adj. R-squared:                 -0.000
Method:                 Least Squares   F-statistic:                      -inf
Date:                Fri, 03 Jun 2022   Prob (F-statistic):                nan
Time:                        00:22:59   Log-Likelihood:                -105.79
No. Observations:                  57   AIC:                             213.6
Df Residuals:                      56   BIC:                             215.6
Df Model:                           0                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.1822      0.207      0.881      0.3

  return self.ess/self.df_model
