## Import required packages

In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from scipy import stats

import plotly as py
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import dateutil

## Load necessary datasets in form of pandas dataframe

In [2]:
## load aggregate measure data

# meeting minutes
df_measure_mm = pd.read_excel("../data/market_analysis_data/aggregate_measure_mm.xlsx", usecols=["ReleaseDate", "EndDate", "our_measure"])
df_measure_mm["EndDate"] = pd.to_datetime(df_measure_mm["EndDate"], format='%B/%d/%Y')
df_measure_mm["delay"] = (df_measure_mm["ReleaseDate"] - df_measure_mm["EndDate"]).dt.days

# speeches
df_measure_sp = pd.read_excel("../data/market_analysis_data/aggregate_measure_sp.xlsx", usecols=["Date", "our_measure"])
df_measure_sp['Date'] = df_measure_sp['Date'].apply(dateutil.parser.parse)
df_measure_sp["Date"] = pd.to_datetime(df_measure_sp["Date"], format='%m/%d/%Y')

# press conferences 
df_measure_pc = pd.read_excel("../data/market_analysis_data/aggregate_measure_pc.xlsx", usecols=["EndDate", "our_measure"])
df_measure_pc["EndDate"] = pd.to_datetime(df_measure_pc["EndDate"], format='%B/%d/%Y')


## load CPI and PPI data 

# CPI
df_CPI = pd.read_csv("../data/market_analysis_data/CPIAUCSL.csv")
df_CPI['DATE'] = pd.to_datetime(df_CPI['DATE'], format='%Y-%m-%d')
df_CPI["CPI_change"] =  df_CPI["CPIAUCSL"].pct_change(12)*100
df_CPI = df_CPI[(df_CPI['DATE'] >= '1996-01-01') & (df_CPI['DATE'] <= '2022-12-31')]

# PPI
df_PPI = pd.read_csv("../data/market_analysis_data/PPIACO.csv")
df_PPI['DATE'] = pd.to_datetime(df_PPI['DATE'], format='%Y-%m-%d')
df_PPI["PPI_change"] =  df_PPI["PPIACO"].pct_change(12)*100
df_PPI = df_PPI[(df_PPI['DATE'] >= '1996-01-01') & (df_PPI['DATE'] <= '2022-12-31')]

## Visualise CPI and PPI %change along with our measure (from meeting minutes) for qualitative comparison

In [6]:
# CPI plot
# Create figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Add traces
fig.add_trace(
    go.Scatter(x=df_measure_mm["ReleaseDate"], y=df_measure_mm["our_measure"]*10, name="Our Measure"),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(x=df_CPI['DATE'], y=df_CPI["CPI_change"], name="CPI"),
    secondary_y=True,
)

# Add figure title
fig.update_layout(
    title_text="CPI vs Our Measure",
    font=dict(
        size=22,
    )
)

# Set x-axis title
fig.update_xaxes(title_text="Date")

# Set y-axes titles
fig.update_yaxes(title_text="Hawkish-Dovish Measure*10", secondary_y=False)
fig.update_yaxes(title_text="CPI Percentage Change", secondary_y=True)

fig.show()



# PPI plot
# Create figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Add traces
fig.add_trace(
    go.Scatter(x=df_measure_mm["ReleaseDate"], y=df_measure_mm["our_measure"]*10, name="Our Measure"),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(x=df_CPI['DATE'], y=df_PPI["PPI_change"], name="PPI"),
    secondary_y=True,
)

# Add figure title
fig.update_layout(
    title_text="PPI vs Our Measure",
    font=dict(
        size=22,
    )
)

# Set x-axis title
fig.update_xaxes(title_text="Date")

# Set y-axes titles
fig.update_yaxes(title_text="Hawkish-Dovish Measure*10", secondary_y=False)
fig.update_yaxes(title_text="PPI Percentage Change", secondary_y=True)

fig.show()

## Correlation analysis of measure with CPI and PPI

In [4]:
######################## CPI correlation ########################
list_next_CPI_data = []
release_date_idx = 0
CPI_date_index = 0
while release_date_idx < len(list(df_measure_mm["ReleaseDate"])):  
    release_date = df_measure_mm.iloc[release_date_idx]["ReleaseDate"]
    if (df_CPI.iloc[CPI_date_index]['DATE'] < release_date):
        CPI_date_index = CPI_date_index + 1
    else:
        list_next_CPI_data.append(df_CPI.iloc[CPI_date_index]['CPI_change'])
        release_date_idx = release_date_idx + 1

# full sample
print("Full sample: ", stats.pearsonr(list(df_measure_mm["our_measure"]), list_next_CPI_data))
print(np.mean(list(df_measure_mm["delay"])))

# Greenspan: 0, 81 (August 11, 1987	January 31, 2006)
print("Greenspan: ", stats.pearsonr(list(df_measure_mm["our_measure"])[0:81], list_next_CPI_data[0:81]))
print(np.mean(list(df_measure_mm["delay"])[0:81]))

# Bernanke: 81, 145 (February 1, 2006	January 31, 2014)
print("Bernanke: ", stats.pearsonr(list(df_measure_mm["our_measure"])[81:145], list_next_CPI_data[81:145]))
print(np.mean(list(df_measure_mm["delay"])[81:145]))

# Yellen: 145, 177 (February 3, 2014	February 3, 2018)
print("Yellen: ", stats.pearsonr(list(df_measure_mm["our_measure"])[145:177], list_next_CPI_data[145:177]))
print(np.mean(list(df_measure_mm["delay"])[145:177]))

# Powell: 177 (February 5, 2018) onwards
print("Powell: ", stats.pearsonr(list(df_measure_mm["our_measure"])[177:], list_next_CPI_data[177:]))
print(np.mean(list(df_measure_mm["delay"])[177:]))


######################## PPI correlation ########################
list_next_PPI_data = []
release_date_idx = 0
PPI_date_index = 0
while release_date_idx < len(list(df_measure_mm["ReleaseDate"])):  
    release_date = df_measure_mm.iloc[release_date_idx]["ReleaseDate"]
    if (df_PPI.iloc[PPI_date_index]['DATE'] < release_date):
        PPI_date_index = PPI_date_index + 1
    else:
        list_next_PPI_data.append(df_PPI.iloc[PPI_date_index]['PPI_change'])
        release_date_idx = release_date_idx + 1

# full sample
print("Full sample: ", stats.pearsonr(list(df_measure_mm["our_measure"]), list_next_PPI_data))
print(np.mean(list(df_measure_mm["our_measure"])))

# Greenspan: 0, 81
print("Greenspan: ", stats.pearsonr(list(df_measure_mm["our_measure"])[0:81], list_next_PPI_data[0:81]))
print(np.mean(list(df_measure_mm["delay"])[0:81]))

# Bernanke: 81, 145
print("Bernanke: ", stats.pearsonr(list(df_measure_mm["our_measure"])[81:145], list_next_PPI_data[81:145]))
print(np.mean(list(df_measure_mm["delay"])[81:145]))

# Yellen: 145, 177
print("Yellen: ", stats.pearsonr(list(df_measure_mm["our_measure"])[145:177], list_next_PPI_data[145:177]))
print(np.mean(list(df_measure_mm["delay"])[145:177]))

# Powell: 177, 200
print("Powell: ", stats.pearsonr(list(df_measure_mm["our_measure"])[177:], list_next_PPI_data[177:]))
print(np.mean(list(df_measure_mm["delay"])[177:]))


Full sample:  (0.5407550746008091, 1.1938343833005748e-17)
29.77570093457944
Greenspan:  (0.45474833852468577, 2.0002203099362766e-05)
44.148148148148145
Bernanke:  (0.5073566250619854, 1.87971433999437e-05)
20.96875
Yellen:  (0.5476391904065704, 0.0011777949886025638)
21.0
Powell:  (0.8147155037450695, 8.440464628639026e-10)
21.135135135135137
Full sample:  (0.45084318418277086, 4.132355784055568e-12)
0.01971763887248014
Greenspan:  (0.4227537305933533, 8.436115643605386e-05)
44.148148148148145
Bernanke:  (0.40024031968930046, 0.0010493280751330681)
20.96875
Yellen:  (0.5722036189087323, 0.0006220540937779639)
21.0
Powell:  (0.7072066850043599, 9.908802534706542e-07)
21.135135135135137


## Correlation analysis of CPI and PPI with PC and SP measure

In [5]:
######################## CPI correlation with SP ########################
list_next_CPI_data = []
release_date_idx = 0
CPI_date_index = 0
while release_date_idx < len(list(df_measure_sp["Date"])):  
    release_date = df_measure_sp.iloc[release_date_idx]["Date"]
    if (df_CPI.iloc[CPI_date_index]['DATE'] < release_date):
        CPI_date_index = CPI_date_index + 1
    else:
        list_next_CPI_data.append(df_CPI.iloc[CPI_date_index]['CPI_change'])
        release_date_idx = release_date_idx + 1

print("CPI and SP", stats.pearsonr(list(df_measure_sp["our_measure"]), list_next_CPI_data))

######################## PPI correlation with SP ########################
list_next_PPI_data = []
release_date_idx = 0
PPI_date_index = 0
while release_date_idx < len(list(df_measure_sp["Date"])):  
    release_date = df_measure_sp.iloc[release_date_idx]["Date"]
    if (df_PPI.iloc[PPI_date_index]['DATE'] < release_date):
        PPI_date_index = PPI_date_index + 1
    else:
        list_next_PPI_data.append(df_PPI.iloc[PPI_date_index]['PPI_change'])
        release_date_idx = release_date_idx + 1

print("PPI and SP", stats.pearsonr(list(df_measure_sp["our_measure"]), list_next_PPI_data))


######################## CPI correlation with PC ########################
list_next_CPI_data = []
release_date_idx = 0
CPI_date_index = 0
while release_date_idx < len(list(df_measure_pc["EndDate"])):  
    release_date = df_measure_pc.iloc[release_date_idx]["EndDate"]
    if (df_CPI.iloc[CPI_date_index]['DATE'] < release_date):
        CPI_date_index = CPI_date_index + 1
    else:
        list_next_CPI_data.append(df_CPI.iloc[CPI_date_index]['CPI_change'])
        release_date_idx = release_date_idx + 1

print("CPI and SP", stats.pearsonr(list(df_measure_pc["our_measure"]), list_next_CPI_data))

######################## PPI correlation with PC ########################
list_next_PPI_data = []
release_date_idx = 0
PPI_date_index = 0
while release_date_idx < len(list(df_measure_pc["EndDate"])):  
    release_date = df_measure_pc.iloc[release_date_idx]["EndDate"]
    if (df_PPI.iloc[PPI_date_index]['DATE'] < release_date):
        PPI_date_index = PPI_date_index + 1
    else:
        list_next_PPI_data.append(df_PPI.iloc[PPI_date_index]['PPI_change'])
        release_date_idx = release_date_idx + 1

print("PPI and SP", stats.pearsonr(list(df_measure_pc["our_measure"]), list_next_PPI_data))

CPI and SP (0.57808410548646, 2.568608421734531e-19)
PPI and SP (0.388443124391766, 1.2112642163112014e-08)
CPI and SP (0.7779315452346669, 6.34485621526749e-14)
PPI and SP (0.6802523971678465, 8.663538346121313e-10)
