# Implied Volatility Surprise Regressions
- Regress implied-volatility surprises (daily percentage changes in the VIX, VSTOXX and IVI) on language scores (FK, DC, CCI), controlling for the Target, Path and QE policy factors.

In [21]:
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
from datetime import datetime
from pathlib import Path
import os
import matplotlib.pyplot as plt
from stargazer.stargazer import Stargazer
from IPython.core.display import display, HTML
from sklearn.linear_model import LinearRegression
from sklearn.decomposition import PCA
pd.set_option('display.max_rows', 10)

  from IPython.core.display import display, HTML


In [22]:
# Read in country scores data.
# For every central bank, three workbooks are read from the sheet named after
# the bank and joined on the announcement date. `pd.merge` defaults to an inner
# join, so only dates present in all three sources are kept.
countries = ['fomc', 'ecb', 'boe']

# One merged frame per bank, appended in the same order as `countries`;
# the tuple-unpacking at the bottom relies on that order.
dfs = []
for country in countries:
    semantic_scores = pd.read_excel("text_scores.xlsx", sheet_name=country, usecols=['Date', 'fk_score', 'dc_score', 'wc'])
    conceptual_scores = pd.read_excel("CCI/CCI_scores.xlsx", sheet_name=country, usecols=['Date', 'PoJ', 'CCI'])
    factors = pd.read_excel("factors.xlsx", sheet_name=country, usecols=['Date', 'Target', 'Path', 'QE'])

    # Reduce every timestamp to a plain calendar date so the three sources
    # share the same kind of merge key.
    for frame in (semantic_scores, conceptual_scores, factors):
        frame['Date'] = pd.to_datetime(frame['Date']).dt.date

    scores = pd.merge(semantic_scores, conceptual_scores, on='Date')
    scores = pd.merge(scores, factors, on='Date')
    dfs.append(scores)

fomc, ecb, boe = dfs

## FOMC Regressions

In [23]:
# Load the daily VIX series and derive its day-over-day percentage change.
# Rows are used in the order they appear in the sheet (no sorting is applied).
vix = pd.read_excel('volatility_data.xlsx', sheet_name='VIX', usecols=['Date', 'Price']).assign(
    Date=lambda frame: pd.to_datetime(frame['Date'], errors='coerce'),  # unparseable dates become NaT
    pct_change=lambda frame: frame['Price'].pct_change()  # first row has no predecessor, so it is NaN
)
vix

Unnamed: 0,Date,Price,pct_change
0,2004-11-10,13.08,
1,2004-11-11,13.04,-0.003058
2,2004-11-12,13.33,0.022239
3,2004-11-15,13.38,0.003751
4,2004-11-16,13.21,-0.012706
...,...,...,...
5047,2024-11-04,21.98,0.004570
5048,2024-11-05,20.49,-0.067789
5049,2024-11-06,16.27,-0.205954
5050,2024-11-07,15.20,-0.065765


In [24]:
# Give both merge keys the same datetime dtype before joining.
# `fomc['Date']` still holds plain `date` objects from the loading cell;
# `vix['Date']` was already parsed when it was loaded, so converting it again
# is only a safeguard.
for frame in (fomc, vix):
    frame['Date'] = pd.to_datetime(frame['Date'])

In [25]:
# Keep only FOMC dates that also have a VIX observation, drop every row that
# contains a missing value, and lower-case the Target/Path factor names
# (QE keeps its original capitalisation).
fomc_combined = (
    fomc.merge(vix, on='Date', how='inner')
    .dropna()
    .rename(columns={'Target': 'target', 'Path': 'path'})
)
fomc_combined

Unnamed: 0,Date,fk_score,dc_score,wc,PoJ,CCI,target,path,QE,Price,pct_change
1,2004-12-14,14.600000,9.780000,166,0.037037,0.053715,0.006821,-0.009112,0.003982,12.73,0.015152
2,2005-02-02,14.600000,9.710000,165,0.037267,0.054049,0.001001,-0.019184,0.006297,11.66,-0.030756
3,2005-03-22,13.800000,9.700000,196,0.041667,0.066314,0.049127,0.157422,-0.025618,14.27,0.048494
4,2005-05-03,13.600000,9.650000,188,0.038251,0.060157,0.023252,0.038442,0.004470,14.53,-0.039021
5,2005-06-30,14.600000,9.980000,166,0.043210,0.067955,0.052535,0.062076,0.015680,12.04,0.022940
...,...,...,...,...,...,...,...,...,...,...,...
146,2023-06-14,14.133333,9.786667,309,0.098980,0.165245,0.020335,0.163058,0.006125,13.88,-0.049966
147,2023-07-26,14.166667,9.466667,297,0.105808,0.178526,0.011788,0.015831,0.004307,13.19,-0.048341
148,2023-09-20,14.900000,9.870000,301,0.097360,0.154412,0.044047,0.115062,0.008971,15.14,0.072998
149,2023-11-01,14.000000,9.843333,310,0.093333,0.160756,-0.002114,-0.008679,0.003169,16.87,-0.070011


In [26]:
# Split each policy factor into a positive part and a negative part so the
# regressions can fit a separate slope for each sign.
# `clip` is vectorised and, unlike an element-wise `x if x > 0 else 0`,
# leaves missing values as NaN instead of turning them into zero surprises.
for factor, prefix in (('target', 'target'), ('path', 'path'), ('QE', 'qe')):
    fomc_combined[prefix + '_positive'] = fomc_combined[factor].clip(lower=0)  # negatives -> 0
    fomc_combined[prefix + '_negative'] = fomc_combined[factor].clip(upper=0)  # positives -> 0

In [27]:
# FOMC regression specifications as (column label, formula) pairs.
# The first three use the raw policy factors; the last three use the factors
# split by sign.
fomc_specs = (
    ('FK', 'pct_change ~ fk_score + target + path + QE'),
    ('DC', 'pct_change ~ dc_score + target + path + QE'),
    ('CCI', 'pct_change ~ CCI + target + path + QE'),
    ('FK (Separated Surprises)', 'pct_change ~ fk_score + target_positive + target_negative + path_positive + path_negative + qe_positive + qe_negative'),
    ('DC (Separated Surprises)', 'pct_change ~ dc_score + target_positive + target_negative + path_positive + path_negative + qe_positive + qe_negative'),
    ('CCI (Separated Surprises)', 'pct_change ~ CCI + target_positive + target_negative + path_positive + path_negative + qe_positive + qe_negative'),
)
fomc_models = [{'name': label, 'formula': formula} for label, formula in fomc_specs]

# Fit each specification by OLS on the merged FOMC frame; results stay in model order.
fomc_results = [smf.ols(formula=spec['formula'], data=fomc_combined).fit() for spec in fomc_models]

In [28]:
# LaTeX output for the FOMC regressions
stargazer = Stargazer(fomc_results)
# One column label per fitted model, each spanning exactly one column.
stargazer.custom_columns([model['name'] for model in fomc_models], [1] * len(fomc_models))

latex_table = stargazer.render_latex()

print(latex_table)
# Use a bank-specific file name so this table is not overwritten by the other
# banks' LaTeX exports later in the notebook.
with open("fomc_regression_table.tex", "w") as f:
    f.write(latex_table)

\begin{table}[!htbp] \centering
\begin{tabular}{@{\extracolsep{5pt}}lcccccc}
\\[-1.8ex]\hline
\hline \\[-1.8ex]
& \multicolumn{6}{c}{\textit{Dependent variable: pct_change}} \
\cr \cline{2-7}
\\[-1.8ex] & \multicolumn{1}{c}{FK} & \multicolumn{1}{c}{DC} & \multicolumn{1}{c}{CCI} & \multicolumn{1}{c}{FK (Separated Surprises)} & \multicolumn{1}{c}{DC (Separated Surprises)} & \multicolumn{1}{c}{CCI (Separated Surprises)}  \\
\\[-1.8ex] & (1) & (2) & (3) & (4) & (5) & (6) \\
\hline \\[-1.8ex]
 CCI & & & -0.129$^{}$ & & & -0.069$^{}$ \\
& & & (0.175) & & & (0.188) \\
 Intercept & -0.093$^{}$ & -0.466$^{**}$ & -0.009$^{}$ & -0.075$^{}$ & -0.467$^{**}$ & -0.005$^{}$ \\
& (0.056) & (0.206) & (0.019) & (0.059) & (0.209) & (0.019) \\
 QE & -0.008$^{}$ & -0.007$^{}$ & -0.013$^{}$ & & & \\
& (0.449) & (0.445) & (0.451) & & & \\
 dc_score & & 0.045$^{**}$ & & & 0.047$^{**}$ & \\
& & (0.021) & & & (0.021) & \\
 fk_score & 0.005$^{}$ & & & 0.004$^{}$ & & \\
& (0.004) & & & (0.004) & & \\
 path & 0.040

## ECB Regressions

In [29]:
# Load the daily VSTOXX series (the volatility measure paired with the ECB
# sample) and derive its day-over-day percentage change.
# Rows are used in the order they appear in the sheet (no sorting is applied).
vstoxx = pd.read_excel('volatility_data.xlsx', sheet_name='VSTOXX', usecols=['Date', 'Price']).assign(
    Date=lambda frame: pd.to_datetime(frame['Date'], errors='coerce'),  # unparseable dates become NaT
    pct_change=lambda frame: frame['Price'].pct_change()  # first row has no predecessor, so it is NaN
)
vstoxx

Unnamed: 0,Date,Price,pct_change
0,2000-11-10,25.5200,
1,2000-11-13,28.0500,0.099138
2,2000-11-14,25.5800,-0.088057
3,2000-11-15,25.1000,-0.018765
4,2000-11-16,24.9600,-0.005578
...,...,...,...
6106,2024-11-04,20.9586,-0.006419
6107,2024-11-05,19.8482,-0.052981
6108,2024-11-06,18.4866,-0.068601
6109,2024-11-07,16.7385,-0.094560


In [30]:
# Give both merge keys the same datetime dtype before joining.
# `ecb['Date']` still holds plain `date` objects from the loading cell;
# `vstoxx['Date']` was already parsed when it was loaded, so converting it
# again is only a safeguard.
for frame in (ecb, vstoxx):
    frame['Date'] = pd.to_datetime(frame['Date'])

In [31]:
# Keep only ECB dates that also have a VSTOXX observation and lower-case the
# Target/Path factor names (QE keeps its original capitalisation).
# No rows are dropped for missing values here.
ecb_combined = (
    ecb.merge(vstoxx, on='Date', how='inner')
    .rename(columns={'Target': 'target', 'Path': 'path'})
)
ecb_combined

Unnamed: 0,Date,fk_score,dc_score,wc,PoJ,CCI,target,path,QE,Price,pct_change
0,2001-02-01,10.633333,9.330000,1344,0.035017,0.061118,0.469688,0.033036,-0.021171,21.7500,-0.022472
1,2001-03-01,12.333333,9.503333,658,0.040000,0.060662,2.899933,3.844687,0.696180,23.3800,0.003003
2,2001-04-11,10.966667,8.820000,1071,0.036667,0.059348,11.101716,11.882244,2.519214,25.3500,-0.044118
3,2001-05-10,12.833333,9.516667,1345,0.048333,0.080366,-18.132955,-10.909472,-8.366147,21.2000,-0.064843
4,2001-06-07,12.800000,9.456667,1238,0.051667,0.089485,1.286506,-1.036969,1.020805,19.9700,0.000501
...,...,...,...,...,...,...,...,...,...,...,...
225,2023-05-04,9.066667,8.930000,1483,0.051667,0.083232,-5.793791,-6.520874,-1.029808,20.8326,0.044591
226,2023-06-15,12.300000,9.260000,1688,0.090000,0.158171,1.131847,-1.265959,3.839601,13.7205,-0.035520
227,2023-07-27,9.966667,8.996667,1538,0.060000,0.095342,-1.354523,-4.555295,1.355185,14.9653,-0.070461
228,2023-09-14,10.133333,9.160000,1565,0.058535,0.098020,6.162280,-0.003078,3.274754,14.7497,-0.106306


In [32]:
# Different slopes for positive and negative surprises: split each policy
# factor into a positive part and a negative part.
# `clip` is vectorised and, unlike an element-wise `x if x > 0 else 0`,
# leaves missing values as NaN instead of turning them into zero surprises.
for factor, prefix in (('target', 'target'), ('path', 'path'), ('QE', 'qe')):
    ecb_combined[prefix + '_positive'] = ecb_combined[factor].clip(lower=0)  # negatives -> 0
    ecb_combined[prefix + '_negative'] = ecb_combined[factor].clip(upper=0)  # positives -> 0

In [33]:
# ECB regression specifications as (column label, formula) pairs.
# The first three use the raw policy factors; the last three use the factors
# split by sign.
ecb_specs = (
    ('FK', 'pct_change ~ fk_score + target + path + QE'),
    ('DC', 'pct_change ~ dc_score + target + path + QE'),
    ('CCI', 'pct_change ~ CCI + target + path + QE'),
    ('FK (Separated Surprises)', 'pct_change ~ fk_score + target_positive + target_negative + path_positive + path_negative + qe_positive + qe_negative'),
    ('DC (Separated Surprises)', 'pct_change ~ dc_score + target_positive + target_negative + path_positive + path_negative + qe_positive + qe_negative'),
    ('CCI (Separated Surprises)', 'pct_change ~ CCI + target_positive + target_negative + path_positive + path_negative + qe_positive + qe_negative'),
)
ecb_models = [{'name': label, 'formula': formula} for label, formula in ecb_specs]

# Fit each specification by OLS on the merged ECB frame; results stay in model order.
ecb_results = [smf.ols(formula=spec['formula'], data=ecb_combined).fit() for spec in ecb_models]

In [34]:
# LaTeX output for the ECB regressions
stargazer = Stargazer(ecb_results)
# One column label per fitted model, each spanning exactly one column.
stargazer.custom_columns([model['name'] for model in ecb_models], [1] * len(ecb_models))

latex_table = stargazer.render_latex()

print(latex_table)
# Use a bank-specific file name so this table neither overwrites nor is
# overwritten by the other banks' LaTeX exports in this notebook.
with open("ecb_regression_table.tex", "w") as f:
    f.write(latex_table)

\begin{table}[!htbp] \centering
\begin{tabular}{@{\extracolsep{5pt}}lcccccc}
\\[-1.8ex]\hline
\hline \\[-1.8ex]
& \multicolumn{6}{c}{\textit{Dependent variable: pct_change}} \
\cr \cline{2-7}
\\[-1.8ex] & \multicolumn{1}{c}{FK} & \multicolumn{1}{c}{DC} & \multicolumn{1}{c}{CCI} & \multicolumn{1}{c}{FK (Separated Surprises)} & \multicolumn{1}{c}{DC (Separated Surprises)} & \multicolumn{1}{c}{CCI (Separated Surprises)}  \\
\\[-1.8ex] & (1) & (2) & (3) & (4) & (5) & (6) \\
\hline \\[-1.8ex]
 CCI & & & -0.222$^{*}$ & & & -0.178$^{}$ \\
& & & (0.123) & & & (0.125) \\
 Intercept & -0.082$^{**}$ & -0.109$^{}$ & 0.011$^{}$ & -0.081$^{**}$ & -0.138$^{}$ & 0.005$^{}$ \\
& (0.037) & (0.098) & (0.013) & (0.038) & (0.100) & (0.015) \\
 QE & -0.003$^{}$ & -0.003$^{}$ & -0.003$^{}$ & & & \\
& (0.003) & (0.003) & (0.003) & & & \\
 dc_score & & 0.010$^{}$ & & & 0.013$^{}$ & \\
& & (0.010) & & & (0.010) & \\
 fk_score & 0.006$^{*}$ & & & 0.005$^{*}$ & & \\
& (0.003) & & & (0.003) & & \\
 path & 0.001$^{

## BoE Regressions

In [35]:
# Load the daily IVI series (the volatility measure paired with the BoE
# sample) and derive its day-over-day percentage change.
# Rows are used in the order they appear in the sheet (no sorting is applied).
ivi = pd.read_excel('volatility_data.xlsx', sheet_name='IVI', usecols=['Date', 'Price']).assign(
    Date=lambda frame: pd.to_datetime(frame['Date'], errors='coerce'),  # unparseable dates become NaT
    pct_change=lambda frame: frame['Price'].pct_change()  # first row has no predecessor, so it is NaN
)
ivi

Unnamed: 0,Date,Price,pct_change
0,2000-11-12,20.27,
1,2000-11-13,22.56,0.112975
2,2000-11-14,21.27,-0.057181
3,2000-11-15,20.20,-0.050306
4,2000-11-16,19.86,-0.016832
...,...,...,...
6027,2024-11-04,17.00,-0.005266
6028,2024-11-05,17.27,0.015882
6029,2024-11-06,16.16,-0.064273
6030,2024-11-07,14.58,-0.097772


In [36]:
# Give both merge keys the same datetime dtype before joining.
# `boe['Date']` still holds plain `date` objects from the loading cell;
# `ivi['Date']` was already parsed when it was loaded, so converting it again
# is only a safeguard.
for frame in (boe, ivi):
    frame['Date'] = pd.to_datetime(frame['Date'])

In [37]:
# Keep only BoE dates that also have an IVI observation and lower-case the
# Target/Path factor names (QE keeps its original capitalisation).
# No rows are dropped for missing values here.
boe_combined = (
    boe.merge(ivi, on='Date', how='inner')
    .rename(columns={'Target': 'target', 'Path': 'path'})
)
boe_combined

Unnamed: 0,Date,fk_score,dc_score,wc,PoJ,CCI,target,path,QE,Price,pct_change
0,2015-08-06,12.000000,9.336667,1075,0.063917,0.126272,-0.018978,-0.064462,0.016628,12.41,0.029022
1,2015-09-10,13.000000,9.440000,783,0.069224,0.131845,0.001886,0.001799,0.001956,27.73,0.116794
2,2015-10-08,14.066667,10.466667,836,0.057524,0.107406,0.002819,-0.007969,-0.003020,16.73,-0.155477
3,2015-11-05,10.700000,9.420000,954,0.088777,0.181475,-0.007861,-0.062228,0.005537,15.32,0.035135
4,2015-12-10,12.200000,9.240000,740,0.075412,0.143364,0.003291,-0.012618,0.007348,17.18,-0.008656
...,...,...,...,...,...,...,...,...,...,...,...
62,2023-12-14,11.433333,9.223333,914,0.105299,0.207808,0.003475,0.048683,-0.026891,11.78,-0.002540
63,2024-02-01,9.933333,9.113333,877,0.105528,0.200285,-0.002369,0.070990,-0.008510,12.46,-0.017350
64,2024-03-21,10.433333,9.283333,697,0.104382,0.205239,-0.001065,-0.033679,-0.001218,9.92,-0.042471
65,2024-05-09,8.900000,9.486667,783,0.110360,0.250235,0.008155,-0.034409,0.002543,11.71,-0.066188


In [38]:
# Different slopes for positive and negative surprises: split each policy
# factor into a positive part and a negative part.
# `clip` is vectorised and, unlike an element-wise `x if x > 0 else 0`,
# leaves missing values as NaN instead of turning them into zero surprises.
for factor, prefix in (('target', 'target'), ('path', 'path'), ('QE', 'qe')):
    boe_combined[prefix + '_positive'] = boe_combined[factor].clip(lower=0)  # negatives -> 0
    boe_combined[prefix + '_negative'] = boe_combined[factor].clip(upper=0)  # positives -> 0

In [39]:
# BoE regression specifications as (column label, formula) pairs.
# The first three use the raw policy factors; the last three use the factors
# split by sign.
boe_specs = (
    ('FK', 'pct_change ~ fk_score + target + path + QE'),
    ('DC', 'pct_change ~ dc_score + target + path + QE'),
    ('CCI', 'pct_change ~ CCI + target + path + QE'),
    ('FK (Separated Surprises)', 'pct_change ~ fk_score + target_positive + target_negative + path_positive + path_negative + qe_positive + qe_negative'),
    ('DC (Separated Surprises)', 'pct_change ~ dc_score + target_positive + target_negative + path_positive + path_negative + qe_positive + qe_negative'),
    ('CCI (Separated Surprises)', 'pct_change ~ CCI + target_positive + target_negative + path_positive + path_negative + qe_positive + qe_negative'),
)
boe_models = [{'name': label, 'formula': formula} for label, formula in boe_specs]

# Fit each specification by OLS on the merged BoE frame; results stay in model order.
boe_results = [smf.ols(formula=spec['formula'], data=boe_combined).fit() for spec in boe_models]

In [40]:
# Render the BoE regression results as a LaTeX table, print it, and save it to disk.
boe_column_labels = [spec['name'] for spec in boe_models]

stargazer = Stargazer(boe_results)
# Each label heads exactly one model column.
stargazer.custom_columns(boe_column_labels, [1] * len(boe_column_labels))

latex_table = stargazer.render_latex()
print(latex_table)

with open("regression_table.tex", "w") as tex_file:
    tex_file.write(latex_table)

\begin{table}[!htbp] \centering
\begin{tabular}{@{\extracolsep{5pt}}lcccccc}
\\[-1.8ex]\hline
\hline \\[-1.8ex]
& \multicolumn{6}{c}{\textit{Dependent variable: pct_change}} \
\cr \cline{2-7}
\\[-1.8ex] & \multicolumn{1}{c}{FK} & \multicolumn{1}{c}{DC} & \multicolumn{1}{c}{CCI} & \multicolumn{1}{c}{FK (Separated Surprises)} & \multicolumn{1}{c}{DC (Separated Surprises)} & \multicolumn{1}{c}{CCI (Separated Surprises)}  \\
\\[-1.8ex] & (1) & (2) & (3) & (4) & (5) & (6) \\
\hline \\[-1.8ex]
 CCI & & & 0.151$^{}$ & & & 0.123$^{}$ \\
& & & (0.193) & & & (0.199) \\
 Intercept & 0.001$^{}$ & -0.279$^{}$ & -0.031$^{}$ & -0.010$^{}$ & -0.262$^{}$ & -0.020$^{}$ \\
& (0.084) & (0.224) & (0.035) & (0.086) & (0.228) & (0.037) \\
 QE & 0.471$^{}$ & 0.519$^{}$ & 0.487$^{}$ & & & \\
& (0.690) & (0.683) & (0.687) & & & \\
 dc_score & & 0.029$^{}$ & & & 0.027$^{}$ & \\
& & (0.024) & & & (0.024) & \\
 fk_score & -0.000$^{}$ & & & 0.001$^{}$ & & \\
& (0.007) & & & (0.007) & & \\
 path & 0.454$^{**}$ & 0.4