# Regression analysis

## 1. Import packages

In [1]:
import pandas as pd

import statsmodels.api as sm
from statsmodels.miscmodels.ordinal_model import OrderedModel

## 2. Load the data

In [2]:
data = pd.read_csv("../../data/reg_data_cz.csv")

In [3]:
data["delta_r_cat"] = pd.Categorical(data["delta_r_cat"], ordered=True)

In [4]:
correlation = data[['hawk_pref_score', 'delta_r']].corr()

In [5]:
correlation

Unnamed: 0,hawk_pref_score,delta_r
hawk_pref_score,1.0,0.533152
delta_r,0.533152,1.0


In [6]:
data

Unnamed: 0,date,sentiment,hawk_pref_score,disagreement,r,delta_r,delta_r_cat,hicp
0,1998-01-22,neutral,-0.661030,0.000000,0.00,0.00,0,12.1
1,1998-02-19,neutral,0.540458,0.000000,0.00,0.00,0,12.4
2,1998-03-19,neutral,1.018434,0.075000,15.00,0.00,0,12.4
3,1998-04-30,hawkish,0.194856,0.000000,0.00,0.00,0,12.0
4,1998-05-28,neutral,0.082861,0.000000,0.00,0.00,0,12.0
...,...,...,...,...,...,...,...,...
260,2024-03-20,neutral,-0.639292,0.014881,5.75,-0.50,-1,2.2
261,2024-05-02,neutral,0.767536,0.000000,5.25,-0.50,-1,3.1
262,2024-06-27,dovish,-0.091087,0.014881,4.75,-0.50,-1,2.2
263,2024-08-01,dovish,0.399827,0.000000,4.50,-0.25,-1,2.5


## 3. Estimate the models

### Model 1

Model 1 is specified as:
$$
\Delta r_{t+1} = \beta_{0} + \beta_{1} \Delta r_{t} + \beta_{2} S_{t} + \epsilon_{t}
$$

- $\Delta r_{t+1}$: Policy rate change at $t+1$
- $\Delta r_{t}$: Policy rate change at $t$
- $S_{t}$: Sentiment index at $t$
- $\epsilon_{t}$: Error term.

#### OLS

In [7]:
y = data["delta_r"][1:].reset_index(drop=True)

In [8]:
X = pd.DataFrame({
    "delta_r": data["delta_r"][:-1],
    "sentiment": data["hawk_pref_score"][:-1],
})

In [9]:
X = sm.add_constant(X)

In [10]:
model = sm.OLS(y,X)

In [11]:
results = model.fit()

In [12]:
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                delta_r   R-squared:                       0.411
Model:                            OLS   Adj. R-squared:                  0.406
Method:                 Least Squares   F-statistic:                     90.94
Date:                Mon, 21 Oct 2024   Prob (F-statistic):           1.08e-30
Time:                        11:21:42   Log-Likelihood:                 32.787
No. Observations:                 264   AIC:                            -59.57
Df Residuals:                     261   BIC:                            -48.85
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0252      0.014     -1.841      0.0

### Ordered probit regression

In [13]:
y = data["delta_r_cat"].cat.codes[1:].reset_index(drop=True)

In [14]:
X = pd.DataFrame({
    "delta_r_cat": data["delta_r_cat"].cat.codes[:-1],
    "sentiment_index": data["hawk_pref_score"][:-1],
})

In [15]:
ordered_probit = OrderedModel(y, X, distr='probit')

In [16]:
results = ordered_probit.fit()

Optimization terminated successfully.
         Current function value: 0.671307
         Iterations: 197
         Function evaluations: 346


In [17]:
print(results.summary())

                             OrderedModel Results                             
Dep. Variable:                      y   Log-Likelihood:                -177.22
Model:                   OrderedModel   AIC:                             362.4
Method:            Maximum Likelihood   BIC:                             376.8
Date:                Mon, 21 Oct 2024                                         
Time:                        11:21:42                                         
No. Observations:                 264                                         
Df Residuals:                     260                                         
Df Model:                           2                                         
                      coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------
delta_r_cat         0.6970      0.178      3.919      0.000       0.348       1.046
sentiment_index     0.4789      0.110

### Model 2

Model 1 is specified as:
$$
\Delta r_{t+1} = \beta_{0} + \beta_{1} \Delta r_{t} + \beta_{2} I(S_{t} = hawk) + \beta_{3} I(S_{t} = dov) + \epsilon_{t}
$$

- $\Delta r_{t+1}$: Policy rate change at $t+1$
- $\Delta r_{t}$: Policy rate change at $t$
- $I(S_{t} = hawk)$: Indicator variable of sentiment being hawkish at $t$
- $I(S_{t} = dov)$: Indicator variable of sentiment being dovish at $t$
- $\epsilon_{t}$: Error term.

In [18]:
data['hawkish'] = (data['sentiment'] == 'hawkish').astype(int)
data['dovish'] = (data['sentiment'] == 'dovish').astype(int)

In [19]:
y = data["delta_r"][1:].reset_index(drop=True)

In [20]:
X = pd.DataFrame({
    "delta_r": data["delta_r"][:-1],
    "I_hawkish": data["hawkish"][:-1],
    "I_dovish": data["dovish"][:-1]
})

In [21]:
X = sm.add_constant(X)

In [22]:
model = sm.OLS(y,X)

In [23]:
results = model.fit()

In [24]:
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                delta_r   R-squared:                       0.401
Model:                            OLS   Adj. R-squared:                  0.395
Method:                 Least Squares   F-statistic:                     58.14
Date:                Mon, 21 Oct 2024   Prob (F-statistic):           8.59e-29
Time:                        11:21:42   Log-Likelihood:                 30.748
No. Observations:                 264   AIC:                            -53.50
Df Residuals:                     260   BIC:                            -39.19
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0352      0.017     -2.117      0.0

#### Ordered logistic regression

In [25]:
y = data["delta_r_cat"].cat.codes[1:].reset_index(drop=True)

In [26]:
X = pd.DataFrame({
    "delta_r_cat": data["delta_r_cat"].cat.codes[:-1],
    "I_hawkish": data["hawkish"][:-1],
    "I_dovish": data["dovish"][:-1],
})

In [27]:
ordered_probit = OrderedModel(y, X, distr='probit')

In [28]:
results = ordered_probit.fit()

Optimization terminated successfully.
         Current function value: 0.695901
         Iterations: 424
         Function evaluations: 671


In [29]:
print(results.summary())

                             OrderedModel Results                             
Dep. Variable:                      y   Log-Likelihood:                -183.72
Model:                   OrderedModel   AIC:                             377.4
Method:            Maximum Likelihood   BIC:                             395.3
Date:                Mon, 21 Oct 2024                                         
Time:                        11:21:43                                         
No. Observations:                 264                                         
Df Residuals:                     259                                         
Df Model:                           3                                         
                  coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------
delta_r_cat     0.9473      0.169      5.597      0.000       0.616       1.279
I_hawkish       0.6051      0.237      2.553     

### Model 2

Model 2 is specified as:
$$
\Delta r_{t+1} = \beta_{0} + \beta_{1} \Delta r_{t} + \beta_{2} S_{t} + \beta_{3} Disag_{t} +\epsilon_{t}
$$

- $\Delta r_{t+1}$: Policy rate change at $t+1$.
- $\Delta r_{t}$: Policy rate change at $t$.
- $S_{t}$: Sentiment at $t$.
- $Disag_{t}$: Level of disagreement about policy rate change at $t$.
- $\epsilon_{t}$: Error term.

### Model 3

Model 3 is specified as:
$$
\Delta r_{t+1} = \beta_{0} + \beta_{1} \Delta r_{t} + \beta_{2} S_{t} + \beta_{3} (\pi_{t} - \pi^*) +\epsilon_{t}
$$

- $\Delta r_{t+1}$: Policy rate change at $t+1$.
- $\Delta r_{t}$: Policy rate change at $t$.
- $S_{t}$: Sentiment at $t$.
- $Disag_{t}$: Level of disagreement about policy rate change at $t$.
- $\epsilon_{t}$: Error term.