# Empirical Application 5 Financial Econometrics

*By Daniel Deutsch, José Lucas Barretto, and Stéphane Roblet*

## Data Preparation

In [205]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.tsa as tsa
from matplotlib import pyplot as plt

In [206]:
# load datasets
russel = pd.read_csv("^RUT.csv", ';', na_values = '.', parse_dates=['Date'])
ir = pd.read_csv("USD1MTD156N.csv", na_values = '.', parse_dates=['DATE'])

# merge to have equal dates
df = russel.merge(ir, left_on='Date', right_on='DATE', how='inner').drop(columns=['DATE'])

# rename columns
df.rename(columns={
    'Date': 'date',
    'Close': 'rut',
    'USD1MTD156N': 'interest_rate'
}, inplace=True)

# apply log to russell index prices
df['rut'] = np.log(df['rut'])

# convert from percentage to decimal
df['interest_rate'] = df['interest_rate']/100

# calculate the daily index return
df['return'] = df['rut'] - df['rut'].shift()

  exec(code_obj, self.user_global_ns, self.user_ns)


In [207]:
# load inflation dataset
inflation = pd.read_csv('5y_inflation_expected.csv', na_values = '.', parse_dates=['DATE']).rename(
    columns={
        'DATE': 'date',
        'T5YIE': 'inflation'
    })

In [208]:
# load VIX dataset
vix = pd.read_csv('vix.csv', na_values = '.', parse_dates=['DATE']).rename(
    columns={
        'DATE': 'date',
        'VIXCLS': 'vix'
        })

# apply log values
vix['vix'] = np.log(vix['vix'])

In [209]:
# load GDP dataset
gdp = pd.read_csv(
    'GDP.csv',
    na_values = '.',
    parse_dates=['DATE']
    ).rename(columns={'DATE': 'date', 'GDP': 'gdp'})
gdp.set_index('date', inplace=True)
gdp.index = pd.to_datetime(gdp.index)

# interpolate to obtain daily values
gdp = gdp.resample('d').interpolate()
gdp.reset_index(inplace=True)

# apply log values
gdp['gdp'] = np.log(gdp['gdp'])

In [210]:
# merge all datasets
df = df.merge(inflation, on='date', how='left')
df = df.merge(vix, on='date', how='left')
df = df.merge(gdp, on='date', how='left')
df.dropna(inplace=True)

In [211]:
df.head()

Unnamed: 0,date,rut,interest_rate,return,inflation,vix,gdp
1,2015-01-05,7.074413,0.00168,-0.014663,1.25,2.991724,9.798146
2,2015-01-06,7.057304,0.001678,-0.017109,1.16,3.05022,9.798271
3,2015-01-07,7.069849,0.001665,0.012545,1.16,2.960623,9.798396
4,2015-01-08,7.086847,0.001663,0.016998,1.21,2.833801,9.798521
5,2015-01-09,7.078072,0.001668,-0.008775,1.2,2.865054,9.798646


## Checking stationarity of series

Here we will apply ADF Tests to check for thee stationarity of the different series used.

In [212]:
print('ADF Test P-values - H0: The series contains a unit root -> Non-stationary')
for series in ['rut', 'interest_rate', 'inflation', 'vix', 'gdp']:
    print(
        f'{series}: {np.round(tsa.stattools.adfuller(df[series])[1], 4)}'
    )

ADF Test P-values - H0: The series contains a unit root -> Non-stationary
rut: 0.2112
interest_rate: 0.84
inflation: 0.0612
vix: 0.0008
gdp: 0.6539


We can see for all the series, but for the VIX index, the ADF test indicates the presence of a unit root, meaning that they are non-stationary. For this reason, we will use the first difference of the series, except for the VIX index, where the unit root hypothesis is rejected.

In [213]:
# apply first difference to series with unit roots
df['interest_rate'] = df['interest_rate'].diff()
df['inflation'] = df['inflation'].diff()
df['gdp'] = df['gdp'].diff()

df.dropna(inplace=True)

## Estimating Russell 2000 returns with a Linear Model

Now, let's use a linear model to use as a baseline.

In [265]:
# calculate order 3 lags of each variable
for series in ['interest_rate', 'inflation', 'vix', 'gdp']:
    df[f'{series}_lag1'] = df[series].shift(1)

df['return_lag1'] = df['return'].shift(1)
df['return_lag2'] = df['return'].shift(2)
df['return_lag3'] = df['return'].shift(3)

df.dropna(inplace=True)

# build feature dataset
X = sm.add_constant(df.drop(columns=['rut', 'return']).set_index('date'))
y = df[['date', 'return']].set_index('date')

# run OLS model
linear_model = sm.OLS(y, X).fit()
print(linear_model.summary())

                            OLS Regression Results                            
Dep. Variable:                 return   R-squared:                       0.526
Model:                            OLS   Adj. R-squared:                  0.522
Method:                 Least Squares   F-statistic:                     139.3
Date:                Sun, 14 Nov 2021   Prob (F-statistic):          9.77e-215
Time:                        19:02:21   Log-Likelihood:                 4431.3
No. Observations:                1393   AIC:                            -8839.
Df Residuals:                    1381   BIC:                            -8776.
Df Model:                          11                                         
Covariance Type:            nonrobust                                         
                         coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------
const                  0.0072      0

  x = pd.concat(x[::order], 1)


## Testing for linearity with transition variable $y_{t-1}$

In [261]:
X = df.drop(columns=['rut', 'return']).set_index('date')

wald_cols = []
for j in range(1,4):
    for col in X.columns:
        wald_cols.append(f'{col}*return_lag1^{j}')
        X[f'{col}*return_lag1^{j}'] = X[col]*X['return_lag1']**j

X = sm.add_constant(X)
y = df[['date', 'return']].set_index('date')

# run OLS model
linear_model = sm.OLS(y, X).fit()

# set-up hypothesis
hypothesis = ' = 0, '.join(wald_cols)
hypothesis = '(' + hypothesis + ' = 0)'

# run wald test
test = linear_model.wald_test(hypothesis)
p_value = test.pvalue.item()



In [264]:
print('P-value for null hypothesis of linearity:', p_value)

P-value for null hypothesis of linearity: 4.3597910233333997e-44


We can see that we strongly reject the null hypothesis of linearity.

## Selecting a good transition variable