# ARMA-GARCH

In [1]:
import paneltime as pt
import pandas as pd

pt.options.pqdkm.set([1,1,0,2,2])
pt.options.fixed_random_group_eff.set(0)
pt.options.fixed_random_time_eff.set(0)
pt.options.EGARCH.set(False)

%load_ext autoreload
%autoreload 2

#### ARMA-GARCH (Using *changes* in the news sentiment series, for which we use forecast error of AR(1))

##### 1. For real news

In [2]:
real_news = pd.read_excel("RealNewsDjiaReturns.xlsx")
real_news = real_news.rename(columns={'Real News':'News_sentiment'})
real_news = real_news.fillna(0)

In [3]:
real_news

Unnamed: 0,Date,News_sentiment,return
0,2014-11-17,-0.011141,0.000737
1,2014-11-18,-0.023757,0.002268
2,2014-11-19,-0.020513,-0.000118
3,2014-11-20,-0.017292,0.001879
4,2014-11-21,-0.014237,0.005126
...,...,...,...
1856,2022-04-04,-0.010877,0.002982
1857,2022-04-05,-0.010975,-0.008044
1858,2022-04-06,-0.013732,-0.004198
1859,2022-04-07,0.377177,0.002513


In [5]:
import numpy as np
from statsmodels.tsa.ar_model import AutoReg
from statsmodels.tsa.stattools import adfuller

df = real_news[['News_sentiment']]

# Check for stationarity. If p > 0.05, difference the series.
result = adfuller(df)
if result[1] > 0.05:
    df = sdf.diff().dropna()

errors = []

for t in range(4, len(df)):
    train = df.iloc[:t]
    test = df.iloc[t]
    
    model = AutoReg(train, lags=1)
    model_fit = model.fit()
    
    prediction = model_fit.predict(start=len(train), end=len(train))
    
    # Check if the prediction is nan.
    if not np.isnan(prediction.iloc[0]):
        error = test - prediction.iloc[0]
        errors.append(error)

# Convert errors to a numpy array
errors = np.array(errors)

In [6]:
len(errors)

1857

In [7]:
real_news = real_news [4:]
real_news ['News_Change'] = errors
del real_news['News_sentiment']
len(real_news)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  real_news ['News_Change'] = errors


1857

In [8]:
real_news['LNews_Change1'] = real_news['News_Change'].shift(1)

real_news = real_news.dropna()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  real_news['LNews_Change1'] = real_news['News_Change'].shift(1)


In [10]:
s = pt.execute("return~News_Change+LNews_Change1",real_news,T='Date')

print(s)

its:0, f:5859.586184361455, gnorm: 0.00908892235863256
its:1, f:5978.013185345577, gnorm: 0.030383357778257267
its:2, f:6113.6853543078605, gnorm: 0.053073636165500634
its:3, f:6199.712025523056, gnorm: 0.03407646686199868
its:4, f:6203.950987086902, gnorm: 0.05445345312794408
its:5, f:6211.836888483907, gnorm: 0.009012562690195751
its:6, f:6213.323538667338, gnorm: 0.007292118093241004
its:7, f:6213.9831734485715, gnorm: 0.0011420250093131953
its:8, f:6224.493115390089, gnorm: 0.00997755009301662
its:9, f:6226.135207962834, gnorm: 0.0073322439440423725
its:10, f:6231.244050751822, gnorm: 0.007789305930624122
its:11, f:6231.663257321745, gnorm: 0.009485271033190489
its:12, f:6231.928717469995, gnorm: 0.00899713742555405
its:13, f:6232.16283861388, gnorm: 0.0012493921136737098
its:14, f:6232.255336495797, gnorm: 0.0013852310063113462
node: 5, its: 15,  LL:6232.280987308162
Convergence on zero gradient; local or global minimum identified
Statistics:
Dep. Variable:          return  R-squa

##### 2. For fake news

In [10]:
fake_news = pd.read_excel("FakeNewsDjiaReturn.xlsx")
fake_news = fake_news.rename(columns={'News sentiment':'News_sentiment'})
fake_news = fake_news.fillna(0)

In [11]:
df = fake_news[['News_sentiment']]

# Check for stationarity. If p > 0.05, difference the series.
result = adfuller(df)
if result[1] > 0.05:
    df = sdf.diff().dropna()

errors = []

for t in range(4, len(df)):
    train = df.iloc[:t]
    test = df.iloc[t]
    
    model = AutoReg(train, lags=1)
    model_fit = model.fit()
    
    prediction = model_fit.predict(start=len(train), end=len(train))
    
    
    if not np.isnan(prediction.iloc[0]):
        error = test - prediction.iloc[0]
        errors.append(error)

# Convert errors to a numpy array
errors = np.array(errors)

In [12]:
len(errors)

1377

In [13]:
fake_news = fake_news [4:]
fake_news ['News_Change'] = errors
del fake_news['News_sentiment']
len(fake_news)

1377

In [14]:
fake_news['LNews_Change1'] = fake_news['News_Change'].shift(1)

fake_news = fake_news.dropna()

In [15]:
s = pt.execute("return~News_Change+LNews_Change1",fake_news,T='Date')

print(s)

its:0, f:4454.402541775244, gnorm: 0.010684283740289856
its:1, f:4546.822460264395, gnorm: 0.03223647586959485
its:2, f:4631.074355474359, gnorm: 0.041414817198516134
its:3, f:4656.886149309241, gnorm: 0.005212854287561262
its:4, f:4661.25770274345, gnorm: 0.012902305779860354
its:5, f:4661.521068159587, gnorm: 0.0037381076080320996
its:6, f:4663.205228274819, gnorm: 0.028943611655292502
its:7, f:4665.361674290194, gnorm: 0.0033675958875192887
its:8, f:4675.166054388039, gnorm: 0.010654190004684531
its:9, f:4685.014452090507, gnorm: 0.04075661530808496
its:10, f:4693.751868559929, gnorm: 0.013212049995356061
its:11, f:4699.165162308738, gnorm: 0.008525145450450002
its:12, f:4699.859882366327, gnorm: 0.002774814916715081
its:13, f:4702.170729747144, gnorm: 0.01938562292260008
its:14, f:4703.328458407403, gnorm: 0.003651476231278178
its:15, f:4703.57727499851, gnorm: 0.012403560399662344
its:16, f:4704.444543013282, gnorm: 0.0011694478390024618
node: 5, its: 17,  LL:4704.4510990664
Conve