In [None]:
import numpy as np
import pandas as pd
import yaml
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split
from arch import arch_model
import os
import sys
import warnings
from dotenv import load_dotenv

# Silence all Python warnings for the rest of the session.
# NOTE(review): this also hides any optimizer/convergence warnings raised while
# fitting models below — consider narrowing the filter.
warnings.filterwarnings("ignore")

# Pull configuration from a local .env file into the process environment.
load_dotenv()
REPO_PATH = os.getenv("REPO_PATH")
if not REPO_PATH:
    # Fail early with a clear message instead of silently building paths
    # from the string "None" further down.
    raise RuntimeError(
        "REPO_PATH is not set; define it in the environment or in a .env file."
    )

# Make the project's `src_HF` package directory importable. os.path.join is
# used so the result is correct whether or not REPO_PATH ends with a separator.
sys.path.insert(0, os.path.join(REPO_PATH, 'src_HF'))


## GARCH

### Import data

In [None]:
# Identifiers that select which prepared dataset to load.
future, topic = 'CLc1', 'CRU'

# The prepared CSV lives under <REPO_PATH>/data/prepared_data/.
file_path = os.path.join(REPO_PATH, 'data', 'prepared_data', f"{future}_{topic}_5min_resampled.csv")

# Use the 'date' column as the index and let pandas parse it as datetimes.
df = pd.read_csv(file_path, parse_dates=True, index_col='date')

# Quick visual sanity check of the first rows.
display(df.head())

### Fit model

In [None]:

target = 'TARGET'

# Ordered 80/20 split: with shuffle disabled the test set is the final 20% of
# rows, so the split respects the row order of `df`.
X_train, X_test, y_train, y_test = train_test_split(
    df,
    df[target],
    shuffle=False,
    test_size=0.20,
)

# Fit a GARCH(1,1) with normal innovations on the training 'LOGRET' column and
# print the estimation summary.
garch_model = arch_model(X_train['LOGRET'], dist='Normal', vol='Garch', q=1, p=1)
train_fit = garch_model.fit(disp='off')
print(train_fit.summary())

In [None]:
# Walk-forward (expanding window) one-step-ahead volatility forecasts for every
# row of the test set.
#
# The window must be sliced from the FULL return series: for test row i the
# model is fitted on all rows before it (the training rows plus the first i
# test rows). Slicing X_train instead would never reach the test period, so the
# forecasts would refer to timestamps inside the training set while being
# indexed with the test timestamps.
# NOTE(review): this refits the model once per test row, which can be slow on
# large test sets.
returns = df['LOGRET']
n_train = X_train.shape[0]
sigma_forecasts = []

for i in tqdm(range(X_test.shape[0])):
    # All observations strictly before test row i.
    train = returns.iloc[:n_train + i]
    model = arch_model(train, p=1, q=1, rescale=False)
    model_fit = model.fit(disp='off')
    # One step ahead predictor: last row of the variance table, first horizon.
    pred = model_fit.forecast(horizon=1, reindex=True)
    # Store the forecast standard deviation (square root of the variance).
    sigma_forecasts.append(np.sqrt(pred.variance.values[-1, 0]))

# Align the forecasts with the test-set timestamps.
rolling_preds = pd.Series(sigma_forecasts, index=y_test.index)

In [None]:
# Number of trailing test observations shown in the figure.
view = 1000

# Factor applied to the forecasts for plotting only.
# NOTE(review): the metrics below use the UNSCALED forecasts, and 252 looks like
# a trading-days-per-year constant — confirm this scaling matches the units of
# TARGET (annualising a volatility would normally use a square root).
plot_scale = 252

actual = X_test['TARGET']
forecast = pd.Series(rolling_preds)

fig, ax = plt.subplots(figsize=(12, 6))

# Observed target over the test period (this is test data, not training data).
ax.plot(X_test.index[-view:], actual.iloc[-view:], label='Target (test)')

# plot garch results
ax.plot(
    X_test.index[-view:],
    forecast.iloc[-view:] * plot_scale,
    label='Garch Volatility',
    color='red',
)

ax.set_title(f'GARCH(1,1) one-step-ahead forecast vs. target (last {view} test points)')
ax.set_xlabel('date')
ax.set_ylabel('Value')
# Without this call the `label=` arguments above are never rendered.
ax.legend()
plt.show()

# calculate mse and mae over the full test set
mse = mean_squared_error(X_test['TARGET'], rolling_preds)
mae = mean_absolute_error(X_test['TARGET'], rolling_preds)

print(f'MSE: {mse}')
print(f'MAE: {mae}')
