In [None]:
import pandas as pd
import yaml
import matplotlib.pyplot as plt

from tqdm.notebook import tqdm
import os
import sys
import warnings
from dotenv import load_dotenv

# Silence every Python warning for the rest of the session.
warnings.filterwarnings("ignore")

# Read a local .env file (if any) into the process environment, then look up
# the repository root from it.
load_dotenv()
REPO_PATH = os.getenv("REPO_PATH")

# Make the project's `src_HF` directory importable for the `utils` imports
# that follow.
# - os.path.join supplies the path separator when REPO_PATH has no trailing
#   slash; plain string concatenation only worked when it did.
# - When REPO_PATH is unset, nothing is inserted rather than putting the
#   literal 'Nonesrc_HF' on sys.path; the imports below then succeed or fail
#   on their own terms.
if REPO_PATH is not None:
    sys.path.insert(0, os.path.join(REPO_PATH, 'src_HF'))

from utils.model_utils import RNNGenerator
from utils.forecast_utils import ForecastModel

### Load data

In [None]:
# Specify data
gen = RNNGenerator(future='CLc1', topic='CRU')

# Load the variable groups from the YAML config.
# - safe_load only constructs plain YAML types, which is all this cell reads
#   from the file.
# - The encoding is given explicitly so the file is decoded the same way on
#   every platform instead of with the locale default.
with open('variable_config.yaml', encoding='utf-8') as file:
    config = yaml.safe_load(file)

# Define feature list and target: the features are the concatenation of the
# four config groups, in this order.
feature_columns = [
    column
    for group in ('BASE', 'SENTIMENT', 'TEMPORAL', 'LAGS')
    for column in config[group]
]

target_column = 'TARGET'

# Define window size
window_size = 30


### Load models

In [None]:
# Names of the models to load; each one is handed to ForecastModel below and
# also becomes its key in `model_dict`.
MODEL_NAMES = [
    'CLc1_CRU_BiGRU_Std_2024.05.15_18.17',
    'CLc1_CRU_BiGRU_Rob_2024.05.15_18.30',
    'CLc1_CRU_BiGRU_light_2024.05.15_19.21',
]

# Parameter set used for the first two models.
MODEL_PARAMS = dict(
    units_first_layer=64,
    units_second_layer=64,
    dropout_rate_first=0.1,
    dropout_rate_second=0.1,
    l2_strength=1e-05,
    learning_rate=0.01,
    batch_size=32,
    noise_std=0.01,
)

# Alternative parameter set used for the third model.
OPT_PARAMS = dict(
    units_first_layer=16,
    units_second_layer=16,
    dropout_rate_first=0.2104,
    dropout_rate_second=0.2869,
    l2_strength=0.000033,
    learning_rate=0.0028,
    batch_size=64,
    noise_std=0.0851,
)

# Aligned by position with MODEL_NAMES: params[i] belongs to MODEL_NAMES[i].
params = [MODEL_PARAMS, MODEL_PARAMS, OPT_PARAMS]

# Data settings shared by every model.
data_params = dict(
    feature_columns=feature_columns,
    target_column=target_column,
    window_size=window_size,
    test_size=0.2,
    val_size=0.2,
    CV=True,
)

# Build one ForecastModel per name, showing a progress bar while doing so.
model_dict = {}
progress = tqdm(MODEL_NAMES)
for i, model_name in enumerate(progress):
    model_dict[model_name] = ForecastModel(model_name, params[i], data_params)


In [None]:
# Call describe() on every loaded model, emitting an empty line after each one.
loaded_models = list(model_dict.values())
for model in loaded_models:
    model.describe()
    print()

In [None]:
# Evaluate the models: overlay each model's test predictions on the test
# targets, restricted to the last `view` test samples.
view = 300

fig, ax = plt.subplots(figsize=(10, 5), dpi=200)

for i, model in enumerate(model_dict.values()):
    # Model names are underscore-separated; the first field labels the actual
    # series and fields 1-3 label the model's own line.
    name_parts = model.model_name.split('_')

    if i == 0:
        # The targets are drawn only once, taken from the first model.
        # NOTE(review): assumes every model shares the same test targets — confirm.
        actual = model.test_targets[-view:]
        ax.plot(actual, label='Actual ' + name_parts[0], color='gray', lw=0.8)

    ax.plot(model.test_predictions[-view:], label=' '.join(name_parts[1:4]), lw=0.8)

# Title typo fixed, and the y-axis is labelled with the quantity named in the
# title instead of 'Price'.
ax.set_title('Realized 5-min volatility, Model Fit vs Actual')
ax.set_xlabel('Samples')
ax.set_ylabel('Realized volatility')
ax.legend(frameon=False)
ax.grid(alpha=0.3)

### Daily realized volatility

In [None]:
# Evaluate the models on a daily basis: index predictions and targets by their
# test dates, sum them per calendar day and overlay the results.
fig, ax = plt.subplots(figsize=(10, 5), dpi=200)

for i, model in enumerate(model_dict.values()):
    # Model names are underscore-separated; the first field labels the actual
    # series and fields 1-3 label the model's own line.
    name_parts = model.model_name.split('_')

    # Align the predictions with the trailing test dates of the same length.
    test_pred = pd.Series(model.test_predictions, index=model.gen.test_dates[-len(model.test_predictions):])

    # resample('D') yields a row for every calendar day in the range, and a
    # plain .sum() reports 0 for days without observations, which would draw
    # spurious drops to zero. min_count=1 turns those days into NaN so they
    # can be dropped before plotting.
    if i == 0:
        # The targets are drawn only once, taken from the first model.
        # NOTE(review): assumes every model shares the same test targets — confirm.
        actual = pd.Series(model.test_targets, index=model.gen.test_dates[-len(model.test_targets):])
        actual_daily = actual.resample('D').sum(min_count=1).dropna()
        ax.plot(actual_daily, label='Actual ' + name_parts[0], color='gray', lw=0.8)

    pred_daily = test_pred.resample('D').sum(min_count=1).dropna()
    ax.plot(pred_daily, label=' '.join(name_parts[1:4]), lw=0.8)

# Title and axis labels describe this figure (daily sums over a date axis)
# rather than repeating those of the per-sample plot.
ax.set_title('Daily realized volatility, Model Fit vs Actual')
ax.set_xlabel('Date')
ax.set_ylabel('Realized volatility (daily sum)')
ax.legend(frameon=False, loc='upper left', ncols=3)
ax.grid(alpha=0.3)