In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import json
import seaborn as sns
import scienceplots

import os
import sys
import warnings
from dotenv import load_dotenv

# Notebook-wide setup: plotting style, environment variables, warning policy.
plt.style.use('science')
load_dotenv()
warnings.filterwarnings("ignore")

# REPO_PATH is read from the environment (load_dotenv() above may populate it
# from a .env file). It is expected to point at the repository root.
REPO_PATH = os.getenv("REPO_PATH")
if REPO_PATH is not None:
    # os.path.join yields the same '<repo>/src' entry whether or not REPO_PATH
    # ends with a path separator; plain string concatenation produced
    # '<repo>src' for a value without a trailing separator and the literal
    # 'Nonesrc' when the variable was unset.
    sys.path.insert(0, os.path.join(REPO_PATH, 'src'))

# Project-local helpers; resolved through the sys.path entry added above
# (or through the existing sys.path when REPO_PATH is not set).
from utils.forecast_utils import ForecastPredictions, load_models
from utils.var_utils import forecast_var

### VAR model results

In [None]:
# Name of the model handed to ForecastPredictions as the benchmark.
BENCHMARK = 'LCOc1_VAR_BASE'

# Models passed to load_models(); later cells iterate over this list in order.
MODEL_NAMES = ['LCOc1_VAR_BASE', 'LCOc1_VAR_BASE.TEMP']

# NOTE(review): presumably wraps the benchmark's forecasts so load_models can
# compute metrics relative to it -- confirm in utils.forecast_utils.
benchmark_fc = ForecastPredictions(BENCHMARK, forecast_var)
model_dict, metric_df = load_models(MODEL_NAMES, benchmark_fc)

# Show floats with four decimals in every DataFrame rendered from here on.
pd.set_option('display.float_format', '{:.4f}'.format)
display(metric_df)

In [None]:
# Plot the last `view` test observations against each model's forecast.
view = 400

# Models to draw, in legend order.
PLOT_MODEL = MODEL_NAMES

# File stem for the saved figure; a falsy value skips saving.
PLOT_NAME = 'LCOc1_VAR'

# Legend labels, positionally matched to PLOT_MODEL. When empty, labels are
# derived from the second and third '_'-separated tokens of the model name.
PLOT_LABELS = [
    'VAR B',
    'VAR BT'
]

fig, ax = plt.subplots(figsize=(10, 5), dpi=200)
colors = sns.color_palette('bright', n_colors=len(model_dict))

for i, model_name in enumerate(PLOT_MODEL):
    model = model_dict[model_name]

    if i == 0:
        # The realised series is drawn once, taken from the first model.
        actual = model.y_test[-view:]
        ax.plot(
            actual,
            label=f'Actual {model.model_name.split("_")[0]} RV',
            color='gray',
            lw=0.9
        )

    if len(PLOT_LABELS) == 0:
        forecast_label = ' '.join(model.model_name.split('_')[1:3])
    else:
        forecast_label = PLOT_LABELS[i]

    ax.plot(
        model.y_pred[-view:],
        label=forecast_label,
        color=colors[i],
        lw=0.9
    )

ax.set_xlabel('Time (5-min intervals)', fontsize=13)
ax.set_ylabel('Annualized Realized Volatility', fontsize=13)
# `ncol` (rather than `ncols`) matches the other legends in this notebook and
# is accepted by Matplotlib releases that predate the `ncols` spelling.
ax.legend(frameon=False, ncol=3, fontsize=12)
ax.grid(alpha=0.2)

if PLOT_NAME:
    # savefig does not create missing directories.
    os.makedirs('images', exist_ok=True)
    fig.savefig(f'images/{PLOT_NAME}.png')


### Loss comparison

In [None]:
# Load the per-epoch loss history of every model that has one. Models whose
# second '_'-separated name token is 'VAR' are skipped (presumably they have
# no loss_data.json -- confirm against model_archive/).
loss_frames = {}
for model_name in MODEL_NAMES:
    if model_name.split('_')[1] == 'VAR':
        continue
    with open(f'model_archive/{model_name}/loss_data.json', 'r') as f:
        loss_dict = json.load(f)
    # Suffix the columns so histories of several models can sit side by side.
    loss_frames[model_name] = pd.DataFrame(loss_dict).add_suffix(f'_{model_name}')

# Colours stay indexed by position in MODEL_NAMES so that a model keeps the
# same colour as in the forecast plot even when other models are skipped.
colors = sns.color_palette('bright', n_colors=len(MODEL_NAMES))

if not loss_frames:
    # pd.concat raises ValueError on an empty list, so with a VAR-only model
    # list there is nothing to draw.
    print('No loss history to plot: every model in MODEL_NAMES is a VAR model.')
else:
    loss_df = pd.concat(list(loss_frames.values()), axis=1)

    fig, ax = plt.subplots(figsize=(9, 5), dpi=200)
    model_lines = []
    for i, model_name in enumerate(MODEL_NAMES):
        if model_name not in loss_frames:
            # Skipped above; loss_df has no columns for this model.
            continue
        # Same fallback as the forecast plot when no explicit label exists.
        if i < len(PLOT_LABELS):
            model_label = PLOT_LABELS[i]
        else:
            model_label = ' '.join(model_name.split('_')[1:3])
        ax.plot(
            loss_df.index,
            loss_df[f'train_loss_{model_name}'],
            color=colors[i],
            linestyle='--'
        )
        line = ax.plot(
            loss_df.index,
            loss_df[f'val_loss_{model_name}'],
            color=colors[i],
            label=model_label
        )
        model_lines.append(line[0])  # Only need one handle per model

    # First legend: one entry per model (colour coding).
    first_legend = ax.legend(
        handles=model_lines,
        loc='upper right',
        ncol=1,
        fontsize=14
    )
    # Re-add explicitly; the next ax.legend() call would otherwise replace it.
    ax.add_artist(first_legend)

    # Second legend: generic black proxies explaining the two line styles.
    train_lines = ax.plot([], [], color='black', linestyle='--', label='Training Loss')
    val_lines = ax.plot([], [], color='black', label='Validation Loss')
    second_legend = ax.legend(
        handles=[train_lines[0], val_lines[0]],
        loc='lower center',
        bbox_to_anchor=(0.5, -0.22),
        ncol=2,
        fontsize=14
    )

    ax.set_xlabel('Epoch', fontsize=14)
    ax.set_ylabel('Loss [MSE]', fontsize=14)
    ax.grid(alpha=0.3)
    fig.tight_layout()

    if PLOT_NAME:
        # savefig does not create missing directories.
        os.makedirs('images', exist_ok=True)
        fig.savefig(f'images/{PLOT_NAME}_loss.png')

In [None]:
# Combined figure: forecasts vs. realised values (left panel) and the
# training/validation loss curves (right panel).
view = 400

# Models to draw, in legend order.
PLOT_MODEL = MODEL_NAMES

# Falsy PLOT_NAME: the figure is shown but not written to disk.
PLOT_NAME = None

# Empty list: forecast labels are derived from the model names below.
PLOT_LABELS = []

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6), dpi=200)
colors = sns.color_palette('bright', n_colors=len(model_dict))

# ---- Left panel: last `view` test observations and forecasts ----
for i, model_name in enumerate(PLOT_MODEL):
    model = model_dict[model_name]

    if i == 0:
        # The realised series is drawn once, taken from the first model.
        actual = model.y_test[-view:]
        ax1.plot(
            actual,
            label=f'Actual {model.model_name.split("_")[0]} RV',
            color='gray',
            lw=0.7
        )

    if len(PLOT_LABELS) == 0:
        forecast_label = ' '.join(model.model_name.split('_')[1:3])
    else:
        forecast_label = PLOT_LABELS[i]

    ax1.plot(
        model.y_pred[-view:],
        label=forecast_label,
        color=colors[i],
        lw=0.7
    )

ax1.set_xlabel('Time (5-min intervals)', fontsize=13)
ax1.set_ylabel('Annualized Realized Volatility', fontsize=13)
ax1.grid(alpha=0.2)

# Legend anchored below the left axes.
second_legend = ax1.legend(
    loc='lower left',
    bbox_to_anchor=(0, -0.3),
    ncol=3,
    fontsize=13
)

# ---- Right panel: per-epoch loss curves ----
# Models whose second '_'-separated name token is 'VAR' are skipped
# (presumably they have no loss_data.json -- confirm against model_archive/).
loss_frames = {}
for model_name in MODEL_NAMES:
    if model_name.split('_')[1] == 'VAR':
        continue
    with open(f'model_archive/{model_name}/loss_data.json', 'r') as f:
        loss_dict = json.load(f)
    # Suffix the columns so histories of several models can sit side by side.
    loss_frames[model_name] = pd.DataFrame(loss_dict).add_suffix(f'_{model_name}')

# Indexed by position in MODEL_NAMES so colours match the left panel.
colors = sns.color_palette('bright', n_colors=len(MODEL_NAMES))

model_lines = []
if loss_frames:
    loss_df = pd.concat(list(loss_frames.values()), axis=1)

    for i, model_name in enumerate(MODEL_NAMES):
        if model_name not in loss_frames:
            # Skipped above; loss_df has no columns for this model.
            continue
        ax2.plot(
            loss_df.index,
            loss_df[f'train_loss_{model_name}'],
            color=colors[i],
            linestyle='--'
        )
        line = ax2.plot(
            loss_df.index,
            loss_df[f'val_loss_{model_name}'],
            color=colors[i],
            label=model_name
        )
        model_lines.append(line[0])  # Only need one handle per model

    # Generic grey proxies explaining the two line styles; the per-model
    # colours are identified by the legend of the left panel.
    train_lines = ax2.plot([], [], color='darkgrey', linestyle='--', label='Training Loss')
    val_lines = ax2.plot([], [], color='darkgrey', label='Validation Loss')
    first_legend = ax2.legend(
        handles=[train_lines[0], val_lines[0]],
        loc='upper right',
        ncol=1,
        fontsize=13
    )
    ax2.add_artist(first_legend)
else:
    # pd.concat raises ValueError on an empty list; with a VAR-only model list
    # there are no curves, so state that instead of crashing the cell.
    ax2.text(
        0.5, 0.5,
        'No loss history for the selected models',
        transform=ax2.transAxes,
        ha='center',
        va='center',
        fontsize=13
    )

ax2.set_xlabel('Epoch', fontsize=14)
ax2.set_ylabel('Loss [MSE]', fontsize=14)
ax2.grid(alpha=0.3)
fig.tight_layout()

if PLOT_NAME:
    # savefig does not create missing directories.
    os.makedirs('images', exist_ok=True)
    fig.savefig(f'images/{PLOT_NAME}.png')
