In [None]:
import os
import sys
import warnings
from typing import Any

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import yaml
from dotenv import load_dotenv
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Suppress all Python warnings for the rest of the session.
warnings.filterwarnings("ignore")

# Load variables from a local .env file into the process environment, then
# read the repository root from REPO_PATH.
load_dotenv()
REPO_PATH = os.getenv("REPO_PATH")

if REPO_PATH is not None:
    # os.path.join supplies the separator when REPO_PATH has no trailing
    # slash; plain string concatenation would turn '/repo' into '/reposrc'.
    sys.path.insert(0, os.path.join(REPO_PATH, 'src'))
else:
    # Formatting None into the path would add the meaningless entry 'Nonesrc'
    # to sys.path, so skip the insert and report the missing variable instead.
    print(
        "REPO_PATH is not set; the project's src/ directory was not added to sys.path.",
        file=sys.stderr,
    )

from utils.forecast_utils import mean_directional_accuracy
from utils.model_utils import save_model_info, train_RNN

### Load feature configuration

In [None]:
# Parse the YAML file that holds the named groups of candidate variables.
# NOTE(review): yaml.FullLoader is kept unchanged; if this file could ever come
# from an untrusted source, yaml.safe_load would be the safer choice.
with open('variable_config.yaml') as config_file:
    config = yaml.load(config_file, Loader=yaml.FullLoader)

# Model inputs: 'REALIZED_VOL' first, followed by every entry of the BASE,
# TEMPORAL and SENTIMENT_CUSTOM groups, in that order.
feature_columns = ['REALIZED_VOL'] + [
    column
    for group in ('BASE', 'TEMPORAL', 'SENTIMENT_CUSTOM')
    for column in config[group]
]


### Build and fit model


In [None]:
# Data-preparation settings; passed to train_RNN and to save_model_info.
# Annotated with typing.Any: the lowercase builtin `any` is a function, not a type.
# NOTE(review): the meaning of each key is defined by train_RNN — e.g.
# 'window_size' is presumably the look-back length and 'test_size'/'val_size'
# presumably split fractions; confirm against utils.model_utils.
DATA_PARAMS: dict[str, Any] = {
    'feature_columns': feature_columns,
    'target_column': 'REALIZED_VOL',
    'window_size': 14,
    'test_size': 0.2,
    'val_size': 0.2,
    'CV': False,
    'scaler_type': 'RobustScaler'
}

# Network and optimiser hyper-parameters; passed to train_RNN and to
# save_model_info.
# Annotated with typing.Any: the lowercase builtin `any` is a function, not a type.
MODEL_PARAMS: dict[str, Any] = {
    "units_first_layer": 64,
    "units_second_layer": 64,
    "l2_strength": 0.003,
    "learning_rate": 0.0003,
    "batch_size": 64,
    "noise_std": 0.0851
}

# Run-level settings. FUTURE, RNN_TYPE and IDENTIFIER also become parts of the
# saved model's name.
RNN_TYPE: str = 'BiLSTM'
FUTURE: str = 'CLc1'
MAX_EPOCHS: int = 150
IDENTIFIER: str = 'NOdropout'

# Train the network. The call returns three values: the model, an object
# exposing the data generators (its `test_generator` is used for evaluation),
# and a dictionary of losses.
model, gen, loss_dict = train_RNN(FUTURE, DATA_PARAMS, MODEL_PARAMS, RNN_TYPE, MAX_EPOCHS)

# Name the run "<future>_<rnn type>_<identifier>_<timestamp>", with the
# timestamp taken at minute resolution.
run_finished_at = pd.Timestamp.now()
current_dt: str = run_finished_at.strftime('%Y.%m.%d_%H.%M')
model_name: str = '_'.join([FUTURE, RNN_TYPE, IDENTIFIER, current_dt])

# Persist the model together with the settings and losses that produced it.
save_model_info(model, model_name, MODEL_PARAMS, DATA_PARAMS, loss_dict)

In [None]:
# Evaluate the model on the test generator and plot predictions against targets.
view = 500  # number of trailing test samples shown in the plot

# Predictions for every test batch, flattened to 1-D.
test_predictions = model.predict(gen.test_generator)
test_predictions = test_predictions.flatten()

# Targets are collected by iterating the same generator a second time.
# NOTE(review): this assumes the generator yields batches in the same order on
# both passes (no shuffling) — confirm in the generator implementation.
test_targets = np.concatenate([y for _, y in gen.test_generator])
test_targets = test_targets.flatten()

mse = mean_squared_error(test_targets, test_predictions)
mae = mean_absolute_error(test_targets, test_predictions)
mda = mean_directional_accuracy(
    pd.Series(test_targets), pd.Series(test_predictions)
)

print(f'Mean Squared Error: {mse}')
print(f'Mean Absolute Error: {mae}')
print(f'Mean Directional Accuracy: {mda}')

fig, ax = plt.subplots(figsize=(10, 5), dpi=200)
ax.plot(test_targets[-view:], label='Actual')
ax.plot(test_predictions[-view:], label='Predicted')
ax.set_title('Model Fit vs Actual')
ax.set_xlabel('Samples')
# The plotted series is the model's target column, not a price, so label the
# axis with the configured target name.
ax.set_ylabel(DATA_PARAMS['target_column'])
ax.legend(frameon=False)
ax.grid(alpha=0.3)