In [None]:
import numpy as np
import pandas as pd
import yaml
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error
from tensorflow.keras.callbacks import EarlyStopping
import os
import sys
from dotenv import load_dotenv

# Read REPO_PATH from the local .env file / process environment.
load_dotenv()
REPO_PATH = os.getenv("REPO_PATH")

# Make the project's `src_HF` directory importable so `utils.forecast_utils`
# (imported below) can be resolved.
# - os.path.join works whether or not REPO_PATH ends with a path separator;
#   plain string concatenation silently produced a wrong path without one.
# - When REPO_PATH is unset, sys.path is left untouched instead of receiving
#   the bogus entry 'Nonesrc_HF'; the import below then either resolves from
#   the existing path or fails with the usual ModuleNotFoundError.
# - The membership check keeps this cell idempotent across re-runs.
if REPO_PATH:
    src_hf_path = os.path.join(REPO_PATH, 'src_HF')
    if src_hf_path not in sys.path:
        sys.path.insert(0, src_hf_path)

from utils.forecast_utils import load_prepared_data, preprocess_data, optimize_hyperparameters, build_rnn_model

### Import data

In [None]:
# Identifiers selecting which prepared dataset to load
# (passed straight through to load_prepared_data).
future, topic = 'CLc1', 'CEN'

# Fetch the prepared frame for this future/topic pair and render it.
df = load_prepared_data(future, topic)

display(df)

In [None]:
# Name of the column to predict and number of time steps per input window.
target_column = 'TARGET'
window_size = 30

# Read the variable groups from the YAML config.
# NOTE(review): FullLoader can construct more than plain YAML scalars/lists;
# if this file only holds lists of column names, yaml.safe_load would be
# sufficient -- confirm before switching.
with open('variable_config.yaml') as file:
    config = yaml.load(file, Loader=yaml.FullLoader)

# Flatten the BASE, SENTIMENT and TEMPORAL groups (in that order) into one
# feature list.
feature_columns = [
    column
    for group in ('BASE', 'SENTIMENT', 'TEMPORAL')
    for column in config[group]
]

print(feature_columns)

# Scale the data and split it into train and test sets (default train size
# is 80% according to the original author's note).
train_generator, test_generator = preprocess_data(
    df,
    feature_columns,
    target_column,
    window_size,
)


### Hyperparameter tuning

In [None]:
# Recurrent architecture to tune. Alternatives: BiLSTM, GRU or BiGRU
rnn_type = 'LSTM'

# Search space handed to optimize_hyperparameters, keyed by hyperparameter
# name. How each list is interpreted (discrete choices vs. bounds) is decided
# inside optimize_hyperparameters -- TODO confirm against utils.forecast_utils.
trial_params = dict(
    units_first_layer=[16, 32, 64, 128],
    units_second_layer=[16, 32, 64, 96],
    dropout_rate_first=[0.1, 0.5],
    dropout_rate_second=[0.1, 0.5],
    l2_strength=[1e-5, 1e-3],
    learning_rate=[1e-5, 1e-2],
    batch_size=[16, 32, 64],
    noise_std=[0.01, 0.1],
)

# Run the Optuna search: 50 trials, n_jobs=-1.
# NOTE(review): test_generator is passed into the search; if it is used to
# score trials, the later test metrics are not fully out-of-sample -- confirm.
best_params = optimize_hyperparameters(
    train_generator, test_generator, trial_params,
    feature_columns, rnn_type, window_size,
    n_trials=50, n_jobs=-1,
)

print(f'Best parameters for {rnn_type} model: ', best_params)

In [None]:
# Build the model for inputs of shape (window_size, number of features)
model = build_rnn_model(rnn_type, best_params, (window_size, len(feature_columns)))

# Stop once val_loss has not improved for 10 epochs and roll back to the
# best weights seen so far.
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True, verbose=1)

# Train the model with early stopping.
# Keras rejects `batch_size` and `validation_split` when the training input is
# a generator / Sequence: batches come from the generator itself and Keras
# cannot slice a validation share out of it. Validation data is therefore
# passed explicitly, which keeps `val_loss` available for early stopping and
# for the history plot.
# NOTE(review): assumes train_generator / test_generator are Keras-compatible
# batch generators, as their names suggest -- confirm in preprocess_data.
# NOTE(review): best_params['batch_size'] is no longer applied here; if the
# tuned batch size should take effect it has to be set where the generators
# are created.
# NOTE(review): test_generator is the only held-out data available in this
# notebook, so metrics computed on it afterwards will be optimistic; a
# separate validation split would be cleaner.
history = model.fit(
    train_generator,
    epochs=150,
    validation_data=test_generator,
    callbacks=[early_stopping],
    verbose=1,
)

In [None]:
# Training vs. validation loss per epoch, drawn through the explicit Axes API.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(history.history['loss'], label='Train Loss')
ax.plot(history.history['val_loss'], label='Validation Loss')
ax.set(
    title='Model Training and Validation Loss',
    xlabel='Epochs',
    ylabel='Loss',
)
ax.legend()
plt.show()

In [None]:
# Evaluate the model on the test windows.
# X_test / y_test were never defined in this notebook, so the original cell
# raised NameError on a fresh kernel. Predictions are now produced from
# test_generator and the matching targets are collected from it batch by batch.
# NOTE(review): assumes test_generator supports len() and integer indexing that
# returns (inputs, targets) batches, and that it does not shuffle, so that
# y_pred and y_test stay aligned -- confirm in preprocess_data.
y_pred = model.predict(test_generator)
y_test = np.concatenate(
    [np.asarray(test_generator[i][1]) for i in range(len(test_generator))]
).ravel()

# y_test is defined here as a 1-D array because the plotting cells below
# read it together with y_pred.
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print("MAE:", mae)
print("RMSE:", rmse)

In [None]:
# Overlay the predicted series on the actual series for the test period.
fig, ax = plt.subplots(figsize=(15, 6))
ax.plot(y_test, color='blue', label='Actual Volatility')
ax.plot(y_pred.flatten(), color='red', alpha=0.7, label='Predicted Volatility')
ax.set(
    title='Volatility Forecasting Performance',
    xlabel='Time Steps',
    ylabel='Volatility',
)
ax.legend()
plt.show()

In [None]:
# Residual = actual minus predicted value at each test time step.
residuals = y_test - y_pred.flatten()

fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(residuals, color='blue')
ax.set_title('Residuals of Model Predictions', fontsize=16)
ax.set_xlabel('Time Steps', fontsize=15)
ax.set_ylabel('Residuals', fontsize=15)
plt.show()