In [None]:
import json
import os
import sys
import warnings
from typing import Any

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import yaml
from dotenv import load_dotenv
from sklearn.metrics import mean_squared_error, mean_absolute_error
from tensorflow.keras.callbacks import EarlyStopping
from tqdm.notebook import tqdm

# Silence all warnings for the rest of the session to keep cell output readable.
# NOTE(review): this blanket filter also hides deprecation and numerical warnings.
warnings.filterwarnings("ignore")

# Load variables from a .env file (if one is found) into the process environment,
# then read the repository root from REPO_PATH.
load_dotenv()
REPO_PATH = os.getenv("REPO_PATH")

# Make the `src_HF` directory importable so the `utils.*` imports below resolve.
# os.path.join inserts a separator only when REPO_PATH lacks a trailing one, and
# an unset REPO_PATH no longer pushes a bogus 'Nonesrc_HF' entry onto sys.path.
if REPO_PATH:
    sys.path.insert(0, os.path.join(REPO_PATH, 'src_HF'))

from utils.forecast_utils import RNNGenerator, build_rnn_model
from utils.model_utils import save_model_info

### Load variable config and define features

In [None]:
# Read the variable groups from the YAML config in the working directory.
# NOTE(review): FullLoader can build more than plain data types; if this file only
# holds lists and strings, yaml.safe_load would be the safer loader — confirm.
with open('variable_config.yaml') as config_file:
    config = yaml.load(config_file, Loader=yaml.FullLoader)

# Model inputs: the BASE group followed by the TEMPORAL group.
# (Unpack config['SENTIMENT'] here as well to include the sentiment variables.)
feature_columns = [*config['BASE'], *config['TEMPORAL']]

# NOTE(review): this value is not referenced by the later cells shown in this
# notebook — DATA_PARAMS hard-codes 'TARGET_1' as the target. Confirm which one
# is intended.
target_column = config['SENTIMENT_CUSTOM']

### Build and fit model

In [None]:
# Data-pipeline settings. They are forwarded to RNNGenerator / preprocess_data
# below and handed to save_model_info when the model is saved.
# 'window_size' is also used as the first dimension of the model input shape.
DATA_PARAMS: dict[str, Any] = {
    'feature_columns': feature_columns,
    'target_column': 'TARGET_1',
    'window_size': 14,
    'test_size': 0.2,
    'val_size': 0.2,
    'CV': False,
    'scaler_type': 'RobustScaler'
}

# Hyperparameters passed to build_rnn_model and handed to save_model_info.
# 'batch_size' is additionally passed to model.fit.
MODEL_PARAMS: dict[str, Any] = {
    'units_first_layer': 64,
    'units_second_layer': 64,
    'dropout_rate_first': 0.2104,
    'dropout_rate_second': 0.2869,
    'l2_strength': 0.002,
    'learning_rate': 0.0003,
    'batch_size': 64,
    'noise_std': 0.0851
}

# Architecture key given to build_rnn_model; also embedded in the saved model name.
RNN_TYPE = 'BiLSTM'
# Upper bound on epochs per fit call; early stopping may end training sooner.
MAX_EPOCHS = 150

# Early stopping: watch val_loss, allow 10 epochs without improvement, then stop
# and restore the best weights seen during that fit.
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True, verbose=1)

# Data generator for the 'CLc1' future; CV toggles the generator's
# cross-validation mode.
gen = RNNGenerator(future='CLc1', CV=DATA_PARAMS['CV'])

# Prepare the data: features, target and window length are positional, the
# split sizes and scaler choice are forwarded by keyword.
window_size = DATA_PARAMS['window_size']
gen.preprocess_data(
    DATA_PARAMS['feature_columns'],
    DATA_PARAMS['target_column'],
    window_size,
    **{key: DATA_PARAMS[key] for key in ('test_size', 'val_size', 'scaler_type')},
)

# Build the network; the input shape is (window length, number of features).
input_shape = (window_size, len(DATA_PARAMS['feature_columns']))
model = build_rnn_model(RNN_TYPE, MODEL_PARAMS, input_shape)

# The loss plot is re-saved after every fold. Create its folder up front so a
# missing directory cannot abort the run after a fold has already been trained.
LIVE_PLOT_PATH = 'model_vizualizations/live.png'
os.makedirs(os.path.dirname(LIVE_PLOT_PATH), exist_ok=True)

fig, ax = plt.subplots(figsize=(7, 5), dpi=200)

# Static decoration is applied before training so every saved snapshot carries
# the title, axis labels and log scale (previously these were set only after the
# last save, so the file on disk never had them).
ax.set_title('Model Training and Validation Loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss MSE')
ax.set_yscale('log')

history_list = []
# Fit the model once per train/validation generator pair, with early stopping.
# NOTE(review): the same `model` instance is fitted on every pair, so weights
# carry over from one fold to the next — confirm this is intended when CV is on.
# NOTE(review): Keras documents that `batch_size` should not be specified when the
# data comes from a generator/Sequence (the generator defines the batches) —
# confirm the generators really use MODEL_PARAMS['batch_size'].
for i in tqdm(range(len(gen.train_generators))):
    history = model.fit(
        gen.train_generators[i],
        epochs=MAX_EPOCHS,
        batch_size=MODEL_PARAMS['batch_size'],
        validation_data=gen.val_generators[i],
        callbacks=[early_stopping],
        verbose=1
    )
    history_list.append(history)
    ax.plot(history.history['loss'], label=f'Train Loss k={i + 1}')
    ax.plot(history.history['val_loss'], linestyle=':', label=f'Val Loss k={i + 1}')

    # Refresh the legend and overwrite the snapshot so progress can be checked
    # from disk while later folds are still training.
    ax.legend(frameon=False)
    fig.savefig(LIVE_PLOT_PATH)

# Timestamp taken when training finishes; the save cell uses it in the model name.
current_dt = pd.Timestamp.now().strftime('%Y.%m.%d_%H.%M')

### Save model

In [None]:
# Free-form tag placed between the architecture and the timestamp in the name.
IDENTIFIER: str = 'loss_'

# Name layout: <future>_<rnn type>_<identifier><timestamp>
model_name = '_'.join((f'{gen.future}', RNN_TYPE, f'{IDENTIFIER}{current_dt}'))

# Hand the trained model, its name and both parameter dicts to the project's
# save helper.
save_model_info(model, model_name, MODEL_PARAMS, DATA_PARAMS)


In [None]:
# Number of trailing test samples shown in the plots.
view = 500

# Predict over the whole test generator and collapse the result to 1-D.
test_predictions = model.predict(gen.test_generator).flatten()

# Collect the targets of every test batch in generator order, also as 1-D.
test_targets = np.concatenate(
    [batch_targets for _, batch_targets in gen.test_generator]
).flatten()

# Error metrics on the test split.
mse = mean_squared_error(test_targets, test_predictions)
mae = mean_absolute_error(test_targets, test_predictions)

print(f'Mean Squared Error: {mse}')
print(f'Mean Absolute Error: {mae}')

# Overlay predictions on the actual values for the last `view` samples.
fig, ax = plt.subplots(figsize=(10, 5), dpi=200)
actual_tail = test_targets[-view:]
predicted_tail = test_predictions[-view:]
ax.plot(actual_tail, label='Actual')
ax.plot(predicted_tail, label='Predicted')
ax.set(title='Model Fit vs Actual', xlabel='Samples', ylabel='Price')
ax.legend(frameon=False)

In [None]:
# Residual = actual minus predicted; plot the last `view` of them.
residuals = test_targets - test_predictions

res_fig, res_ax = plt.subplots(figsize=(10, 5))
res_ax.plot(residuals[-view:], color='blue')
res_ax.set_title('Residuals of Model Predictions', fontsize=16)
res_ax.set_xlabel('Time Steps', fontsize=15)
res_ax.set_ylabel('Residuals', fontsize=15)
plt.show()