In [None]:
import numpy as np
import pandas as pd
import yaml
import json
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error
from tensorflow.keras.callbacks import EarlyStopping
import os
import sys
import warnings
from dotenv import load_dotenv

# Silence all Python warnings for the rest of the notebook.
warnings.filterwarnings("ignore")

# Read variables from a local .env file (if any) into the process environment.
load_dotenv()
REPO_PATH = os.getenv("REPO_PATH")

# Make the project's `src_HF` folder importable. os.path.join adds the path
# separator when REPO_PATH has no trailing one and leaves the result unchanged
# when it already ends with a separator.
if REPO_PATH:
    sys.path.insert(0, os.path.join(REPO_PATH, 'src_HF'))
else:
    # Warnings are globally ignored above, so report on stderr instead.
    print(
        "REPO_PATH is not set; 'src_HF' was not added to sys.path. "
        "Imports from it only work if the folder is importable some other way.",
        file=sys.stderr,
    )

from utils.forecast_utils import load_prepared_data, preprocess_data, build_rnn_model

### Import data

In [None]:
# Identifiers handed to load_prepared_data to pick the dataset
future, topic = 'CLc1', 'CRU'

# Fetch the prepared frame for that future/topic pair and show it
df = load_prepared_data(future, topic)
display(df)

### Process data

In [None]:
# Read the variable groups from the YAML configuration file
with open('variable_config.yaml') as config_file:
    config = yaml.load(config_file, Loader=yaml.FullLoader)

# The feature list is the concatenation of these config groups, in this order
feature_columns = [
    column
    for group in ('BASE', 'SENTIMENT', 'TEMPORAL', 'LAGS')
    for column in config[group]
]
print(feature_columns)

# Name of the column the model is trained to predict
target_column = 'TARGET'

# RNN variant to build. Alternatives: BiLSTM, GRU or BiGRU
rnn_type = 'GRU'

# Number of time steps per input window
window_size = 30

# Build the train / validation / test generators. Scaling and splitting are
# done inside preprocess_data (not visible here); the original note mentions a
# default train/val size of 80% - TODO confirm against preprocess_data.
train_generator, val_generator, test_generator = preprocess_data(
    df, feature_columns, target_column, window_size,
    test_size=0.2, val_size=0.2,
)

### Build and fit model

In [None]:
# Hyperparameters handed to build_rnn_model
best_params = {
    'units_first_layer': 64,
    'units_second_layer': 64,
    'dropout_rate_first': 0.1,
    'dropout_rate_second': 0.1,
    'l2_strength': 1e-05,
    'learning_rate': 0.01,
    'batch_size': 32,
    'noise_std': 0.01
}

# Build the model; the input shape is (time steps per window, number of features)
model = build_rnn_model(
    rnn_type,
    best_params,
    (window_size, len(feature_columns))
)

# Upper bound on training epochs.
# NOTE(review): with 3 epochs the early-stopping patience of 10 below can never
# be exhausted, so training always runs all epochs. This looks like a leftover
# quick-test value - raise it for a real training run.
max_epochs = 3

# Configure early stopping on the validation loss
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
    verbose=1
)

# Train the model. `batch_size` is deliberately NOT passed to fit(): Keras
# documents that it must not be specified for generator / Sequence / Dataset
# inputs, because those produce their own batches.
# NOTE(review): best_params['batch_size'] therefore only matters if the helper
# that builds the generators or the model reads it - confirm in forecast_utils.
history = model.fit(
    train_generator,
    epochs=max_epochs,
    validation_data=val_generator,
    callbacks=[early_stopping],
    verbose=1
)

# Timestamp captured right after training; used to name the saved model
current_dt = pd.Timestamp.now().strftime('%Y.%m.%d_%H.%M')

### Save model

In [None]:
# Save the trained model, its metadata and its architecture under model_archive/
model_name = f'{future}_{topic}_{rnn_type}_{current_dt}'

# Metadata stored next to the model file. The value type is `object`: the
# builtin function `any` used previously is not a type.
model_info: dict[str, object] = {
    'model_name': model_name,
    'model_type': rnn_type,
    'feature_columns': feature_columns,
    'target_column': target_column,
    'window_size': window_size,
    'best_params': best_params,
    'loss': history.history['loss'],
    'val_loss': history.history['val_loss']
}

archive_dir = 'model_archive'
model_path = f'{archive_dir}/{model_name}.h5'

# Create the archive folder up front so the JSON writes below cannot fail on a
# fresh checkout where it does not exist yet.
os.makedirs(archive_dir, exist_ok=True)

# Never overwrite an already archived model with the same name
if not os.path.exists(model_path):
    model.save(model_path)

    # write model info to json
    with open(f'{archive_dir}/{model_name}_info.json', 'w') as f:
        json.dump(model_info, f)

    model_json = model.to_json()
    # write model architecture to json
    with open(f'{archive_dir}/{model_name}_model.json', 'w') as f:
        f.write(model_json)
else:
    # Tell the user instead of skipping silently
    print(f'{model_path} already exists - model was not saved again.')

    


In [None]:
# Training and validation loss curves recorded by model.fit
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(history.history['loss'], label='Train Loss')
ax.plot(history.history['val_loss'], label='Validation Loss')
ax.set(
    title='Model Training and Validation Loss',
    xlabel='Epochs',
    ylabel='Loss',
)
ax.legend()
plt.show()

In [None]:
# Evaluate the model on the test generator
# Number of trailing samples shown in the plots
view = 500

# Predictions and targets are collected in two separate passes over
# test_generator and compared element-wise below.
# NOTE(review): this assumes the generator yields batches in the same order on
# every pass (no shuffling) - confirm in preprocess_data.
test_predictions = model.predict(test_generator).flatten()
test_targets = np.concatenate([y for _, y in test_generator]).flatten()

mse = mean_squared_error(test_targets, test_predictions)
mae = mean_absolute_error(test_targets, test_predictions)

print(f'Mean Squared Error: {mse}')
print(f'Mean Absolute Error: {mae}')

# plot fit vs actual for the last `view` samples
plt.figure(figsize=(10, 5))
plt.plot(test_targets[-view:], label='Actual')
plt.plot(test_predictions[-view:], label='Predicted')
plt.title('Model Fit vs Actual')
plt.xlabel('Samples')
plt.ylabel('Price')
plt.legend()
# Render the figure explicitly, as the other plotting cells do, so the cell
# does not end on the Legend object's repr.
plt.show()

In [None]:
# Residuals = actual minus predicted; plot the last `view` of them
residuals = test_targets - test_predictions

fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(residuals[-view:], color='blue')
ax.set_title('Residuals of Model Predictions', fontsize=16)
ax.set_xlabel('Time Steps', fontsize=15)
ax.set_ylabel('Residuals', fontsize=15)
plt.show()