In [None]:
import numpy as np
import pandas as pd
import yaml
import json
import os
import sys
from dotenv import load_dotenv

# Load variables from a .env file (if any) into the process environment,
# then read the repository location from REPO_PATH.
load_dotenv()
REPO_PATH = os.getenv("REPO_PATH")

# Make the modules under <REPO_PATH>/src_HF importable.
# os.path.join is used instead of string concatenation so REPO_PATH works
# with or without a trailing path separator.
if REPO_PATH is None:
    # Do not insert a bogus 'Nonesrc_HF' entry; the import below may still
    # succeed if the package is reachable some other way.
    print(
        "REPO_PATH is not set (environment / .env); "
        "src_HF was not added to sys.path",
        file=sys.stderr,
    )
else:
    _src_hf_path = os.path.join(REPO_PATH, 'src_HF')
    # Re-running this cell must not stack duplicate entries on sys.path.
    if _src_hf_path not in sys.path:
        sys.path.insert(0, _src_hf_path)

from utils.forecast_utils import load_prepared_data, preprocess_data, optimize_hyperparameters

### Import data

In [None]:
# Identifiers selecting which prepared dataset to load
future, topic = 'CLc1', 'CEN'

# Fetch the prepared data for this future/topic pair
df = load_prepared_data(future, topic)

# Preview the first two rows
display(df.head(2))

In [None]:
# Read the variable configuration from the YAML file in the working directory.
# NOTE(review): yaml.FullLoader can construct more than plain data types;
# if the config only holds plain lists/strings, yaml.safe_load would suffice.
with open('variable_config.yaml') as file:
    config = yaml.load(file, Loader=yaml.FullLoader)

# Model inputs: the BASE, SENTIMENT and TEMPORAL groups from the config,
# concatenated in that order.
feature_columns = [
    column
    for group in ('BASE', 'SENTIMENT', 'TEMPORAL')
    for column in config[group]
]

print(feature_columns)

# Name of the column used as the prediction target
target_column = 'TARGET'

# Recurrent architecture to tune. Alternatives: BiLSTM, GRU or BiGRU
rnn_type = 'LSTM'

# Window size passed to preprocess_data and to the hyperparameter search
window_size = 30

# Scale the data and split it into train / validation / test generators.
# With test_size=0.2 and val_size=0.2 the training share is below 80%
# (the exact share depends on how preprocess_data applies val_size — TODO confirm).
train_generator, val_generator, test_generator = preprocess_data(
    df, feature_columns, target_column, window_size,
    test_size=0.2, val_size=0.2,
)


In [None]:
# Hyperparameter search space handed to optimize_hyperparameters.
# NOTE(review): whether the two-element lists are treated as (low, high)
# bounds or as discrete choices is decided inside optimize_hyperparameters —
# confirm there before editing.
trial_params = dict(
    units_first_layer=[16, 32, 64, 128],
    units_second_layer=[16, 32, 64, 96],
    dropout_rate_first=[0.1, 0.5],
    dropout_rate_second=[0.1, 0.5],
    l2_strength=[1e-5, 1e-3],
    learning_rate=[1e-5, 1e-2],
    batch_size=[16, 32, 64],
    noise_std=[0.01, 0.1],
)

# Run the Optuna-based search over trial_params on the train/validation
# generators: 50 trials, n_jobs=-1.
best_params = optimize_hyperparameters(
    train_generator, val_generator,
    trial_params,
    feature_columns, rnn_type, window_size,
    n_trials=50, n_jobs=-1,
)

# Build a run-specific file name: <future>_<topic>_<rnn_type>_<timestamp>
current_dt = pd.Timestamp.now().strftime('%Y.%m.%d_%H.%M')
hp_filename = f'{future}_{topic}_{rnn_type}_{current_dt}'

print(f'Best parameters for {rnn_type} model: ', best_params)

# Save best hyperparameters to .json file.
# Create the archive directory first: without it, open() raises
# FileNotFoundError and the result of the search is never written to disk.
os.makedirs('hyperpm_archive', exist_ok=True)
with open(f'hyperpm_archive/{hp_filename}.json', 'w') as file:
    json.dump(best_params, file)