In [None]:
import json
import os
import sys
import warnings
from typing import Any

import pandas as pd
import yaml
from dotenv import load_dotenv

# Read a local .env file (if any) so REPO_PATH can be supplied without
# exporting it in the shell.
load_dotenv()

# Root of the repository; the project's `src` package directory lives under it.
REPO_PATH = os.getenv("REPO_PATH")
if REPO_PATH is None:
    # Fail here with a clear message instead of silently building the
    # literal path 'Nonesrc' and failing later on the project import.
    raise RuntimeError(
        "REPO_PATH is not set; define it in the environment or in a .env file."
    )

# os.path.join works whether or not REPO_PATH ends with a path separator.
_SRC_DIR = os.path.join(REPO_PATH, 'src')
if _SRC_DIR not in sys.path:
    # Guarded so that re-running this cell does not stack duplicate entries.
    sys.path.insert(0, _SRC_DIR)

# NOTE(review): this silences every warning for the rest of the session,
# including ones raised by the project import below.
warnings.filterwarnings("ignore")

# Project-local import: resolvable only after src/ has been put on sys.path.
from utils.model_utils import optimize_hyperparameters

### Hyperparameter tuning with Optuna

In [None]:

# Load the YAML variable config file from the repository root.
# os.path.join is used so the path is correct whether or not REPO_PATH ends
# with a path separator.
# NOTE(review): this is project-owned config, so FullLoader is kept as-is;
# yaml.safe_load would be the more conservative choice if the file only
# contains plain YAML types — confirm before switching.
with open(os.path.join(REPO_PATH, 'variable_config.yaml'), 'r') as file:
    var_config = yaml.load(file, Loader=yaml.FullLoader)

# Feature list: every entry of the 'BASE' group followed by every entry of
# the 'TEMPORAL' group, in the order they appear in the variable config.
SELECTED_FEATURES = [
    feature
    for group in ('BASE', 'TEMPORAL')
    for feature in var_config[group]
]

# Data-preparation settings handed to optimize_hyperparameters.
# The annotation uses typing.Any; the builtin `any` is a function, not a type.
# NOTE(review): the exact meaning of each key is defined in utils.model_utils,
# which is not visible here — confirm units/semantics there.
DATA_PARAMS: dict[str, Any] = {
    'feature_columns': SELECTED_FEATURES,
    'target_column': 'REALIZED_VOL',
    'window_size': 14,
    'test_size': 0.2,
    'val_size': 0.2,
    'CV': False,
    'scaler_type': 'RobustScaler'
}

# Hyperparameter search space handed to optimize_hyperparameters.
# NOTE(review): the longer lists look like discrete choices and the
# two-element float lists like [low, high] bounds — confirm how
# utils.model_utils interprets each entry.
TRIAL_PARAMS = dict(
    units_first_layer=[16, 32, 64, 128],
    units_second_layer=[16, 32, 64, 96],
    l2_strength=[1e-5, 1e-3],
    learning_rate=[1e-5, 1e-2],
    batch_size=[16, 32, 64],
    noise_std=[0.01, 0.1],
)

# Run configuration for this tuning session.
FUTURE: str = 'CLc1'            # first positional argument to optimize_hyperparameters
RNN_TYPE: str = 'GRU'           # recurrent architecture label passed to the optimizer
IDENTIFIER: str = 'BASE.TEMP'   # tag used as the prefix of the saved file name
TRIALS: int = 50                # forwarded as n_trials

# Run the hyperparameter search and keep the winning configuration.
# NOTE(review): n_jobs=-1 is presumably forwarded to Optuna, where it means
# "use all available cores" — confirm in utils.model_utils.
best_params = optimize_hyperparameters(
    FUTURE, TRIAL_PARAMS, DATA_PARAMS, RNN_TYPE,
    n_trials=TRIALS,
    n_jobs=-1,
)

# Stamp the output with the current local time (minute resolution) so that
# separate tuning runs get distinct file names.
current_dt = pd.Timestamp.now().strftime('%Y.%m.%d_%H.%M')
hp_filename = f'{IDENTIFIER}_{RNN_TYPE}_{FUTURE}_{current_dt}'

print(f'Best parameters for {RNN_TYPE} model: ', best_params)

# Save best hyperparameters to .json file.
# The archive directory is created on demand: without this, a missing
# directory raises FileNotFoundError and the search result is lost.
os.makedirs('hyperpm_archive', exist_ok=True)
with open(f'hyperpm_archive/{hp_filename}.json', 'w') as file:
    json.dump(best_params, file, indent=4)
