In [1]:
#imports
import optuna
from model import *

In [4]:
## Optuna Function
def _to_original_scale(scaler, values, n_features):
    """Map scaled target values back to their original units.

    ``scaler.inverse_transform`` is called on a zero-filled array of shape
    ``(len(values), n_features)`` whose first column holds ``values``; only the
    first column of the result is returned.
    """
    flat = np.asarray(values).reshape(-1)
    # A fresh buffer per call, so predictions and targets never share state.
    padded = np.zeros((flat.shape[0], n_features))
    padded[:, 0] = flat
    return scaler.inverse_transform(padded)[:, 0]


def objective(trial):
    """Optuna objective: train a transformer and return its MAE on the 2019 data.

    Hyperparameters sampled from ``trial``: ``dropout``, ``learning_rate``,
    ``batch_size``, ``seq_length``, ``dim_feedforward``, ``activation`` and
    ``num_layers``. ``d_model`` and ``nhead`` are held constant.

    Args:
        trial: The ``optuna`` trial used to sample hyperparameters.

    Returns:
        Mean absolute error between predictions and targets for the sequences
        selected by the 2019 mask, after both were passed through
        ``scaler_X.inverse_transform``.

    NOTE(review): ``input_dim``, ``prepare_data``, ``train_transformer_model``,
    ``evaluate_model``, ``torch``, ``np`` and ``pd`` are not defined in this
    cell; presumably they come from ``from model import *`` -- confirm, since
    the objective fails on a fresh kernel if any of them is missing.
    """
    # d_model and nhead are pinned to the best values of the first Optuna study
    # so this study only explores the remaining hyperparameters (original note:
    # "no inf" -- presumably to avoid non-finite objective values; TODO confirm).
    d_model = 48
    nhead = 6

    # Sample dropout once and reuse it for the top-level and the encoder-layer
    # setting, instead of requesting the same parameter name twice.
    dropout = trial.suggest_float("dropout", 0.0, 0.3)

    params = {
        'dropout': dropout,
        'learning_rate': trial.suggest_float("learning_rate", 1e-4, 1e-2, log=True),
        'batch_size': trial.suggest_categorical("batch_size", [8, 16, 32, 64]),
        'seq_length': trial.suggest_categorical("seq_length", [7, 14, 21]),
        'transformer_encoder_layer_params': {
            'd_model': d_model,
            'nhead': nhead,
            'dim_feedforward': trial.suggest_int("dim_feedforward", 128, 512),
            'dropout': dropout,
            'activation': trial.suggest_categorical("activation", ['relu', 'gelu'])
        },
        'transformer_layer_params': {
            'num_layers': trial.suggest_int("num_layers", 1, 4),
        },
        'dataset': '2016-2019',
        'train_test_split': '80:20'
    }

    sequences, targets, datetimes, scaler_X = prepare_data(params)

    # Build and train the model; the loss histories are not needed here.
    model, _train_losses, _val_losses = train_transformer_model(
        sequences, targets, input_dim, datetimes, params
    )

    # Evaluate on the sequences whose timestamp falls in 2019.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    test_mask = pd.to_datetime(datetimes).dt.year == 2019
    # Drop the first seq_length entries so the mask lines up with `sequences`
    # (presumably those timestamps have no complete input window -- TODO confirm).
    seq_mask = test_mask[params['seq_length']:].to_numpy()
    test_targets = targets[seq_mask]
    predictions = evaluate_model(model, sequences[seq_mask], test_targets, device)

    # Undo the scaling before computing the error. The scaler expects its full
    # feature width, so take it from the scaler when it exposes
    # `n_features_in_`; otherwise fall back to the previously assumed 2 columns.
    n_features = getattr(scaler_X, "n_features_in_", 2)
    predictions_original = _to_original_scale(scaler_X, predictions, n_features)
    targets_original = _to_original_scale(scaler_X, test_targets.numpy(), n_features)

    # Mean absolute error in original units.
    mae = np.mean(np.abs(predictions_original - targets_original))
    return mae

In [None]:
# Open the study persisted in the local SQLite file (creating it on first use)
# and run the search. `direction` is written out for readability only:
# "minimize" is Optuna's default and matches the MAE returned by `objective`.
study = optuna.create_study(
    study_name="full_data_transformer_optimization_fix_nhead_d_model",
    storage="sqlite:///db.sqlite3",
    direction="minimize",
    load_if_exists=True,
)
study.optimize(objective, n_trials=100, n_jobs=1)

study.best_params

[I 2025-09-25 18:02:01,424] Using an existing study with name 'full_data_transformer_optimization_fix_nhead_d_model' instead of creating a new one.


Using device: cuda


In [84]:
# Display the hyperparameters of the best trial recorded in `study`.
study.best_params

{'dropout': 0.1503743360111539,
 'learning_rate': 0.0011392910510892803,
 'batch_size': 32,
 'seq_length': 21,
 'dim_feedforward': 339,
 'activation': 'relu',
 'num_layers': 2}