In [None]:
#imports
import optuna
from model import *

## Optuna Function
def _to_original_scale(scaled_values, scaler, n_features):
    """Map scaled first-feature values back to their original units.

    The values are written into column 0 of a zero-filled array with
    ``n_features`` columns, passed through ``scaler.inverse_transform``, and
    column 0 of the result is returned.

    Args:
        scaled_values: 1-D array of scaled values.
        scaler: object exposing ``inverse_transform``; presumably fitted on
            all ``n_features`` input columns -- TODO confirm in prepare_data.
        n_features: number of columns the scaler is called with.

    Returns:
        1-D array holding column 0 of the inverse-transformed data.
    """
    padded = np.zeros((len(scaled_values), n_features))
    padded[:, 0] = scaled_values
    return scaler.inverse_transform(padded)[:, 0]


def objective(trial):
    """Optuna objective: train one transformer configuration and score it.

    Samples learning rate, batch size, sequence length, feed-forward width,
    dropout and number of layers from ``trial``; ``d_model`` and ``nhead``
    stay fixed. The trained model is evaluated on the sequences whose
    datetime falls in 2019.

    Args:
        trial: Optuna trial used to suggest the hyperparameters.

    Returns:
        Mean absolute error between inverse-scaled predictions and targets
        on the 2019 rows (lower is better).
    """
    # Restrict d_model and nhead to the best values from the first Optuna
    # study to get better insights from this one (original note: "no inf").
    d_model = 48
    nhead = 6

    # NOTE: the order of the suggest_* calls is kept stable on purpose.
    params = {
        'learning_rate': trial.suggest_float("learning_rate", 1e-4, 1e-2, log=True),
        'batch_size': trial.suggest_int("batch_size", 8, 64),
        'seq_length': trial.suggest_int("seq_length", 3, 21),
        'transformer_encoder_layer_params': {
            'd_model': d_model,
            'nhead': nhead,
            'dim_feedforward': trial.suggest_int("dim_feedforward", 128, 512),
            'dropout': trial.suggest_float("dropout", 0.0, 0.1),
            'activation': 'relu'
        },
        'transformer_layer_params': {
            'num_layers': trial.suggest_int("num_layers", 1, 5),
        },
        'dataset': '2010-2019',
        'train_test_split': '80:20',
        'features': ['sum_30_min_demand', 'is_weekend', 'is_weekday'],
    }

    input_dim = len(params['features'])

    sequences, targets, datetimes, scaler_X = prepare_data(params)

    # Build and train the model; the loss histories are not needed here.
    model, _train_losses, _val_losses = train_transformer_model(
        sequences, targets, input_dim, datetimes, params
    )

    # Evaluate on the 2019 rows only.
    # NOTE(review): if 2019 is also the final held-out test set, selecting
    # hyperparameters on it leaks test information -- consider scoring on a
    # separate validation window instead.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    in_test_year = pd.to_datetime(datetimes).dt.year == 2019
    # The first seq_length timestamps are dropped from the mask; presumably
    # they have no complete input window and therefore no sequence -- confirm
    # against prepare_data.
    seq_mask = in_test_year[params['seq_length']:].to_numpy()
    eval_targets = targets[seq_mask]
    predictions = evaluate_model(model, sequences[seq_mask], eval_targets, device)

    # Bring predictions and targets back to the original scale before scoring.
    predictions_original = _to_original_scale(predictions.flatten(), scaler_X, input_dim)
    targets_original = _to_original_scale(eval_targets.numpy().flatten(), scaler_X, input_dim)

    # Mean absolute error in original units.
    mae = np.mean(np.abs(predictions_original - targets_original))
    return mae

In [4]:
# Settings for this hyperparameter search.
STUDY_STORAGE = "sqlite:///db.sqlite3"
STUDY_NAME = "weekday_weekend"
N_TRIALS = 100

# Create the study, or resume it from the SQLite storage if a study with this
# name already exists (load_if_exists=True). The objective returns an MAE, so
# the direction is stated explicitly as "minimize".
study = optuna.create_study(
    storage=STUDY_STORAGE,
    study_name=STUDY_NAME,
    direction="minimize",
    load_if_exists=True,
)
study.optimize(objective, n_trials=N_TRIALS, n_jobs=1)

study.best_params

[I 2025-09-27 12:29:00,636] Using an existing study with name 'weekday_weekend' instead of creating a new one.


Using device: cuda


[I 2025-09-27 12:29:25,956] Trial 1 finished with value: 32558.705603360082 and parameters: {'learning_rate': 0.009902221327787213, 'batch_size': 30, 'seq_length': 17, 'dim_feedforward': 349, 'dropout': 0.05947804526842565, 'num_layers': 4}. Best is trial 1 with value: 32558.705603360082.


Using device: cuda


[I 2025-09-27 12:30:02,900] Trial 2 finished with value: 32652.88532690524 and parameters: {'learning_rate': 0.007604271717775313, 'batch_size': 19, 'seq_length': 9, 'dim_feedforward': 337, 'dropout': 0.04344567227542288, 'num_layers': 5}. Best is trial 1 with value: 32558.705603360082.


Using device: cuda


[I 2025-09-27 12:30:31,391] Trial 3 finished with value: 32571.005574166786 and parameters: {'learning_rate': 0.009018630793045278, 'batch_size': 54, 'seq_length': 20, 'dim_feedforward': 290, 'dropout': 0.0068097210554369575, 'num_layers': 5}. Best is trial 1 with value: 32558.705603360082.


Using device: cuda


[I 2025-09-27 12:31:10,065] Trial 4 finished with value: 12446.47814749759 and parameters: {'learning_rate': 0.00014455621375081303, 'batch_size': 33, 'seq_length': 18, 'dim_feedforward': 378, 'dropout': 0.08197487298907828, 'num_layers': 3}. Best is trial 4 with value: 12446.47814749759.


Using device: cuda


[I 2025-09-27 12:31:41,230] Trial 5 finished with value: 12123.602621360556 and parameters: {'learning_rate': 0.00020319073491950008, 'batch_size': 26, 'seq_length': 16, 'dim_feedforward': 214, 'dropout': 0.04206989945612383, 'num_layers': 3}. Best is trial 5 with value: 12123.602621360556.


Using device: cuda


[I 2025-09-27 12:32:19,825] Trial 6 finished with value: 12491.767663852912 and parameters: {'learning_rate': 0.006537282161152554, 'batch_size': 28, 'seq_length': 15, 'dim_feedforward': 322, 'dropout': 0.09690409764054787, 'num_layers': 2}. Best is trial 5 with value: 12123.602621360556.


Using device: cuda


[W 2025-09-27 12:32:27,940] Trial 7 failed with parameters: {'learning_rate': 0.00018844978424596874, 'batch_size': 22, 'seq_length': 16, 'dim_feedforward': 393, 'dropout': 0.09836494955384124, 'num_layers': 4} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/home/harry/personal/uni/project/.venv/lib/python3.10/site-packages/optuna/study/_optimize.py", line 201, in _run_trial
    value_or_values = func(trial)
  File "/tmp/ipykernel_25361/984775292.py", line 40, in objective
    model, train_losses, val_losses = train_transformer_model(sequences, targets, input_dim, datetimes, params)
  File "/home/harry/personal/uni/project/individual/harry/model/model.py", line 134, in train_transformer_model
    predictions = model(batch_X)
  File "/home/harry/personal/uni/project/.venv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/home/harry/personal/uni/p

KeyboardInterrupt: 

In [84]:
# Display the hyperparameters of the best trial recorded in `study`.
# NOTE(review): the saved output below lists an 'activation' parameter and a
# dropout above 0.1, neither of which the objective defined in this notebook
# can suggest (activation is hard-coded, dropout is sampled from 0.0-0.1).
# It appears to come from a different study or an earlier version of the
# objective -- re-run from a fresh kernel to confirm.
study.best_params

{'dropout': 0.1503743360111539,
 'learning_rate': 0.0011392910510892803,
 'batch_size': 32,
 'seq_length': 21,
 'dim_feedforward': 339,
 'activation': 'relu',
 'num_layers': 2}