In [1]:
#imports
import optuna
import sys
sys.path.append('/home/harry/personal/uni/project/individual/harry/model')

from model import *

# Input feature columns, assembled group by group from their shared
# prefix/suffix patterns. The order is significant: it is the order in which
# the names are appended to the model's feature list.
feature_names = [
    # Season indicators
    *(f'is_{season}' for season in ('summer', 'autumn', 'winter', 'spring')),
    # Day-of-week indicators
    *(f'is_{day}' for day in (
        'monday', 'tuesday', 'wednesday', 'thursday',
        'friday', 'saturday', 'sunday',
    )),
    # Weekday / weekend indicators
    'is_weekday',
    'is_weekend',
    # Month indicators
    *(f'is_{month}' for month in (
        'jan', 'feb', 'mar', 'apr', 'may', 'jun',
        'jul', 'aug', 'sep', 'oct', 'nov', 'dec',
    )),
    # 30-minute demand statistics
    *(f'{stat}_30_min_demand' for stat in ('min', 'avg', 'max')),
    # Temperature statistics
    *(f'{stat}_temp' for stat in ('avg', 'max', 'min')),
    # Next-24h features (names kept verbatim)
    'hd_next_24h',
    'cd_next_24h',
]

## Optuna Function
def objective(trial):
    """Optuna objective: draw one hyperparameter set and score it.

    Args:
        trial: Optuna trial object used to request hyperparameter suggestions
            via ``suggest_float`` / ``suggest_int``.

    Returns:
        The value returned by ``median_mape`` for the assembled configuration
        (``median_mape`` is not defined in this cell; presumably it comes from
        the ``model`` star import -- TODO confirm).
    """
    # d_model and nhead are pinned to the best values found by the first
    # Optuna study, so this search only explores the remaining parameters
    # (original note: "no inf" -- presumably this avoids trials that produced
    # inf scores; TODO confirm).
    fixed_d_model, fixed_nhead = 64, 4

    # Draw the suggestions one by one, in the same order as the parameters
    # appear in the trial logs.
    lr = trial.suggest_float("learning_rate", 1e-4, 1e-2, log=True)
    batch = trial.suggest_int("batch_size", 8, 64)
    window = trial.suggest_int("seq_length", 3, 21)
    ff_width = trial.suggest_int("dim_feedforward", 128, 512)
    drop_p = trial.suggest_float("dropout", 0.0, 0.1)
    depth = trial.suggest_int("num_layers", 1, 5)

    # Settings for a single encoder layer.
    encoder_layer_cfg = dict(
        d_model=fixed_d_model,
        nhead=fixed_nhead,
        dim_feedforward=ff_width,
        dropout=drop_p,
        activation='relu',
    )
    # Settings for the stack of encoder layers.
    encoder_stack_cfg = dict(num_layers=depth)

    # The target column goes first, followed by every engineered feature.
    feature_columns = ['sum_30_min_demand'] + feature_names

    config = {
        'learning_rate': lr,
        'batch_size': batch,
        'seq_length': window,
        'transformer_encoder_layer_params': encoder_layer_cfg,
        'transformer_layer_params': encoder_stack_cfg,
        'dataset': '2010-2019',
        'train_test_split': '80:20',
        'features': feature_columns,
        'visualise': False,
    }
    return median_mape(config)

In [2]:
# Create the study in the SQLite database one directory up, or reuse the
# stored study of the same name if it already exists (load_if_exists=True).
study = optuna.create_study(
    study_name="2010_2019+all_features",
    storage="sqlite:///../db.sqlite3",
    load_if_exists=True,
)

# Run 100 trials of `objective` with n_jobs=5.
study.optimize(objective, n_jobs=5, n_trials=100)

# Last expression of the cell: displays the best hyperparameters found.
study.best_params

[I 2025-09-30 12:40:57,355] A new study created in RDB with name: 2010_2019+all_features
[I 2025-09-30 12:45:46,038] Trial 2 finished with value: 2.8634632002488623 and parameters: {'learning_rate': 0.00018585700846077905, 'batch_size': 49, 'seq_length': 3, 'dim_feedforward': 188, 'dropout': 0.024189809312727007, 'num_layers': 3}. Best is trial 2 with value: 2.8634632002488623.
[I 2025-09-30 12:50:10,110] Trial 3 finished with value: 5.9072948290230025 and parameters: {'learning_rate': 0.0035799314873008682, 'batch_size': 27, 'seq_length': 20, 'dim_feedforward': 355, 'dropout': 0.008675456862006358, 'num_layers': 5}. Best is trial 2 with value: 2.8634632002488623.
[I 2025-09-30 12:51:05,386] Trial 0 finished with value: 2.9854029532726796 and parameters: {'learning_rate': 0.0001902422434983739, 'batch_size': 48, 'seq_length': 15, 'dim_feedforward': 227, 'dropout': 0.08292489649280406, 'num_layers': 3}. Best is trial 2 with value: 2.8634632002488623.
[I 2025-09-30 12:53:23,062] Trial 4 

{'learning_rate': 0.00010837405552052769,
 'batch_size': 49,
 'seq_length': 11,
 'dim_feedforward': 463,
 'dropout': 0.059466553772069955,
 'num_layers': 2}