### Hyperparameter tuning with grid search

In [1]:
import torch
from core.util.save_model import save_parameters
from sklearn.model_selection import ParameterGrid
from core.models import LSTM
from core.models.model_training import blocked_training
from core.util.hyperparameter_configuration import get_hyperparameter_configuration

Set the ML model, loss function, and hyperparameters that will be tested.

In [2]:
# Search space for the grid search: one list of candidate values per
# hyperparameter name.
gridsearch_params = dict(
    learning_rate=[0.001, 0.003, 0.005],
    batch_size=[32, 64, 128],
    lookback=[36, 48, 96],
    num_layers=[1, 2],
)

# Values that are not part of the grid, exactly as returned (in this order)
# by get_hyperparameter_configuration().
# NOTE(review): EPOCHS, loss_function and folds are not referenced again in
# the visible cells — confirm whether they are consumed elsewhere.
hidden_size, EPOCHS, horizon, loss_function, dropout_rate, folds, early_stopper = (
    get_hyperparameter_configuration()
)

# Settings that identify this particular experiment
experiment_name = "00_LSTM_trefor_park"
model_used = LSTM
model_input_size = 1
features = dict()

Use CUDA (GPU) if available.

In [3]:
# Pick the first CUDA device when torch reports one, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"
device

'cuda:0'

Train a model with specified hyperparameters

In [4]:
def train_with_params(params: dict) -> tuple[float, model_used]:
    """Build and train one model for a single hyperparameter combination.

    Args:
        params: Mapping that must contain the keys ``"batch_size"``,
            ``"learning_rate"``, ``"num_layers"`` and ``"lookback"``.

    Returns:
        A pair ``(loss, model)``: the minimum of the validation losses
        returned by ``blocked_training`` and the model object it returns
        as its third element.

    Raises:
        KeyError: If one of the required keys is missing from ``params``.
    """
    # Pull the tuned values out of the combination up front, so a missing key
    # fails before any model is created.
    batch_size, learning_rate, num_layers, lookback = (
        params[key]
        for key in ("batch_size", "learning_rate", "num_layers", "lookback")
    )

    # Everything that is not tuned comes from the notebook-level configuration.
    network = model_used(
        input_size=model_input_size,
        hidden_size=hidden_size,
        num_layers=num_layers,
        dropout_rate=dropout_rate,
        horizon=horizon,
        lookback=lookback,
    )
    network = network.to(device)

    # Only the second and third elements of the training result are used here.
    _unused, val_losses, trained_model = blocked_training(
        model=network,
        learning_rate=learning_rate,
        device=device,
        batch_size=batch_size,
        lookback=lookback,
        early_stopper=early_stopper,
        features=features,
    )

    lowest_val_loss = min(val_losses)
    return lowest_val_loss, trained_model

Iterate over all hyperparameters and train a model for each combination.

In [5]:
# Running record of the best combination seen so far.
best_loss, best_params, best_model = float("inf"), None, None

for params in ParameterGrid(gridsearch_params):
    # The early stopper object is shared between runs, so it is reset before
    # each training.
    early_stopper.reset()
    v_loss, model = train_with_params(params)
    print(params, v_loss)

    # Keep the current record unless this run is strictly better.
    if not (v_loss < best_loss):
        continue
    best_loss, best_params, best_model = v_loss, params, model

# NOTE(review): best_params is still None here if no run produced a loss below
# float("inf") (e.g. every loss is NaN) — confirm save_parameters copes with that.
# NOTE(review): best_model is tracked but not used in the visible cells.
save_parameters(experiment_name, best_params)
print("Best Hyperparameters:", best_params)
print("Best Validation Loss:", best_loss)

Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 32, 'learning_rate': 0.001, 'lookback': 36, 'num_layers': 1} 0.12613590349419723


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 32, 'learning_rate': 0.001, 'lookback': 36, 'num_layers': 2} 0.060842865146045325


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 32, 'learning_rate': 0.001, 'lookback': 48, 'num_layers': 1} 0.06205204037780031


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 32, 'learning_rate': 0.001, 'lookback': 48, 'num_layers': 2} 0.1269007124781548


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 32, 'learning_rate': 0.001, 'lookback': 96, 'num_layers': 1} 0.10615985829689983


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 32, 'learning_rate': 0.001, 'lookback': 96, 'num_layers': 2} 0.10390820040946118


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 32, 'learning_rate': 0.003, 'lookback': 36, 'num_layers': 1} 0.06321769934152985


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 32, 'learning_rate': 0.003, 'lookback': 36, 'num_layers': 2} 0.12768588637477535


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 32, 'learning_rate': 0.003, 'lookback': 48, 'num_layers': 1} 0.10564908776051858


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 32, 'learning_rate': 0.003, 'lookback': 48, 'num_layers': 2} 0.08335718083664552


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 32, 'learning_rate': 0.003, 'lookback': 96, 'num_layers': 1} 0.12796894581913


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 32, 'learning_rate': 0.003, 'lookback': 96, 'num_layers': 2} 0.557967360875746


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 32, 'learning_rate': 0.005, 'lookback': 36, 'num_layers': 1} 0.14984053135752073


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 32, 'learning_rate': 0.005, 'lookback': 36, 'num_layers': 2} 0.5592864187868672


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 32, 'learning_rate': 0.005, 'lookback': 48, 'num_layers': 1} 0.12860024647129592


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 32, 'learning_rate': 0.005, 'lookback': 48, 'num_layers': 2} 0.44844391652387217


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 32, 'learning_rate': 0.005, 'lookback': 96, 'num_layers': 1} 0.0623985346695985


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 32, 'learning_rate': 0.005, 'lookback': 96, 'num_layers': 2} 0.5303591075379149


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 64, 'learning_rate': 0.001, 'lookback': 36, 'num_layers': 1} 0.08212673188981968


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 64, 'learning_rate': 0.001, 'lookback': 36, 'num_layers': 2} 0.06053406831011267


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 64, 'learning_rate': 0.001, 'lookback': 48, 'num_layers': 1} 0.08284029663942127


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 64, 'learning_rate': 0.001, 'lookback': 48, 'num_layers': 2} 0.060799529921810674


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 64, 'learning_rate': 0.001, 'lookback': 96, 'num_layers': 1} 0.08341082757244384


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 64, 'learning_rate': 0.001, 'lookback': 96, 'num_layers': 2} 0.08287738393199914


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 64, 'learning_rate': 0.003, 'lookback': 36, 'num_layers': 1} 0.23707021813445156


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 64, 'learning_rate': 0.003, 'lookback': 36, 'num_layers': 2} 0.08225986077718798


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 64, 'learning_rate': 0.003, 'lookback': 48, 'num_layers': 1} 0.16951157862992805


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 64, 'learning_rate': 0.003, 'lookback': 48, 'num_layers': 2} 0.1469367244125924


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 64, 'learning_rate': 0.003, 'lookback': 96, 'num_layers': 1} 0.12123179484595673


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 64, 'learning_rate': 0.003, 'lookback': 96, 'num_layers': 2} 0.10415967307800864


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 64, 'learning_rate': 0.005, 'lookback': 36, 'num_layers': 1} 0.061684463163809045


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 64, 'learning_rate': 0.005, 'lookback': 36, 'num_layers': 2} 0.1941444813742293


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 64, 'learning_rate': 0.005, 'lookback': 48, 'num_layers': 1} 0.08331004050638444


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 64, 'learning_rate': 0.005, 'lookback': 48, 'num_layers': 2} 0.06052172840978012


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 64, 'learning_rate': 0.005, 'lookback': 96, 'num_layers': 1} 0.08418591196140718


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 64, 'learning_rate': 0.005, 'lookback': 96, 'num_layers': 2} 0.4874077886170536


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 128, 'learning_rate': 0.001, 'lookback': 36, 'num_layers': 1} 0.04090221218526202


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 128, 'learning_rate': 0.001, 'lookback': 36, 'num_layers': 2} 0.0611318246149573


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 128, 'learning_rate': 0.001, 'lookback': 48, 'num_layers': 1} 0.10441261560032374


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 128, 'learning_rate': 0.001, 'lookback': 48, 'num_layers': 2} 0.08244559320011809


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 128, 'learning_rate': 0.001, 'lookback': 96, 'num_layers': 1} 0.06170257958181111


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 128, 'learning_rate': 0.001, 'lookback': 96, 'num_layers': 2} 0.03602415317724125


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 128, 'learning_rate': 0.003, 'lookback': 36, 'num_layers': 1} 0.06079986321101724


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 128, 'learning_rate': 0.003, 'lookback': 36, 'num_layers': 2} 0.08560853135588085


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 128, 'learning_rate': 0.003, 'lookback': 48, 'num_layers': 1} 0.10705556993280757


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 128, 'learning_rate': 0.003, 'lookback': 48, 'num_layers': 2} 0.12963571901940513


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 128, 'learning_rate': 0.003, 'lookback': 96, 'num_layers': 1} 0.05912830339411251


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 128, 'learning_rate': 0.003, 'lookback': 96, 'num_layers': 2} 0.08193438931261937


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 128, 'learning_rate': 0.005, 'lookback': 36, 'num_layers': 1} 0.19416295126099198


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 128, 'learning_rate': 0.005, 'lookback': 36, 'num_layers': 2} 0.06599391284930507


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 128, 'learning_rate': 0.005, 'lookback': 48, 'num_layers': 1} 0.17171354690655768


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 128, 'learning_rate': 0.005, 'lookback': 48, 'num_layers': 2} 0.19395546403743705


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 128, 'learning_rate': 0.005, 'lookback': 96, 'num_layers': 1} 0.08351904896833866


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 128, 'learning_rate': 0.005, 'lookback': 96, 'num_layers': 2} 0.06593655076677002
Best Hyperparameters: {'batch_size': 128, 'learning_rate': 0.001, 'lookback': 96, 'num_layers': 2}
Best Validation Loss: 0.03602415317724125
