### Hyperparameter tuning with grid search

In [1]:
import torch
from core.util.save_model import save_parameters
from sklearn.model_selection import ParameterGrid
from core.models import GRU
from core.models.model_training import blocked_training
from core.util.hyperparameter_configuration import get_hyperparameter_configuration

Set ML model, loss function and hyperparameters that that will be tested.

In [2]:
gridsearch_params = {
    "learning_rate": [0.001, 0.003, 0.005],
    "batch_size": [32, 64, 128],
    "lookback": [36, 48, 96],
    "num_layers": [1, 2],
}

# Extract hyperparameters configuration that will not be tuned upon
(
    hidden_size,
    EPOCHS,
    horizon,
    loss_function,
    dropout_rate,
    folds,
    early_stopper,
) = get_hyperparameter_configuration()

# Specific to experiment
features = {}
experiment_name = "00_GRU_trefor_park"
model_used = GRU
model_input_size = 1

Use CUDA (GPU) if available.

In [3]:
device = "cuda:0" if torch.cuda.is_available() else "cpu"
device

'cuda:0'

Train a model with specified hyperparameters

In [4]:
def train_with_params(params: dict) -> tuple[float, model_used]:
    """Train model with the specified hyperparameters."""
    # Extract hyperparameters
    batch_size = params["batch_size"]
    learning_rate = params["learning_rate"]
    num_layers = params["num_layers"]
    lookback = params["lookback"]

    # Initialize model
    model = model_used(
        input_size=model_input_size,
        hidden_size=hidden_size,
        num_layers=num_layers,
        dropout_rate=dropout_rate,
        horizon=horizon,
        lookback=lookback,
    ).to(device)

    _, val_loss, best_model = blocked_training(
        model=model,
        learning_rate=learning_rate,
        device=device,
        batch_size=batch_size,
        lookback=lookback,
        early_stopper=early_stopper,
        features=features,
    )

    return min(val_loss), best_model

Iterate over all hyperparameters and train a model for each combination.

In [5]:
best_loss = float("inf")
best_params = None
best_model = None

for params in ParameterGrid(gridsearch_params):
    early_stopper.reset()
    v_loss, model = train_with_params(params)
    print(params, v_loss)
    if v_loss < best_loss:
        best_loss = v_loss
        best_params = params
        best_model = model

save_parameters(experiment_name, best_params)
print("Best Hyperparameters:", best_params)
print("Best Validation Loss:", best_loss)

Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 32, 'learning_rate': 0.001, 'lookback': 36, 'num_layers': 1} 0.03950754623842785


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 32, 'learning_rate': 0.001, 'lookback': 36, 'num_layers': 2} 0.03998099955655853


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 32, 'learning_rate': 0.001, 'lookback': 48, 'num_layers': 1} 0.03843020877372319


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 32, 'learning_rate': 0.001, 'lookback': 48, 'num_layers': 2} 0.3721823308178164


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 32, 'learning_rate': 0.001, 'lookback': 96, 'num_layers': 1} 0.07893950382016174


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 32, 'learning_rate': 0.001, 'lookback': 96, 'num_layers': 2} 0.03878634471178053


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 32, 'learning_rate': 0.003, 'lookback': 36, 'num_layers': 1} 0.366561118773304


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 32, 'learning_rate': 0.003, 'lookback': 36, 'num_layers': 2} 0.05543531338047121


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 32, 'learning_rate': 0.003, 'lookback': 48, 'num_layers': 1} 0.0625339740945492


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 32, 'learning_rate': 0.003, 'lookback': 48, 'num_layers': 2} 0.37883862427056386


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 32, 'learning_rate': 0.003, 'lookback': 96, 'num_layers': 1} 0.07981338812494891


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 32, 'learning_rate': 0.003, 'lookback': 96, 'num_layers': 2} 0.3400782697755387


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 32, 'learning_rate': 0.005, 'lookback': 36, 'num_layers': 1} 0.35032751177773824


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 32, 'learning_rate': 0.005, 'lookback': 36, 'num_layers': 2} 0.13660864494625985


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 32, 'learning_rate': 0.005, 'lookback': 48, 'num_layers': 1} 0.5317712703917893


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 32, 'learning_rate': 0.005, 'lookback': 48, 'num_layers': 2} 0.5265980635579771


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 32, 'learning_rate': 0.005, 'lookback': 96, 'num_layers': 1} 0.3495718500085382


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 32, 'learning_rate': 0.005, 'lookback': 96, 'num_layers': 2} 0.3374885942047973


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 64, 'learning_rate': 0.001, 'lookback': 36, 'num_layers': 1} 0.5591888896889469


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 64, 'learning_rate': 0.001, 'lookback': 36, 'num_layers': 2} 0.03914541001484642


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 64, 'learning_rate': 0.001, 'lookback': 48, 'num_layers': 1} 0.03900575084858078


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 64, 'learning_rate': 0.001, 'lookback': 48, 'num_layers': 2} 0.040253398473362685


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 64, 'learning_rate': 0.001, 'lookback': 96, 'num_layers': 1} 0.03948632675387615


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 64, 'learning_rate': 0.001, 'lookback': 96, 'num_layers': 2} 0.03902317121313068


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 64, 'learning_rate': 0.003, 'lookback': 36, 'num_layers': 1} 0.039025566400439096


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 64, 'learning_rate': 0.003, 'lookback': 36, 'num_layers': 2} 0.05976478346668573


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 64, 'learning_rate': 0.003, 'lookback': 48, 'num_layers': 1} 0.06928297540690875


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 64, 'learning_rate': 0.003, 'lookback': 48, 'num_layers': 2} 0.04598534243906428


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 64, 'learning_rate': 0.003, 'lookback': 96, 'num_layers': 1} 0.5688679861826434


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 64, 'learning_rate': 0.003, 'lookback': 96, 'num_layers': 2} 0.5559493509253465


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 64, 'learning_rate': 0.005, 'lookback': 36, 'num_layers': 1} 0.03996169519136239


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 64, 'learning_rate': 0.005, 'lookback': 36, 'num_layers': 2} 0.5533509831464156


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 64, 'learning_rate': 0.005, 'lookback': 48, 'num_layers': 1} 0.03875691053649967


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 64, 'learning_rate': 0.005, 'lookback': 48, 'num_layers': 2} 0.56032284277499


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 64, 'learning_rate': 0.005, 'lookback': 96, 'num_layers': 1} 0.039124490573023515


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 64, 'learning_rate': 0.005, 'lookback': 96, 'num_layers': 2} 0.5688679861826434


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 128, 'learning_rate': 0.001, 'lookback': 36, 'num_layers': 1} 0.04016248326185217


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 128, 'learning_rate': 0.001, 'lookback': 36, 'num_layers': 2} 0.04036907241809078


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 128, 'learning_rate': 0.001, 'lookback': 48, 'num_layers': 1} 0.061113729962308745


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 128, 'learning_rate': 0.001, 'lookback': 48, 'num_layers': 2} 0.03861431423847759


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 128, 'learning_rate': 0.001, 'lookback': 96, 'num_layers': 1} 0.038834381848932994


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 128, 'learning_rate': 0.001, 'lookback': 96, 'num_layers': 2} 0.039990273459863356


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 128, 'learning_rate': 0.003, 'lookback': 36, 'num_layers': 1} 0.04137221065832784


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 128, 'learning_rate': 0.003, 'lookback': 36, 'num_layers': 2} 0.09064033490542414


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 128, 'learning_rate': 0.003, 'lookback': 48, 'num_layers': 1} 0.07120588175013925


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 128, 'learning_rate': 0.003, 'lookback': 48, 'num_layers': 2} 0.040020095930725086


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 128, 'learning_rate': 0.003, 'lookback': 96, 'num_layers': 1} 0.568925659236912


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 128, 'learning_rate': 0.003, 'lookback': 96, 'num_layers': 2} 0.04103266436140984


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 128, 'learning_rate': 0.005, 'lookback': 36, 'num_layers': 1} 0.043713464621198656


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 128, 'learning_rate': 0.005, 'lookback': 36, 'num_layers': 2} 0.04169776444079032


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 128, 'learning_rate': 0.005, 'lookback': 48, 'num_layers': 1} 0.04166429068743723


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 128, 'learning_rate': 0.005, 'lookback': 48, 'num_layers': 2} 0.5602075639313783


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 128, 'learning_rate': 0.005, 'lookback': 96, 'num_layers': 1} 0.061035090450910316


Iterating epochs:   0%|          | 0/200 [00:00<?, ?it/s]

{'batch_size': 128, 'learning_rate': 0.005, 'lookback': 96, 'num_layers': 2} 0.13419393668336405
Best Hyperparameters: {'batch_size': 32, 'learning_rate': 0.001, 'lookback': 48, 'num_layers': 1}
Best Validation Loss: 0.03843020877372319
