### Hyperparameter tuning with grid search

In [1]:
import torch
import torch.nn as nn
import copy
from torch.utils.data import Dataset, DataLoader
from core.util.get_datasets import get_park_dataset
from sklearn.model_selection import ParameterGrid
from core.models.LSTM import LSTM

Set the ML model, the loss function and the hyperparameter values that will be tested.

In [2]:
# Seed torch's random number generators so that random operations (for
# example parameter initialisation of a freshly constructed model) repeat
# across "Restart Kernel -> Run All". Without a seed, two runs of the grid
# search can rank the hyperparameter combinations differently.
# NOTE: this does not by itself guarantee bit-identical results on GPU.
SEED = 42
torch.manual_seed(SEED)

# Search space for the grid search: every combination of the listed values is
# expanded by ParameterGrid and trained once. Each key is looked up by name
# in train_with_params, so keys must not be renamed here without updating it.
param_grid = {
    "learning_rate": [0.001],
    "batch_size": [16],
    "hidden_size": [32, 64],
    "num_stacked_layers": [1, 2],
    "epochs": [10],
    "lookback": [24, 168],
}

# Model class instantiated for every combination, and the loss used by the
# training loop (read there through this module-level name).
model_used = LSTM
loss_function = nn.HuberLoss()

Use CUDA (GPU) if available.

In [3]:
# Prefer the first CUDA device when torch reports one, otherwise run on CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"
device

'cpu'

Define the dataset class that wraps the feature and target tensors of a data split (train, validation or test).

In [4]:
class TreforData(Dataset):
    """Map-style dataset pairing each feature tensor with its target.

    Both tensors are moved to the module-level ``device`` once, when the
    dataset is constructed, so indexing returns tensors that already live on
    that device.
    """

    def __init__(self, x: torch.Tensor, y: torch.Tensor) -> None:
        """Store features and targets on the configured device.

        Arguments:
            x: feature tensor, indexed along its first dimension
            y: target tensor, indexed along its first dimension

        Raises:
            ValueError: if ``x`` and ``y`` differ in length along the first
                dimension.

        """
        # Fail fast: a shorter ``y`` would otherwise only surface as an
        # IndexError in the middle of an epoch, and a longer ``y`` would be
        # silently truncated because __len__ is driven by ``x``.
        if len(x) != len(y):
            raise ValueError(
                f"x and y must have the same length, got {len(x)} and {len(y)}"
            )
        self.x = x.to(device)
        self.y = y.to(device)

    def __len__(self) -> int:
        """Return the number of samples (length of the feature tensor)."""
        return len(self.x)

    def __getitem__(self, i: int) -> tuple:
        """Return the ``(feature, target)`` pair at index ``i``."""
        return self.x[i], self.y[i]

Function for training one epoch.

In [5]:
def train_one_epoch(
    training_loader: DataLoader,
    optimizer: torch.optim.Optimizer,
    model: nn.Module,
    log_interval: int = 100,
) -> float:
    """Run one optimisation pass over ``training_loader``.

    The loss is computed with the module-level ``loss_function``. The model's
    train/eval mode is not touched here; the caller is expected to set it.

    Arguments:
        training_loader: iterable yielding ``(inputs, target)`` batches
        optimizer: optimizer stepping the parameters of ``model``
        model: callable model; ``model(inputs)`` must return the predictions
        log_interval: number of batches per averaging window (default 100)

    Returns:
        The mean batch loss over the most recent *complete* window of
        ``log_interval`` batches. If the loader yields fewer batches than one
        window, the mean over all batches seen is returned instead (the
        previous behaviour was to return 0.0 in that case). An empty loader
        yields 0.0.

    Raises:
        ValueError: if ``log_interval`` is smaller than 1.

    """
    if log_interval < 1:
        raise ValueError("log_interval must be a positive integer")

    running_loss = 0.0
    last_loss = 0.0
    batches_in_window = 0
    window_completed = False

    for inputs, target in training_loader:
        # Gradients accumulate by default, so clear them for every batch.
        optimizer.zero_grad()

        predictions = model(inputs)

        # Compute the loss and back-propagate its gradients.
        loss = loss_function(predictions, target)
        loss.backward()

        # Update the learnable parameters.
        optimizer.step()

        running_loss += loss.item()
        batches_in_window += 1
        if batches_in_window == log_interval:
            # A full window has been seen: record its mean and start over.
            last_loss = running_loss / log_interval
            running_loss = 0.0
            batches_in_window = 0
            window_completed = True

    # Short epochs never complete a window; report the mean of what was seen
    # rather than a misleading 0.0.
    if not window_completed and batches_in_window:
        last_loss = running_loss / batches_in_window

    return last_loss

Train a model with the specified hyperparameters.

In [6]:
def train_with_params(params: dict) -> "tuple[float, LSTM | None]":
    """Train one model for a single hyperparameter combination.

    Arguments:
        params: mapping with the keys ``learning_rate``, ``batch_size``,
            ``hidden_size``, ``num_stacked_layers``, ``epochs`` and
            ``lookback``.

    Returns:
        ``(best_v_loss, best_model)`` where ``best_v_loss`` is the lowest
        per-epoch validation loss (mean over validation batches) and
        ``best_model`` is a deep copy of the model taken at that epoch.
        ``best_model`` is ``None`` when no epoch produced a loss below
        infinity (for example ``epochs == 0`` or a NaN loss).

    Raises:
        KeyError: if a required hyperparameter is missing from ``params``.
        ValueError: if the validation split yields no batches.

    """
    # Extract hyperparameters
    learning_rate = params["learning_rate"]
    batch_size = params["batch_size"]
    hidden_size = params["hidden_size"]
    num_stacked_layers = params["num_stacked_layers"]
    epochs = params["epochs"]
    lookback = params["lookback"]

    # Load the data. Only the train and validation splits are used for
    # tuning, so the two test arrays are discarded without being converted.
    # NOTE(review): the second argument (24) is passed through unchanged;
    # its meaning is defined by get_park_dataset - confirm before changing.
    x_train, y_train, x_val, y_val, _, _ = get_park_dataset(lookback, 24)
    x_train, y_train, x_val, y_val = (
        torch.tensor(split).float() for split in (x_train, y_train, x_val, y_val)
    )
    train_dataset = TreforData(x_train, y_train)
    val_dataset = TreforData(x_val, y_val)

    # Create data loaders (sample order is kept, no shuffling)
    training_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
    validation_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    # Without validation batches there is nothing to average over; fail with
    # a clear message instead of an unbound loop variable further down.
    num_val_batches = len(validation_loader)
    if num_val_batches == 0:
        raise ValueError("validation set is empty; cannot compute a validation loss")

    # Initialize model and optimizer
    model = model_used(
        input_size=x_train.shape[-1],
        hidden_size=hidden_size,
        num_stacked_layers=num_stacked_layers,
    ).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    best_v_loss = float("inf")
    best_model = None

    # Train the model
    for _ in range(epochs):
        model.train(True)
        train_one_epoch(training_loader, optimizer, model)

        # Evaluate on the validation set with the same module-level
        # ``loss_function`` that train_one_epoch optimises, so that changing
        # the configured loss affects training and validation alike.
        model.eval()
        running_v_loss = 0.0
        with torch.no_grad():
            for v_inputs, v_target in validation_loader:
                v_predictions = model(v_inputs)
                running_v_loss += loss_function(v_predictions, v_target).item()

        avg_v_loss = running_v_loss / num_val_batches

        # Keep a snapshot of the best epoch; later epochs keep mutating
        # ``model`` in place, hence the deep copy.
        if avg_v_loss < best_v_loss:
            best_v_loss = avg_v_loss
            best_model = copy.deepcopy(model)

    return best_v_loss, best_model

Iterate over all hyperparameters and train a model for each combination.

In [7]:
# Running record of the best combination seen so far.
best_loss, best_params, best_model = float("inf"), None, None

for params in ParameterGrid(param_grid):
    v_loss, model = train_with_params(params)
    print(params, v_loss)
    # Only a strictly lower validation loss replaces the current best, so the
    # first combination wins ties.
    if not v_loss < best_loss:
        continue
    best_loss, best_params, best_model = v_loss, params, model

print("Best Hyperparameters:", best_params)
print("Best Validation Loss:", best_loss)

{'batch_size': 16, 'epochs': 10, 'hidden_size': 32, 'learning_rate': 0.001, 'lookback': 24, 'num_stacked_layers': 1} 0.011656270244343535
{'batch_size': 16, 'epochs': 10, 'hidden_size': 32, 'learning_rate': 0.001, 'lookback': 24, 'num_stacked_layers': 2} 0.008218861209943267
{'batch_size': 16, 'epochs': 10, 'hidden_size': 32, 'learning_rate': 0.001, 'lookback': 168, 'num_stacked_layers': 1} 0.008944848196588081
{'batch_size': 16, 'epochs': 10, 'hidden_size': 32, 'learning_rate': 0.001, 'lookback': 168, 'num_stacked_layers': 2} 0.007304121765608125
{'batch_size': 16, 'epochs': 10, 'hidden_size': 64, 'learning_rate': 0.001, 'lookback': 24, 'num_stacked_layers': 1} 0.012365928589952528
{'batch_size': 16, 'epochs': 10, 'hidden_size': 64, 'learning_rate': 0.001, 'lookback': 24, 'num_stacked_layers': 2} 0.008774734605004655
{'batch_size': 16, 'epochs': 10, 'hidden_size': 64, 'learning_rate': 0.001, 'lookback': 168, 'num_stacked_layers': 1} 0.008452891295847163
{'batch_size': 16, 'epochs': 10