# Hyperparameter tuning

## Dataset

In [1]:
# Use the below functionality to execute your model (that you will adjust later step by step)
# This block of code provides you the functionality to train a model. Results are printed after each epoch

import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import tqdm


def load_mnist_data(root_path='./data', batch_size=4):
    """
    Loads MNIST dataset into your directory.
    You can change the root_path to point to a already existing path if you want to safe a little bit of memory :)
    """
    transform = transforms.Compose(
        [transforms.ToTensor(),
        transforms.Normalize((0.5), (0.5))]
    )

    trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)

    testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)
    testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)

    return trainloader, testloader

## Training loops

In [2]:
def train_model(model, optimizer, loss_fn, dataloader):
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    model = model.to(device)
    model.train()

    running_loss = 0.0
    running_accuracy = []
    for imgs, targets in dataloader:
        imgs, targets = imgs.to(device=device), targets.to(device=device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(imgs.reshape(imgs.shape[0], -1))

        loss = loss_fn(outputs, targets)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()

        # Calculate the Accuracy (how many of all samples are correctly classified?)
        max_outputs = torch.max(outputs, dim=1).indices
        accuracy = (max_outputs.detach() == targets.detach()).to(dtype=torch.float32).mean()
        running_accuracy.append(accuracy)
    
    avg_loss = running_loss / len(dataloader)
    avg_acc = torch.tensor(running_accuracy).mean()
    # print(f'Training iteration finished with loss: {avg_loss:.3f} and accuracy {avg_acc:.3f}')

    return avg_loss, avg_acc


def eval_model(model, loss_fn, dataloader):
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    model = model.to(device)
    model.eval()

    running_loss = 0.0
    running_accuracy = []
    with torch.no_grad():
        for imgs, targets in dataloader:
            imgs, targets = imgs.to(device=device), targets.to(device=device)

            # forward + backward + optimize
            outputs = model(imgs.reshape(imgs.shape[0], -1))

            loss = loss_fn(outputs, targets)

            # print statistics
            running_loss += loss.item()

            # Calculate the Accuracy (how many of all samples are correctly classified?)
            max_outputs = torch.max(outputs, dim=1).indices
            accuracy = (max_outputs.detach() == targets.detach()).to(dtype=torch.float32).mean()
            running_accuracy.append(accuracy)
    
    avg_loss = running_loss / len(dataloader)
    avg_acc = torch.tensor(running_accuracy).mean()
    # print(f'Evaluation iteration finished with loss: {avg_loss:.3f} and accuracy {avg_acc:.3f}')

    return avg_loss, avg_acc


def operate(model, optimizer, loss_fn, train_dataloader, test_dataloader, epochs):
    t_losses, t_accs = [], []
    e_losses, e_accs = [], []
    for epoch in range(0, epochs):
        t_avg_loss, t_avg_acc = train_model(
            model, optimizer, loss_fn, train_dataloader
        )
        t_losses.append(t_avg_loss)
        t_accs.append(t_accs)

        e_avg_loss, e_avg_acc = eval_model(
            model, loss_fn, test_dataloader
        )
        e_losses.append(e_avg_loss)
        e_accs.append(e_accs)

    return torch.as_tensor(t_losses), torch.as_tensor(t_accs), torch.as_tensor(e_losses), torch.as_tensor(e_accs)

## Define your model

In [3]:
# use two parameters to create your model
# 1) the amount of hidden layers
# 2) the neurons per hidden layer

# we tune those two parameters with Optuna

In [4]:
from torch.nn import Linear

def build_model(trial):
    layers = []

    n_layers = trial.suggest_int("n_layers", 1, 5)
    n_neurons_per_layer = trial.suggest_int("n_neurons", 8, 64)

    first_layer = Linear(784, n_neurons_per_layer)
    layers.append(first_layer)

    for i in range(n_layers - 1):
        layers.append(Linear(n_neurons_per_layer, n_neurons_per_layer))
    
    layers.append(Linear(n_neurons_per_layer, 10))

    return nn.Sequential(*layers)

## Define your objective with optuna hyperparameter tuning

In [5]:
# find the best hyperparameters for
# 1) the amount of hidden layers
# 2) the neurons per hidden layer
# 3) batch size
# 4) learning rate

DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
loss_fn = torch.nn.CrossEntropyLoss()
EPOCHS = 10

In [6]:
import optuna
from torch.optim import Adam
import tqdm


def objective(trial):
    model = build_model(trial).to(DEVICE)

    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    batch_size = trial.suggest_categorical("batch_size", [4, 8, 16])

    optimizer = Adam(model.parameters(), lr=lr)

    train_loader, test_loader = load_mnist_data(batch_size=batch_size)

    for epoch in tqdm.tqdm(range(EPOCHS), desc='Iterating epoch'):
        t_avg_loss, t_acc = train_model(
            model=model,
            optimizer=optimizer,
            loss_fn=loss_fn,
            dataloader=train_loader
        )

        e_avg_loss, e_acc = eval_model(
            model=model,
            loss_fn=loss_fn,
            dataloader=train_loader
        )

        trial.report(e_acc, epoch)

        # Handle pruning based on the intermediate value.
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()
    
    return e_acc


In [7]:
from optuna.trial import TrialState

# Add stream handler of stdout to show the messages
study_name = "example-study"  # Unique identifier of the study.
storage_name = "sqlite:///{}.db".format(study_name)
study = optuna.create_study(study_name=study_name, storage=storage_name, direction='maximize')

study.optimize(objective, n_trials=15)

pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

print("Study statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))

print("Best trial:")
trial = study.best_trial

print("  Value: ", trial.value)

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

[I 2023-12-27 04:15:02,900] A new study created in RDB with name: example-study


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|█████████████████████████████████████████████████████| 9912422/9912422 [00:01<00:00, 9046688.22it/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|████████████████████████████████████████████████████████| 28881/28881 [00:00<00:00, 33070077.48it/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|█████████████████████████████████████████████████████| 1648877/1648877 [00:00<00:00, 7232735.86it/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|████████████████████████████████████████████████████████████| 4542/4542 [00:00<00:00, 902654.76it/s]


Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



Iterating epoch: 100%|███████████████████████████████████████████████████| 10/10 [15:49<00:00, 94.98s/it]
[I 2023-12-27 04:30:56,102] Trial 0 finished with value: 0.8635833263397217 and parameters: {'n_layers': 1, 'n_neurons': 55, 'lr': 0.020480115443510482, 'batch_size': 8}. Best is trial 0 with value: 0.8635833263397217.
Iterating epoch: 100%|███████████████████████████████████████████████████| 10/10 [15:45<00:00, 94.50s/it]
[I 2023-12-27 04:46:41,405] Trial 1 finished with value: 0.9192500114440918 and parameters: {'n_layers': 1, 'n_neurons': 17, 'lr': 4.179381868033112e-05, 'batch_size': 4}. Best is trial 1 with value: 0.9192500114440918.
Iterating epoch: 100%|███████████████████████████████████████████████████| 10/10 [10:41<00:00, 64.19s/it]
[I 2023-12-27 04:57:23,672] Trial 2 finished with value: 0.8551666736602783 and parameters: {'n_layers': 5, 'n_neurons': 54, 'lr': 0.008077144706907966, 'batch_size': 8}. Best is trial 1 with value: 0.9192500114440918.
Iterating epoch: 100%|██

Study statistics: 
  Number of finished trials:  15
  Number of pruned trials:  5
  Number of complete trials:  10
Best trial:
  Value:  0.9232000112533569
  Params: 
    n_layers: 2
    n_neurons: 39
    lr: 8.330760120488075e-05
    batch_size: 16


## Open the optuna trial with optuna-dashboard