# Hyperparameter Search

- Example with **AX**: https://pytorch.org/tutorials/intermediate/ax_multiobjective_nas_tutorial.html
- Example with **Optuna**: https://github.com/optuna/optuna-examples/blob/main/pytorch/pytorch_simple.py
  - notebook1: https://github.com/marcellusruben/medium-resources/blob/main/Optuna/Optuna.ipynb

## Optuna

In [2]:
import os
import optuna
from optuna.trial import TrialState
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data
from torchvision import datasets
from torchvision import transforms

# Visualization
from optuna.visualization import plot_contour
from optuna.visualization import plot_edf
from optuna.visualization import plot_intermediate_values
from optuna.visualization import plot_optimization_history
from optuna.visualization import plot_parallel_coordinate
from optuna.visualization import plot_param_importances
from optuna.visualization import plot_slice

In [3]:
# The model and every batch are moved to this device.
DEVICE = torch.device("cpu")

# Root directory handed to torchvision for the FashionMNIST files.
DIR = os.getcwd()

# Width of the final classification layer.
CLASSES = 10

# Training schedule.
EPOCHS = 10
BATCHSIZE = 128

# Per-epoch caps (in examples) that keep each trial short.
N_TRAIN_EXAMPLES = 30 * BATCHSIZE
N_VALID_EXAMPLES = 10 * BATCHSIZE

In [4]:
def define_model(trial):
    """Build an MLP classifier whose architecture is sampled from ``trial``.

    The trial picks the number of hidden layers (range 1-3) and, for each
    hidden layer, its width (range 4-128) and dropout probability
    (range 0.2-0.5). Every hidden layer is ``Linear -> ReLU -> Dropout``;
    the network ends with a ``Linear`` layer of ``CLASSES`` outputs followed
    by ``LogSoftmax``.

    Args:
        trial: Object providing ``suggest_int`` and ``suggest_float``
            (an Optuna trial).

    Returns:
        An ``nn.Sequential`` expecting inputs with ``28 * 28`` features.
    """
    depth = trial.suggest_int("n_layers", 1, 3)

    modules = []
    width_in = 28 * 28
    for idx in range(depth):
        width_out = trial.suggest_int(f"n_units_l{idx}", 4, 128)
        modules.extend(
            [
                nn.Linear(width_in, width_out),
                nn.ReLU(),
                nn.Dropout(trial.suggest_float(f"dropout_l{idx}", 0.2, 0.5)),
            ]
        )
        # The next layer consumes what this one produced.
        width_in = width_out

    # Output head: class scores turned into log-probabilities.
    modules += [nn.Linear(width_in, CLASSES), nn.LogSoftmax(dim=1)]
    return nn.Sequential(*modules)

In [5]:
def get_mnist():
    """Build the FashionMNIST training and validation data loaders.

    Both datasets are stored under ``DIR`` and converted with
    ``transforms.ToTensor()``. The training split is downloaded if missing
    and served in shuffled batches. The validation split (``train=False``)
    is served in a fixed order so that a caller which only consumes the
    first batches always evaluates on the same examples.

    Returns:
        tuple: ``(train_loader, valid_loader)``, both with
        ``batch_size=BATCHSIZE``.
    """
    train_loader = torch.utils.data.DataLoader(
        datasets.FashionMNIST(DIR, train=True, download=True, transform=transforms.ToTensor()),
        batch_size=BATCHSIZE,
        shuffle=True,
    )
    valid_loader = torch.utils.data.DataLoader(
        datasets.FashionMNIST(DIR, train=False, transform=transforms.ToTensor()),
        batch_size=BATCHSIZE,
        # No shuffling for evaluation: objective() stops after
        # N_VALID_EXAMPLES examples, and a shuffled loader would score every
        # epoch and trial on a different random subset, adding noise to the
        # values that pruning compares.
        shuffle=False,
    )

    return train_loader, valid_loader

In [6]:
def _train_one_epoch(model, optimizer, train_loader):
    """Run one training epoch over at most ``N_TRAIN_EXAMPLES`` examples."""
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        # Limiting training data for faster epochs.
        if batch_idx * BATCHSIZE >= N_TRAIN_EXAMPLES:
            break

        # Flatten each image into a vector before feeding the MLP.
        data, target = data.view(data.size(0), -1).to(DEVICE), target.to(DEVICE)

        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()


def _evaluate(model, valid_loader):
    """Return the accuracy of ``model`` on the capped validation batches."""
    model.eval()
    correct = 0
    seen = 0
    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(valid_loader):
            # Limiting validation data.
            if batch_idx * BATCHSIZE >= N_VALID_EXAMPLES:
                break
            data, target = data.view(data.size(0), -1).to(DEVICE), target.to(DEVICE)
            output = model(data)
            # Get the index of the max log-probability.
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
            seen += target.size(0)

    # Divide by the number of examples actually scored. The loop consumes
    # whole batches, so this can differ from N_VALID_EXAMPLES when the cap is
    # not a multiple of BATCHSIZE.
    return correct / seen if seen else 0.0


def objective(trial):
    """Train one sampled configuration and return its validation accuracy.

    The trial chooses the architecture (via ``define_model``), the optimizer
    class (``Adam``, ``RMSprop`` or ``SGD``) and a log-scaled learning rate in
    ``[1e-5, 1e-1]``. After every epoch the validation accuracy is reported
    to the trial so that it can be pruned.

    Args:
        trial: The Optuna trial supplying hyperparameter suggestions.

    Returns:
        float: Validation accuracy after the last epoch (``0.0`` if
        ``EPOCHS`` is 0).

    Raises:
        optuna.exceptions.TrialPruned: If ``trial.should_prune()`` is true
            after an epoch's accuracy has been reported.
    """
    # Generate the model.
    model = define_model(trial).to(DEVICE)

    # Generate the optimizers.
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "RMSprop", "SGD"])
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)

    # Get the FashionMNIST dataset.
    train_loader, valid_loader = get_mnist()

    # Defined up front so the return below is valid even with EPOCHS == 0.
    accuracy = 0.0
    for epoch in range(EPOCHS):
        _train_one_epoch(model, optimizer, train_loader)
        accuracy = _evaluate(model, valid_loader)

        trial.report(accuracy, epoch)

        # Handle pruning based on the intermediate value.
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return accuracy

In [27]:
# The study is persisted in a local SQLite file under a fixed name, so
# re-running this cell (e.g. Restart & Run All) would raise
# DuplicatedStudyError. load_if_exists=True resumes the stored study instead.
study = optuna.create_study(
    direction="maximize",
    storage="sqlite:///db.sqlite3",  # Specify the storage URL here.
    study_name="mnist",
    load_if_exists=True,
)

[32m[I 2023-05-09 20:12:19,511][0m A new study created in RDB with name: mnist[0m


In [28]:
# Run the search; it ends once either the trial budget or the time budget
# (in seconds) is used up.
study.optimize(
    objective,
    n_trials=10,
    timeout=600,
)

[32m[I 2023-05-09 20:12:51,510][0m Trial 0 finished with value: 0.0921875 and parameters: {'n_layers': 2, 'n_units_l0': 19, 'dropout_l0': 0.4695922979051355, 'n_units_l1': 19, 'dropout_l1': 0.3844412020198067, 'optimizer': 'SGD', 'lr': 0.00014425559433537405}. Best is trial 0 with value: 0.0921875.[0m
[32m[I 2023-05-09 20:13:00,309][0m Trial 1 finished with value: 0.7265625 and parameters: {'n_layers': 1, 'n_units_l0': 15, 'dropout_l0': 0.45241986695335873, 'optimizer': 'RMSprop', 'lr': 0.004147143605174518}. Best is trial 1 with value: 0.7265625.[0m
[32m[I 2023-05-09 20:13:09,108][0m Trial 2 finished with value: 0.7578125 and parameters: {'n_layers': 1, 'n_units_l0': 107, 'dropout_l0': 0.2480036016557838, 'optimizer': 'RMSprop', 'lr': 0.017405135925527674}. Best is trial 2 with value: 0.7578125.[0m
[32m[I 2023-05-09 20:13:18,340][0m Trial 3 finished with value: 0.61484375 and parameters: {'n_layers': 3, 'n_units_l0': 12, 'dropout_l0': 0.44481062924036485, 'n_units_l1': 86, 

## Visualizations

In [22]:
# Trials stopped early by the pruner. They are only counted below, so the
# deep copy is skipped.
pruned_trials = study.get_trials(
    states=[TrialState.PRUNED],
    deepcopy=False,
)

In [23]:
# Trials that ran through every epoch. They are only counted below, so the
# deep copy is skipped.
complete_trials = study.get_trials(
    states=[TrialState.COMPLETE],
    deepcopy=False,
)

In [24]:
# Summary counts; len(study.trials) covers every trial stored in the study.
print("Study statistics: ")
print(f"  Number of finished trials:  {len(study.trials)}")
print(f"  Number of pruned trials:  {len(pruned_trials)}")
print(f"  Number of complete trials:  {len(complete_trials)}")



Study statistics: 
  Number of finished trials:  60
  Number of pruned trials:  36
  Number of complete trials:  24


In [25]:
# Report the best objective value in the study and the hyperparameters that
# produced it.
trial = study.best_trial
print("Best trial:")
print(f"  Value:  {trial.value}")
print("  Params: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")

Best trial:
  Value:  0.85234375
  Params: 
    n_layers: 1
    n_units_l0: 115
    dropout_l0: 0.30686750726727063
    optimizer: Adam
    lr: 0.0043727807057160435


In [None]:
# Objective value (validation accuracy returned by objective()) across trials.
plot_optimization_history(study)

In [None]:
# Per-epoch accuracies that objective() passed to trial.report().
plot_intermediate_values(study)

In [None]:
# Parallel-coordinate view of the sampled hyperparameters and objective value.
plot_parallel_coordinate(study)

In [None]:
# Contour plots of the objective value over pairs of hyperparameters.
plot_contour(study)
# To restrict the plot to specific hyperparameters of this study:
# plot_contour(study, params=["lr", "n_units_l0"])

In [None]:
# Slice plots: objective value against each hyperparameter separately.
plot_slice(study)

In [None]:
# Hyperparameter importances with respect to the objective value.
plot_param_importances(study)

Hyperparameter importance can also be computed against trial duration instead of the objective value, which shows which hyperparameters most affect how long a trial takes.

In [None]:
# Same importance plot, but measured against each trial's wall-clock
# duration in seconds rather than its objective value.
plot_param_importances(
    study,
    target=lambda finished: finished.duration.total_seconds(),
    target_name="duration",
)

## Dashboard

Launch the dashboard from a terminal, in the directory that contains `db.sqlite3`:

`optuna-dashboard sqlite:///db.sqlite3`