# Optuna simple

In [1]:
%pip install optuna

Collecting optuna
  Downloading optuna-3.3.0-py3-none-any.whl (404 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m404.2/404.2 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.12.0-py3-none-any.whl (226 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m226.0/226.0 kB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting cmaes>=0.10.0 (from optuna)
  Downloading cmaes-0.10.0-py3-none-any.whl (29 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.7.0-py2.py3-none-any.whl (11 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.2.4-py3-none-any.whl (78 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.7/78.7 kB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: Mako, colorlog, cmaes, alembic, optuna
Successfully installed Mako-1.2.4 alembic-1.12.0 cmaes-0.10.0 colorlog-6.7.0 optuna-3.3.0


In [2]:
# https://optuna.org/
import optuna

In [3]:
def objective(trial):
   """Toy objective: squared distance between a sampled ``x`` and 2.

   Args:
       trial: Optuna trial used to sample ``x`` from the range [-10, 10].

   Returns:
       ``(x - 2) ** 2`` for the sampled ``x``.
   """
   # suggest_float replaces the deprecated suggest_uniform, which emits a
   # FutureWarning on every call.
   x = trial.suggest_float('x', -10, 10)
   return (x - 2) ** 2

In [4]:
# Minimise the toy objective over 5 trials ("minimize" is also Optuna's default direction).
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=5)
# Last expression of the cell: displays the parameters of the best trial.
study.best_params

[I 2023-09-01 16:03:37,341] A new study created in memory with name: no-name-a14835d5-f58e-44bf-8620-17716f9a0d6a
  x = trial.suggest_uniform('x', -10, 10)
[I 2023-09-01 16:03:37,347] Trial 0 finished with value: 2.4666652511407507 and parameters: {'x': 0.42943791872439796}. Best is trial 0 with value: 2.4666652511407507.
[I 2023-09-01 16:03:37,356] Trial 1 finished with value: 31.41621483711224 and parameters: {'x': -3.6050169345963834}. Best is trial 0 with value: 2.4666652511407507.
[I 2023-09-01 16:03:37,359] Trial 2 finished with value: 22.990428110763123 and parameters: {'x': -2.794833481025501}. Best is trial 0 with value: 2.4666652511407507.
[I 2023-09-01 16:03:37,361] Trial 3 finished with value: 4.611669003167927 and parameters: {'x': 4.147479686322534}. Best is trial 0 with value: 2.4666652511407507.
[I 2023-09-01 16:03:37,363] Trial 4 finished with value: 90.55122797552917 and parameters: {'x': -7.515840896921784}. Best is trial 0 with value: 2.4666652511407507.


{'x': 0.42943791872439796}

# Optuna advanced
### Taken from https://github.com/optuna/optuna/blob/master/examples/pytorch/pytorch_simple.py

## Imports and params

In [5]:
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data
from torchvision import datasets
from torchvision import transforms

import optuna
from optuna.trial import TrialState

# --- Experiment configuration ---
DIR = os.getcwd()                  # root directory handed to the dataset loaders
DEVICE = torch.device("cpu")       # the model and every batch are moved to this device
CLASSES = 10                       # size of the model's output layer
BATCHSIZE = 128                    # batch size of both data loaders
EPOCHS = 10                        # training/validation rounds per trial
LOG_INTERVAL = 10                  # not referenced by the cells visible in this notebook
N_TRAIN_EXAMPLES = 30 * BATCHSIZE  # cap on training samples consumed per epoch
N_VALID_EXAMPLES = 10 * BATCHSIZE  # cap on validation samples consumed per epoch

## Layers & dropout optimization

In [6]:
def define_model(trial):
    """Build a fully connected classifier whose shape is sampled from ``trial``.

    Sampled hyperparameters:
      * ``n_layers``     - number of hidden layers (1 to 3)
      * ``n_units_l<i>`` - width of hidden layer ``i`` (4 to 128)
      * ``dropout_l<i>`` - dropout probability after hidden layer ``i`` (0.2 to 0.5)

    The network takes 28 * 28 input features and ends with a ``CLASSES``-wide
    linear layer followed by log-softmax.

    Returns:
        The assembled ``nn.Sequential`` model.
    """
    depth = trial.suggest_int("n_layers", 1, 3)

    modules = []
    width_in = 28 * 28
    for idx in range(depth):
        width_out = trial.suggest_int("n_units_l{}".format(idx), 4, 128)
        hidden = [nn.Linear(width_in, width_out), nn.ReLU()]
        hidden.append(nn.Dropout(trial.suggest_float("dropout_l{}".format(idx), 0.2, 0.5)))
        modules.extend(hidden)

        # The next layer consumes what this one produced.
        width_in = width_out

    # Classification head.
    modules.extend([nn.Linear(width_in, CLASSES), nn.LogSoftmax(dim=1)])

    return nn.Sequential(*modules)

## Dataset

In [7]:
def get_mnist():
    """Create the FashionMNIST training and validation data loaders.

    Both datasets live under ``DIR`` and are converted to tensors. Only the
    training split is requested with ``download=True``; the validation split
    is opened without a download flag. Both loaders shuffle and use
    ``BATCHSIZE``-sized batches.

    Returns:
        A ``(train_loader, valid_loader)`` tuple.
    """
    loader_options = {"batch_size": BATCHSIZE, "shuffle": True}

    train_set = datasets.FashionMNIST(
        DIR, train=True, download=True, transform=transforms.ToTensor()
    )
    valid_set = datasets.FashionMNIST(DIR, train=False, transform=transforms.ToTensor())

    return (
        torch.utils.data.DataLoader(train_set, **loader_options),
        torch.utils.data.DataLoader(valid_set, **loader_options),
    )

In [8]:
def _train_one_epoch(model, optimizer, train_loader):
    """Run one optimisation pass over at most ``N_TRAIN_EXAMPLES`` training samples."""
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        # Limiting training data for faster epochs.
        if batch_idx * BATCHSIZE >= N_TRAIN_EXAMPLES:
            break

        # Flatten every sample to a vector before feeding the linear layers.
        data, target = data.view(data.size(0), -1).to(DEVICE), target.to(DEVICE)

        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()


def _evaluate(model, valid_loader):
    """Return the accuracy of ``model`` on at most ``N_VALID_EXAMPLES`` validation samples."""
    model.eval()
    correct = 0
    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(valid_loader):
            # Limiting validation data.
            if batch_idx * BATCHSIZE >= N_VALID_EXAMPLES:
                break
            data, target = data.view(data.size(0), -1).to(DEVICE), target.to(DEVICE)
            output = model(data)
            # Get the index of the max log-probability.
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()

    # Divide by the number of samples actually scored (capped by the limit above).
    return correct / min(len(valid_loader.dataset), N_VALID_EXAMPLES)


def objective(trial):
    """Train a sampled model on FashionMNIST and return its validation accuracy.

    The model architecture, the optimizer class (Adam, RMSprop or SGD) and the
    learning rate (log-uniform between 1e-5 and 1e-1) are all sampled from
    ``trial``. After every epoch the validation accuracy is reported to the
    trial so that the study's pruner can stop the trial early.

    Args:
        trial: Optuna trial providing the hyperparameter suggestions.

    Returns:
        Validation accuracy measured after the last epoch.

    Raises:
        optuna.exceptions.TrialPruned: when ``trial.should_prune()`` is true
            after an epoch.
    """
    # Generate the model.
    model = define_model(trial).to(DEVICE)

    # Generate the optimizers.
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "RMSprop", "SGD"])
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)

    # Get the FashionMNIST dataset.
    train_loader, valid_loader = get_mnist()

    for epoch in range(EPOCHS):
        _train_one_epoch(model, optimizer, train_loader)
        accuracy = _evaluate(model, valid_loader)

        trial.report(accuracy, epoch)

        # Handle pruning based on the intermediate value.
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return accuracy


In [9]:
# Maximise validation accuracy; the search is bounded by 100 trials and a 600-second timeout.
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=100, timeout=600)

# Split the recorded trials by final state for the summary below.
complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])
pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])

print("Study statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))

# Summarise the best trial found so far.
print("Best trial:")
trial = study.best_trial
print("  Value: ", trial.value)
print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

[I 2023-09-01 16:04:11,216] A new study created in memory with name: no-name-a13838c0-618a-4270-b9d3-ae1a74b68922


Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to /content/FashionMNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 26421880/26421880 [00:01<00:00, 17202107.78it/s]


Extracting /content/FashionMNIST/raw/train-images-idx3-ubyte.gz to /content/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to /content/FashionMNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 29515/29515 [00:00<00:00, 300684.66it/s]


Extracting /content/FashionMNIST/raw/train-labels-idx1-ubyte.gz to /content/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to /content/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 4422102/4422102 [00:00<00:00, 5519091.23it/s]


Extracting /content/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to /content/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to /content/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 5148/5148 [00:00<00:00, 17713106.64it/s]


Extracting /content/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to /content/FashionMNIST/raw



[I 2023-09-01 16:04:24,903] Trial 0 finished with value: 0.79140625 and parameters: {'n_layers': 3, 'n_units_l0': 119, 'dropout_l0': 0.3215310431200013, 'n_units_l1': 87, 'dropout_l1': 0.36561960364472884, 'n_units_l2': 120, 'dropout_l2': 0.2324700714598736, 'optimizer': 'Adam', 'lr': 0.011309192760152936}. Best is trial 0 with value: 0.79140625.
[I 2023-09-01 16:04:33,224] Trial 1 finished with value: 0.11015625 and parameters: {'n_layers': 2, 'n_units_l0': 118, 'dropout_l0': 0.3871247141259224, 'n_units_l1': 44, 'dropout_l1': 0.38128212956858676, 'optimizer': 'SGD', 'lr': 2.6955510999228292e-05}. Best is trial 0 with value: 0.79140625.
[I 2023-09-01 16:04:40,595] Trial 2 finished with value: 0.45 and parameters: {'n_layers': 3, 'n_units_l0': 42, 'dropout_l0': 0.2495440686916282, 'n_units_l1': 108, 'dropout_l1': 0.47272209706183776, 'n_units_l2': 26, 'dropout_l2': 0.2032251556240097, 'optimizer': 'RMSprop', 'lr': 8.343541211125262e-05}. Best is trial 0 with value: 0.79140625.
[I 2023-

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  46
  Number of complete trials:  54
Best trial:
  Value:  0.84765625
  Params: 
    n_layers: 1
    n_units_l0: 118
    dropout_l0: 0.2418038787544261
    optimizer: Adam
    lr: 0.0027409775694508588


## Visualisations

In [10]:
# Hyperparameter importances for the objective value, estimated from the study's trials.
optuna.visualization.plot_param_importances(study)

In [11]:
# Objective value of each trial over the course of the study.
optuna.visualization.plot_optimization_history(study)

In [12]:
# Objective value plotted against each hyperparameter individually.
optuna.visualization.plot_slice(study)

In [13]:
# Slice plot restricted to the 'optimizer' parameter.
optuna.visualization.plot_slice(study, ['optimizer'])

In [14]:
# Contour of the objective value over the ('lr', 'optimizer') parameter pair.
optuna.visualization.plot_contour(study, ['lr', 'optimizer'])

In [15]:
# Pin 'optimizer' to the best value found so far; every other parameter is
# still sampled by the study's current sampler, which is wrapped here.
# NOTE: re-running this cell wraps the sampler again and adds another 100 trials.
fix_optimizer = {'optimizer': study.best_params['optimizer']}
study.sampler = optuna.samplers.PartialFixedSampler(fix_optimizer, study.sampler)
study.optimize(objective, n_trials=100)


PartialFixedSampler is experimental (supported from v2.4.0). The interface can change in the future.

[I 2023-09-01 16:50:37,584] Trial 100 finished with value: 0.82734375 and parameters: {'n_layers': 1, 'n_units_l0': 110, 'dropout_l0': 0.2076745607309212, 'optimizer': 'Adam', 'lr': 0.016758153847456502}. Best is trial 53 with value: 0.84765625.
[I 2023-09-01 16:50:39,794] Trial 101 pruned. 
[I 2023-09-01 16:50:47,714] Trial 102 finished with value: 0.80703125 and parameters: {'n_layers': 1, 'n_units_l0': 113, 'dropout_l0': 0.23561723882125063, 'optimizer': 'Adam', 'lr': 0.004178003645129532}. Best is trial 53 with value: 0.84765625.
[I 2023-09-01 16:50:55,072] Trial 103 finished with value: 0.8234375 and parameters: {'n_layers': 1, 'n_units_l0': 94, 'dropout_l0': 0.2470851966958348, 'optimizer': 'Adam', 'lr': 0.007687359952597211}. Best is trial 53 with value: 0.84765625.
[I 2023-09-01 16:50:55,876] Trial 104 pruned. 
[I 2023-09-01 16:50:58,944] Trial 105 pruned. 
[I 2023-09-01 16:51

In [16]:
# Report the study's current best trial: its objective value and its parameters.
print("Best trial:")
trial = study.best_trial
print("  Value: ", trial.value)
print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

Best trial:
  Value:  0.85625
  Params: 
    n_layers: 1
    n_units_l0: 92
    dropout_l0: 0.22071090228200904
    optimizer: Adam
    lr: 0.00479335073022351


In [17]:
# Raw (name, value) items of the best trial's parameters.
study.best_trial.params.items()

dict_items([('n_layers', 1), ('n_units_l0', 92), ('dropout_l0', 0.22071090228200904), ('optimizer', 'Adam'), ('lr', 0.00479335073022351)])

In [18]:
# Intermediate values passed to trial.report (the per-epoch accuracy in objective) for the study's trials.
optuna.visualization.plot_intermediate_values(study)