In [None]:
!pip install optuna

Collecting optuna
  Downloading optuna-2.9.1-py3-none-any.whl (302 kB)
[K     |████████████████████████████████| 302 kB 9.1 MB/s 
Collecting alembic
  Downloading alembic-1.6.5-py2.py3-none-any.whl (164 kB)
[K     |████████████████████████████████| 164 kB 57.2 MB/s 
Collecting cliff
  Downloading cliff-3.9.0-py3-none-any.whl (80 kB)
[K     |████████████████████████████████| 80 kB 7.7 MB/s 
[?25hCollecting colorlog
  Downloading colorlog-6.4.1-py2.py3-none-any.whl (11 kB)
Collecting cmaes>=0.8.2
  Downloading cmaes-0.8.2-py3-none-any.whl (15 kB)
Collecting Mako
  Downloading Mako-1.1.5-py2.py3-none-any.whl (75 kB)
[K     |████████████████████████████████| 75 kB 3.7 MB/s 
Collecting python-editor>=0.3
  Downloading python_editor-1.0.4-py3-none-any.whl (4.9 kB)
Collecting autopage>=0.4.0
  Downloading autopage-0.4.0-py3-none-any.whl (20 kB)
Collecting stevedore>=2.0.1
  Downloading stevedore-3.4.0-py3-none-any.whl (49 kB)
[K     |████████████████████████████████| 49 kB 5.0 MB/s 
[?

In [None]:
import os

import optuna
from optuna.trial import TrialState
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data
from torchvision import datasets, transforms

In [None]:
# --- Runtime / filesystem ---------------------------------------------------
DEVICE = torch.device("cpu")  # all tensors and the model are moved to this device
DIR = os.getcwd()             # dataset root passed to the torchvision dataset

# --- Problem size -----------------------------------------------------------
CLASSES = 10      # width of the final linear layer
BATCH_SIZE = 128  # mini-batch size for both data loaders
EPOCHS = 10       # upper bound of the per-trial epoch loop
LOG_INTERVAL = 10  # NOTE(review): not referenced in the cells shown here

# --- Per-epoch sample budget (expressed in whole batches) -------------------
N_TRAIN_EXAMPLES = 30 * BATCH_SIZE
N_VALID_EXAMPLES = 10 * BATCH_SIZE

In [None]:
def define_model(trial):
    """Build a feed-forward classifier whose shape is sampled from ``trial``.

    The trial is asked for the number of hidden layers (1 to 3) and, for each
    hidden layer, a unit count (4 to 128) and a dropout probability (0.2 to
    0.5). Every hidden layer is ``Linear -> ReLU -> Dropout``; the network ends
    with a ``Linear`` layer of ``CLASSES`` outputs followed by ``LogSoftmax``.

    Args:
        trial: Object exposing ``suggest_int`` and ``suggest_float``.

    Returns:
        An ``nn.Sequential`` taking inputs with ``28 * 28`` features.
    """
    depth = trial.suggest_int("n_layers", 1, 3)

    modules = []
    width_in = 28 * 28
    for idx in range(depth):
        # Ask for the layer width first, then its dropout rate, so the
        # parameter order of the trial matches the layer order.
        width_out = trial.suggest_int("n_units_1{}".format(idx), 4, 128)
        hidden = nn.Linear(width_in, width_out)
        drop_prob = trial.suggest_float("dropout_1{}".format(idx), 0.2, 0.5)

        modules += [hidden, nn.ReLU(), nn.Dropout(drop_prob)]

        # The next layer consumes what this one produced.
        width_in = width_out

    # Classification head emitting log-probabilities over the classes.
    modules += [nn.Linear(width_in, CLASSES), nn.LogSoftmax(dim=1)]

    return nn.Sequential(*modules)

In [None]:
def get_mnist():
    """Create shuffled train and validation loaders over FashionMNIST.

    Despite the function name, the dataset used is ``datasets.FashionMNIST``.
    Both splits are rooted at ``DIR`` and converted with
    ``transforms.ToTensor()``; only the training split is constructed with
    ``download=True``.

    Returns:
        A ``(train_loader, valid_loader)`` tuple of ``DataLoader`` objects,
        each using ``BATCH_SIZE`` and ``shuffle=True``.
    """
    # Build the datasets first (train before validation, so the download
    # triggered by the train split happens before the test split is opened).
    train_set = datasets.FashionMNIST(
        DIR, train=True, download=True, transform=transforms.ToTensor()
    )
    valid_set = datasets.FashionMNIST(DIR, train=False, transform=transforms.ToTensor())

    loader_options = {"batch_size": BATCH_SIZE, "shuffle": True}
    train_loader = torch.utils.data.DataLoader(train_set, **loader_options)
    valid_loader = torch.utils.data.DataLoader(valid_set, **loader_options)

    return train_loader, valid_loader

In [None]:
def objective(trial):
    """Train one sampled configuration and return its final validation accuracy.

    The model architecture comes from ``define_model(trial)``; the optimizer
    class (``SGD``, ``Adam`` or ``RMSprop``) and a log-uniform learning rate in
    ``[1e-5, 1e-1]`` are sampled here. Each epoch trains on at most
    ``N_TRAIN_EXAMPLES`` samples, evaluates on at most ``N_VALID_EXAMPLES``
    samples, and reports the accuracy to the trial so it can be pruned.

    Args:
        trial: Object exposing ``suggest_categorical``, ``suggest_float``,
            ``report`` and ``should_prune`` (plus whatever ``define_model``
            needs).

    Returns:
        Validation accuracy measured after the last epoch that ran
        (``0.0`` if ``EPOCHS`` is zero).

    Raises:
        optuna.exceptions.TrialPruned: If ``trial.should_prune()`` is true
            after an epoch's accuracy has been reported.
    """
    model = define_model(trial).to(DEVICE)

    optimizer_name = trial.suggest_categorical("optimizer", ["SGD", "Adam", "RMSprop"])
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    # Look the optimizer class up by name in torch.optim.
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)

    train_loader, valid_loader = get_mnist()

    accuracy = 0.0
    for epoch in range(EPOCHS):
        # ---- training pass ----
        model.train()
        for batch_idx, (data, target) in enumerate(train_loader):
            # Cap the number of samples seen per epoch to keep trials short.
            if batch_idx * BATCH_SIZE >= N_TRAIN_EXAMPLES:
                break

            # Flatten each sample to a vector before feeding the linear layers.
            data, target = data.view(data.size(0), -1).to(DEVICE), target.to(DEVICE)

            optimizer.zero_grad()
            output = model(data)
            loss = F.nll_loss(output, target)
            loss.backward()
            optimizer.step()

        # ---- validation pass ----
        model.eval()
        correct = 0
        # No gradients are needed for evaluation; skip building the autograd graph.
        with torch.no_grad():
            for batch_idx, (data, target) in enumerate(valid_loader):
                if batch_idx * BATCH_SIZE >= N_VALID_EXAMPLES:
                    break

                data, target = data.view(data.size(0), -1).to(DEVICE), target.to(DEVICE)
                output = model(data)
                pred = output.argmax(dim=1, keepdim=True)
                correct += pred.eq(target.view_as(pred)).sum().item()

        accuracy = correct / min(len(valid_loader.dataset), N_VALID_EXAMPLES)

        # Report the intermediate value so the pruner can compare this trial
        # against others at the same epoch.
        trial.report(accuracy, epoch)

        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    # Return only after every epoch has run; returning from inside the loop
    # would end each trial after its first epoch.
    return accuracy

In [None]:
# Search for the configuration with the highest validation accuracy. The run is
# bounded by both a trial count and a wall-clock timeout (in seconds).
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=100, timeout=500)

# Split the finished trials by their final state for the summary below.
pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
completed_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

print(f"Number of trials: {len(study.trials)}")
# Each label is paired with the list of the matching state (the two counts
# were previously printed under each other's label).
print(f"Number of completed trials: {len(completed_trials)}")
print(f"Number of pruned trials: {len(pruned_trials)}")

best_trial = study.best_trial

print(f"Best Trial Value: {best_trial.value}")

# Hyperparameters sampled for the best trial, one per line.
for key, value in best_trial.params.items():
    print(f"{key}: {value}")

[32m[I 2021-08-28 12:02:39,459][0m A new study created in memory with name: no-name-3b02c5cb-b605-474b-8755-71714dc776a1[0m
[32m[I 2021-08-28 12:02:40,085][0m Trial 0 finished with value: 0.09140625 and parameters: {'n_layers': 2, 'n_units_10': 50, 'dropout_10': 0.38464918346439586, 'n_units_11': 76, 'dropout_11': 0.48595369216539297, 'optimizer': 'SGD', 'lr': 0.0025745881412939787}. Best is trial 0 with value: 0.09140625.[0m
[32m[I 2021-08-28 12:02:40,657][0m Trial 1 finished with value: 0.12109375 and parameters: {'n_layers': 3, 'n_units_10': 55, 'dropout_10': 0.2215622909918732, 'n_units_11': 43, 'dropout_11': 0.21282343512927096, 'n_units_12': 18, 'dropout_12': 0.47787240784346763, 'optimizer': 'SGD', 'lr': 0.05312719511656998}. Best is trial 1 with value: 0.12109375.[0m
[32m[I 2021-08-28 12:02:41,255][0m Trial 2 finished with value: 0.0953125 and parameters: {'n_layers': 3, 'n_units_10': 114, 'dropout_10': 0.23220593418509342, 'n_units_11': 127, 'dropout_11': 0.31907193

Number of trials: 100
Number of completed trials: 25
Number of pruned trials: 75
Best Trial Value: 0.76796875
n_layers: 1
n_units_10: 59
dropout_10: 0.3117972488892801
optimizer: Adam
lr: 0.013241372727413655
