In [1]:
import numpy as np
import pandas as pd
import optuna
from lightgbm import LGBMClassifier
from optuna import Trial
from optuna.samplers import TPESampler
from sklearn.metrics import log_loss
from sklearn.model_selection import train_test_split

## Optuna Tutorial

In [3]:
import optuna

def objective(trial):
    """Toy objective: squared distance of the sampled ``x`` from 2.

    Args:
        trial: Optuna trial; one float named ``'x'`` is sampled from [-10, 10].

    Returns:
        ``(x - 2) ** 2`` for the sampled value, which is smallest at ``x == 2``.
    """
    sampled = trial.suggest_float('x', -10, 10)
    distance = sampled - 2
    return distance ** 2

# Seed the sampler so a fresh "Restart & Run All" reproduces the same trials.
# TPESampler is what create_study() uses by default for a single objective,
# so only the randomness is pinned here, not the search strategy.
study = optuna.create_study(sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=30)

# Last expression of the cell: display the best parameters found.
study.best_params

[32m[I 2021-11-29 17:20:05,131][0m A new study created in memory with name: no-name-0464b9f7-8e64-481a-a1eb-55a945af6c2f[0m
[32m[I 2021-11-29 17:20:05,133][0m Trial 0 finished with value: 0.1363814495370643 and parameters: {'x': 2.3692985913012183}. Best is trial 0 with value: 0.1363814495370643.[0m
[32m[I 2021-11-29 17:20:05,135][0m Trial 1 finished with value: 78.75724233602091 and parameters: {'x': -6.874527724674757}. Best is trial 0 with value: 0.1363814495370643.[0m
[32m[I 2021-11-29 17:20:05,136][0m Trial 2 finished with value: 29.358555832630138 and parameters: {'x': 7.418353609043077}. Best is trial 0 with value: 0.1363814495370643.[0m
[32m[I 2021-11-29 17:20:05,137][0m Trial 3 finished with value: 19.38337760582468 and parameters: {'x': -2.4026557446414865}. Best is trial 0 with value: 0.1363814495370643.[0m
[32m[I 2021-11-29 17:20:05,138][0m Trial 4 finished with value: 120.32834656343591 and parameters: {'x': -8.969427813857745}. Best is trial 0 with value:

{'x': 2.1398961070862916}

## Optuna for PyTorch

In [9]:
import os
import optuna
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data
from torchvision import datasets
from torchvision import transforms


# --- Experiment configuration -------------------------------------------
BATCHSIZE = 128      # samples per mini-batch for both data loaders
CLASSES = 10         # width of the network's output layer
EPOCHS = 10          # training epochs per trial
DIR = os.getcwd()    # dataset root: the current working directory

# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Per-epoch caps on how many examples are consumed, expressed in whole batches
# (469 * 128 = 60,032 and 79 * 128 = 10,112).
# NOTE(review): presumably sized to cover the full train/test splits - confirm.
N_TRAIN_EXAMPLES = 469 * BATCHSIZE
N_VALID_EXAMPLES = 79 * BATCHSIZE

In [10]:
def get_mnist():
    """Build the FashionMNIST training and validation data loaders.

    Both datasets are rooted at ``DIR`` and converted with ``ToTensor``; only
    the training split is requested with ``download=True``. Both loaders use
    ``BATCHSIZE`` and shuffle their data.

    Returns:
        Tuple ``(train_loader, valid_loader)``.
    """
    loader_options = dict(batch_size=BATCHSIZE, shuffle=True)

    # Training split (downloaded into DIR on first use).
    train_set = datasets.FashionMNIST(
        DIR, train=True, download=True, transform=transforms.ToTensor()
    )
    train_loader = torch.utils.data.DataLoader(train_set, **loader_options)

    # Held-out split, used for validation.
    valid_set = datasets.FashionMNIST(DIR, train=False, transform=transforms.ToTensor())
    valid_loader = torch.utils.data.DataLoader(valid_set, **loader_options)

    return train_loader, valid_loader


In [11]:
def define_model(trial):
    """Assemble a fully connected classifier whose shape is sampled from ``trial``.

    Sampled hyperparameters:
        * ``n_layers``: number of hidden blocks, integer in [1, 10].
        * ``n_units_l{i}``: width of hidden layer ``i``, integer in [4, 128].
        * ``dropout_l{i}``: dropout probability after layer ``i``, float in [0.2, 0.5].

    Each hidden block is Linear -> ReLU -> Dropout. The network takes
    28 * 28 input features and ends with a Linear layer of ``CLASSES`` outputs
    followed by ``LogSoftmax`` over dim 1.

    Args:
        trial: Optuna trial used to sample the hyperparameters above.

    Returns:
        The assembled ``nn.Sequential`` model.
    """
    depth = trial.suggest_int("n_layers", 1, 10)

    modules = []
    width_in = 28 * 28
    for idx in range(depth):
        width_out = trial.suggest_int(f"n_units_l{idx}", 4, 128)
        modules.extend(
            [
                nn.Linear(width_in, width_out),
                nn.ReLU(),
                nn.Dropout(trial.suggest_float(f"dropout_l{idx}", 0.2, 0.5)),
            ]
        )
        # The next block consumes what this one produced.
        width_in = width_out

    # Classification head producing log-probabilities per class.
    modules += [nn.Linear(width_in, CLASSES), nn.LogSoftmax(dim=1)]
    return nn.Sequential(*modules)

In [13]:
def objective(trial):
    """Train one sampled network and return its validation accuracy.

    The architecture comes from ``define_model``. The optimizer class
    ("Adam", "RMSprop" or "SGD") and a log-scaled learning rate in
    [1e-5, 1e-1] are sampled here. After every epoch the validation accuracy
    is reported to the trial, and the trial is aborted if pruning is requested.

    Args:
        trial: Optuna trial used to sample hyperparameters and to report
            intermediate accuracies.

    Returns:
        Validation accuracy after the last epoch, as a fraction of the
        validation samples actually evaluated (0.0 if no epoch ran).

    Raises:
        optuna.exceptions.TrialPruned: if ``trial.should_prune()`` is true
            after an epoch has been reported.
    """
    model = define_model(trial).to(DEVICE)
    print(model)
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "RMSprop", "SGD"])
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    # Look the optimizer class up by name on torch.optim.
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)

    train_loader, valid_loader = get_mnist()

    # Defined up front so the function still returns a value when EPOCHS == 0.
    accuracy = 0.0

    for epoch in range(EPOCHS):
        # ---- training pass ----
        model.train()
        for batch_idx, (data, target) in enumerate(train_loader):
            # Stop once the per-epoch training budget is used up.
            if batch_idx * BATCHSIZE >= N_TRAIN_EXAMPLES:
                break
            # Flatten each image to a vector before the first Linear layer.
            data, target = data.view(data.size(0), -1).to(DEVICE), target.to(DEVICE)

            optimizer.zero_grad()
            output = model(data)
            loss = F.nll_loss(output, target)
            loss.backward()
            optimizer.step()

        # ---- validation pass ----
        model.eval()
        correct = 0
        evaluated = 0
        with torch.no_grad():
            for batch_idx, (data, target) in enumerate(valid_loader):
                if batch_idx * BATCHSIZE >= N_VALID_EXAMPLES:
                    break

                data, target = data.view(data.size(0), -1).to(DEVICE), target.to(DEVICE)
                output = model(data)

                pred = output.argmax(dim=1, keepdim=True)
                correct += pred.eq(target.view_as(pred)).sum().item()
                evaluated += target.size(0)

        # Divide by the number of samples really scored. Whole batches are
        # evaluated, so when N_VALID_EXAMPLES is not a multiple of BATCHSIZE the
        # cap itself would be too small a denominator and could push the
        # accuracy above 1.0.
        accuracy = correct / evaluated if evaluated else 0.0
        trial.report(accuracy, epoch)

        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return accuracy


In [14]:
if __name__ == "__main__":
    # One objective (validation accuracy), and larger values are better.
    study = optuna.create_study(direction="maximize")
    # Run a single trial of the PyTorch objective.
    study.optimize(objective, n_trials=1)

[32m[I 2021-11-29 17:23:24,403][0m A new study created in memory with name: no-name-d3a8febb-b999-4cf7-a6af-72d4081bb228[0m


Sequential(
  (0): Linear(in_features=784, out_features=43, bias=True)
  (1): ReLU()
  (2): Dropout(p=0.44752006025862845, inplace=False)
  (3): Linear(in_features=43, out_features=35, bias=True)
  (4): ReLU()
  (5): Dropout(p=0.27608822151433465, inplace=False)
  (6): Linear(in_features=35, out_features=73, bias=True)
  (7): ReLU()
  (8): Dropout(p=0.24209810002064813, inplace=False)
  (9): Linear(in_features=73, out_features=90, bias=True)
  (10): ReLU()
  (11): Dropout(p=0.4721431571489529, inplace=False)
  (12): Linear(in_features=90, out_features=110, bias=True)
  (13): ReLU()
  (14): Dropout(p=0.3727037023122215, inplace=False)
  (15): Linear(in_features=110, out_features=84, bias=True)
  (16): ReLU()
  (17): Dropout(p=0.25751165966826334, inplace=False)
  (18): Linear(in_features=84, out_features=80, bias=True)
  (19): ReLU()
  (20): Dropout(p=0.35710382765960813, inplace=False)
  (21): Linear(in_features=80, out_features=10, bias=True)
  (22): LogSoftmax(dim=1)
)


[32m[I 2021-11-29 17:24:53,434][0m Trial 0 finished with value: 0.1 and parameters: {'n_layers': 7, 'n_units_l0': 43, 'dropout_l0': 0.44752006025862845, 'n_units_l1': 35, 'dropout_l1': 0.27608822151433465, 'n_units_l2': 73, 'dropout_l2': 0.24209810002064813, 'n_units_l3': 90, 'dropout_l3': 0.4721431571489529, 'n_units_l4': 110, 'dropout_l4': 0.3727037023122215, 'n_units_l5': 84, 'dropout_l5': 0.25751165966826334, 'n_units_l6': 80, 'dropout_l6': 0.35710382765960813, 'optimizer': 'SGD', 'lr': 0.0004100996221868376}. Best is trial 0 with value: 0.1.[0m
