In [1]:
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
from torchvision import transforms
import numpy as np

In [2]:
BATCHSIZE = 128

# Preprocessing applied to every image: tensor conversion followed by
# normalisation of the single channel with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Training split stored under ./data (download enabled), served in shuffled batches.
train_set = MNIST(root='./data', train=True, download=True, transform=transform)
train_loader = DataLoader(
    train_set,
    batch_size=BATCHSIZE,
    shuffle=True,
    num_workers=2,
)

# Held-out split from the same root, served in a fixed order.
test_set = MNIST(root='./data', train=False, download=True, transform=transform)
test_loader = DataLoader(
    test_set,
    batch_size=BATCHSIZE,
    shuffle=False,
    num_workers=2,
)

# Digit labels 0..9 as a tuple of uint8 values.
classes = tuple(np.arange(10, dtype=np.uint8))

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import optuna
optuna.logging.disable_default_handler()

from tqdm import tqdm_notebook as tqdm

In [6]:
# Number of train/evaluate passes each trial runs (loop bound inside `objective`).
EPOCH = 10

class Net(nn.Module):
    """Small two-convolution classifier with a tunable activation and dropout rate.

    The forward pass reshapes the convolutional features to 320 values per
    row and ends in a 10-way log-softmax.

    Args:
        trial: Object used to sample hyperparameters. It is passed to
            ``get_activation`` and then asked for ``"dropout_prob"`` in
            [0, 0.8] through ``suggest_uniform``.
    """

    def __init__(self, trial):
        super().__init__()
        # The activation is chosen by name inside get_activation (an earlier
        # draft handed the callables straight to suggest_categorical).
        self.activation = get_activation(trial)
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        drop_p = trial.suggest_uniform("dropout_prob", 0, 0.8)
        self.conv2_drop = nn.Dropout2d(p=drop_p)
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        """Return per-class log-probabilities (log-softmax over dim 1) for ``x``."""
        act = self.activation

        # Stage 1: convolution -> 2x2 max-pool -> activation.
        feats = self.conv1(x)
        feats = act(F.max_pool2d(feats, 2))

        # Stage 2: convolution -> channel dropout -> 2x2 max-pool -> activation.
        feats = self.conv2_drop(self.conv2(feats))
        feats = act(F.max_pool2d(feats, 2))

        # Flatten to 320 features per row for the fully connected head.
        flat = feats.view(-1, 320)
        hidden = act(self.fc1(flat))
        # Functional dropout follows the module's train/eval mode.
        hidden = F.dropout(hidden, training=self.training)
        return F.log_softmax(self.fc2(hidden), dim=1)

def train(model, device, train_loader, optimizer):
    """Run one optimisation pass over every batch in ``train_loader``.

    Puts ``model`` in training mode, then for each ``(data, target)`` batch
    moves both tensors to ``device``, computes the negative log-likelihood
    loss of the model output, back-propagates it and applies one
    ``optimizer`` step. Returns ``None``.
    """
    model.train()
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Clear gradients left over from the previous batch before the new backward pass.
        optimizer.zero_grad()
        batch_loss = F.nll_loss(model(inputs), labels)
        batch_loss.backward()
        optimizer.step()

def test(model, device, test_loader):
    model.eval()
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            pred = output.max(1, keepdim=True)[1]
            correct += pred.eq(target.view_as(pred)).sum().item()

    return 1 - correct / len(test_loader.dataset)

def get_optimizer(trial, model):
    """Build the optimizer for ``model`` from hyperparameters sampled by ``trial``.

    Samples, in order: the categorical ``'optimizer'`` (``'Adam'`` or
    ``'MomentumSGD'``), the log-uniform ``'weight_decay'`` in [1e-10, 1e-3],
    and a log-uniform learning rate in [1e-5, 1e-1] stored under
    ``'adam_lr'`` or ``'momentum_sgd_lr'`` depending on the choice.

    Returns:
        ``optim.Adam`` when ``'Adam'`` is chosen; otherwise ``optim.SGD``
        with momentum 0.9. Both receive the sampled weight decay.
    """
    chosen = trial.suggest_categorical('optimizer', ['Adam', 'MomentumSGD'])
    decay = trial.suggest_loguniform('weight_decay', 1e-10, 1e-3)

    if chosen != 'Adam':
        sgd_lr = trial.suggest_loguniform('momentum_sgd_lr', 1e-5, 1e-1)
        return optim.SGD(model.parameters(), lr=sgd_lr,
                         momentum=0.9, weight_decay=decay)

    # Each optimizer keeps its own learning-rate parameter name.
    adam_lr = trial.suggest_loguniform('adam_lr', 1e-5, 1e-1)
    return optim.Adam(model.parameters(), lr=adam_lr, weight_decay=decay)

# def adam(model, trial, weight_decay):
#     adam_lr = trial.suggest_loguniform('adam_lr', 1e-5, 1e-1)
#     return optim.Adam(model.parameters(), lr=adam_lr, weight_decay=weight_decay)
    
# def momentum(model, trial, weight_decay):
#     momentum_sgd_lr = trial.suggest_loguniform('momentum_sgd_lr', 1e-5, 1e-1)
#     return optim.SGD(model.parameters(), lr=momentum_sgd_lr,
#                      momentum=0.9, weight_decay=weight_decay)

# def get_optimizer(trial, model):
#     optimizer = trial.suggest_categorical('optimizer', [adam, momentum])
#     weight_decay = trial.suggest_loguniform('weight_decay', 1e-10, 1e-3)
#     return optimizer(model, trial, weight_decay)

def get_activation(trial):
    """Pick the activation function for a trial.

    Samples the categorical ``'activation'`` from ``['ReLU', 'ELU']`` and
    returns ``F.relu`` for ``'ReLU'`` and ``F.elu`` for any other value.
    """
    picked = trial.suggest_categorical('activation', ['ReLU', 'ELU'])
    return F.relu if picked == 'ReLU' else F.elu

def objective_wrapper(pbar):
    """Create an Optuna objective that advances ``pbar`` exactly once per trial.

    Args:
        pbar: Progress-bar-like object; only ``pbar.update()`` is called on it.

    Returns:
        A function ``objective(trial)`` that builds a fresh ``Net`` and
        optimizer from ``trial``, trains for ``EPOCH`` epochs and returns the
        error rate measured after the last epoch.
    """
    def objective(trial):
        """Train and evaluate one hyperparameter configuration.

        After every epoch the error rate is reported to ``trial`` under the
        epoch index, and the trial is asked whether it should be pruned.

        Raises:
            optuna.structs.TrialPruned: when ``trial.should_prune(step)`` is true.

        Note:
            ``EPOCH`` must be at least 1; with zero epochs no error rate is
            ever computed and the final ``return`` fails.
        """
        device = "cuda" if torch.cuda.is_available() else "cpu"

        try:
            model = Net(trial).to(device)
            optimizer = get_optimizer(trial, model)

            for step in range(EPOCH):
                train(model, device, train_loader, optimizer)
                error_rate = test(model, device, test_loader)

                trial.report(error_rate, step)
                if trial.should_prune(step):
                    raise optuna.structs.TrialPruned()

            return error_rate
        finally:
            # Single update point: the bar advances whether the trial
            # completes, is pruned, or fails with another exception, so its
            # count stays in step with the number of trials attempted.
            pbar.update()

    return objective

In [7]:
TRIAL_SIZE = 100

# One progress-bar tick per trial; the bar is closed when the search ends.
with tqdm(total=TRIAL_SIZE) as pbar:
    median_pruner = optuna.pruners.MedianPruner()
    study = optuna.create_study(pruner=median_pruner)
    objective = objective_wrapper(pbar)
    study.optimize(objective, n_trials=TRIAL_SIZE)

HBox(children=(IntProgress(value=0), HTML(value='')))




In [8]:
# Hyperparameter values of the best trial in the study.
study.best_params

{'activation': 'ReLU',
 'dropout_prob': 0.016005023709719846,
 'optimizer': 'MomentumSGD',
 'weight_decay': 0.000493215617908779,
 'momentum_sgd_lr': 0.021578184169117495}

In [9]:
# Objective value of the best trial, i.e. the error rate returned by `objective`.
study.best_value

0.009099999999999997

In [10]:
# Inspect the full record of the first trial (state, params, per-epoch values).
study.trials[0]

FrozenTrial(trial_id=0, state=<TrialState.COMPLETE: 1>, value=0.04520000000000002, datetime_start=datetime.datetime(2018, 12, 8, 20, 22, 13, 177788), datetime_complete=datetime.datetime(2018, 12, 8, 20, 22, 37, 675901), params={'activation': 'ELU', 'dropout_prob': 0.2632755838984565, 'optimizer': 'MomentumSGD', 'weight_decay': 5.223656518310395e-10, 'momentum_sgd_lr': 0.0003597639332994071}, user_attrs={}, system_attrs={}, intermediate_values={0: 0.36760000000000004, 1: 0.15969999999999995, 2: 0.10919999999999996, 3: 0.08720000000000006, 4: 0.07340000000000002, 5: 0.06520000000000004, 6: 0.058599999999999985, 7: 0.052200000000000024, 8: 0.0494, 9: 0.04520000000000002}, params_in_internal_repr={'activation': 1, 'dropout_prob': 0.2632755838984565, 'optimizer': 1, 'weight_decay': 5.223656518310395e-10, 'momentum_sgd_lr': 0.0003597639332994071})

In [11]:
# Collect every trial of the study into a single table.
df = study.trials_dataframe()

In [12]:
# Preview the first rows of the trial table.
df.head()

Unnamed: 0_level_0,trial_id,state,value,datetime_start,datetime_complete,params,params,params,params,params,...,intermediate_values,intermediate_values,intermediate_values,intermediate_values,intermediate_values,intermediate_values,intermediate_values,intermediate_values,intermediate_values,intermediate_values
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,activation,adam_lr,dropout_prob,momentum_sgd_lr,optimizer,...,0,1,2,3,4,5,6,7,8,9
0,0,TrialState.COMPLETE,0.0452,2018-12-08 20:22:13.177788,2018-12-08 20:22:37.675901,ELU,,0.263276,0.00036,MomentumSGD,...,0.3676,0.1597,0.1092,0.0872,0.0734,0.0652,0.0586,0.0522,0.0494,0.0452
1,1,TrialState.COMPLETE,0.0561,2018-12-08 20:22:37.676195,2018-12-08 20:23:02.276249,ReLU,,0.782625,0.00089,MomentumSGD,...,0.6112,0.2229,0.1372,0.1053,0.0899,0.0787,0.0716,0.0675,0.061,0.0561
2,2,TrialState.COMPLETE,0.0162,2018-12-08 20:23:02.276627,2018-12-08 20:23:27.007976,ReLU,0.005142,0.466449,,Adam,...,0.0354,0.0246,0.0199,0.0177,0.0155,0.0151,0.0166,0.0134,0.0145,0.0162
3,3,TrialState.COMPLETE,0.902,2018-12-08 20:23:27.008410,2018-12-08 20:23:51.700105,ReLU,0.055845,0.407266,,Adam,...,0.5593,0.5537,0.5471,0.5561,0.5779,0.7098,0.5931,0.8865,0.8865,0.902
4,4,TrialState.COMPLETE,0.8865,2018-12-08 20:23:51.700607,2018-12-08 20:24:16.142025,ELU,,0.780593,0.070378,MomentumSGD,...,0.1265,0.4864,0.8496,0.8991,0.8972,0.9042,0.902,0.8968,0.9108,0.8865


In [13]:
# Write the trial table to result.csv (path relative to the working directory).
df.to_csv('result.csv')