In [1]:
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
from torchvision import transforms
import numpy as np

In [2]:
# Mini-batch size shared by the training and evaluation loaders.
BATCHSIZE = 128

# Convert each image to a tensor, then normalise its single channel
# with mean 0.5 and standard deviation 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Training split: downloaded into ./data if missing, reshuffled every epoch.
train_set = MNIST(root='./data', train=True, download=True, transform=transform)
train_loader = DataLoader(
    train_set,
    batch_size=BATCHSIZE,
    shuffle=True,
    num_workers=2,
)

# Held-out split: same preprocessing, iterated in a fixed order.
test_set = MNIST(root='./data', train=False, download=True, transform=transform)
test_loader = DataLoader(
    test_set,
    batch_size=BATCHSIZE,
    shuffle=False,
    num_workers=2,
)

# Class labels 0..9 as a tuple of uint8 scalars.
classes = tuple(np.arange(10, dtype=np.uint8))

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import optuna

In [19]:
# Number of train/evaluate rounds each trial runs in objective(); one
# intermediate error rate is reported to the trial per round.
EPOCH = 10

class Net(nn.Module):
    """Small two-convolution CNN whose activation and dropout rate come from a trial.

    Args:
        trial: Optuna trial used to sample the 'activation' choice (through
            ``get_activation``) and the 'dropout_prob' value in [0, 0.8].

    The flatten step and ``fc1`` hard-code 320 features per sample, which is
    20 channels x 4 x 4 after two 5x5 convolutions and two 2x2 poolings of a
    28x28 single-channel input (assumes MNIST-sized images — TODO confirm if
    the input size ever changes).
    """

    def __init__(self, trial):
        super().__init__()
        # The activation is sampled before the dropout probability.
        self.activation = get_activation(trial)
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        drop_p = trial.suggest_uniform("dropout_prob", 0, 0.8)
        self.conv2_drop = nn.Dropout2d(p=drop_p)
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        """Return per-class log-probabilities (log-softmax over dim 1) for ``x``."""
        # Stage 1: convolution -> 2x2 max-pool -> activation.
        stage1 = F.max_pool2d(self.conv1(x), 2)
        stage1 = self.activation(stage1)

        # Stage 2: convolution -> channel dropout -> 2x2 max-pool -> activation.
        stage2 = self.conv2_drop(self.conv2(stage1))
        stage2 = self.activation(F.max_pool2d(stage2, 2))

        # Classifier head on the flattened 320-feature vector; the functional
        # dropout is only active while the module is in training mode.
        flat = stage2.view(-1, 320)
        hidden = self.activation(self.fc1(flat))
        hidden = F.dropout(hidden, training=self.training)
        logits = self.fc2(hidden)
        return F.log_softmax(logits, dim=1)

def train(model, device, train_loader, optimizer):
    """Run one optimisation pass over every batch in ``train_loader``.

    Args:
        model: Module that maps a batch of inputs to log-probabilities.
        device: Device (or device string) each batch is moved to.
        train_loader: Iterable yielding ``(data, target)`` batches.
        optimizer: Optimizer that updates ``model``'s parameters.

    Returns:
        None. ``model`` is left in training mode with updated parameters.
    """
    model.train()
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Clear gradients left over from the previous batch before back-propagating.
        optimizer.zero_grad()
        log_probs = model(inputs)
        batch_loss = F.nll_loss(log_probs, labels)
        batch_loss.backward()
        optimizer.step()

def test(model, device, test_loader):
    model.eval()
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            pred = output.max(1, keepdim=True)[1]
            correct += pred.eq(target.view_as(pred)).sum().item()

    return 1 - correct / len(test_loader.dataset)

def get_optimizer(trial, model):
    """Build the optimizer for ``model`` from hyperparameters sampled on ``trial``.

    Sampling order: 'optimizer' (categorical: 'Adam' or 'MomentumSGD'),
    'weight_decay' (log-uniform in [1e-10, 1e-3]), then the learning rate of
    the chosen optimizer ('adam_lr' or 'momentum_sgd_lr', log-uniform in
    [1e-5, 1e-1]). Only the chosen optimizer's learning rate is sampled.

    Returns:
        ``optim.Adam`` when 'Adam' is chosen, otherwise ``optim.SGD`` with
        momentum 0.9.
    """
    chosen = trial.suggest_categorical('optimizer', ['Adam', 'MomentumSGD'])
    decay = trial.suggest_loguniform('weight_decay', 1e-10, 1e-3)

    if chosen != 'Adam':
        sgd_lr = trial.suggest_loguniform('momentum_sgd_lr', 1e-5, 1e-1)
        return optim.SGD(
            model.parameters(),
            lr=sgd_lr,
            momentum=0.9,
            weight_decay=decay,
        )

    adam_lr = trial.suggest_loguniform('adam_lr', 1e-5, 1e-1)
    return optim.Adam(model.parameters(), lr=adam_lr, weight_decay=decay)

# def adam(model, trial, weight_decay):
#     adam_lr = trial.suggest_loguniform('adam_lr', 1e-5, 1e-1)
#     return optim.Adam(model.parameters(), lr=adam_lr, weight_decay=weight_decay)
    
# def momentum(model, trial, weight_decay):
#     momentum_sgd_lr = trial.suggest_loguniform('momentum_sgd_lr', 1e-5, 1e-1)
#     return optim.SGD(model.parameters(), lr=momentum_sgd_lr,
#                      momentum=0.9, weight_decay=weight_decay)

# def get_optimizer(trial, model):
#     optimizer = trial.suggest_categorical('optimizer', [adam, momentum])
#     weight_decay = trial.suggest_loguniform('weight_decay', 1e-10, 1e-3)
#     return optimizer(model, trial, weight_decay)

def get_activation(trial):
    """Sample the 'activation' categorical on ``trial`` and return the function.

    Returns:
        ``F.relu`` when the trial picks 'ReLU'; ``F.elu`` for any other value.
    """
    picked = trial.suggest_categorical('activation', ['ReLU', 'ELU'])
    return F.relu if picked == 'ReLU' else F.elu
        
def objective(trial):
    """Optuna objective: train a freshly sampled ``Net`` and return its error rate.

    A model and optimizer are built from hyperparameters sampled on ``trial``,
    then trained for ``EPOCH`` rounds. After each round the error rate on
    ``test_loader`` is reported to the trial so the pruner can stop it early.

    Returns:
        The error rate measured after the last round.

    Raises:
        optuna.structs.TrialPruned: when the trial says it should be pruned.

    NOTE(review): the value being minimised is measured on ``test_loader``, so
    the best error reported by the study is tuned on the test split — consider
    a separate validation split if an unbiased estimate is needed.
    """
    if torch.cuda.is_available():
        device = "cuda"
    else:
        device = "cpu"

    net = Net(trial).to(device)
    opt = get_optimizer(trial, net)

    for epoch in range(EPOCH):
        train(net, device, train_loader, opt)
        error_rate = test(net, device, test_loader)

        # Publish the intermediate result first, then ask whether to stop.
        trial.report(error_rate, epoch)
        if trial.should_prune(epoch):
            raise optuna.structs.TrialPruned()

    return error_rate

In [20]:
# Create a study that uses a median pruner, then run 100 trials of
# `objective`; the study tracks the lowest error rate seen so far.
study = optuna.create_study(
    pruner=optuna.pruners.MedianPruner(),
)
study.optimize(
    objective,
    n_trials=100,
)

[I 2018-12-05 06:12:41,057] Finished a trial resulted in value: 0.09560000000000002. Current best value is 0.09560000000000002 with parameters: {'activation': 'ReLU', 'dropout_prob': 0.35100773000855123, 'optimizer': 'Adam', 'weight_decay': 6.227514717616339e-06, 'adam_lr': 1.541332256542363e-05}.
[I 2018-12-05 06:13:06,201] Finished a trial resulted in value: 0.35619999999999996. Current best value is 0.09560000000000002 with parameters: {'activation': 'ReLU', 'dropout_prob': 0.35100773000855123, 'optimizer': 'Adam', 'weight_decay': 6.227514717616339e-06, 'adam_lr': 1.541332256542363e-05}.
[I 2018-12-05 06:13:31,358] Finished a trial resulted in value: 0.01849999999999996. Current best value is 0.01849999999999996 with parameters: {'activation': 'ReLU', 'dropout_prob': 0.675273078348647, 'optimizer': 'Adam', 'weight_decay': 1.3423197864054106e-09, 'adam_lr': 0.0031045501711910307}.
[I 2018-12-05 06:13:56,193] Finished a trial resulted in value: 0.011399999999999966. Current best value

In [22]:
# Hyperparameters of the best trial (the one with the lowest error rate).
study.best_params

{'activation': 'ReLU',
 'dropout_prob': 0.1750391800071884,
 'optimizer': 'MomentumSGD',
 'weight_decay': 1.185808055634304e-07,
 'momentum_sgd_lr': 0.02022072772568203}

In [23]:
# Objective value of the best trial, i.e. its final error rate on test_loader.
study.best_value

0.006900000000000017

In [24]:
# Full record of the first trial: state, final value, sampled parameters and
# the intermediate error rate reported after each epoch.
study.trials[0]

FrozenTrial(trial_id=0, state=<TrialState.COMPLETE: 1>, value=0.09560000000000002, datetime_start=datetime.datetime(2018, 12, 5, 6, 12, 15, 990091), datetime_complete=datetime.datetime(2018, 12, 5, 6, 12, 41, 57094), params={'activation': 'ReLU', 'dropout_prob': 0.35100773000855123, 'optimizer': 'Adam', 'weight_decay': 6.227514717616339e-06, 'adam_lr': 1.541332256542363e-05}, user_attrs={}, system_attrs={}, intermediate_values={0: 0.7971, 1: 0.30000000000000004, 2: 0.27780000000000005, 3: 0.2239, 4: 0.16500000000000004, 5: 0.1381, 6: 0.12260000000000004, 7: 0.11160000000000003, 8: 0.10299999999999998, 9: 0.09560000000000002}, params_in_internal_repr={'activation': 0, 'dropout_prob': 0.35100773000855123, 'optimizer': 0, 'weight_decay': 6.227514717616339e-06, 'adam_lr': 1.541332256542363e-05})

In [25]:
import csv
from pathlib import Path

# Export every trial of the study to result.csv in the current working
# directory: one header row, then one row per trial.
csv_path = Path().resolve().joinpath('result.csv')

# Hyperparameter columns, in output order. A trial samples only one of the two
# learning-rate parameters, so any parameter it lacks is written as '-'.
param_columns = [
    'dropout_prob', 'activation', 'optimizer',
    'weight_decay', 'adam_lr', 'momentum_sgd_lr',
]

# mode='w' creates or truncates the file, so no prior existence check is needed.
# newline='' follows the csv module's guidance for files handed to csv.writer,
# so the explicit '\n' line terminator is written as-is on every platform.
with csv_path.open(mode='w', newline='') as f:
    writer = csv.writer(f, lineterminator='\n')
    writer.writerow(['state', 'error'] + param_columns)
    for trial in study.trials:
        row = [trial.state, trial.value]
        # dict.get with a default keeps a legitimately falsy value and also
        # tolerates a trial that stopped before sampling every parameter.
        row.extend(trial.params.get(name, '-') for name in param_columns)
        writer.writerow(row)

In [26]:
import pandas

In [27]:
# Load the exported trial table back from result.csv (path is relative to the
# current working directory). The learning-rate columns contain '-'
# placeholders for the optimizer a trial did not use, so they are not purely
# numeric text.
df = pandas.read_csv('result.csv')

In [28]:
# Preview the first ten trials (completed and pruned) with their hyperparameters.
df.head(10)

Unnamed: 0,state,error,dropout_prob,activation,optimizer,weight_decay,adam_lr,momentum_sgd_lr
0,TrialState.COMPLETE,0.0956,0.351008,ReLU,Adam,6.227515e-06,1.541332256542363e-05,-
1,TrialState.COMPLETE,0.3562,0.236714,ELU,MomentumSGD,3.926285e-08,-,3.766799356983935e-05
2,TrialState.COMPLETE,0.0185,0.675273,ReLU,Adam,1.34232e-09,0.0031045501711910307,-
3,TrialState.COMPLETE,0.0114,0.430041,ELU,Adam,8.600682e-08,0.0006490359130172907,-
4,TrialState.COMPLETE,0.0439,0.594458,ReLU,MomentumSGD,1.135986e-07,-,0.0008412325183643451
5,TrialState.PRUNED,0.2242,0.00811,ELU,Adam,4.767901e-07,2.0188068129558544e-05,-
6,TrialState.PRUNED,0.8185,0.479526,ReLU,MomentumSGD,0.0002283432,-,3.729526603267807e-05
7,TrialState.COMPLETE,0.0183,0.389658,ELU,Adam,0.0001034367,0.00019749595108147042,-
8,TrialState.PRUNED,0.1676,0.552339,ELU,Adam,1.460409e-09,3.094641783965987e-05,-
9,TrialState.COMPLETE,0.0069,0.175039,ReLU,MomentumSGD,1.185808e-07,-,0.02022072772568203
