In [1]:
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data
from torchvision import datasets
from torchvision import transforms

import numpy as np

import optuna
from optuna.trial import TrialState

  warn(f"Failed to load image Python extension: {e}")


In [2]:
# Run-wide configuration for the hyperparameter search.

# Prefer the second GPU (the device this notebook originally targeted), but
# fall back to the first GPU and finally to the CPU so that the notebook still
# runs after "Restart & Run All" on machines with fewer CUDA devices.
if torch.cuda.device_count() > 1:
    DEVICE = torch.device("cuda:1")
elif torch.cuda.is_available():
    DEVICE = torch.device("cuda:0")
else:
    DEVICE = torch.device("cpu")

BATCHSIZE = 128                      # mini-batch size for both data loaders
CLASSES = 10                         # width of the network's output layer
DIR = os.getcwd()                    # root directory handed to the dataset loader
EPOCHS = 10                          # training epochs per trial
LOG_INTERVAL = 10
N_TRAIN_EXAMPLES = BATCHSIZE * 30    # cap on training samples consumed per epoch
N_VALID_EXAMPLES = BATCHSIZE * 10    # cap on validation samples consumed per epoch

In [3]:
# Search space sampled inside `objective`.
# Each numeric entry is a [low, high] pair handed to the matching trial.suggest_* call.
range_opt_name = ["Adam", "RMSprop", "SGD"]  # attribute names resolved on torch.optim
range_lr = [0.00001, 0.1]                    # learning rate, sampled with log=True
range_n_layers = [1, 4]                      # number of hidden Linear/ReLU/Dropout blocks
range_p = [0.15, 0.5]                        # dropout probability of each hidden block
range_out_features = [4, 256]                # units in each hidden layer (2**2 .. 2**8)

In [4]:
def _build_model(trial):
    """Assemble the MLP whose depth, layer widths and dropout rates are sampled from ``trial``.

    The network takes flattened 28x28 inputs, stacks ``n_layers`` blocks of
    Linear -> ReLU -> Dropout, and ends with a Linear layer to ``CLASSES``
    outputs followed by LogSoftmax (which is why training uses ``F.nll_loss``).
    The returned model has already been moved to ``DEVICE``.
    """
    layers = []
    in_features = 28 * 28
    n_layers = trial.suggest_int("n_layers", range_n_layers[0], range_n_layers[1])

    for i in range(n_layers):
        out_features = trial.suggest_int(
            "n_units_l{}".format(i), range_out_features[0], range_out_features[1]
        )
        layers.append(nn.Linear(in_features, out_features))
        layers.append(nn.ReLU())
        p = trial.suggest_float("dropout_l{}".format(i), range_p[0], range_p[1])
        layers.append(nn.Dropout(p))

        # The next block consumes what this one produced.
        in_features = out_features

    layers.append(nn.Linear(in_features, CLASSES))
    layers.append(nn.LogSoftmax(dim=1))

    return nn.Sequential(*layers).to(DEVICE)


def _validation_accuracy(model, valid_loader):
    """Return the fraction of correct predictions on the leading validation batches.

    Batches are consumed until ``N_VALID_EXAMPLES`` samples have been reached.
    The denominator is the number of samples actually evaluated, so the result
    stays within [0, 1] even when ``N_VALID_EXAMPLES`` is not a multiple of
    ``BATCHSIZE``. Returns 0.0 if no sample was evaluated.
    """
    model.eval()
    correct = 0
    seen = 0
    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(valid_loader):
            # Limiting validation data.
            if batch_idx * BATCHSIZE >= N_VALID_EXAMPLES:
                break
            data, target = data.view(data.size(0), -1).to(DEVICE), target.to(DEVICE)
            output = model(data)
            # Get the index of the max log-probability.
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
            seen += target.size(0)

    return correct / max(seen, 1)


def objective(trial):
    """Optuna objective: train a sampled MLP on FashionMNIST and score it.

    Samples the architecture (see ``_build_model``), the optimizer class from
    ``range_opt_name`` and a log-uniform learning rate from ``range_lr``, then
    trains for ``EPOCHS`` epochs on at most ``N_TRAIN_EXAMPLES`` samples per
    epoch. After every epoch the validation accuracy is reported to the trial
    so that the study's pruner can stop unpromising runs.

    Args:
        trial: the ``optuna`` trial used for sampling, reporting and pruning.

    Returns:
        The highest validation accuracy observed over all epochs, in [0, 1].

    Raises:
        optuna.exceptions.TrialPruned: when ``trial.should_prune()`` is true
            after an epoch's accuracy has been reported.
    """
    acc = []

    # Generate the model.
    model = _build_model(trial)

    # Generate the optimizers.
    optimizer_name = trial.suggest_categorical("optimizer", range_opt_name)
    lr = trial.suggest_float("lr", range_lr[0], range_lr[1], log=True)
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)

    # Load FashionMNIST dataset.
    train_loader = torch.utils.data.DataLoader(
        datasets.FashionMNIST(DIR, train=True, download=True, transform=transforms.ToTensor()),
        batch_size=BATCHSIZE,
        shuffle=True,
    )
    # No shuffling here: only the first N_VALID_EXAMPLES samples are evaluated,
    # so a shuffled loader would score every epoch (and every trial) on a
    # different random subset, making the reported accuracies non-comparable
    # for pruning and for picking the best trial.
    valid_loader = torch.utils.data.DataLoader(
        datasets.FashionMNIST(DIR, train=False, transform=transforms.ToTensor()),
        batch_size=BATCHSIZE,
        shuffle=False,
    )

    # Training of the model.
    for epoch in range(EPOCHS):
        model.train()
        for batch_idx, (data, target) in enumerate(train_loader):
            # Limiting training data for faster epochs.
            if batch_idx * BATCHSIZE >= N_TRAIN_EXAMPLES:
                break

            data, target = data.view(data.size(0), -1).to(DEVICE), target.to(DEVICE)

            optimizer.zero_grad()
            output = model(data)
            loss = F.nll_loss(output, target)
            loss.backward()
            optimizer.step()

        # Validation of the model.
        accuracy = _validation_accuracy(model, valid_loader)
        acc.append(accuracy)

        trial.report(accuracy, epoch)

        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

        print("epoch:", epoch, "| acc:", round(accuracy * 100, 2), "%")

    # The trial's value is its best epoch, not its last one.
    return max(acc)
    

In [5]:
# Open the SQLite-backed study, creating it on first use. Because
# load_if_exists=True, re-running this cell resumes the stored study
# instead of raising on the duplicate name.
study = optuna.create_study(
    direction="maximize",
    storage='sqlite:///tutorial.db',
    study_name='opt_tutorial2',
    load_if_exists=True,
)

[32m[I 2022-04-21 22:14:53,442][0m Using an existing study with name 'opt_tutorial2' instead of creating a new one.[0m


In [6]:
# Run 20 additional trials of `objective` with no wall-clock limit; results are
# added to whatever trials the study's storage already holds.
study.optimize(
    objective,
    timeout=None,
    n_trials=20,
)

NVIDIA GeForce RTX 3060 with CUDA capability sm_86 is not compatible with the current PyTorch installation.
The current PyTorch install supports CUDA capabilities sm_37 sm_50 sm_60 sm_70.
If you want to use the NVIDIA GeForce RTX 3060 GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/

[32m[I 2022-04-21 22:14:59,610][0m Trial 37 pruned. [0m


epoch: 0 | acc: 64.22 %
epoch: 1 | acc: 75.08 %
epoch: 2 | acc: 74.77 %
epoch: 3 | acc: 78.2 %


[32m[I 2022-04-21 22:15:05,735][0m Trial 38 pruned. [0m
[32m[I 2022-04-21 22:15:07,260][0m Trial 39 pruned. [0m
[32m[I 2022-04-21 22:15:08,818][0m Trial 40 pruned. [0m


epoch: 0 | acc: 70.86 %
epoch: 1 | acc: 75.39 %
epoch: 2 | acc: 78.12 %
epoch: 3 | acc: 79.92 %
epoch: 4 | acc: 80.7 %
epoch: 5 | acc: 82.19 %
epoch: 6 | acc: 81.56 %
epoch: 7 | acc: 82.03 %
epoch: 8 | acc: 81.88 %


[32m[I 2022-04-21 22:15:19,684][0m Trial 41 finished with value: 0.821875 and parameters: {'n_layers': 1, 'n_units_l0': 66, 'dropout_l0': 0.20721558824189254, 'optimizer': 'RMSprop', 'lr': 0.002223659019693438}. Best is trial 28 with value: 0.8296875.[0m


epoch: 9 | acc: 80.78 %
epoch: 0 | acc: 69.38 %
epoch: 1 | acc: 75.7 %
epoch: 2 | acc: 73.12 %
epoch: 3 | acc: 73.44 %


[32m[I 2022-04-21 22:15:25,299][0m Trial 42 pruned. [0m


epoch: 0 | acc: 69.3 %


[32m[I 2022-04-21 22:15:27,721][0m Trial 43 pruned. [0m


epoch: 0 | acc: 66.48 %


[32m[I 2022-04-21 22:15:30,300][0m Trial 44 pruned. [0m
[32m[I 2022-04-21 22:15:32,056][0m Trial 45 pruned. [0m


epoch: 0 | acc: 66.17 %
epoch: 1 | acc: 75.08 %
epoch: 2 | acc: 76.33 %
epoch: 3 | acc: 76.17 %
epoch: 4 | acc: 79.84 %
epoch: 5 | acc: 79.61 %
epoch: 6 | acc: 80.94 %
epoch: 7 | acc: 81.48 %
epoch: 8 | acc: 79.92 %


[32m[I 2022-04-21 22:15:42,777][0m Trial 46 finished with value: 0.81484375 and parameters: {'n_layers': 2, 'n_units_l0': 129, 'dropout_l0': 0.1710864140792859, 'n_units_l1': 97, 'dropout_l1': 0.32400323599732156, 'optimizer': 'RMSprop', 'lr': 0.003559121247224647}. Best is trial 28 with value: 0.8296875.[0m


epoch: 9 | acc: 81.25 %


[32m[I 2022-04-21 22:15:44,458][0m Trial 47 pruned. [0m
[32m[I 2022-04-21 22:15:46,289][0m Trial 48 pruned. [0m
[32m[I 2022-04-21 22:15:47,901][0m Trial 49 pruned. [0m
[32m[I 2022-04-21 22:15:49,312][0m Trial 50 pruned. [0m
[32m[I 2022-04-21 22:15:50,990][0m Trial 51 pruned. [0m
[32m[I 2022-04-21 22:15:52,414][0m Trial 52 pruned. [0m


epoch: 0 | acc: 70.08 %
epoch: 1 | acc: 74.3 %
epoch: 2 | acc: 78.83 %
epoch: 3 | acc: 78.28 %


[32m[I 2022-04-21 22:15:58,057][0m Trial 53 pruned. [0m


epoch: 0 | acc: 65.55 %


[32m[I 2022-04-21 22:16:00,546][0m Trial 54 pruned. [0m


epoch: 0 | acc: 68.91 %


[32m[I 2022-04-21 22:16:03,032][0m Trial 55 pruned. [0m


epoch: 0 | acc: 70.86 %


[32m[I 2022-04-21 22:16:05,505][0m Trial 56 pruned. [0m


In [7]:
# Summarise the study: best objective value, trial counts by state, and the
# hyperparameters of the best trial.
trial = study.best_trial

complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])
pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])

print("Best trial:")
print(f" Value:  {trial.value}")

print("\nStudy statistics: ")
# len(study.trials) counts every trial stored in the study, whatever its state.
print(f"  Number of finished trials:  {len(study.trials)}")
print(f"  Number of pruned trials:  {len(pruned_trials)}")
print(f"  Number of complete trials:  {len(complete_trials)}")

print("\nBest params: ")
for key, value in trial.params.items():
    print(f"  {key} = {value}")

Best trial:
 Value:  0.8296875

Study statistics: 
  Number of finished trials:  57
  Number of pruned trials:  32
  Number of complete trials:  24

Best params: 
  dropout_l0 = 0.2083603371473296
  dropout_l1 = 0.29475991600696116
  lr = 0.004143220502481869
  n_layers = 2
  n_units_l0 = 119
  n_units_l1 = 89
  optimizer = Adam
