In [1]:
import os

import optuna
from optuna.trial import TrialState
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data
from torchvision import datasets
from torchvision import transforms

In [2]:
DEVICE = torch.device("cuda:0")
CLASSES = 10
DIR = os.getcwd()
EPOCHS = 20
DATASET = 'CIFAR10'

In [3]:
train_data = getattr(datasets, DATASET)(DIR, train=True, download=True, transform=transforms.ToTensor())
test_data = getattr(datasets, DATASET)(DIR, train=False, download=True, transform=transforms.ToTensor())

len(train_data), len(test_data)

Files already downloaded and verified
Files already downloaded and verified


(50000, 10000)

In [4]:
def dimensions_shape(x, k=3):
    return int(x - (k-1) - 1) + 1

In [5]:
def get_model_size(model):
    param_size = 0
    for param in model.parameters():
        param_size += param.nelement() * param.element_size()
    buffer_size = 0
    for buffer in model.buffers():
        buffer_size += buffer.nelement() * buffer.element_size()

    size_all_mb = (param_size + buffer_size) / 1024**2
    return size_all_mb

In [6]:
def define_model(trial):
    
    # We optimize the number of layers, hidden units and dropout ratio in each layer.
    in_channels = 3
    dim_shape = 32
    
    in_features = in_channels * dim_shape * dim_shape
    
    p = trial.suggest_float("dropout_l", 0.0, 0.5)
    
    conv_filter_1 = trial.suggest_int("conv_filter_1", 64, 256)
    conv_filter_2 = trial.suggest_int("conv_filter_2", 16, 128)
    
    n_layers_conv1 = trial.suggest_int("n_layers_conv1", 1, 5)
    n_layers_conv2 = trial.suggest_int("n_layers_conv2", 0, 3)
    n_layers_fc = trial.suggest_int("n_layers_fc", 1, 4)
    
    layers = [
        nn.BatchNorm2d(in_channels)
    ]
    
    

    for i in range(n_layers_conv1):
        
        layers.append(nn.Conv2d(in_channels, conv_filter_1, 3, bias=False))
        layers.append(nn.BatchNorm2d(conv_filter_1))
        layers.append(nn.ReLU())
        layers.append(nn.Dropout2d(p))
        
        dim_shape = dimensions_shape(dim_shape)

        in_channels = conv_filter_1
        
    

    for i in range(n_layers_conv2):
        
        layers.append(nn.Conv2d(in_channels, conv_filter_2, 3, bias=False))
        layers.append(nn.BatchNorm2d(conv_filter_2))
        layers.append(nn.ReLU())
        layers.append(nn.Dropout2d(p))
        
        dim_shape = dimensions_shape(dim_shape)

        in_channels = conv_filter_2
        
    layers.append(nn.Flatten())
    in_features = in_channels * dim_shape * dim_shape

    for i in range(n_layers_fc):
        out_features = trial.suggest_int("n_units_l{}".format(i), 100, 1500)
        
        layers.append(nn.Linear(in_features, out_features, bias=False))
        layers.append(nn.BatchNorm1d(out_features))
        layers.append(nn.ReLU())
        layers.append(nn.Dropout(p))

        in_features = out_features

        
    layers.append(nn.Linear(in_features, 80, bias=False))
    layers.append(nn.BatchNorm1d(80))
    layers.append(nn.ReLU())
    layers.append(nn.Linear(80, CLASSES))

    return nn.Sequential(*layers)


In [7]:
def get_data(trial):
    
    BATCHSIZE = trial.suggest_int('batchsize', 32, 128)
    
    N_TRAIN_EXAMPLES = 50_000 // BATCHSIZE
    N_VALID_EXAMPLES = 10_000 // BATCHSIZE
    
    # Load FashionMNIST dataset.
    train_loader = torch.utils.data.DataLoader(train_data,batch_size=BATCHSIZE,shuffle=True)
    valid_loader = torch.utils.data.DataLoader(test_data,batch_size=BATCHSIZE,shuffle=True)

    return train_loader, valid_loader, BATCHSIZE, N_TRAIN_EXAMPLES, N_VALID_EXAMPLES

In [10]:
def objective(trial):
    # Generate the model.
    model = define_model(trial).to(DEVICE)

    # Generate the optimizers.
    loss_function = nn.CrossEntropyLoss()
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "SGD"])
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)

    # Get the FashionMNIST dataset.
    train_loader, valid_loader, BATCHSIZE, N_TRAIN_EXAMPLES, N_VALID_EXAMPLES = get_data(trial)

    estimated_size = get_model_size(model)
    
    # Training of the model.
    for epoch in range(EPOCHS):
        model.train()
        for batch_idx, (data, target) in enumerate(train_loader):
            # Limiting training data for faster epochs.
            if batch_idx >= N_TRAIN_EXAMPLES:
                break

            data, target = data.to(DEVICE), target.to(DEVICE)

            optimizer.zero_grad()
            output = model(data)
            loss = loss_function(output, target)
            loss.backward()
            optimizer.step()

        # Validation of the model.
        model.eval()
        correct = 0
        with torch.no_grad():
            for batch_idx, (data, target) in enumerate(valid_loader):
                # Limiting validation data.
                if batch_idx >= N_VALID_EXAMPLES:
                    break
                data, target = data.to(DEVICE), target.to(DEVICE)
                output = model(data)
                # Get the index of the max log-probability.
                pred = output.argmax(dim=1)
                correct += (pred == target).sum().item()

        accuracy = correct / (N_VALID_EXAMPLES * BATCHSIZE)

#         trial.report(epoch, accuracy, estimated_size)

        # Handle pruning based on the intermediate value.
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return accuracy

In [11]:
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=200, timeout= 60 * 60 * 3)


# print(f"Number of trials on the Pareto front: {len(study.best_trials)}")

# trial_with_highest_accuracy = max(study.best_trials, key=lambda t: t.values[1])
# print(f"Trial with highest accuracy: ")
# print(f"\tnumber: {trial_with_highest_accuracy.number}")
# print(f"\tparams: {trial_with_highest_accuracy.params}")
# print(f"\tvalues: {trial_with_highest_accuracy.values}")


pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

print("Study statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))

print("Best trial:")
trial = study.best_trial

print("  Value: ", trial.value)

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

[32m[I 2023-04-09 23:22:12,338][0m A new study created in memory with name: no-name-1e493fe2-de84-4c2c-804d-73c36e9261fd[0m
[32m[I 2023-04-09 23:30:07,892][0m Trial 0 finished with value: 0.5681246857717446 and parameters: {'dropout_l': 0.451178585330206, 'conv_filter_1': 181, 'conv_filter_2': 47, 'n_layers_conv1': 5, 'n_layers_conv2': 3, 'n_layers_fc': 4, 'n_units_l0': 527, 'n_units_l1': 538, 'n_units_l2': 404, 'n_units_l3': 1325, 'optimizer': 'Adam', 'lr': 0.00035386023117334794, 'batchsize': 85}. Best is trial 0 with value: 0.5681246857717446.[0m
[32m[I 2023-04-09 23:36:40,129][0m Trial 1 finished with value: 0.6775710284113645 and parameters: {'dropout_l': 0.1561979209100866, 'conv_filter_1': 119, 'conv_filter_2': 55, 'n_layers_conv1': 1, 'n_layers_conv2': 3, 'n_layers_fc': 4, 'n_units_l0': 863, 'n_units_l1': 414, 'n_units_l2': 1287, 'n_units_l3': 860, 'optimizer': 'Adam', 'lr': 1.1111688832377715e-05, 'batchsize': 102}. Best is trial 1 with value: 0.6775710284113645.[0m


[32m[I 2023-04-10 01:48:26,689][0m Trial 21 finished with value: 0.7561220393416299 and parameters: {'dropout_l': 0.0006556262012907047, 'conv_filter_1': 79, 'conv_filter_2': 17, 'n_layers_conv1': 4, 'n_layers_conv2': 0, 'n_layers_fc': 1, 'n_units_l0': 120, 'optimizer': 'SGD', 'lr': 0.07477745268179996, 'batchsize': 47}. Best is trial 11 with value: 0.7747594226142742.[0m
[32m[I 2023-04-10 01:56:05,637][0m Trial 22 finished with value: 0.7709334935897436 and parameters: {'dropout_l': 0.005456044701851986, 'conv_filter_1': 84, 'conv_filter_2': 16, 'n_layers_conv1': 4, 'n_layers_conv2': 0, 'n_layers_fc': 1, 'n_units_l0': 682, 'optimizer': 'SGD', 'lr': 0.034539420679820736, 'batchsize': 32}. Best is trial 11 with value: 0.7747594226142742.[0m
[32m[I 2023-04-10 02:04:03,150][0m Trial 23 finished with value: 0.7411858974358975 and parameters: {'dropout_l': 0.057387775039085084, 'conv_filter_1': 132, 'conv_filter_2': 29, 'n_layers_conv1': 3, 'n_layers_conv2': 1, 'n_layers_fc': 1, 'n_

Study statistics: 
  Number of finished trials:  27
  Number of pruned trials:  0
  Number of complete trials:  27
Best trial:
  Value:  0.7747594226142742
  Params: 
    dropout_l: 0.0032747313778947323
    conv_filter_1: 77
    conv_filter_2: 24
    n_layers_conv1: 4
    n_layers_conv2: 0
    n_layers_fc: 1
    n_units_l0: 1122
    optimizer: SGD
    lr: 0.03177451687554504
    batchsize: 58


In [12]:
# df = study.trials_dataframe().sort_values(['values_0', 'values_1'], ascending=False)
df = study.trials_dataframe().sort_values('value', ascending=False)
df.to_csv(DIR + f'/optuna-{DATASET.lower()}-complete-dataset.csv', index=False)

In [13]:
df.head(7)

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_batchsize,params_conv_filter_1,params_conv_filter_2,params_dropout_l,params_lr,params_n_layers_conv1,params_n_layers_conv2,params_n_layers_fc,params_n_units_l0,params_n_units_l1,params_n_units_l2,params_n_units_l3,params_optimizer,state
11,11,0.774759,2023-04-10 00:36:07.360927,2023-04-10 00:42:31.414721,0 days 00:06:24.053794,58,77,24,0.003275,0.031775,4,0,1,1122,,,,SGD,COMPLETE
22,22,0.770933,2023-04-10 01:48:26.695851,2023-04-10 01:56:05.636776,0 days 00:07:38.940925,32,84,16,0.005456,0.034539,4,0,1,682,,,,SGD,COMPLETE
13,13,0.764535,2023-04-10 00:49:20.466521,2023-04-10 00:55:39.431891,0 days 00:06:18.965370,58,65,19,0.000799,0.017728,4,0,1,1117,,,,SGD,COMPLETE
21,21,0.756122,2023-04-10 01:41:28.383899,2023-04-10 01:48:26.687677,0 days 00:06:58.303778,47,79,17,0.000656,0.074777,4,0,1,120,,,,SGD,COMPLETE
12,12,0.754215,2023-04-10 00:42:31.417228,2023-04-10 00:49:20.463979,0 days 00:06:49.046751,47,70,16,0.001269,0.09513,4,0,1,131,,,,SGD,COMPLETE
16,16,0.747009,2023-04-10 01:08:23.761561,2023-04-10 01:14:41.782562,0 days 00:06:18.021001,65,123,27,0.193812,0.026396,4,0,1,1045,,,,SGD,COMPLETE
2,2,0.746261,2023-04-09 23:36:40.131748,2023-04-09 23:44:24.578993,0 days 00:07:44.447245,41,95,57,0.110683,0.055959,4,1,1,395,,,,SGD,COMPLETE
