In [None]:
!pip install -q optuna

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m390.6/390.6 kB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.5/224.5 kB[0m [31m26.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.7/78.7 kB[0m [31m7.7 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import optuna
from optuna.trial import TrialState
import matplotlib.pyplot as plt
import numpy as np
import torch
import torchvision
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR100
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Prepare data

In [None]:
# Mini-batch size shared by the training and the test loader.
batch_size = 64

# Per-image preprocessing: convert to a tensor, then normalise every one of
# the three channels with mean 0.5 and standard deviation 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# CIFAR-100 splits, downloaded into ./data when not already present.
trainset = CIFAR100(root='./data', train=True, download=True, transform=transform)
testset = CIFAR100(root='./data', train=False, download=True, transform=transform)

# Only the training split is shuffled; the test split keeps a fixed order.
train_loader = DataLoader(
    trainset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
)
test_loader = DataLoader(
    testset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=2,
)

Files already downloaded and verified
Files already downloaded and verified


In [None]:
# Run on the first CUDA GPU when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print(device)

cuda:0


In [None]:
# Number of target classes; sizes the output layer of the network.
classes = 100
# Number of training epochs run inside every Optuna trial.
epochs = 10

# Define Model

In [None]:
class Net(nn.Module):
    """Configurable CNN: a stack of 3x3 convolutions followed by dense layers.

    The network is built for 3-channel 32x32 inputs. The convolutions use no
    padding, so every conv layer shrinks the spatial size by
    ``kernel_size - 1`` pixels per dimension.

    Args:
        trial: Optuna trial that sampled the architecture. It is accepted for
            interface compatibility and is not used by the model itself.
        num_conv_layers: Number of convolutional layers to stack.
        num_fc_layers: Number of fully connected layers, *including* the final
            classification layer.
        num_filters: Output channels of each conv layer; needs at least
            ``num_conv_layers`` entries.
        num_neurons: Widths of the hidden dense layers; needs at least
            ``num_fc_layers - 1`` entries.
        num_classes: Size of the output layer. When ``None`` (default) the
            notebook-level ``classes`` constant is used.

    Raises:
        ValueError: If the stacked convolutions would shrink the feature map
            to less than one pixel.
    """

    def __init__(self, trial, num_conv_layers, num_fc_layers, num_filters, num_neurons,
                 num_classes=None):

        super().__init__()
        if num_classes is None:
            # Backward-compatible default: the module-level class count.
            num_classes = classes

        input_size = 32
        kernel_size = 3

        # Convolutional part. `channels` always holds the output channels of
        # the most recently added conv layer, so the flatten size below is
        # derived from the layers that were actually built.
        channels = num_filters[0]
        self.convs = nn.ModuleList([nn.Conv2d(3, channels, kernel_size=kernel_size)])
        # Spatial size after an unpadded convolution with stride 1.
        out_size = input_size - kernel_size + 1

        for i in range(1, num_conv_layers):
            self.convs.append(nn.Conv2d(in_channels=channels, out_channels=num_filters[i],
                                        kernel_size=kernel_size))
            channels = num_filters[i]
            out_size = out_size - kernel_size + 1

        if out_size < 1:
            raise ValueError(
                f"{len(self.convs)} conv layers with kernel size {kernel_size} "
                f"leave no spatial extent for a {input_size}x{input_size} input"
            )

        # Number of features per sample once the conv output is flattened.
        self.out_feature = channels * out_size * out_size

        # Fully connected part: (num_fc_layers - 1) hidden layers + output layer.
        self.fcs = nn.ModuleList([])
        in_features = self.out_feature
        for i in range(num_fc_layers - 1):
            self.fcs.append(nn.Linear(in_features, num_neurons[i]))
            in_features = num_neurons[i]

        self.fcs.append(nn.Linear(in_features, num_classes))

    def forward(self, x):
        """Return the raw class scores (logits) for a batch of images.

        ReLU is applied after every conv layer and every *hidden* dense layer.
        The final layer's output is returned without an activation because the
        notebook trains with ``nn.CrossEntropyLoss``, which works on
        unnormalised logits; clamping them with ReLU would prevent the model
        from ever producing a negative score for a class.
        """
        for conv in self.convs:
            x = F.relu(conv(x))

        # Flatten per sample; a wrongly sized input now fails loudly in the
        # first Linear layer instead of being folded into the batch dimension.
        x = x.view(x.size(0), -1)

        for fc in self.fcs[:-1]:
            x = F.relu(fc(x))

        return self.fcs[-1](x)


# Objective function

In [None]:
def objective(trial):
    """
    Sample one CNN configuration, train it and return its test accuracy.

    Hyperparameters:
    number of convolutional layers --> MAX: 4
    number of dense layers --> MAX: 3
    number of filters of convolutional layers --> MAX: 64
    number of neurons of fully connected layers --> MAX: 64
    learning rate --> MAX: 0.01
    optimizer

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        Fraction of correctly classified samples from ``test_loader`` after
        ``epochs`` epochs of training on ``train_loader``.
    """

    # Define range of values
    num_conv_layers = trial.suggest_int("num_conv_layers", 1, 4)
    num_fc_layers = trial.suggest_int("num_fc_layers", 1, 3)
    # Integer-valued parameters are sampled as integers directly
    # (40, 48, 56, 64) rather than as floats that are cast afterwards.
    num_filters = [trial.suggest_int("num_filter_"+str(i), 40, 64, step=8)
                   for i in range(num_conv_layers)]

    # number of out features of last fc layer is fix:100
    num_neurons = [trial.suggest_int("num_neurons_"+str(i), 32, 64, step=16)
                   for i in range(num_fc_layers-1)]          # 32, 48, 64

    # Generate the model
    model = Net(trial, num_conv_layers, num_fc_layers, num_filters, num_neurons).to(device)
    print(model)

    # Generate the optimizers
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "RMSprop", "SGD"])
    lr = trial.suggest_float("lr", 1e-3, 1e-2, log=True)
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)
    # loss function
    criterion = nn.CrossEntropyLoss()

    # Training loop
    model.train()
    for epoch in range(epochs):
        for x, y in train_loader:
            x = x.to(device)
            y = y.to(device)

            optimizer.zero_grad()
            loss = criterion(model(x), y)
            loss.backward()
            optimizer.step()

    # Only the accuracy after the last epoch is returned and nothing is
    # reported per epoch here, so the test set is evaluated once after training.
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for x, y in test_loader:
            x = x.to(device)
            y = y.to(device)

            _, predicted = torch.max(model(x), 1)
            total += y.size(0)
            correct += (predicted == y).sum().item()

    # Guard against an empty test loader.
    accuracy_test = correct / total if total else 0.0

    return accuracy_test

# Optuna optimization

In [None]:
# Create an Optuna study to maximize test accuracy
# Search for the configuration with the highest test accuracy. The search is
# bounded by wall-clock time rather than by a fixed number of trials.
study = optuna.create_study(direction="maximize")
study.optimize(objective, timeout=30 * 60)  # 30-minute timeout

# Summarise the best trial found.
trial = study.best_trial
print("Best trial:")
print("  Test accuracy: ", trial.value)
print("  Params: ")
for key in trial.params:
    value = trial.params[key]
    print(f"{key}: {value}")

[I 2023-07-02 07:20:00,041] A new study created in memory with name: no-name-06ee0e95-009f-4070-9a1a-bb0012cd5701


Net(
  (convs): ModuleList(
    (0): Conv2d(3, 40, kernel_size=(3, 3), stride=(1, 1))
    (1): Conv2d(40, 40, kernel_size=(3, 3), stride=(1, 1))
  )
  (fcs): ModuleList(
    (0): Linear(in_features=31360, out_features=32, bias=True)
    (1): Linear(in_features=32, out_features=100, bias=True)
  )
)


[I 2023-07-02 07:23:12,830] Trial 0 finished with value: 0.1468 and parameters: {'num_conv_layers': 2, 'num_fc_layers': 2, 'num_filter_0': 40.0, 'num_filter_1': 40.0, 'num_neurons_0': 32.0, 'optimizer': 'Adam', 'lr': 0.0015173493739106339}. Best is trial 0 with value: 0.1468.


Net(
  (convs): ModuleList(
    (0): Conv2d(3, 40, kernel_size=(3, 3), stride=(1, 1))
    (1): Conv2d(40, 56, kernel_size=(3, 3), stride=(1, 1))
  )
  (fcs): ModuleList(
    (0): Linear(in_features=43904, out_features=64, bias=True)
    (1): Linear(in_features=64, out_features=64, bias=True)
    (2): Linear(in_features=64, out_features=100, bias=True)
  )
)


[I 2023-07-02 07:26:30,164] Trial 1 finished with value: 0.1503 and parameters: {'num_conv_layers': 2, 'num_fc_layers': 3, 'num_filter_0': 40.0, 'num_filter_1': 56.0, 'num_neurons_0': 64.0, 'num_neurons_1': 64.0, 'optimizer': 'SGD', 'lr': 0.004030697198501452}. Best is trial 1 with value: 0.1503.


Net(
  (convs): ModuleList(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1))
    (1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
    (2): Conv2d(64, 56, kernel_size=(3, 3), stride=(1, 1))
  )
  (fcs): ModuleList(
    (0): Linear(in_features=37856, out_features=100, bias=True)
  )
)


[I 2023-07-02 07:29:47,856] Trial 2 finished with value: 0.01 and parameters: {'num_conv_layers': 3, 'num_fc_layers': 1, 'num_filter_0': 64.0, 'num_filter_1': 64.0, 'num_filter_2': 56.0, 'optimizer': 'RMSprop', 'lr': 0.0016332773338419513}. Best is trial 1 with value: 0.1503.


Net(
  (convs): ModuleList(
    (0): Conv2d(3, 40, kernel_size=(3, 3), stride=(1, 1))
    (1): Conv2d(40, 64, kernel_size=(3, 3), stride=(1, 1))
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
    (3): Conv2d(64, 56, kernel_size=(3, 3), stride=(1, 1))
  )
  (fcs): ModuleList(
    (0): Linear(in_features=32256, out_features=64, bias=True)
    (1): Linear(in_features=64, out_features=100, bias=True)
  )
)


[I 2023-07-02 07:33:07,992] Trial 3 finished with value: 0.2013 and parameters: {'num_conv_layers': 4, 'num_fc_layers': 2, 'num_filter_0': 40.0, 'num_filter_1': 64.0, 'num_filter_2': 64.0, 'num_filter_3': 56.0, 'num_neurons_0': 64.0, 'optimizer': 'SGD', 'lr': 0.008895056069512136}. Best is trial 3 with value: 0.2013.


Net(
  (convs): ModuleList(
    (0): Conv2d(3, 40, kernel_size=(3, 3), stride=(1, 1))
    (1): Conv2d(40, 56, kernel_size=(3, 3), stride=(1, 1))
    (2): Conv2d(56, 40, kernel_size=(3, 3), stride=(1, 1))
    (3): Conv2d(40, 48, kernel_size=(3, 3), stride=(1, 1))
  )
  (fcs): ModuleList(
    (0): Linear(in_features=27648, out_features=100, bias=True)
  )
)


[I 2023-07-02 07:36:30,852] Trial 4 finished with value: 0.2362 and parameters: {'num_conv_layers': 4, 'num_fc_layers': 1, 'num_filter_0': 40.0, 'num_filter_1': 56.0, 'num_filter_2': 40.0, 'num_filter_3': 48.0, 'optimizer': 'SGD', 'lr': 0.008092387757307865}. Best is trial 4 with value: 0.2362.


Net(
  (convs): ModuleList(
    (0): Conv2d(3, 56, kernel_size=(3, 3), stride=(1, 1))
    (1): Conv2d(56, 48, kernel_size=(3, 3), stride=(1, 1))
    (2): Conv2d(48, 64, kernel_size=(3, 3), stride=(1, 1))
    (3): Conv2d(64, 56, kernel_size=(3, 3), stride=(1, 1))
  )
  (fcs): ModuleList(
    (0): Linear(in_features=32256, out_features=100, bias=True)
  )
)


[I 2023-07-02 07:39:56,879] Trial 5 finished with value: 0.0597 and parameters: {'num_conv_layers': 4, 'num_fc_layers': 1, 'num_filter_0': 56.0, 'num_filter_1': 48.0, 'num_filter_2': 64.0, 'num_filter_3': 56.0, 'optimizer': 'Adam', 'lr': 0.001221976402072722}. Best is trial 4 with value: 0.2362.


Net(
  (convs): ModuleList(
    (0): Conv2d(3, 40, kernel_size=(3, 3), stride=(1, 1))
  )
  (fcs): ModuleList(
    (0): Linear(in_features=36000, out_features=100, bias=True)
  )
)


[I 2023-07-02 07:43:02,120] Trial 6 finished with value: 0.01 and parameters: {'num_conv_layers': 1, 'num_fc_layers': 1, 'num_filter_0': 40.0, 'optimizer': 'Adam', 'lr': 0.002152056786264147}. Best is trial 4 with value: 0.2362.


Net(
  (convs): ModuleList(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1))
    (1): Conv2d(64, 40, kernel_size=(3, 3), stride=(1, 1))
    (2-3): 2 x Conv2d(40, 40, kernel_size=(3, 3), stride=(1, 1))
  )
  (fcs): ModuleList(
    (0): Linear(in_features=23040, out_features=48, bias=True)
    (1): Linear(in_features=48, out_features=48, bias=True)
    (2): Linear(in_features=48, out_features=100, bias=True)
  )
)


[I 2023-07-02 07:46:22,407] Trial 7 finished with value: 0.01 and parameters: {'num_conv_layers': 4, 'num_fc_layers': 3, 'num_filter_0': 64.0, 'num_filter_1': 40.0, 'num_filter_2': 40.0, 'num_filter_3': 40.0, 'num_neurons_0': 48.0, 'num_neurons_1': 48.0, 'optimizer': 'SGD', 'lr': 0.00185497613891723}. Best is trial 4 with value: 0.2362.


Net(
  (convs): ModuleList(
    (0): Conv2d(3, 56, kernel_size=(3, 3), stride=(1, 1))
    (1): Conv2d(56, 48, kernel_size=(3, 3), stride=(1, 1))
    (2): Conv2d(48, 64, kernel_size=(3, 3), stride=(1, 1))
  )
  (fcs): ModuleList(
    (0): Linear(in_features=43264, out_features=64, bias=True)
    (1): Linear(in_features=64, out_features=32, bias=True)
    (2): Linear(in_features=32, out_features=100, bias=True)
  )
)


[I 2023-07-02 07:49:42,597] Trial 8 finished with value: 0.1093 and parameters: {'num_conv_layers': 3, 'num_fc_layers': 3, 'num_filter_0': 56.0, 'num_filter_1': 48.0, 'num_filter_2': 64.0, 'num_neurons_0': 64.0, 'num_neurons_1': 32.0, 'optimizer': 'Adam', 'lr': 0.0034548331611034917}. Best is trial 4 with value: 0.2362.


Net(
  (convs): ModuleList(
    (0): Conv2d(3, 40, kernel_size=(3, 3), stride=(1, 1))
    (1): Conv2d(40, 64, kernel_size=(3, 3), stride=(1, 1))
    (2): Conv2d(64, 48, kernel_size=(3, 3), stride=(1, 1))
  )
  (fcs): ModuleList(
    (0): Linear(in_features=32448, out_features=64, bias=True)
    (1): Linear(in_features=64, out_features=64, bias=True)
    (2): Linear(in_features=64, out_features=100, bias=True)
  )
)


[I 2023-07-02 07:53:03,321] Trial 9 finished with value: 0.0418 and parameters: {'num_conv_layers': 3, 'num_fc_layers': 3, 'num_filter_0': 40.0, 'num_filter_1': 64.0, 'num_filter_2': 48.0, 'num_neurons_0': 64.0, 'num_neurons_1': 64.0, 'optimizer': 'SGD', 'lr': 0.00237670750923284}. Best is trial 4 with value: 0.2362.


Best trial:
  Test accuracy:  0.2362
  Params: 
num_conv_layers: 4
num_fc_layers: 1
num_filter_0: 40.0
num_filter_1: 56.0
num_filter_2: 40.0
num_filter_3: 48.0
optimizer: SGD
lr: 0.008092387757307865


# Part B: optimization with pruning

In [None]:
def objective(trial):
    """
    Sample one CNN configuration, train it with per-epoch pruning and return
    its test accuracy.

    NOTE: this definition rebinds the name ``objective`` and therefore
    replaces the non-pruning version defined earlier in the notebook.

    Hyperparameters to be optimized:
    number of convolutional layers --> MAX: 4
    number of dense layers --> MAX: 3
    number of filters of convolutional layers --> MAX: 64
    number of neurons of fully connected layers --> MAX: 64
    learning rate --> MAX: 0.01
    optimizer

    Args:
        trial: Optuna trial used to sample the hyperparameters and to report
            the intermediate accuracies.

    Returns:
        Fraction of correctly classified samples from ``test_loader`` after
        the last epoch (``0.0`` if no epoch was run).

    Raises:
        optuna.exceptions.TrialPruned: When the study's pruner decides, based
            on the accuracy reported after an epoch, to stop this trial.
    """

    # Define range of values
    num_conv_layers = trial.suggest_int("num_conv_layers", 1, 4)
    num_fc_layers = trial.suggest_int("num_fc_layers", 1, 3)
    # Integer-valued parameters are sampled as integers directly
    # (40, 48, 56, 64) rather than as floats that are cast afterwards.
    num_filters = [trial.suggest_int("num_filter_"+str(i), 40, 64, step=8)
                   for i in range(num_conv_layers)]

    # number of out features of last fc layer is fix:100
    num_neurons = [trial.suggest_int("num_neurons_"+str(i), 32, 64, step=16)
                   for i in range(num_fc_layers-1)]          # 32, 48, 64

    # Generate the model
    model = Net(trial, num_conv_layers, num_fc_layers, num_filters, num_neurons).to(device)

    # Generate the optimizers
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "RMSprop", "SGD"])
    lr = trial.suggest_float("lr", 1e-3, 1e-2, log=True)
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)
    # loss function
    criterion = nn.CrossEntropyLoss()

    # Defined up front so the return below is valid even if no epoch runs.
    accuracy_test = 0.0

    # Training loop
    for epoch in range(epochs):

        model.train()
        for x, y in train_loader:
            x = x.to(device)
            y = y.to(device)

            optimizer.zero_grad()
            loss = criterion(model(x), y)
            loss.backward()
            optimizer.step()

        # Evaluate after every epoch: the pruner needs an intermediate value.
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for x, y in test_loader:
                x = x.to(device)
                y = y.to(device)

                _, predicted = torch.max(model(x), 1)
                total += y.size(0)
                correct += (predicted == y).sum().item()

        # Guard against an empty test loader.
        accuracy_test = correct / total if total else 0.0

        # pruning
        trial.report(accuracy_test, epoch)
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return accuracy_test

In [None]:
# Create an Optuna study to maximize test accuracy
# Second study, driven by the pruning-enabled objective. As before, the
# search maximises test accuracy and stops after a fixed wall-clock budget.
study2 = optuna.create_study(direction="maximize")
study2.optimize(
    objective,
    timeout=30 * 60,  # 30-minute timeout
    show_progress_bar=True,
)

# Summarise the best trial found.
trial = study2.best_trial
print("Best trial:")
print("  Test accuracy: ", trial.value)
print("  Params: ")
for key in trial.params:
    value = trial.params[key]
    print(f"{key}: {value}")

[I 2023-07-02 08:09:43,896] A new study created in memory with name: no-name-192339f4-ef6e-4ede-9b01-4f3ac89ac245


   0%|          | 00:00/30:00

[I 2023-07-02 08:13:03,611] Trial 0 finished with value: 0.01 and parameters: {'num_conv_layers': 2, 'num_fc_layers': 2, 'num_filter_0': 48.0, 'num_filter_1': 64.0, 'num_neurons_0': 48.0, 'optimizer': 'Adam', 'lr': 0.008858099275235802}. Best is trial 0 with value: 0.01.
[I 2023-07-02 08:16:25,564] Trial 1 finished with value: 0.1211 and parameters: {'num_conv_layers': 4, 'num_fc_layers': 3, 'num_filter_0': 64.0, 'num_filter_1': 56.0, 'num_filter_2': 64.0, 'num_filter_3': 40.0, 'num_neurons_0': 64.0, 'num_neurons_1': 32.0, 'optimizer': 'Adam', 'lr': 0.002821839393933318}. Best is trial 1 with value: 0.1211.
[I 2023-07-02 08:19:33,456] Trial 2 finished with value: 0.2171 and parameters: {'num_conv_layers': 2, 'num_fc_layers': 1, 'num_filter_0': 56.0, 'num_filter_1': 48.0, 'optimizer': 'SGD', 'lr': 0.0037485950603076837}. Best is trial 2 with value: 0.2171.
[I 2023-07-02 08:22:54,138] Trial 3 finished with value: 0.01 and parameters: {'num_conv_layers': 4, 'num_fc_layers': 1, 'num_filter