In [1]:
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data
import torchvision
from torchvision import datasets
from torchvision import transforms
from torchsummary import summary

!pip install optuna
import optuna



In [2]:
DEVICE = torch.device("cuda")  ##'cuda' or 'cpu'
BATCHSIZE = 128
CLASSES = 10   #CLASSES = 10 for cifar10 and 100 for cifar100
DIR = os.getcwd()
EPOCHS = 10
LOG_INTERVAL = 10
N_TRAIN_EXAMPLES = BATCHSIZE * 30
N_VALID_EXAMPLES = BATCHSIZE * 10

In [3]:
def define_model(trial):
    # We optimize the number of layers, hidden units and dropout ratio in each layer.
    #n_layers = trial.suggest_int("n_layers", 1, 3)

    in_features = 3*32 *32  ## channels first in pytorch
    layers = []

    layers.append(nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1))
    layers.append(nn.BatchNorm2d(32))
    layers.append(nn.ReLU())
    layers.append(nn.Dropout(0.2))

    layers.append(nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=2))
    layers.append(nn.BatchNorm2d(64))
    layers.append(nn.ReLU())
    layers.append(nn.Dropout(0.2))

    layers.append(nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=2))
    layers.append(nn.BatchNorm2d(128))
    layers.append(nn.ReLU())
    layers.append(nn.Dropout(0.2))

    layers.append(nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=2))
    layers.append(nn.BatchNorm2d(256))
    layers.append(nn.ReLU())
    
    layers.append(nn.Flatten())
    layers.append(nn.Linear(256*2*2, 500))  #output size found by printing the model detail using summary in torchsummary 
    layers.append(nn.Dropout(0.2))
    layers.append(nn.Linear(500, CLASSES))  #CLASSES = 10 for cifar10 and 100 for cifar100
    #cross entropy loss used as loss function, therefore no softmax layer here

    return nn.Sequential(*layers)


In [4]:
def get_cifar10():
    # Load cifar10 dataset.

    transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    trainset = torchvision.datasets.CIFAR10(root=DIR, train=True,
                                        download=True, transform=transform)
    train_loader = torch.utils.data.DataLoader(trainset, batch_size=BATCHSIZE,
                                          shuffle=True, num_workers=2)

    testset = torchvision.datasets.CIFAR10(root=DIR, train=False,
                                       download=True, transform=transform)
    valid_loader = torch.utils.data.DataLoader(testset, batch_size=BATCHSIZE,
                                         shuffle=False, num_workers=2)

    return train_loader, valid_loader

In [5]:
def objective(trial):

    # Generate the model.
    model = define_model(trial).to(DEVICE)

    # Generate the optimizers.
    #optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "RMSprop", "SGD"]) #for hp tuning
    optimizer_name = "Adam"
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True) #for hp tuning
    #lr = 0.001
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)
    CEloss = nn.CrossEntropyLoss()  ## this loss object must be used the loop. Directly using nn.CrossEntropyLoss() gives error

    # Get the MNIST dataset.
    train_loader, valid_loader = get_cifar10()

    # Training of the model.
    for epoch in range(EPOCHS):
        model.train()
        for batch_idx, (data, target) in enumerate(train_loader):
            # Limiting training data for faster epochs.
            if batch_idx * BATCHSIZE >= N_TRAIN_EXAMPLES:
                break

            #data, target = data.view(data.size(0), -1).to(DEVICE), target.to(DEVICE)  ## for mnist
            data, target = data.to(DEVICE), target.to(DEVICE)  ## for cifar 10 and 100

            optimizer.zero_grad()
            output = model(data)
            loss = CEloss(output, target)  ## used cross entropy loss
            loss.backward()
            optimizer.step()

        # Validation of the model.
        model.eval()
        correct = 0
        with torch.no_grad():
            for batch_idx, (data, target) in enumerate(valid_loader):
                # Limiting validation data.
                if batch_idx * BATCHSIZE >= N_VALID_EXAMPLES:
                    break
                #data, target = data.view(data.size(0), -1).to(DEVICE), target.to(DEVICE)  ## for mnist
                data, target = data.to(DEVICE), target.to(DEVICE)  ## for cifar 10 and 100
                output = model(data)
                # Get the index of the max log-probability.
                pred = output.argmax(dim=1, keepdim=True)
                correct += pred.eq(target.view_as(pred)).sum().item()

        accuracy = correct / min(len(valid_loader.dataset), N_VALID_EXAMPLES)

        trial.report(accuracy, epoch)

        # Handle pruning based on the intermediate value.
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return accuracy

In [6]:
if __name__ == "__main__":
    study = optuna.create_study(direction="maximize")  # 'maximize' because objective function is returning accuracy
    study.optimize(objective, n_trials=10, timeout=600) ## link: https://optuna.readthedocs.io/en/stable/reference/generated/optuna.study.Study.html#optuna.study.Study.optimize

    pruned_trials = [t for t in study.trials if t.state == optuna.trial.TrialState.PRUNED]
    complete_trials = [t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE]

    print("Study statistics: ")
    print("  Number of finished trials: ", len(study.trials))
    print("  Number of pruned trials: ", len(pruned_trials))
    print("  Number of complete trials: ", len(complete_trials))

    print("Best trial:")
    trial = study.best_trial

    print("  Value: ", trial.value)

    print("  Params: ")
    for key, value in trial.params.items():
        print("    {}: {}".format(key, value))

[32m[I 2021-01-18 09:45:21,299][0m A new study created in memory with name: no-name-102b3075-59df-4bd4-a701-0604ef999d47[0m


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to /content/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting /content/cifar-10-python.tar.gz to /content

Files already downloaded and verified


[32m[I 2021-01-18 09:45:55,228][0m Trial 0 finished with value: 0.084375 and parameters: {'lr': 0.047602844492126345}. Best is trial 0 with value: 0.084375.[0m


Files already downloaded and verified
Files already downloaded and verified


[32m[I 2021-01-18 09:46:13,003][0m Trial 1 finished with value: 0.42578125 and parameters: {'lr': 6.351483997675882e-05}. Best is trial 1 with value: 0.42578125.[0m


Files already downloaded and verified
Files already downloaded and verified


[32m[I 2021-01-18 09:46:30,987][0m Trial 2 finished with value: 0.3484375 and parameters: {'lr': 3.988502965923559e-05}. Best is trial 1 with value: 0.42578125.[0m


Files already downloaded and verified
Files already downloaded and verified


[32m[I 2021-01-18 09:46:48,716][0m Trial 3 finished with value: 0.4640625 and parameters: {'lr': 0.006178158267121312}. Best is trial 3 with value: 0.4640625.[0m


Files already downloaded and verified
Files already downloaded and verified


[32m[I 2021-01-18 09:47:06,619][0m Trial 4 finished with value: 0.24921875 and parameters: {'lr': 1.0154574498795375e-05}. Best is trial 3 with value: 0.4640625.[0m


Files already downloaded and verified
Files already downloaded and verified


[32m[I 2021-01-18 09:47:09,995][0m Trial 5 pruned. [0m


Files already downloaded and verified
Files already downloaded and verified


[32m[I 2021-01-18 09:47:27,762][0m Trial 6 finished with value: 0.46875 and parameters: {'lr': 0.00011989912593213019}. Best is trial 6 with value: 0.46875.[0m


Files already downloaded and verified
Files already downloaded and verified


[32m[I 2021-01-18 09:47:32,699][0m Trial 7 pruned. [0m


Files already downloaded and verified
Files already downloaded and verified


[32m[I 2021-01-18 09:47:36,099][0m Trial 8 pruned. [0m


Files already downloaded and verified
Files already downloaded and verified


[32m[I 2021-01-18 09:47:39,464][0m Trial 9 pruned. [0m


Study statistics: 
  Number of finished trials:  10
  Number of pruned trials:  4
  Number of complete trials:  6
Best trial:
  Value:  0.46875
  Params: 
    lr: 0.00011989912593213019


In [8]:
study.best_trial

FrozenTrial(number=6, values=[0.46875], datetime_start=datetime.datetime(2021, 1, 18, 9, 47, 9, 996467), datetime_complete=datetime.datetime(2021, 1, 18, 9, 47, 27, 762209), params={'lr': 0.00011989912593213019}, distributions={'lr': LogUniformDistribution(high=0.1, low=1e-05)}, user_attrs={}, system_attrs={}, intermediate_values={0: 0.21015625, 1: 0.3046875, 2: 0.33671875, 3: 0.3625, 4: 0.40234375, 5: 0.3984375, 6: 0.4203125, 7: 0.42265625, 8: 0.453125, 9: 0.46875}, trial_id=6, state=TrialState.COMPLETE, value=None)

In [9]:
optuna.visualization.plot_optimization_history(study)

In [10]:
optuna.visualization.plot_param_importances(study) ## this is important to figure out which hp is important

In [11]:
optuna.visualization.plot_slice(study)   ## this gives a clear picture 

In [12]:
optuna.visualization.plot_parallel_coordinate(study)

In [7]:
# SKIP THIS
#### used for testing output sizes of layers in the model
#****important: only change the input filter to maintain the output size of each layer
"""
model = nn.Sequential(nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1)
    ,nn.BatchNorm2d(32)
    ,nn.ReLU()
    ,nn.Dropout(0.2)
    ,nn.Conv2d(in_channels=32, out_channels=128, kernel_size=3, stride=2)
    ,nn.BatchNorm2d(128) #this must be same as the out_channel of the previous layer
    ,nn.ReLU()
    ,nn.Dropout(0.2)
    ,nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=2)
    ,nn.BatchNorm2d(128)
    ,nn.ReLU()
    ,nn.Dropout(0.2)
    ,nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=2)
    ,nn.BatchNorm2d(256)
    ,nn.ReLU()
    ,nn.Flatten()
    ,nn.Linear(256*2*2, 500)  #output size found by printing the model detail using summary in torchsummary 
    ,nn.Dropout(0.2)
    ,nn.Linear(500, CLASSES))  #CLASSES = 10 for cifar10 and 100 for cifar100

print(summary(model,(3,32,32)))
"""

'\nmodel = nn.Sequential(nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1)\n    ,nn.BatchNorm2d(32)\n    ,nn.ReLU()\n    ,nn.Dropout(0.2)\n    ,nn.Conv2d(in_channels=32, out_channels=128, kernel_size=3, stride=2)\n    ,nn.BatchNorm2d(128) #this must be same as the out_channel of the previous layer\n    ,nn.ReLU()\n    ,nn.Dropout(0.2)\n    ,nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=2)\n    ,nn.BatchNorm2d(128)\n    ,nn.ReLU()\n    ,nn.Dropout(0.2)\n    ,nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=2)\n    ,nn.BatchNorm2d(256)\n    ,nn.ReLU()\n    ,nn.Flatten()\n    ,nn.Linear(256*2*2, 500)  #output size found by printing the model detail using summary in torchsummary \n    ,nn.Dropout(0.2)\n    ,nn.Linear(500, CLASSES))  #CLASSES = 10 for cifar10 and 100 for cifar100\n\nprint(summary(model,(3,32,32)))\n'