In [2]:
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data
import torchvision
from torchvision import datasets
from torchvision import transforms
# from torchsummary import summary

!pip install optuna
import optuna




[notice] A new release of pip available: 22.2.1 -> 22.2.2
[notice] To update, run: python.exe -m pip install --upgrade pip


ModuleNotFoundError: No module named 'torchsummary'

In [None]:
# Notebook-wide configuration.
# Use the GPU when one is available and fall back to the CPU otherwise, so the
# notebook still runs end-to-end on machines without CUDA.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
BATCHSIZE = 128
CLASSES = 10  # 10 for CIFAR-10, 100 for CIFAR-100
DIR = os.getcwd()  # dataset root: CIFAR-10 is downloaded into the working directory
EPOCHS = 10
LOG_INTERVAL = 10  # NOTE(review): not referenced by any cell shown in this notebook
# Each epoch only uses the first 30 training / 10 validation batches so that a
# single trial stays fast.
N_TRAIN_EXAMPLES = BATCHSIZE * 30
N_VALID_EXAMPLES = BATCHSIZE * 10

In [None]:
def define_model(trial, num_classes=None):
    """Build a CNN classifier whose hyperparameters are sampled from ``trial``.

    Architecture: four ``Conv2d(kernel_size=3) -> BatchNorm2d -> ReLU`` stages
    (strides 1, 2, 2, 2; the first three followed by ``Dropout``), then
    ``Flatten`` and a ``Linear -> ReLU -> Dropout -> Linear`` head. No softmax
    is applied; the raw logits are meant to be fed to ``nn.CrossEntropyLoss``.

    The first ``Linear`` layer expects ``256 * 2 * 2`` input features, which is
    what the convolution stack produces for 3x32x32 inputs (32 -> 30 -> 14 ->
    6 -> 2 pixels per side). Other input sizes need that number adjusted.

    Hyperparameters sampled (in this order):
        filters_1 (int): 32 or 64 output channels of the first conv layer.
        dropout_1 (float): dropout probability in [0.2, 0.4] after stage 1.
        filters_2 (int): 64, 96 or 128 output channels of the second conv layer.
        dropout_2 (float): dropout probability in [0.2, 0.4] after stage 2.
        linear_1 (int): 128, 256, 384 or 512 hidden units in the head.

    Args:
        trial: object providing ``suggest_int`` and ``suggest_float``
            (an ``optuna.trial.Trial``).
        num_classes: number of output logits. ``None`` (the default) uses the
            module-level ``CLASSES`` constant.

    Returns:
        nn.Sequential: the assembled, untrained model.
    """
    if num_classes is None:
        num_classes = CLASSES

    def conv_stage(in_channels, out_channels, stride, dropout=None):
        # One Conv2d -> BatchNorm2d -> ReLU stage, optionally ending in Dropout.
        stage = [
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=stride),
            nn.BatchNorm2d(out_channels),  # must match the conv's out_channels
            nn.ReLU(),
        ]
        if dropout is not None:
            stage.append(nn.Dropout(dropout))
        return stage

    layers = []

    filters_1 = trial.suggest_int(name="filters_1", low=32, high=64, step=32)
    # Was registered as "dropout_l" (letter l); renamed to match "dropout_2".
    dropout_1 = trial.suggest_float(name="dropout_1", low=0.2, high=0.4)
    layers += conv_stage(3, filters_1, stride=1, dropout=dropout_1)

    filters_2 = trial.suggest_int(name="filters_2", low=64, high=128, step=32)
    dropout_2 = trial.suggest_float(name="dropout_2", low=0.2, high=0.4)
    layers += conv_stage(filters_1, filters_2, stride=2, dropout=dropout_2)

    layers += conv_stage(filters_2, 128, stride=2, dropout=0.2)
    layers += conv_stage(128, 256, stride=2)

    layers.append(nn.Flatten())
    hidden_units = trial.suggest_int(name="linear_1", low=128, high=512, step=128)
    layers.append(nn.Linear(256 * 2 * 2, hidden_units))
    # Without a non-linearity here the two Linear layers collapse into a single
    # linear map and the sampled hidden width has almost no effect.
    layers.append(nn.ReLU())
    layers.append(nn.Dropout(0.2))
    layers.append(nn.Linear(hidden_units, num_classes))

    return nn.Sequential(*layers)


In [None]:
def get_cifar10(split_seed=0):
    """Return CIFAR-10 training and validation ``DataLoader`` objects.

    The CIFAR-10 training set is loaded from ``DIR`` (``download=True``),
    converted to tensors, normalised per channel with mean 0.5 and std 0.5,
    and split 80% / 20% into training and validation subsets.

    Args:
        split_seed: seed for the train/validation split. With the default the
            split is identical on every call, so every Optuna trial is
            validated on the same images and trial values are comparable.
            Pass ``None`` for an unseeded split drawn from torch's global RNG.

    Returns:
        tuple: ``(train_loader, valid_loader)``. Both use ``BATCHSIZE`` and two
        worker processes; only the training loader shuffles.
    """
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

    trainset = torchvision.datasets.CIFAR10(root=DIR, train=True,
                                            download=True, transform=transform)

    # 80% training / 20% validation. The validation size is the remainder so
    # the two lengths always sum to len(trainset), which random_split requires;
    # two independent int() truncations do not guarantee that.
    n_train = int(0.8 * len(trainset))
    n_valid = len(trainset) - n_train

    split_kwargs = {}
    if split_seed is not None:
        split_kwargs["generator"] = torch.Generator().manual_seed(split_seed)
    train_set, val_set = torch.utils.data.random_split(trainset, [n_train, n_valid], **split_kwargs)

    train_loader = torch.utils.data.DataLoader(train_set, batch_size=BATCHSIZE,
                                               shuffle=True, num_workers=2)
    valid_loader = torch.utils.data.DataLoader(val_set, batch_size=BATCHSIZE,
                                               shuffle=False, num_workers=2)

    # The held-out test split is not used here. Its loader would be built
    # like this:
    #
    # testset = torchvision.datasets.CIFAR10(root=DIR, train=False,
    #                                        download=True, transform=transform)
    # test_loader = torch.utils.data.DataLoader(testset, batch_size=BATCHSIZE,
    #                                           shuffle=False, num_workers=2)

    return train_loader, valid_loader

In [None]:
def objective(trial):
    """Optuna objective: train a sampled model on CIFAR-10, return validation loss.

    A model is built with ``define_model(trial)``; the optimizer class (Adam,
    RMSprop or SGD) and the learning rate (log-uniform in [1e-5, 1e-1]) are
    sampled from ``trial`` as well. The model is trained for ``EPOCHS`` epochs
    on at most ``N_TRAIN_EXAMPLES`` examples per epoch and validated on at most
    ``N_VALID_EXAMPLES`` examples. After every epoch the mean validation loss
    is reported to the trial so the pruner can stop unpromising trials early.

    Args:
        trial: the ``optuna.trial.Trial`` being evaluated.

    Returns:
        float: mean per-example cross-entropy validation loss of the last epoch.

    Raises:
        optuna.exceptions.TrialPruned: when ``trial.should_prune()`` is true.
    """
    # Generate the model.
    model = define_model(trial).to(DEVICE)

    # Generate the optimizer.
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "RMSprop", "SGD"])
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)
    # Instantiate the loss module once and call the instance inside the loops:
    # nn.CrossEntropyLoss(output, target) would pass the tensors to the
    # constructor instead of computing a loss.
    criterion = nn.CrossEntropyLoss()

    # Get the CIFAR-10 data loaders.
    train_loader, valid_loader = get_cifar10()

    for epoch in range(EPOCHS):
        # Training of the model.
        model.train()
        for batch_idx, (data, target) in enumerate(train_loader):
            # Limiting training data for faster epochs.
            if batch_idx * BATCHSIZE >= N_TRAIN_EXAMPLES:
                break

            data, target = data.to(DEVICE), target.to(DEVICE)

            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

        # Validation of the model.
        model.eval()
        correct = 0
        loss_sum = 0.0
        n_seen = 0
        with torch.no_grad():
            for batch_idx, (data, target) in enumerate(valid_loader):
                # Limiting validation data.
                if batch_idx * BATCHSIZE >= N_VALID_EXAMPLES:
                    break

                data, target = data.to(DEVICE), target.to(DEVICE)
                output = model(data)

                # Predicted class = index of the largest logit.
                pred = output.argmax(dim=1)
                correct += (pred == target).sum().item()

                # The criterion returns the batch *mean*; weight it by the
                # batch size so that dividing by the number of examples below
                # gives a true per-example mean.
                batch_size = target.size(0)
                loss_sum += criterion(output, target).item() * batch_size
                n_seen += batch_size

        # Previously the sum of batch means was divided by the example count,
        # which under-reported the loss by roughly a factor of BATCHSIZE.
        denom = max(n_seen, 1)  # guard against an empty validation pass
        val_loss_epoch = loss_sum / denom
        accuracy = correct / denom

        # The loss is the optimised value; keep accuracy as extra information.
        trial.set_user_attr("val_accuracy", accuracy)
        trial.report(val_loss_epoch, epoch)

        # Handle pruning based on the intermediate value.
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return val_loss_epoch

In [None]:
if __name__ == "__main__":
    # `objective` returns the validation loss, so the study minimises it.
    # (Use direction="maximize" instead if `objective` returns accuracy.)
    study = optuna.create_study(direction="minimize")
    # Stop after 30 trials or 600 seconds, whichever limit is hit first.
    study.optimize(objective, n_trials=30, timeout=600)

    # Split the recorded trials by their final state.
    pruned_trials = list(
        filter(lambda t: t.state == optuna.trial.TrialState.PRUNED, study.trials)
    )
    complete_trials = list(
        filter(lambda t: t.state == optuna.trial.TrialState.COMPLETE, study.trials)
    )

    # Summary of the whole study.
    print("Study statistics: ")
    print("  Number of finished trials: ", len(study.trials))
    print("  Number of pruned trials: ", len(pruned_trials))
    print("  Number of complete trials: ", len(complete_trials))

    # Details of the best trial: its objective value and sampled parameters.
    print("Best trial:")
    trial = study.best_trial
    print("  Value: ", trial.value)
    print("  Params: ")
    for key, value in trial.params.items():
        print("    {}: {}".format(key, value))

In [None]:
# Display the best trial of `study` as this cell's output.
study.best_trial

In [None]:
# Optuna's optimization-history plot for `study`.
optuna.visualization.plot_optimization_history(study)

In [None]:
# Optuna's hyperparameter-importance plot for `study`; useful for finding out
# which hyperparameters matter most.
optuna.visualization.plot_param_importances(study)

In [None]:
# Optuna's slice plot for `study`; this one gives a clear picture of the search.
optuna.visualization.plot_slice(study)

In [None]:
# Optuna's parallel-coordinate plot for `study`.
optuna.visualization.plot_parallel_coordinate(study)

In [None]:
# SKIP THIS CELL: scratch code, kept disabled inside the string literal below.
# It was used to check the output size of each layer of the model.
# It calls `summary` from torchsummary, whose import is commented out in the
# first cell, so that import must be restored before enabling the code.
# IMPORTANT: only change the input filter, to maintain the output size of each layer.
"""
model = nn.Sequential(nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1)
    ,nn.BatchNorm2d(32)
    ,nn.ReLU()
    ,nn.Dropout(0.2)
    ,nn.Conv2d(in_channels=32, out_channels=128, kernel_size=3, stride=2)
    ,nn.BatchNorm2d(128) #this must be same as the out_channel of the previous layer
    ,nn.ReLU()
    ,nn.Dropout(0.2)
    ,nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=2)
    ,nn.BatchNorm2d(128)
    ,nn.ReLU()
    ,nn.Dropout(0.2)
    ,nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=2)
    ,nn.BatchNorm2d(256)
    ,nn.ReLU()
    ,nn.Flatten()
    ,nn.Linear(256*2*2, 500)  #output size found by printing the model detail using summary in torchsummary 
    ,nn.Dropout(0.2)
    ,nn.Linear(500, CLASSES))  #CLASSES = 10 for cifar10 and 100 for cifar100

print(summary(model,(3,32,32)))
"""