# Tuning an Image Classification model (MNIST)

In this tutorial we will see how to use PyTorchWrapper to tune an image classification model on the MNIST dataset.

#### Additional libraries

First of all we need to install the `torchvision` library in order to download the data.

In [None]:
! pip install torchvision


#### Import Statements

In [None]:
import torch
import torchvision
import math
import random
import hyperopt

from pprint import pprint
from hyperopt import hp
from torch import nn
from torchvision.datasets import MNIST
from torch.utils.data.dataset import Dataset
from torch.utils.data.dataloader import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from pytorch_wrapper import modules, System
from pytorch_wrapper import evaluators as evaluators
from pytorch_wrapper.loss_wrappers import GenericPointWiseLossWrapper
from pytorch_wrapper.training_callbacks import EarlyStoppingCriterionCallback
from pytorch_wrapper.tuner import Tuner


#### Dataset Definition
Since torchvision provides a ready-to-use `torch.utils.data.Dataset` object for the MNIST dataset, we just need to wrap it with a custom class in order to adhere to the requirements of PyTorchWrapper, i.e. the data loaders must represent a batch as a dictionary.

In [None]:
class MNISTDatasetWrapper(Dataset):
    """Expose torchvision's MNIST dataset as dictionaries.

    Every example is returned as ``{'input': ..., 'target': ...}`` instead of
    a tuple, so the batches built by a ``DataLoader`` are dictionaries too.
    """

    def __init__(self, is_train):
        """Create the wrapped MNIST dataset, downloading it under 'data/mnist/'.

        :param is_train: Forwarded to ``MNIST(train=...)`` to select the split.
        """
        self.dataset = MNIST(
            'data/mnist/',
            train=is_train,
            download=True,
            transform=torchvision.transforms.ToTensor()
        )

    def __getitem__(self, index):
        """Return the example at ``index`` as an input/target dictionary."""
        # Index the wrapped dataset exactly once: each lookup loads the example
        # and applies the transform again, so indexing twice doubles the work.
        example = self.dataset[index]
        return {'input': example[0], 'target': example[1]}

    def __len__(self):
        """Return the number of examples in the wrapped dataset."""
        return len(self.dataset)


#### Model Definition
The model will be CNN-based, but the exact architecture will be chosen by the tuner.

In [None]:
class Model(nn.Module):
    """Stack of convolutional stages followed by an MLP with 10 outputs."""

    def __init__(self, channels, kernel_size, depth, dp, mlp_depth, mlp_hl):
        """Build the convolutional stack and the output MLP.

        :param channels: Number of output channels of every convolution.
        :param kernel_size: Kernel size of every convolution.
        :param depth: Number of convolutional stages; one stage is always built.
        :param dp: Dropout probability used in every stage and passed to the MLP as ``hidden_dp``.
        :param mlp_depth: Number of hidden layers of the MLP.
        :param mlp_hl: Size of each hidden layer of the MLP.
        """
        super().__init__()

        same_padding = math.floor(kernel_size / 2)
        # The first stage reads a single input channel, every later stage reads `channels`.
        stage_in_channels = [1] + [channels] * (depth - 1)

        stages = []
        for in_channels in stage_in_channels:
            stages += [
                nn.Conv2d(
                    in_channels=in_channels,
                    out_channels=channels,
                    kernel_size=kernel_size,
                    padding=same_padding
                ),
                nn.Dropout(p=dp),
                nn.MaxPool2d(kernel_size=2),
                nn.ReLU(),
            ]
        self.cnn = nn.Sequential(*stages)

        # Side length of the final feature map: 28 halved (and floored) `depth` times.
        # NOTE(review): assumes 28x28 inputs and a kernel_size for which the padding keeps the size.
        side = int(28 // math.pow(2, depth))
        self.out_mlp = modules.MLP(
            input_size=side * side * channels,
            num_hidden_layers=mlp_depth,
            hidden_layer_size=mlp_hl,
            hidden_activation=nn.ReLU,
            hidden_dp=dp,
            output_size=10,
            output_activation=None
        )

    def forward(self, x):
        """Run the CNN, flatten everything but the first dimension and apply the MLP."""
        features = self.cnn(x)
        flattened = features.view(features.shape[0], -1)
        return self.out_mlp(flattened)


#### Training

First of all we create the dataset objects alongside four data loaders. The `train_dataloader` will be used for training, the `val_dataloader` for early stopping, the `dev_dataloader` for hyperparameter optimization, and the `test_dataloader` for the final evaluation.

In [None]:
train_val_dev_dataset = MNISTDatasetWrapper(True)
test_dataset = MNISTDatasetWrapper(False)

# Shuffle all indexes of the training split once, with a fixed seed, so that
# the train/val/dev partition below is the same on every run.
random.seed(12345)
train_val_dev_indexes = list(range(len(train_val_dev_dataset)))
random.shuffle(train_val_dev_indexes)

# The first 10% of the shuffled indexes form the dev set, the next 10% the
# val set, and everything that remains is used for training.
eval_size = math.floor(0.1 * len(train_val_dev_dataset))
dev_indexes = train_val_dev_indexes[:eval_size]
val_indexes = train_val_dev_indexes[eval_size:2 * eval_size]
train_indexes = train_val_dev_indexes[2 * eval_size:]

# All three loaders read from the same dataset object; each one is restricted
# to its own index subset through a SubsetRandomSampler.
train_dataloader, val_dataloader, dev_dataloader = (
    DataLoader(
        train_val_dev_dataset,
        sampler=SubsetRandomSampler(subset_indexes),
        batch_size=128
    )
    for subset_indexes in (train_indexes, val_indexes, dev_indexes)
)

test_dataloader = DataLoader(test_dataset, shuffle=False, batch_size=128)


Next we define the step function. This function is called at the beginning of each iteration of the tuning process.
It is responsible for creating, training and evaluating the model given the chosen hyperparameters. The goal of the tuning process is to find the hyperparameters that minimize a chosen metric. In this example we try to minimize
the **negative** f1-score, which is equivalent to maximizing the f1-score.

In [None]:
def step_function(current_params):
    """Train a model with the given hyperparameters and score it on the dev set.

    :param current_params: Dictionary with a 'model_params' entry (keyword
        arguments for ``Model``) and a 'training_params' entry containing 'lr'.
    :return: The negated 'f1' score obtained on ``dev_dataloader``.
    """
    model = Model(**current_params['model_params'])

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    system = System(model, last_activation=nn.Softmax(dim=-1), device=device)

    loss_wrapper = GenericPointWiseLossWrapper(nn.CrossEntropyLoss())
    evals = {
        'prec': evaluators.MultiClassPrecisionEvaluator(average='macro'),
        'rec': evaluators.MultiClassRecallEvaluator(average='macro'),
        'f1': evaluators.MultiClassF1Evaluator(average='macro')
    }

    # Only parameters that require gradients are handed to the optimizer.
    trainable_parameters = [p for p in system.model.parameters() if p.requires_grad]
    learning_rate = current_params['training_params']['lr']
    optimizer = torch.optim.Adam(trainable_parameters, lr=learning_rate)

    # Early stopping watches the 'f1' evaluator on the 'val' loader.
    early_stopping = EarlyStoppingCriterionCallback(
        3,
        'val',
        'f1',
        'data/mnist_tuning_cur_best.weights'
    )

    system.train(
        loss_wrapper,
        optimizer,
        train_data_loader=train_dataloader,
        evaluators=evals,
        evaluation_data_loaders={'val': val_dataloader},
        callbacks=[early_stopping]
    )

    # The result is negated because the tuning process minimizes the returned value.
    dev_results = system.evaluate(dev_dataloader, evals)
    return -dev_results['f1'].score


Finally, we define the `hyper_parameter_generators`, create the tuner, and run it. For more information about defining the `hyper_parameter_generators`, check the HyperOpt documentation.

In [None]:
# Search space: one HyperOpt expression per hyperparameter, grouped under the
# two keys that step_function reads ('model_params' and 'training_params').
hyper_parameter_generators = dict(
    model_params=dict(
        channels=hp.choice('channels', [5, 10, 20, 30, 50]),
        kernel_size=hp.choice('kernel_size', [3, 5, 7]),
        depth=hp.choice('depth', [1, 2, 3, 4]),
        dp=hp.uniform('dp', 0, 0.5),
        mlp_depth=hp.choice('mlp_depth', [1, 2, 3, 4]),
        mlp_hl=hp.choice('mlp_hl', [32, 64, 128, 256]),
    ),
    training_params=dict(
        # loguniform takes its bounds in log space, hence the math.log calls.
        lr=hp.loguniform('lr', math.log(0.0001), math.log(0.1)),
    ),
)

tuner = Tuner(
    hyper_parameter_generators,
    algorithm=hyperopt.tpe.suggest,
    fit_iterations=20,
    step_function=step_function
)

results = tuner.run()


In [None]:
# Pretty-print the first entry of the tuner results.
# NOTE(review): presumably this is the best trial found — confirm against the Tuner.run documentation.
pprint(results[0])
