### Libraries

In [None]:
from copy import deepcopy

from sklearn.metrics import classification_report
from sklearn.model_selection import ParameterGrid
from skimage.segmentation import mark_boundaries
from matplotlib.image import imread
import matplotlib.pyplot as plt
from tqdm import tqdm
import numpy as np

from torch.utils.tensorboard import SummaryWriter
from torchvision.datasets import FashionMNIST
from torchvision import datasets, transforms
import torchvision
import torch

In [None]:
!unzip experiment.zip -d .
%load_ext tensorboard
%tensorboard --logdir ./experiment/

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

### Dataset & general parameters

In [None]:
# Fix the torch RNG so that runs are repeatable.
random_seed = 61
torch.manual_seed(random_seed)

# Mini-batch sizes used for training and for evaluation.
batch_size_train, batch_size_test = 64, 64

In [None]:
# Every image is converted to a tensor and normalised with mean 0.1307 and
# std 0.3081 (one value each, i.e. a single channel).
# NOTE(review): these look like the usual MNIST statistics rather than values
# measured on FashionMNIST - confirm that this is intended.

# Build the train split first and the test split second; both are downloaded
# into the current directory and each gets its own (identical) transform.
train_dataset, test_dataset = (
    FashionMNIST('.', train=is_train, download=True,
                 transform=torchvision.transforms.Compose([
                     torchvision.transforms.ToTensor(),
                     torchvision.transforms.Normalize((0.1307,), (0.3081,)),
                 ]))
    for is_train in (True, False)
)

print(train_dataset)

In [None]:
# Carve a 15000-sample validation subset out of the training data; the
# remaining 45000 samples stay in train_dataset.
train_dataset, val_dataset = torch.utils.data.random_split(
    dataset=train_dataset, lengths=[45000, 15000])
print(*map(len, (train_dataset, val_dataset, test_dataset)))

### General training code

In [None]:
def train_on_batch(model, x_batch, y_batch, optimizer, loss_function):
    """Perform a single optimisation step on one mini-batch.

    The model is switched to training mode, its gradients are cleared, the
    batch is moved to the global ``device``, the loss is back-propagated and
    the optimizer applies one update.

    Args:
        model: network being trained; called as ``model(inputs)``.
        x_batch: batch of inputs.
        y_batch: batch of targets handed to ``loss_function``.
        optimizer: optimizer whose ``step()`` applies the update.
        loss_function: callable ``(output, target) -> loss``.

    Returns:
        The loss of this batch as a Python scalar.
    """
    inputs, targets = x_batch.to(device), y_batch.to(device)

    model.train()
    model.zero_grad()

    batch_loss = loss_function(model(inputs), targets)
    batch_loss.backward()
    optimizer.step()

    return batch_loss.cpu().item()

In [None]:
def train_epoch(train_generator, model, loss_function, optimizer, callback=None):
    """Train ``model`` for one pass over ``train_generator``.

    Args:
        train_generator: iterable yielding ``(inputs, targets)`` batches.
        model: network being trained.
        loss_function: callable ``(output, target) -> loss``.
        optimizer: optimizer instance used for the parameter updates.
        callback: optional callable invoked as ``callback(model, batch_loss)``
            after every batch.

    Returns:
        The mean batch loss weighted by batch size, or NaN when the generator
        yields no samples (instead of raising ``ZeroDivisionError``).
    """
    weighted_loss_sum = 0.0
    n_samples = 0

    for batch_of_x, batch_of_y in train_generator:
        # train_on_batch moves both tensors to the target device itself, so
        # no extra transfer is needed here.
        batch_loss = train_on_batch(model, batch_of_x, batch_of_y, optimizer, loss_function)

        if callback is not None:
            callback(model, batch_loss)

        # Weight by batch size so a smaller final batch does not skew the mean.
        batch_len = len(batch_of_x)
        weighted_loss_sum += batch_loss * batch_len
        n_samples += batch_len

    if n_samples == 0:
        return float('nan')

    return weighted_loss_sum / n_samples

In [None]:
def trainer(count_of_epoch,
            batch_size,
            dataset,
            model,
            loss_function,
            optimizer,
            lr=0.001,
            callback=None):
    """Train ``model`` on ``dataset`` for ``count_of_epoch`` epochs.

    Args:
        count_of_epoch: number of passes over the dataset.
        batch_size: mini-batch size of the (shuffled) data loader.
        dataset: dataset handed to ``torch.utils.data.DataLoader``.
        model: network being trained.
        loss_function: callable ``(output, target) -> loss``.
        optimizer: optimizer factory called as
            ``optimizer(model.parameters(), lr=lr)``.
        lr: learning rate passed to the optimizer factory.
        callback: optional per-batch callable forwarded to ``train_epoch``.

    Returns:
        None. Progress and the latest epoch loss are shown via tqdm bars.
    """
    optima = optimizer(model.parameters(), lr=lr)

    epoch_bar = tqdm(range(count_of_epoch), desc='epoch')
    epoch_bar.set_postfix({'train epoch loss': np.nan})

    for _ in epoch_bar:
        # Number of batches per epoch, rounded up to cover a partial last batch.
        batches_per_epoch = -(-len(dataset) // batch_size)

        # A fresh shuffled loader for every epoch, wrapped in an inner bar.
        loader = torch.utils.data.DataLoader(dataset=dataset,
                                             batch_size=batch_size,
                                             shuffle=True)
        batch_bar = tqdm(loader, leave=False, total=batches_per_epoch)

        mean_loss = train_epoch(train_generator=batch_bar,
                                model=model,
                                loss_function=loss_function,
                                optimizer=optima,
                                callback=callback)

        epoch_bar.set_postfix({'train epoch loss': mean_loss})

In [None]:
def quality_of_train(batch_size,
                     dataset,
                     model,
                     loss_function):
    """Evaluate ``model`` on ``dataset`` without touching its training state.

    The model is switched to evaluation mode and gradients are disabled for
    the duration of the pass. This function is called in the middle of
    training, so otherwise dropout would stay active, batch-norm running
    statistics would be updated from the evaluation data, and autograd graphs
    would be built for nothing. The previous train/eval mode is restored
    before returning.

    Args:
        batch_size: mini-batch size of the evaluation data loader.
        dataset: dataset yielding ``(input, target)`` pairs.
        model: ``torch.nn.Module`` to evaluate.
        loss_function: callable ``(output, target) -> loss``.

    Returns:
        A tuple ``(test_loss, pred, real)``: the per-sample mean loss (NaN
        for an empty dataset), the predicted class indices and the true
        labels, both as flat Python lists.
    """
    batch_generator = torch.utils.data.DataLoader(dataset=dataset,
                                                  batch_size=batch_size)

    pred = []
    real = []
    test_loss = 0.0

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for x_batch, y_batch in batch_generator:
                x_batch = x_batch.to(device)
                y_batch = y_batch.to(device)

                output = model(x_batch)

                # Weight by batch size so the final division by len(dataset)
                # yields a per-sample mean.
                test_loss += loss_function(output, y_batch).cpu().item() * len(x_batch)

                pred.extend(torch.argmax(output, dim=-1).cpu().numpy().tolist())
                real.extend(y_batch.cpu().numpy().tolist())
    finally:
        # Hand the model back in the mode the caller left it in.
        model.train(was_training)

    n_samples = len(dataset)
    test_loss = test_loss / n_samples if n_samples else float('nan')

    return test_loss, pred, real

### CNN model

In [None]:
class CNN(torch.nn.Module):
    """Configurable convolutional classifier for 1x28x28 images, 10 classes.

    The network is ``n_layers`` blocks of
    ``Conv2d -> [BatchNorm2d] -> ReLU -> [MaxPool2d(2)]``, each multiplying
    the channel count by 4, followed by ``Flatten -> Dropout -> Linear``.

    Args:
        n_layers: number of convolutional blocks.
        kernel_size: side of the square convolution kernel.
        pooling: add a 2x2 max-pooling after every block.
        batch_norm: add batch normalisation after every convolution.
        dropout: dropout probability applied before the linear layer.
    """

    @property
    def device(self):
        """Device of the first parameter, or None if there are no parameters."""
        first_param = next(self.parameters(), None)
        return None if first_param is None else first_param.device

    def __init__(self, n_layers=1, kernel_size=5, pooling=False, batch_norm=False, dropout=0.0):
        super().__init__()

        self.n_channels = 1
        self.layers = torch.nn.Sequential()

        padding = (kernel_size - 1) // 2
        # Spatial side of the feature map, tracked layer by layer. With odd
        # kernels the padding keeps it unchanged; with even kernels each
        # convolution shrinks it by one, which a fixed "28" would miss.
        side = 28

        for layer in range(n_layers):
            out_channels = self.n_channels * 4
            self.layers.add_module(
                f'conv{layer}',
                torch.nn.Conv2d(self.n_channels, out_channels,
                                kernel_size=kernel_size, padding=padding))
            self.n_channels = out_channels
            # Output side of a stride-1, dilation-1 convolution.
            side = side + 2 * padding - kernel_size + 1

            if batch_norm:
                self.layers.add_module(f'bn{layer}', torch.nn.BatchNorm2d(self.n_channels))

            self.layers.add_module(f'relu{layer}', torch.nn.ReLU())

            if pooling:
                self.layers.add_module(f'pool{layer}', torch.nn.MaxPool2d(kernel_size=2))
                # MaxPool2d(2) floors the halved size.
                side //= 2

        self.layers.add_module('flatten', torch.nn.Flatten(start_dim=1))
        self.layers.add_module('dropout1', torch.nn.Dropout(dropout))
        self.layers.add_module('linear1',
                               torch.nn.Linear(self.n_channels * side ** 2, 10))

    def forward(self, input):
        """Return the class logits for a batch shaped ``(N, 1, 28, 28)``."""
        return self.layers(input)

### Tensorboard training tracking

In [None]:
class callback():
    """Per-batch training hook that reports progress to a TensorBoard writer.

    Every call logs the training loss. Every ``delimeter``-th call also logs
    the model graph and the loss of the model on ``dataset``.

    Args:
        writer: object providing ``add_scalar`` and ``add_graph``.
        dataset: dataset used for the periodic evaluation; its first sample
            is also used as the example input for the graph.
        loss_function: callable ``(output, target) -> loss``.
        delimeter: evaluation period, in calls.
        batch_size: mini-batch size used for the periodic evaluation.
    """

    def __init__(self, writer, dataset, loss_function, delimeter=100, batch_size=64):
        self.writer = writer
        self.dataset = dataset
        self.loss_function = loss_function
        self.delimeter = delimeter
        self.batch_size = batch_size
        # Number of calls seen so far; used as the TensorBoard global step.
        self.step = 0

    def forward(self, model, loss):
        """Log ``loss`` and, on every ``delimeter``-th call, evaluate ``model``."""
        self.step += 1
        self.writer.add_scalar('LOSS/train', loss, self.step)

        # Nothing more to do between evaluation points.
        if self.step % self.delimeter != 0:
            return

        example_input = self.dataset[0][0].view(1, 1, 28, 28).to(model.device)
        self.writer.add_graph(model, example_input)

        test_loss, _pred, _real = quality_of_train(batch_size=self.batch_size,
                                                   dataset=self.dataset,
                                                   model=model,
                                                   loss_function=self.loss_function)
        self.writer.add_scalar('LOSS/test', test_loss, self.step)

    def __call__(self, model, loss):
        """Make the instance usable as ``callback(model, loss)``."""
        return self.forward(model, loss)

### Code for CNN training

In [None]:
# Optimizer class (not an instance): trainer() instantiates it for each model.
optimizer = torch.optim.Adam
# Cross-entropy loss shared by training and evaluation.
loss_function = torch.nn.CrossEntropyLoss()

In [None]:
# Hyper-parameter grid: every combination below is trained for one epoch and
# tracked in its own TensorBoard run directory.
grid = ParameterGrid({
    'layers': [2, 3],
    'kernel_size': [3, 5, 7],
    'bn': [True, False],
    'pooling': [True, False],
    'dropout': [0.0, 0.25, 0.5],
})

# Not filled in by this loop; kept in case later cells use it.
scores = dict()

for item in tqdm(grid):
    print(str(item))

    model = CNN(
        n_layers=item['layers'],
        kernel_size=item['kernel_size'],
        pooling=item['pooling'],
        batch_norm=item['bn'],
        dropout=item['dropout']
    )
    model.float().to(device)

    # One run directory per configuration, named after the parameter dict.
    writer = SummaryWriter('experiment/' + str(item))
    try:
        # NOTE(review): the periodic evaluation uses test_dataset although a
        # val_dataset is split off above - confirm which one is intended.
        call = callback(writer, test_dataset, loss_function, batch_size=batch_size_test, delimeter=10)

        trainer(count_of_epoch=1,
                batch_size=batch_size_train,
                dataset=train_dataset,
                model=model,
                loss_function=loss_function,
                optimizer=optimizer,
                lr=0.001,
                callback=call)
    finally:
        # Flush pending events and release the event file even if training
        # fails, so writers do not pile up across configurations.
        writer.close()