# Train VGG model

In [None]:
#!g1.1 #noqa
# Environment check: show GPU/driver status using the `nvidia-smi` CLI.
# NOTE(review): the `#!g1.1` line that opens every cell looks like a platform
# directive (compute configuration) - presumably it must stay the first line
# of the cell; confirm before moving it.
!nvidia-smi

In [None]:
#!g1.1 #noqa
# Environment check: print the version of the `python3` found by the shell.
!python3 --version

In [None]:
#!g1.1 #noqa
import sys
# Extend the module search path so that imports can resolve from this folder.
# NOTE(review): presumably this is where `processingDataSet` and `VGGModel`
# (imported in the next cell) live - confirm.
sys.path.append('/home/jupyter/work/resources/')

In [None]:
#!g1.1 #noqa
import pickle
import random
from typing import Any, Tuple

import torch
import torch.nn as nn
from matplotlib import pyplot as plt
from sklearn.metrics import f1_score
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from torchsummary import summary
from torchvision import transforms
from tqdm import tqdm

from processingDataSet import conv_to_img, ImageNetDataset
from VGGModel import vgg19

In [None]:
#!g1.1 #noqa
# Root directory handed to ImageNetDataset for both the train and validation sets.
# NOTE(review): the path suggests the ILSVRC CLS-LOC training images - confirm.
data_path = '/home/jupyter/mnt/datasets/ImageNet/ILSVRC/Data/CLS-LOC/train'

In [None]:
#!g1.1 #noqa
# Seed applied to both the `random` and `torch` generators in the next cell.
random_seed = 10
# Prefer the GPU, but fall back to the CPU so the notebook still runs (slowly)
# on a host without CUDA instead of failing at the first `.to(device)`.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [None]:
#!g1.1 #noqa
# Seed Python's `random` module and PyTorch's generator with the same value
# so that runs are repeatable.
random.seed(random_seed)
torch.manual_seed(random_seed)

In [None]:
#!g1.1 #noqa
# Training-time preprocessing followed by random augmentation.
# The rotation range is a fixed constant on purpose: drawing the bound from
# `random` when the cell runs makes the augmentation strength depend on the
# RNG state / cell execution order, and a drawn bound of 0 disables rotation.
transform_train = transforms.Compose([
    transforms.Resize([224, 224]),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    # A fresh angle is sampled per image from [-180, 180] degrees.
    transforms.RandomRotation(degrees=180),
    transforms.RandomHorizontalFlip(p=0.2)])

In [None]:
#!g1.1 #noqa
# Evaluation preprocessing: the same resize / tensor conversion / normalisation
# steps as the training pipeline, without any random augmentation.
# NOTE(review): the mean/std triples look like the commonly used ImageNet
# channel statistics - confirm they match the pretrained/expected input.
transform_test = transforms.Compose([
    transforms.Resize([224, 224]),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))])

In [None]:
#!g1.1 #noqa
# Build the project's vgg19 with num_classes=10 and move it to `device`.
model = vgg19(num_classes = 10).to(device)

In [None]:
#!g1.1 #noqa
# Print a torchsummary report of the model for a 3x224x224 input.
summary(model, (3, 224, 224), device = device)

In [None]:
#!g1.1 #noqa
# Hyper parameters
lr = 0.01  # initial learning rate given to the SGD optimizer
batch_size = 64  # batch size used by both data loaders
epochs = 6  # number of epochs passed to train()

In [None]:
#!g1.1 #noqa
# The pickle file stores two objects back to back: the training split first,
# then the validation split. Unpickling can execute arbitrary code, so only
# load files produced by this project.
with open('../filtered_data_10class.pkl', 'rb') as file:
    train_data, val_data = [pickle.load(file) for _ in range(2)]

In [None]:
#!g1.1 #noqa
# Wrap each loaded split in the project dataset class; the training split gets
# the augmenting transform, the validation split the plain one.
train_set = ImageNetDataset(data_path, train_data, transform_train)
val_set = ImageNetDataset(data_path, val_data, transform_test)

In [None]:
#!g1.1 #noqa
# Sanity check: report how many samples each split contains.
print('Train data size: ', len(train_set), 'Validation data size: ', len(val_set))

In [None]:
#!g1.1 #noqa
# Training batches are reshuffled each epoch; the trailing partial batch is
# dropped so every optimisation step sees exactly `batch_size` samples.
train_loader = DataLoader(train_set, batch_size = batch_size, shuffle = True, drop_last = True)
# Validation keeps the trailing partial batch: dropping it would silently
# exclude up to `batch_size - 1` samples from every reported metric.
val_loader = DataLoader(val_set, batch_size = batch_size, shuffle = False, drop_last = False)

In [None]:
#!g1.1 #noqa
# SGD with momentum over all model parameters, starting at `lr`.
optimizer = torch.optim.SGD(model.parameters(), lr = lr, momentum = 0.9)
# Multiply the LR by 0.1 once the monitored value has not increased
# (mode='max') for more than `patience` epochs, then wait `cooldown` epochs
# before monitoring resumes. train() steps it with the validation accuracy.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode = 'max',
                                                       factor = 0.1, patience = 3,
                                                       cooldown = 3)
# Classification loss; train() applies it directly to the model output.
loss_func = nn.CrossEntropyLoss()

In [None]:
#!g1.1 #noqa
def _run_epoch(model: nn.Module, loss_fn: nn.Module, loader: DataLoader,
               opt: Any = None) -> Tuple[float, float]:
    """
    Run one pass over loader and return (mean batch loss, accuracy).

    With an optimizer the model is put into train mode and updated after
    every batch (a tqdm progress bar is shown). Without one the pass runs in
    eval mode with gradient tracking disabled.

    Batches are moved to the notebook-level `device`. Both returned values
    are plain Python floats; accuracy is correct predictions divided by the
    number of samples seen, so unequal batch sizes do not bias it.
    """
    training = opt is not None
    model.train(training)

    loss_sum = 0.0
    n_batches = 0
    n_correct = 0
    n_samples = 0

    with torch.set_grad_enabled(training):
        for x_batch, y_batch in (tqdm(loader) if training else loader):
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)

            if training:
                opt.zero_grad()
            y_pred = model(x_batch)
            loss = loss_fn(y_pred, y_batch)
            if training:
                loss.backward()
                opt.step()

            # .item() turns the results into Python numbers, so no tensors
            # (or autograd graphs) are kept alive by the running totals.
            loss_sum += loss.item()
            n_batches += 1
            n_correct += (torch.argmax(y_pred, 1) == y_batch).sum().item()
            n_samples += len(y_batch)

            # Release the batch tensors before the next batch is allocated.
            del x_batch, y_batch, y_pred, loss

    # max(..., 1) avoids a ZeroDivisionError when the loader is empty.
    return loss_sum / max(n_batches, 1), n_correct / max(n_samples, 1)


def train(model: nn.Module, opt: torch.optim.Optimizer, scheduler: Any,
          loss_fn: nn.Module, epochs: int, data_tr: DataLoader,
          data_val: DataLoader, writer: SummaryWriter, start: int = 0) -> None:
    """
    Start train and validation mode for epochs amount.

    Epochs are numbered from start to start + epochs - 1. After each epoch
    the scheduler is stepped with the validation accuracy and a summary line
    is printed.

    Save:
    * metrics (Lr, Loss, Accuracy) into TensorBoard at each epoch;
    * model and optimizer parameters every 5 epochs, into
      checkpoints/modelOptE<epoch>.pt where <epoch> is the zero-based index
      of the epoch that just finished (the directory is created if missing).

    The writer is closed before returning.
    """
    import os  # local import keeps this cell independent of the import cell

    for epoch in range(start, start + epochs):
        avg_loss, train_accuracy = _run_epoch(model, loss_fn, data_tr, opt)
        avg_val_loss, val_accuracy = _run_epoch(model, loss_fn, data_val)

        # Logged before scheduler.step(): this is the LR the epoch ran with.
        writer.add_scalar('Lr', opt.param_groups[0]['lr'], epoch)
        writer.add_scalars('Loss', {'train': avg_loss, 'val': avg_val_loss}, epoch)
        writer.add_scalars('Accuracy', {'train': train_accuracy, 'val': val_accuracy}, epoch)
        scheduler.step(val_accuracy)

        # Printed after scheduler.step(): this is the LR for the next epoch.
        print('Train_loss: ', avg_loss, 'Train_accuracy: ', train_accuracy, '\n',
              'Val_loss: ', avg_val_loss, 'Val_accuracy: ', val_accuracy,
              'Lr: ', opt.param_groups[0]['lr'])

        if (epoch + 1) % 5 == 0:
            # torch.save does not create missing parent directories.
            os.makedirs('checkpoints', exist_ok=True)
            # Save model state
            torch.save({
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': opt.state_dict()},
                'checkpoints/modelOptE{0}.pt'.format(epoch))

    writer.close()

In [None]:
#!g1.1 #noqa
# Load the TensorBoard notebook extension and open it on the `runs` log
# directory (the SummaryWriter created in the next cell logs to runs/exp0).
%load_ext tensorboard
%tensorboard --logdir runs

In [None]:
#!g1.1 #noqa
# Index of the first epoch; train() numbers epochs from this value.
start = 0
# Log to runs/exp0 and flush pending events to disk every second.
writer = SummaryWriter('runs/exp0', flush_secs = 1)
# Run training; train() closes the writer when it finishes.
train(model, optimizer, scheduler, loss_func, epochs, train_loader, val_loader, writer, start = start)

In [None]:
#!g1.1 #noqa
# Save model state
import os

# torch.save does not create missing parent directories; make sure the target
# exists so a finished training run is not lost to a missing folder.
os.makedirs('checkpoints', exist_ok=True)
torch.save({'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict()},
           'checkpoints/modelOptFinal.pt')

# Look at the results

In [None]:
#!g1.1 #noqa
def predict(model: nn.Module, loader: DataLoader) -> Tuple[torch.Tensor, torch.Tensor]:
    """
        Predict class label.

        The model is switched to eval mode (and left in it) and run without
        gradient tracking on the notebook-level `device`, with a tqdm
        progress bar over the batches.

        Returns: Tuple[real, predicted] - the labels yielded by loader and
        the predicted class indexes (on CPU), each concatenated over all
        batches in loader order.
    """
    # Loop-invariant: set eval mode once rather than for every batch.
    model.eval()
    predicted = []
    real_label = []

    with torch.no_grad():
        for x, y in tqdm(loader):
            x = x.to(device)
            # softmax is monotonic, so the argmax of the raw logits is the
            # predicted class; only the indexes are kept, not the logits.
            predicted.append(torch.argmax(model(x), 1).cpu())
            real_label.append(y)
            del x, y

    return torch.cat(real_label), torch.cat(predicted)

In [None]:
#!g1.1 #noqa
# Macro-averaged F1 on the validation loader; predict() returns
# (real, predicted), which is unpacked as f1_score's first two arguments.
f1_score(*predict(model, val_loader), average = 'macro')

In [None]:
#!g1.1 #noqa
# Draw a 4x4 grid of randomly chosen validation samples, each titled with its
# real class, the predicted class and the probability of that prediction.
fig, ax = plt.subplots(4, 4, figsize = (20, 20))
ind = random.sample(range(len(val_set)), 16)  # 16 distinct random sample indexes

with torch.no_grad():
    model.eval()
    for fig_x, i in zip(ax.flatten(), ind):
        x, y = val_set[i]
        # Add a leading batch dimension before sending the image to the model.
        x = x.unsqueeze(0).to(device)
        y_pred_prob = model(x).softmax(dim = -1).cpu()
        # Both reductions run over the whole (single-row) tensor.
        y_pred = y_pred_prob.argmax()
        prob = y_pred_prob.max().item() * 100
        label = 'Real class: {0}, Predicted class: {1},\n Probability: {2:.0f}%'.format(y, y_pred, prob)
        fig_x.title.set_text(label)
        fig_x.imshow(conv_to_img(x))
plt.show()

In [None]:
#!g1.1 #noqa