## Changes for Colaboratory

In [None]:
# flake8-noqa-cell
# Mount Google Drive under /content/drive so that the project files and
# checkpoints stored there are reachable from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import sys

In [None]:
# Add the project folder on Google Drive to the module search path
# (presumably where processingDataSet / VGGModel imported below live — confirm).
sys.path.append('/content/drive/MyDrive/ImageNetproject')

In [None]:
# Display the module search path to check that the entry was added.
sys.path

In [None]:
# Root of the project folder on Google Drive. The trailing slash matters:
# the rest of the notebook builds paths by plain string concatenation.
path_to_drive = '/content/drive/MyDrive/ImageNetproject/'

# Copy the dataset from Google Drive into /content on the runtime
!cp -R $path_to_drive'ImageNetDataSet/unzipedPart/ILSVRC' '/content/'

# Train VGG model

In [None]:
import pickle
import random
import shutil
from typing import Any

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from torchsummary import summary
from torchvision import transforms
from tqdm import tqdm

from processingDataSet import ImageNetDataset
from VGGModel import vgg19

In [None]:
# Show the GPU(s) available to this runtime.
!nvidia-smi

In [None]:
# Show the Python 3 version installed on the runtime.
!python3 --version

In [None]:
# Root directory handed to ImageNetDataset below; it points into the
# ILSVRC folder that was copied to /content earlier in the notebook.
data_path = '/content/ILSVRC/Data/CLS-LOC/train'

In [None]:
# Seed used for both Python's `random` module and PyTorch.
random_seed = 10
# Prefer the GPU, but fall back to the CPU so the notebook still runs on a
# runtime without CUDA instead of failing at the first `.to(device)` call.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [None]:
# Seed Python's and PyTorch's random number generators for reproducibility.
# torch.manual_seed is the last expression, so its return value is what the
# cell displays — keep the order.
random.seed(random_seed)
torch.manual_seed(random_seed)

In [None]:
# Training-time preprocessing: resize, convert to tensor, normalise with the
# commonly used ImageNet channel mean/std, then apply random augmentations.
# NOTE: random.randint() is evaluated exactly once, when this pipeline is
# built, so the rotation bound is a single fixed number for the whole run
# (determined by the seed); RandomRotation then samples each image's angle
# within the range derived from that bound.
transform_train = transforms.Compose([
    transforms.Resize(size=[224, 224]),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=(0.485, 0.456, 0.406),
        std=(0.229, 0.224, 0.225),
    ),
    transforms.RandomRotation(degrees=random.randint(0, 180)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.2),
])

In [None]:
# Evaluation-time preprocessing: the same resize / tensor conversion /
# normalisation as the training pipeline, without any random augmentation.
transform_test = transforms.Compose([
    transforms.Resize(size=[224, 224]),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=(0.485, 0.456, 0.406),
        std=(0.229, 0.224, 0.225),
    ),
])

In [None]:
# Instantiate the project's vgg19 model with num_classes=10 and move it to
# the selected device.
model = vgg19(num_classes = 10).to(device)

In [None]:
# Print a torchsummary report of the model for a (3, 224, 224) input.
summary(model, (3, 224, 224), device = device)

In [None]:
# Training hyperparameters
lr: float = 1e-2       # initial learning rate given to the optimizer
batch_size: int = 64   # batch size used by both data loaders
epochs: int = 60       # number of epochs passed to train()

In [None]:
# The pickle file stores two objects back to back; they are read in order
# into the training and validation split descriptions.
# NOTE: unpickling can execute arbitrary code — only load trusted files.
with open(file=path_to_drive + 'filtered_data_10class.pkl', mode='rb') as split_file:
    train_data = pickle.load(split_file)
    val_data = pickle.load(split_file)

In [None]:
# Build the datasets from the same image root: the training split gets the
# augmenting transform, the validation split the deterministic one.
train_set = ImageNetDataset(data_path, train_data, transform_train)
val_set = ImageNetDataset(data_path, val_data, transform_test)

In [None]:
# Report how many samples each split contains.
print('Train data size: ', len(train_set), 'Validation data size: ', len(val_set))

In [None]:
# Batch iterators for both splits. Only the training data is shuffled.
# drop_last=True discards a trailing incomplete batch, so every batch has
# exactly batch_size samples.
train_loader = DataLoader(
    train_set,
    batch_size=batch_size,
    shuffle=True,
    drop_last=True,
)
val_loader = DataLoader(
    val_set,
    batch_size=batch_size,
    shuffle=False,
    drop_last=True,
)

In [None]:
# SGD with momentum as the optimizer.
optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9)

# Halve the learning rate when the monitored metric stops increasing
# (mode='max'); train() steps this scheduler with the validation accuracy.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='max',
    factor=0.5,
    patience=3,
    cooldown=5,
)

# Classification loss applied to the raw model outputs.
loss_func = nn.CrossEntropyLoss()

In [None]:
def train(model: nn.Module, opt: torch.optim.Optimizer, scheduler: Any,
          loss_fn: nn.Module, epochs: int, data_tr: DataLoader,
          data_val: DataLoader, writer: SummaryWriter, start: int = 0) -> None:
    """
    Train and validate ``model`` for ``epochs`` epochs.

    Epochs are numbered from ``start`` to ``start + epochs - 1``; that number
    is used as the TensorBoard step and in checkpoint / log directory names.

    Relies on the notebook-level globals ``device`` (where batches are moved)
    and ``path_to_drive`` (root folder for checkpoints and copied logs).

    Args:
        model: network to optimise; expected to live on ``device`` already.
        opt: optimizer updating ``model``'s parameters.
        scheduler: learning-rate scheduler stepped once per epoch with the
            validation accuracy (e.g. ``ReduceLROnPlateau`` in ``'max'`` mode).
        loss_fn: loss applied to ``(model output, target)``.
        epochs: number of epochs to run in this call.
        data_tr: loader yielding ``(inputs, targets)`` training batches.
        data_val: loader yielding ``(inputs, targets)`` validation batches.
        writer: TensorBoard writer; it is closed when the function exits,
            including when training is interrupted by an exception.
        start: number of epochs already completed in earlier runs.

    Raises:
        ValueError: if either loader yields no batches (checked up front,
            before any training work is done).

    Saves:
    * learning rate, loss and accuracy into TensorBoard at each epoch;
    * model, optimizer and (when available) scheduler state every 5 epochs,
      together with a copy of the ``runs`` log directory.
    """
    num_train_batches = len(data_tr)
    num_val_batches = len(data_val)
    if num_train_batches == 0 or num_val_batches == 0:
        # With drop_last=True a dataset smaller than the batch size yields no
        # batches at all; fail now instead of after a full training epoch.
        raise ValueError(
            'Both data loaders must yield at least one batch '
            '(got {0} train and {1} validation batches).'.format(
                num_train_batches, num_val_batches))

    try:
        for epoch in range(start, start + epochs):
            # Running sums stay on the device as tensors and are converted to
            # Python floats once per epoch, avoiding a host sync per batch.
            avg_loss = 0.0
            train_accuracy = 0.0

            model.train()
            for x_batch, y_batch in tqdm(data_tr):
                x_batch = x_batch.to(device)
                y_batch = y_batch.to(device)

                opt.zero_grad()
                y_pred = model(x_batch)
                loss = loss_fn(y_pred, y_batch)
                loss.backward()
                opt.step()

                avg_loss += loss.detach()
                # Fraction of correctly classified samples in this batch.
                train_accuracy += (torch.argmax(y_pred, 1) == y_batch).float().mean()

                # Drop references early so the batch memory can be reused.
                del x_batch, y_batch, y_pred, loss

            # Mean over batches.
            avg_loss = float(avg_loss) / num_train_batches
            train_accuracy = float(train_accuracy) / num_train_batches

            avg_val_loss = 0.0
            val_accuracy = 0.0
            model.eval()
            with torch.no_grad():
                for x, y in data_val:
                    x, y = x.to(device), y.to(device)
                    y_pred = model(x)
                    avg_val_loss += loss_fn(y_pred, y)
                    val_accuracy += (torch.argmax(y_pred, 1) == y).float().mean()
                    del x, y, y_pred

            avg_val_loss = float(avg_val_loss) / num_val_batches
            val_accuracy = float(val_accuracy) / num_val_batches

            # Log the learning rate that was used during this epoch, i.e.
            # before the scheduler gets a chance to change it.
            writer.add_scalar('Lr', opt.param_groups[0]['lr'], epoch)
            writer.add_scalars('Loss', {'train': avg_loss, 'val': avg_val_loss}, epoch)
            writer.add_scalars('Accuracy', {'train': train_accuracy, 'val': val_accuracy}, epoch)
            scheduler.step(val_accuracy)

            print('Train_loss: ', avg_loss, 'Train_accuracy: ', train_accuracy, '\n',
                  'Val_loss: ', avg_val_loss, 'Val_accurasy: ', val_accuracy,
                  'Lr: ', opt.param_groups[0]['lr'])

            if (epoch + 1) % 5 == 0:
                # Save training state on Google Drive. The scheduler state is
                # included so that a resumed run keeps its plateau tracking.
                checkpoint = {
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': opt.state_dict()}
                if hasattr(scheduler, 'state_dict'):
                    checkpoint['scheduler_state_dict'] = scheduler.state_dict()
                torch.save(checkpoint,
                           path_to_drive + 'checkpoints/modelOptE{0}.pt'.format(epoch))

                # Flush pending events so the copied logs include this epoch,
                # then copy the TensorBoard logs to Google Drive.
                writer.flush()
                shutil.copytree('runs', path_to_drive + 'logs/runs{0}'.format(epoch),
                                dirs_exist_ok = True)
    finally:
        # Always release the writer, even if training fails or is interrupted.
        writer.close()

In [None]:
# Loading model weights which we got before (if we have it)
checkpoints = torch.load(path_to_drive + 'checkpoints/modelOptE44.pt')
model.load_state_dict(checkpoints['model_state_dict'])
optimizer.load_state_dict(checkpoints['optimizer_state_dict'])

# Move TensorBoard log (if it exists) to the work directory
!cp -r $path_to_drive'logs/runs44/' 'runs/'

In [None]:
# Load the TensorBoard extension and start it on the local `runs` directory
%load_ext tensorboard
%tensorboard --logdir runs

In [None]:
# NOTE(review): if the checkpoint cell (modelOptE44.pt) was run before this
# one, `start` presumably should be 45 so that epoch numbers, TensorBoard
# steps and checkpoint names continue instead of restarting at 0 — confirm.
start = 0  # number of epochs already completed in earlier runs
# flush_secs = 1 makes the writer flush pending events to disk every second.
writer = SummaryWriter('runs/expColab2', flush_secs = 1)
train(model, optimizer, scheduler, loss_func, epochs, train_loader, val_loader, writer, start = start)

In [None]:
# Persist the final model and optimizer state on Google Drive
torch.save(
    obj={
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
    },
    f=path_to_drive + 'checkpoints/modelOptFinal.pt',
)

In [None]:
# Making flake8 checks
# !flake8-nb $path_to_drive'ColabWorkflow/trainVGGModel.ipynb'