# Model Training Notebook

In [None]:
%matplotlib inline
import sys
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import numpy as np
from datetime import datetime
from IPython.core.display import display, HTML
import cv2
from PIL import Image
from pathlib import Path
import torchvision
import torch
import torch.nn as nn
import torch.nn.functional as F
import ast

from sklearn.model_selection import train_test_split

# repeatable functions housed in the utils file and imported here
from utils import *
from model_training_utils import train as training_loop
from model_training_utils import validate as validation_loop
from model_training_utils import hp_grid_search
from models import CNN, YOLO, VGG, PreTrainedVGG, SimpleVGG

## Create Training and Validation Datasets

In [None]:
# Load the annotation table. The `new_bb` column is parsed by `from_np_array`,
# which is not defined in this notebook (presumably it comes from the
# `from utils import *` star-import -- TODO confirm).
df= pd.read_csv('./datasets/annotations_map.csv', converters={'new_bb': from_np_array})

In [None]:
# Split the annotations into training and validation sets (25% held out,
# fixed seed so the split is reproducible). Features are the image path and
# bounding box columns; the target is the `class` column.
df_train = df.reset_index()
X = df_train[['new_path', 'new_bb']]
Y = df_train['class']
X_train, X_val, y_train, y_val = train_test_split(
    X,
    Y,
    test_size=0.25,
    random_state=42,
)
for split_label, split_features in (('training', X_train), ('validation', X_val)):
    print('Number of ' + split_label + ' observations: ', len(split_features))

## Build Simple CNN

In [None]:
# Instantiate the project's CNN (imported from `models`) with no constructor arguments.
simple_cnn = CNN()

## Build VGG
Implementation of the VGG-16 architecture based on https://neurohive.io/en/popular-networks/vgg16/. VGG-16 is known for its high accuracy and speed on object detection tasks, largely attributed to its 3x3 kernel size.

In [None]:
# Instantiate the project's VGG model (imported from `models`) with no constructor arguments.
vgg = VGG()

## Pre-trained VGG
A PyTorch VGG-16 model that was pre-trained on object identification. Used as a benchmark against which to compare our own implementations.

In [None]:
# Instantiate the project's PreTrainedVGG wrapper (imported from `models`) with no constructor arguments.
preTrainedVGG = PreTrainedVGG()

## Build Simple VGG
Our simplified VGG-based model, which attempts to achieve better performance by using fewer layers. The idea behind this was that the many layers and transformations in VGG can cause it to struggle with smaller objects.

In [None]:
# Instantiate the project's SimpleVGG model (imported from `models`) with no constructor arguments.
simple_vgg = SimpleVGG()

## Build YOLO

#### The Design of the YOLO NN was taken from the following paper:

https://arxiv.org/pdf/1506.02640.pdf - "You Only Look Once: Unified, Real-Time Object Detection" by Redmon, Divvala, Girshick, and Farhadi

The following paper describes YOLO v2:
https://arxiv.org/pdf/1612.08242v1.pdf - "YOLO9000: Better, Faster, Stronger" by Redmon and Farhadi

In [None]:
# Instantiate the project's YOLO model (imported from `models`) with no constructor arguments.
yolo = YOLO()

# Reference Model

In [None]:
from torchvision import transforms

In [None]:
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
class SingleLinear(nn.Module):
    """Single fully connected layer mapping 1000 input features to 4 outputs."""

    def __init__(self):
        """Create the 1000 -> 4 linear layer."""
        super().__init__()
        self.linear = nn.Linear(1000, 4)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the result."""
        return self.linear(x)

In [None]:
# Reference model: a pretrained torchvision Inception v3 (auxiliary logits
# disabled) followed by the 1000 -> 4 SingleLinear head. Note that `model` is
# rebound by the setup cell further down whenever `model_type` names one of
# the project models.
inception_backbone = torchvision.models.inception_v3(pretrained=True, aux_logits=False)
model = nn.Sequential(inception_backbone, SingleLinear())

# Train the Model


- Structured similarly to the main.py file from the PyTorch part of A2

In [None]:
## Define Hyperparameters -- Currently setting values that we can modify
# Selects the criterion in the setup cell below: "l1" -> nn.L1Loss, "l2" -> nn.MSELoss.
loss_type = "l1"
# Learning rate, momentum and weight decay handed to torch.optim.SGD below.
learning_rate = 0.001
momentum = 0.5
reg = 1e-2

# Batch sizes for the training and validation DataLoaders.
training_batch_size = 40
validation_batch_size = 40

# Chooses which of the models built above is trained; the values recognised by
# the setup cell are "SimpleCNN", "YOLO", "VGG", "PreTrainedVGG" and "SimpleVGG".
model_type= "SimpleCNN"

In [None]:
# Resolve the configured loss name to a criterion instance. A value other
# than "l1"/"l2" leaves `criterion` as it was (or unbound).
for loss_name, loss_cls in (("l1", nn.L1Loss), ("l2", nn.MSELoss)):
    if loss_type == loss_name:
        criterion = loss_cls()

# Select the model to train. The chain is kept lazy so that only the chosen
# model has to exist; an unrecognised `model_type` keeps the current `model`.
if model_type == "SimpleCNN":
    model = simple_cnn
elif model_type == "YOLO":
    model = yolo
elif model_type == "VGG":
    model = vgg
elif model_type == "PreTrainedVGG":
    model = preTrainedVGG
elif model_type == "SimpleVGG":
    model = simple_vgg

# Datasets are built from the image paths, bounding boxes and class labels
# of each split.
train_ds = WaldoDataset(X_train['new_path'], X_train['new_bb'], y_train)
valid_ds = WaldoDataset(X_val['new_path'], X_val['new_bb'], y_val)

# Only the training loader shuffles.
train_dl = DataLoader(
    train_ds,
    batch_size=training_batch_size,
    shuffle=True,
)
valid_dl = DataLoader(
    valid_ds,
    batch_size=validation_batch_size,
)

# SGD with momentum; `reg` is applied as weight decay.
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=learning_rate,
    momentum=momentum,
    weight_decay=reg,
)

In [None]:
# Train the selected model for 20 epochs with per-epoch output enabled and
# without returning the loss history.
# NOTE(review): `pretrained_model=True` is passed regardless of `model_type`;
# its meaning lives in model_training_utils.train -- confirm it is intended
# for the non-pretrained models as well.
training_loop(
    model=model,
    optimizer=optimizer,
    train_dl=train_dl,
    valid_dl=valid_dl,
    epochs=20,
    criterion=criterion,
    verbose=True,
    return_loss=False,
    pretrained_model=True,
)

In [None]:
# Candidate values for the hyperparameter grid search.
# NOTE(review): these assignments rebind `loss_type`, `learning_rate`,
# `momentum` and `reg` from the scalars set in the hyperparameter cell to
# lists, so re-running the setup cell afterwards will see lists.
loss_type = ["l1"]
learning_rate = [0.001, 0.0001]
momentum = [0.9]
reg = [0.01]
batch_size = [10]

# Collect the search configuration in one place, then run the search.
grid_search_kwargs = dict(
    model_type="SimpleCNN",
    lr_list=learning_rate,
    momentum_list=momentum,
    reg_list=reg,
    batch_size_list=batch_size,
    train_ds=train_ds,
    valid_ds=valid_ds,
    optimizer=optimizer,
    loss_type_list=loss_type,
    epochs=10,
    save_all_plots="Yes",
    save_final_plot="Yes",
    final_plot_prefix="Test",
    return_all_loss=True,
)
all_training_loss, all_validation_loss = hp_grid_search(**grid_search_kwargs)

## Generate GradCAM
Generates GradCAM images for Simple CNN and saves them to images/SimpleCNNResizedGradCam

In [None]:

# Ref: https://medium.com/@stepanulyanin/implementing-grad-cam-in-pytorch-ea0937c31e82
# Grad-CAM over the training batches: for every sample, back-propagate the
# summed model output, weight the stored activations by the pooled gradients,
# turn the result into a heatmap and overlay it on the resized source image.
if model_type == "SimpleCNN":
    # cv2.imwrite does not create missing directories (it only returns False),
    # so make sure the output folder exists before the loop starts.
    gradcam_dir = './images/' + model_type + 'ResizedGradCam/'
    Path(gradcam_dir).mkdir(parents=True, exist_ok=True)

    for x, y_bb in train_dl:
        x = x.float()
        for i, sample in enumerate(x):
            # The model is fed one sample at a time, with a leading batch
            # dimension of size 1 added.
            single_input = torch.unsqueeze(sample, dim=0)
            output = model(single_input)

            output.sum().backward()

            gradients = model.get_activations_gradient()

            # not sure about this dims
            pooled_gradients = torch.mean(gradients, dim=1)

            # NOTE(review): the activations are computed from the whole batch
            # with its first two axes swapped, not from `single_input` as in
            # the referenced article -- confirm this is intended.
            # They are recomputed per sample because the loop below scales
            # them in place.
            x_perm = x.permute(1,0,2,3)
            activations = model.get_activations(x_perm).detach()

            for k in range(len(pooled_gradients)):
                activations[:, k] *= pooled_gradients[k]

            heatmap = torch.mean(activations, dim=1).squeeze()

            # Keep only positive contributions; done with torch so the value
            # stays a tensor for the normalisation below.
            heatmap = torch.clamp(heatmap, min=0)

            # Normalise to [0, 1]; skip the division for an all-zero map so it
            # does not turn into NaNs.
            peak = torch.max(heatmap)
            if peak > 0:
                heatmap /= peak

            # NOTE(review): `i` is the position inside the current batch and
            # train_dl shuffles, so the image loaded here is not guaranteed to
            # be the sample being explained, and every batch overwrites the
            # files written by the previous one -- confirm intended.
            source_path = './images/resized/' + str(i + 1) + '.jpg'
            img = cv2.imread(source_path)
            if img is None:
                # cv2.imread signals a missing/unreadable file by returning None.
                print('GradCAM: could not read ' + source_path + ', skipping')
                continue

            heatmap = cv2.resize(
                heatmap.cpu().numpy().astype(np.float32),
                (img.shape[1], img.shape[0]),
            )
            heatmap = np.uint8(255 * heatmap)
            heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
            superimposed_img = heatmap * .8 + img

            target_path = gradcam_dir + str(i + 1) + '.jpg'
            if not cv2.imwrite(target_path, superimposed_img):
                print('GradCAM: failed to write ' + target_path)
else:
    print('GradCAM requires significant setup in the model and currently only works for SimpleCNN')
