# VGG

## Imports

In [1]:
import torch
import torchvision
import  torchvision.transforms as transforms
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import random
from vgg_models.vgg import vgg13_bn
import torchvision.datasets as datasets
from torch.utils.data import DataLoader

## Set up the model
### Dataloader & Transformations 

In [2]:
# Use the first CUDA device when available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Load the pretrained network, switch it to evaluation mode and move it to
# the selected device before printing its layer structure.
model_VGG = vgg13_bn(pretrained=True)
model_VGG.eval()
model_VGG.to(device)
print(model_VGG)

# Per-channel mean / std used to normalize the CIFAR10 test images.
CIFAR10_MEAN = (0.4914, 0.4822, 0.4465)
CIFAR10_STD = (0.2471, 0.2435, 0.2616)

# Inverse of the normalization below, used to recover a displayable image:
# normalizing with mean = -mean/std and std = 1/std undoes (x - mean) / std.
inv_normalize = transforms.Normalize(
    mean=[-m / s for m, s in zip(CIFAR10_MEAN, CIFAR10_STD)],
    std=[1 / s for s in CIFAR10_STD],
)

# Input pipeline: convert the PIL image to a tensor, then normalize it
# channel-wise.
transform_c = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD),
])

# CIFAR10 test split, iterated in fixed order in batches of 1024.
test_dataset = datasets.CIFAR10(root='../MLP/data', train=False, download=True, transform=transform_c)
test_loader = DataLoader(test_dataset, batch_size=1024, shuffle=False)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU(inplace=True)
    (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU(inplace=True)
    (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (14): Conv2d(128, 256

## Accuracy

### Google Images Dataset

### CIFAR10 Dataset

In [None]:
# Running tallies accumulated over every batch of the test loader.
correct_ex = 0
total_ex = 0

# Pure evaluation: gradients are not needed, so disable autograd tracking.
with torch.no_grad():
    for batch_images, batch_labels in test_loader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)
        scores = model_VGG(batch_images)
        # The index of the largest score in each row is the predicted class.
        predicted = torch.max(scores, 1)[1]
        correct_ex += torch.eq(predicted, batch_labels).sum().item()
        total_ex += batch_labels.size(0)
        # Progress report after each batch.
        print("Correct: ", correct_ex, "Total: ", total_ex)

# Final accuracy as a percentage of all evaluated examples.
print("Model accuracy on CIFAR10: ", 100 * correct_ex / total_ex)

## Saliency

### Functions

In [20]:
from copy import deepcopy

# Conversion used inside saliency(): image -> tensor only. No normalization
# step is included because the input is expected to be normalized already.
transform = transforms.Compose([transforms.ToTensor()])
def saliency(img, model):
    """Plot a gradient-based saliency map of ``img`` for ``model``'s top class.

    The image is converted with the module-level ``transform`` (which applies
    no normalization), passed through ``model``, and the highest output score
    is back-propagated to the input. The maximum absolute gradient over the
    channel axis is rescaled to [0, 1] and drawn with the ``hot`` colormap in
    the right-hand slot of a 1x2 subplot grid, after which ``plt.show()`` is
    called. The left-hand slot is left for the caller to fill.

    Args:
        img: Image accepted by ``transform`` (callers in this file pass PIL
            images). It is deep-copied before conversion.
        model: Network to explain. Side effect: it is switched to eval mode
            and ``requires_grad`` is turned off on all of its parameters.

    Returns:
        None. The map is only displayed.
    """
    # Gradients w.r.t. the weights are not needed for a trained model.
    for param in model.parameters():
        param.requires_grad = False
    model.eval()

    # Run on whichever device the model lives on; feeding a CPU tensor to a
    # CUDA model raises a device-mismatch error.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    # Convert the image to a single-sample batch on the model's device.
    sample = transform(deepcopy(img)).unsqueeze(0).to(model_device)

    # We want the gradient of the top score w.r.t. the input pixels.
    sample.requires_grad = True

    # Forward pass, then back-propagate the highest class score.
    preds = model(sample)
    score, _ = torch.max(preds, 1)
    score.backward()

    # Collapse the channel axis by taking the largest absolute gradient.
    slc, _ = torch.max(torch.abs(sample.grad[0]), dim=0)

    # Rescale to [0, 1]; a constant map would divide by zero, so show zeros.
    slc_min = slc.min()
    value_range = slc.max() - slc_min
    if value_range > 0:
        slc = (slc - slc_min) / value_range
    else:
        slc = torch.zeros_like(slc)

    # Right-hand panel of a 1x2 grid; matplotlib needs a CPU NumPy array.
    plt.subplot(1, 2, 2)
    plt.imshow(slc.detach().cpu().numpy(), cmap=plt.cm.hot)
    plt.show()

### Google Images Dataset

In [None]:
# Output index treated as the "dog" class when filtering predictions.
DOG_CLASS_INDEX = 5

# Visualize saliency for each of the 20 sample dog images that the model
# actually classifies as a dog.
for i in range(20):
    path_img = f'/home/guptav/DLProject-MLP/Experiments/Vansh/Sample_Images/dog ({i+1}).jpg'
    # NOTE(review): resize_image is not defined in the visible part of this
    # notebook; it is assumed to return a PIL image - confirm.
    img = resize_image(path_img, size = (224, 224)).convert('RGB')
    # The model was moved to `device`, so the input batch must follow it.
    img_ten = transform_c(img).unsqueeze(0).to(device)
    # Classification only: no autograd graph is needed for this forward pass.
    with torch.no_grad():
        output = model_VGG(img_ten)
    # Use torch's own argmax; NumPy's cannot consume CUDA or grad-tracking
    # tensors. saliency() runs outside no_grad because it back-propagates.
    if output.argmax(dim=1).item() == DOG_CLASS_INDEX:
        saliency(img, model_VGG)

### CIFAR10 Dataset

In [None]:
# Human-readable class names, indexed by label.
classes = ["airplane", "automobile", "bird", "cat", "deer", "dog", "frog", "horse", "ship", "truck"]

# Tensor -> PIL conversion used for display and for feeding saliency().
transform_img = transforms.ToPILImage()

for inputs, labels in test_loader:
    inputs, labels = inputs.to(device), labels.to(device)
    # Batch classification needs no gradients; skipping the autograd graph
    # saves memory on these 1024-sample batches.
    with torch.no_grad():
        outputs = model_VGG(inputs)
    _, predicted = torch.max(outputs, 1)
    # Compare labels as plain Python ints rather than 0-dim tensors.
    for sample, label, pred in zip(inputs, labels.tolist(), predicted.tolist()):
        # Keep a random ~1% of samples, and of those only the correctly
        # classified ones.
        if random.random() >= 0.01 or label != pred:
            continue
        print(classes[label])
        plt.figure(figsize=(10, 10))
        # Left panel: the de-normalized input image. saliency() draws the
        # right panel and calls plt.show().
        plt.subplot(1, 2, 1)
        plt.imshow(transform_img(inv_normalize(sample)))
        # NOTE(review): the still-normalized tensor is converted to a PIL
        # image here, which presumably quantizes it and does not preserve
        # values outside [0, 1] - confirm this is the intended model input.
        saliency(transform_img(sample), model_VGG)