In [None]:
import ast
import io
import time

import cv2
import matplotlib.pyplot as plt
import numpy as np
import requests
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from PIL import Image
from torchvision import models, datasets, transforms

from model import Net, ConvNet

device = torch.device("cpu")

In [None]:
# make runs repeatable: numpy's and torch's global RNGs get the same fixed seed
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

In [None]:
# load everything that we need
# here we load alexnet, an already trained neural network
# you can play with other models in models.*, but for some of the others it is harder
# to get the activations in the middle of the network
# .eval() switches the freshly created module (which starts in training mode) to
# inference mode; otherwise the dropout layers of the classifier stay active and
# the predictions change from run to run
alexnet = models.alexnet(pretrained=True).eval()

# define a 'layer' that normalizes an image so that it is usable by the network
class Normalize(nn.Module):
    """
    Layer that standardizes an RGB image per channel: (x - mean) / std.

    The input is expected to have its three channels in dimension -3, e.g.
    (batch, 3, height, width). A single (3, height, width) image works as well;
    broadcasting then yields a (1, 3, height, width) result.
    """

    def __init__(self):
        super().__init__()
        # Shape (1, 3, 1, 1) so that the statistics broadcast over batch, height and width.
        # They are registered as buffers instead of plain attributes so that
        # .to(device) / .cuda() / .double() move them together with the module.
        self.register_buffer(
            'mean',
            torch.tensor([0.485, 0.456, 0.406], dtype=torch.float32).view(1, 3, 1, 1),
        )
        self.register_buffer(
            'std',
            torch.tensor([0.229, 0.224, 0.225], dtype=torch.float32).view(1, 3, 1, 1),
        )

    def forward(self, x):
        # broadcasting handles the expansion to x's shape; no explicit expand() needed
        return (x - self.mean) / self.std

# define a layer that flattens whatever it gets passed into a vector
class Flatten(nn.Module):
    """Keep the first dimension of the input and collapse all remaining ones into one."""

    def forward(self, input):
        leading = input.shape[0]
        # view() (not a copy): the result shares storage with the input
        return input.view(leading, -1)

# function that takes our pre-processed image and computes a numpy matrix that we can plot as an image
def img2numpy(x):
    """
    Convert an image tensor into a numpy array that can be plotted.

    All dimensions of size 1 (such as a leading batch dimension of one image) are
    dropped; the remaining three dimensions are reordered from channel-first to
    channel-last, i.e. (C, H, W) -> (H, W, C).

    x - image tensor; exactly three dimensions must remain after squeezing
    returns - numpy array that is an independent copy (writing to it never changes x)
    """
    # detach(): drop autograd history; clone(): numpy() would otherwise share memory with x;
    # cpu(): Tensor.numpy() only works for tensors in host memory
    array = x.detach().clone().squeeze().cpu().numpy()
    return np.transpose(array, (1, 2, 0))

# preprocess an image for the network
preprocess = transforms.Compose([
    # Resize is the replacement for transforms.Scale, which was deprecated and
    # is no longer available in current torchvision releases
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
])

# download an image from flickr
# timeout + raise_for_status: fail with a clear error on network/HTTP problems
# instead of hanging or handing an error page to PIL
response = requests.get("http://c1.staticflickr.com/5/4070/5148597478_0c34ec0b7e_n.jpg", timeout=30)
response.raise_for_status()
# convert("RGB") guarantees exactly three channels (the normalization layer uses
# three per-channel statistics), even for grayscale or RGBA files
image = Image.open(io.BytesIO(response.content)).convert("RGB")
# add a batch dimension of size 1 in front: the network works on batches
image = preprocess(image).unsqueeze(0)
plt.imshow(img2numpy(image))

In [None]:
# download a mapping of the imagenet class ids to text
# https://gist.github.com/yrevar/942d3a0ac09ec9e5eb3a
imagenet_classes_request = requests.get(
    "https://gist.githubusercontent.com/yrevar/942d3a0ac09ec9e5eb3a/raw/c2c91c8e767d04621020c30ed31192724b863041/imagenet1000_clsid_to_human.txt",
    timeout=30,
)
# stop right here on an HTTP error so that an error page is never mistaken for the mapping
imagenet_classes_request.raise_for_status()
# decode before printing: the mapping is then shown line by line instead of as one bytes repr
print(imagenet_classes_request.content.decode("utf-8"))

In [None]:
# turn the downloaded id-to-text mapping into a dict
# the text was downloaded from the internet, so it must not be passed to eval(),
# which would execute arbitrary code contained in it
# ast.literal_eval only accepts plain Python literals (here: a dict literal)
# and raises an exception for anything else
imagenet_classes = ast.literal_eval(imagenet_classes_request.content.decode("utf-8").strip())

In [None]:
# try out the neural network
# we want to run our normalize layer first and then alexnet
model = nn.Sequential(Normalize(), alexnet)
logits = model(image)
# rank the classes with the logits computed above instead of running a second,
# redundant forward pass; argsort is ascending, so reverse and keep the first five
t = logits.detach().numpy().ravel().argsort()[::-1][:5]
print('Top 5 classes for image:')
print([imagenet_classes[class_id] for class_id in t.tolist()])

We now take a closer look at the layers in the alexnet model.
alexnet has two parts, 'features' and 'classifier', where 'features' is the convolutional part of the neural network.

In [None]:
# show the layers of the 'features' part of alexnet as the output of this cell
alexnet.features

In [None]:
# we see that we can use the individual parts of alexnet
# but we need to add a Flatten() layer between the two parts
model = nn.Sequential(Normalize(), alexnet.features, Flatten(), alexnet.classifier)
logits = model(image)
# the prediction must come from the re-assembled model: calling alexnet(...) here
# would bypass `model` and therefore not check the split at all
t = logits.detach().numpy().ravel().argsort()[::-1][:5]
print('Top 5 classes for image:')
print([imagenet_classes[class_id] for class_id in t.tolist()])

In [None]:
def _gradcam_heatmap(model, image, layer, target):
    """
    Compute the Grad-CAM heatmap of class `target` for the output of model.features[layer].

    Returns a 2-d numpy array with values in [0, 1] that has the height and width of `image`.
    """
    feature_layers = list(model.features.children())
    # first part: input normalization and everything up to and including the targeted layer
    before = nn.Sequential(Normalize(), *feature_layers[:layer + 1])
    # second part: the remaining feature layers followed by the classifier
    remaining = feature_layers[layer + 1:]
    if hasattr(model, 'avgpool'):
        # some torchvision versions pool between 'features' and 'classifier'
        remaining.append(model.avgpool)
    after = nn.Sequential(*remaining, Flatten(), model.classifier)

    # evaluate in inference mode (no dropout) and restore the previous mode afterwards
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            layer_activations = before(image)
        # make the activations a leaf tensor the class score can be differentiated against
        layer_activations.requires_grad_(True)
        # clone(): a following in-place layer (e.g. ReLU(inplace=True)) must not
        # overwrite the leaf tensor itself
        logits = after(layer_activations.clone())
        # autograd.grad returns the gradient directly and leaves the .grad fields
        # of the model parameters untouched
        gradients, = torch.autograd.grad(logits[0, target], layer_activations)
    finally:
        model.train(was_training)

    # channel weights: gradients averaged over the two spatial dimensions
    weights = gradients.mean(dim=3, keepdim=True).mean(dim=2, keepdim=True)
    # weighted sum of the activation maps; ReLU keeps only positive evidence for the class
    cam = F.relu((weights * layer_activations.detach()).sum(dim=1, keepdim=True))
    # upsample the coarse map to the resolution of the input image
    cam = F.interpolate(cam, size=tuple(image.shape[-2:]), mode='bilinear', align_corners=False)
    cam = cam[0, 0]
    # rescale to [0, 1]; an all-zero map (no positive evidence) is left as it is
    cam = cam - cam.min()
    peak = cam.max()
    if peak > 0:
        cam = cam / peak
    return cam.cpu().numpy()


def gradcam(model, image, layer, target, treshold=0.5):
    """
    Visualize with Grad-CAM which image regions speak for class `target`.

    Plots three panels: the original image, the heatmap, and the heatmap overlayed
    on the image. Nothing is returned.

    This method takes:
    model - an alexnet
    image - an input image (not normalized, with a leading batch dimension of size 1)
    layer - an integer that indexes alexnet.features; this gives the layer that we use for the algorithm
    target - the target class for the visualization
    treshold - how much of the heatmap to show in the overlayed image:
               only pixels whose heatmap value (in [0, 1]) is at least `treshold` are drawn
    """
    # layer must be a valid index into model.features (1..12 for alexnet)
    assert 1 <= layer <= len(model.features) - 1, 'layer is not a valid index into model.features'

    L = _gradcam_heatmap(model, image, layer, target)
    # RGBA version of the heatmap: fully transparent below the threshold,
    # half transparent elsewhere so that the image stays visible underneath
    L_transparent = plt.cm.jet(L)
    L_transparent[..., 3] = np.where(L >= treshold, 0.5, 0.0)

    # show the results
    f, axarr = plt.subplots(1, 3, figsize=(18, 6))
    f.suptitle('Visualization for Class: ' + imagenet_classes[target], fontsize=16)
    axarr[0].imshow(img2numpy(image))
    axarr[0].set_title('Original Image')
    axarr[1].imshow(L, interpolation='nearest', cmap='jet', vmin=0, vmax=1)
    axarr[1].set_title('Heatmap')
    axarr[2].imshow(img2numpy(image))
    axarr[2].imshow(L_transparent)
    axarr[2].set_title('Overlay')
    

## Visualizing different classes

In [None]:
# same layer, different target classes: one figure per class
# (a single loop instead of one copy-pasted cell per class)
for s in (
    163,  # 'bloodhound, sleuthhound'
    282,  # tiger cat
    243,  # 'bull mastiff'
):
    gradcam(alexnet, image, 9, s, treshold=0.2)

## Visualizing one class for various different layers

In [None]:
s = 163 #'bloodhound, sleuthhound'
# same class, layers 2 to 12 of alexnet.features: one figure per layer
# (a single loop instead of eleven copy-pasted cells)
for layer in range(2, 13):
    gradcam(alexnet, image, layer, s, treshold=0.2)