In [18]:
import torch
import numpy as np
import torchvision
from torchvision import datasets, transforms, models
from PIL import Image
import imagenet_classes
%matplotlib inline
from matplotlib.pyplot import imshow
import matplotlib.pyplot as plt

In [2]:
# Build VGG-19 with pretrained weights and put it straight into inference
# mode (eval() returns the module itself, so the calls can be chained).
model = models.vgg19(pretrained=True).eval()
model  # last expression: show the architecture as the cell output

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace)
    (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (17): ReLU(inplace)

In [3]:
# preprocess image: load -> RGB -> resize -> tensor -> channel-normalize -> add batch axis

size = (224, 224)

single_image_name = 'peppers.jpg'
# Open inside a context manager so the file handle is released as soon as the
# pixel data has been copied into the converted RGB image.
with Image.open(single_image_name) as raw_img:
    img_as_img = raw_img.convert('RGB')
resize = transforms.Resize(size=size)  # define resize fn
img_as_img = resize(img_as_img)  # resize image

toTensor = transforms.ToTensor()  # define tensor transform
img_as_tensor = toTensor(img_as_img)  # convert to tensor
# Deliberately NOT called `normalize`: a later cell defines a min-max
# `normalize(x)` function, and sharing the name makes the notebook depend on
# cell execution order.
# Per-channel mean / std; presumably the statistics the pretrained weights
# expect -- confirm against the torchvision model documentation.
channel_normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
img_as_tensor = channel_normalize(img_as_tensor)
img_as_tensor = torch.unsqueeze(img_as_tensor, 0)  # add leading batch dimension

In [4]:
# forward prop
TOP_K = 3  # number of highest-probability classes to report

# Pure inference: disable autograd so no graph is recorded for the forward
# pass (this also removes the need for the legacy `.data` accessor below).
with torch.no_grad():
    logits = model(img_as_tensor)
    probs_out = torch.nn.functional.softmax(logits, dim=1)  # class probabilities

probs, preds = torch.topk(probs_out, TOP_K)  # best TOP_K per batch row

# Only one image is in the batch, so report row 0.
for rank, (class_idx, prob) in enumerate(zip(preds[0].tolist(), probs[0].tolist()), start=1):
    print('{}. {}, probability: {}'.format(rank, imagenet_classes.classes[class_idx], prob))


1. bell pepper, probability: 0.9867660999298096
2. cucumber, cuke, probability: 0.008986140601336956
3. grocery store, grocery, food market, market, probability: 0.001201258390210569


In [44]:
# list(model.children())

def forward(x, select=(1, 18, 36), layers=None):
    """Run ``x`` through a stack of layers and collect selected intermediate outputs.

    Args:
        x: Input passed to the first layer.
        select: Iterable of 0-based layer indices whose outputs should be
            returned. Defaults to the three indices this notebook visualizes.
        layers: Iterable of callables applied in order. Defaults to the
            notebook-level ``model.features``.

    Returns:
        A list with the output of each selected layer, in network order.
        Indices that do not exist in ``layers`` are silently ignored.

    Note:
        The outputs are appended without copying. If a selected layer is
        followed by an in-place op (the printed model shows ``ReLU(inplace)``),
        that op will overwrite the stored tensor; select the index of the
        in-place layer itself, or clone the result, if that matters.
    """
    if layers is None:
        layers = model.features

    wanted = set(select)
    if not wanted:
        return []
    last_needed = max(wanted)

    features = []  # collected feature maps, in network order
    for ind, layer in enumerate(layers):
        x = layer(x)
        if ind in wanted:
            features.append(x)
        if ind >= last_needed:
            # Nothing after this point is requested; skip the remaining layers.
            break
    return features

In [48]:
def normalize(x):
    """Min-max scale a tensor into the range [0, 1].

    Args:
        x: Tensor of any shape.

    Returns:
        ``(x - min(x)) / (max(x) - min(x))`` computed over the whole tensor.
        When the value range is not positive (constant input) or the tensor is
        empty, a zero tensor with the same shape and device as ``x`` is
        returned instead, so there is never a division by zero.
    """
    # Division yields a floating-point result, so the all-zero fallback uses a
    # floating dtype as well: x's own dtype when it is already floating point,
    # otherwise the default float dtype.
    out_dtype = x.dtype if x.is_floating_point() else torch.get_default_dtype()

    if x.numel() == 0:
        # torch.min / torch.max raise on empty tensors.
        return torch.zeros_like(x, dtype=out_dtype)

    min_val = torch.min(x)
    range_val = torch.max(x) - min_val

    if range_val > 0:
        # scalar min / range broadcast over every element
        return (x - min_val) / range_val

    # zeros_like keeps the shape and device of the input
    return torch.zeros_like(x, dtype=out_dtype)

In [49]:
# `layers` holds one tensor per selected layer; each still carries the batch
# axis of the single input image.
layers = forward(img_as_tensor)

GRID_ROWS, GRID_COLS = 8, 8  # at most GRID_ROWS * GRID_COLS maps are drawn per layer

# processed (min-max normalized) feature maps, one list per selected layer
layer_feature_maps = []

for layer_num, layer in enumerate(layers):

    # Drop only the batch axis; a bare squeeze() would also remove any other
    # axis that happens to have size 1.
    feature_maps = layer.detach().squeeze(0)

    # Scale every map independently to [0, 1].
    processed_feature_maps = [normalize(feature_map) for feature_map in feature_maps]
    layer_feature_maps.append(processed_feature_maps)

    fig, axarr = plt.subplots(nrows=GRID_ROWS, ncols=GRID_COLS, figsize=(15, 15))

    # Fill the grid row by row; zip stops early if the layer has fewer maps
    # than grid cells, leaving the remaining axes empty instead of raising.
    for ax, feature_map in zip(axarr.flat, processed_feature_maps):
        ax.imshow(feature_map.cpu().numpy())

    fig.savefig('feature_map_{}'.format(layer_num))
    # Close (not just clear) the figure so it is released and the inline
    # backend does not echo an empty "Figure ... with 0 Axes" for it.
    plt.close(fig)

<Figure size 1080x1080 with 0 Axes>

<Figure size 1080x1080 with 0 Axes>

<Figure size 1080x1080 with 0 Axes>

In [None]:
'''

Written answer part1b

Early in the network, the feature maps are still relatively high-resolution versions of the image.

Beginning layer:
The 5 features I chose appear to respond to certain colors in the original image, or at least
to the overall shape of the image.  One feature map responds mostly to red colors. Another
responds to green colors.  One map responds to all the peppers, i.e. just the foreground.

Middle layer:
These feature maps are already hard to interpret, but they appear to indicate where
certain objects are located in the image in general.  I can't tell
what the maps are responding to beyond that; perhaps curves.

Later layer:
These feature maps are uninterpretable.  Perhaps they represent what type
of object lies in that area, or parts of an object.



'''