## Pretrained VGG16

https://pytorch.org/vision/stable/models.html

In [None]:
import torch
from PIL import Image
from torchvision import transforms
import torch.nn as nn
import matplotlib.pyplot as plt
import numpy as np
import collections
from typing import DefaultDict, Tuple, List
from functools import partial

# Load VGG16 with pretrained weights from the torchvision hub (network access
# is needed the first time the weights are fetched).
model = torch.hub.load('pytorch/vision:v0.10.0', 'vgg16', pretrained=True)

# `use_cuda` is the user's preference; the GPU is only selected when one is
# actually available, so the notebook also runs on CPU-only machines.
use_cuda = True
device = torch.device("cuda" if use_cuda and torch.cuda.is_available() else "cpu")
model.to(device)
# Switch to evaluation mode: the model is only used for inference here.
model.eval()

## Creating labeled feature patches from test images manually

In [None]:
## Download images
!gdown --id '1BwhagAYZlG1cnPZn1N3GTH4Hj2MWliNG'
!unzip Samoyed.zip
## Download ImageNet labels
!wget https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt

In [None]:
## preprocess image
# Pipeline applied to every PIL image before it is fed to the model:
# resize, center-crop to 224, convert to a tensor, then normalize each channel
# with the mean/std below (presumably the ImageNet statistics the pretrained
# weights expect -- confirm against the torchvision model documentation).
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])


def black_image_outside_patch(img, top_left_x, top_left_y, bot_right_x, bot_right_y):
    """Return a copy of the image that is zero everywhere outside a bounding box
    ----------
    img:
        image tensor, indexed as [channel, y, x]

    top_left_x, top_left_y, bot_right_x, bot_right_y:
        coordinates of the bounding box for the patch to keep; x indexes the
        last dimension and y the second one, both with slice semantics
        (start inclusive, end exclusive)
    Returns
    -------
    img_copy:
        new tensor with the contents of the bounding box copied from img and
        every other pixel set to 0; img itself is not modified
    """
    source = img.detach()
    rows = slice(top_left_y, bot_right_y)
    cols = slice(top_left_x, bot_right_x)
    # Start from an all-black canvas and paste only the patch that is kept.
    img_copy = torch.zeros_like(source)
    img_copy[:, rows, cols] = source[:, rows, cols]
    return img_copy

def random_noise_outside_patch(img, top_left_x, top_left_y, bot_right_x, bot_right_y):
    """Replaces all pixels outside the provided bounding box with random noise
    ----------
    img:
        image tensor, indexed as [channel, y, x]

    top_left_x, top_left_y, bot_right_x, bot_right_y:
        coordinates of the bounding box for the patch to keep; x indexes the
        last dimension and y the second one, both with slice semantics
        (start inclusive, end exclusive)
    Returns
    -------
    img_copy:
        copy of the image in which everything outside the bounding box is
        filled with samples from torch.randn; img itself is not modified
    """
    img_copy = img.detach().clone()
    everything = slice(None)
    # The four (overlapping) bands surrounding the patch, in the order
    # left, right, top, bottom. Later bands overwrite the shared corners.
    outside_regions = (
        (everything, everything, slice(None, top_left_x)),
        (everything, everything, slice(bot_right_x, None)),
        (everything, slice(None, top_left_y), everything),
        (everything, slice(bot_right_y, None), everything),
    )
    for region in outside_regions:
        # Sample the noise on the image's own device so that a GPU image does
        # not need a host-to-device copy for every band.
        noise = torch.randn(img_copy[region].shape, device=img_copy.device)
        img_copy[region] = noise
    return img_copy

def show(img):
    """Shows the input img with pyplot
    ----------
    img:
        image tensor, indexed as [channel, y, x]; it may live on any device
        and may be tracking gradients
    """
    # detach() and cpu() make the conversion to numpy work for tensors that
    # require grad or are stored on the GPU.
    npimg = img.detach().cpu().numpy()
    # pyplot expects channels last: (y, x, channel).
    plt.imshow(np.transpose(npimg, (1, 2, 0)), interpolation='nearest')



In [None]:
# One independent list per labeled feature class; append_features fills them.
faces, noses, eyes, mouths, ears, furs, legs = ([] for _ in range(7))

def append_features(face, nose, eye, mouth, ear, fur, leg):
    """
    Stores one labeled patch per feature class in the module-level lists.

    Each argument that is not None is moved to `device` and appended to the
    list of its class (faces, noses, eyes, mouths, ears, furs, legs);
    arguments that are None are skipped.
    """
    patch_and_bucket = (
        (face, faces),
        (nose, noses),
        (eye, eyes),
        (mouth, mouths),
        (ear, ears),
        (fur, furs),
        (leg, legs),
    )
    for patch, bucket in patch_and_bucket:
        if patch is None:
            continue
        bucket.append(patch.to(device))

# manually label all the test feature patches
# For each of the six images (1.jpeg .. 6.jpeg): load and preprocess the image,
# cut one patch per feature class with random_noise_outside_patch (the box is
# given as top_left_x, top_left_y, bot_right_x, bot_right_y), add a leading
# batch dimension with unsqueeze(0) and store the patches via append_features.

# image 1
filename = str(1) + '.jpeg'
input_image = Image.open(filename)
input_tensor = preprocess(input_image)
face = random_noise_outside_patch(input_tensor, 50, 0, 150, 100).unsqueeze(0)
nose = random_noise_outside_patch(input_tensor, 88, 50, 105, 70).unsqueeze(0)
eye = random_noise_outside_patch(input_tensor, 100, 35, 125, 55).unsqueeze(0)
mouth = random_noise_outside_patch(input_tensor, 82, 68, 120, 90).unsqueeze(0)
ear = random_noise_outside_patch(input_tensor, 102, 0, 145, 30).unsqueeze(0)
fur = random_noise_outside_patch(input_tensor, 100, 100, 135, 135).unsqueeze(0)
leg = random_noise_outside_patch(input_tensor, 100, 150, 150, 224).unsqueeze(0)
append_features(face, nose, eye, mouth, ear, fur, leg)

# image 2
filename = str(2) + '.jpeg'
input_image = Image.open(filename)
input_tensor = preprocess(input_image)
face = random_noise_outside_patch(input_tensor, 100, 5, 220, 135).unsqueeze(0)
nose = random_noise_outside_patch(input_tensor, 150, 80, 180, 110).unsqueeze(0)
eye = random_noise_outside_patch(input_tensor, 165, 60, 190, 85).unsqueeze(0)
mouth = random_noise_outside_patch(input_tensor, 140, 108, 190, 134).unsqueeze(0)
ear = random_noise_outside_patch(input_tensor, 160, 0, 230, 60).unsqueeze(0)
fur = random_noise_outside_patch(input_tensor, 50, 125, 100, 150).unsqueeze(0)
leg = random_noise_outside_patch(input_tensor, 150, 150, 224, 224).unsqueeze(0)
append_features(face, nose, eye, mouth, ear, fur, leg)


# image 3
# NOTE(review): no `mouth` patch is defined for this image, so the `mouth`
# variable still holds the patch cut from image 2 and that patch is appended
# to `mouths` a second time. index_to_class assigns classes to patch indices
# assuming num_images patches per class, so passing None here instead would
# shift that mapping -- confirm the intended handling before changing it.
filename = str(3) + '.jpeg'
input_image = Image.open(filename)
input_tensor = preprocess(input_image)
face = random_noise_outside_patch(input_tensor, 110, 0, 190, 80).unsqueeze(0)
nose = random_noise_outside_patch(input_tensor, 140, 50, 170, 65).unsqueeze(0)
eye = random_noise_outside_patch(input_tensor, 155, 28, 178, 48).unsqueeze(0)
ear = random_noise_outside_patch(input_tensor, 145, 0, 180, 30).unsqueeze(0)
fur = random_noise_outside_patch(input_tensor, 60, 30, 90, 75).unsqueeze(0)
leg = random_noise_outside_patch(input_tensor, 20, 100, 60, 224).unsqueeze(0)
append_features(face, nose, eye, mouth, ear, fur, leg)

# image 4
filename = str(4) + '.jpeg'
input_image = Image.open(filename)
input_tensor = preprocess(input_image)
face = random_noise_outside_patch(input_tensor, 110, 35, 200, 110).unsqueeze(0)
nose = random_noise_outside_patch(input_tensor, 146, 75, 162, 90).unsqueeze(0)
eye = random_noise_outside_patch(input_tensor, 155, 65, 172, 78).unsqueeze(0)
mouth = random_noise_outside_patch(input_tensor, 140, 86, 170, 105).unsqueeze(0)
ear = random_noise_outside_patch(input_tensor, 160, 35, 190, 60).unsqueeze(0)
fur = random_noise_outside_patch(input_tensor, 130, 110, 170, 140).unsqueeze(0)
leg = random_noise_outside_patch(input_tensor, 160, 150, 190, 210).unsqueeze(0)
append_features(face, nose, eye, mouth, ear, fur, leg)

# image 5
filename = str(5) + '.jpeg'
input_image = Image.open(filename)
input_tensor = preprocess(input_image)
face = random_noise_outside_patch(input_tensor, 80, 20, 180, 100).unsqueeze(0)
nose = random_noise_outside_patch(input_tensor, 97, 54, 110, 72).unsqueeze(0)
eye = random_noise_outside_patch(input_tensor, 100, 35, 125, 55).unsqueeze(0)
mouth = random_noise_outside_patch(input_tensor, 92, 70, 135, 95).unsqueeze(0)
ear = random_noise_outside_patch(input_tensor, 120, 25, 175, 50).unsqueeze(0)
fur = random_noise_outside_patch(input_tensor, 100, 100, 135, 135).unsqueeze(0)
leg = random_noise_outside_patch(input_tensor, 90, 175, 150, 224).unsqueeze(0)
append_features(face, nose, eye, mouth, ear, fur, leg)

# image 6
filename = str(6) + '.jpeg'
input_image = Image.open(filename)
input_tensor = preprocess(input_image)
face = random_noise_outside_patch(input_tensor, 110, 15, 180, 100).unsqueeze(0)
nose = random_noise_outside_patch(input_tensor, 137, 62, 154, 78).unsqueeze(0)
eye = random_noise_outside_patch(input_tensor, 148, 45, 166, 60).unsqueeze(0)
mouth = random_noise_outside_patch(input_tensor, 125, 75, 165, 94).unsqueeze(0)
ear = random_noise_outside_patch(input_tensor, 150, 20, 180, 50).unsqueeze(0)
fur = random_noise_outside_patch(input_tensor, 150, 100, 180, 125).unsqueeze(0)
leg = random_noise_outside_patch(input_tensor, 130, 125, 170, 205).unsqueeze(0)
append_features(face, nose, eye, mouth, ear, fur, leg)


## PyTorch hooks for saving activations


In [None]:
################################################################################
#                    CITATIONS                    
# https://www.lyndonduong.com/saving-activations/
# https://web.stanford.edu/~nanbhas/blog/forward-hooks-pytorch/#extracting-activations-from-a-layer
#
###############################################################################

def save_adaptive_activations(
        activations: DefaultDict,
        name: str,
        module: nn.Module,
        inp: Tuple,
        out: torch.Tensor
) -> None:
    """PyTorch forward hook that records a module's output at each forward pass.

    A detached CPU copy of `out` is appended to `activations[name]`, so the
    dict is mutated on every forward pass. `module` and `inp` are part of the
    forward-hook signature and are not used.
    """
    snapshot = out.detach().cpu()
    history = activations[name]
    history.append(snapshot)


def register_activation_hooks(
        model: nn.Module,
) -> Tuple[DefaultDict[str, List[torch.Tensor]], List]:
    """Registers an activation-saving forward hook on every module of the model.
    Parameters
    ----------
    model:
        PyTorch model; a hook is attached to each module yielded by
        model.named_modules()

    Returns
    -------
    activations_dict:
        dict of lists containing the saved activations in the form (k,v)
        where k is the name of the layer and v is a list of the activation
        tensors in the order that they were run through the neural network

    hooks:
        list of the handles returned by register_forward_hook, one per hooked
        module; pass it to remove_hooks to detach the hooks again
    """
    activations_dict = collections.defaultdict(list)
    # partial binds the shared dict and the layer name, so each hook appends
    # to its own entry of activations_dict.
    hooks = [
        module.register_forward_hook(
            partial(save_adaptive_activations, activations_dict, name)
        )
        for name, module in model.named_modules()
    ]

    return activations_dict, hooks

def remove_hooks(
    hooks: List,
) -> None:
    """Detaches previously registered hooks.
    Parameters
    ----------
    hooks:
        list of hook handles attached to the model; remove() is called on
        each of them in order

    Returns
    -------
    None
    """
    for handle in hooks:
        handle.remove()
    return None
      

## Calculate the error between saved activation data structures

In [None]:
def matched_activations(activations, train_indices, test_ind, error_func):
    """ Ranks the train patches by how similar their activations are to the
    activations of the patch represented by test_ind
    ----------
    activations:
        dict: {'layer name': List of tensors of activations}

    train_indices:
        list of indices into the lists stored in the activations dict that
        identify the train examples

    test_ind:
        index into the lists stored in the activations dict that identifies
        the test example

    error_func:
        func (tensor, tensor) -> number that gives the error metric between
        two activations

    Returns
    -------
    matched_indices: list of (index, error)
        one tuple per train index, sorted from lowest error to highest error;
        train indices with equal error keep their order from train_indices.
        The error is the sum over layers of the layer error weighted by
        exp(2 * i / number_of_layers), where i is the position of the layer
        in the dict, so later layers count more.
    """
    layer_names = list(activations.keys())
    num_layers = len(layer_names)
    # The weights only depend on the layer position, so compute them once.
    layer_weights = [np.exp(2 * i / num_layers) for i in range(num_layers)]

    matched_indices = []
    for train_ind in train_indices:
        # weighted error between train_ind and test_ind over all layers
        train_err = 0
        for name, weight in zip(layer_names, layer_weights):
            layer_activations = activations[name]
            train_err += weight * error_func(
                layer_activations[train_ind], layer_activations[test_ind]
            )
        matched_indices.append((train_ind, train_err))

    # list.sort is stable, which reproduces the tie order of inserting each
    # new match in front of the first strictly larger error.
    matched_indices.sort(key=lambda match: match[1])
    return matched_indices


## Test activation similarities between labeled features

In [None]:
import time
import torch.nn as nn
activations_dict, hooks = register_activation_hooks(model)

# run training images through the model
num_images = 6
num_features = 7
num_feature_images = num_images * num_features
# The forward hooks store every activation, so the model outputs themselves
# are not needed. The order of the passes (all faces, then all noses, ...)
# defines the patch indices that index_to_class relies on, so it must match
# the order of `classes`.
# no_grad: this is inference only and the hooks detach what they save, so no
# autograd graph has to be built.
with torch.no_grad():
    for feature_patches in (faces, noses, eyes, mouths, ears, furs, legs):
        for patch in feature_patches:
            model(patch)

classes = ['face', 'nose', 'eye', 'mouth', 'ear', 'fur', 'leg']
def index_to_class():
    """ Builds the lookup from the index of a feature patch to its class label
    ----------

    Patches are assumed to be stored class by class with num_images patches
    per class, so patch i belongs to classes[i // num_images].

    Returns
    -------
    index_to_class_dict:
        {index: class name} for every index in range(num_feature_images)
    """
    return {
        patch_index: classes[patch_index // num_images]
        for patch_index in range(num_feature_images)
    }

def class_to_score(scores, index_to_class_dict, class_names=None):
    """ Calculates the mean loss between the test image and each feature class
    ----------
    scores:
        list of tuples of the form [(ind, loss)] where the training patch
        at index ind has loss loss

    index_to_class_dict:
        dictionary that maps indices to classes

    class_names:
        optional iterable of the class names to report; defaults to the
        module-level `classes` list

    Returns
    -------
    class_to_scores:
        A dictionary that maps each feature class to the mean loss of its
        patches in scores. A class without any patch in scores maps to
        float("inf") so that it can never be selected as the best match.
    """
    if class_names is None:
        class_names = classes

    loss_totals = {class_name: 0 for class_name in class_names}
    patch_counts = {class_name: 0 for class_name in class_names}

    for ind, loss in scores:
        class_name = index_to_class_dict[ind]
        loss_totals[class_name] += loss
        patch_counts[class_name] += 1

    # Guard the division: a class that received no score has count 0.
    return {
        class_name: (
            loss_totals[class_name] / patch_counts[class_name]
            if patch_counts[class_name]
            else float("inf")
        )
        for class_name in loss_totals
    }


index_to_class_dict = index_to_class()
incorrect_preds = 0
# The loss module holds no per-call state here, so one instance is reused.
loss_fn = nn.L1Loss()

# see if the train patches activations match up with each other:
# leave-one-out check where each labeled patch is classified by comparing its
# activations against those of all the other labeled patches
for i in range(num_feature_images):
    # every labeled patch except the one being classified
    train_indices = [j for j in range(num_feature_images) if j != i]
    matched_activation_results = matched_activations(activations_dict, train_indices, i, loss_fn)
    class_to_score_dict = class_to_score(matched_activation_results, index_to_class_dict)
    print(f"{index_to_class_dict[i]}, {i}: {class_to_score_dict}")
    # the predicted class is the one with the lowest mean loss
    predicted_class = (None, 99999999)
    for class1, score in class_to_score_dict.items():
        if score < predicted_class[1]:
            predicted_class = (class1, score)
    if predicted_class[0] != index_to_class_dict[i]:
        incorrect_preds += 1
    print(f"prediction for {index_to_class_dict[i]} {i}: {predicted_class[0]}, loss: {predicted_class[1]}")

# Report the accuracy as a real percentage (0-100); formatting with :.2f also
# avoids float artifacts such as 0.9299999999999999.
accuracy_percent = 100 * (num_feature_images - incorrect_preds) / num_feature_images
print(f"correct predictions: {accuracy_percent:.2f}%")

## Extract Potential Features from Test Images

Now that we have saved the activations of many features within the training set, we attempt to extract feature patches that the CNN recognizes from new test images.

### Naively testing all possible patches of the image

In [None]:
from torch.nn.modules.loss import L1Loss
import time

#open test image
# Load 10.jpeg, run it through the same `preprocess` pipeline as the labeled
# images and move the resulting tensor to `device`.
filename = str(10) + '.jpeg'
input_image = Image.open(filename)
input_tensor = preprocess(input_image).to(device)

def garbage_collect(activations):
    """
    Drop the most recently stored activation of every layer.

    Each list in the activations dict is replaced by a copy without its last
    element, which removes the activations of the latest test patch so the
    stored data does not keep growing. Empty lists stay empty.
    """
    for layer_name in activations:
        # Re-binding an existing key does not change the dict's size, so it
        # is safe while iterating over the keys.
        activations[layer_name] = activations[layer_name][:-1]

def extract_all_patches(img):
    """
    Search img for the square patch that best matches each feature class.

    Square patches with side 20, 40 and 80 are slid over the image in steps
    of 10 pixels. Every candidate (the image with noise outside the patch) is
    run through the model, its activations are compared against those of all
    labeled train patches, and for each feature class the candidate with the
    lowest mean loss is remembered.

    Parameters
    ----------
    img:
        image tensor, indexed as [channel, y, x]

    Returns
    -------
    feature_mins:
        dict that maps each feature class to the best candidate found, in the
        form "class": (dim, x, y, loss)
    """
    train_indices = list(np.arange(num_feature_images))
    width = img.size()[2]
    height = img.size()[1]
    # dict that maps feature to patch with minimum loss in the form "class":(dim, x, y, loss)
    feature_mins = {}
    for i in range(num_features):
        feature_mins[index_to_class_dict[i*num_images]] = (0,0,0,9999999999)
    # the loss module is identical for every candidate, so build it once
    loss_fn = nn.L1Loss()
    # no_grad: the model is only used for inference and the hooks detach what
    # they store, so building autograd graphs for every candidate is wasted
    # time and memory
    with torch.no_grad():
        # iterate through all the possible dimensions, and coordinates
        for dim in [20, 40, 80]:
            for x in range(0, width-dim, 10):
                for y in range(0, height-dim, 10):
                    # create test patch
                    test_patch = random_noise_outside_patch(img, x, y, x+dim, y+dim)
                    # run test patch through nn, so hooks save the activations
                    model(test_patch.unsqueeze(0).to(device))
                    # compare the activations between the test patch (the last
                    # stored entry, index -1) and the labeled train patches
                    matched_activation_results = matched_activations(activations_dict, train_indices, -1, loss_fn)
                    # calculate the loss for each class
                    class_to_score_dict = class_to_score(matched_activation_results, index_to_class_dict)
                    # if the loss is the minimum for the feature class, save it
                    for feat, loss in class_to_score_dict.items():
                        if loss < feature_mins[feat][3]:
                            feature_mins[feat] = (dim, x, y, loss)
                    # delete the activations stored from the most recent test patch
                    garbage_collect(activations_dict)

    return feature_mins

# Time the exhaustive patch search over the test image.
start_time = time.time()
# expected runtime around 30 minutes, depending on how many candidate patches tested
extracted_patches = extract_all_patches(input_tensor)
# elapsed wall-clock time of the search, in seconds
print(time.time() - start_time)

In [None]:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

def show_images(img) -> None:
    """
    Draw one image tensor in a new pyplot figure.

    Because every call opens its own figure, calling this repeatedly shows
    the images one after another instead of overwriting a single plot.

    Arguments:
        img: image tensor indexed as [channel, y, x]; it is copied to the
            CPU before plotting
    """
    plt.figure()
    # pyplot expects channels last: (y, x, channel)
    pixels = img.cpu().numpy().transpose(1, 2, 0)
    plt.imshow(pixels, interpolation='nearest')

show_images(input_tensor)

def print_extracted_patches(extracted_patches, img):
    """
    Print and display the best patch found for every feature class.

    Arguments:
        extracted_patches: dict in the form "class": (dim, x, y, loss), as
            returned by extract_all_patches
        img: image tensor the patches were extracted from
    """
    for feature_name, best_match in extracted_patches.items():
        dim, x, y, loss = best_match
        # rebuild the candidate: the image with noise outside the square patch
        patch = random_noise_outside_patch(img, x, y, x+dim, y+dim)
        print(f"the extracted patch for {feature_name} is at dim:{dim}, x:{x}, y:{y}, with loss: {loss}")
        show_images(patch)
print_extracted_patches(extracted_patches, input_tensor)

## For other experiments...

See https://colab.research.google.com/drive/1XSqGRIEAMlYgimccmJpHAZTMgyOwpxj8?usp=sharing for School Bus


See https://drive.google.com/file/d/1tuo16a-bOVXhimnooDxGhF5wsHxE6fKo/view?usp=sharing  for results after running the notebook on Kaggle Notebook.