# Assignment 3 - Part1
In this assignment we will use a pretrained ResNet to classify images from the Stanford Actions dataset, which has 40 action categories.

# Setup Code

In [None]:
# Enable IPython's autoreload extension in mode 2, so that imported modules
# (e.g. model.py, dataset.py) are reloaded before each cell executes and
# edits to them take effect without restarting the runtime.
%load_ext autoreload
%autoreload 2

In [None]:
# Mount Google Drive at /content/drive so the assignment files are reachable
# from the Colab runtime. force_remount=True is passed so that re-running
# this cell mounts the drive again.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

In [None]:
import os

# TODO: Fill in the Google Drive path (relative to "My Drive") where you
# uploaded the assignment.
# Example: if you created a 188 folder and put all the files under an
# Assignment3 folder inside it, use '188/Assignment3'.
GOOGLE_DRIVE_PATH_AFTER_MYDRIVE = '188/Assignment3'
# Relative path: it resolves against the current working directory
# (NOTE(review): presumably /content on Colab, where the drive is mounted).
GOOGLE_DRIVE_PATH = os.path.join('drive', 'My Drive', GOOGLE_DRIVE_PATH_AFTER_MYDRIVE)
# Sanity check: list the folder contents to confirm the path is correct.
print(os.listdir(GOOGLE_DRIVE_PATH))

In [None]:
import sys

# Make the modules stored in the assignment folder (model.py, dataset.py)
# importable. The membership check keeps sys.path free of duplicate entries
# when this cell is re-run.
if GOOGLE_DRIVE_PATH not in sys.path:
    sys.path.append(GOOGLE_DRIVE_PATH)

Now we are going to untar the actions folder. Don't worry! This time the file is much smaller.

In [None]:
!tar -xvf "/content/drive/My Drive/188/Assignment3/actions.tar.xz" -C "/content/drive/My Drive/188/Assignment3/"

In [None]:
import torch
import torchvision.models as models
from torch import nn
from tqdm import tqdm
import cv2
import numpy as np
import matplotlib.pyplot as plt

## Compare different features
Recall that in Assignment1, we used color histogram features for KNN classification.
In this assignment, we will use features from ResNet18 and compare the results.

#### Color Features
We will first use the color features, as in Assignment1. You do not need to implement anything here. Copy your implementation of KNN into model.py, then run the code below.

In [None]:
import dataset

# Both splits share the same root folder and request color features.
color_split_kwargs = {'root_dir': GOOGLE_DRIVE_PATH, 'color_feature': True}

training_data = dataset.Actions(split='train', **color_split_kwargs)
print (len(training_data))

test_data = dataset.Actions(split='test', **color_split_kwargs)

In [None]:
import torch
from tqdm import tqdm
import cv2

# Keep every 25th training sample and iterate over it in a fixed order.
kept_indices = list(range(0, len(training_data), 25))
training_data1 = torch.utils.data.Subset(training_data, kept_indices)
training_loader1 = torch.utils.data.DataLoader(
    training_data1,
    batch_size=64,
    shuffle=False,
    num_workers=2,
)

# Gather the per-batch features and labels, then merge them along the
# batch dimension into one tensor each.
feature_batches, label_batches = [], []
for batch_inputs, batch_labels in tqdm(training_loader1):
    feature_batches.append(batch_inputs)
    label_batches.append(batch_labels)

all_features = torch.cat(feature_batches, dim=0)
all_labels = torch.cat(label_batches, dim=0)

In [None]:
import model
from model import *

test_loader = torch.utils.data.DataLoader(test_data, batch_size=64, shuffle=False, num_workers=2)

knn = KnnClassifier(all_features, all_labels)

# Accumulate a sample-weighted sum of the per-batch accuracies so the final
# value is the accuracy over the whole test set. Dividing by the last loop
# index (the previous behaviour) is off by one, and a plain mean over batches
# over-weights a short final batch.
weighted_acc = 0.0
num_samples = 0
for inputs, label in tqdm(test_loader):
    batch_size = inputs.shape[0]
    acc = knn.check_accuracy(inputs, label, k=1, quiet=False)
    weighted_acc += acc * batch_size
    num_samples += batch_size

# max(..., 1) avoids a ZeroDivisionError on an empty loader.
total_acc = weighted_acc / max(num_samples, 1)
# NOTE(review): the /100 assumes check_accuracy returns a percentage (0-100);
# confirm against model.py.
print ("total accuracy is %.2f"%(total_acc/100))

The total accuracy is about 4%

### ResNet Features
We then use features extracted from ResNet18.

In [None]:
import dataset

# Rebuild both splits without the color_feature flag, for use with ResNet.
data_root = GOOGLE_DRIVE_PATH

training_data = dataset.Actions(split='train', root_dir=data_root)
print (len(training_data))

test_data = dataset.Actions(split='test', root_dir=data_root)

Implement the Resnet class in model.py. First write the case where "mode=feature". This means that we want to discard the final FC layer of ResNet18.

In [None]:
from model import *
resnet18 = Resnet(mode='feature')
# The network is used purely as a fixed feature extractor here, so put it in
# eval mode (BatchNorm then uses its stored running statistics instead of
# per-batch statistics).
resnet18.eval()

# Keep every 25th training sample.
sub_sample = list(range(0, len(training_data), 25))
training_data1 = torch.utils.data.Subset(training_data, sub_sample)
training_loader1 = torch.utils.data.DataLoader(training_data1, batch_size=64, shuffle=False, num_workers=2)

all_features = []
all_labels = []

# No gradients are needed for feature extraction; without no_grad every
# stored feature tensor would keep its autograd graph alive.
with torch.no_grad():
    for inputs, label in tqdm(training_loader1):
        cnn_features = resnet18(inputs)

        all_features.append(cnn_features)
        all_labels.append(label)

all_features = torch.cat(all_features, dim=0)
all_labels = torch.cat(all_labels, dim=0)

And then we will use pretrained resnet features to do KNN classification.

In [None]:
test_loader = torch.utils.data.DataLoader(test_data, batch_size=64, shuffle=False, num_workers=2)

knn = KnnClassifier(all_features, all_labels)

# Inference only: eval mode for deterministic BatchNorm behaviour, and
# no_grad so no autograd graph is built for the test features.
resnet18.eval()

# Accumulate a sample-weighted sum of the per-batch accuracies so the final
# value is the accuracy over the whole test set. Dividing by the last loop
# index (the previous behaviour) is off by one, and a plain mean over batches
# over-weights a short final batch.
weighted_acc = 0.0
num_samples = 0
with torch.no_grad():
    for inputs, label in tqdm(test_loader):
        batch_size = inputs.shape[0]
        features = resnet18(inputs)
        acc = knn.check_accuracy(features, label, k=1, quiet=False)
        weighted_acc += acc * batch_size
        num_samples += batch_size

# max(..., 1) avoids a ZeroDivisionError on an empty loader.
total_acc = weighted_acc / max(num_samples, 1)
# NOTE(review): the /100 assumes check_accuracy returns a percentage (0-100);
# confirm against model.py.
print ("total accuracy is %.2f"%(total_acc/100))

I can get 30% accuracy. What about you?

## What's your opinion on using different features? Pros & Cons?

[Your answer]

### Retrieve the most relevant images

We put two images, image1.jpg and image2.jpg, in the folder. Please write code that uses pretrained ResNet features to output the **top 5 most relevant images** for each of them.

In [None]:
# Your Code here

### Pretrained ResNet features + Linear Classifier
Then we implement the "linear" mode in the Resnet class in model.py. Remember to freeze the features of ResNet.
In this implementation we use a linear classifier to do classification on the dataset.

In [None]:
from model import *

# Fix the seed so the run is repeatable.
torch.manual_seed(0)
resnet18 = Resnet(mode='linear').cuda()

# Your Code: training loader / test loader with batch size 64

# Your code: SGD optimizer with lr 0.01 and momentum 0.9

# Your code: define loss as cross entropy loss

for epoch in range(5):#If you run out of GPU, just run 1 epoch
    # The evaluation pass below switches the model to eval mode, so switch
    # back to training mode at the start of every epoch.
    resnet18.train()
    for i, data in enumerate(tqdm((training_loader))):
        # Every data instance is an input + label pair
        inputs, labels = data
        inputs = inputs.cuda()
        labels = labels.cuda()

        # Clear gradients accumulated by the previous step.
        optimizer.zero_grad()

        # Your Code: Make predictions for this batch

        # Your Code: Compute the loss and its gradients

        # Your Code: Optimizer Adjust learning weights

    # Evaluate on the test set after each epoch: eval mode so BatchNorm uses
    # its running statistics, and no_grad so no autograd graph is built.
    resnet18.eval()
    total_acc = .0
    len_samples = 0

    with torch.no_grad():
        for i, data in enumerate(tqdm((test_loader))):
            inputs, labels = data
            len_samples += inputs.shape[0]
            inputs = inputs.cuda()
            labels = labels.cuda()

            # Your Code: Make predictions for this batch

            # Your Code: Use argmax to extract predicted labels

            # `outputs` must hold the predicted class indices at this point.
            acc = torch.sum(torch.eq(outputs, labels))
            total_acc += acc.item()
    # Fraction of correctly classified test samples.
    total_acc /= len_samples

    print (total_acc)
    

I can get 65% accuracy! What about you?

### End-to-end finetune resnet
In this implementation, instead of freezing resnet features, we want to finetune it. Implement the "finetune" mode in Resnet class in model.py. The implementation is the same as the "linear" mode, except that we do not need to set requires_grad to False to freeze the features. 

In [None]:
from model import *

# Fix the seed so the run is repeatable.
torch.manual_seed(0)
resnet18 = Resnet(mode='finetune').cuda()

# Your Code: training loader / test loader with batch size 64

# Your code: SGD optimizer with lr 0.01 and momentum 0.9

# Your code: define loss as cross entropy loss

# (A duplicated, body-less copy of the loop header below used to precede it
# and made this cell a syntax error; it has been removed.)
for epoch in range(5):#If you run out of GPU, just run 1 epoch
    # The evaluation pass below switches the model to eval mode, so switch
    # back to training mode at the start of every epoch.
    resnet18.train()
    for i, data in enumerate(tqdm((training_loader))):
        # Every data instance is an input + label pair
        inputs, labels = data
        inputs = inputs.cuda()
        labels = labels.cuda()

        # Clear gradients accumulated by the previous step.
        optimizer.zero_grad()

        # Your Code: Make predictions for this batch

        # Your Code: Compute the loss and its gradients

        # Your Code: Optimizer Adjust learning weights

    # Evaluate on the test set after each epoch: eval mode so BatchNorm uses
    # its running statistics, and no_grad so no autograd graph is built.
    resnet18.eval()
    total_acc = .0
    len_samples = 0

    with torch.no_grad():
        for i, data in enumerate(tqdm((test_loader))):
            inputs, labels = data
            len_samples += inputs.shape[0]
            inputs = inputs.cuda()
            labels = labels.cuda()
            # Your Code: Make predictions for this batch

            # Your Code: Use argmax to extract predicted labels

            # `outputs` must hold the predicted class indices at this point.
            acc = torch.sum(torch.eq(outputs, labels))
            total_acc += acc.item()
    # Fraction of correctly classified test samples.
    total_acc /= len_samples

    print (total_acc)
    

I can get 75% accuracy. What about you?

### Compare using pretrained resnet features for classification / end-to-end finetuning

[Your answer]

## CAM

Now we will implement Prof. Zhou's famous paper: "Learning Deep Features for Discriminative Localization", in which he proposed class activation mapping (CAM). The main equation is:
$$S_{c}=\sum_{k} w_{k}^{c} \sum_{x, y} f_{k}(x, y)=\sum_{x, y} \sum_{k} w_{k}^{c} f_{k}(x, y)$$
where $f_{k}(x, y)$ represents the activation of unit k in the last convolutional layer, which is layer4 in resnet18.

For more detailed implementation, please refer to the github repo: https://github.com/zhoubolei/CAM

Please implement the CAM function in model.py. Specifically, given convolutional features, weights and a class index, CAM highlights the image regions that led the network to classify the image as that class, thus making the CNN interpretable.

In [None]:
from model import *
# F.softmax is used below; import it explicitly instead of relying on
# `from model import *` happening to export it.
import torch.nn.functional as F

resnet18.eval()

finalconv_name = 'layer4'
# hook the feature extractor
features_blobs = []

def hook_feature(module, input, output):
    """Forward hook: store the hooked layer's output as a NumPy array."""
    features_blobs.append(output.data.cpu().numpy())

# Keep the handle so the hook can be removed again; otherwise every re-run of
# this cell would stack one more hook on the layer.
hook_handle = resnet18.resnet._modules.get(finalconv_name).register_forward_hook(hook_feature)

params = list(resnet18.parameters())
# NOTE(review): assumes the last two parameters are the final FC layer's
# weight and bias, so params[-2] is the classifier weight matrix - confirm
# against the Resnet class in model.py.
weight_softmax = np.squeeze(params[-2].data.cpu().numpy())

classes = ['applauding', 'blowing_bubbles', 'brushing_teeth', 'cleaning_the_floor', 'climbing', 'cooking', 'cutting_trees', 'cutting_vegetables', 'drinking', 'feeding_a_horse', 'fishing', 'fixing_a_bike', 'fixing_a_car', 'gardening', 'holding_an_umbrella', 'jumping', 'looking_through_a_microscope', 'looking_through_a_telescope', 'playing_guitar', 'playing_violin', 'pouring_liquid', 'pushing_a_cart', 'reading', 'phoning', 'riding_a_bike', 'riding_a_horse', 'rowing_a_boat', 'running', 'shooting_an_arrow', 'smoking', 'taking_photos', 'texting_message', 'throwing_frisby', 'using_a_computer', 'walking_the_dog', 'washing_dishes', 'watching_TV', 'waving_hands', 'writing_on_a_board', 'writing_on_a_book']

# Ten test images (every 100th of the first 1000), one per grid column.
test_data1 = torch.utils.data.Subset(test_data, list(range(0,1000,100)))
test_loader1 = torch.utils.data.DataLoader(test_data1, batch_size=1, shuffle=True, num_workers=2)

figure = plt.figure(figsize=(24, 6))
# Top row: input images with their true label; bottom row: CAM overlays with
# the predicted label.
cols, rows = 10, 2

try:
    for i, data in enumerate(tqdm((test_loader1))):
        # Drop the activations captured for the previous image.
        features_blobs.clear()
        img, label = data
        img = img.cuda()

        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            logit = resnet18(img)

        # Class probabilities sorted in descending order; idx[0] is the top-1 class.
        h_x = F.softmax(logit, dim=1).data.squeeze()
        probs, idx = h_x.sort(0, True)
        probs = probs.cpu().numpy()
        idx = idx.cpu().numpy()

        # generate class activation mapping for the top1 prediction
        CAMs = CAM(features_blobs[0], weight_softmax, [idx[0]])

        # (1, C, H, W) -> (H, W, C) for plotting.
        img = img.squeeze().permute(1,2,0)
        height, width, _ = img.shape
        img = img.cpu().numpy()
        heatmap = cv2.applyColorMap(cv2.resize(CAMs[0],(width, height)), cv2.COLORMAP_JET)
        # OpenCV returns the colour map in BGR order, but matplotlib expects
        # RGB; without this conversion hot regions are drawn blue.
        heatmap = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB)

        # NOTE(review): heatmap is in the 0-255 range while img's range depends
        # on the dataset transform, and imshow clips float RGB data to [0, 1] -
        # confirm the blend is scaled as intended.
        result = heatmap * 0.3 + img * 0.5

        figure.add_subplot(rows, cols, i+1)
        plt.axis("off")
        plt.title(classes[label])
        plt.imshow(img, cmap="gray")

        # Same column, second row (index derived from cols, not hard-coded).
        figure.add_subplot(rows, cols, i+1+cols)
        plt.axis("off")
        plt.title(classes[idx[0]])
        plt.imshow(result, cmap="gray")
finally:
    # Always detach the hook, even if the loop fails part-way.
    hook_handle.remove()

plt.show()

### What does your heatmap look like? How do they explain/affect the classification prediction?

[Your Answer]