# Grad-CAM implementation in Pytorch 
based on this [git-repo](https://github.com/jacobgil/pytorch-grad-cam).

In [4]:
#@title imports
import setGPU
import torch
import argparse
import cv2
import numpy as np
from torch.autograd import Function
from torchvision import models, transforms
import sys
import os
sys.path.append("../Scripts/decoding_videos")
sys.path.append("../Scripts/representation_learning")

#from models import *
from IPython.display import Image, display

import warnings
warnings.filterwarnings('ignore')

In [5]:
device = torch.device(f'cuda:{torch.cuda.current_device()}')    
model_file = 'H061_conv1d'
model_conv1d = torch.load(model_file, map_location=device)

In [3]:
model_file = 'weightsH061/epoch_65'
model = torch.load(os.path.join("../Results/Decoding_Videos/conv/{}".format(model_file)), map_location = device)

In [127]:
#model

In [None]:
self.cnn, frame_size, H_neurons = CNN_model(kernel,act_fn,n_channels)          
#after the stack of frames, the size of data becames: [n_samples, 128, 4, 4,n_frames]

######################### conv1d #########################
self.conv8 = nn.Sequential(nn.Conv2d(1,64,kernel_size=(k_size_1d,H_neurons*frame_size*frame_size), padding = 0),act_fn())       

######################### fully connected layer #########################
self.dropout = nn.Dropout(p = dropoutRate)
self.fc1 = nn.Sequential(nn.Linear(64*(n_frames-k_size_1d+1), d_layer),act_fn())
self.fc2 = nn.Linear(d_layer, n_classes)
self.FC = nn.Sequential(self.dropout,self.fc1,self.dropout,self.fc2)
   
                     
def forward(self, x): 
    #x shape: [n_samples,n_channels,n_frames,32,32]   
    #set_trace()
    convnets = []
    convnet = 0
    n_frames = x.shape[2]
    for i in range(n_frames):
        convnet = x[:,:,i,:,:]    #[n_samples,n_channels,32,32]        
        #cnn
        convnet = self.cnn(convnet) #[n_samples, 128, 4, 4]
        convnets.append(convnet)            
    convnets = torch.stack(convnets) #[n_frames, n_samples, 128, 4, 4]

    convnets = convnets.permute(1,0,2,3,4) #[n_samples, n_frames, 128, 4, 4]  

    #Conv-1d_over_flames
    #reshape
    convnets = convnets.view(-1,1, n_frames,  
                             convnets.size(-1)*convnets.size(-2)*convnets.size(-3))
                            # [samples,1(channels),n_frames,128*4*4]        
    convnets = self.conv8(convnets)  #[n_samples, 64, 8, 1]


    # flatten and FC
    convnets = convnets.view(convnets.shape[0],-1)
    convnets = self.FC(convnets)           
    return convnets   

In [4]:
def preprocess_image(img):
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
    preprocessing = transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])
    return preprocessing(img.copy()).unsqueeze(0)

def show_cam_on_image(img, mask):
    heatmap = cv2.applyColorMap(np.uint8(255 * mask), cv2.COLORMAP_JET)
    heatmap = np.float32(heatmap) / 255
    cam = heatmap + np.float32(img)
    cam = cam / np.max(cam)
    return np.uint8(255 * cam)

def deprocess_image(img):
    """ see https://github.com/jacobgil/keras-grad-cam/blob/master/grad-cam.py#L65 """
    img = img - np.mean(img)
    img = img / (np.std(img) + 1e-5)
    img = img * 0.1
    img = img + 0.5
    img = np.clip(img, 0, 1)
    return np.uint8(img*255)

In [5]:
class GuidedBackpropReLU(Function):
    @staticmethod
    def forward(self, input_img):
        positive_mask = (input_img > 0).type_as(input_img)
        output = torch.addcmul(torch.zeros(input_img.size()).type_as(input_img), input_img, positive_mask)
        self.save_for_backward(input_img, output)
        return output

    @staticmethod
    def backward(self, grad_output):
        input_img, output = self.saved_tensors
        grad_input = None

        positive_mask_1 = (input_img > 0).type_as(grad_output)
        positive_mask_2 = (grad_output > 0).type_as(grad_output)
        grad_input = torch.addcmul(torch.zeros(input_img.size()).type_as(input_img),
                                   torch.addcmul(torch.zeros(input_img.size()).type_as(input_img), grad_output,
                                                 positive_mask_1), positive_mask_2)
        return grad_input

In [6]:
class GuidedBackpropReLUModel:
    def __init__(self, model, use_cuda):
        self.model = model
        self.model.eval()
        self.cuda = use_cuda
        if self.cuda:
            self.model = model.cuda()

        def recursive_relu_apply(module_top):
            for idx, module in module_top._modules.items():
                recursive_relu_apply(module)
                if module.__class__.__name__ == 'ReLU':
                    module_top._modules[idx] = GuidedBackpropReLU.apply

        # replace ReLU with GuidedBackpropReLU
        recursive_relu_apply(self.model)

    def forward(self, input_img):
        return self.model(input_img)

    def __call__(self, input_img, target_category=None):
        if self.cuda:
            input_img = input_img.cuda()

        input_img = input_img.requires_grad_(True)

        output = self.forward(input_img)

        if target_category == None:
            target_category = np.argmax(output.cpu().data.numpy())

        one_hot = np.zeros((1, output.size()[-1]), dtype=np.float32)
        one_hot[0][target_category] = 1
        one_hot = torch.from_numpy(one_hot).requires_grad_(True)
        if self.cuda:
            one_hot = one_hot.cuda()

        one_hot = torch.sum(one_hot * output)
        one_hot.backward(retain_graph=True)

        output = input_img.grad.cpu().data.numpy()
        output = output[0, :, :, :]

        return output

In [45]:
class FeatureExtractor():
    """ Class for extracting activations and
    registering gradients from targetted intermediate layers """

    def __init__(self, model, target_layers):
        self.model = model
        self.target_layers = target_layers
        self.gradients = []

    def save_gradient(self, grad):
        self.gradients.append(grad)

    def __call__(self, x):
        outputs = []
        self.gradients = []
        for name, module in self.model._modules.items():
            x = module(x)
            print(x.shape)
            if name in self.target_layers:
                print("In feature extractor targe layer names exist? YES:",name)
                x.register_hook(lambda grad: self.save_gradient(grad))
                outputs += [x]
                
        return outputs, x

class ModelOutputs():
    """ Class for making a forward pass, and getting:
    1. The network output.
    2. Activations from intermeddiate targetted layers.
    3. Gradients from intermeddiate targetted layers. """

    def __init__(self, model, feature_module, target_layers):
        self.model = model
        self.feature_module = feature_module
        self.feature_extractor = FeatureExtractor(self.feature_module, target_layers)
            
    def get_gradients(self):
        return self.feature_extractor.gradients

    def __call__(self, x):
        
        target_activations = []        
        for name, module in self.model._modules.items():
            if module == self.feature_module:
                target_activations, target_x = self.feature_extractor(x)
            elif "conv8" in name.lower():
                convnets = []
                for i in range(10):
                    # x initially -> [n_samples, n_channels, 32,32]        
                    conv_x = model.cnn(x) # [n_samples, 128, 4, 4]
                    convnets.append(conv_x) 
                convnets = torch.stack(convnets) #[n_frames, n_samples, 128, 4, 4]     
                convnets = convnets.permute(1,0,2,3,4) #[n_samples, n_frames, 128, 4, 4] 
                #Conv-1d_over_flames 
                # reshape first  -> [samples,1(channels),n_frames,128*4*4]   
                convnets = convnets.view(-1,1, 10, convnets.size(-1)*convnets.size(-2)*convnets.size(-3))         
                x = module(convnets) # final shape-> [n_samples, 64, 8, 1]
            elif "FC" in name:
                x = x.view(x.shape[0],-1)
                x = module(x)

        return target_activations, x

class GradCam:
    def __init__(self, model, feature_module, target_layer_names, use_cuda):
        self.model = model
        self.feature_module = feature_module
        #self.model.eval()
        self.cuda = use_cuda
        if self.cuda:
            self.model = model.cuda()

        self.extractor = ModelOutputs(self.model, self.feature_module, target_layer_names)

    def forward(self, input_img):
        video = torch.stack([torch.squeeze(input_img) for i in range(10)])
        input_video = video.unsqueeze(0).permute(0,2,1,3,4).type('torch.FloatTensor').to(device)
        return self.model(input_video) 

    def __call__(self, input_img, target_category=None):
        if self.cuda:
            input_img = input_img.cuda()

        features, output = self.extractor(input_img)

        if target_category == None:
            target_category = np.argmax(output.cpu().data.numpy())

        one_hot = np.zeros((1, output.size()[-1]), dtype=np.float32)
        one_hot[0][target_category] = 1
        one_hot = torch.from_numpy(one_hot).requires_grad_(True)
        if self.cuda:
            one_hot = one_hot.cuda()
        
        one_hot = torch.sum(one_hot * output)

        self.feature_module.zero_grad()
        self.model.zero_grad()
        one_hot.backward(retain_graph=True)
    
        print(self.extractor.get_gradients())
        
        #grads_val = self.extractor.get_gradients()[-1].cpu().data.numpy() 

        #target = features[-1]
        #target = target.cpu().data.numpy()[0, :]

        #weights = np.mean(grads_val, axis=(2, 3))[0, :]
        #cam = np.zeros(target.shape[1:], dtype=np.float32)

        #for i, w in enumerate(weights):
        #    cam += w * target[i, :, :]

        #cam = np.maximum(cam, 0)
        #cam = cv2.resize(cam, input_img.shape[2:])
        #cam = cam - np.min(cam)
        #cam = cam / np.max(cam)
        #return cam


def grad_cam(image_path, model, feature_module, use_cuda=False):
    """ python grad_cam.py <path_to_image>
    1. Loads an image with opencv.
    2. Preprocesses it for VGG19 and converts to a pytorch variable.
    3. Makes a forward pass to find the category index with the highest score,
    and computes intermediate activations.
    Makes the visualization. """

    use_cuda = use_cuda and torch.cuda.is_available()

    if model is None:
         model = models.resnet50(pretrained=True)  # <-- Which model?
    if feature_module is None:
        feature_module=model.layer4
    
    grad_cam = GradCam(model=model, feature_module=feature_module, target_layer_names=['8'], use_cuda=use_cuda)
    print(grad_cam)

    #img = cv2.imread(image_path, 1)
    #img = np.float32(img) / 255
    # Opencv loads as BGR:
    #img = img[:, :, ::-1]
    #input_img = preprocess_image(img)
    
    
    img = torch.randn(32, 32, 5, requires_grad=True, dtype=torch.float32).to(device)
    img = img.unsqueeze(0).permute(0,3,1,2)
    #video = torch.stack([img for i in range(10)])
    #video = video.unsqueeze(0).permute(0,4,1,2,3).type('torch.FloatTensor')
    print(img.shape)
    
    
    # If None, returns the map for the highest scoring category.
    # Otherwise, targets the requested category.
    input_img = img # [batch, channel, width, height]
    target_category = None
    grayscale_cam = grad_cam(input_img, target_category)

    #grayscale_cam = cv2.resize(grayscale_cam, (img.shape[1], img.shape[0]))
    #cam = show_cam_on_image(img, grayscale_cam)

    #gb_model = GuidedBackpropReLUModel(model=model, use_cuda=use_cuda)
    #gb = gb_model(input_img, target_category=target_category)
    #gb = gb.transpose((1, 2, 0))

    #cam_mask = cv2.merge([grayscale_cam, grayscale_cam, grayscale_cam])
    #cam_gb = deprocess_image(cam_mask*gb)
    #gb = deprocess_image(gb)

    #cv2.imwrite("cam.jpg", cam)
    #cv2.imwrite('gb.jpg', gb)
    #cv2.imwrite('cam_gb.jpg', cam_gb)

In [46]:
grad_cam(image_path='wolf.png', model=model, use_cuda=True, feature_module = model.cnn)

#img_as_np_path = f'../EEG images/a_video.npy'
#grad_cam(image_path=img_as_np_path, model=model_conv1d, feature_module=model_conv1d.FC, use_cuda=True)

<__main__.GradCam object at 0x7fb4707bcd90>
torch.Size([1, 5, 32, 32])
torch.Size([1, 32, 32, 32])
torch.Size([1, 32, 32, 32])
torch.Size([1, 32, 32, 32])
torch.Size([1, 32, 32, 32])
torch.Size([1, 32, 16, 16])
torch.Size([1, 64, 16, 16])
torch.Size([1, 64, 16, 16])
torch.Size([1, 64, 8, 8])
torch.Size([1, 128, 8, 8])
In feature extractor targe layer names exist? YES: 8
torch.Size([1, 128, 4, 4])
[]


In [9]:
class FeatureExtractor():
    """ Class for extracting activations and
    registering gradients from targetted intermediate layers """

    def __init__(self, model, target_layers):
        self.model = model
        self.target_layers = target_layers
        self.gradients = []
        print(self.target_layers)
    def save_gradient(self, grad):
        self.gradients = grad # .append(grad)

    def __call__(self, x):
        outputs = []
        #self.gradients = []
        for name, module in self.model._modules.items():
            x = module(x)
            if int(name) == self.target_layers:
                print("In feature extractor targe layer names exist? YES:",name)
                x.register_hook(self.save_gradient)
                outputs += [x]
                
        return outputs, x
    
img = torch.randn(32, 32, 5, requires_grad=True)
img = img.unsqueeze(0).permute(0,3,1,2).type('torch.FloatTensor').to(device)

In [192]:
feature_ext = FeatureExtractor(model.cnn, 6)

6


In [193]:
out, x = feature_ext(img)

In feature extractor targe layer names exist? YES: 6


In [196]:
out[-1].shape

torch.Size([1, 64, 16, 16])

In [197]:
feature_ext.gradients

[]

In [10]:
img.shape

torch.Size([1, 5, 32, 32])

In [23]:
video = torch.stack([torch.squeeze(img) for i in range(10)])
video = video.unsqueeze(0).permute(0,2,1,3,4).type('torch.FloatTensor')
print(video.shape)

for name, module in model._modules.items():
    print(name)

out = model(video.to(device))
out

torch.Size([1, 5, 10, 32, 32])
cnn
conv8
dropout
fc1
fc2
FC


tensor([[ 2.4818, -1.3425]], device='cuda:0', grad_fn=<AddmmBackward>)

In [24]:
model.cnn[8]

Sequential(
  (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU()
)

In [90]:
from PIL import Image
rand_img = np.random.rand(32,32,5)

img = Image.fromarray(np.uint8(rand_img[:,:,:3])).convert('RGB')
display(img)

#@title Dislpay grad-cam result
# display(Image('cam.jpg'))
# display(Image('gb.jpg'))
# display(Image('cam_gb.jpg'))