In [1]:
import glob as gb
import os

import torch
import numpy as np
from PIL import Image
from functions_M import *
from GradImgPro import *
import Vggface2model as model
import matplotlib.image as mpimg
from torch.autograd import Variable
import matplotlib.cm as mpl_color_map
# from misc_functions import save_class_activation_images

In [2]:
# --- Encoder hyper-parameters -------------------------------------------
CNN_fc_hidden1 = 1024  # width of the first fully-connected layer
CNN_fc_hidden2 = 768   # width of the second fully-connected layer
CNN_embed_dim = 512    # latent dim extracted by 2D CNN
res_size = 224         # ResNet image size
dropout_p = 0.0        # dropout probability

# --- Filesystem locations -----------------------------------------------
save_model_path = "./VGG_LSTM/"
img_path = "input_images/*.bmp"

# --- Device selection ---------------------------------------------------
# Query CUDA once and derive the device from that single answer.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0") if use_cuda else torch.device("cpu")

In [3]:
class CamExtractor():
    """
        Extracts cam features from the model.

        Runs the model one stage at a time through ``model.getgradcam`` and,
        at ``target_layer``, keeps the stage output and hooks it so that its
        gradient is captured during a later backward pass.

        Args:
            model: object exposing ``getgradcam(layer_index, tensor)``.
            target_layer: stage index whose output/gradient should be kept.
            layers: iterable of stage indices to run, in order. Defaults to
                ``range(7, 12)``, the range this notebook was written for.
    """
    def __init__(self, model, target_layer, layers=range(7, 12)):
        self.model = model
        self.target_layer = target_layer
        self.layers = layers
        self.gradients = None

    def save_gradient(self, grad):
        """Tensor hook: remember the gradient flowing into the target layer output."""
        self.gradients = grad

    def forward_pass(self, x):
        """
            Does a forward pass on convolutions, hooks the function at given layer.

            Returns:
                ``(conv_output, x)`` where ``conv_output`` is the output of the
                target stage and ``x`` is the output of the last stage.
                ``conv_output`` is ``None`` when ``target_layer`` is not one of
                the stages in ``layers``.
        """
        conv_output = None
        # Drop any gradient left over from a previous run so a stale value can
        # never be mistaken for the result of the upcoming backward pass.
        self.gradients = None
        x = x.to(device)
        for layer_idx in self.layers:
            x = self.model.getgradcam(layer_idx, x)
            if layer_idx == self.target_layer:
                x.register_hook(self.save_gradient)
                conv_output = x  # Save the convolution output on that layer
        return conv_output, x

In [4]:
class GradCam():
    """
        Produces class activation map.

        Wraps a model (switched to eval mode) and a ``CamExtractor`` bound to
        ``target_layer``; ``generate_cam`` turns one input into a heat-map.
    """
    def __init__(self, model, target_layer):
        self.model = model
        self.model.eval()
        # Define extractor
        self.extractor = CamExtractor(self.model, target_layer)

    def generate_cam(self, input_image, target_class=None):
        """
            Compute the activation map of ``target_class`` for ``input_image``.

            Args:
                input_image: input tensor; dims 2 and 3 are used as the output
                    height and width (assumes N x C x H x W with N == 1 —
                    TODO confirm against ``preprocess_image``).
                target_class: index of the model output to explain. When
                    ``None`` the index of the largest output is used.

            Returns:
                2-D NumPy float array with values in [0, 1], resized to the
                spatial size of ``input_image``.

            Raises:
                ValueError: the extractor never reached its target layer, so
                    there is no activation to visualise.
        """
        conv_output, model_output = self.extractor.forward_pass(input_image)
        if conv_output is None:
            raise ValueError("target layer was not reached during the forward pass")
        print("model_output:", model_output.size())
        model_output = model_output.reshape(model_output.size()[0], model_output.size()[1])
        # Honour an explicitly requested class; fall back to the top prediction.
        if target_class is None:
            target_class = int(np.argmax(model_output.detach().cpu().numpy()))
        # Target for backprop
        print("Targetclass:", target_class)
        # Build the one-hot on the output's own device/dtype instead of relying
        # on a notebook-level global.
        one_hot_output = torch.zeros(1, model_output.size()[1],
                                     dtype=model_output.dtype,
                                     device=model_output.device)
        one_hot_output[0][target_class] = 1
        print("one_hot_output:", one_hot_output.size())
        # Zero grads
        self.model.zero_grad()
        model_output.backward(gradient=one_hot_output, retain_graph=True)
        # Get hooked gradients
        print("gradients:", self.extractor.gradients.size())
        guided_gradients = self.extractor.gradients.detach().cpu().numpy()[0]
        # Get convolution outputs
        target = conv_output.detach().cpu().numpy()[0]
        # Get weights from gradients: one scalar per channel
        weights = np.mean(guided_gradients, axis=(1, 2))
        # NOTE(review): the map starts from ones, not zeros, which adds a +1
        # bias before the ReLU below. Kept as-is so existing heat-maps do not
        # change — confirm whether this offset is intended.
        cam = np.ones(target.shape[1:], dtype=np.float32)
        # Weighted sum of the channels (contracts the channel axis)
        cam += np.tensordot(weights, target, axes=1)
        cam = np.maximum(cam, 0)
        # Normalize between 0-1; a flat map would divide by zero, so map it to 0
        cam_min = np.min(cam)
        cam_span = np.max(cam) - cam_min
        if cam_span > 0:
            cam = (cam - cam_min) / cam_span
        else:
            cam = np.zeros_like(cam)
        cam = np.uint8(cam * 255)  # Scale between 0-255 to visualize
        # PIL expects (width, height). Image.ANTIALIAS was removed in
        # Pillow 10; LANCZOS is the same filter under its current name.
        height, width = input_image.shape[2], input_image.shape[3]
        cam = np.uint8(Image.fromarray(cam).resize((width, height), Image.LANCZOS)) / 255

        return cam


In [18]:
# Pick the first matching image deterministically: glob order is not
# guaranteed, and an empty match should fail with a readable message.
image_files = sorted(gb.glob(img_path))
if not image_files:
    raise FileNotFoundError(f"No input images match pattern {img_path!r}")
VidData = image_files[0]
original_image = Image.open(VidData).convert('RGB')
# Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
original_image = original_image.resize((res_size, res_size), Image.LANCZOS)
prep_img = preprocess_image(original_image)
cnn_encoder = VggFaceEncoder(fc_hidden1=CNN_fc_hidden1, fc_hidden2=CNN_fc_hidden2, drop_p=dropout_p,
                             CNN_embed_dim=CNN_embed_dim).to(device)
# map_location lets a checkpoint saved on GPU load on a CPU-only machine.
# NOTE: torch.load unpickles the file — only load checkpoints you trust.
checkpoint_path = os.path.join(save_model_path, 'cnn_encoder_epoch41.pth')
cnn_encoder.load_state_dict(torch.load(checkpoint_path, map_location=device))
print('CRNN model reloaded!')
target_layer = 7

CRNN model reloaded!


In [19]:
# Build the Grad-CAM generator for the chosen layer and compute the map
# for the preprocessed image.
grad_cam = GradCam(model=cnn_encoder, target_layer=target_layer)
cam = grad_cam.generate_cam(input_image=prep_img)
# Write the result under ./results/, tagging the file name with the layer index.
save_class_activation_images(
    original_image,
    './results/',
    cam,
    f"GradCam{target_layer}",
)

model_output: torch.Size([1, 2048, 1, 1])
Targetclass: 1873
one_hot_output: torch.Size([1, 2048])
gradients: torch.Size([1, 1024, 14, 14])
