In [None]:
from keras.applications.vgg16 import VGG16, preprocess_input, decode_predictions
from keras.preprocessing import image
import keras.backend as K
import numpy as np
import cv2
import sys

# Import the pre-trained VGG16 model and the supporting packages
# needed to implement Grad-CAM.

In [None]:
# Instantiate VGG16 initialised with the ImageNet weights.
model = VGG16(
    weights="imagenet",
)

In [None]:
# Image to explain, resolved relative to the working directory.
img_path = "tiger.jpg"

# Load the image at the 224x224 size requested via `target_size`.
img = image.load_img(img_path, target_size=(224, 224))

# Convert to an array, prepend a batch axis (a batch of one image) and
# apply the VGG16 input preprocessing in a single expression.
x = preprocess_input(image.img_to_array(img)[np.newaxis, ...])

# We use the VGG16 model shipped with Keras, together with its helper
# functions for loading and preprocessing the input image.

In [None]:
# Forward pass over the single-image batch.
preds = model.predict(x)

# Index of the highest-scoring class for the first (only) image.
class_idx = preds[0].argmax()

# Handles needed later for the gradient computation: the last convolutional
# layer and the model output entry that belongs to the chosen class.
last_conv_layer = model.get_layer("block5_conv3")
class_output = model.output[:, class_idx]


We initialize the model and load the image.
The VGG network expects an input of size (224×224×3), so we resize our image to the required size.
Since we are passing only one image through the network, we have to add a leading dimension so that the input is a batch of size 1. We then normalize the image by subtracting the mean RGB values from it using the helper function `preprocess_input`.

In [None]:
# Best-scoring entry of the decoded predictions for the first image.
top_1 = decode_predictions(preds)[0][0]

# Unpack the three fields of the entry so the print reads naturally:
# the first is shown in parentheses, the second as the class name and
# the third as the probability.
class_id, class_label, probability = top_1
print('Predicted class: %s (%s) with probability %.2f' % (class_label, class_id, probability))

We want to see the map for the top prediction, so we get the predictions for the image and take the index of the topmost class. Remember that we can compute the map for any class. Then we take the output of the final convolutional layer in VGG16, which is `block5_conv3`. The resulting feature map has shape 14×14×512.

In [None]:
# Gradient of the chosen class score with respect to the feature map of the
# last convolutional layer.
# NOTE(review): K.gradients builds a symbolic graph; it is expected to fail
# under TensorFlow 2 eager execution -- confirm the Keras/TF version in use.
grads = K.gradients(class_output, last_conv_layer.output)[0]

# Average the gradients over every axis except the channel axis, giving one
# importance weight per feature-map channel.
pooled_grads = K.mean(grads, axis=(0, 1, 2))

# Evaluate the channel weights and the feature map of the first image.
iterate = K.function([model.input], [pooled_grads, last_conv_layer.output[0]])
pooled_grads_value, conv_layer_output_value = iterate([x])

# Scale each channel by its weight. Broadcasting over the last axis replaces
# the per-channel loop and its hard-coded channel count (512), so the cell
# keeps working if a layer with a different number of channels is chosen.
conv_layer_output_value = conv_layer_output_value * pooled_grads_value

We compute the gradient of the class output value with respect to the feature map. Then we pool the gradients over all axes except the channel dimension. Finally, we weight the output feature map by the computed gradient values.

In [None]:
# Collapse the weighted feature map along the channel axis.
heatmap = np.mean(conv_layer_output_value, axis=-1)

# Keep only the positive contributions.
heatmap = np.maximum(heatmap, 0)

# Scale into [0, 1]. If nothing is positive the map is all zeros; dividing
# would fill it with NaNs, so it is left untouched in that case.
heatmap_max = np.max(heatmap)
if heatmap_max > 0:
    heatmap /= heatmap_max

We average the weighted feature map along the channel dimension, which results in a heat map of size 14×14. We then normalize the heat map so that its values lie between 0 and 1.

In [None]:
img = cv2.imread(img_path)
heatmap = cv2.resize(heatmap, (img.shape[1], img.shape[0]))
heatmap = np.uint8(255 * heatmap)
heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
superimposed_img = cv2.addWeighted(img, 0.6, heatmap, 0.4, 0)
%matplotlib inline
import matplotlib.pyplot as plt
plt.imshow(img)
plt.title("Original")
plt.show()
plt.title("GradCam")
plt.imshow(superimposed_img)
plt.show()

Finally, we use OpenCV to read the image and resize the heat map to the image size. We then blend the original image with the heat map to superimpose the heat map onto the image.