In [None]:
import os
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

Ramprasaath R. Selvaraju et al., arXiv (2017), https://arxiv.org/abs/1610.02391.

Grad-CAM consists of taking the output feature map of a convolution layer, given an input image, and weighting every channel in that feature map by the gradient of the class with respect to the channel. Intuitively, one way to understand this trick is to imagine that you’re weighting a spatial map of “how intensely the input image activates different channels” by “how important each channel is with regard to the class,” resulting in a spatial map of “how intensely the input image activates the class.”

This time we need the model's top (its classification head), so the model is loaded without `include_top=False`.

In [None]:
# Load Xception with ImageNet weights. `include_top` is not overridden, so the
# classification layers fetched by name further down ("avg_pool",
# "predictions") are part of this model.
model = tf.keras.applications.xception.Xception(weights="imagenet")

In [None]:
# Fetch the sample image (get_file returns the local path of the download).
img_path = tf.keras.utils.get_file(
    origin="https://img-datasets.s3.amazonaws.com/elephant.jpg",
    fname="elephant.jpg",
)


def get_img_array(img_path, target_size):
    """Load an image file as a preprocessed batch of one for Xception.

    Args:
        img_path: Path of the image file to load.
        target_size: Forwarded to `load_img` as `target_size`.

    Returns:
        The image as an array with a leading batch axis of length 1, passed
        through `xception.preprocess_input`.
    """
    pil_img = tf.keras.utils.load_img(img_path, target_size=target_size)
    # Add the batch axis the model expects in front of the image axes.
    batch = tf.keras.utils.img_to_array(pil_img)[np.newaxis, ...]
    return tf.keras.applications.xception.preprocess_input(batch)


img_array = get_img_array(img_path, target_size=(299, 299))

In [None]:
# Run the full model on the image and print the three top-scoring classes
# (decode_predictions returns one list per batch item; [0] picks our image).
preds = model.predict(img_array)
print(tf.keras.applications.xception.decode_predictions(preds, top=3)[0])

In [None]:
# Index of the highest-scoring entry in the prediction vector of our image.
np.argmax(preds[0])

In [None]:
# Name of the layer whose output is used as the Grad-CAM feature map.
last_conv_layer_name = "block14_sepconv2_act"
# Layers that are re-applied, in this order, on top of that feature map to
# rebuild the classifier part of the network.
classifier_layer_names = [
    "avg_pool",
    "predictions",
]
last_conv_layer = model.get_layer(last_conv_layer_name)
# Sub-model mapping the original model inputs to the chosen layer's output.
last_conv_layer_model = tf.keras.Model(model.inputs, last_conv_layer.output)

In [None]:
# Second sub-model: takes a tensor shaped like the chosen layer's output
# (batch axis dropped via shape[1:]) and applies the classifier layers to it.
classifier_input = tf.keras.Input(shape=last_conv_layer.output.shape[1:])
x = classifier_input
for layer_name in classifier_layer_names:
    # Reuse the layer objects of `model`, so their weights are shared.
    x = model.get_layer(layer_name)(x)
classifier_model = tf.keras.Model(classifier_input, x)

In [None]:
with tf.GradientTape() as tape:
    # Forward pass up to the chosen layer.
    last_conv_layer_output = last_conv_layer_model(img_array)
    # This output is a plain tensor, not a variable, so the tape has to be told
    # explicitly to track it before it is used below.
    tape.watch(last_conv_layer_output)
    # Forward pass through the classifier layers.
    preds = classifier_model(last_conv_layer_output)
    top_pred_index = tf.argmax(preds[0])
    # Score of the top predicted class for our image.
    top_class_channel = preds[0, top_pred_index]

# Gradient of the top class score with respect to the feature map.
grads = tape.gradient(top_class_channel, last_conv_layer_output)

$$ f_{i^{\text{th}}\,\text{class}}: \mathbb{R}^{10 \times 10 \times 2048} \rightarrow \mathbb{R}^{2048} \rightarrow \mathbb{R} $$

In [None]:
# Compare the shapes of the gradient, the class score, and the feature map.
grads.shape, top_class_channel.shape, last_conv_layer_output.shape

In [None]:
# Averaging over axes 0, 1 and 2 leaves only the last (channel) axis.
tf.reduce_mean(grads, axis=(0, 1, 2)).shape

In [None]:
# One weight per channel: the gradient averaged over every axis except the last.
pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2)).numpy()
# Drop the batch axis and scale each channel by its weight. Broadcasting along
# the trailing channel axis does in one step what a per-channel Python loop
# would do.
# NOTE: this rebinds `last_conv_layer_output` from a tensor to a weighted NumPy
# array, so re-run the GradientTape cell before re-running this one.
last_conv_layer_output = last_conv_layer_output.numpy()[0] * pooled_grads

In [None]:
# Shapes of the weighted feature map and of the per-channel weights.
last_conv_layer_output.shape, pooled_grads.shape

In [None]:
# Largest per-channel weight.
tf.reduce_max(pooled_grads)

## Heatmap from all filters

In [None]:
# Collapse the channel axis: the mean of the weighted channels is the heatmap.
heatmap = np.mean(last_conv_layer_output, axis=-1)

In [None]:
# The heatmap keeps only the leading axes of the weighted feature map.
heatmap.shape

In [None]:
# Clamp everything below 1e-10 (which removes negative values) and rescale so
# that the largest value becomes 1.
heatmap = np.maximum(heatmap, 1e-10)
heatmap = heatmap / heatmap.max()
plt.matshow(heatmap)

In [None]:
import matplotlib.cm as cm  # retained so `cm` stays defined for any cell that uses it

# Original image at its native size, as a float array.
img = tf.keras.utils.load_img(img_path)
img = tf.keras.utils.img_to_array(img)

# Scale the heatmap to 0-255 integers so it can index a 256-entry colour table.
# A separate name leaves `heatmap` untouched, so re-running this cell does not
# rescale an already-rescaled array.
heatmap_uint8 = np.uint8(255 * heatmap)

# `matplotlib.cm.get_cmap` was removed in Matplotlib 3.9; `plt.get_cmap` is the
# supported way to look a colormap up by name.
jet = plt.get_cmap("jet")
jet_colors = jet(np.arange(256))[:, :3]  # keep the first three (colour) columns
jet_heatmap = jet_colors[heatmap_uint8]

# Round-trip through an image object to resize the coloured map to the size of
# the original picture (resize takes (width, height)).
jet_heatmap = tf.keras.utils.array_to_img(jet_heatmap)
jet_heatmap = jet_heatmap.resize((img.shape[1], img.shape[0]))
jet_heatmap = tf.keras.utils.img_to_array(jet_heatmap)

# Blend: the heatmap is added on top of the image with a factor of 0.4.
superimposed_img = jet_heatmap * 0.4 + img
superimposed_img = tf.keras.utils.array_to_img(superimposed_img)

save_path = "elephant_cam.jpg"
superimposed_img.save(save_path)

## Heatmaps from each filter

In [None]:
def save_heatmap(heatmap, img_path, index, act, folder='filter_cams', alpha=0.4):
    """Overlay a heatmap on an image and save the result as a JPEG.

    Args:
        heatmap: 2-D array. Values are multiplied by 255 and cast to uint8, so
            they are expected to lie in [0, 1].
        img_path: Path of the image the heatmap is drawn over.
        index: Integer written into the file name (zero-padded to 4 digits).
        act: Float written into the file name.
        folder: Output directory; created if it does not exist.
        alpha: Factor applied to the coloured heatmap before it is added to
            the image.

    The file is written to
    ``{folder}/elephant_cam_str_{index:04d}_{act:08f}.jpg``.
    """
    os.makedirs(folder, exist_ok=True)

    img = tf.keras.utils.img_to_array(tf.keras.utils.load_img(img_path))

    # Integer version of the heatmap, used to index the 256-entry colour table.
    heatmap_uint8 = np.uint8(255 * heatmap)

    # `matplotlib.cm.get_cmap` was removed in Matplotlib 3.9; `plt.get_cmap`
    # is the supported lookup and only needs the notebook's top-level `plt`.
    jet_colors = plt.get_cmap("jet")(np.arange(256))[:, :3]
    jet_heatmap = jet_colors[heatmap_uint8]

    # Resize the coloured map to the image size (resize takes (width, height)).
    jet_heatmap = tf.keras.utils.array_to_img(jet_heatmap)
    jet_heatmap = jet_heatmap.resize((img.shape[1], img.shape[0]))
    jet_heatmap = tf.keras.utils.img_to_array(jet_heatmap)

    superimposed_img = tf.keras.utils.array_to_img(jet_heatmap * alpha + img)

    save_path = f"{folder}/elephant_cam_str_{index:04d}_{act:08f}.jpg"
    superimposed_img.save(save_path)

In [None]:
def normalize_heatmap(heatmap):
    """Clamp a heatmap from below at 1e-10 and scale its maximum to 1.

    Args:
        heatmap: Array of values. It is not modified.

    Returns:
        A new array in which every value below 1e-10 has been replaced by
        1e-10 and the result divided by its largest element.
    """
    clamped = np.maximum(heatmap, 1e-10)
    return clamped / np.max(clamped)

In [None]:
# Write one overlay per channel, skipping channels whose mean weighted
# activation does not exceed 1e-05.
for i in range(last_conv_layer_output.shape[-1]):
    heatmap = last_conv_layer_output[..., i]
    avg_activation = heatmap.mean()
    if not avg_activation > 1e-05:
        continue
    heatmap = normalize_heatmap(heatmap)
    save_heatmap(heatmap, img_path, i, avg_activation)