In [None]:
# References
## https://github.com/ismailuddin/gradcam-tensorflow-2/
## https://github.com/kazuto1011/grad-cam-pytorch/

# References for the class-based implementation
## https://github.com/nguyenhoa93/GradCAM_and_GuidedGradCAM_tf2/tree/master/src
## https://github.com/fitushar/3D-GuidedGradCAM-for-Medical-Imaging/blob/master/guided_Gradcam3.py
%matplotlib inline

import random
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.applications.resnet50 import (
    ResNet50,
    preprocess_input,
    decode_predictions,
)
import cv2

# Load the demo picture as an (H, W, 3) array; `load_img` takes (height, width).
image = np.array(load_img("./cat.jpg", target_size=(224, 224)))
plt.imshow(image)

# prepare model
# Split ResNet50 at its last convolutional block: one model maps the input to
# that block's feature maps, the other maps those feature maps to class scores.
model = ResNet50()
last_conv_layer = model.get_layer("conv5_block3_out")
last_conv_layer_model = tf.keras.Model(model.inputs, last_conv_layer.output)

classifier_input = tf.keras.Input(shape=last_conv_layer.output.shape[1:])
x = classifier_input
for layer_name in ["avg_pool", "predictions"]:
    x = model.get_layer(layer_name)(x)
classifier_model = tf.keras.Model(classifier_input, x)

# Grad-CAM
# The Keras ResNet50 weights expect inputs that went through `preprocess_input`.
# A float copy is preprocessed so `image` itself stays untouched for plotting.
inputs = preprocess_input(image[np.newaxis, ...].astype("float32"))  # (N,H,W,C)
with tf.GradientTape() as tape:
    last_conv_layer_output = last_conv_layer_model(inputs)  # (N,H,W,C)
    # The feature maps are an intermediate tensor, so they must be watched
    # explicitly before a gradient with respect to them can be requested.
    tape.watch(last_conv_layer_output)
    preds = classifier_model(last_conv_layer_output)  # (N,C)
    top_pred_index = tf.argmax(preds[0])
    top_class_channel = preds[:, top_pred_index]
grads = tape.gradient(top_class_channel, last_conv_layer_output)
# One weight per channel: the gradient averaged over batch and spatial axes.
pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2)).numpy()  # 0=N,1=H,2=W
# Broadcasting scales every channel by its weight in a single operation.
last_conv_layer_output = last_conv_layer_output.numpy()[0] * pooled_grads

# Average over all the filters to get a single 2D array
gradcam = np.mean(last_conv_layer_output, axis=-1)  # (H,W)
# ReLU, then scale to [0, 1]. The division is skipped when nothing is positive,
# so an all-non-positive map becomes all zeros instead of NaN/garbage.
gradcam = np.maximum(gradcam, 0)
peak = gradcam.max()
if peak > 0:
    gradcam = gradcam / peak
gradcam = cv2.resize(gradcam, (224, 224))
plt.imshow(image)
plt.imshow(gradcam, alpha=0.5)

In [None]:
# Counterfactual explanation
# Same recipe as Grad-CAM, except the gradients are negated before pooling, so
# channels whose activation pushes the top-class score down get positive weight.
multiobject_image = np.array(
    load_img("../data/cat_and_dog.jpg", target_size=(224, 224))
)
# The Keras ResNet50 weights expect inputs that went through `preprocess_input`.
# A float copy is preprocessed so `multiobject_image` stays intact for plotting.
inputs = preprocess_input(multiobject_image[np.newaxis, ...].astype("float32"))
with tf.GradientTape() as tape:
    last_conv_layer_output = last_conv_layer_model(inputs)
    tape.watch(last_conv_layer_output)
    preds = classifier_model(last_conv_layer_output)
    top_pred_index = tf.argmax(preds[0])
    top_class_channel = preds[:, top_pred_index]
grads = tape.gradient(top_class_channel, last_conv_layer_output)
# Negated gradient averaged over batch and spatial axes: one weight per channel.
pooled_grads = tf.reduce_mean(-grads, axis=(0, 1, 2)).numpy()
# Broadcasting scales every channel by its weight in a single operation.
last_conv_layer_output = last_conv_layer_output.numpy()[0] * pooled_grads

# Average over all the filters to get a single 2D array
ctfcl_gradcam = np.mean(last_conv_layer_output, axis=-1)
# ReLU, then scale to [0, 1]. The division is skipped when nothing is positive,
# so an all-non-positive map becomes all zeros instead of NaN/garbage.
ctfcl_gradcam = np.maximum(ctfcl_gradcam, 0)
ctfcl_peak = ctfcl_gradcam.max()
if ctfcl_peak > 0:
    ctfcl_gradcam = ctfcl_gradcam / ctfcl_peak
ctfcl_gradcam = cv2.resize(ctfcl_gradcam, (224, 224))
plt.imshow(multiobject_image)
plt.imshow(ctfcl_gradcam, alpha=0.5)

In [None]:
# Guided Grad-CAM1
# The Keras ResNet50 weights expect inputs that went through `preprocess_input`.
# A float copy is preprocessed so `image` itself stays untouched for plotting.
inputs = preprocess_input(image[np.newaxis, ...].astype("float32"))
with tf.GradientTape() as tape:
    last_conv_layer_output = last_conv_layer_model(inputs)
    tape.watch(last_conv_layer_output)
    preds = classifier_model(last_conv_layer_output)
    top_pred_index = tf.argmax(preds[0])
    top_class_channel = preds[:, top_pred_index]
grads = tape.gradient(top_class_channel, last_conv_layer_output)[0]
last_conv_layer_output = last_conv_layer_output[0]
# "Guided" gradients: keep a gradient only where it is positive AND the
# corresponding activation is positive; everything else is zeroed.
guided_grads = (
    tf.cast(last_conv_layer_output > 0, grads.dtype)
    * tf.cast(grads > 0, grads.dtype)
    * grads
)
pooled_guided_grads = tf.reduce_mean(guided_grads, axis=(0, 1))  # 0=H,1=W
# Channel-weighted sum of the feature maps as one broadcasted op. This avoids
# both the per-channel Python loop and accumulating a Tensor into an ndarray,
# and it starts from zero rather than from an all-ones offset.
guided_gradcam = tf.reduce_sum(
    last_conv_layer_output * pooled_guided_grads, axis=-1
).numpy()
guided_gradcam = cv2.resize(guided_gradcam, (224, 224))
guided_gradcam = np.maximum(guided_gradcam, 0)
# Min-max scale to [0, 1]; a perfectly flat map is left at zero instead of
# dividing by zero.
guided_gradcam = guided_gradcam - guided_gradcam.min()
value_span = guided_gradcam.max()
if value_span > 0:
    guided_gradcam = guided_gradcam / value_span
plt.imshow(image)
plt.imshow(guided_gradcam, alpha=0.5)

In [None]:
# Guided Grad-CAM2
@tf.custom_gradient
def guided_relu(x):
    """ReLU with a "guided" backward pass.

    Forward: returns `tf.nn.relu(x)`.
    Backward: the upstream gradient `dy` is passed through only where both
    `dy > 0` and `x > 0`; it is zeroed everywhere else.
    """

    def grad(dy):
        # Masks are cast to the gradient's own dtype (instead of a fixed
        # "float32") so the product is also valid for float16/float64 tensors.
        positive_grad = tf.cast(dy > 0, dy.dtype)
        positive_input = tf.cast(x > 0, dy.dtype)
        return positive_grad * positive_input * dy

    return tf.nn.relu(x), grad

class GuidedBackprop:
    """Guided backpropagation from a chosen layer's output back to the input.

    A truncated copy of `model` (input -> `layer_name` output) is built in which
    every layer whose `activation` is Keras' ReLU uses `guided_relu` instead.
    """

    def __init__(self, model, layer_name: str):
        """Store the source model and target layer name, then build `gb_model`."""
        self.model = model
        self.layer_name = layer_name
        self.gb_model = self.build_guided_model()

    def build_guided_model(self):
        """Return the truncated model with ReLU activations replaced by `guided_relu`.

        The activation swap is done on a clone carrying the same weights, so the
        layers of `self.model` are left unmodified.
        """
        # `clone_model` creates fresh layers; copying the weights afterwards makes
        # the clone numerically identical to the source model.
        guided_source = tf.keras.models.clone_model(self.model)
        guided_source.set_weights(self.model.get_weights())
        gb_model = tf.keras.Model(
            guided_source.inputs, guided_source.get_layer(self.layer_name).output
        )
        # The first entry of `layers` is skipped, as in the original code;
        # layers without an `activation` attribute are ignored.
        for layer in gb_model.layers[1:]:
            if getattr(layer, "activation", None) == tf.keras.activations.relu:
                layer.activation = guided_relu
        return gb_model

    def guided_backprop(self, image: np.ndarray):
        """Return the gradient of the guided model's output w.r.t. `image`.

        `image` is cast to float32 and passed to the model as-is, so it must
        already carry a leading batch axis. Only the gradient for the first
        batch element is returned.
        """
        inputs = tf.cast(image, tf.float32)
        with tf.GradientTape() as tape:
            # `inputs` is a plain tensor, not a variable, so it has to be watched.
            tape.watch(inputs)
            outputs = self.gb_model(inputs)
        return tape.gradient(outputs, inputs)[0]

gb = GuidedBackprop(model, "conv5_block3_out")
# The Keras ResNet50 weights expect inputs that went through `preprocess_input`.
# A float copy is preprocessed so `image` itself is not modified.
guided_inputs = preprocess_input(image[np.newaxis, ...].astype("float32"))
saliency_map = gb.guided_backprop(guided_inputs).numpy()  # (H,W,C)
# ResNet50's `preprocess_input` reorders channels RGB -> BGR, so the gradient
# comes back in BGR order; flip it to RGB for display.
saliency_map = saliency_map[..., ::-1]
# Weight every pixel's gradient by the Guided Grad-CAM heat-map; the new
# trailing axis broadcasts the (H,W) map over the three colour channels.
saliency_map = saliency_map * guided_gradcam[..., np.newaxis]
# Standardise, squeeze to mean 0.5 / std 0.25, clip to [0, 1], convert to 8-bit.
saliency_map = (saliency_map - saliency_map.mean()) / (
    saliency_map.std() + tf.keras.backend.epsilon()
)
saliency_map = np.clip(saliency_map * 0.25 + 0.5, 0, 1)
saliency_map = (saliency_map * 255).astype(np.uint8)
plt.imshow(saliency_map)