## Introduction & Setup

This Python notebook presents an implementation of Grad-CAM and applies it to two deep convolutional neural network (CNN) models: one trained on ImageNet for object recognition (mainly objects, although some animal breeds are also annotated) and another trained on VGGFace for face recognition. We also provide a set of cropped-and-aligned images and unaligned frames, all of which contain faces along with some objects in the background. You can run (and certainly play around with) the scripts to observe how the saliency maps differ across models and across the top-k predictions of each model.

In [None]:
# Use the %pip magic (rather than !pip) so the packages are installed into the
# environment of the kernel that is running this notebook.
%pip install git+https://github.com/rcmalli/keras-vggface.git
%pip install keras_applications

In [7]:
import os
import numpy as np
import tensorflow as tf
from tensorflow import keras
from keras_vggface import utils
from keras.preprocessing import image
from keras_vggface.vggface import VGGFace

# Display
from IPython.display import Image, display
import matplotlib.pyplot as plt
import matplotlib.cm as cm

## The Grad-CAM algorithm

In [8]:
def get_and_preprocess_imagenet_img_array(img_path, target_size=(299, 299)):
    """Load an image file and turn it into a one-image batch for Xception.

    Args:
        img_path: Path of the image file to load.
        target_size: Size passed to `load_img`; the image is resized to it
            on load.

    Returns:
        The image as an array with a leading batch axis of length 1, after
        `keras.applications.xception.preprocess_input` has been applied.
    """
    pil_img = keras.preprocessing.image.load_img(img_path, target_size=target_size)
    pixels = keras.preprocessing.image.img_to_array(pil_img)
    # The model expects a batch, so prepend a batch axis.
    batch = np.expand_dims(pixels, axis=0)
    return keras.applications.xception.preprocess_input(batch)


def get_and_preprocess_face_img_array(img_path, target_size=(224,224), version=1):
    """Load an image file and turn it into a one-image batch for a VGGFace model.

    Args:
        img_path: Path of the image file to load.
        target_size: Size passed to `load_img`; the image is resized to it
            on load.
        version: Forwarded to `keras_vggface.utils.preprocess_input` to select
            the preprocessing variant.

    Returns:
        The image as an array with a leading batch axis of length 1, after
        the keras_vggface preprocessing has been applied.
    """
    face_img = image.load_img(img_path, target_size=target_size)
    pixels = image.img_to_array(face_img)
    # The model expects a batch, so prepend a batch axis.
    batch = np.expand_dims(pixels, axis=0)
    return utils.preprocess_input(batch, version=version)


def make_gradcam_heatmap(img_array, model, visualization_layer, pred_index=None):
    """Compute a Grad-CAM heatmap for one output class of a Keras model.

    Args:
        img_array: Preprocessed input batch for `model`. Only the activations
            of the first sample are used to build the heatmap.
        model: Keras model to explain.
        visualization_layer: Name of the layer whose output feature map is
            visualized (assumed to be a 4-D conv feature map: batch, height,
            width, channels).
        pred_index: Index of the output class to explain. When None, the top
            predicted class of the first sample is used.

    Returns:
        A NumPy array holding the heatmap, rectified and scaled to [0, 1].
        If the class has no positive evidence anywhere (the rectified map is
        all zeros) an all-zero heatmap is returned instead of NaNs.
    """
    # Build a model that maps the input image to both the activations of the
    # visualized layer and the output predictions. `model.inputs` is already a
    # list, so it is passed as-is rather than wrapped in another list.
    grad_model = tf.keras.models.Model(
        model.inputs, [model.get_layer(visualization_layer).output, model.output]
    )

    # Record the forward pass so the class score can be differentiated with
    # respect to the visualized layer's activations.
    with tf.GradientTape() as tape:
        last_conv_layer_output, preds = grad_model(img_array)
        if pred_index is None:
            pred_index = tf.argmax(preds[0])
        class_channel = preds[:, pred_index]

    # Gradient of the chosen output neuron with regard to the feature map.
    grads = tape.gradient(class_channel, last_conv_layer_output)

    # One weight per channel: the mean gradient over the batch and both
    # spatial axes.
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))

    # Weight every channel of the first sample's feature map by its importance
    # and sum over channels to obtain the class activation map.
    last_conv_layer_output = last_conv_layer_output[0]
    heatmap = last_conv_layer_output @ pooled_grads[..., tf.newaxis]
    heatmap = tf.squeeze(heatmap)

    # Keep only positive evidence, then scale to [0, 1]. `divide_no_nan`
    # yields 0 where the maximum is 0, which avoids the NaN heatmap a plain
    # division would produce when nothing is positive.
    heatmap = tf.maximum(heatmap, 0)
    heatmap = tf.math.divide_no_nan(heatmap, tf.math.reduce_max(heatmap))

    return heatmap.numpy()


def display_gradcam(img, heatmap, alpha=0.4):
    """Overlay a Grad-CAM heatmap on an image and display the result.

    Args:
        img: Path of the original image file (callers pass the image path as
            the first argument).
        heatmap: 2-D heatmap array with values expected in [0, 1].
        alpha: Weight applied to the colorized heatmap before it is added to
            the original image.

    Side effects:
        Writes the superimposed image to 'tmp.jpg' in the current working
        directory and displays it in the notebook.
    """
    # Load the original image from the path that was passed in, instead of
    # relying on a module-level `img_path` variable happening to exist.
    original = keras.preprocessing.image.load_img(img)
    original = keras.preprocessing.image.img_to_array(original)

    # Rescale heatmap to a range 0-255. NaNs are zeroed and values clipped to
    # [0, 1] first so the uint8 cast cannot wrap around.
    heatmap = np.uint8(255 * np.clip(np.nan_to_num(heatmap), 0.0, 1.0))

    # Use jet colormap to colorize heatmap. `plt.get_cmap` is used because
    # `matplotlib.cm.get_cmap` has been removed from recent Matplotlib.
    jet = plt.get_cmap("jet")

    # Use RGB values of the colormap (drop the alpha channel)
    jet_colors = jet(np.arange(256))[:, :3]
    jet_heatmap = jet_colors[heatmap]

    # Create an image with RGB colorized heatmap, resized to the original
    # image's width and height
    jet_heatmap = image.array_to_img(jet_heatmap)
    jet_heatmap = jet_heatmap.resize((original.shape[1], original.shape[0]))
    jet_heatmap = image.img_to_array(jet_heatmap)

    # Superimpose the heatmap on original image
    superimposed_img = jet_heatmap * alpha + original
    superimposed_img = image.array_to_img(superimposed_img)

    # Save the superimposed image
    superimposed_img.save('tmp.jpg')

    # Display Grad CAM
    display(Image('tmp.jpg'))



In [None]:
# Download and unzip images
# `wget -c` continues a partial download; `unzip -n` never overwrites files
# that already exist, so re-running this cell does not stop at an overwrite prompt.
!wget -c https://github.com/dongpng/human-centered-ml/raw/master/pics.zip
!unzip -n pics.zip

In [9]:
# specify data path
# os.listdir returns entries in arbitrary order; sorting keeps the order in
# which images are processed (and so the notebook output) reproducible.
img_paths = [os.path.join('pics/', img_name) for img_name in sorted(os.listdir('pics/'))]

## ImageNet Classification

In [None]:
# Uncomment the part below if you wish to see examples with elephants and pets

#img_paths = [
#    keras.utils.get_file(
#        "african_elephant.jpg", "https://i.imgur.com/Bvro0YD.png"), 
#    keras.utils.get_file(
#    "cat_and_dog.jpg",
#    "https://storage.googleapis.com/petbacker/images/blog/2017/dog-and-cat-cover.jpg")
#    ]


topk = 3

# Make model. It does not depend on the image, so it is built once here
# instead of being re-created for every image inside the loop.
model = keras.applications.xception.Xception(weights="imagenet")
# print(model.summary())
visualization_layer = 'block14_sepconv2_act'
target_size = (299, 299)

for img_path in img_paths:
    print('********')
    print(f'image: {os.path.basename(img_path)}')
    display(Image(img_path))

    img_array = get_and_preprocess_imagenet_img_array(
        img_path, target_size=target_size)

    # Decode the top-k class labels and recover the matching class indices
    # (highest score first) so each one can be explained separately.
    preds = model.predict(img_array)
    predictions = keras.applications.xception.decode_predictions(preds, top=topk)[0]
    top_indices = preds.squeeze(axis=0).argsort()[-topk:][::-1]

    # Generate class activation heatmap for each of the top-k classes
    for idx, (prediction, top_idx) in enumerate(zip(predictions, top_indices)):
        print(f'Top {idx+1} prediction: {prediction[1]}')  

        heatmap = make_gradcam_heatmap(
            img_array, model, visualization_layer, pred_index=top_idx)

        # Show the raw heatmap with a colorbar
        fig, ax = plt.subplots()
        ms = ax.matshow(heatmap)
        fig.colorbar(ms)
        plt.show()

        # Show the heatmap superimposed on the original image
        display_gradcam(img_path, heatmap)
        # Release the figure so figures do not accumulate across iterations
        plt.close(fig)
    print('\n\n')

## ResNet50 VGGFace Model

In [None]:
# Build the VGGFace ResNet50 model once and pick the layer to visualize
model = VGGFace(model='resnet50')
# print(model.summary())
visualization_layer = 'conv5_3_1x1_increase'
target_size = (224, 224)

topk=3
print('Visualize ResNet50 Model')

for img_path in img_paths:
    print('********')
    print(f'image: {os.path.basename(img_path)}')
    display(Image(img_path))

    # Preprocess the image and run the model on it
    img_array = get_and_preprocess_face_img_array(
        img_path, target_size=target_size, version=2)
    preds = model.predict(img_array)

    # Decoded top-k entries, and the matching class indices (best first)
    predictions = utils.decode_predictions(preds, top=topk)[0]
    ascending = preds.squeeze(axis=0).argsort()
    top_indices = ascending[-topk:][::-1]

    # One Grad-CAM heatmap per top-k class
    idx = 0
    for prediction, top_idx in zip(predictions, top_indices):
        print(f'Top {idx+1} prediction: {prediction[0]}')  

        heatmap = make_gradcam_heatmap(
            img_array, model, visualization_layer, pred_index=top_idx)

        # Raw heatmap with its colorbar
        fig = plt.figure()
        ax = fig.add_subplot()
        ms = ax.matshow(heatmap)
        fig.colorbar(ms)
        plt.show()

        # Heatmap superimposed on the original image
        display_gradcam(img_path, heatmap)
        plt.close()
        idx += 1
    print('\n\n')