In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _, names in os.walk('/kaggle/input'):
    for path in (os.path.join(root, name) for name in names):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# 1 <b><span style="color:#27aee3; font-weight:1200">|</span> Required Libraries

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
from collections import Counter

import scipy as sp

import warnings
warnings.filterwarnings('ignore')

import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential, Model
from keras.layers import Dense, Conv2D, Dropout, BatchNormalization, Flatten, MaxPooling2D, GlobalAveragePooling2D
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint, LearningRateScheduler
from keras import regularizers
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# 2 <b><span style="color:#27aee3; font-weight:1200">|</span> Data

In [None]:
# Load the labelled training data from the "digit-recognizer" input directory.
train = pd.read_csv('../input/digit-recognizer/train.csv')

# Print the frame's shape, then preview the first rows as the cell output.
print(f'{train.shape}\n')
train.head()

In [None]:
# Load the test data from the "digit-recognizer" input directory.
test = pd.read_csv('../input/digit-recognizer/test.csv')

# Print the frame's shape, then preview the first rows as the cell output.
print(f'{test.shape}\n')
test.head()

# 3 <b><span style="color:#27aee3; font-weight:1200">|</span> Data Preprocessing

In [None]:
# Column 0 of the training frame is the digit label; every remaining column is a pixel value.
y_train, x_train = train.values[:, 0], train.values[:, 1:]

# All columns of the test frame are taken as pixel values.
x_test = test.values[:, :]

# The DataFrames are no longer needed once the arrays have been extracted.
del train, test

### <b><span style="color:#27aee3; font-weight:1200">※</span> Some of the training set contents<br/>

In [None]:
# Preview the first 16 training digits on a 4 x 4 grid, each titled with its label.
grid_rows, grid_cols = 4, 4
fig = plt.figure(figsize=[14, 10])

for sample_no in range(grid_rows * grid_cols):
    panel = fig.add_subplot(grid_rows, grid_cols, sample_no + 1, xticks=[], yticks=[])
    # each row of x_train is a flat pixel vector; fold it back into a 28 x 28 image
    digit = x_train[sample_no].reshape((28, 28))
    panel.imshow(digit)
    panel.set_title(str(y_train[sample_no]))

### <b><span style="color:#27aee3; font-weight:1200">※</span> Normalizing the pixel values

In [None]:
# Standardise pixels with statistics computed over the whole training array.
# NOTE: x_train / x_test are overwritten, so re-running this cell normalises them twice.
mean, std = np.mean(x_train), np.std(x_train)
eps = 1e-7  # guards against division by zero

# Normalise, then reshape each flat pixel vector to (height, width, channels) = (28, 28, 1).
x_train = ((x_train - mean) / (std + eps)).reshape(-1, 28, 28, 1)
x_test = ((x_test - mean) / (std + eps)).reshape(-1, 28, 28, 1)

# display the label array as the cell output
y_train

In [None]:
# Display the training sample at index 2 (already normalised and reshaped to 28 x 28 x 1).
plt.imshow(x_train[2])

In [None]:
# Label of the training sample at index 2, shown as the cell output.
y_train[2]

In [None]:
num_classes = 10
n_train = 37000  # the first n_train rows are used for training, the rest for validation

# One-hot encode the integer labels.
y_train = to_categorical(y_train, num_classes=num_classes)

# Hold out everything after the first n_train samples as the validation set.
x_val, y_val = x_train[n_train:], y_train[n_train:]
x_train, y_train = x_train[:n_train], y_train[:n_train]

print(f'Training samples: {x_train.shape}\nValidation samples: {x_val.shape}\nTesting samples: {x_test.shape}')

# 4 <b><span style="color:#27aee3; font-weight:1200">|</span> CNN Model

In [None]:
# CNN used for both classification and class activation maps (CAM):
# four 3x3 convolutions (16 -> 32 -> 64 -> 128 filters), 2x2 max-pooling after the
# first three, then global average pooling feeding a softmax classifier.
model = Sequential([
    # padding='same' keeps the border pixels, so the convolution preserves height/width
    Conv2D(16, input_shape=(28, 28, 1), kernel_size=(3, 3), activation='relu', padding='same'),
    # each 2x2 max-pool halves the spatial dimensions
    MaxPooling2D(pool_size=(2, 2)),

    # more convolutions with an increasing number of filters
    Conv2D(32, kernel_size=(3, 3), activation='relu', padding='same'),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(64, kernel_size=(3, 3), activation='relu', padding='same'),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(128, kernel_size=(3, 3), activation='relu', padding='same'),

    # global average pooling reduces every feature map to a single value, so the
    # Dense weights below map one-to-one onto feature maps (needed for CAM later)
    GlobalAveragePooling2D(),

    # output class probabilities; width follows num_classes (used for the one-hot
    # labels) instead of a second hard-coded 10
    Dense(num_classes, activation='softmax'),
])

model.summary()

# labels are one-hot encoded, hence categorical cross-entropy
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# 5 <b><span style="color:#27aee3; font-weight:1200">|</span> Data Augmentation

In [None]:
# Random augmentation: small rotations, zooms and horizontal/vertical shifts.
augmentation_params = dict(
    rotation_range=10,
    zoom_range=0.10,
    width_shift_range=0.1,
    height_shift_range=0.1,
)
datagen = ImageDataGenerator(**augmentation_params)

# Feed a single training image (index 6) as a batch of one to preview the augmentation.
aug = datagen.flow(x_train[6].reshape(-1, 28, 28, 1))

# Draw 24 augmented variants of that image on a 3 x 8 grid.
fig = plt.figure(figsize=[10, 8])
for slot in range(1, 25):
    panel = fig.add_subplot(3, 8, slot, xticks=[], yticks=[])
    aug_img = next(aug)[0]  # first (and only) image of the generated batch
    panel.imshow(aug_img, cmap='gray')

plt.show()

# 6 <b><span style="color:#27aee3; font-weight:1200">|</span> Network Training

LearningRateScheduler is used to update the learning rate with each new epoch.

In [None]:
epochs = 10
batch_size = 64


def _annealed_lr(epoch):
    """Return the learning rate for ``epoch``: 1e-3 multiplied by 0.95 per elapsed epoch."""
    return 1e-3 * 0.95 ** epoch


# Callback that applies the schedule above at the start of every epoch.
annealer = LearningRateScheduler(_annealed_lr)

# Train on augmented batches; validate on the untouched hold-out split.
train_batches = datagen.flow(x_train, y_train, batch_size=batch_size)
hist = model.fit(
    train_batches,
    epochs=epochs,
    steps_per_epoch=x_train.shape[0] // batch_size,
    validation_data=(x_val, y_val),
    callbacks=[annealer],
    verbose=1,
)

# 7 <b><span style="color:#27aee3; font-weight:1200">|</span> Class Activation Map (CAM)
    
    
Class Activation Map is a matrix that shows what parts of the image the model was paying more or less attention to. <br/> <br/>
    
<center><img src='https://res.cloudinary.com/vincent1bt/image/upload/c_scale,w_752/v1559767859/x-bone_ovdahr.jpg'></img></center>

<br/>

- The more intense colors signify the most attention given by the model; while
- The darker ones signify less attention.

Now, to generate the class activation maps, we need to get the features detected in the last convolutional layer and then see which ones are most active when generating the output probabilities.

So, we'll take the following ones:

In [None]:
# List the class names of the final three layers (last Conv2D, GAP, Dense).
# The class name is read from the layer's type rather than parsed out of its
# repr, whose format and module depth differ between Keras versions.
print("The last layers:\n")
for layer in model.layers[-3:]:
    print(type(layer).__name__)

Now, we can create our _**CAM** model_. Remember not to include the *GlobalAveragePooling2D* layer as it does nothing extra except squeezing the spatial dimension.

So, we create a new model using **Model** and pass in the model input and the output of the **Conv2D** and **Dense** layers.

In [None]:
# Two-headed view of the trained network, sharing its input and weights:
#   1. the feature maps of the last convolutional layer (third layer from the end)
#   2. the class probabilities of the final Dense layer
last_conv_output = model.layers[-3].output
class_probs_output = model.layers[-1].output

cam_model = Model(inputs=model.input, outputs=(last_conv_output, class_probs_output))
cam_model.summary()

Now, use that **CAM** model to predict on the test set, producing the last-convolution features and the predicted probabilities for each class.

In [None]:
# Run the two-output CAM model on the test set:
#   feats - outputs of the last convolutional layer
#   res   - outputs of the final softmax Dense layer
feats,res = cam_model.predict(x_test)

# print the shapes of both outputs
print("features shape: ", feats.shape)
print("results shape", res.shape)

### <b><span style="color:#27aee3; font-weight:1200">※</span> Generating CAM by dot product of class activation feats and weights
    
    
To generate CAM, we need to do the dot product of the class activation features and the class activation weights.

We'll need the weights from the Global Average Pooling layer (GAP) to calculate the activations of each feature given a particular class.
    
Also, keep in mind that we'll get the weights from the dense layer that follows the global average pooling layer. To understand how, see the below points:
    
- The last conv2D layer has (h,w,depth) of (3 x 3 x 128), so there are 128 features.
- The global average pooling layer collapses the (h,w,depth) of (3 x 3 x 128) into a vector of 128 values (1 value per feature) by averaging each 3 x 3 feature map.
- The activations from the global average pooling layer get passed to the last dense layer.
- The last dense layer assigns weights to each of those 128 features (for each of the 10 classes),
- So the weights of the last dense layer (which immediately follows the global average pooling layer) are referred to in this context as the **"weights of the global average pooling layer"**.

In [None]:
# The final Dense layer comes directly after global average pooling; its weights
# are what this notebook calls the "GAP weights".
last_layer = model.layers[-1]

# get_weights() returns a list of arrays; the first two entries are presumably
# the kernel and the bias (Keras Dense convention) - confirm from the printed shapes.
gap_weights_lst = last_layer.get_weights()
print(f'{gap_weights_lst[0].shape}\n{gap_weights_lst[1].shape}')

# keep only the first entry; it is later indexed as gap_weights[:, class_id]
gap_weights = gap_weights_lst[0]

Now, choose the index of any image.

In [None]:
idx = 0  # index of the test image to inspect
# last-convolution features for that single image (leading batch axis removed)
feats_img = feats[idx, :,:,:]
# NOTE: the message says "image", but the shape printed is that of the feature maps
print(f'The shape of the image at index {idx}: {feats_img.shape}')

Now, the features have height and width of 3 x 3. So, we need to scale them back up to the original image height and width i.e. 28 x 28.

We'll do it using the **scipy** module (`scipy.ndimage.zoom`).

In [None]:
# Stretch the 3 x 3 feature maps to 28 x 28; the channel axis keeps its size (factor 1).
spatial_zoom = 28/3
zoom_factors = (spatial_zoom, spatial_zoom, 1)

# order=2 selects quadratic spline interpolation
feats_img_scaled = sp.ndimage.zoom(feats_img, zoom_factors, order=2)

print(feats_img_scaled.shape)

In [None]:
# Pick one class and take its column of Dense weights (one weight per feature map).
class_id = 0
gap_weights_for_one_class = gap_weights[:, class_id]

print("features_for_img_scaled has shape ", feats_img_scaled.shape)
print("gap_weights_for_one_class has shape ", gap_weights_for_one_class.shape)

# Weighted sum over the feature axis: every pixel's feature vector is dotted
# with the class weights, leaving one activation value per pixel.
cam = feats_img_scaled.dot(gap_weights_for_one_class)

print("Class Activation Map shape ", cam.shape)

### <b><span style="color:#27aee3; font-weight:1200">※</span> Class Activation Map Function
    
All the above things can be written down in the below function.

In [None]:
def show_cam(image_index):
  """Plot one test image with its class activation map (CAM) overlaid.

  Relies on the notebook-level arrays ``x_test`` (input images), ``feats``
  (last-convolution feature maps), ``res`` (class probabilities) and
  ``gap_weights`` (weights of the final Dense layer).

  The CAM is computed for the class with the highest predicted probability.
  It is drawn with the 'Greens' colormap when that probability exceeds 0.95
  and with 'Reds' otherwise. The predicted class and its probability are
  printed before the figure is shown.

  Args:
    image_index: position of the image within ``x_test`` / ``feats`` / ``res``.
  """
  # the input image without its trailing channel axis
  image = np.squeeze(x_test[image_index], -1)

  # features of the chosen image
  feats_for_img = feats[image_index, :, :, :]

  # class with the highest output probability, and that probability
  prediction = np.argmax(res[image_index])
  probability = res[image_index][prediction]

  # Dense-layer weights belonging to the predicted class
  class_activation_weights = gap_weights[:, prediction]

  # upsample the feature maps to the image's height and width; the factors are
  # derived from the actual shapes instead of a hard-coded 28/3
  zoom_factors = (image.shape[0] / feats_for_img.shape[0],
                  image.shape[1] / feats_for_img.shape[1],
                  1)
  class_activation_features = sp.ndimage.zoom(feats_for_img, zoom_factors, order=2)

  # weighted sum of the feature maps -> one activation value per pixel
  cam_output = np.dot(class_activation_features, class_activation_weights)

  print('Predicted Class = ' +str(prediction)+ ', Probability = ' + str(probability))

  # draw the input image semi-transparently as the background
  plt.imshow(image, alpha=0.5)

  # strongly classified (> 95% probability) images are overlaid in green, others in red
  cmap_str = 'Greens' if probability > 0.95 else 'Reds'

  # overlay the CAM on top of the image
  plt.imshow(cam_output, cmap=cmap_str, alpha=0.5)

  # display the figure
  plt.show()

In [None]:
def show_maps(desired_class, num_maps):
    """Show class activation maps for test images predicted as ``desired_class``.

    Scans every prediction in the notebook-level ``res`` array (not just a
    hard-coded first 10000) in index order and calls ``show_cam`` for the first
    ``num_maps`` images whose most probable class equals ``desired_class``.

    Args:
        desired_class: class index to look for among the predictions.
        num_maps: maximum number of maps to display; values <= 0 display nothing.
    """
    # predicted class of every image, then the indices that match the request
    predicted_classes = np.argmax(res, axis=1)
    matching = np.flatnonzero(predicted_classes == desired_class)

    # clamp at 0 so a negative count cannot turn into a "from the end" slice
    for i in matching[:max(num_maps, 0)]:
        show_cam(int(i))

In [None]:
# Display up to 10 class activation maps for test images whose predicted class is 1.
show_maps(1, 10)

Remember:

- The more intense (darker, more saturated) green or red pixels mark the regions the model paid the most attention to; while
- The paler ones signify less attention.

### Thanks for reading this! 😄