# Introduction 

In this notebook, we use 3 different similarity metrics to compare XAI saliency maps with heatmaps.

# 1. Formulate similarity metric functions

In [8]:
from sklearn.metrics import mean_absolute_error # MAE
from scipy.spatial import distance # Cosine similarity
import numpy as np

def KL_divergence(matrix1, matrix2):
    '''
    Adapted from https://github.com/cvzoya/saliency/blob/master/code_forMetrics/KLdiv.m
    '''
    # make sure each matrix sums up to 1
    matrix1 = matrix1 / np.sum(matrix1)
    matrix2 = matrix2 / np.sum(matrix2)

    eps = np.finfo(float).eps
    score = np.sum(np.sum(matrix2 * np.log(eps + matrix2 / (matrix1 + eps))))

    return score

# 2. Import saliency map and heatmap outputs from Google Drive

Make sure all json files have been uploaded to your Google Drive.


**Note**: The code below takes in either ROI or Fixation approach heatmaps only. Therefore, you will need to change certain parts of the code and rerun everything below.

In [None]:
import os
import torch
from tqdm import tqdm
from algorithms import read_tensor, get_model, get_class_name, rise, gradcam, risecam, gradrise
image_list = []

for img_name in os.listdir('Finalized Stimuli_Correct Resolution'):
    img = read_tensor('Finalized Stimuli_Correct Resolution/{}'.format(img_name))

    model = get_model()
    p, c = torch.topk(model(img.cuda()), k=1)
    p, c = p[0], c[0]

    image_list.append(['Finalized Stimuli_Correct Resolution/{}'.format(img_name), get_class_name(c)])

In [None]:
for img_path, predicted_calss in tqdm(image_list):
    img_name = img_path.split('/')[-1].split('.')[0]
    input_tensor = read_tensor(img_path)

    rise_sal = rise(model, input_tensor, mask_path='masks_6000.npy')
    gradcam_sal = gradcam(input_tensor)
    risecam_sal_optimal = risecam(input_tensor, mask_path='masks_6000.npy', top_k='optimal')
    risecam_sal_auto = risecam(input_tensor, mask_path='masks_6000.npy', top_k='auto')
    risexcam_sal = np.multiply(rise_sal, gradcam_sal)
    gradrise_sal = gradrise(input_tensor)

    np.save(f'results/rise/{img_name}.npy', rise_sal)
    np.save(f'results/gradcam/{img_name}.npy', gradcam_sal)
    np.save(f'results/risecam_optimal/{img_name}.npy', risecam_sal_optimal)
    np.save(f'results/risecam_auto/{img_name}.npy', risecam_sal_auto)
    np.save(f'results/risexcam/{img_name}.npy', risexcam_sal)
    np.save(f'results/gradrise/{img_name}.npy', gradrise_sal)

In [9]:
import pickle

mode = 'Fixation' # change to 'ROI' or 'Fixation' accordingly

with open(f'{mode}_cat_foc.json', 'rb') as handle: 
    cat_focused = pickle.load(handle)
with open(f'{mode}_exp_foc.json', 'rb') as handle: 
    exp_focused = pickle.load(handle)

In [10]:
import os

all_maps = {'gradcam': {}, 'rise': {}, 'risecam_auto': {}, 'risecam_optimal': {}, 'risexcam': {}, 'gradrise': {}}
for algorithm in all_maps.keys():
    for filename in os.listdir(f'results/{algorithm}'):
        image_name = filename.split('.')[0]
        image_array = np.load(f'results/{algorithm}/{filename}')
        normalized_image_array = (image_array - image_array.min()) / (image_array.max() - image_array.min())
        all_maps[algorithm][f'{image_name}.jpg'] = normalized_image_array

# 3. Generate similarity metrics

## Create focused and exploratory dictionaries to store our output

Sample output of each dictionary

e.g. to get MAE metrics for ant2.jpg using focused strategy: 
- focused['MAE']['ant2.jpg'] = [rise_score, pcb_score, gradcam_score] *in this order*

In [11]:
from collections import defaultdict

focused = defaultdict(lambda: defaultdict(dict))
explorative = defaultdict(lambda: defaultdict(dict))

In [12]:
for image in cat_focused.keys():

    cat_foc = cat_focused[image]
    exp_foc = exp_focused[image]

    focused['cat'][image] = defaultdict(list)
    focused['exp'][image] = defaultdict(list)

    for dicts in all_maps.values():
        map = dicts[image][:224, :224]

        # focused strategy
        MAE_cat_foc = mean_absolute_error(cat_foc, map)
        KL_cat_foc = KL_divergence(cat_foc, map)
        cossim_cat_foc = 1 - distance.cosine(cat_foc.flatten(), map.flatten())

        MAE_exp_foc = mean_absolute_error(exp_foc, map)
        KL_exp_foc = KL_divergence(exp_foc, map)
        cossim_exp_foc = 1 - distance.cosine(exp_foc.flatten(), map.flatten())

        focused['cat'][image]['MAE'].append(MAE_cat_foc)
        focused['cat'][image]['KL_divergence'].append(KL_cat_foc)
        focused['cat'][image]['cos_sim'].append(cossim_cat_foc)

        focused['exp'][image]['MAE'].append(MAE_exp_foc)
        focused['exp'][image]['KL_divergence'].append(KL_exp_foc)
        focused['exp'][image]['cos_sim'].append(cossim_exp_foc)

# 4. Export metrics to CSV files

In [13]:
import matplotlib.pyplot as plt
import pandas as pd

In [14]:
focusedDf = pd.DataFrame.from_dict(focused)
focusedDf.head()

Unnamed: 0,cat,exp
ant1.jpg,"{'MAE': [0.24305360150381666, 0.08854797780215...","{'MAE': [0.2082146013497567, 0.086362746072578..."
ant2.jpg,"{'MAE': [0.18343425941888755, 0.07634076885880...","{'MAE': [0.158592969752612, 0.0774286316582828..."
ant3.jpg,"{'MAE': [0.18160235269307806, 0.09705981876412...","{'MAE': [0.154010358024971, 0.0863205828260973..."
ant4.jpg,"{'MAE': [0.17849830958176618, 0.05038081539178...","{'MAE': [0.17002220511838742, 0.04998341555680..."
ant5.jpg,"{'MAE': [0.2051358438909155, 0.089157221897310...","{'MAE': [0.19953004473060335, 0.08619683710480..."


## Please run everything below *in order.*

In [15]:
column_names = [
    'RISE categorization focused',
    'GRADCAM categorization focused', 
    'RISECAM Auto categorization focused',
    'RISECAM Optimal categorization focused',
    'RISExCAM categorization focused',
    'GRADRISE categorization focused',
]

In [16]:
foc_cat = pd.json_normalize(focusedDf['cat'])

MAE_score = pd.DataFrame(foc_cat['MAE'].to_list(), columns=column_names, index=focusedDf.index)
KL_divergence_score = pd.DataFrame(foc_cat['KL_divergence'].to_list(), columns=column_names, index=focusedDf.index)
cos_sim_score = pd.DataFrame(foc_cat['cos_sim'].to_list(), columns=column_names, index=focusedDf.index)

In [17]:
foc_exp = pd.json_normalize(focusedDf['exp'])

MAE_score = MAE_score.merge(pd.DataFrame(foc_exp['MAE'].to_list(), columns=column_names, index=focusedDf.index), left_index=True, right_index=True)
KL_divergence_score = KL_divergence_score.merge(pd.DataFrame(foc_exp['KL_divergence'].to_list(), columns=column_names, index=focusedDf.index), left_index=True, right_index=True)
cos_sim_score = cos_sim_score.merge(pd.DataFrame(foc_exp['cos_sim'].to_list(), columns=column_names, index=focusedDf.index), left_index=True, right_index=True)

In [18]:
MAE_score.mean(axis=0)

RISE categorization focused_x               0.246445
GRADCAM categorization focused_x            0.171012
RISECAM Auto categorization focused_x       0.238297
RISECAM Optimal categorization focused_x    0.208170
RISExCAM categorization focused_x           0.149097
GRADRISE categorization focused_x           0.203481
RISE categorization focused_y               0.210730
GRADCAM categorization focused_y            0.152567
RISECAM Auto categorization focused_y       0.214979
RISECAM Optimal categorization focused_y    0.187802
RISExCAM categorization focused_y           0.124704
GRADRISE categorization focused_y           0.171610
dtype: float64

In [19]:
KL_divergence_score.mean(axis=0)

RISE categorization focused_x               0.988561
GRADCAM categorization focused_x            1.094419
RISECAM Auto categorization focused_x       1.305733
RISECAM Optimal categorization focused_x    1.273703
RISExCAM categorization focused_x           0.770234
GRADRISE categorization focused_x           0.763262
RISE categorization focused_y               0.414770
GRADCAM categorization focused_y            0.538295
RISECAM Auto categorization focused_y       0.658107
RISECAM Optimal categorization focused_y    0.644091
RISExCAM categorization focused_y           0.322460
GRADRISE categorization focused_y           0.337100
dtype: float64

In [20]:
cos_sim_score.mean(axis=0)

RISE categorization focused_x               0.747954
GRADCAM categorization focused_x            0.743865
RISECAM Auto categorization focused_x       0.729307
RISECAM Optimal categorization focused_x    0.736507
RISExCAM categorization focused_x           0.777602
GRADRISE categorization focused_x           0.830619
RISE categorization focused_y               0.829709
GRADCAM categorization focused_y            0.806488
RISECAM Auto categorization focused_y       0.788372
RISECAM Optimal categorization focused_y    0.792847
RISExCAM categorization focused_y           0.850797
GRADRISE categorization focused_y           0.885003
dtype: float64

In [72]:
# MAE_score.to_csv('Mean Absolute Error (MAE).csv')
# KL_divergence_score.to_csv('KL_divergence.csv')
# cos_sim_score.to_csv('Cosine Similarity.csv')