In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import json

# Metrics

In [None]:
from scipy.spatial.distance import cosine, jensenshannon
from scipy.stats import spearmanr

def extract(m1, m2, i, eps=0.0001):
    """
    Pull entry ``i`` out of both score containers, guarding zero-sum vectors.

    in:
    m1, m2: containers indexed as ``m[i]`` (for a DataFrame this selects
            the column labelled ``i``)
    i:      key used to index both containers
    eps:    value written into the first position of a vector whose
            entries sum to exactly zero (default 0.0001)

    out:
    tuple (x, y). A vector with a non-zero sum (or no entries) is returned
    as-is. A zero-sum vector is returned as a float copy whose first element
    is ``eps``; the container it came from is left untouched.
    """
    def _nudged(vec):
        # The guard is sum-based: it fires whenever the entries add up to
        # exactly zero, which includes (but is not limited to) all-zero input.
        if len(vec) == 0 or sum(vec) != 0:
            return vec
        # Work on a float copy so the patch never writes through to the
        # caller's data and eps is not rejected/truncated by an integer dtype.
        if hasattr(vec, "astype"):
            vec = vec.astype(float)
        else:
            vec = list(vec)
        # Patch the first element by position; label-based ``vec[0]`` on a
        # Series could miss it (or enlarge the Series) when no label 0 exists.
        if hasattr(vec, "iloc"):
            vec.iloc[0] = eps
        else:
            vec[0] = eps
        return vec

    # Both sides get the same treatment so the result does not depend on
    # which method is passed first.
    return _nudged(m1[i]), _nudged(m2[i])

# cosine distance
def cosine_vector(m1, m2):
  """
  Cosine distance between matching columns of two score frames.

  in:
  m1, m2: DataFrames of feature importance scores from method 1/2.
          Columns 0 .. k-1 are compared, where k is the smaller of the
          two column counts.
          NOTE(review): the frames were previously documented as
          (n_instances, n_features), yet the loop walks columns --
          confirm the expected orientation.

  out:
  plain Python list of length k; entry i is the cosine distance
  between the pair of vectors returned by extract(m1, m2, i)
  """
  n_shared = min(len(m1.columns), len(m2.columns))
  # Lazy generator: each pair is extracted right before it is scored.
  vector_pairs = (extract(m1, m2, idx) for idx in range(n_shared))
  return [cosine(u, v) for u, v in vector_pairs]

# norm of differences
def norm_differences(m1, m2):
  """
  Frobenius norm of the element-wise difference m1 - m2.

  in:
  m1, m2: two score tables (documented by the notebook as DataFrames of
          feature importance scores from method 1/2) that support
          subtraction

  out:
  a single number: np.linalg.norm of the difference with ord='fro'
  """
  delta = m1 - m2
  return np.linalg.norm(delta, ord='fro')

# jensen_shannon distance
def jensen_shannon(m1, m2):
  """
  Jensen-Shannon distance between matching columns of two score frames.

  in:
  m1, m2: DataFrames of feature importance scores from method 1/2.
          Columns 0 .. k-1 are compared, where k is the smaller of the
          two column counts.
          NOTE(review): the frames were previously documented as
          (n_instances, n_features), yet the loop walks columns --
          confirm the expected orientation.

  out:
  numpy array of length k; entry i is the Jensen-Shannon distance
  between the pair of vectors returned by extract(m1, m2, i)
  """
  n_shared = min(len(m1.columns), len(m2.columns))
  # extract() yields an (x, y) tuple, unpacked straight into the metric.
  scores = [jensenshannon(*extract(m1, m2, idx)) for idx in range(n_shared)]
  return np.array(scores)

# rank-correlation (spearman's rank correlation coefficient) 
def spearman_rank_correlation(m1, m2):
  """
  Spearman rank correlation between matching columns of two score frames.

  in:
  m1, m2: DataFrames of feature importance scores from method 1/2.
          Columns 0 .. k-1 are compared, where k is the smaller of the
          two column counts.
          NOTE(review): the frames were previously documented as
          (n_instances, n_features), yet the loop walks columns --
          confirm the expected orientation.

  out:
  numpy array of length k; entry i is the rank correlation between
  the pair of vectors returned by extract(m1, m2, i). Undefined
  correlations stay in the array as NaN, and the offending vectors
  are printed for inspection.
  """
  dists = []
  for i in range(min(len(m1.columns), len(m2.columns))):
    x, y = extract(m1, m2, i)
    # Only the coefficient is used; the p-value is discarded.
    corr, _ = spearmanr(x, y)
    # NaN must be detected by value: a computed NaN is not the same object
    # as np.nan, so an identity test (`is np.nan`) never matches.
    if np.isnan(corr):
        print(x, y)
        print(sum(x), sum(y), corr)
    dists.append(corr)
  return np.array(dists)

from sklearn.metrics import mutual_info_score
# mutual information 
def mutual_info(m1, m2):
  """
  Mutual information score between matching columns of two score frames.

  in:
  m1, m2: DataFrames of feature importance scores from method 1/2.
          Columns 0 .. k-1 are compared, where k is the smaller of the
          two column counts.
          NOTE(review): the frames were previously documented as
          (n_instances, n_features), yet the loop walks columns --
          confirm the expected orientation.

  out:
  numpy array of length k; entry i is mutual_info_score applied to
  the pair of vectors returned by extract(m1, m2, i)
  """
  n_shared = min(len(m1.columns), len(m2.columns))
  # extract() yields an (x, y) tuple, unpacked straight into the metric.
  scores = [mutual_info_score(*extract(m1, m2, idx)) for idx in range(n_shared)]
  return np.array(scores)

In [None]:
# Explanation methods to compare and the courses to compare them on.
method_list = ['LIME', 'PermSHAP', 'KernelSHAP', 'DiCE', 'CEM']
course_names = ['dsp_001', 'dsp_002', 'geomatique_003', 'villesafricaines_001', 'microcontroleurs_003']

# Metric applied to every ordered pair of methods.
distance_metric = mutual_info

# Every ordered (method_i, method_j) pair, in method_list x method_list order.
# Downstream code relies on this ordering when reshaping into a square grid.
method_pairs = [(first, second) for first in method_list for second in method_list]

# all_results[c] holds one metric result per ordered pair for course c.
# NOTE(review): all_points is not defined in the visible cells; it is indexed
# here by (course, method) tuples -- confirm where it is built.
all_results = []
for course in course_names:
    comparison_results = []
    for method_i, method_j in method_pairs:
        print(course, method_i, method_j)
        scores_i = all_points[(course, method_i)]
        scores_j = all_points[(course, method_j)]
        comparison_results.append(distance_metric(scores_i, scores_j))
    all_results.append(comparison_results)

In [None]:
# Metric identifier (used in output file names) and its display name.
method = 'mutual_info'
pretty_name = "Mutual Info"

# Mean of every pairwise comparison, keyed by course position in all_results.
# Computed once up front: it does not depend on which course is being plotted.
dist_dict = {}
for i, course in enumerate(all_results):
    comp_mean = [np.mean(comparison) for comparison in course]
    dist_dict[i] = comp_mean

# The comparisons were produced by iterating method_list x method_list, so the
# heatmap labels must follow method_list order; a separately typed label list
# can silently swap rows/columns (e.g. KernelSHAP vs PermSHAP).
n_methods = len(method_list)

# NOTE(review): plt and courses are not defined in the visible cells -- confirm
# they are created elsewhere in the notebook (courses is indexed by course
# position and concatenated into the title and file name).
for course_index in range(len(all_results)):
    plt.figure()
    heatmap_df = pd.DataFrame(
        np.array(dist_dict[course_index]).reshape(n_methods, n_methods),
        index=pd.Index(method_list, name='method'),
        columns=method_list,
    )
    sns.heatmap(heatmap_df, cmap='magma', annot=True, vmin=0, vmax=1, fmt="0.2f", annot_kws={"fontsize":15})
    plt.title(pretty_name + ' between \nexplainability methods in '+courses[course_index])
    plt.savefig("plots/" + courses[course_index] + "_" + method + ".png", bbox_inches='tight')

# Persist the per-course means; the context manager closes the file even if
# the write fails.
data = json.dumps(dist_dict)
with open(method +".json","w") as f:
    f.write(data)