## Pearson and Spearman correlations between the top-5 explanations from RPS-LJE and Influence Functions
Table 7 (appendix) and Table 8 (appendix)

In [92]:
import numpy as np
import torch
import pandas as pd

In [93]:
path = "../data"

# Every CSV below is read with its first column used as the row index.
# Feature/label files that share a name suffix are loaded together as a pair.
X_train_clean_res, y_train_clean_res = (
    pd.read_csv('{}/X_train_clean_res.csv'.format(path), index_col=0),
    pd.read_csv('{}/Y_train_clean_res.csv'.format(path), index_col=0),
)
X_test_clean, y_test_clean = (
    pd.read_csv('{}/X_test_clean.csv'.format(path), index_col=0),
    pd.read_csv('{}/Y_test_clean.csv'.format(path), index_col=0),
)
X_train_clean, y_train_clean = (
    pd.read_csv('{}/X_train_clean.csv'.format(path), index_col=0),
    pd.read_csv('{}/Y_train_clean.csv'.format(path), index_col=0),
)

# The German credit data set and its translated counterpart.
data, data_translated = (
    pd.read_csv('{}/german_data.csv'.format(path), index_col=0),
    pd.read_csv('{}/german_data_translated.csv'.format(path), index_col=0),
)

In [94]:
path = '../saved_models/base'

# Open each weight archive exactly once and close it as soon as the needed
# arrays have been extracted; indexing an .npz archive reads the array into
# memory, so the arrays stay valid after the archive is closed.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary Python
# objects -- only point this at archives from a trusted source.
with np.load('{}/calculated_weights/influence_weight_matrix.npz'.format(path),
             allow_pickle=True) as influence_npz:
    weight_matrix_influence = influence_npz['weight_matrix'].squeeze()
    # The key spelling ('jaccobian_test') is kept verbatim: it is a lookup
    # key into the saved archive, not free text.
    grad_test = influence_npz['jaccobian_test']

with np.load('{}/calculated_weights/ours_weight_matrix_with_lr_0.0001.npz'.format(path),
             allow_pickle=True) as ours_npz:
    weight_matrix_ours = ours_npz['weight_matrix'].squeeze()

# `file` is deliberately left open and bound at module level so that any
# other cell reading further entries from it keeps working.
file = np.load('{}/model/saved_outputs.npz'.format(path))
intermediate_train = torch.from_numpy(file['intermediate_train'])
intermediate_test = torch.from_numpy(file['intermediate_test'])
labels_train = file['labels_train']
labels_test = file['labels_test']
pred_train = file['pred_train']
pred_test = file['pred_test']

In [96]:
# Positions at which the prediction and the label differ (non-zero absolute
# difference), flattened into a 1-D array, for the train and test outputs.
wrongly_predicted_train_ids = np.argwhere(
    np.absolute(pred_train - labels_train) > 0
).ravel()
wrongly_predicted_test_ids = np.argwhere(
    np.absolute(pred_test - labels_test) > 0
).ravel()


In [97]:
# Column names: twenty attribute names followed by 'classification'.
names = [
    'existingchecking', 'duration', 'credithistory', 'purpose',
    'creditamount', 'savings', 'employmentsince', 'installmentrate',
    'statussex', 'otherdebtors', 'residencesince', 'property',
    'age', 'otherinstallmentplans', 'housing', 'existingcredits',
    'job', 'peopleliable', 'telephone', 'foreignworker',
    'classification',
]
def get_influence_order(test_point=None):
    """Score and rank training points for one test point (influence functions).

    The score vector is the matrix product of ``grad_test[test_point, 0]``
    with the transpose of ``weight_matrix_influence``; both are module-level
    arrays.  Presumably this yields one score per training point -- confirm
    against how the weight matrix was saved.

    Args:
        test_point: Index into the first axis of ``grad_test``.

    Returns:
        A tuple ``(order, scores)`` where ``order`` holds the indices that
        sort ``scores`` in ascending order and ``scores`` is the score vector.
    """
    scores = grad_test[test_point, 0] @ weight_matrix_influence.T
    return np.argsort(scores), scores

def get_ours_order(test_point=None):
    """Score and rank training points for one test point (our weight matrix).

    The raw scores are ``np.dot(weight_matrix_ours, intermediate_test[test_point, :])``.
    When the test point's label in ``labels_test`` equals 1, the returned
    scores are the negated raw scores and the indices are listed by
    descending raw score; otherwise the raw scores and their ascending
    argsort are returned unchanged.  In both cases the index order therefore
    runs from the smallest returned score to the largest (tie order aside).

    Args:
        test_point: Row index into ``labels_test`` and ``intermediate_test``.

    Returns:
        A tuple ``(order, scores)`` as described above.
    """
    label = labels_test[test_point]
    raw_scores = np.dot(weight_matrix_ours, intermediate_test[test_point, :])
    ascending = np.argsort(raw_scores)
    if label == 1:
        # Reverse the ranking and flip the sign together so that the two
        # return values stay consistent with each other.
        return np.flip(ascending, axis=0), -raw_scores
    return ascending, raw_scores

In [98]:
from scipy.stats import spearmanr, pearsonr

# One Pearson and one Spearman coefficient is collected per test point.
pearson_correlation_list, spearman_correlation_list = [], []
for i in range(len(y_test_clean)):
    order_inf, importance_inf = get_influence_order(test_point=i)
    order_ours, importance_ours = get_ours_order(test_point=i)

    # Correlate the two methods' scores only on the training points that at
    # least one of them places among its first five.
    top_5_idx = list(set(order_inf[:5]) | set(order_ours[:5]))

    # Element [0] of each result is the correlation coefficient.
    pearson_correlation_list.append(
        pearsonr(importance_inf[top_5_idx], importance_ours[top_5_idx])[0]
    )
    spearman_correlation_list.append(
        spearmanr(importance_inf[top_5_idx], importance_ours[top_5_idx])[0]
    )


In [101]:
# Quantiles of the per-test-point Pearson coefficients.
# `method` is the current name of the keyword formerly called `interpolation`
# (deprecated since NumPy 1.22, so this call needs NumPy >= 1.22).
# 'lower' reports an observed coefficient instead of interpolating between two.
np.quantile(pearson_correlation_list,
            q=[0.0001, 0.001, 0.01, 0.1, 0.3, 0.5, 0.7, 0.9],
            method='lower')


array([-0.86289155, -0.86289155, -0.72018389, -0.56946209, -0.29982185,
       -0.0569823 ,  0.24517137,  0.60257427])

In [102]:

# Quantiles of the per-test-point Spearman coefficients.
# `method` is the current name of the keyword formerly called `interpolation`
# (deprecated since NumPy 1.22, so this call needs NumPy >= 1.22).
# 'lower' reports an observed coefficient instead of interpolating between two.
np.quantile(spearman_correlation_list,
            q=[0.0001, 0.001, 0.01, 0.1, 0.3, 0.5, 0.7, 0.9],
            method='lower')



array([-0.82142857, -0.82142857, -0.81818182, -0.66666667, -0.38333333,
       -0.14285714,  0.07142857,  0.42857143])