In [31]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [110]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
import scipy
from scipy import stats
from scipy.stats import norm, spearmanr
from sklearn.datasets import load_breast_cancer, load_iris
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder, OrdinalEncoder, StandardScaler, RobustScaler, MinMaxScaler
#from sklearn.impute import KNNImputer
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score, RandomizedSearchCV
from sklearn.metrics import pairwise_distances, accuracy_score
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import FunctionTransformer
import shap
import os
import sklearn

from joblib import dump, load
from tabulate import tabulate

import pickle
import os
from treeinterpreter import treeinterpreter as ti
from anchor import anchor_tabular
import sys
sys.path.append('..')
from utils import transform_feat_v2, transform, squeeze_dim_v2, squeeze_dim
#from all_tree_explanations_v2 import tree_shap_exp, mdi_exp, anchors_exp, lime_exp, shap_exp, lpi_exp, random_exp, local_mdi_exp
from sklearn.datasets import fetch_openml
from evaluation import get_robustness, get_robustness_sample

In [111]:
BASE_PATH = os.getcwd() 
e_path = f'{BASE_PATH}/explanations/default'
d_path = f'{BASE_PATH}/data'
m_path = f'{BASE_PATH}/models/default'

In [122]:
temp = pickle.load( open( "evaluation/robust_vals_gb.p", "rb" ) )

In [34]:
exp_names = ['lime', 'kernel_shap', 'lpi', 'tree_shap_obs', 'tree_shap_inter', 'local_mdi', 'saabas',  'random']

In [35]:
d_name = 'spambase'
m_name = 'gb'

In [36]:
exp_res = {}
for e_name in exp_names:
    temp = pickle.load( open( "{}/{}/{}_{}.p".format(e_path, d_name, e_name, m_name), "rb" ) )
    if e_name in ['kernel_shap', 'local_mdi']: 
        temp = np.squeeze(temp)
    exp_res[e_name] = temp

In [37]:
e_name = 'lime'
idx = 0

train_data = np.load("{}/{}/X_train.npy".format(d_path, d_name))
test_data = np.load("{}/{}/X_test.npy".format(d_path, d_name))
model = load(f'{m_path}/{d_name}/{m_name}.joblib')

In [49]:
def get_auc(result):
    cutoffs = np.linspace(0.05, 0.5, 10)
    temp = np.array(result).mean(axis=0)
    auc_ = 0
    for k in range(1, len(cutoffs) - 1):
        x = cutoffs[k] - cutoffs[k - 1]
        y = temp[k] + temp[k-1]
        auc = y / ( 2 * x)
    
    return auc

In [64]:
robust_vals = {'abs': {'insertion': {}, 'deletion': {}}, 'normal': {'insertion': {}, 'deletion': {}}}
eps = np.finfo(float).eps
for e_name in exp_names:
    
    robust_vals['abs']['insertion'][e_name] = []
    robust_vals['normal']['insertion'][e_name] = []
    
    robust_vals['normal']['deletion'][e_name] = []
    robust_vals['abs']['deletion'][e_name] = []
    
    temp_in_abs = []
    temp_del_abs = []
    
    temp_in = []
    temp_del = []
    
    for idx in range(100):
        exp_example = exp_res[e_name][idx]
        instance_explained = test_data[idx]
        
        temp_in_abs.append(get_robustness(exp_example, instance_explained, train_data, model, type_robust='insertion', selection_type='abs'))
        temp_in.append(get_robustness(exp_example, instance_explained, train_data, model, type_robust='insertion'))
        
        temp_del_abs.append(get_robustness(exp_example, instance_explained, train_data, model, type_robust='deletion', selection_type='abs'))
        temp_del.append(get_robustness(exp_example, instance_explained, train_data, model, type_robust='deletion'))
    
    robust_vals['abs']['insertion'][e_name] = get_auc(temp_in_abs)
    robust_vals['normal']['insertion'][e_name] = get_auc(temp_in)
    
    robust_vals['abs']['deletion'][e_name] = get_auc(temp_del_abs)
    robust_vals['normal']['deletion'][e_name] = get_auc(temp_del)

In [70]:
pd.DataFrame(robust_vals['abs']).style.highlight_max()

Unnamed: 0,insertion,deletion
lime,4.19488,3.604812
kernel_shap,0.072914,8.028069
lpi,1.014191,5.503489
tree_shap_obs,0.090388,9.309259
tree_shap_inter,0.138251,9.24474
local_mdi,0.230407,7.869383
saabas,0.274331,8.117837
random,4.97005,3.068245


In [82]:
r_df = pd.DataFrame(robust_vals['abs'])
r_df.columns = ['insertion_abs', 'deletion_abs']
r_df['insertion_abs'] = r_df['insertion_abs'].rank(ascending=True)
r_df['deletion_abs'] = r_df['deletion_abs'].rank(ascending=False)

r_df_ = pd.DataFrame(robust_vals['normal'])
r_df_['insertion'] = r_df_['insertion'].rank(ascending=True)
r_df_['deletion'] = r_df_['deletion'].rank(ascending=False)

total = pd.concat([r_df, r_df_], axis=1)
total.style.highlight_min()

Unnamed: 0,insertion_abs,deletion_abs,insertion,deletion
lime,7.0,7.0,7.0,7.0
kernel_shap,1.0,4.0,2.0,3.0
lpi,6.0,6.0,6.0,6.0
tree_shap_obs,2.0,1.0,1.0,2.0
tree_shap_inter,3.0,2.0,3.0,4.0
local_mdi,4.0,5.0,4.0,5.0
saabas,5.0,3.0,5.0,1.0
random,8.0,8.0,8.0,8.0


In [26]:
## TODO: Check the effect of sample first on the second measures 

In [None]:
s_size = [1, 10, 20, 50]
robust_vals_sample = {}

for s in s_size:
    robust_vals_sample[s] = {'abs': {'insertion': {}, 'deletion': {}}, 'normal': {'insertion': {}, 'deletion': {}}}
    eps = np.finfo(float).eps
    
    for e_name in exp_names:
        robust_vals_sample[s]['abs']['insertion'][e_name] = []
        robust_vals_sample[s]['normal']['insertion'][e_name] = []
        
        robust_vals_sample[s]['normal']['deletion'][e_name] = []
        robust_vals_sample[s]['abs']['deletion'][e_name] = []
        
        temp_in_abs = []
        temp_del_abs = []
        
        temp_in = []
        temp_del = []
        
        for idx in range(100):
            exp_example = exp_res[e_name][idx]
            instance_explained = test_data[idx]
            
            temp_in_abs.append(get_robustness_sample(exp_example, instance_explained, train_data, model, type_robust='insertion', selection_type='abs',sample_size =s ))
            temp_in.append(get_robustness_sample(exp_example, instance_explained, train_data, model, type_robust='insertion', selection_type='normal', sample_size =s))
            
            temp_del_abs.append(get_robustness_sample(exp_example, instance_explained, train_data, model, type_robust='deletion', selection_type='abs', sample_size =s))
            temp_del.append(get_robustness_sample(exp_example, instance_explained, train_data, model, type_robust='deletion', selection_type='normal', sample_size =s))
        
        robust_vals_sample[s]['abs']['insertion'][e_name] = get_auc(temp_in_abs)
        robust_vals_sample[s]['normal']['insertion'][e_name] = get_auc(temp_in)
        
        robust_vals_sample[s]['abs']['deletion'][e_name] = get_auc(temp_del_abs)
        robust_vals_sample[s]['normal']['deletion'][e_name] = get_auc(temp_del)