In [1]:
import warnings
warnings.filterwarnings("ignore")

from xai_agg.agg_exp import *

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.ensemble import RandomForestClassifier

import dill

2024-12-09 15:13:16.537314: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-12-09 15:13:16.753242: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Data Loading and Preprocessing

In [2]:
original_data = pd.read_csv('../data/german_credit_data_updated.csv')

# Dataset overview - German Credit Risk (from Kaggle):
# 1. Age (numeric)
# 2. Sex (text: male, female)
# 3. Job (numeric: 0 - unskilled and non-resident, 1 - unskilled and resident, 2 - skilled, 3 - highly skilled)
# 4. Housing (text: own, rent, or free)
# 5. Saving accounts (text - little, moderate, quite rich, rich)
# 6. Checking account (numeric, in DM - Deutsch Mark)
# 7. Credit amount (numeric, in DM)
# 8. Duration (numeric, in month)
# 9. Purpose (text: car, furniture/equipment, radio/TV, domestic appliances, repairs, education, business, vacation/others)

display(original_data.head())
display(original_data.describe())
display(original_data.info())

# Display the unique values of the categorical features:
print('Unique values of the categorical features:')
for col in original_data.select_dtypes(include='object'):
    print(f'\t- {col}: {original_data[col].unique()}')

Unnamed: 0.1,Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Credit Risk
0,0,67,male,2,own,,little,1169,6,radio/TV,1
1,1,22,female,2,own,little,moderate,5951,48,radio/TV,2
2,2,49,male,1,own,little,,2096,12,education,1
3,3,45,male,2,free,little,little,7882,42,furniture/equipment,1
4,4,53,male,2,free,little,little,4870,24,car,2


Unnamed: 0.1,Unnamed: 0,Age,Job,Credit amount,Duration,Credit Risk
count,954.0,954.0,954.0,954.0,954.0,954.0
mean,476.5,35.501048,1.909853,3279.112159,20.780922,1.302935
std,275.540378,11.379668,0.649681,2853.315158,12.046483,0.459768
min,0.0,19.0,0.0,250.0,4.0,1.0
25%,238.25,27.0,2.0,1360.25,12.0,1.0
50%,476.5,33.0,2.0,2302.5,18.0,1.0
75%,714.75,42.0,2.0,3975.25,24.0,2.0
max,953.0,75.0,3.0,18424.0,72.0,2.0


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 954 entries, 0 to 953
Data columns (total 11 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   Unnamed: 0        954 non-null    int64 
 1   Age               954 non-null    int64 
 2   Sex               954 non-null    object
 3   Job               954 non-null    int64 
 4   Housing           954 non-null    object
 5   Saving accounts   779 non-null    object
 6   Checking account  576 non-null    object
 7   Credit amount     954 non-null    int64 
 8   Duration          954 non-null    int64 
 9   Purpose           954 non-null    object
 10  Credit Risk       954 non-null    int64 
dtypes: int64(6), object(5)
memory usage: 82.1+ KB


None

Unique values of the categorical features:
	- Sex: ['male' 'female']
	- Housing: ['own' 'free' 'rent']
	- Saving accounts: [nan 'little' 'quite rich' 'rich' 'moderate']
	- Checking account: ['little' 'moderate' nan 'rich']
	- Purpose: ['radio/TV' 'education' 'furniture/equipment' 'car' 'business'
 'domestic appliances' 'repairs' 'vacation/others']


In [3]:
preprocessed_data = original_data.copy()

# For savings and checking accounts, we will replace the missing values with 'none':
preprocessed_data['Saving accounts'].fillna('none', inplace=True)
preprocessed_data['Checking account'].fillna('none', inplace=True)

# Dropping index column:
preprocessed_data.drop(columns=['Unnamed: 0'], inplace=True)

# Using pd.dummies to one-hot-encode the categorical features
preprocessed_data["Job"] = preprocessed_data["Job"].map({0: 'unskilled_nonresident', 1: 'unskilled_resident',
                                                         2: 'skilled', 3: 'highlyskilled'})

categorical_features = preprocessed_data.select_dtypes(include='object').columns
numerical_features = preprocessed_data.select_dtypes(include='number').columns.drop('Credit Risk')
print(f'Categorical features: {categorical_features}')
print(f'Numerical features: {numerical_features}')

preprocessed_data = pd.get_dummies(preprocessed_data, columns=categorical_features, dtype='int64')

# Remapping the target variable to 0 and 1:
preprocessed_data['Credit Risk'] = preprocessed_data['Credit Risk'].map({1: 0, 2: 1})

# Make sure all column names are valid python identifiers (important for pd.query() calls):
preprocessed_data.columns = preprocessed_data.columns.str.replace(' ', '_')
preprocessed_data.columns = preprocessed_data.columns.str.replace('/', '_')

# Normalizing the data
scaler = StandardScaler()
scaled_preprocessed_data = scaler.fit_transform(preprocessed_data)

display(preprocessed_data.head())
display(preprocessed_data.info())

display(scaled_preprocessed_data)

Categorical features: Index(['Sex', 'Job', 'Housing', 'Saving accounts', 'Checking account',
       'Purpose'],
      dtype='object')
Numerical features: Index(['Age', 'Credit amount', 'Duration'], dtype='object')


Unnamed: 0,Age,Credit_amount,Duration,Credit_Risk,Sex_female,Sex_male,Job_highlyskilled,Job_skilled,Job_unskilled_nonresident,Job_unskilled_resident,...,Checking_account_none,Checking_account_rich,Purpose_business,Purpose_car,Purpose_domestic_appliances,Purpose_education,Purpose_furniture_equipment,Purpose_radio_TV,Purpose_repairs,Purpose_vacation_others
0,67,1169,6,0,0,1,0,1,0,0,...,0,0,0,0,0,0,0,1,0,0
1,22,5951,48,1,1,0,0,1,0,0,...,0,0,0,0,0,0,0,1,0,0
2,49,2096,12,0,0,1,0,0,0,1,...,1,0,0,0,0,1,0,0,0,0
3,45,7882,42,0,0,1,0,1,0,0,...,0,0,0,0,0,0,1,0,0,0
4,53,4870,24,1,0,1,0,1,0,0,...,0,0,0,1,0,0,0,0,0,0


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 954 entries, 0 to 953
Data columns (total 30 columns):
 #   Column                       Non-Null Count  Dtype
---  ------                       --------------  -----
 0   Age                          954 non-null    int64
 1   Credit_amount                954 non-null    int64
 2   Duration                     954 non-null    int64
 3   Credit_Risk                  954 non-null    int64
 4   Sex_female                   954 non-null    int64
 5   Sex_male                     954 non-null    int64
 6   Job_highlyskilled            954 non-null    int64
 7   Job_skilled                  954 non-null    int64
 8   Job_unskilled_nonresident    954 non-null    int64
 9   Job_unskilled_resident       954 non-null    int64
 10  Housing_free                 954 non-null    int64
 11  Housing_own                  954 non-null    int64
 12  Housing_rent                 954 non-null    int64
 13  Saving_accounts_little       954 non-null    int64

None

array([[ 2.7694545 , -0.7399179 , -1.22763429, ...,  1.62518349,
        -0.14633276, -0.11286653],
       [-1.18704073,  0.93690642,  2.26068929, ...,  1.62518349,
        -0.14633276, -0.11286653],
       [ 1.18685641, -0.41486224, -0.72930235, ..., -0.61531514,
        -0.14633276, -0.11286653],
       ...,
       [-1.0111965 , -0.39768023,  1.26402541, ..., -0.61531514,
        -0.14633276, -0.11286653],
       [-0.65950803,  0.29240557,  0.26736153, ..., -0.61531514,
        -0.14633276, -0.11286653],
       [-0.83535227,  2.69823821,  1.26402541, ..., -0.61531514,
        -0.14633276, -0.11286653]])

In [4]:
y = preprocessed_data['Credit_Risk']
X = preprocessed_data.drop(columns='Credit_Risk')

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [5]:
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

print(f'Accuracy: {accuracy_score(y_test, y_pred)}')
print(f'ROC AUC: {roc_auc_score(y_test, y_pred)}')

Accuracy: 0.7696335078534031
ROC AUC: 0.6830357142857143


### Checking metrics behaviour for LIME, SHAP and Anchor

In [6]:
shap_exp = ShapTabularTreeWrapper(clf, X_train, categorical_features)
lime_exp = LimeWrapper(clf, X_train, categorical_features)
anchor_exp = AnchorWrapper(clf, X_train, categorical_features)

evaluator = ExplanationModelEvaluator(clf, X_train, categorical_features, noise_gen_args={'encoding_dim': 5, 'epochs': 500})
evaluator.init()

Epoch 1/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 1.2684 - val_loss: 1.2426
Epoch 2/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.2640 - val_loss: 1.2267
Epoch 3/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.1963 - val_loss: 1.2117
Epoch 4/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.1986 - val_loss: 1.1974
Epoch 5/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.2171 - val_loss: 1.1830
Epoch 6/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.1896 - val_loss: 1.1683
Epoch 7/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.1779 - val_loss: 1.1537
Epoch 8/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.1729 - val_loss: 1.1389
Epoch 9/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━

In [7]:
metric_runs = {shap_exp: [], lime_exp: [], anchor_exp: []} # Arrays of the format [[faithfulness1, sensitivity1, complexity1], [faithfulness2, sensitivity2, complexity2], ...]

num_instances_to_check = 100
# Chosse num_isntances_to_check unique indexes from X_test size
indexes = np.random.choice(X_test.index, num_instances_to_check, replace=False)

i = 0
for idx in indexes:
    i = i + 1
    print (f'{i} - Checking instance {idx}')
    instance_data_row = X_test.loc[idx]
    for exp in [shap_exp, lime_exp, anchor_exp]:
        row = [
            idx,
            evaluator.faithfullness_correlation(exp, instance_data_row),
            evaluator.sensitivity(exp, instance_data_row),
            evaluator.complexity(exp, instance_data_row)
        ]
        print(f'\t- {exp}: {row}')
        metric_runs[exp].append(row)

1 - Checking instance 209
	- <xai_agg.explainers.ShapTabularTreeWrapper object at 0x71645240ea40>: [209, 0.42290249578970807, 0.9432196154806233, 2.4120545807098988]
	- <xai_agg.explainers.LimeWrapper object at 0x71645240eb00>: [209, 0.35237333150285544, 0.8316256157635467, 2.510913943994954]
	- <xai_agg.explainers.AnchorWrapper object at 0x7165aa6d9f30>: [209, 0.5145414225488084, 0.550506385031493, 1.0935839846319686]
2 - Checking instance 928
	- <xai_agg.explainers.ShapTabularTreeWrapper object at 0x71645240ea40>: [928, 0.5990180279019851, 0.9793555919087472, 2.498902716977565]
	- <xai_agg.explainers.LimeWrapper object at 0x71645240eb00>: [928, 0.5155262489548986, 0.863054187192118, 2.6367931295715112]
	- <xai_agg.explainers.AnchorWrapper object at 0x7165aa6d9f30>: [928, 0.5910975881033276, 0.675591694953676, 0.6891874292301077]
3 - Checking instance 882
	- <xai_agg.explainers.ShapTabularTreeWrapper object at 0x71645240ea40>: [882, 0.29425723062053527, 0.9888777357363283, 2.555484684

Could not find an anchor satisfying the 0.95 precision constraint. Now returning the best non-eligible result. The desired precision threshold might not be achieved due to the quantile-based discretisation of the numerical features. The resolution of the bins may be too large to find an anchor of required precision. Consider increasing the number of bins in `disc_perc`, but note that for some numerical distribution (e.g. skewed distribution) it may not help.
Could not find an anchor satisfying the 0.95 precision constraint. Now returning the best non-eligible result. The desired precision threshold might not be achieved due to the quantile-based discretisation of the numerical features. The resolution of the bins may be too large to find an anchor of required precision. Consider increasing the number of bins in `disc_perc`, but note that for some numerical distribution (e.g. skewed distribution) it may not help.
Could not find an anchor satisfying the 0.95 precision constraint. Now ret

	- <xai_agg.explainers.AnchorWrapper object at 0x7165aa6d9f30>: [513, 0.502836649335933, 0.6818439186460941, 2.5643636239230574]
11 - Checking instance 342
	- <xai_agg.explainers.ShapTabularTreeWrapper object at 0x71645240ea40>: [342, 0.15224462771458117, 0.9613981308791011, 2.477746106554715]
	- <xai_agg.explainers.LimeWrapper object at 0x71645240eb00>: [342, 0.011037421968989047, 0.8636453201970442, 2.598162908757933]
	- <xai_agg.explainers.AnchorWrapper object at 0x7165aa6d9f30>: [342, 0.3636054676271399, 0.5619648898411229, 1.052964908781235]
12 - Checking instance 621
	- <xai_agg.explainers.ShapTabularTreeWrapper object at 0x71645240ea40>: [621, 0.7671024581527772, 0.9512844376044125, 2.124081385806579]
	- <xai_agg.explainers.LimeWrapper object at 0x71645240eb00>: [621, 0.4998933410275408, 0.8513793103448274, 2.5464336461783503]
	- <xai_agg.explainers.AnchorWrapper object at 0x7165aa6d9f30>: [621, 0.6235142182499209, 0.5744977044869946, 1.0530776095818093]
13 - Checking instance 7

Could not find an anchor satisfying the 0.95 precision constraint. Now returning the best non-eligible result. The desired precision threshold might not be achieved due to the quantile-based discretisation of the numerical features. The resolution of the bins may be too large to find an anchor of required precision. Consider increasing the number of bins in `disc_perc`, but note that for some numerical distribution (e.g. skewed distribution) it may not help.
Could not find an anchor satisfying the 0.95 precision constraint. Now returning the best non-eligible result. The desired precision threshold might not be achieved due to the quantile-based discretisation of the numerical features. The resolution of the bins may be too large to find an anchor of required precision. Consider increasing the number of bins in `disc_perc`, but note that for some numerical distribution (e.g. skewed distribution) it may not help.
Could not find an anchor satisfying the 0.95 precision constraint. Now ret

	- <xai_agg.explainers.AnchorWrapper object at 0x7165aa6d9f30>: [333, 0.16316701528827363, 0.8062590687573735, 2.376669810595559]
20 - Checking instance 620
	- <xai_agg.explainers.ShapTabularTreeWrapper object at 0x71645240ea40>: [620, 0.23434796549010473, 0.9356549708533517, 2.4190888079593504]
	- <xai_agg.explainers.LimeWrapper object at 0x71645240eb00>: [620, 0.15942356469414182, 0.8340394088669949, 2.678981281546657]
	- <xai_agg.explainers.AnchorWrapper object at 0x7165aa6d9f30>: [620, 0.34328135272683397, 0.6412813668182127, 1.5697791116993587]
21 - Checking instance 120
	- <xai_agg.explainers.ShapTabularTreeWrapper object at 0x71645240ea40>: [120, 0.4034203188040525, 0.9574559774392186, 2.4348154075347956]
	- <xai_agg.explainers.LimeWrapper object at 0x71645240eb00>: [120, 0.2953757276382323, 0.8642364532019704, 2.575196101800435]
	- <xai_agg.explainers.AnchorWrapper object at 0x7165aa6d9f30>: [120, 0.43146841950687853, 0.6291096825326908, 1.5081067098944674]
22 - Checking instan

Could not find an anchor satisfying the 0.95 precision constraint. Now returning the best non-eligible result. The desired precision threshold might not be achieved due to the quantile-based discretisation of the numerical features. The resolution of the bins may be too large to find an anchor of required precision. Consider increasing the number of bins in `disc_perc`, but note that for some numerical distribution (e.g. skewed distribution) it may not help.
Could not find an anchor satisfying the 0.95 precision constraint. Now returning the best non-eligible result. The desired precision threshold might not be achieved due to the quantile-based discretisation of the numerical features. The resolution of the bins may be too large to find an anchor of required precision. Consider increasing the number of bins in `disc_perc`, but note that for some numerical distribution (e.g. skewed distribution) it may not help.
Could not find an anchor satisfying the 0.95 precision constraint. Now ret

	- <xai_agg.explainers.AnchorWrapper object at 0x7165aa6d9f30>: [422, 0.38841862447973396, 0.6304834243443047, 2.6885986600261345]
77 - Checking instance 462
	- <xai_agg.explainers.ShapTabularTreeWrapper object at 0x71645240ea40>: [462, 0.10483621220308414, 0.9884336241691803, 2.623153515853469]
	- <xai_agg.explainers.LimeWrapper object at 0x71645240eb00>: [462, 0.0069329944473081745, 0.8773891625615763, 2.599460577438732]
	- <xai_agg.explainers.AnchorWrapper object at 0x7165aa6d9f30>: [462, 0.13906575257318637, 0.7137609270184566, 1.3229334924504847]
78 - Checking instance 918
	- <xai_agg.explainers.ShapTabularTreeWrapper object at 0x71645240ea40>: [918, 0.10740154701230815, 0.9881632444623454, 2.55966172035827]
	- <xai_agg.explainers.LimeWrapper object at 0x71645240eb00>: [918, 0.05346582326347757, 0.8315763546798027, 2.662882031424578]
	- <xai_agg.explainers.AnchorWrapper object at 0x7165aa6d9f30>: [918, 0.22490904116636917, 0.38093644071513516, 1.262253568153681]
79 - Checking inst

Could not find an anchor satisfying the 0.95 precision constraint. Now returning the best non-eligible result. The desired precision threshold might not be achieved due to the quantile-based discretisation of the numerical features. The resolution of the bins may be too large to find an anchor of required precision. Consider increasing the number of bins in `disc_perc`, but note that for some numerical distribution (e.g. skewed distribution) it may not help.
Could not find an anchor satisfying the 0.95 precision constraint. Now returning the best non-eligible result. The desired precision threshold might not be achieved due to the quantile-based discretisation of the numerical features. The resolution of the bins may be too large to find an anchor of required precision. Consider increasing the number of bins in `disc_perc`, but note that for some numerical distribution (e.g. skewed distribution) it may not help.
Could not find an anchor satisfying the 0.95 precision constraint. Now ret

	- <xai_agg.explainers.AnchorWrapper object at 0x7165aa6d9f30>: [522, 0.5674510134407209, 0.6380215598441147, 2.6984640410212126]
85 - Checking instance 72
	- <xai_agg.explainers.ShapTabularTreeWrapper object at 0x71645240ea40>: [72, 0.249749460824588, 0.9779648530741399, 2.5034852886827417]
	- <xai_agg.explainers.LimeWrapper object at 0x71645240eb00>: [72, 0.11289703522640587, 0.842413793103448, 2.5905888509785253]
	- <xai_agg.explainers.AnchorWrapper object at 0x7165aa6d9f30>: [72, 0.22631300363116275, 0.6022621778451311, 1.658911281327487]
86 - Checking instance 377
	- <xai_agg.explainers.ShapTabularTreeWrapper object at 0x71645240ea40>: [377, 0.7630033618899179, 0.9700689295600364, 2.454041047709378]
	- <xai_agg.explainers.LimeWrapper object at 0x71645240eb00>: [377, 0.556661125625397, 0.868374384236453, 2.5724051354329798]
	- <xai_agg.explainers.AnchorWrapper object at 0x7165aa6d9f30>: [377, 0.5955876770217208, 0.5923974780206105, 1.087849723251832]
87 - Checking instance 398
	- <

In [8]:
# Change metric_runs keys to ["SHAP", "LIME", "Anchor"]
metric_runs1 = {"shap": metric_runs[shap_exp], "lime": metric_runs[lime_exp], "anchor": metric_runs[anchor_exp]}

In [9]:
metric_runs1

{'shap': [[209, 0.42290249578970807, 0.9432196154806233, 2.4120545807098988],
  [928, 0.5990180279019851, 0.9793555919087472, 2.498902716977565],
  [882, 0.29425723062053527, 0.9888777357363283, 2.555484684269827],
  [951, 0.5172303497289477, 0.9822011924274732, 2.410866091437909],
  [629, 0.6339661356767962, 0.9800946137561899, 2.63908690892781],
  [490, 0.43567342098387285, 0.9325486136080812, 2.2492532784662913],
  [567, 0.6689205959776308, 0.9655345781017022, 2.34744725416488],
  [316, 0.11369786353456922, 0.988630874294785, 2.514158230829087],
  [786, 0.4053114869084473, 0.9761545947501127, 2.538343206671231],
  [513, 0.07410789102758963, 0.989049720938844, 2.4754379439739416],
  [342, 0.15224462771458117, 0.9613981308791011, 2.477746106554715],
  [621, 0.7671024581527772, 0.9512844376044125, 2.124081385806579],
  [731, 0.5545574048173051, 0.9880389108847536, 2.5596312851168306],
  [893, 0.19435142379445403, 0.9791330084391283, 2.354025934011246],
  [96, 0.647298699669553, 0.98310

In [10]:
# pickle metric_runs
with open('./pickles/experiments_org_metric_runs_indexed.pkl', 'wb') as f:
    dill.dump(metric_runs1, f)

In [11]:
# read pickle
with open('./pickles/experiments_org_metric_runs_indexed.pkl', 'rb') as f:
    metric_runs_indexed = dill.load(f)

metric_runs_indexed

{'shap': [[209, 0.42290249578970807, 0.9432196154806233, 2.4120545807098988],
  [928, 0.5990180279019851, 0.9793555919087472, 2.498902716977565],
  [882, 0.29425723062053527, 0.9888777357363283, 2.555484684269827],
  [951, 0.5172303497289477, 0.9822011924274732, 2.410866091437909],
  [629, 0.6339661356767962, 0.9800946137561899, 2.63908690892781],
  [490, 0.43567342098387285, 0.9325486136080812, 2.2492532784662913],
  [567, 0.6689205959776308, 0.9655345781017022, 2.34744725416488],
  [316, 0.11369786353456922, 0.988630874294785, 2.514158230829087],
  [786, 0.4053114869084473, 0.9761545947501127, 2.538343206671231],
  [513, 0.07410789102758963, 0.989049720938844, 2.4754379439739416],
  [342, 0.15224462771458117, 0.9613981308791011, 2.477746106554715],
  [621, 0.7671024581527772, 0.9512844376044125, 2.124081385806579],
  [731, 0.5545574048173051, 0.9880389108847536, 2.5596312851168306],
  [893, 0.19435142379445403, 0.9791330084391283, 2.354025934011246],
  [96, 0.647298699669553, 0.98310

In [12]:
# Create a pandas dataframe from the metric_runs dictionary
lime_metric_runs = pd.DataFrame(metric_runs_indexed['lime'], columns=['idx', 'faithfulness', 'sensitivity', 'complexity'])
shap_metric_runs = pd.DataFrame(metric_runs_indexed['shap'], columns=['idx', 'faithfulness', 'sensitivity', 'complexity'])
anchor_metric_runs = pd.DataFrame(metric_runs_indexed['anchor'], columns=['idx', 'faithfulness', 'sensitivity', 'complexity'])

In [13]:
lime_metric_runs.describe()

Unnamed: 0,idx,faithfulness,sensitivity,complexity
count,100.0,100.0,100.0,100.0
mean,481.74,0.308458,0.850887,2.598218
std,270.125845,0.209106,0.021376,0.049137
min,23.0,0.004692,0.788079,2.458548
25%,265.75,0.113243,0.836921,2.563608
50%,485.5,0.289365,0.851576,2.599077
75%,704.0,0.500935,0.866121,2.636179
max,951.0,0.699996,0.901182,2.732209


In [14]:
shap_metric_runs.describe()

Unnamed: 0,idx,faithfulness,sensitivity,complexity
count,100.0,100.0,100.0,100.0
mean,481.74,0.441231,0.97429,2.464666
std,270.125845,0.239863,0.013342,0.133481
min,23.0,0.007411,0.9181,2.124081
25%,265.75,0.28313,0.969764,2.375842
50%,485.5,0.424894,0.977416,2.473547
75%,704.0,0.625389,0.982847,2.559821
max,951.0,0.883519,0.990773,2.758602


In [15]:
anchor_metric_runs.describe()

Unnamed: 0,idx,faithfulness,sensitivity,complexity
count,100.0,100.0,100.0,100.0
mean,481.74,0.388381,0.65439,1.370248
std,270.125845,0.205174,0.108059,0.526778
min,23.0,0.003289,0.380936,0.604767
25%,265.75,0.22234,0.583928,1.033351
50%,485.5,0.383127,0.642715,1.320988
75%,704.0,0.540573,0.737303,1.564009
max,951.0,0.83136,0.920907,2.822148


In [124]:
# display a few random rows of lime_metric_runs
lime_metric_runs.sample(5)

Unnamed: 0,idx,faithfulness,sensitivity,complexity
17,826,0.593268,1.0,2.514334
30,527,0.322364,1.0,2.564539
80,534,0.580512,1.0,2.600642
85,567,0.545407,1.0,2.644347
88,346,0.042042,1.0,2.587921


In [10]:
evaluator._sensitivity_sequential(lime_exp, X_test.loc[527])

0.9999999999999998

In [20]:
# read pickle
with open('./pickles/experiments_org_metric_runs.pkl', 'rb') as f:
    metric_runs = dill.load(f)

metric_runs

{'shap': [[0.5723255128372227, 0.9681694998768169, 2.3930018139207707],
  [0.4455079719457512, 0.7932906283588386, 2.5643626036115066],
  [0.11866524032382038, 0.5698036691722417, 2.4865124394765457],
  [0.3321909828950127, 0.5973491832952431, 2.267543704339637],
  [0.3880845549517916, 0.651052978785058, 2.1921237565267475],
  [0.045062567054379186, 0.7173494575917096, 2.4287078477545365],
  [0.3533635648657668, 0.8600620161683944, 2.55966172035827],
  [0.6466996145829886, 0.6259177137225918, 2.3808836586188544],
  [0.38288184555107013, 0.7660893228293795, 2.5972896124354863],
  [0.28946033910655344, 0.6925758450380424, 2.4304895479349486],
  [0.4108290471891962, 0.6841527247710804, 2.539228234839168],
  [0.4595011656489133, 0.7233812655654359, 2.7126702640784655],
  [0.11344736925449553, 0.6884734997750285, 2.4962205489144815],
  [0.912693063962245, 0.7772242998504544, 2.2466135681909547],
  [0.5627141313984826, 0.8864549119769549, 2.4792100295543626],
  [0.47097624826839524, 0.888735

In [22]:
# Create a pandas dataframe from the metric_runs dictionary
lime_metric_runs = pd.DataFrame(metric_runs['lime'], columns=['faithfulness', 'sensitivity', 'complexity'])
shap_metric_runs = pd.DataFrame(metric_runs['shap'], columns=['faithfulness', 'sensitivity', 'complexity'])
anchor_metric_runs = pd.DataFrame(metric_runs['anchor'], columns=['faithfulness', 'sensitivity', 'complexity'])

In [23]:
display(lime_metric_runs.describe())
display(shap_metric_runs.describe())
display(anchor_metric_runs.describe())

Unnamed: 0,faithfulness,sensitivity,complexity
count,100.0,100.0,100.0
mean,0.390486,0.486279,2.583746
std,0.203001,0.095099,0.044482
min,0.017157,0.134483,2.457439
25%,0.248143,0.438337,2.553871
50%,0.383037,0.505665,2.58389
75%,0.538127,0.54899,2.6118
max,0.786396,0.672167,2.724201


Unnamed: 0,faithfulness,sensitivity,complexity
count,100.0,100.0,100.0
mean,0.45949,0.713927,2.430877
std,0.217598,0.125869,0.156041
min,0.003457,0.391567,2.003556
25%,0.325217,0.629465,2.337372
50%,0.460242,0.716669,2.458565
75%,0.647138,0.789604,2.527835
max,0.912693,0.982225,2.758602


Unnamed: 0,faithfulness,sensitivity,complexity
count,100.0,100.0,100.0
mean,0.395468,0.902281,1.384464
std,0.230227,0.041055,0.550394
min,0.006044,0.801397,0.604767
25%,0.19084,0.874697,1.025438
50%,0.442871,0.903915,1.282263
75%,0.572965,0.932445,1.601507
max,0.811126,1.0,2.764107


In [2]:
agg_exp = AggregatedExplainer([ShapTabularTreeWrapper, LimeWrapper, AnchorWrapper], clf, X_train, categorical_features,
                              noise_gen_args={'encoding_dim': 5, 'epochs': 500}, evaluator=evaluator)

NameError: name 'AggregatedExplainer' is not defined

In [1]:
evaluator._sensitivity_sequential(anchor_exp, X_test.loc[527])

NameError: name 'evaluator' is not defined

In [8]:
agg_exp.explain_instance(X_test.loc[527])

Unnamed: 0,feature,score
0,Checking_account_none,1.383217
1,Age,0.780609
2,Duration,0.765524
3,Purpose_radio_TV,0.611217
4,Checking_account_little,0.41715
5,Credit_amount,0.271372
6,Checking_account_moderate,0.265932
7,Housing_own,0.196318
8,Housing_free,0.105157
9,Saving_accounts_little,0.070129


In [13]:
agg_exp.last_explanation_metrics

Unnamed: 0_level_0,faithfullness_correlation,sensitivity,complexity
method,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ShapTabularTreeWrapper,0.68326,0.999877,2.393002
LimeWrapper,0.371491,1.0,2.592132
AnchorWrapper,0.353447,0.83171,1.08785
