In [47]:
import numpy as np
import pandas as pd
import shap
from sklearn.ensemble import IsolationForest

import time
import warnings

from radical_shapley_values import compute_shapley_values
from radical_shapley_values import shap_payoff_isolation_forest as shap_if

In [48]:
# Load the dataset and separate the 'Class' column (y) from the remaining columns (X).
# A forward slash is used as the path separator: the previous 'data\creditcard.csv'
# relied on '\c' being an unrecognized escape sequence (which triggers a warning on
# recent Python versions) and only pointed inside the data directory on Windows.
df = pd.read_csv('data/creditcard.csv')
y = df['Class']
X = df.drop('Class', axis=1)

In [49]:
#%% Restrict the experiment to a subset of the data
ns = 100_000  # number of rows kept (10**5)
nf = 15       # number of leading columns kept

# Positional slicing: first ns rows, first nf columns of X, and the matching rows of y.
X = X.iloc[:ns, :nf]
y = y.iloc[:ns]

In [50]:
# Sanity check: display the (rows, columns) of X after restricting it to ns rows and nf columns.
X.shape

(100000, 15)

In [51]:
# Build the isolation forest and the Kernel SHAP explainer, timing the fit and
# a scoring pass over the whole of X along the way.
n_background_samp = 100  # number of leading rows of X passed to KernelExplainer

# NOTE(review): the `behaviour` keyword and `contamination = 0` appear to be accepted
# only by older scikit-learn releases -- confirm the pinned version before re-running.
model = IsolationForest(behaviour = 'new', random_state = 1, contamination = 0)

# time.perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed time; time.time() follows the wall clock and can jump if it is adjusted.
start = time.perf_counter()
model.fit(X)
end = time.perf_counter()
print('Isolation forest training time: %.3f.' %(end-start))

start = time.perf_counter()
model.decision_function(X)  # result is discarded: only the elapsed time is of interest
end = time.perf_counter()
print('Isolation forest whole dataset prediction time: %.3f.' %(end-start))

# The explainer wraps the model's decision function and receives the first
# n_background_samp rows of X as its data argument.
explainer = shap.KernelExplainer(model.decision_function,X[0:n_background_samp])

Isolation forest training time: 8.152.
Isolation forest whole dataset prediction time: 7.751.


In [57]:
# Test prediction time for the KernelExplainer's bootstrapped dataset size.
# The size is derived from nf and n_background_samp instead of the hard-coded
# (2048+15*2)*100, so it stays in sync if either setting changes (same value today).
# (2048 + 2*nf) is presumably KernelExplainer's default number of perturbed samples,
# each evaluated against every background row -- TODO confirm against the SHAP docs.
num_samples_to_predict = (2048 + 2*nf) * n_background_samp

# Rows are drawn with replacement because more rows are requested than a plain
# sample of X could provide.
x = X.sample(n=num_samples_to_predict, replace=True)

# perf_counter() is monotonic and high-resolution, unlike wall-clock time.time().
start = time.perf_counter()
yp = model.decision_function(x)
end = time.perf_counter()

# The per-sample time is printed in scientific notation: with '%.3f' it was
# rounded to 0.000 and carried no information.
print('Isolation Forest prediction time: %.3f. Per sample: %.3e' %(end-start, (end-start)/num_samples_to_predict))

Isolation Forest prediction time: 16.582. Per sample: 0.000


In [59]:
# Run KernelExplainer on a small random draw from X and time the explanation.
num_samples_to_explain = 1
# Fixed seed so the same row(s) are drawn on every run (the explainer's own
# internal sampling is not seeded here).
x = X.sample(n=num_samples_to_explain, random_state=1)

# perf_counter() is monotonic and high-resolution, unlike wall-clock time.time().
start = time.perf_counter()
# Every warning raised while the SHAP values are computed is silenced; the filter
# is restored when the `with` block exits.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    SHAP_values = explainer.shap_values(x)
end = time.perf_counter()
print('Kernel SHAP values run time: %.3f. Per sample: %.3f' %(end-start, (end-start)/num_samples_to_explain))

HBox(children=(IntProgress(value=0, max=1), HTML(value='')))


Kernel SHAP values run time: 18.157. Per sample: 18.157
