In [1]:
from collections import defaultdict

import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import spearmanr
from scipy.cluster import hierarchy
from sklearn.metrics.pairwise import cosine_similarity

from sklearn.datasets import load_breast_cancer
from sklearn.naive_bayes import MultinomialNB, GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
import shap
from sklearn.preprocessing import StandardScaler
from scipy.stats import norm

import lime
import lime.lime_tabular
from sklearn.metrics import f1_score

In [2]:
# Single seed reused for NumPy's global RNG and for the train/test split.
random_seed = 10

In [3]:
# Seed NumPy's legacy global RNG so that code drawing from it is repeatable.
# NOTE(review): presumably the LIME/SHAP sampling below relies on this global
# state for reproducibility — confirm against the installed versions.
np.random.seed(random_seed)

In [4]:
# Load scikit-learn's bundled breast-cancer dataset and keep the feature
# matrix and the label vector under short names.
data = load_breast_cancer()
X = data.data
y = data.target

# Seeded split (train_test_split's default proportions) so every run sees
# the same train and test instances.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=random_seed,
)

In [5]:
# Fit the scaler on the training split only, then apply that same fitted
# transform to both splits so no test-set statistics are used.
scaler = StandardScaler().fit(X_train)
X_train_transformed = scaler.transform(X_train)
X_test_transformed = scaler.transform(X_test)

In [7]:
# Both classifiers are trained on the same standardized training features.
lreg = LogisticRegression(random_state=0).fit(X_train_transformed, y_train)

nbayes_g = GaussianNB()
# Left as the cell's final expression so the fitted estimator is displayed.
nbayes_g.fit(X_train_transformed, y_train)

GaussianNB(priors=None, var_smoothing=1e-09)

In [8]:
# Predict on the held-out split with each model, then print the binary F1
# scores: logistic regression first, naive Bayes second.
pred_lreg = lreg.predict(X_test_transformed)
pred_nbayes = nbayes_g.predict(X_test_transformed)

for predictions in (pred_lreg, pred_nbayes):
    print(f1_score(y_test, predictions, average='binary'))

0.9666666666666667
0.9613259668508287


### LIME Explanations

In [9]:
# One LIME tabular explainer, built on the standardized training data, is
# shared by both models. Discretization is disabled.
explainer = lime.lime_tabular.LimeTabularExplainer(
    X_train_transformed,
    feature_names=data.feature_names,
    class_names=data.target_names,
    verbose=False,
    mode='classification',
    discretize_continuous=False,
)

In [19]:
# Class label whose predicted probability is explained by LIME and SHAP.
# NOTE(review): presumably an index into data.target_names — confirm which
# class label 1 corresponds to.
explained_class = 1

In [21]:
# LIME explanations of the Gaussian naive Bayes model, one per training
# instance. Each entry is a list of (feature name, weight) pairs.
lime_exp_nbayes = []
for instance in X_train_transformed:
    exp_nbayes = explainer.explain_instance(
        instance,
        nbayes_g.predict_proba,
        # Ask for a weight on every feature instead of hard-coding the count.
        num_features=X_train_transformed.shape[1],
        labels=(explained_class,),
    )
    # as_list() defaults to label=1. Request the explained label explicitly
    # so this cell keeps working if explained_class is changed.
    lime_exp_nbayes.append(exp_nbayes.as_list(label=explained_class))

In [31]:
# LIME explanations of the logistic-regression model, one per training
# instance. Each entry is a list of (feature name, weight) pairs.
lime_exp_lreg = []
for instance in X_train_transformed:
    exp_lreg = explainer.explain_instance(
        instance,
        lreg.predict_proba,
        # Ask for a weight on every feature instead of hard-coding the count.
        num_features=X_train_transformed.shape[1],
        labels=(explained_class,),
    )
    # as_list() defaults to label=1. Request the explained label explicitly
    # so this cell keeps working if explained_class is changed.
    lime_exp_lreg.append(exp_lreg.as_list(label=explained_class))

In [22]:
def transform_lime_exp(exps, features):
    """Convert per-instance LIME explanation lists into a dense weight matrix.

    Parameters
    ----------
    exps : sequence
        One explanation per instance. Each explanation is a sequence of
        items whose element 0 is a feature name and element 1 is its weight.
    features : sequence of str
        Feature names that define the column order of the result. Any
        sequence works (list, tuple or NumPy array).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(exps), len(features))``. Entry ``[i, j]`` is the
        weight explanation ``i`` gives to ``features[j]``, or 0 if that
        feature does not appear in the explanation.

    Raises
    ------
    IndexError
        If an explanation names a feature that is not in ``features``.
    """
    # Build the name -> column lookup once. setdefault keeps the first
    # position when a name is duplicated in `features`.
    column_of = {}
    for position, feature in enumerate(features):
        column_of.setdefault(feature, position)

    transform_exps = np.zeros((len(exps), len(features)))
    for row, explanation in enumerate(exps):
        for item in explanation:
            name, weight = item[0], item[1]
            try:
                column = column_of[name]
            except KeyError:
                # IndexError is kept as the exception type for unknown names.
                raise IndexError(
                    "feature {!r} in explanation {} is not in `features`".format(name, row)
                ) from None
            transform_exps[row, column] = weight
    return transform_exps

In [32]:
# Dense (instances x features) LIME weight matrices: naive Bayes first,
# logistic regression second.
transform_lime_bayes, transform_lime_lreg = (
    transform_lime_exp(explanations, data.feature_names)
    for explanations in (lime_exp_nbayes, lime_exp_lreg)
)

### SHAP Explanations

In [42]:
# Per-feature median of the standardized training data, kept as a single
# row of shape (1, n_features). Used as the SHAP background sample.
median_train = np.median(X_train_transformed, axis=0, keepdims=True)

In [43]:
def lreg_lambda(x):
    """Return the logistic-regression probability of `explained_class` for each row of x."""
    return lreg.predict_proba(x)[:, explained_class]


def nbayes_lambda(x):
    """Return the Gaussian naive Bayes probability of `explained_class` for each row of x."""
    return nbayes_g.predict_proba(x)[:, explained_class]

In [49]:
# The explainer depends only on the model function and the background row,
# so build it once instead of once per instance.
shap_explainer_nbayes = shap.KernelExplainer(nbayes_lambda, median_train)

# SHAP values of the naive Bayes model, one array per training instance.
# NOTE(review): the captured output warns that l1_reg="auto" is deprecated;
# consider passing l1_reg explicitly to pin behaviour across SHAP versions.
shap_exp_nbayes = []
for instance in X_train_transformed:
    shap_values_nbayes = shap_explainer_nbayes.shap_values(instance, nsamples=2000)
    shap_exp_nbayes.append(shap_values_nbayes)

l1_reg="auto" is deprecated and in the next version (v0.29) the behavior will change from a conditional use of AIC to simply "num_features(10)"!
l1_reg="auto" is deprecated and in the next version (v0.29) the behavior will change from a conditional use of AIC to simply "num_features(10)"!
l1_reg="auto" is deprecated and in the next version (v0.29) the behavior will change from a conditional use of AIC to simply "num_features(10)"!
l1_reg="auto" is deprecated and in the next version (v0.29) the behavior will change from a conditional use of AIC to simply "num_features(10)"!
l1_reg="auto" is deprecated and in the next version (v0.29) the behavior will change from a conditional use of AIC to simply "num_features(10)"!
l1_reg="auto" is deprecated and in the next version (v0.29) the behavior will change from a conditional use of AIC to simply "num_features(10)"!
l1_reg="auto" is deprecated and in the next version (v0.29) the behavior will change from a conditional use of AIC to simply "num_

In [50]:
# Inspect the SHAP values computed for the first training instance.
shap_exp_nbayes[0]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [None]:
# Explainer for the logistic-regression model. It depends only on the model
# function and the background row, so it is built once outside the loop.
shap_explainer_lreg = shap.KernelExplainer(lreg_lambda, median_train)

# SHAP values of the logistic-regression model, one array per training
# instance. The instances must be the standardized rows the model was
# trained on, and they must go through the logistic-regression explainer.
# NOTE(review): nsamples is 1000 here but 2000 in the naive Bayes cell —
# confirm whether the two models should use the same sampling budget.
shap_exp_lreg = []
for instance in X_train_transformed:
    shap_values_lreg = shap_explainer_lreg.shap_values(instance, nsamples=1000)
    shap_exp_lreg.append(shap_values_lreg)