In [1]:
from collections import defaultdict

import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import spearmanr
from scipy.cluster import hierarchy
from sklearn.metrics.pairwise import cosine_similarity

from sklearn.datasets import load_breast_cancer
from sklearn.naive_bayes import MultinomialNB, GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
import shap
from sklearn.preprocessing import StandardScaler
from scipy.stats import norm

import lime
import lime.lime_tabular
from sklearn.metrics import f1_score

In [2]:
# Single seed shared by the stochastic steps of this notebook.
random_seed = 10

In [3]:
# Seed NumPy's legacy global RNG so code that draws from it is repeatable on a fresh run.
np.random.seed(random_seed)

In [4]:
# Load the breast-cancer dataset and hold out a test split (default split size).
data = load_breast_cancer()
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=random_seed,
)

In [5]:
# Learn the scaling statistics on the training split only, then apply the
# same fitted transform to both splits so no test information leaks in.
scaler = StandardScaler().fit(X_train)
X_train_transformed = scaler.transform(X_train)
X_test_transformed = scaler.transform(X_test)

In [7]:
# Fit the two classifiers that will be explained, both on the scaled training data.
lreg = LogisticRegression(random_state=0).fit(X_train_transformed, y_train)

nbayes_g = GaussianNB()
# Left as the cell's final bare expression so the fitted estimator is displayed.
nbayes_g.fit(X_train_transformed, y_train)

GaussianNB(priors=None, var_smoothing=1e-09)

In [8]:
# Test-set predictions of both models, then their binary F1 scores
# (printed in the order: logistic regression, Gaussian naive Bayes).
pred_lreg = lreg.predict(X_test_transformed)
pred_nbayes = nbayes_g.predict(X_test_transformed)

for test_predictions in (pred_lreg, pred_nbayes):
    print(f1_score(y_test, test_predictions, average='binary'))

0.9666666666666667
0.9613259668508287


### LIME Explanations

In [9]:
# LIME tabular explainer built on the scaled training data; continuous
# features are not discretized, so explanations refer to plain feature names.
explainer = lime.lime_tabular.LimeTabularExplainer(
    X_train_transformed,
    feature_names=data.feature_names,
    class_names=data.target_names,
    verbose=False,
    mode='classification',
    discretize_continuous=False,
)

In [18]:
# Class index the explanations are meant to focus on.
# NOTE(review): presumably an index into data.target_names and consumed by later cells — confirm.
explained_class = 1

In [11]:
# Explain every training instance of the Gaussian naive Bayes model with LIME.
# Each entry of lime_exp_nbayes is one instance's list of (feature name, weight)
# pairs for class `explained_class`.
lime_exp_nbayes = []
# Iterate over the rows that are actually explained (the scaled matrix) instead
# of deriving the loop bound from a different array.
for instance in X_train_transformed:
    exp_nbayes = explainer.explain_instance(
        instance,
        nbayes_g.predict_proba,
        # Make the explained class explicit rather than relying on LIME's
        # default of labels=(1,).
        labels=(explained_class,),
        # Ask for a weight for every feature, whatever the dataset width.
        num_features=X_train_transformed.shape[1],
    )
    lime_exp_nbayes.append(exp_nbayes.as_list(label=explained_class))

In [44]:
# Disabled: lime_exp_nbayes is kept as a plain list of per-instance (feature, weight) lists.
#lime_exp_nbayes = np.array(lime_exp_nbayes)

In [12]:
def transform_lime_exp(exps, features):
    """Convert LIME explanation lists into a dense weight matrix.

    Parameters
    ----------
    exps : sequence of sequences of (feature_name, weight) pairs
        One inner sequence per explained instance.
    features : sequence of feature names
        Defines the column order of the result. May be a list, a tuple or a
        NumPy array.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(len(exps), len(features))``. Entry ``[i, k]``
        is the weight given for ``features[k]`` in ``exps[i]``; features that
        an explanation does not mention stay at 0.

    Raises
    ------
    IndexError
        If an explanation names a feature that is not present in ``features``.
    """
    # Build the name -> column lookup once instead of scanning the whole
    # feature array for every single weight. setdefault keeps the first
    # occurrence of a duplicated name.
    column_of = {}
    for column, feature_name in enumerate(features):
        column_of.setdefault(feature_name, column)

    dense = np.zeros((len(exps), len(features)))
    for row, explanation in enumerate(exps):
        for pair in explanation:
            feature_name, weight = pair[0], pair[1]
            try:
                column = column_of[feature_name]
            except KeyError:
                # Same exception type as before for an unknown name, but with
                # a message that says what went wrong.
                raise IndexError(
                    "feature %r from explanation %d is not in `features`"
                    % (feature_name, row)
                ) from None
            dense[row, column] = weight
    return dense

In [13]:
# Dense LIME weight matrix: one row per explained instance, columns ordered as data.feature_names.
transform_lime_bayes = transform_lime_exp(lime_exp_nbayes, data.feature_names)

In [15]:
# Inspect the weight vector of the first explained instance.
transform_lime_bayes[0]

array([-0.02809414, -0.01645086, -0.02445034, -0.0340933 ,  0.00024557,
       -0.02001726, -0.02691532, -0.03055691, -0.01360946, -0.00421245,
       -0.0224395 ,  0.00385139, -0.01928732, -0.0261138 ,  0.00352827,
       -0.00345338, -0.00820703, -0.00528691, -0.0022683 , -0.00349995,
       -0.03647874, -0.01537901, -0.0393242 , -0.03510332, -0.01741868,
       -0.01839738, -0.02702567, -0.03570298, -0.00871805, -0.01059578])

In [16]:
# Raw LIME output for the first explained instance: a list of (feature name, weight) pairs.
lime_exp_nbayes[0]