In [12]:
from collections import defaultdict

import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import spearmanr
from scipy.cluster import hierarchy
from sklearn.metrics.pairwise import cosine_similarity

from sklearn.datasets import load_breast_cancer
from sklearn.naive_bayes import MultinomialNB, GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
import shap
from sklearn.preprocessing import StandardScaler
from scipy.stats import norm

import lime
import lime.lime_tabular
from sklearn.metrics import f1_score

In [2]:
random_seed = 10

In [3]:
np.random.seed(random_seed)

In [81]:
data = load_breast_cancer()
X, y = data.data, data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=random_seed)

In [5]:
scaler = StandardScaler()
X_train_transformed = scaler.fit_transform(X_train)
X_test_transformed = scaler.transform(X_test)

In [82]:
lreg = LogisticRegression(random_state=0)
lreg.fit(X_train, y_train)

nbayes_g = GaussianNB()
nbayes_g.fit(X_train, y_train)

lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


GaussianNB(priors=None, var_smoothing=1e-09)

In [83]:
pred_lreg = lreg.predict(X_test_transformed)
print(f1_score(y_test, pred_lreg, average='binary'))

pred_nbayes = nbayes_g.predict(X_test_transformed)
print(f1_score(y_test, pred_nbayes, average='binary'))

0.5542168674698795
0.02173913043478261


### LIME Explanations

In [10]:
explainer = lime.lime_tabular.LimeTabularExplainer(X_train, feature_names=data.feature_names, class_names=data.target_names, verbose=False, mode='classification',discretize_continuous=False)

In [18]:
explained_class = 1

In [79]:
lime_exp_nbayes = []
for i in range(X_train.shape[0]):
    exp_nbayes = explainer.explain_instance(X_train[i], nbayes.predict_proba, num_features=30)
    lime_exp_nbayes.append(exp_nbayes.as_list())

In [80]:
lime_exp_nbayes[4]

In [44]:
#lime_exp_nbayes = np.array(lime_exp_nbayes)

In [61]:
np.argwhere(lime_exp_nbayes[0][0][0] == data.feature_names)[0]

array([0])

In [68]:
def transform_lime_exp(exps, features):
    transform_exps = np.zeros((len(exps), len(features)))
    for i in range(len(exps)):
        for j in range(len(exps[i])):
            idx = np.argwhere(exps[i][j][0] == features)[0][0]
            transform_exps[i][idx] = exps[i][j][1]
    return transform_exps

In [69]:
transform_lime_bayes = transform_lime_exp(lime_exp_nbayes, data.feature_names)

In [72]:
transform_lime_bayes[1]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [73]:
lime_exp_nbayes[1]