In [46]:
import lime
import sklearn
import numpy as np
import sklearn
import sklearn.ensemble
import sklearn.metrics
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
import shap
from sklearn.metrics.pairwise import cosine_similarity
import copy
from sklearn.datasets import fetch_20newsgroups

In [2]:
np.random.seed(0)

In [3]:
categories = ['alt.atheism', 'soc.religion.christian']
newsgroups_train = fetch_20newsgroups(subset='train', categories=categories)
newsgroups_test = fetch_20newsgroups(subset='test', categories=categories)
class_names = ['atheism', 'christian']

In [16]:
vectorizer = sklearn.feature_extraction.text.TfidfVectorizer(lowercase=False, token_pattern=r"\b\w+\b")
train_vectors = vectorizer.fit_transform(newsgroups_train.data)
test_vectors = vectorizer.transform(newsgroups_test.data)

In [5]:
lreg = LogisticRegression(random_state=0, solver='lbfgs')
lreg.fit(train_vectors, newsgroups_train.target)

nbayes = MultinomialNB()
nbayes.fit(train_vectors, newsgroups_train.target)

MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)

In [6]:
pred_lreg = lreg.predict(test_vectors)
print(sklearn.metrics.f1_score(newsgroups_test.target, pred_lreg, average='binary'))

pred_nbayes = nbayes.predict(test_vectors)
print(sklearn.metrics.f1_score(newsgroups_test.target, pred_nbayes, average='binary'))

0.9205607476635513
0.8816964285714286


### Global weights

In [7]:
global_nbayes_params = nbayes.coef_[0]
global_lreg_params = lreg.coef_[0]

In [8]:
selected_params_nbayes = np.abs(global_nbayes_params).argsort()[-10:][::-1]
selected_params_nbayes_values = global_nbayes_params[selected_params_nbayes]

selected_params_lreg = np.abs(global_lreg_params).argsort()[-10:][::-1]
selected_params_lreg_values = global_lreg_params[selected_params_lreg]

In [9]:
selected_feature_names_nbayes = []
selected_feature_names_lreg = []
for sel_p_val in selected_params_nbayes:
    selected_feature_names_nbayes.append(list(vectorizer.vocabulary_.keys())[list(vectorizer.vocabulary_.values()).index(sel_p_val)])  # Prints george
    
selected_feature_names_lreg = []
for sel_p_val in selected_params_lreg:
    selected_feature_names_lreg.append(list(vectorizer.vocabulary_.keys())[list(vectorizer.vocabulary_.values()).index(sel_p_val)])  # Prints george

### SP-LIME

### Local Naive Bayes weights

In [17]:
idx = 83
local_gt = train_vectors[idx].nonzero()[1]

In [11]:
local_weights_nbayes = np.abs(global_nbayes_params[local_gt]).argsort()[-10:][::-1]
local_weights_lreg = np.abs(global_lreg_params[local_gt]).argsort()[-10:][::-1]

selected_local_feature_names_nbayes = []
selected_local_feature_names_lreg = []

for sel_p_val in local_weights_nbayes:
    selected_local_feature_names_nbayes.append(list(vectorizer.vocabulary_.keys())[list(vectorizer.vocabulary_.values()).index(sel_p_val)])  # Prints george

for sel_p_val in local_weights_lreg:
    selected_local_feature_names_lreg.append(list(vectorizer.vocabulary_.keys())[list(vectorizer.vocabulary_.values()).index(sel_p_val)])  # Prints george

### LIME weights

In [12]:
from lime.lime_text import LimeTextExplainer
explainer = LimeTextExplainer(class_names=class_names)

from lime import lime_text
from sklearn.pipeline import make_pipeline
c_lreg = make_pipeline(vectorizer, lreg)
c_nbayes = make_pipeline(vectorizer, nbayes)

In [19]:
explained_class = 1
exp_lreg = explainer.explain_instance(newsgroups_train.data[idx], c_lreg.predict_proba, num_features=train_vectors.shape[1], labels=(explained_class, ))
print('Document id: %d' % idx)
print('Probability(christian) =', c_lreg.predict_proba([newsgroups_train.data[idx]])[0,1])
print('True class: %s' % class_names[newsgroups_train.target[idx]])

Document id: 83
Probability(christian) = 0.8429648336399032
True class: christian


In [20]:
explained_class = 1
exp_nbayes = explainer.explain_instance(newsgroups_train.data[idx], c_nbayes.predict_proba, num_features=10, labels=(explained_class, ))
print('Document id: %d' % idx)
print('Probability(christian) =', c_nbayes.predict_proba([newsgroups_train.data[idx]])[0,1])
print('True class: %s' % class_names[newsgroups_train.target[idx]])

Document id: 83
Probability(christian) = 0.9703013530527917
True class: christian


In [42]:
selected_features_lime_nbayes = np.zeros((1, train_vectors.shape[1]))
selected_features_lime_lreg = np.zeros((1, train_vectors.shape[1]))

for e in exp_lreg.as_list():
    selected_features_lime_lreg[:, vectorizer.vocabulary_[e[0]]] = e[1]

for e in exp_nbayes.as_list():
    selected_features_lime_nbayes[:, vectorizer.vocabulary_[e[0]]] = e[1]

### KernelShap Weights

In [32]:
median_train = np.median(train_vectors.toarray(), axis=0).reshape(1, -1)

In [69]:
lreg_lambda = lambda x: lreg.predict_proba(x)[:, explained_class]
nbayes_lambda = lambda x: nbayes.predict_proba(x)[:, explained_class]

shap_explainer_nbayes = shap.KernelExplainer(nbayes_lambda, median_train)
shap_values_nbayes = shap_explainer_nbayes.shap_values(train_vectors[idx], nsamples=1000)

shap_explainer_lreg = shap.KernelExplainer(lreg_lambda, median_train)
shap_values_lreg = shap_explainer_lreg.shap_values(train_vectors[idx], nsamples=1000)

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

l1_reg="auto" is deprecated and in the next version (v0.29) the behavior will change from a conditional use of AIC to simply "num_features(10)"!





HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

l1_reg="auto" is deprecated and in the next version (v0.29) the behavior will change from a conditional use of AIC to simply "num_features(10)"!





In [47]:
cosine_similarity(shap_values_lreg, selected_features_lime_lreg)

array([[-0.08013141]])

In [177]:
selected_features_lime_lreg[0].shape

(23098,)

### Evaluation

In [48]:
explicand = copy.copy(test_vectors[idx].toarray())

In [159]:
import pandas as pd
intervals = pd.qcut(range(100), 4)

In [164]:
intervals[99]

Interval(74.25, 99.0, closed='right')

In [226]:
def predict_replacement(explicand, x_train, selected_features, model, explained_class):
    result = 0
    for j in range(selected_features.shape[0]):
        explicand_copy = copy.copy(explicand)
        explicand_f_val = explicand_copy[:, j][0]
        feature_values = train_vectors[:, j].toarray().flatten()
        bin_count, bin_edge = np.histogram(feature_values, bins=4)
        
        for i in range(0, len(bin_edge) - 1):
            if explicand_f_val >= bin_edge[i] and  explicand_f_val < bin_edge[i+1]:
                inst_in_interval = np.argwhere(np.logical_and(feature_values>=bin_edge[i], feature_values<bin_edge[i+1])).flatten()
                bin_idx = i
                
                if len(inst_in_interval) == 0:
                    bin_avg = bin_edge[i]
                else:
                    bin_avg = np.median(feature_values[inst_in_interval])

        prior = bin_count[bin_idx] / len(feature_values)
        explicand_copy[:, j] = bin_avg
        
        new_pred = model.predict_proba(explicand_copy)[0][explained_class]
        
        result += new_pred * prior
    
    return result

In [236]:
base_pred_lreg = lreg.predict_proba(explicand)[0][explained_class]
base_pred_nbayes = nbayes.predict_proba(explicand)[0][explained_class]

In [227]:
new_pred_method_1_lreg_lime = predict_replacement(explicand, train_vectors, selected_features_lime_lreg.flatten(), lreg, explained_class)
new_pred_method_1_nbayes_lime = predict_replacement(explicand, train_vectors, selected_features_lime_nbayes.flatten(), lreg, explained_class)
new_pred_method_1_lreg_shap = predict_replacement(explicand, train_vectors, shap_values_lreg, nbayes, explained_class)
new_pred_method_1_nbayes_shap = predict_replacement(explicand, train_vectors, shap_values_nbayes, nbayes, explained_class)

In [237]:
(base_pred_lreg - new_pred_method_1_lreg_lime), (base_pred_lreg - new_pred_method_1_lreg_shap)

(-10556.509614889284, 0.0009189599462474662)

In [235]:
explicand_copy = copy.copy(explicand)

for i in range(0, selected_features_lime_lreg.shape[1]):
    explicand_copy[:, i] = np.mean(train_vectors[:, i]) 

new_pred_method_2_lreg_lime = lreg.predict_proba(explicand_copy)[0][explained_class]

explicand_copy = copy.copy(explicand)

for i in range(0, selected_features_lime_nbayes.shape[1]):
    explicand_copy[:, i] = np.mean(train_vectors[:, i]) 

new_pred_method_2_lreg_lime = lreg.predict_proba(explicand_copy)[0][explained_class]


explicand_copy = copy.copy(explicand)

for i in range(0, selected_features_lime_lreg.shape[1]):
    explicand_copy[:, i] = np.mean(train_vectors[:, i]) 

new_pred_method_2_lreg_lime = lreg.predict_proba(explicand_copy)[0][explained_class]

explicand_copy = copy.copy(explicand)

for i in range(0, selected_features_lime_lreg.shape[1]):
    explicand_copy[:, i] = np.mean(train_vectors[:, i]) 

new_pred_method_2_lreg_lime = lreg.predict_proba(explicand_copy)[0][explained_class]

In [203]:
(base_pred - new_pred_method_1), (base_pred - new_pred_method_2)

(0.4300836130356512, -0.15239361205791208)

In [201]:
bin_count, bin_edge = np.histogram(feature_values, bins='scott')

In [202]:
for i in range(0, len(bin_edge)):
    if explicand_f_val > bin_edge[i] and  explicand_f_val<bin_edge[i+1]:
        bin_idx = i
        bin_avg = np.mean([bin_edge[i], bin_edge[i+1]])

In [138]:
prior = bin_count[bin_idx] / len(feature_values)

In [139]:
prior

0.0018535681186283596

In [143]:
explicand[:, selected_features_lime[0]] = bin_avg

In [142]:
nbayes.predict(explicand)

array([0])

In [147]:
selected_features_lime[0]

4103

In [146]:
explicand.shape

(1, 23035)