In [1]:
import numpy as np
import pandas as pd
from joblib import dump
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from lime.lime_tabular import LimeTabularExplainer

from bokeh.io import output_notebook
from bokeh.models import HoverTool
from bokeh.plotting import figure, show, ColumnDataSource

In [2]:
# load the iris data and hold out 20% of it as a test set; fixing
# random_state keeps the split identical across kernel restarts so that
# every downstream result can be reproduced
iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size = 0.2, random_state = 42)

# persist the training features; np.savez appends the .npz extension,
# so this ends up on disk as data.npz
np.savez('data', data = X_train)

# fit the classifier (seeded, for the same reproducibility reason) and
# persist the fitted model
rf = RandomForestClassifier(n_estimators = 100, random_state = 42)
rf.fit(X_train, y_train)
dump(rf, 'model.pkl')

['model.pkl']

In [3]:
# configure bokeh so that subsequent show() calls render their output
# inline in the notebook's output cells
output_notebook()

In [4]:
# initialize the explainer with the training data; discretizing
# continuous variables gives nicer explanations, and a fixed random_state
# makes the explainer's random sampling (and therefore the resulting
# weights) repeatable from one run to the next
explainer = LimeTabularExplainer(
    training_data = X_train,
    feature_names = iris.feature_names,
    class_names = iris.target_names,
    discretize_continuous = True,
    random_state = 42)

In [5]:
# pick the test observation to explain using a dedicated, seeded generator
# so the same row is selected after every kernel restart (the global
# numpy random state is left untouched)
i = np.random.RandomState(42).randint(X_test.shape[0])

# request a weight for every feature of the chosen observation
# NOTE(review): top_labels = 0 is falsy, so lime presumably falls back to
# its default label here — confirm against the lime documentation
explained = explainer.explain_instance(
    X_test[i], rf.predict_proba,
    num_features = X_test.shape[1], top_labels = 0)

# (feature description, weight) pairs, consumed by the plotting cells below
explained_info = explained.as_list()

In [10]:
# fix the column names up front so every cell refers to the same labels
index_col = 'index'
variable_col = 'variable'
weight_col = 'weight'
color_col = 'color'


def assign_color2weight(df):
    """
    Return a copy of ``df`` with a color column derived from its weights.

    A strictly positive weight corresponds to a light green color
    ('#99d594'); every other weight (zero, negative or NaN) corresponds
    to a light red ('#d53e4f').

    Parameters
    ----------
    df : pd.DataFrame
        Frame containing the ``weight_col`` column.

    Returns
    -------
    pd.DataFrame
        A new frame with the ``color_col`` column added (or replaced).
        The input frame is left untouched, which keeps the function safe
        to use as a ``.pipe`` step on frames that are reused elsewhere.
    """
    # vectorized comparison instead of a row-by-row apply
    colors = np.where(df[weight_col] > 0, '#99d594', '#d53e4f')
    return df.assign(**{color_col: colors})

# order the rows by weight (ascending) so that variables with positive
# or negative contributions end up grouped together in the resulting plot,
# then number the rows in that order; the number is each bar's position
# along the plot's y axis
df = (
    pd.DataFrame(explained_info, columns = [variable_col, weight_col])
    .pipe(assign_color2weight)
    .sort_values(weight_col)
    .assign(**{index_col: lambda sorted_df: np.arange(len(sorted_df))})
)
df.head()

Unnamed: 0,variable,weight,color,index
3,2.80 < sepal width (cm) <= 3.00,0.005746,#99d594,0
2,5.10 < sepal length (cm) <= 5.80,0.021714,#99d594,1
1,1.50 < petal length (cm) <= 4.40,0.179208,#99d594,2
0,0.30 < petal width (cm) <= 1.30,0.189565,#99d594,3


In [7]:
# https://bokeh.pydata.org/en/latest/docs/user_guide/tools.html#hovertool

# ColumnDataSource is fed a plain dictionary of column name -> list of values
source = ColumnDataSource(data = df.to_dict(orient = 'list'))

p = figure(title = 'Explanation', plot_width = 700, plot_height = 400)
p.xaxis.axis_label = weight_col
p.yaxis.axis_label = variable_col
p.ygrid.grid_line_color = None

# place exactly one tick per bar and swap the numeric tick label for the
# variable description via major_label_overrides; the keys of that
# dictionary have to be strings
p.yaxis.ticker = df[index_col]
p.yaxis.major_label_overrides = {
    str(position): label
    for position, label in zip(df[index_col], df[variable_col])}

# the hover tool shows the variable together with its weight (two decimals);
# a field name prefixed with @ refers to a column of the ColumnDataSource
tooltips = [
    (variable_col, '@{}'.format(variable_col)),
    (weight_col, '@{}{{0.2f}}'.format(weight_col))]
p.add_tools(HoverTool(tooltips = tooltips))

# horizontal bars: one per variable, colored by the sign of its weight
plot = p.hbar(
    source = source, y = index_col, right = weight_col, height = 0.5,
    color = color_col, hover_fill_color = color_col,
    line_color = 'white', hover_line_color = 'black')

show(p)

In [None]:
from bokeh.models import Span


# draw a histogram of the predicted probabilities:
# NOTE(review): `input_id` and `pipeline_pred_train` are not defined in any
# cell visible in this notebook, and y_pred is a hard-coded placeholder;
# confirm where they should come from before running this cell
y_pred = 0.85
title = 'Predicted Probability for ID {}: {}'.format(input_id, y_pred)
p2 = figure(
    plot_width = 500, plot_height = 400,
    title = title, tools = '')

# use .quad to draw rectangles, in this case our histograms
# https://bokeh.pydata.org/en/latest/docs/user_guide/plotting.html#bars-and-rectangles
hist, edges = np.histogram(pipeline_pred_train, density = True, bins = 50)
p2.quad(top = hist, bottom = 0, left = edges[:-1],
        right = edges[1:], line_color = 'white')

# add vertical line indicating where the current observation's probability
# sits compared to all other training data's predicted probabilities
# http://bokeh.pydata.org/en/latest/docs/user_guide/annotations.html#spans
vline = Span(
    location = y_pred, dimension = 'height',
    line_color = 'red', line_dash = 'dashed',
    line_width = 3)
p2.add_layout(vline)

# the bin edges (predicted probabilities) run along the x axis and the
# bin heights along the y axis, so label the axes accordingly
p2.xaxis.axis_label = 'predicted probability'
p2.yaxis.axis_label = 'overall frequency'

# render the figure; without this the cell produces no visible output
show(p2)

In [None]:
from bokeh.layouts import widgetbox
from bokeh.models.widgets import DataTable, TableColumn

# two-column table pairing each feature name with the value it takes
# for the observation selected by input_id
# https://bokeh.pydata.org/en/latest/docs/user_guide/examples/interaction_data_table.html
# NOTE(review): `preprocessor` and `input_id` are not defined in any cell
# visible in this notebook; confirm where they should come from
source = ColumnDataSource(data = dict(
    feature = preprocessor.colnames_,
    value = X_train[input_id]))

# one TableColumn per (source field, displayed header) pair
columns = [
    TableColumn(field = col_field, title = col_title)
    for col_field, col_title in [('feature', 'Feature'), ('value', 'Value')]]

data_table = DataTable(
    columns = columns, source = source, height = 280, width = 400)
feature_table = widgetbox(data_table)
show(feature_table)

# Reference

- https://www.youtube.com/watch?v=LXLQTuSSKfY&index=7&list=PLYx7XA2nY5Gf37zYZMw6OqGFRPjB1jCy6
- https://github.com/bokeh/bokeh-notebooks