In [1]:
import numpy as np
import pandas as pd
from joblib import dump
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from lime.lime_tabular import LimeTabularExplainer

from bokeh.io import output_notebook
from bokeh.models import HoverTool
from bokeh.plotting import figure, show, ColumnDataSource

In [2]:
# fix the seed so the train/test split and the fitted forest are the
# same on every Restart & Run All
RANDOM_STATE = 42

iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size = 0.2, random_state = RANDOM_STATE)

# persist the training features; np.savez appends the .npz extension,
# so this writes data.npz with the array stored under the key 'data'
np.savez('data', data = X_train)

rf = RandomForestClassifier(n_estimators = 100, random_state = RANDOM_STATE)
rf.fit(X_train, y_train)

# serialize the fitted model with joblib; the returned list of written
# file names is what the cell displays
dump(rf, 'model.pkl')

['model.pkl']

In [3]:
# route Bokeh output to the notebook so that show() renders plots inline
output_notebook()

In [4]:
# the explainer is built from the training data; feature and class names
# are passed so that explanations are labelled with readable names, and
# continuous features are discretized into ranges
explainer = LimeTabularExplainer(
    X_train,
    feature_names = iris.feature_names,
    class_names = iris.target_names,
    discretize_continuous = True,
)

In [5]:
# pick one row of the held-out set at random and explain the model's
# prediction for it, asking for a weight for every feature
n_test_rows, n_features = X_test.shape
i = np.random.randint(n_test_rows)

# NOTE(review): top_labels = 0 is falsy, so LIME presumably falls back to
# its default `labels` and as_list() reports its default label rather than
# the predicted class -- confirm against the LIME documentation
explained = explainer.explain_instance(
    X_test[i],
    rf.predict_proba,
    num_features = n_features,
    top_labels = 0,
)

# list of (feature description, weight) pairs
explained_info = explained.as_list()

In [6]:
# column names used by the explanation DataFrame and the plot
index_col = 'index'
variable_col = 'variable'
weight_col = 'weight'
color_col = 'color'

def assign_color2weight(df, positive_color = '#99d594', negative_color = '#d53e4f'):
    """
    Return a copy of ``df`` with a color column derived from the sign of
    each weight, leaving the input DataFrame untouched so the function
    is safe to use inside a ``.pipe`` chain and to re-run.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain the ``weight_col`` column.

    positive_color : str, default '#99d594'
        Color assigned to rows whose weight is strictly greater than 0.

    negative_color : str, default '#d53e4f'
        Color assigned to every other row (zero, negative or missing weight).

    Returns
    -------
    pd.DataFrame
        New DataFrame with the additional ``color_col`` column.
    """
    is_positive = df[weight_col] > 0
    colors = np.where(is_positive, positive_color, negative_color)
    return df.assign(**{color_col: colors})

# sort the weights in ascending order so variables that have negative
# or positive contributions are grouped together in the resulting plot;
# the position after sorting is stored under index_col and is used as
# the bar's y coordinate, the column name comes from index_col so the
# constant and the DataFrame can never drift apart
df = (pd.DataFrame(explained_info, columns = [variable_col, weight_col]).
      pipe(assign_color2weight).
      sort_values(weight_col).
      assign(**{index_col: np.arange(len(explained_info))}))
df

Unnamed: 0,variable,weight,color,index
2,sepal width (cm) <= 2.80,-0.01604,#d53e4f,0
3,5.75 < sepal length (cm) <= 6.40,-0.002472,#d53e4f,1
1,1.30 < petal width (cm) <= 1.80,0.118055,#99d594,2
0,4.30 < petal length (cm) <= 5.10,0.160788,#99d594,3


In [36]:
# https://bokeh.pydata.org/en/latest/docs/user_guide/tools.html#hovertool
source = ColumnDataSource(data = df.to_dict(orient = 'list'))

# width/height are used instead of plot_width/plot_height,
# which were removed from figure in Bokeh 3
p = figure(width = 700, height = 400, title = 'Explanation')
p.ygrid.grid_line_color = None
p.xaxis.axis_label = weight_col
p.yaxis.axis_label = variable_col

# limit the position of the ticks, and specify the
# label for each tick using the major_label_overrides attribute,
# note that the key of the dictionary needs to be string type;
# the tick positions are handed over as a plain list of numbers
# instead of a pandas Series
tick_positions = df[index_col].tolist()
p.yaxis.ticker = tick_positions
p.yaxis.major_label_overrides = dict(zip(df[index_col].astype(str), df[variable_col]))

# hover tool will display the variable and its actual weight
# field names that begin with @ are associated with columns in a ColumnDataSource,
# the text in curly braces is a numeral-style format, {0.00} shows two decimals
# (a printf-style spec such as 0.2f is only valid with a printf formatter)
tooltips = [
    (variable_col, '@' + variable_col),
    (weight_col, '@' + weight_col + '{0.00}')
]
p.add_tools(HoverTool(tooltips = tooltips))

# one horizontal bar per variable, colored by the sign of its weight;
# the fill color is kept on hover and only the outline changes
plot = p.hbar(
    y = index_col, right = weight_col, color = color_col,
    hover_fill_color = color_col, source = source,
    height = 0.5, line_color = 'white', hover_line_color = 'black')

show(p)

# Reference

- https://www.youtube.com/watch?v=LXLQTuSSKfY&index=7&list=PLYx7XA2nY5Gf37zYZMw6OqGFRPjB1jCy6
- https://github.com/bokeh/bokeh-notebooks