# Explicabilidade - Interpret - Decision Tree

https://github.com/interpretml/interpret

Fonte: https://nbviewer.jupyter.org/github/interpretml/interpret/blob/master/examples/python/notebooks/Explaining%20Blackbox%20Classifiers.ipynb

In [3]:
!pip install interpret

Collecting interpret
  Downloading interpret-0.1.22-py3-none-any.whl (1.4 kB)
Collecting interpret-core[dash,debug,decisiontree,ebm,lime,linear,notebook,plotly,required,sensitivity,shap,treeinterpreter]>=0.1.22
  Downloading interpret_core-0.1.22-py3-none-any.whl (8.3 MB)
[K     |████████████████████████████████| 8.3 MB 436 kB/s eta 0:00:01
[?25hCollecting dash>=1.0.0; extra == "dash"
  Downloading dash-1.13.4.tar.gz (67 kB)
[K     |████████████████████████████████| 67 kB 555 kB/s eta 0:00:011
[?25hCollecting dash-table>=4.1.0; extra == "dash"
  Downloading dash_table-4.8.1.tar.gz (1.8 MB)
[K     |████████████████████████████████| 1.8 MB 6.2 MB/s eta 0:00:01
[?25hCollecting dash-cytoscape>=0.1.1; extra == "dash"
  Downloading dash_cytoscape-0.1.1.tar.gz (3.4 MB)
[K     |████████████████████████████████| 3.4 MB 6.0 MB/s eta 0:00:01
Collecting lime>=0.1.1.33; extra == "lime"
  Downloading lime-0.2.0.1.tar.gz (275 kB)
[K     |████████████████████████████████| 275 kB 4.6 MB/s eta 0

Collecting brotli
  Downloading Brotli-1.0.7-cp37-cp37m-manylinux1_x86_64.whl (352 kB)
[K     |████████████████████████████████| 352 kB 5.6 MB/s eta 0:00:01


Building wheels for collected packages: dash, dash-table, dash-cytoscape, lime, SALib, dill, shap, flask-compress, dash-renderer, dash-core-components, dash-html-components
  Building wheel for dash (setup.py) ... [?25ldone
[?25h  Created wheel for dash: filename=dash-1.13.4-py3-none-any.whl size=74951 sha256=2ad768ceeb2ff5233bcf56bb0cbf16fb0a95a5945b67dabb8476025418d89059
  Stored in directory: /home/barbara/.cache/pip/wheels/06/28/f7/a5ce9b564da2bb24f6c8fbe190c0458c6ff4a497a937815103
  Building wheel for dash-table (setup.py) ... [?25ldone
[?25h  Created wheel for dash-table: filename=dash_table-4.8.1-py3-none-any.whl size=1779391 sha256=807f33b46e317d96f2e68870d8864c237ceead02945b5aa944378493db14a056
  Stored in directory: /home/barbara/.cache/pip/wheels/66/81/95/b2774b227694b28cf1a6a7dcb92af258cebac354dfa67af6bb
  Building wheel for dash-cytoscape (setup.py) ... [?25ldone
[?25h  Created wheel for dash-cytoscape: filename=dash_cytoscape-0.1.1-py3-none-any.whl size=3430004 sha2

In [1]:
import pandas as pd
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split

df = pd.read_csv(
    "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data",
    header=None)
df.columns = [
    "Age", "WorkClass", "fnlwgt", "Education", "EducationNum",
    "MaritalStatus", "Occupation", "Relationship", "Race", "Gender",
    "CapitalGain", "CapitalLoss", "HoursPerWeek", "NativeCountry", "Income"
]
# df = df.sample(frac=0.01, random_state=1)
train_cols = df.columns[0:-1]
label = df.columns[-1]
X = df[train_cols]
y = df[label].apply(lambda x: 0 if x == " <=50K" else 1) #Turning response into 0 and 1

# We have to transform categorical variables to use sklearn models
X_enc = pd.get_dummies(X, prefix_sep='.')
feature_names = list(X_enc.columns)

seed = 1  
X_train, X_test, y_train, y_test = train_test_split(X_enc, y, test_size=0.20, random_state=seed)

In [2]:
df.head()

Unnamed: 0,Age,WorkClass,fnlwgt,Education,EducationNum,MaritalStatus,Occupation,Relationship,Race,Gender,CapitalGain,CapitalLoss,HoursPerWeek,NativeCountry,Income
0,39,State-gov,77516,Bachelors,13,Never-married,Adm-clerical,Not-in-family,White,Male,2174,0,40,United-States,<=50K
1,50,Self-emp-not-inc,83311,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,13,United-States,<=50K
2,38,Private,215646,HS-grad,9,Divorced,Handlers-cleaners,Not-in-family,White,Male,0,0,40,United-States,<=50K
3,53,Private,234721,11th,7,Married-civ-spouse,Handlers-cleaners,Husband,Black,Male,0,0,40,United-States,<=50K
4,28,Private,338409,Bachelors,13,Married-civ-spouse,Prof-specialty,Wife,Black,Female,0,0,40,Cuba,<=50K


In [3]:
df.shape

(32561, 15)

In [4]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline

#Blackbox system can include preprocessing, not just a classifier!
pca = PCA()
rf = RandomForestClassifier(n_estimators=100, n_jobs=-1)

blackbox_model = Pipeline([('pca', pca), ('rf', rf)])
blackbox_model.fit(X_train, y_train)

Pipeline(memory=None,
         steps=[('pca',
                 PCA(copy=True, iterated_power='auto', n_components=None,
                     random_state=None, svd_solver='auto', tol=0.0,
                     whiten=False)),
                ('rf',
                 RandomForestClassifier(bootstrap=True, ccp_alpha=0.0,
                                        class_weight=None, criterion='gini',
                                        max_depth=None, max_features='auto',
                                        max_leaf_nodes=None, max_samples=None,
                                        min_impurity_decrease=0.0,
                                        min_impurity_split=None,
                                        min_samples_leaf=1, min_samples_split=2,
                                        min_weight_fraction_leaf=0.0,
                                        n_estimators=100, n_jobs=-1,
                                        oob_score=False, random_state=None,
                    

In [5]:
from interpret import show
from interpret.perf import ROC

blackbox_perf = ROC(blackbox_model.predict_proba).explain_perf(X_test, y_test, name='Blackbox')
show(blackbox_perf)

# Local explications

## Lime

In [6]:
from interpret.blackbox import LimeTabular
from interpret import show

#Blackbox explainers need a predict function, and optionally a dataset
lime = LimeTabular(predict_fn=blackbox_model.predict_proba, data=X_train, random_state=1)

#Pick the instances to explain, optionally pass in labels if you have them
lime_local = lime.explain_local(X_test[:5], y_test[:5], name='LIME')

show(lime_local)

## SHAP

In [7]:
from interpret.blackbox import ShapKernel
import numpy as np

background_val = np.median(X_train, axis=0).reshape(1, -1)
shap = ShapKernel(predict_fn=blackbox_model.predict_proba, data=background_val, feature_names=feature_names)
shap_local = shap.explain_local(X_test[:5], y_test[:5], name='SHAP')
show(shap_local)


Bad key "text.kerning_factor" on line 4 in
/home/barbara/anaconda3/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/_classic_test_patch.mplstyle.
You probably need to get an updated matplotlibrc file from
https://github.com/matplotlib/matplotlib/blob/v3.1.3/matplotlibrc.template
or from the matplotlib source distribution


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))

l1_reg="auto" is deprecated and in the next version (v0.29) the behavior will change from a conditional use of AIC to simply "num_features(10)"!
l1_reg="auto" is deprecated and in the next version (v0.29) the behavior will change from a conditional use of AIC to simply "num_features(10)"!
l1_reg="auto" is deprecated and in the next version (v0.29) the behavior will change from a conditional use of AIC to simply "num_features(10)"!
l1_reg="auto" is deprecated and in the next version (v0.29) the behavior will change from a conditional use of AIC to simply "num_features(10)"!





l1_reg="auto" is deprecated and in the next version (v0.29) the behavior will change from a conditional use of AIC to simply "num_features(10)"!


# Geral

In [9]:
from interpret.blackbox import MorrisSensitivity

sensitivity = MorrisSensitivity(predict_fn=blackbox_model.predict_proba, data=X_train)
sensitivity_global = sensitivity.explain_global(name="Global Sensitivity")

show(sensitivity_global)

In [10]:
from interpret.blackbox import PartialDependence

pdp = PartialDependence(predict_fn=blackbox_model.predict_proba, data=X_train)
pdp_global = pdp.explain_global(name='Partial Dependence')

show(pdp_global)

# Dash

In [11]:
show([blackbox_perf, lime_local, shap_local, sensitivity_global, pdp_global])