# Set up a classification experiment

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Read heart.csv and discard exact duplicate rows in one expression, so
# re-running this cell always rebuilds df from the file.
df = pd.read_csv("heart.csv").drop_duplicates()

In [2]:
# The last column of df is the label; every column before it is a feature.
label = df.columns[-1]
train_cols = df.columns[:-1]
y = df[label]
X = df[train_cols]

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
seed = 1
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=seed,
)

## Explore the dataset (info)

In [3]:
from interpret import show
from interpret.data import ClassHistogram

# Build the ClassHistogram data explanation from the training split; it is
# passed to the combined dashboard later in the notebook.
hist = ClassHistogram().explain_data(X_train, y_train, name = 'Train Data')
# Standalone rendering is disabled here; uncomment to view it on its own.
#show(hist)

## Train the Explainable Boosting Machine (EBM)

In [4]:
from interpret.glassbox import ExplainableBoostingClassifier, LogisticRegression, ClassificationTree

# Fit the EBM on the un-encoded training split, seeded with the shared `seed`.
ebm = ExplainableBoostingClassifier(random_state=seed, n_jobs=-1)
# Left as the cell's last expression so the fitted estimator's repr is displayed.
ebm.fit(X_train, y_train)   #Works on dataframes and numpy arrays

ExplainableBoostingClassifier(feature_names=['age', 'sex', 'cp', 'trtbps',
                                             'chol', 'fbs', 'restecg',
                                             'thalachh', 'exng', 'oldpeak',
                                             'slp', 'caa', 'thall',
                                             'chol x caa', 'cp x trtbps',
                                             'age x thalachh', 'age x cp',
                                             'cp x caa', 'cp x slp',
                                             'thalachh x caa', 'caa x thall',
                                             'thalachh x oldpeak',
                                             'cp x chol'],
                              feature_types=['continuous', 'categorical',
                                             'continuous', 'continuous',
                                             'continuous', 'categorical',
                                             'continuous', 'continu

## Global Explanations: What the model learned overall

In [5]:
# Global explanation of the fitted EBM; kept in a variable because it is
# shown again in the later comparison and dashboard cells.
ebm_global = ebm.explain_global(name='EBM')
show(ebm_global)

## Local Explanations: How an individual prediction was made

In [6]:
# Explain the first five held-out rows (selected by position) together with
# their true labels.
ebm_local = ebm.explain_local(
    X_test.iloc[:5],
    y_test.iloc[:5],
    name='EBM',
)
show(ebm_local)

## Evaluate EBM performance

In [7]:
from interpret.perf import ROC

# ROC performance explanation for the EBM on the held-out test split.
ebm_perf = ROC(ebm.predict_proba).explain_perf(X_test, y_test, name='EBM')
# Rendering is deferred: ebm_perf is shown next to the other models further down.
#show(ebm_perf)

## Let's test out a few other Explainable Models

In [8]:
from interpret.glassbox import LogisticRegression, ClassificationTree

# Logistic Regression and the Decision Tree need categorical variables to be
# encoded numerically, so one-hot encode the feature frame first.
# NOTE(review): by default pd.get_dummies only expands object/string/category
# columns; integer-coded categoricals pass through unchanged — confirm this is
# the intended encoding for heart.csv.
X_enc = pd.get_dummies(X, prefix_sep='.')
feature_names = list(X_enc.columns)

# Reuse the row split made earlier instead of calling train_test_split again:
# selecting by index keeps the encoded frames aligned with the existing
# y_train / y_test and avoids silently overwriting those labels.
X_train_enc = X_enc.loc[X_train.index]
X_test_enc = X_enc.loc[X_test.index]

# L1-penalised logistic regression; liblinear is passed as the solver.
lr = LogisticRegression(random_state=seed, feature_names=feature_names, penalty='l1', solver='liblinear')
lr.fit(X_train_enc, y_train)

# Left as the cell's last expression so the fitted tree's repr is displayed.
tree = ClassificationTree()
tree.fit(X_train_enc, y_train)

<interpret.glassbox.decisiontree.ClassificationTree at 0x22d9f4fb8b0>

## Compare performance using the Dashboard

In [9]:
# Score the two models trained on encoded features against the encoded test split.
tree_perf = ROC(tree.predict_proba).explain_perf(
    X_test_enc, y_test, name='Classification Tree'
)
lr_perf = ROC(lr.predict_proba).explain_perf(
    X_test_enc, y_test, name='Logistic Regression'
)

# Render the three ROC explanations in the same order as before.
for _perf in (lr_perf, tree_perf, ebm_perf):
    show(_perf)

### Glassbox: All of our models have global and local explanations

In [10]:
# Build the global explanations for the two baseline models.
tree_global = tree.explain_global(name='Classification Tree')
lr_global = lr.explain_global(name='Logistic Regression')

# Render them, followed by the EBM's global explanation computed earlier.
for _global in (lr_global, tree_global, ebm_global):
    show(_global)

### Dashboard: look at everything at once

In [11]:
# Do everything in one shot with the InterpretML Dashboard by passing a list into show

# The list combines the training-data histogram with each model's global and
# performance explanations built in the cells above.
show([hist, lr_global, lr_perf, tree_global, tree_perf, ebm_global, ebm_perf], share_tables=True)

## Permutation importance with the eli5 library

### Logistic Regression

In [12]:
from eli5.sklearn import PermutationImportance

In [13]:
# `lr` was fitted on the encoded frame, so permutation importance must be
# computed on those same columns rather than on the raw X_train.
perm = PermutationImportance(lr, random_state=1).fit(X_train_enc, y_train)

In [14]:
import eli5

In [15]:
# Label the weights with the columns of the encoded frame that `lr` was
# trained on, so names stay correct if encoding ever changes the column set.
eli5.show_weights(perm, feature_names=X_train_enc.columns.tolist())

Weight,Feature
0.0830  ± 0.0332,caa
0.0614  ± 0.0177,cp
0.0465  ± 0.0321,thalachh
0.0415  ± 0.0273,sex
0.0398  ± 0.0276,oldpeak
0.0332  ± 0.0189,thall
0.0116  ± 0.0192,chol
0.0108  ± 0.0066,slp
0.0108  ± 0.0135,trtbps
0.0083  ± 0.0074,exng


### Explainable Boosting Machine (EBM)

In [16]:
import eli5
from eli5.sklearn import PermutationImportance

# The EBM was fitted on the raw (un-encoded) training split, so its
# permutation importance is computed on that same frame.
perm = PermutationImportance(ebm, random_state=1).fit(X_train, y_train)

eli5.show_weights(perm, feature_names=list(X_train.columns))

Weight,Feature
0.0390  ± 0.0171,caa
0.0332  ± 0.0117,cp
0.0315  ± 0.0154,thall
0.0116  ± 0.0110,slp
0.0116  ± 0.0097,trtbps
0.0100  ± 0.0179,chol
0.0066  ± 0.0250,oldpeak
0.0033  ± 0.0062,sex
0.0008  ± 0.0033,fbs
0.0008  ± 0.0177,exng


### Classification Tree

In [17]:
import eli5
from eli5.sklearn import PermutationImportance

# `tree` was fitted on the encoded frame, so permute (and label) those same
# columns instead of the raw X_train the original cell used.
perm = PermutationImportance(tree, random_state=1).fit(X_train_enc, y_train)

eli5.show_weights(perm, feature_names=X_train_enc.columns.tolist())

Weight,Feature
0.1710  ± 0.0243,cp
0.0813  ± 0.0309,caa
0.0747  ± 0.0283,oldpeak
0.0631  ± 0.0206,thall
0.0133  ± 0.0033,slp
0  ± 0.0000,exng
0  ± 0.0000,thalachh
0  ± 0.0000,restecg
0  ± 0.0000,fbs
0  ± 0.0000,chol
