# Interpretable models

[interpret](https://github.com/interpretml/interpret): Fit interpretable models. Explain blackbox machine learning.

In [2]:
!pip install -q interpret

In [1]:
import pandas as pd 
# Makes sure we see all columns
pd.set_option('display.max_columns', None)
from interpret import show
from sklearn.metrics import f1_score, accuracy_score, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from interpret.data import Marginal, ClassHistogram

In [2]:
# Register an InlineProvider instance as interpret's visualization provider.
# Presumably this makes later `show(...)` calls render inline in the notebook
# output - confirm against the interpret documentation.
from interpret.provider import InlineProvider
from interpret import set_visualize_provider

set_visualize_provider(InlineProvider())

## Classification Problem

### Explore the dataset

In [28]:
# Load the breast cancer dataset through scikit-learn
from sklearn.datasets import load_breast_cancer

breast_cancer = load_breast_cancer()
feature_names = list(breast_cancer.feature_names)

# Put features and label in one frame; the label is appended as the last column.
df = pd.DataFrame(breast_cancer.data, columns=feature_names)
df["target"] = breast_cancer.target

# Everything except the last column is a feature; the last column is the label.
train_cols, label = df.columns[:-1], df.columns[-1]
X, y = df[train_cols], df[label]

# Hold out 20% of the rows for evaluation, using a fixed seed for the split.
seed = 2022
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=seed,
)
print(X_train.shape, X_test.shape, sep="\n")

(455, 30)
(114, 30)


In [29]:
# Build a ClassHistogram data explanation from the training features and
# labels (named 'Train Data') and pass it to `show` for display.
hist = ClassHistogram().explain_data(X_train, y_train, name = 'Train Data')
show(hist)

### Classification Models

In [30]:
from interpret.glassbox import (LogisticRegression,
                                ClassificationTree, 
                                ExplainableBoostingClassifier)

#### Logistic Regression Model

In [34]:
# Fit logistic regression model
# Reuse the notebook-wide `seed` (set next to the train/test split) instead of
# a second hard-coded 2022, so changing the seed in one place affects both.
# Feature names are passed as a plain list rather than a pandas Index, so the
# cell does not rely on the library accepting Index objects.
lr = LogisticRegression(
    random_state=seed,
    feature_names=list(X_train.columns),
    penalty='l1',
    solver='liblinear',
)
lr.fit(X_train, y_train)
print("Training finished.")

# Evaluate logistic regression model
# Score the held-out split with macro-averaged F1 and accuracy, rounded to
# two decimals for display.
y_pred = lr.predict(X_test)
print(f"F1 Score: {round(f1_score(y_test, y_pred, average='macro'), 2)}")
print(f"Accuracy: {round(accuracy_score(y_test, y_pred), 2)}")

Training finished.
F1 Score: 0.93
Accuracy: 0.93


In [35]:
# Explain global logistic regression model
# Request the model-level explanation from the fitted `lr` (named
# 'Logistic Regression') and pass it to `show` for display.
lr_global = lr.explain_global(name='Logistic Regression')
show(lr_global)

In [36]:
# Explain local prediction
# Request per-sample explanations for the held-out test rows; `y_test` is
# supplied alongside the features. The result is passed to `show` for display.
lr_local = lr.explain_local(X_test, y_test, name='Logistic Regression')
show(lr_local)

#### Decision Tree for Classification

In [37]:
# Fit decision tree for classification model
# Seed the tree like the other models in this notebook. scikit-learn decision
# trees permute candidate features while searching for splits, so an unseeded
# tree can differ between runs when several splits score equally.
# NOTE(review): relies on ClassificationTree forwarding extra keyword
# arguments to the underlying scikit-learn tree - confirm for the installed
# interpret version.
tree = ClassificationTree(random_state=seed)
tree.fit(X_train, y_train)
print("Training finished.")

# Score the held-out split with macro-averaged F1 and accuracy, rounded to
# two decimals for display.
y_pred = tree.predict(X_test)
print(f"F1 Score: {round(f1_score(y_test, y_pred, average='macro'), 2)}")
print(f"Accuracy: {round(accuracy_score(y_test, y_pred), 2)}")

Training finished.
F1 Score: 0.95
Accuracy: 0.96


In [40]:
# Explain global decision tree for classification model
# Request the model-level explanation from the fitted `tree` (named
# 'Classification Tree') and pass it to `show` for display.
ct_global = tree.explain_global(name='Classification Tree')
show(ct_global)

In [41]:
# Explain local prediction
# Request per-sample explanations for the held-out test rows; `y_test` is
# supplied alongside the features. The result is passed to `show` for display.
ct_local = tree.explain_local(X_test, y_test, name='Classification Tree')
show(ct_local)

#### Explainable Boosting Machine for Classification

In [42]:
# Fit explainable boosting machine for classification
# Reuse the notebook-wide `seed` (set next to the train/test split) instead of
# a second hard-coded 2022, so changing the seed in one place affects both.
ebm = ExplainableBoostingClassifier(random_state=seed)
ebm.fit(X_train, y_train)
print("Training finished.")

# Score the held-out split with macro-averaged F1 and accuracy, rounded to
# two decimals for display.
y_pred = ebm.predict(X_test)
print(f"F1 Score: {round(f1_score(y_test, y_pred, average='macro'), 2)}")
print(f"Accuracy: {round(accuracy_score(y_test, y_pred), 2)}")

Training finished.
F1 Score: 0.95
Accuracy: 0.96


In [44]:
# Explain global explainable boosting machine for classification
# Request the model-level explanation from the fitted `ebm` (named
# 'Classification EBM') and pass it to `show` for display.
cebm_global = ebm.explain_global(name='Classification EBM')
show(cebm_global)

In [45]:
# Explain local prediction
# Request per-sample explanations for the held-out test rows; `y_test` is
# supplied alongside the features. The result is passed to `show` for display.
cebm_local = ebm.explain_local(X_test, y_test, name='Classification EBM')
show(cebm_local)

## Regression Problem

### Explore the dataset

In [46]:
# load data
# `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2, where
# importing it raises ImportError. Use the bundled loader when it still exists;
# otherwise rebuild the same table from the original StatLib file, following
# the recipe given in scikit-learn's deprecation notice (needs network access).
# NOTE(review): scikit-learn retired this dataset over ethical concerns with
# the `B` feature. Consider moving to another regression dataset; doing so
# would change the shapes and metrics recorded in this notebook.
import numpy as np
from sklearn.utils import Bunch

try:
    from sklearn.datasets import load_boston
except ImportError:
    load_boston = None

if load_boston is not None:
    boston = load_boston()
else:
    # After a 22-line header, each record spans two physical lines: the even
    # rows hold the first 11 features, the odd rows hold the remaining 2
    # features followed by the target value.
    raw_df = pd.read_csv(
        "http://lib.stat.cmu.edu/datasets/boston",
        sep=r"\s+",
        skiprows=22,
        header=None,
    )
    boston = Bunch(
        data=np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]]),
        target=raw_df.values[1::2, 2],
        feature_names=np.array([
            "CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE",
            "DIS", "RAD", "TAX", "PTRATIO", "B", "LSTAT",
        ]),
    )

# Put features and label in one frame; the label is appended as the last column.
feature_names = list(boston.feature_names)
df = pd.DataFrame(boston.data, columns=feature_names)
df["target"] = boston.target
train_cols = df.columns[0:-1]
label = df.columns[-1]
X = df[train_cols]
y = df[label]

seed = 2022
# Split the data for evaluation
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=seed)
print(X_train.shape)
print(X_test.shape)

(404, 13)
(102, 13)


In [47]:
# Build a Marginal data explanation from the training features and target
# (named 'Train Data') and pass it to `show` for display.
marginal = Marginal().explain_data(X_train, y_train, name = 'Train Data')
show(marginal)

### Regression Models

In [48]:
from interpret.glassbox import (LinearRegression,
                                RegressionTree, 
                                ExplainableBoostingRegressor)

#### Linear Regression Model

In [51]:
# Fit linear regression model
# Reuse the notebook-wide `seed` (set next to the train/test split) instead of
# a second hard-coded 2022, so changing the seed in one place affects both.
# Feature names are passed as a plain list rather than a pandas Index, so the
# cell does not rely on the library accepting Index objects.
lr = LinearRegression(random_state=seed, feature_names=list(X_train.columns))
lr.fit(X_train, y_train)
print("Training finished.")

# Evaluate linear regression model
# RMSE is the square root of the mean squared error on the held-out split;
# both metrics are rounded to two decimals for display.
y_pred = lr.predict(X_test)
print(f"Root Mean Squared Error: {round(mean_squared_error(y_test, y_pred)**(1/2), 2)}")
print(f"R2: {round(r2_score(y_test, y_pred), 2)}")

Training finished.
Root Mean Squared Error: 5.02
R2: 0.65


In [52]:
# Explain global linear regression model
# Request the model-level explanation from the fitted `lr` (named
# 'Linear Regression') and pass it to `show` for display.
lr_global = lr.explain_global(name='Linear Regression')
show(lr_global)

In [53]:
# Explain local prediction
# Request per-sample explanations for the held-out test rows; `y_test` is
# supplied alongside the features. The result is passed to `show` for display.
lr_local = lr.explain_local(X_test, y_test, name='Linear Regression')
show(lr_local)

#### Decision Tree for Regression

In [54]:
# Fit decision tree model for regression
# Seed the tree like the other models in this notebook. scikit-learn decision
# trees permute candidate features while searching for splits, so an unseeded
# tree can differ between runs when several splits score equally.
# NOTE(review): relies on RegressionTree forwarding extra keyword arguments
# to the underlying scikit-learn tree - confirm for the installed interpret
# version.
tree = RegressionTree(random_state=seed)
tree.fit(X_train, y_train)
print("Training finished.")

# RMSE is the square root of the mean squared error on the held-out split;
# both metrics are rounded to two decimals for display.
y_pred = tree.predict(X_test)
print(f"Root Mean Squared Error: {round(mean_squared_error(y_test, y_pred)**(1/2), 2)}")
print(f"R2: {round(r2_score(y_test, y_pred), 2)}")

Training finished.
Root Mean Squared Error: 4.95
R2: 0.66


In [55]:
# Explain global decision tree model for regression
# Request the model-level explanation from the fitted `tree` (named
# 'Regression Tree') and pass it to `show` for display.
rt_global = tree.explain_global(name='Regression Tree')
show(rt_global)

In [56]:
# Explain local prediction
# Request per-sample explanations for the held-out test rows; `y_test` is
# supplied alongside the features. The result is passed to `show` for display.
rt_local = tree.explain_local(X_test, y_test, name='Regression Tree')
show(rt_local)

#### Explainable Boosting Machine for Regression

In [57]:
# Fit explainable boosting machine for regression
# Reuse the notebook-wide `seed` (set next to the train/test split) instead of
# a second hard-coded 2022, so changing the seed in one place affects both.
ebm = ExplainableBoostingRegressor(random_state=seed)
ebm.fit(X_train, y_train)
print("Training finished.")

# RMSE is the square root of the mean squared error on the held-out split;
# both metrics are rounded to two decimals for display.
y_pred = ebm.predict(X_test)
print(f"Root Mean Squared Error: {round(mean_squared_error(y_test, y_pred)**(1/2), 2)}")
print(f"R2: {round(r2_score(y_test, y_pred), 2)}")

Training finished.
Root Mean Squared Error: 4.17
R2: 0.76


In [58]:
# Explain global explainable boosting machine for regression
# Request the model-level explanation from the fitted `ebm` (named 'EBM')
# and pass it to `show` for display.
ebm_global = ebm.explain_global(name='EBM')
show(ebm_global)

In [59]:
# Explain local prediction
# Request per-sample explanations for the held-out test rows; `y_test` is
# supplied alongside the features. The result is passed to `show` for display.
ebm_local = ebm.explain_local(X_test, y_test, name='EBM')
show(ebm_local)