# Interpretable models
[interpret](https://github.com/interpretml/interpret): Fit interpretable models. Explain blackbox machine learning.

In [1]:
!pip install -q interpret

In [33]:
import pandas as pd 
# Makes sure we see all columns
pd.set_option('display.max_columns', None)
from interpret import show
from sklearn.metrics import f1_score, accuracy_score, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from interpret.data import Marginal, ClassHistogram

In [34]:
from interpret import set_visualize_provider
from interpret.provider import InlineProvider

# Register InlineProvider as interpret's visualization provider for this session.
set_visualize_provider(InlineProvider())

## Classification Problem

### Explore the dataset

In [35]:
# Load the breast cancer dataset into a DataFrame, with the label stored as the last column
from sklearn.datasets import load_breast_cancer

breast_cancer = load_breast_cancer()
feature_names = list(breast_cancer.feature_names)
df = pd.DataFrame(breast_cancer.data, columns=feature_names)
df["target"] = breast_cancer.target

# Every column except the last is a feature; the last column is the label
train_cols, label = df.columns[:-1], df.columns[-1]
X, y = df[train_cols], df[label]

# Hold out 20% of the rows for evaluation; the split is seeded so it is repeatable
seed = 2022
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=seed,
)
print(X_train.shape, X_test.shape, sep="\n")

(455, 30)
(114, 30)


In [36]:
# Build a ClassHistogram data explanation from the training split and render it
hist = ClassHistogram().explain_data(
    X_train,
    y_train,
    name="Train Data",
)
show(hist)

### Classification Models

In [37]:
from interpret.glassbox import (LogisticRegression,
                                ClassificationTree, 
                                ExplainableBoostingClassifier)

#### Logistic Regression Model

In [38]:
# Fit logistic regression model (L1 penalty, liblinear solver).
# Reuse the `seed` defined in the data-split cell instead of repeating the
# literal, so the split and the model are always seeded from one place.
lr = LogisticRegression(
    random_state=seed,
    feature_names=X_train.columns,
    penalty='l1',
    solver='liblinear',
)
lr.fit(X_train, y_train)
print("Training finished.")

# Evaluate logistic regression model on the held-out test split
y_pred = lr.predict(X_test)
print(f"F1 Score: {round(f1_score(y_test, y_pred, average='macro'), 2)}")
print(f"Accuracy: {round(accuracy_score(y_test, y_pred), 2)}")

Training finished.
F1 Score: 0.93
Accuracy: 0.93


In [39]:
# Global explanation of the fitted logistic regression model
lr_global = lr.explain_global(
    name="Logistic Regression",
)
show(lr_global)

In [40]:
# Local explanations of the logistic regression predictions on the test split
lr_local = lr.explain_local(
    X_test,
    y_test,
    name="Logistic Regression",
)
show(lr_local)

#### Decision Tree for Classification

In [41]:
# Fit decision tree for classification model.
# The tree was the only classifier here without a random_state. Seed it with
# the shared `seed` like the other models so repeated runs build the same tree
# (extra keyword arguments are forwarded to the underlying scikit-learn tree).
ct = ClassificationTree(random_state=seed)
ct.fit(X_train, y_train)
print("Training finished.")

# Evaluate the tree on the held-out test split
y_pred = ct.predict(X_test)
print(f"F1 Score: {round(f1_score(y_test, y_pred, average='macro'), 2)}")
print(f"Accuracy: {round(accuracy_score(y_test, y_pred), 2)}")

Training finished.
F1 Score: 0.95
Accuracy: 0.96


In [42]:
# Global explanation of the fitted classification tree
ct_global = ct.explain_global(
    name="Classification Tree",
)
show(ct_global)

In [43]:
# Local explanations of the classification tree predictions on the test split
ct_local = ct.explain_local(
    X_test,
    y_test,
    name="Classification Tree",
)
show(ct_local)

In [47]:
# Show the first row of the test split as a one-row DataFrame
X_test[:1]

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,radius error,texture error,perimeter error,area error,smoothness error,compactness error,concavity error,concave points error,symmetry error,fractal dimension error,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
453,14.53,13.98,93.86,644.2,0.1099,0.09242,0.06895,0.06495,0.165,0.06121,0.306,0.7213,2.143,25.7,0.006133,0.01251,0.01615,0.01136,0.02207,0.003563,15.8,16.93,103.1,749.9,0.1347,0.1478,0.1373,0.1069,0.2606,0.0781


In [49]:
# Class probabilities the tree assigns to the first row of the test split
ct.predict_proba(X_test[:1])

array([[0.01851852, 0.98148148]])

#### Explainable Boosting Machine for Classification

In [13]:
# Fit explainable boosting machine for classification.
# Reuse the `seed` defined in the data-split cell instead of repeating the
# literal, so the split and the model are always seeded from one place.
ebc = ExplainableBoostingClassifier(random_state=seed)
ebc.fit(X_train, y_train)
print("Training finished.")

# Evaluate the EBM on the held-out test split
y_pred = ebc.predict(X_test)
print(f"F1 Score: {round(f1_score(y_test, y_pred, average='macro'), 2)}")
print(f"Accuracy: {round(accuracy_score(y_test, y_pred), 2)}")

Training finished.
F1 Score: 0.95
Accuracy: 0.96


In [14]:
# Global explanation of the fitted classification EBM
ebc_global = ebc.explain_global(
    name="Classification EBM",
)
show(ebc_global)

In [15]:
# Local explanations of the classification EBM predictions on the test split
ebc_local = ebc.explain_local(
    X_test,
    y_test,
    name="Classification EBM",
)
show(ebc_local)

## Regression Problem

### Explore the dataset

In [19]:
# Load the Boston housing dataset into a DataFrame, with the target stored as the last column.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this import presumably fails on current scikit-learn — confirm the pinned version.
from sklearn.datasets import load_boston

boston = load_boston()
feature_names = list(boston.feature_names)
df = pd.DataFrame(boston.data, columns=feature_names)
df["target"] = boston.target

# Every column except the last is a feature; the last column is the target
train_cols, label = df.columns[:-1], df.columns[-1]
X, y = df[train_cols], df[label]

# Hold out 20% of the rows for evaluation; the split is seeded so it is repeatable
seed = 2022
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=seed,
)
print(X_train.shape, X_test.shape, sep="\n")

(404, 13)
(102, 13)


In [20]:
# Build a Marginal data explanation from the training split and render it
marginal = Marginal().explain_data(
    X_train,
    y_train,
    name="Train Data",
)
show(marginal)

### Regression Models

In [21]:
from interpret.glassbox import (LinearRegression,
                                RegressionTree, 
                                ExplainableBoostingRegressor)

#### Linear Regression Model

In [22]:
# Fit linear regression model.
# Reuse the `seed` defined in the data-split cell instead of repeating the
# literal, so the split and the model are always seeded from one place.
lr = LinearRegression(
    random_state=seed,
    feature_names=X_train.columns,
)
lr.fit(X_train, y_train)
print("Training finished.")

# Evaluate linear regression model on the held-out test split
y_pred = lr.predict(X_test)
print(f"Root Mean Squared Error: {round(mean_squared_error(y_test, y_pred)**(1/2), 2)}")
print(f"R2: {round(r2_score(y_test, y_pred), 2)}")

Training finished.
Root Mean Squared Error: 5.02
R2: 0.65


In [23]:
# Global explanation of the fitted linear regression model
lr_global = lr.explain_global(
    name="Linear Regression",
)
show(lr_global)

In [24]:
# Local explanations of the linear regression predictions on the test split
lr_local = lr.explain_local(
    X_test,
    y_test,
    name="Linear Regression",
)
show(lr_local)

#### Decision Tree for Regression

In [25]:
# Fit decision tree model for regression.
# The tree was the only regressor here without a random_state. Seed it with
# the shared `seed` like the other models so repeated runs build the same tree
# (extra keyword arguments are forwarded to the underlying scikit-learn tree).
rt = RegressionTree(random_state=seed)
rt.fit(X_train, y_train)
print("Training finished.")

# Evaluate the tree on the held-out test split
y_pred = rt.predict(X_test)
print(f"Root Mean Squared Error: {round(mean_squared_error(y_test, y_pred)**(1/2), 2)}")
print(f"R2: {round(r2_score(y_test, y_pred), 2)}")

Training finished.
Root Mean Squared Error: 4.95
R2: 0.66


In [26]:
# Global explanation of the fitted regression tree
rt_global = rt.explain_global(
    name="Regression Tree",
)
show(rt_global)

In [27]:
# Local explanations of the regression tree predictions on the test split
rt_local = rt.explain_local(
    X_test,
    y_test,
    name="Regression Tree",
)
show(rt_local)

#### Explainable Boosting Machine for Regression

In [34]:
# Fit explainable boosting machine for regression.
# Reuse the `seed` defined in the data-split cell instead of repeating the
# literal, so the split and the model are always seeded from one place.
ebr = ExplainableBoostingRegressor(random_state=seed)
ebr.fit(X_train, y_train)
print("Training finished.")

# Evaluate the EBM on the held-out test split.
# NOTE(review): the recorded output for this cell shows RMSE 0.2 next to
# R2 0.83, which does not fit the other models on the same split (RMSE ~5 at
# R2 ~0.65). It looks stale — re-run on a fresh kernel to confirm.
y_pred = ebr.predict(X_test)
print(f"Root Mean Squared Error: {round(mean_squared_error(y_test, y_pred)**(1/2), 2)}")
print(f"R2: {round(r2_score(y_test, y_pred), 2)}")

Training finished.
Root Mean Squared Error: 0.2
R2: 0.83


In [35]:
# Global explanation of the fitted regression EBM
ebr_global = ebr.explain_global(
    name="Regression EBM",
)
show(ebr_global)

In [36]:
# Local explanations of the regression EBM predictions on the test split
ebr_local = ebr.explain_local(
    X_test,
    y_test,
    name="Regression EBM",
)
show(ebr_local)