# ELI5 library

## Building a model to interpret
1. ELI5 library only works for sklearn models, so Keras models have to be wrapped as sklearn models.

In [1]:
# For data
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# For model
import tensorflow as tf
from tensorflow import keras

# For interpreting the model
import eli5
from eli5.sklearn import PermutationImportance



In [2]:
# Loading the data
bc_data = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(bc_data.data, bc_data.target,
                                                   test_size=0.25, random_state=1)
feature_names = bc_data.feature_names

# Normalizing the predictive variables
scalar = StandardScaler()
X_train = scalar.fit_transform(X_train)
X_test = scalar.transform(X_test)

In [3]:
X_train.shape

(426, 30)

In [4]:
y_train[0:3]

array([0, 1, 0])

In [5]:
# Building a MLP classification model with Keras API
def build_model():
    model = keras.models.Sequential([
        keras.layers.Input(shape=[30]),
        keras.layers.Dense(30, activation='relu'),
        keras.layers.Dense(30, activation='relu'),
        keras.layers.Dense(1, activation='sigmoid')
    ])
    
    model.compile(loss='binary_crossentropy',
                 optimizer=keras.optimizers.Adam(),
                 metrics=['accuracy'])
    
    return model

In [6]:
# Wrapping Keras model with a thin sklearn wrapper
keras_clf = keras.wrappers.scikit_learn.KerasClassifier(build_model)

In [7]:
# Model learning
history = keras_clf.fit(X_train, y_train,
             batch_size=32, epochs=100, verbose=0,
             validation_data=(X_test, y_test),
             callbacks=[keras.callbacks.EarlyStopping(patience=10)])

## Permutation Importance
1. Computes feature importance of each variable in the model.

In [8]:
# Learning variable importance using Permutation analysis
perm_obj = PermutationImportance(keras_clf, random_state=1).fit(X_test, y_test, 
                                                            verbose=0)





In [9]:
# View variable importance
eli5.show_weights(perm_obj, feature_names=feature_names)

Weight,Feature
0.0154  ± 0.0105,radius error
0.0154  ± 0.0105,worst smoothness
0.0126  ± 0.0285,worst perimeter
0.0070  ± 0.0153,mean concavity
0.0070  ± 0.0125,worst area
0.0070  ± 0.0088,compactness error
0.0070  ± 0.0000,mean smoothness
0.0056  ± 0.0105,mean perimeter
0.0042  ± 0.0069,worst symmetry
0.0028  ± 0.0112,worst texture


### Note
1. Some variables have a negative score. Could have happened by chance where the permutation led to a better set of predictions