# Interacting with ProtoDash

In this notebook we'll combine ProtoDash with Partial Effects to obtain feature importances for the digits classification task.

In [1]:
import numpy  as np
import pandas as pd

# automatically differentiable implementation of numpy
import jax.numpy as jnp

from sklearn import datasets

from sklearn.model_selection import train_test_split
from IPython.display         import display, Math, Latex

import matplotlib.pyplot as plt

from itea.classification import ITEA_classifier
from itea.inspection     import *

from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import classification_report

from aix360.algorithms.protodash import ProtodashExplainer, get_Gaussian_Data

In [1]:
# Load the digits dataset shipped with scikit-learn and pull out what we need
digits_data = datasets.load_digits()

X       = digits_data['data']
y       = digits_data['target']
labels  = digits_data['feature_names']
targets = digits_data['target_names']

# Hold out 33% of the samples for testing; the seed is fixed for the split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)

print(X_train.shape)

# Transformation functions for ITEA, written with jax.numpy
# (so their derivatives do not have to be worked out analytically)
tfuncs = dict(
    id  = lambda x: x,
    log = jnp.log,
    exp = jnp.exp,
)

# Hyper-parameters of the evolutionary search, gathered in one place
itea_settings = dict(
    gens            = 200,
    popsize         = 100,
    max_terms       = 64,
    expolim         = (0, 1),
    verbose         = 5,
    tfuncs          = tfuncs,
    labels          = labels,
    simplify_method = 'simplify_by_var',
    random_state    = 42,
)

clf = ITEA_classifier(**itea_settings).fit(X_train, y_train)

(1203, 64)
gen 	 min_fitness 	 mean_fitness 	 max_fitness 	 remaining (s)
0 	 0.1055694098088113 	 0.10556940980881135 	 0.1055694098088113 	 11min52seg
5 	 0.1055694098088113 	 0.10556940980881135 	 0.1055694098088113 	 11min57seg
10 	 0.1055694098088113 	 0.10556940980881135 	 0.1055694098088113 	 10min27seg
15 	 0.1055694098088113 	 0.10556940980881135 	 0.1055694098088113 	 11min59seg
20 	 0.1055694098088113 	 0.10636741479634253 	 0.15045719035743974 	 16min39seg
25 	 0.1055694098088113 	 0.11221113881961763 	 0.15045719035743974 	 36min0seg
30 	 0.10889443059019119 	 0.15061512884455525 	 0.1629260182876143 	 35min2seg
35 	 0.15045719035743974 	 0.16246882793017456 	 0.22942643391521197 	 58min18seg
40 	 0.16209476309226933 	 0.2215045719035743 	 0.2734829592684954 	 84min1seg
45 	 0.22942643391521197 	 0.24636741479634242 	 0.2734829592684954 	 87min42seg
50 	 0.2734829592684954 	 0.27348295926849536 	 0.2734829592684954 	 80min52seg
55 	 0.2734829592684954 	 0.27348295926849536

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/home/galdeia/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3441, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-2-dd7c663c0d94>", line 31, in <module>
    ).fit(X_train, y_train)
  File "/home/galdeia/anaconda3/lib/python3.7/site-packages/itea/classification/_ITEA_classifier.py", line 183, in fit
    X, y, self.itexpr_class, self.greater_is_better)
  File "/home/galdeia/anaconda3/lib/python3.7/site-packages/itea/_base/_BaseITEA.py", line 333, in _evolve
    random_state = random_state)
  File "/home/galdeia/anaconda3/lib/python3.7/site-packages/itea/_base/_BaseITEA.py", line 264, in _select_population
    pop = [ps for ps in [p.fit(X, y) for p in pop]
  File "/home/galdeia/anaconda3/lib/python3.7/site-packages/itea/_base/_BaseITEA.py", line 264, in <listcomp>
    pop = [ps for ps in [p.fit(X, y) for p in pop]
  File "/home/galdeia/anaconda3/lib/python3.7/site-packages/i


KeyboardInterrupt



In [None]:
# Keep a handle on the classifier's `bestsol_` attribute (presumably the best
# expression found by the evolutionary search — confirm against the ITEA docs).
# NOTE(review): this likely requires the `fit` call above to have finished.
final_itexpr = clf.bestsol_
# Bare last expression: the notebook displays `selected_features_` as the cell output
final_itexpr.selected_features_

In [None]:
print(X_test.shape, y_test.reshape(-1, 1).shape)

# ProtoDash is run on a one-hot encoding of the training pixels with the class
# label appended as an extra column.
# `sparse` was renamed to `sparse_output` in scikit-learn 1.2 and removed in 1.4,
# so try the new keyword first and fall back to the old one on older installs.
try:
    onehot_encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    onehot_encoder = OneHotEncoder(sparse=False)

onehot_encoded = onehot_encoder.fit_transform(
    np.hstack( (X_train, y_train.reshape(-1, 1)) ) )

explainer = ProtodashExplainer()

# call protodash explainer
# S contains indices of the selected prototypes
# W contains importance weights associated with the selected prototypes
# m asks for as many prototypes as there are distinct class labels
(W, S, _) = explainer.explain(onehot_encoded, onehot_encoded, m=len(np.unique(y_train)))

In [None]:
# One panel per selected prototype. The grid is sized from len(S) so that no
# prototype is silently dropped: a fixed 2x3 grid with one hidden panel shows
# at most five images, fewer than one prototype per digit class.
n_cols = 5
n_rows = max(1, int(np.ceil(len(S) / n_cols)))

fig, axs = plt.subplots(
    n_rows, n_cols, figsize=(2 * n_cols, 2.5 * n_rows), squeeze=False)
panels = axs.ravel()

# Hiding the subplots that are not needed
for ax in panels[len(S):]:
    ax.set_visible(False)

for s, ax in zip(S, panels):
    # each sample is a flattened 8x8 image
    ax.imshow(X_train[s].reshape(8, 8))
    ax.set_title(f"Prototype of class {y_train[s]}")

plt.tight_layout()
plt.show()

In [None]:
# Explainer for the final expression, fitted on the training data
it_explainer = ITExpr_explainer(
    itexpr=final_itexpr,
    tfuncs=tfuncs
).fit(X_train, y_train)

# One panel per selected prototype. The grid is sized from len(S) so that no
# prototype is silently dropped: a fixed 2x3 grid with one hidden panel shows
# at most five images.
n_cols = 5
n_rows = max(1, int(np.ceil(len(S) / n_cols)))

fig, axs = plt.subplots(
    n_rows, n_cols, figsize=(2.4 * n_cols, 3 * n_rows), squeeze=False)
panels = axs.ravel()

# Hiding the subplots that are not needed
for ax in panels[len(S):]:
    ax.set_visible(False)

for s, ax in zip(S, panels):

    # Partial effects of the single prototype, summed along the first axis
    # (presumably one row per class — confirm against the ITEA docs)
    importances = np.sum(
        it_explainer.average_partial_effects(X_train[s, :].reshape(1, -1)),
        axis=0
    )

    ax.imshow(importances.reshape(8, 8))
    ax.set_title(f"Feature importances for\nPrototype of class {y_train[s]}")

plt.tight_layout()
plt.show()

In [None]:
# Now ask ProtoDash for three times as many prototypes as there are classes,
# so that feature importances can be inspected for groups of prototypes
n_prototypes = 3 * len(np.unique(y_train))

explainer = ProtodashExplainer()

W, S, _ = explainer.explain(onehot_encoded, onehot_encoded, m=n_prototypes)

In [None]:
# One panel per class. The grid is sized from the number of classes so that
# none is silently dropped: a fixed 2x3 grid with one hidden panel shows at
# most five of them.
classes = np.unique(y_train)

n_cols = 5
n_rows = max(1, int(np.ceil(len(classes) / n_cols)))

fig, axs = plt.subplots(
    n_rows, n_cols, figsize=(2.4 * n_cols, 3 * n_rows), squeeze=False)
panels = axs.ravel()

# Hiding the subplots that are not needed
for ax in panels[len(classes):]:
    ax.set_visible(False)

for class_, ax in zip(classes, panels):

    prototypes_for_class = [s for s in S if y_train[s]==class_]

    if not prototypes_for_class:
        # Nothing in the prototype selection forces every class to receive a
        # prototype; skip the explainer rather than feed it an empty selection
        ax.set_title(f"No prototypes selected\nfor class {class_}")
        ax.axis('off')
        continue

    # The result is indexed by the class label itself, which assumes the labels
    # are 0..n_classes-1 in the explainer's class order — TODO confirm
    importances = it_explainer.average_partial_effects(X_train[prototypes_for_class, :])[class_]

    ax.imshow(importances.reshape(8, 8))
    ax.set_title(f"Feature importances for\nPrototype of class {class_}")

plt.tight_layout()
plt.show()
plt.show()