In [1]:
# maximize reproducibility: set seed with minimal imports
# Every RNG below is seeded from this single base value, each with its own
# offset (seed, seed + 1, ..., seed + 4) so no two generators share a seed.
# just a seed
seed = 431136
import os

# The environment variables below are set *before* TensorFlow is imported
# further down in this cell.
# verbosity
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# reproducibility
# https://github.com/NVIDIA/framework-determinism
os.environ['TF_DETERMINISTIC_OPS'] = '1'
os.environ['TF_CUDNN_DETERMINISTIC'] = '1'

import random

# Seed the global stdlib RNG and also keep a dedicated instance (rng_r).
random.seed(seed)
rng_r = random.Random(seed + 1)

import numpy as np

# Seed the legacy global NumPy RNG and also create a Generator (rng_np) that
# later cells pass around explicitly.
np.random.seed(seed + 2)
rng_np = np.random.default_rng(seed + 3)

import tensorflow as tf

# Global TensorFlow seed.
tf.random.set_seed(seed + 4)

In [2]:
%load_ext autoreload
%autoreload 2
from IPython.display import display

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # noqa
import seaborn as sns

import pandas as pd

from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

# from posthoceval.explainers import KernelSHAPExplainer
# from posthoceval.explainers import MAPLEExplainer
from posthoceval.explainers import *

from posthoceval.models.gam import MultiClassLogisticGAM
from posthoceval.models.gam import LinearGAM
from posthoceval.models.gam import T
from posthoceval.models.dnn import AdditiveDNN
from posthoceval.transform import Transformer
from posthoceval.utils import nonexistent_filename
from posthoceval.datasets import COMPASDataset
from posthoceval.datasets import BostonDataset
from posthoceval.datasets import HELOCDataset
from posthoceval.models.term_util import generate_terms
from posthoceval.viz import gather_viz_data
from posthoceval import metrics

In [3]:
# Notebook-wide seaborn theme. For paper-ready figures the alternative that
# was being tried is context='paper' with font_scale=2.25 (optionally with
# palette=sns.color_palette('pastel')).
theme_options = dict(
    context='notebook',
    style='ticks',
    font_scale=1,
    color_codes=True,
)
sns.set_theme(**theme_options)

In [4]:
def mpl_backend(backend='inline'):
    """Switch the matplotlib backend of the running IPython session.

    Parameters
    ----------
    backend : str
        Backend name forwarded to the ``%matplotlib`` line magic
        (this notebook uses 'inline', 'qt', 'notebook' and 'nbagg').
    """
    # Local import: this helper only makes sense inside an IPython kernel.
    from IPython import get_ipython

    # Programmatic form of ``%matplotlib <backend>``; keeps the function body
    # plain Python instead of relying on magic syntax with ``$`` expansion.
    get_ipython().run_line_magic('matplotlib', backend)


def mpl_inline():
    """Shortcut for ``mpl_backend('inline')``."""
    mpl_backend('inline')


def mpl_qt():
    """Shortcut for ``mpl_backend('qt')``."""
    mpl_backend('qt')


def mpl_notebook():
    """Shortcut for ``mpl_backend('notebook')``."""
    mpl_backend('notebook')


# Backend used for the rest of the notebook.
mpl_backend('nbagg')

In [5]:
dataset_name = 'compas'

# Datasets this notebook knows how to load. A 'synthetic' dataset was
# sketched at one point but never wired up, so it is intentionally absent and
# is rejected below like any other unknown name.
dataset_classes = {
    'compas': COMPASDataset,
    'heloc': HELOCDataset,
    'boston': BostonDataset,
}

if dataset_name not in dataset_classes:
    # Same exception type as before, but now it says what went wrong.
    raise NotImplementedError(
        f'Unsupported dataset_name {dataset_name!r}; '
        f'expected one of {sorted(dataset_classes)}'
    )
dataset_cls = dataset_classes[dataset_name]

# load dataset
dataset_orig = dataset_cls()

# transform data
transformer = Transformer()
dataset = transformer.fit_transform(dataset_orig)

# extract data (everything downstream reads these from the transformed set)
task = dataset.task
X = dataset.X
y = dataset.y
feature_names = dataset.feature_names
n_features = dataset.n_features

print(f'Samples     = {len(X)}')
print(f'Input Shape = {dataset.input_shape}')
print(f'Task        = {task}')

Samples     = 6172
Input Shape = (19,)
Task        = regression


In [6]:
# model
# Which model family to fit. The model-building cell handles 'dnn' and 'gam'
# and raises NotImplementedError for anything else.
# model_type = 'gam'
model_type = 'dnn'

In [7]:
# put together terms
max_order = 2
min_order = 1

if dataset_name == 'heloc':
    # HELOC: fewer main effects plus a fixed set of pairwise interactions.
    n_main = n_features - 10
    desired_interactions = [(1, 2), (10, 12), (15, 18), (7, 11)]
else:
    # Every other dataset: one main effect per feature, no interactions.
    desired_interactions = []
    n_main = n_features

# This was `None if desired_interactions else None`, i.e. None on both
# branches; spelled out directly so nobody goes looking for a second case.
# NOTE(review): how generate_terms treats n_interact=None is not visible
# here -- confirm it means "only use desired_interactions".
n_interact = None

# current interact plots use this: LIME, MAPLE
# desired_interactions = [(1, 2)]

# features 8 & 9 correlate in Boston dataset
# desired_interactions = [(8, 0, 1), (2, 8), (2, 9)]

terms = generate_terms(
    n_features=n_features,
    n_main=n_main,
    n_interact=n_interact,
    desired_interactions=desired_interactions,
    min_order=min_order,
    max_order=max_order,
    seed=rng_np,
)
# terms = [T.te(0, 1), T.te(2, 3), T.s(0, n_splines=50)]
# terms = [T.te(0, 1), T.te(1, 3, n_splines=5), T.s(2, n_splines=50)]

In [8]:
# TODO: factor terms for categoricals in GAM?
# TODO: embed categoricals in NN?

# Build the model together with the keyword arguments its fit() call needs.
if model_type == 'dnn':
    # Full-batch training (batch_size == number of samples); early stopping
    # watches the training loss and restores the best weights.
    callback = EarlyStopping(
        monitor='loss',
        mode='min',
        patience=5,
        restore_best_weights=True,
    )
    optimizer = Adam(learning_rate=1e-3)
    fit_kwargs = dict(
        epochs=50,
        batch_size=len(X),
        callbacks=[callback],
        optimizer=optimizer,
    )
    model = AdditiveDNN(
        terms=terms,
        task=task,
        symbol_names=feature_names,
        activation='sigmoid',
    )
elif model_type == 'gam':
    fit_kwargs = {}
    gam_cls = MultiClassLogisticGAM if task == 'classification' else LinearGAM
    model = gam_cls(symbol_names=feature_names, terms=terms)
else:
    raise NotImplementedError(model_type)

model.fit(X, y, **fit_kwargs)

# Only the DNN exposes an architecture diagram.
if model_type == 'dnn':
    diagram_path = nonexistent_filename('dnn.png')
    model.plot_model(diagram_path, show_shapes=True)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
('Failed to import pydot. You must `pip install pydot` and install graphviz (https://graphviz.gitlab.io/download/), ', 'for `pydotprint` to work.')


In [9]:
# Number of samples to explain; clamped below so it never exceeds the
# dataset size.
explain_only_this_many = 100
# explain_only_this_many = len(X)
explain_only_this_many = min(explain_only_this_many, len(X))
# Draw distinct row indices (replace=False) with the seeded generator so the
# same subset is selected on every fresh run.
sample_idxs_all = np.arange(len(X))
sample_idxs = rng_np.choice(sample_idxs_all,
                            size=explain_only_this_many, replace=False)
X_subset = X[sample_idxs]
y_subset = y[sample_idxs]

# Contributions reported by the fitted model itself for the sampled rows;
# later cells compare the explainers' output against these.
true_contribs = model.feature_contributions(X_subset)

In [None]:
# (display name, explainer class) pairs that are evaluated for every task.
explainer_array = [
    ('LIME', LIMEExplainer),
    ('SHAP', KernelSHAPExplainer),
]
# Currently disabled explainers, kept for reference:
#   ('SHAPR', SHAPRExplainer),  # >:(
#   ('VanillaGradients', VanillaGradientsExplainer),
#   ('VanillaGradients-Smooth', VanillaGradientsExplainer.smooth_grad),
#   ('GradientsXInputs', GradientsXInputsExplainer),
#   ('GradientsXInputs-Smooth', GradientsXInputsExplainer.smooth_grad),
#   ('IntegratedGradients', IntegratedGradientsExplainer),
#   ('IntegratedGradients-Smooth', IntegratedGradientsExplainer.smooth_grad),
#   ('Occlusion', OcclusionExplainer),
#   ('XRAI', XRAIExplainer),
#   ('XRAI-Smooth', XRAIExplainer.smooth_grad),
#   ('BlurIG', BlurIntegratedGradientsExplainer),
#   ('BlurIG-Smooth', BlurIntegratedGradientsExplainer.smooth_grad),

# MAPLE and PDP are only added when the task is regression.
if task == 'regression':
    explainer_array += [
        ('MAPLE', MAPLEExplainer),
        ('PDP', PDPExplainer),
    ]

# TODO: feature_contributions() --> explain()
# TODO: explain() --> ExplainerMixin (for both models and explainers)

# Result stores keyed by explainer name; filled by the explanation loop.
pred_contribs_map, pred_y_map = {}, {}

In [46]:
def _get_grids(feature_values, num_grid_points, grid_type, percentile_range, grid_range):
    """Calculate grid points for numeric feature

    Returns
    -------
    feature_grids: 1d-array
        calculated grid points
    percentile_info: 1d-array or []
        percentile information for feature_grids
        exists when grid_type='percentile'
    """

    if grid_type == 'percentile':
        # grid points are calculated based on percentile in unique level
        # thus the final number of grid points might be smaller than num_grid_points
        start, end = 0, 100
        if percentile_range is not None:
            start, end = np.min(percentile_range), np.max(percentile_range)

        percentile_grids = np.linspace(start=start, stop=end, num=num_grid_points)
        value_grids = np.percentile(feature_values, percentile_grids)

        grids_df = pd.DataFrame()
        grids_df['percentile_grids'] = [round(v, 2) for v in percentile_grids]
        grids_df['value_grids'] = value_grids
        grids_df = grids_df.groupby(['value_grids'], as_index=False).agg(
            {'percentile_grids': lambda v: str(tuple(v)).replace(',)', ')')}).sort_values('value_grids', ascending=True)

        feature_grids, percentile_info = grids_df['value_grids'].values, grids_df['percentile_grids'].values
        
    return feature_grids, percentile_info


# Scratch check: how many distinct percentile grid points does the second
# feature (index 1) yield when 100 are requested?
feature_names__ = model.symbol_names
dataset__ = pd.DataFrame(data=dataset.X, columns=feature_names__)
_get_grids(
    feature_values=dataset__[feature_names__[1]].values,
    num_grid_points=100,
    grid_type='percentile',
    percentile_range=None,
    grid_range=None,
)[0].shape

(4,)

In [50]:
import os
import sys

# NOTE(review): machine-specific absolute paths to a cluster R installation;
# they need adjusting on any other machine. R_HOME points at the R
# executable itself here -- confirm that is what the consumer expects.
os.environ['R_HOME'] = '/afs/crc.nd.edu/x86_64_linux/r/R/3.6.2/gcc/4.8.5/bin/R'
# del os.environ['R_HOME']
r_bin_dir = '/afs/crc.nd.edu/x86_64_linux/r/R/3.6.2/gcc/4.8.5/bin/'
# Guarded so that re-running this cell does not keep appending duplicates.
if r_bin_dir not in sys.path:
    sys.path.append(r_bin_dir)

for explainer_name, explainer_cls in explainer_array:
    # Results are cached by name, so re-running the cell only computes the
    # explainers that have not finished yet.
    if explainer_name in pred_contribs_map:
        print('Skipping', explainer_name)
        continue
    print('Explaining model using', explainer_name)
    explainer = explainer_cls(model, seed=seed, task=task)
    explainer.fit(dataset)  # fit full dataset
    pred_contribs, y_pred = explainer.feature_contributions(
        X_subset, as_dict=True, return_predictions=True)

    # store for later viz data generation
    pred_contribs_map[explainer_name] = pred_contribs
    pred_y_map[explainer_name] = y_pred

Skipping LIME
Skipping SHAP
Skipping MAPLE
Explaining model using PDP
die
(6172, 19)
(6172,)
Age
(46,)
(46,)
Juvenile Crime Count (Felony)
(4,)
(4,)
Juvenile Crime Count (Misdemeanor)
(4,)
(4,)
Juvenile Crime Count (Other)
(4,)
(4,)
Priors Count
(19,)
(19,)
['Sex = Female', 'Sex = Male']
(2,)
(2,)
['Race = African-American', 'Race = Asian', 'Race = Caucasian', 'Race = Hispanic', 'Race = Native American', 'Race = Other']
(6,)
(6,)
['Charge Degree = F', 'Charge Degree = M']
(2,)
(2,)
['Recidivated = 0', 'Recidivated = 1']
(2,)
(2,)
['Recidivated in Two Years = 0', 'Recidivated in Two Years = 1']
(2,)
(2,)
(6172, 19)
19
(10,)
(10,)


ValueError: Expected matrix of rank 2 but received vector of rank 1 instead for y.

In [None]:
# TODO: import from viz and implement fully...
# plot_fit()

# Collect everything the plotting/reporting cells below consume:
#   df          - used for the 2D contribution scatter grid (may be None)
#   df_3d       - used for the 3D interaction plots (may be None)
#   contribs_df - looked up by plot_explanation() per explainer/class
#   err_dfs     - mapping of error tables, indexed by name below
df, df_3d, contribs_df, err_dfs = gather_viz_data(
    model=model,
    dataset=dataset,
    transformer=transformer,
    true_contribs=true_contribs,
    pred_contribs_map=pred_contribs_map,
    dataset_sample_idxs=sample_idxs,
)

In [None]:
col_wrap = 4

if df is not None:
    # if n_features > 12 or task == 'classification':
    #     mpl_qt()
    # else:
    #     mpl_inline()

    # Classification: one column per class and one row per matched effect.
    # Otherwise: one wrapped grid with a panel per matched effect.
    if task == 'classification':
        facet_layout = dict(col='Class', col_wrap=None, row='Match')
    else:
        facet_layout = dict(col='Match', col_wrap=col_wrap, row=None)

    g = sns.relplot(
        data=df,
        x='Feature Value',
        y='Contribution',
        hue='Explainer',
        kind='scatter',
        x_jitter=.08,  # for visualization purposes of nearby points
        alpha=.65,
        facet_kws=dict(sharex=False, sharey=False),
        **facet_layout,
    )
    # Drop the leading "<facet variable> = " prefix from each panel title.
    for ax in g.axes.flat:
        panel_title = ax.get_title()
        ax.set_title(panel_title.split(' = ', 1)[1])
    g.tight_layout()
    out_path = nonexistent_filename(f'contributions_grid_{model_type}.pdf')
    g.savefig(out_path)

In [None]:
# 3d interaction plot time
# Draws one 3D scatter panel per (Class, Match) group of df_3d, coloured by
# explainer, and saves the whole figure as a PDF.
if df_3d is not None:

    # Column names of df_3d used for the axes, colour and panel title.
    plt_x = 'Feature Value x'
    plt_y = 'Feature Value y'
    plt_z = 'Contribution'
    plt_hue = 'Explainer'
    plt_col = 'Match'

    df_3d_grouped = df_3d.groupby(['Class', plt_col])

    # Lay the panels out on a grid of at most col_wrap columns and scale the
    # default figure size by the number of rows/columns.
    n_plots = len(df_3d_grouped)
    n_rows = int(np.ceil(n_plots / col_wrap))
    n_cols = min(col_wrap, n_plots)
    figsize = plt.rcParams['figure.figsize']
    figsize = (figsize[0] * n_cols, figsize[1] * n_rows)
    fig = plt.figure(figsize=figsize)

    # class_i is unpacked from the group key but not used below.
    for i, ((class_i, ax_title), group_3d) in enumerate(df_3d_grouped):
        ax = fig.add_subplot(n_rows, n_cols, i + 1, projection='3d')

        # One scatter series per explainer so each gets its own legend entry.
        for hue_name, hue_df in group_3d.groupby(plt_hue):
            ax.scatter(
                hue_df[plt_x],
                hue_df[plt_y],
                hue_df[plt_z],
                label=hue_name,
                alpha=.5,
            )
        ax.set_xlabel(plt_x)
        ax.set_ylabel(plt_y)
        ax.set_zlabel(plt_z)

        ax.set_title(ax_title)

        # The figure-level legend is created once, while handling the first
        # panel.
        if i == 0:
            fig.legend(loc='center right')
    fig.tight_layout()
    fig.savefig(nonexistent_filename(
        f'contributions_grid_interact_{model_type}.pdf'))

In [None]:
# Display the 'effectwise_err_agg' table returned by gather_viz_data().
err_dfs['effectwise_err_agg']

In [None]:
# Display the 'samplewise_err_agg' table returned by gather_viz_data().
err_dfs['samplewise_err_agg']

In [None]:
def effects_to_str(*effect_sets):
    """Build one readable label per position across parallel effect sequences.

    The sequences in ``effect_sets`` are walked in lockstep (stopping at the
    shortest). At each position every entry is treated as an iterable of
    effects, and every effect as an iterable of features. The distinct
    features found at that position are joined with ``' & '``.

    Features are listed in first-seen order. The previous implementation
    collected them in a ``set``, so the label order depended on hash order and
    could differ between runs for string features.

    Parameters
    ----------
    *effect_sets : iterables
        Parallel sequences whose items are iterables of effects; each effect
        is an iterable of hashable features.

    Returns
    -------
    list of str
        One ``' & '``-joined label per position.
    """
    effect_strs = []
    for effects_set in zip(*effect_sets):
        # dict keys act as an insertion-ordered set
        features = {}
        for effects in effects_set:
            for effect in effects:
                features.update(dict.fromkeys(effect))
        effect_strs.append(' & '.join(map(str, features)))
    return effect_strs


def plot_explanation(
    explanation,
):
    """Bar-plot true vs. explained attributions for a single explanation.

    Parameters
    ----------
    explanation : mapping-like
        Must provide 'Explainer', 'Class' and 'Sample Index'. Exactly one row
        of the global ``contribs_df`` has to match the explainer/class pair.

    Returns
    -------
    matplotlib Axes holding a horizontal bar plot with one bar group per
    effect, coloured by 'True' vs. the explainer's name.
    """
    # locate the single contribs_df row for this explainer/class
    row_mask = ((contribs_df['Explainer'] == explanation['Explainer']) &
                (contribs_df['Class'] == explanation['Class']))
    matches = contribs_df[row_mask]
    assert len(matches) == 1
    record = matches.iloc[0]
    true_table = record['True Contribs']
    pred_table = record['Pred Contribs']

    # positional lookup of the explained sample in both tables
    position = explanation['Sample Index']
    pred_row = pred_table.iloc[position]
    true_row = true_table.iloc[position]

    # readable label per effect, shared by both bar groups
    effect_labels = effects_to_str(pred_row.keys(), true_row.keys())

    # long-form frame: true attributions first, then the explainer's
    true_frame = pd.DataFrame({
        'Effect': effect_labels,
        'Explainer': 'True',
        'Attribution': true_row.values,
    })
    pred_frame = pd.DataFrame({
        'Effect': effect_labels,
        'Explainer': explanation['Explainer'],
        'Attribution': pred_row.values,
    })
    df_plot = pd.concat([true_frame, pred_frame], ignore_index=True)

    _, ax = plt.subplots()
    sns.barplot(
        data=df_plot,
        x='Attribution',
        y='Effect',
        hue='Explainer',
        orient='h',
        ax=ax,
    )
    # ax.set_xscale('symlog')
    return ax

In [None]:
k = 3
worst_by = 'root_mean_squared_error'

# Top k worst effects per explainer
df_effects = err_dfs['effectwise_err']
df_effects = df_effects[df_effects['Metric'] == worst_by]
# Group by the bare column label rather than a one-element list: with a list,
# pandas >= 2.0 yields 1-tuple keys and the headers below would read
# "('LIME',) Top-3 ..." instead of "LIME Top-3 ...".
for explainer_name, df_expl_effects in df_effects.groupby('Explainer'):
    # Highest score first, so the head is the worst and the tail the best.
    df_expl_effects = df_expl_effects.sort_values(by='Score', ascending=False)
    print(f'{explainer_name} Top-{k} worst effects:')
    display(df_expl_effects.iloc[:k])
    print(f'{explainer_name} Top-{k} best effects:')
    display(df_expl_effects.iloc[-k:])

In [None]:
k = 3
worst_by = 'cosine_distances'
# worst_by = 'euclidean_distances'

# Top k worst explanations per explainer
df_effects = err_dfs['samplewise_err']
df_effects = df_effects[df_effects['Metric'] == worst_by]
# Group by the bare column label rather than a one-element list: with a list,
# pandas >= 2.0 yields 1-tuple keys, which would leak into the printed
# headers and the plot titles as "('LIME',) ...".
for explainer_name, df_expl_effects in df_effects.groupby('Explainer'):
    # Highest score first, so the head is the worst and the tail the best.
    df_expl_effects = df_expl_effects.sort_values(by='Score', ascending=False)
    print(f'{explainer_name} Top-{k} worst explanations:')
    display(df_expl_effects.iloc[:k])
    print(f'{explainer_name} Top-{k} best explanations:')
    display(df_expl_effects.iloc[-k:])
    # Plot the single worst and single best explanation for this explainer.
    ax = plot_explanation(df_expl_effects.iloc[0])
    ax.set_title(f'{explainer_name} worst explanation')
    ax = plot_explanation(df_expl_effects.iloc[-1])
    ax.set_title(f'{explainer_name} best explanation')

In [None]:
# How well does the model fit the sampled targets?
y_model_subset = model.predict(X_subset)
if task == 'classification':
    # A stray `y_pred_expl = np.argmax(y_pred_expl, axis=0)` used to sit
    # here: it read y_pred_expl before the loop below defines it (NameError
    # on a fresh kernel) and its result was never used, so it was removed.
    acc = metrics.accuracy(y_subset, y_model_subset)
    print(f'Model accuracy={acc * 100:.2f}')
else:
    err = metrics.rmse(y_subset, y_model_subset)
    print(f'Model rmse={err:.3g}')

# How closely does each explainer's prediction track the model's prediction?
for explainer_name, y_pred_expl in pred_y_map.items():
    if task == 'classification':
        # NOTE(review): axis=0 assumes the class dimension comes first in the
        # explainer's predictions -- confirm against the explainer output.
        y_pred_expl = np.argmax(y_pred_expl, axis=0)
        acc = metrics.accuracy(y_model_subset, y_pred_expl)
        print(f'{explainer_name} accuracy={acc * 100:.2f}')
    else:
        err = metrics.rmse(y_model_subset, y_pred_expl)
        print(f'{explainer_name} rmse={err:.3g}')