In [1]:
# shap
# https://slundberg.github.io/shap/notebooks/gradient_explainer/Explain%20an%20Intermediate%20Layer%20of%20VGG16%20on%20ImageNet.html
''' Start up jupyter
source activate tensorflow2_latest_p37
export PATH=/opt/tljh/user/bin:$PATH
jupyter lab --no-browser --port=8888 --ip=$(hostname)
'''
import sys
print(sys.path)
#sys.path.append("/opt/tljh/user/bin/")


['/home/ubuntu/covid-cxr/src', '/opt/tljh/user/lib/python37.zip', '/opt/tljh/user/lib/python3.7', '/opt/tljh/user/lib/python3.7/lib-dynload', '', '/home/ubuntu/.local/lib/python3.7/site-packages', '/opt/tljh/user/lib/python3.7/site-packages', '/opt/tljh/user/lib/python3.7/site-packages/IPython/extensions', '/home/ubuntu/.ipython']


Explaining a prediction in terms of the original input image is harder than explaining the predicition in terms of a higher convolutional layer (because the higher convolutional layer is closer to the output). This notebook gives a simple example of how to use GradientExplainer to do explain a model output with respect to the 7th layer of the pretrained VGG16 network.

Note that by default 200 samples are taken to compute the expectation. To run faster you can lower the number of samples per explanation.

In [2]:
from tensorflow import keras
from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input, decode_predictions
import pandas as pd
import yaml
import os
import datetime
import random
import dill
import numpy as np
import matplotlib.pyplot as plt
import tensorflow.summary as tf_summary
from imblearn.over_sampling import RandomOverSampler
from math import ceil
from tensorflow.keras.metrics import BinaryAccuracy, CategoricalAccuracy, Precision, Recall, AUC
from tensorflow.keras.models import *
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard, ReduceLROnPlateau
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorboard.plugins.hparams import api as hp
import sys
sys.path.append('src')
from models.models import *
from visualization.visualize import *
from custom.metrics import F1Score
from data.preprocess import remove_text
# ---- shap
import shap
import tensorflow.compat.v1.keras.backend as K
tf.compat.v1.disable_eager_execution()
import json

cfg = yaml.full_load(open("/home/ubuntu/covid-cxr/config.yml", 'r'))
print("1",cfg)
cfg['TRAIN']['EXPERIMENT_TYPE'] #single train
data = {}
data['TRAIN'] = pd.read_csv(cfg['PATHS']['TRAIN_SET'])

# Create ImageDataGenerators
train_img_gen = ImageDataGenerator(rotation_range=10, preprocessing_function=remove_text,
                                   samplewise_std_normalization=True, samplewise_center=True)
# Create DataFrameIterators
img_shape = tuple(cfg['DATA']['IMG_DIM'])
y_col = 'label_str'
class_mode = 'categorical'
train_generator = train_img_gen.flow_from_dataframe(dataframe=data['TRAIN'], directory=cfg['PATHS']['RAW_DATA'],
    x_col="filename", y_col=y_col, target_size=img_shape, batch_size=cfg['TRAIN']['BATCH_SIZE'],
    class_mode=class_mode, validate_filenames=False)

#histogram = np.bincount(np.array(train_generator.labels).astype(int))  # Get class distribution

input_shape = cfg['DATA']['IMG_DIM'] + [3]


covid_class_idx = 0   # Get index of COVID-19 class

def F1Score():
    return tfa.metrics.F1Score(name='f1score', thresholds=0.5, class_id=0)


# load pre-trained model and choose two images to explain
model = VGG16(weights='imagenet', include_top=True)
reconstructed_model = load_model("../results/models/model20201121-222337.h5", custom_objects= {'F1Score': F1Score})

X,y = shap.datasets.imagenet50()
#print(X)
#print(y)
to_explain = X[[39,41,45]]

# load the ImageNet class names
url = "https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json"
fname = shap.datasets.cache(url)
with open(fname) as f:
    class_names = json.load(f) #{'0': ['n01440764', 'tench'],

# explain how the input to the 7th layer of the model explains the top two classes
def map2layer(x, layer):
    feed_dict = dict(zip([model.layers[0].input], [preprocess_input(x.copy())]))
    return K.get_session().run(model.layers[layer].input, feed_dict)

layer = 9

e = shap.GradientExplainer((model.layers[layer].input, model.layers[-1].output), map2layer(preprocess_input(X.copy()), layer))
shap_values,indexes = e.shap_values(map2layer(to_explain, layer), ranked_outputs=2)

# get the names for the classes
index_names = np.vectorize(lambda x: class_names[str(x)][1])(indexes)

# plot the explanations
shap.image_plot(shap_values, to_explain, index_names)

1 {'PATHS': {'RAW_DATA': '/home/ubuntu/covid-cxr/data/', 'MILA_DATA': '/home/ubuntu/covid-cxr/data/covid-chestxray-dataset/', 'FIGURE1_DATA': '/home/ubuntu/covid-cxr/data/Figure1-COVID-chestxray-dataset/', 'RSNA_DATA': '/home/ubuntu/covid-cxr/data/rsna/', 'PROCESSED_DATA': 'data/processed/', 'TRAIN_SET': '/home/ubuntu/covid-cxr/data/processed/train_set.csv', 'VAL_SET': '/home/ubuntu/covid-cxr/data/processed/val_set.csv', 'TEST_SET': '/home/ubuntu/covid-cxr/data/processed/test_set.csv', 'IMAGES': '/home/ubuntu/covid-cxr/documents/generated_images/', 'LOGS': '/home/ubuntu/covid-cxr/results/logs/', 'MODEL_WEIGHTS': '/home/ubuntu/covid-cxr/results/models/', 'MODEL_TO_LOAD': '/home/ubuntu/covid-cxr/results/models/model20201102-033225.h5', 'LIME_EXPLAINER': '/home/ubuntu/covid-cxr/data/interpretability/lime_explainer.pkl', 'OUTPUT_CLASS_INDICES': '/home/ubuntu/covid-cxr/data/interpretability/output_class_indices.pkl', 'BATCH_PRED_IMGS': '/home/ubuntu/covid-cxr/data/processed/test/', 'BATCH_P

TypeError: F1Score() got an unexpected keyword argument 'name'

In [None]:
#shap.datasets.imagenet50()
# class_names

Explain with local smoothing¶

Gradient explainer uses expected gradients, which merges ideas from integrated gradients, SHAP, and SmoothGrad into a single expection equation. To use smoothing like SmoothGrad just set the local_smoothing parameter to something non-zero. This will add normally distributed noise with that standard deviation to the input during the expectation calculation. It can create smoother feature attributions that better capture correlated regions of the image.

In [None]:
# explain how the input to the 7th layer of the model explains the top two classes
explainer = shap.GradientExplainer(
    (model.layers[layer].input, model.layers[-1].output),
    map2layer(preprocess_input(X.copy()), layer),
    local_smoothing=100
)
shap_values,indexes = explainer.shap_values(map2layer(to_explain, layer), ranked_outputs=2)

# get the names for the classes
index_names = np.vectorize(lambda x: class_names[str(x)][1])(indexes)

# plot the explanations
shap.image_plot(shap_values, to_explain, index_names)