Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Lucas/wip #48

Merged
merged 4 commits into from
Jul 29, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
21 changes: 15 additions & 6 deletions tests/attributions/test_occlusion.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,24 @@ def test_output_shape():


def test_polymorphic_parameters():
    """Ensure we could pass tuple or int to define patch parameters when inputs are images"""
    # NOTE: this span contained diff residue mixing the pre- and post-merge bodies;
    # this is the reconstructed post-merge ("Changes from all commits") version.
    s = 3
    input_shapes = [(28, 28, 1), (32, 32, 3)]
    nb_labels = 10

    for input_shape in input_shapes:
        features, targets = generate_data(input_shape, nb_labels, 20)
        model = generate_model(input_shape, nb_labels)

        # an int patch parameter must behave exactly like the (int, int) tuple form
        occlusion_int = Occlusion(model, patch_size=s, patch_stride=s)
        occlusion_tuple = Occlusion(model, patch_size=(s, s), patch_stride=(s, s))

        occlusion_int(features, targets)
        occlusion_tuple(features, targets)

        assert occlusion_int.patch_size == occlusion_tuple.patch_size
        assert occlusion_int.patch_stride == occlusion_tuple.patch_stride


def test_mask_generator():
Expand Down
130 changes: 130 additions & 0 deletions tests/attributions/test_tabular_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
import numpy as np
import tensorflow as tf

from xplique.attributions import (Saliency, GradientInput, IntegratedGradients, SmoothGrad, VarGrad,
SquareGrad, Occlusion, Rise, GuidedBackprop, DeconvNet, Lime,
KernelShap)
from ..utils import generate_regression_model, generate_data

def _default_methods(model, output_layer_index):
    """Build the list of attribution methods under test for the given model.

    Rise is intentionally left commented out (presumably not supported for
    tabular data yet — see PR review discussion; TODO confirm).
    """
    return [
        Saliency(model, output_layer_index),
        GradientInput(model, output_layer_index),
        SmoothGrad(model, output_layer_index),
        VarGrad(model, output_layer_index),
        SquareGrad(model, output_layer_index),
        IntegratedGradients(model, output_layer_index),
        GuidedBackprop(model, output_layer_index),
        DeconvNet(model, output_layer_index),
        Lime(model),
        KernelShap(model),
        Occlusion(model, patch_size=1, patch_stride=1),
        # Rise(model)
    ]

def test_tabular_data():
    """Test applied to most attribution methods with tabular (1D) inputs."""

    features_shape, output_shape, samples = ((10,), 1, 20)
    model = generate_regression_model(features_shape, output_shape)
    output_layer_index = -1

    inputs_np, targets_np = generate_data(features_shape, output_shape, samples)
    inputs_tf, targets_tf = tf.cast(inputs_np, tf.float32), tf.cast(targets_np, tf.float32)
    dataset = tf.data.Dataset.from_tensor_slices((inputs_np, targets_np))
    # batched_dataset = tf.data.Dataset.from_tensor_slices((inputs_np, targets_np)).batch(4)

    methods = _default_methods(model, output_layer_index)

    for inputs, targets in [(inputs_np, targets_np),
                            (inputs_tf, targets_tf),
                            (dataset, None),
                            # (batched_dataset, None)
                            ]:
        for method in methods:
            try:
                explanations = method.explain(inputs, targets)
            except Exception as exc:
                # chain the original error so the real traceback is not lost
                raise AssertionError(
                    f"Explanation failed for method {method.__class__.__name__}") from exc

            # all attribution methods must expose an explain method
            assert hasattr(method, 'explain')

            # all explanations returned must be tensorflow tensors
            assert isinstance(explanations, tf.Tensor)

            # all explanations shape should match features shape
            assert explanations.shape == [samples, *features_shape]

def test_multioutput_regression():
    """Test applied to most attribution methods on a multi-output regression model."""

    features_shape, output_shape, samples = ((10,), 4, 20)
    model = generate_regression_model(features_shape, output_shape=output_shape)
    output_layer_index = -1

    inputs_np, targets_np = generate_data(features_shape, output_shape, samples)
    inputs_tf, targets_tf = tf.cast(inputs_np, tf.float32), tf.cast(targets_np, tf.float32)
    dataset = tf.data.Dataset.from_tensor_slices((inputs_np, targets_np))
    # batched_dataset = tf.data.Dataset.from_tensor_slices((inputs_np, targets_np)).batch(4)

    methods = _default_methods(model, output_layer_index)

    for inputs, targets in [(inputs_np, targets_np),
                            (inputs_tf, targets_tf),
                            (dataset, None),
                            # (batched_dataset, None)
                            ]:
        for method in methods:
            try:
                explanations = method.explain(inputs, targets)
            except Exception as exc:
                # chain the original error so the real traceback is not lost
                raise AssertionError(
                    f"Explanation failed for method {method.__class__.__name__}") from exc

            # all attribution methods must expose an explain method
            assert hasattr(method, 'explain')

            # all explanations returned must be tensorflow tensors
            assert isinstance(explanations, tf.Tensor)

            # all explanations shape should match features shape
            assert explanations.shape == [samples, *features_shape]

def test_batch_size():
    """
    Ensure the functioning of attributions for special batch size cases with tabular data
    """

    input_shape, nb_targets, samples = ((10,), 5, 20)
    inputs, targets = generate_data(input_shape, nb_targets, samples)
    model = generate_regression_model(input_shape, nb_targets)
    output_layer_index = -1

    # None lets each method pick its default, 1 exercises per-sample batching,
    # 32 is larger than the number of samples (20)
    batch_sizes = [None, 1, 32]

    for bs in batch_sizes:

        methods = [
            Saliency(model, output_layer_index, bs),
            GradientInput(model, output_layer_index, bs),
            SmoothGrad(model, output_layer_index, bs),
            VarGrad(model, output_layer_index, bs),
            SquareGrad(model, output_layer_index, bs),
            IntegratedGradients(model, output_layer_index, bs),
            GuidedBackprop(model, output_layer_index, bs),
            DeconvNet(model, output_layer_index, bs),
            Lime(model, bs),
            KernelShap(model, bs),
            Occlusion(model, bs, patch_size=1, patch_stride=1),
            # Rise(model, bs),
        ]

        for method in methods:
            try:
                method.explain(inputs, targets)
            except Exception as exc:
                # chain the original error so the real traceback is not lost
                raise AssertionError(
                    f"Explanation failed for method {method.__class__.__name__}"
                    f" batch size {bs}") from exc
11 changes: 11 additions & 0 deletions tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,17 @@ def generate_model(input_shape=(32, 32, 3), output_shape=10):

return model

def generate_regression_model(features_shape, output_shape=1):
    """Generate a small dense model for regression tests.

    Parameters
    ----------
    features_shape
        Shape of a single input sample (without the batch dimension).
    output_shape
        Number of regression outputs, 1 by default.

    Returns
    -------
    model
        A compiled Sequential model (MAE loss, SGD optimizer).
    """
    model = Sequential()
    model.add(Input(shape=features_shape))
    model.add(Flatten())
    model.add(Dense(64, activation='relu'))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(output_shape))
    model.compile(loss='mean_absolute_error',
                  optimizer='sgd')

    return model

def almost_equal(arr1, arr2, epsilon=1e-6):
"""Ensure two array are almost equal at an epsilon"""
Expand Down
38 changes: 25 additions & 13 deletions xplique/attributions/kernel_shap.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,17 +37,23 @@ def __init__(self,
feature (e.g super-pixel).
It allows to transpose from (resp. to) the original input space to (resp. from)
the interpretable space.
The default mapping is the identity mapping which is quickly a poor mapping.
The default mapping is:
- the quickshift segmentation algorithm for inputs with (N, W, H, C) shape,
we assume here such shape is used to represent (W, H, C) images.
- the felzenszwalb segmentation algorithm for inputs with (N, W, H) shape,
fel-thomas marked this conversation as resolved.
Show resolved Hide resolved
we assume here such shape is used to represent (W, H) images.
- an identity mapping if inputs has shape (N, W), we assume here your inputs
are tabular data.

To use your own custom map function you should use the following scheme:

def custom_map_to_interpret_space(inputs: tf.tensor (N, W, H, C)) ->
tf.tensor (N, W, H):
def custom_map_to_interpret_space(inputs: tf.tensor (N, W (, H, C) )) ->
tf.tensor (N, W (, H)):
**some grouping techniques**
return mappings

For instance you can use the scikit-image library to defines super pixels on your
images.
            For instance you can use the scikit-image library (as we did for the quickshift
            algorithm) to define super pixels on your images.

nb_samples
The number of pertubed samples you want to generate for each input sample.
Expand All @@ -60,7 +66,8 @@ def custom_map_to_interpret_space(inputs: tf.tensor (N, W, H, C)) ->
ref_values
It defines reference value which replaces each feature when the corresponding
interpretable feature is set to 0.
It should be provided as: a ndarray (C,)
It should be provided as: a ndarray of shape (1) if there is no channels in your input
and (C,) otherwise

The default ref value is set to (0.5,0.5,0.5) for inputs with 3 channels (corresponding
to a grey pixel when inputs are normalized by 255) and to 0 otherwise.
Expand Down Expand Up @@ -92,9 +99,14 @@ def _kernel_shap_similarity_kernel(
This method compute the similarity between interpretable pertubed samples and
the original input (i.e a tf.ones(num_features)).
"""

# when calling the kernel, we will call it for interpretable
# samples which all have the same size, thus we can use the
        # following trick to get the total number of interpretable
fel-thomas marked this conversation as resolved.
Show resolved Hide resolved
# features toward a specific input
nb_total_features = interpret_samples.bounding_shape(out_type = tf.int32)[1]
interpret_samples = interpret_samples.to_tensor()
num_selected_features = tf.reduce_sum(interpret_samples, axis=1)
num_features = len(interpret_samples[0])
nb_selected_features = tf.reduce_sum(interpret_samples, axis=1)

# Theoretically, in the case where the number of selected
# features is zero or the total number of features of the
Expand All @@ -103,8 +115,8 @@ def _kernel_shap_similarity_kernel(
# weight to 1000000 (all other weights are 1).
similarities = tf.where(
tf.logical_or(
tf.equal(num_selected_features, tf.constant(0)),
tf.equal(num_selected_features, tf.constant(num_features))
tf.equal(nb_selected_features, tf.constant(0)),
tf.equal(nb_selected_features, tf.constant(nb_total_features))
),
tf.ones(len(interpret_samples), dtype=tf.float32)*1000000.0,
fel-thomas marked this conversation as resolved.
Show resolved Hide resolved
tf.ones(len(interpret_samples), dtype=tf.float32)
Expand All @@ -114,7 +126,7 @@ def _kernel_shap_similarity_kernel(

@staticmethod
@tf.function
def _kernel_shap_pertub_func(num_features: Union[int, tf.Tensor],
def _kernel_shap_pertub_func(nb_features: Union[int, tf.Tensor],
nb_samples: int) -> tf.Tensor:
"""
The pertubed instances are sampled that way:
Expand All @@ -132,7 +144,7 @@ def _kernel_shap_pertub_func(num_features: Union[int, tf.Tensor],
This trick is the one used in the Captum library: https://github.com/pytorch/captum
"""
probs_nb_selected_feature = KernelShap._get_probs_nb_selected_feature(
tf.cast(num_features, dtype=tf.int32))
tf.cast(nb_features, dtype=tf.int32))
nb_selected_features = tf.random.categorical(tf.math.log([probs_nb_selected_feature]),
nb_samples,
dtype=tf.int32)
Expand All @@ -141,7 +153,7 @@ def _kernel_shap_pertub_func(num_features: Union[int, tf.Tensor],
interpret_samples = []

for i in range(nb_samples):
rand_vals = tf.random.normal([num_features])
rand_vals = tf.random.normal([nb_features])
idx_sorted_values = tf.argsort(rand_vals, direction='DESCENDING')
threshold_idx = idx_sorted_values[nb_selected_features[i]]
threshold = rand_vals[threshold_idx]
Expand Down