In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy

In [2]:
from captum.attr import IntegratedGradients, Saliency


In [3]:
# Model inputs: the point to explain (both inputs at 2.0) and the all-zero
# reference point that is later passed to Integrated Gradients as the baseline.
# Every tensor is a separate leaf that tracks gradients.
input1, input2 = (torch.tensor([2.0], requires_grad=True) for _ in range(2))
baseline = torch.zeros(1, requires_grad=True)


In [4]:
class Toy(nn.Module):
    """
    Two-input variant of the toy model from the Integrated Gradients paper
    (https://arxiv.org/pdf/1703.01365.pdf).

    The paper's example is f(x) = 1 - ReLU(1 - x). Here a second input is
    added inside the ReLU with a small weight:

        f(x1, x2) = 1 - ReLU(1 - x1 - w * x2),    w = 0.1 by default

    The module has no trainable parameters; "two inputs" is the only
    difference from the paper's function. Once x1 + w * x2 >= 1 the ReLU is
    inactive, the output is constant at 1 and the gradient with respect to
    both inputs is zero.

    Args:
        input2_weight: coefficient ``w`` applied to ``input2`` inside the
            ReLU. Defaults to 0.1, the value the model was originally
            hard-coded with.
    """
    def __init__(self, input2_weight=0.1):
        super().__init__()
        # Stored as a plain Python number (not an nn.Parameter): it is a fixed
        # constant of the function, not something to be trained.
        self.input2_weight = input2_weight

    def forward(self, input1, input2):
        """Return ``1 - ReLU(1 - input1 - input2_weight * input2)`` elementwise."""
        relu_out = F.relu(1.0 - input1 - self.input2_weight * input2)
        return 1.0 - relu_out

In [5]:
# Single model instance shared by the forward, gradient and attribution cells below.
m = Toy()

In [6]:
# Model output at the all-zero baseline: 1 - ReLU(1 - 0 - 0.1 * 0) = 0.
baseline_output = m(baseline, baseline)
print(baseline_output)

tensor([0.], grad_fn=<RsubBackward1>)


In [7]:
# Model output at the point being explained: 1 - 2 - 0.1 * 2 is negative, so
# the ReLU contributes nothing and the output is 1.
input_output = m(input1, input2)
print(input_output)

tensor([1.], grad_fn=<RsubBackward1>)


In [8]:
# Plain gradients at the input point. The ReLU is inactive here
# (1 - 2 - 0.1 * 2 < 0), so both partial derivatives come out as zero even
# though the output differs from the baseline output.
y = m(input1, input2)
y.backward()  # populates .grad on the leaf tensors input1 and input2
for label, leaf in (('dm/dinput1 = ', input1), ('dm/dinput2 = ', input2)):
    print(label, leaf.grad)

dm/dinput1 =  tensor([-0.])
dm/dinput2 =  tensor([-0.])


In [16]:
# Integrated Gradients from the all-zero baseline to the input point.
# With return_convergence_delta=True the call returns a pair: the per-input
# attributions and the approximation error.
ig = IntegratedGradients(m)
attributions, approximation_error = ig.attribute(
    (input1, input2),
    baselines=(baseline, baseline),
    return_convergence_delta=True,
)

# One attribution tensor per model input; take the single element of each as a
# numpy scalar so the printout keeps the tensor's dtype.
attr1, attr2 = (attribution.detach().numpy()[0] for attribution in attributions)
err = approximation_error[0].detach().numpy()

print("attributions input1", attr1)
print("attributions input2", attr2)
# Ideally the attributions sum to f(input) - f(baseline) = 1 - 0; the gap is
# what the convergence delta reports.
print('sum = ', attr1+attr2)
print("approx_error", err)

attributions input1 0.9378233833446531
attributions input2 0.09378233973193104
sum =  1.031605723076584
approx_error 0.03160572307658405


In [17]:
# Gradient-based saliency at the same input point, for comparison with the
# Integrated Gradients attributions.
# NOTE(review): Captum documents Saliency as returning absolute gradients by
# default (abs=True) - confirm for the installed version.
saliency = Saliency(m)
saliency_attributions = saliency.attribute((input1, input2))
saliency_attributions

(tensor([0.]), tensor([0.]))