In [6]:
import numpy as np
import shap
import sklearn
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
import torch

# Number of input features; read by the network classes and by make_regression.
input_size = 16
# Seed the global NumPy and torch random number generators before any data
# split or parameter initialisation happens.
np.random.seed(0)
torch.random.manual_seed(0)
# Record the library versions this notebook was executed with.
print(f'shap {shap.__version__}, torch {torch.__version__}, numpy {np.__version__}, sklearn {sklearn.__version__}')

shap 0.37.0, torch 1.8.1, numpy 1.19.2, sklearn 0.24.1


In [9]:
# Defining a network whose activations come from torch.nn.functional gives incorrect SHAP values

class FunctionalTestNet(torch.nn.Module):
    """Three-layer MLP that applies the functional ReLU between its layers.

    ``self.activation`` stores the plain function ``torch.nn.functional.relu``
    (not a ``torch.nn.Module`` instance); ``forward`` calls it after each of
    the two hidden linear layers.
    """

    def __init__(self):
        super().__init__()
        hidden_width = 32
        # input_size is the notebook-level feature count.
        self.layer_1 = torch.nn.Linear(input_size, hidden_width)
        self.layer_2 = torch.nn.Linear(hidden_width, hidden_width)
        self.layer_3 = torch.nn.Linear(hidden_width, 1)
        self.activation = torch.nn.functional.relu

    def forward(self, x):
        """Run ``x`` through the network and return the final linear layer's output."""
        hidden = self.layer_1(x)
        hidden = self.activation(hidden)
        hidden = self.layer_2(hidden)
        hidden = self.activation(hidden)
        return self.layer_3(hidden)

# Synthetic regression problem, converted to float32 tensors for torch.
X, y = make_regression(
    n_samples=100,
    n_features=input_size,
    random_state=0,
    noise=5.0,
    bias=25,
)
X = torch.from_numpy(X.astype(np.float32))
# Targets become a column so they line up with the network's single output.
y = torch.from_numpy(y.reshape(-1, 1).astype(np.float32))
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=64)

test_net = FunctionalTestNet()
loss_function = torch.nn.MSELoss()
optimizer = torch.optim.Adam(params=test_net.parameters(), lr=1e-3)

# Full-batch training: every step uses all of X_train.
for k in range(5000):
    optimizer.zero_grad()
    y_pred = test_net(X_train)
    loss_value = loss_function(y_pred, y_train)
    loss_value.backward()
    optimizer.step()

test_net.eval()
e = shap.DeepExplainer(test_net, X_train)
shap_values = e.shap_values(X_test)

# these values should be the same
model_outputs = test_net(X_test)[:10, 0]
shap_reconstruction = e.expected_value + np.sum(shap_values, axis=1)[:10]
model_outputs, shap_reconstruction

(tensor([-102.5702,  -67.0447,  -86.7515,  -95.4933,   67.3495,  -74.6358,
         -120.1311,   51.8371, -131.1944, -101.9454], grad_fn=<SelectBackward>),
 array([ -74.55077034,  -49.73115642,  -96.34563647, -109.0783332 ,
          48.33857596, -106.10481459,  -93.44837552,   16.12230485,
        -111.44415119, -122.32224575]))

In [10]:
# The same issue occurs with a torch.nn.Module activation when a single instance is reused for every layer

class ModuleTestNet(torch.nn.Module):
    """Three-layer MLP that reuses one ``torch.nn.ReLU`` instance for both hidden layers.

    Only a single activation module is created (``self.activation``) and
    ``forward`` calls that same instance twice.
    """

    def __init__(self):
        super().__init__()
        hidden_width = 32
        # input_size is the notebook-level feature count.
        self.layer_1 = torch.nn.Linear(input_size, hidden_width)
        self.layer_2 = torch.nn.Linear(hidden_width, hidden_width)
        self.layer_3 = torch.nn.Linear(hidden_width, 1)
        # One shared activation module, deliberately used for every hidden layer.
        self.activation = torch.nn.ReLU()

    def forward(self, x):
        """Run ``x`` through the network and return the final linear layer's output."""
        first_hidden = self.activation(self.layer_1(x))
        second_hidden = self.activation(self.layer_2(first_hidden))
        return self.layer_3(second_hidden)
    

# Train the shared-activation network on the same split as the previous cell.
test_net = ModuleTestNet()
loss_function = torch.nn.MSELoss()
optimizer = torch.optim.Adam(params=test_net.parameters(), lr=1e-3)

# Full-batch training: every step uses all of X_train.
for k in range(5000):
    optimizer.zero_grad()
    y_pred = test_net(X_train)
    loss_value = loss_function(y_pred, y_train)
    loss_value.backward()
    optimizer.step()

e = shap.DeepExplainer(test_net, X_train)
shap_values = e.shap_values(X_test)

# Model outputs next to expected_value + summed SHAP values for the first 10 test rows.
model_outputs = test_net(X_test)[:10, 0]
shap_reconstruction = e.expected_value + np.sum(shap_values, axis=1)[:10]
model_outputs, shap_reconstruction

(tensor([ -89.7170,  -45.3518,  -90.7030,  -99.8936,   75.6612,  -69.8784,
         -117.3017,   47.9124, -137.5312,  -93.5812], grad_fn=<SelectBackward>),
 array([-125.27943121,  -92.24842853, -161.75920326, -148.78950273,
          80.21771759, -173.27037176, -119.57442342,   19.46549901,
        -157.65544493, -189.05813551]))

In [10]:
# Using a separate activation module instance for each layer works

class WorkingTestNet(torch.nn.Module):
    """Three-layer MLP with a dedicated ``torch.nn.ReLU`` instance per hidden layer.

    Unlike a network that reuses one activation object, each hidden layer here
    owns its own module: ``activation_1`` follows ``layer_1`` and
    ``activation_2`` follows ``layer_2``.
    """

    def __init__(self):
        super().__init__()
        # input_size is the notebook-level feature count.
        self.layer_1 = torch.nn.Linear(input_size, 32)
        self.layer_2 = torch.nn.Linear(32, 32)
        self.layer_3 = torch.nn.Linear(32, 1)
        self.activation_1 = torch.nn.ReLU()
        self.activation_2 = torch.nn.ReLU()

    def forward(self, x):
        """Run ``x`` through the network and return the final linear layer's output."""
        h = self.activation_1(self.layer_1(x))
        # Fixed: this previously called self.activation, which is never defined
        # on this class and raised AttributeError on the first forward pass.
        h = self.activation_2(self.layer_2(h))
        z = self.layer_3(h)
        return z

# Equivalent architecture built from torch.nn.Sequential; each ReLU below is
# its own module instance.
layer_stack = [
    torch.nn.Linear(input_size, 32),
    torch.nn.ReLU(),
    torch.nn.Linear(32, 32),
    torch.nn.ReLU(),
    torch.nn.Linear(32, 1),
]
sequential_test_net = torch.nn.Sequential(*layer_stack)

loss_function = torch.nn.MSELoss()
optimizer = torch.optim.Adam(params=sequential_test_net.parameters(), lr=1e-3)

# Full-batch training: every step uses all of X_train.
for k in range(5000):
    optimizer.zero_grad()
    y_pred = sequential_test_net(X_train)
    loss_value = loss_function(y_pred, y_train)
    loss_value.backward()
    optimizer.step()

e = shap.DeepExplainer(sequential_test_net, X_train)
shap_values = e.shap_values(X_test)

# Last expression of the cell, so the force plot is rendered as its output.
shap.force_plot(e.expected_value, shap_values)

In [11]:
# Compare the first 10 model outputs with expected_value plus the per-sample
# sum of SHAP values; for the Sequential model the two agree (see the output).
sequential_test_net(X_test)[:10, 0], e.expected_value + np.sum(shap_values, axis=1)[:10]

(tensor([ 103.2466,  -48.3360,  144.0894, -195.6727,   90.0781,  223.8405,
          222.3949,  -83.4374,   -8.4779, -134.1612], grad_fn=<SelectBackward>),
 array([ 103.24662095,  -48.33595702,  144.08941627, -195.67277825,
          90.07809907,  223.84048617,  222.39493722,  -83.43740249,
          -8.47794724, -134.16115761]))