In [1]:
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Seed PyTorch so that the random initialisation of the linear layer (and hence
# the training trace and everything computed from the model afterwards) is
# reproducible on a fresh kernel. The same seed drives the train/test split.
SEED = 42
torch.manual_seed(SEED)

# Load the Iris dataset
iris = load_iris()

# Split the dataset into training and testing sets (80% / 20%)
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.2, random_state=SEED)

# Standardize the data; the scaler is fitted on the training split only and
# then re-used for the test split so that no test statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Convert the data to PyTorch tensors (float features, integer class labels)
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
y_test = torch.tensor(y_test, dtype=torch.long)

# Define the model: a single linear layer mapping 4 inputs to 3 logits
model = nn.Linear(4, 3)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

# Train the model with full-batch gradient descent
num_epochs = 100
for epoch in range(num_epochs):
    # Forward pass
    outputs = model(X_train)
    loss = criterion(outputs, y_train)

    # Backward and optimize
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Print the loss at every 10th epoch
    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Evaluate the model on the test set
with torch.no_grad():
    outputs = model(X_test)
    # argmax over the class dimension; avoids the legacy `.data` attribute and
    # does not assign to `_`, which IPython uses for the last cell output.
    predicted = outputs.argmax(dim=1)
    accuracy = (predicted == y_test).sum().item() / y_test.size(0)
    print(f'Test Accuracy: {accuracy:.4f}')


def to_np(mod):
    """Wrap a torch module so that calling it returns a NumPy array.

    Args:
        mod: a callable (e.g. ``nn.Module``) that maps a tensor to a tensor.

    Returns:
        A function ``f(_x)`` that evaluates ``mod(_x)``, detaches the result
        from the autograd graph, moves it to the CPU and converts it to NumPy.
    """
    def _predict(_x):
        return mod(_x).detach().cpu().numpy()

    return _predict


# Model that outputs class probabilities instead of logits
prob_model = torch.nn.Sequential(model, torch.nn.Softmax(dim=-1))

Epoch [10/100], Loss: 0.6824
Epoch [20/100], Loss: 0.5442
Epoch [30/100], Loss: 0.4747
Epoch [40/100], Loss: 0.4320
Epoch [50/100], Loss: 0.4027
Epoch [60/100], Loss: 0.3811
Epoch [70/100], Loss: 0.3642
Epoch [80/100], Loss: 0.3504
Epoch [90/100], Loss: 0.3386
Epoch [100/100], Loss: 0.3284
Test Accuracy: 0.9333


In [2]:
from dvals.player_distributions import ShapleyCD

# Initialize the Shapley coalition distribution with one player per input
# feature; the count is read from the data instead of being hard-coded.
n_players = X_train.shape[1]
scd = ShapleyCD(n_players)
# Baseline input: the per-feature mean of the (standardized) training set.
baseline = X_train.mean(dim=0)

In [3]:
from dvals import values
from dvals.games import game_from_ml_model_with_baseline_torch

# Compute the standard Shapley values of the probability model for all examples:
# one game per training example, built from the softmax model and the baseline.
# NOTE(review): the meaning of the third positional argument (False) of
# `values.compute` is not visible in this notebook -- confirm against dvals.
standard_svs = [
    values.compute(
        game_from_ml_model_with_baseline_torch(to_np(prob_model), x, baseline),
        scd,
        False,
    )
    for x in X_train
]
# Print the values of the first example, followed by its offset and grand payoff
first_standard_sv = standard_svs[0]
print(first_standard_sv, first_standard_sv.offset, first_standard_sv.grand_payoff, sep='\n\n')

{0: array([ 0.16921091, -0.11419018, -0.05502074], dtype=float32), 1: array([ 0.18676063, -0.1640841 , -0.02267651], dtype=float32), 2: array([ 0.32333717, -0.25545856, -0.06787854], dtype=float32), 3: array([ 0.05509668,  0.01608855, -0.07118523], dtype=float32)}

[0.2526324  0.5301747  0.21719286]

[9.8703778e-01 1.2530354e-02 4.3182765e-04]


In [4]:
from dvals.games import CategoricalGame

# Compute the categorical values for every training example. Each game is built
# from the logits model (not the softmax one) and then converted into a
# categorical game before the values are computed.
cat_svs = []
for x in X_train:
    std_logits_game = game_from_ml_model_with_baseline_torch(to_np(model), x, baseline)
    cat_game = CategoricalGame.from_logits_game(std_logits_game)
    cat_sv = values.compute(cat_game, scd, False)
    cat_svs.append(cat_sv)
# Print the categorical values of the first example, followed by their offset
print(cat_svs[0], cat_svs[0].offset, sep='\n\n')

{0: <CategoricalDifference {(0, 1): 0.12128063469056204, (0, 2): 0.04793030369147252, (1, 0): 0.0, (1, 2): 0.007090438858103076, (2, 0): 0.0, (2, 1): 0.0, 0.0: 0.8236986185802379}>, 1: <CategoricalDifference {(0, 1): 0.15942370363651215, (0, 2): 0.02733691485299056, (1, 0): 0.0, (1, 2): 0.0, (2, 0): 0.0, (2, 1): 0.004660402174145157, 0.0: 0.8085789556983703}>, 2: <CategoricalDifference {(0, 1): 0.2561276663496523, (0, 2): 0.06720949407412533, (1, 0): 0.0, (1, 2): 0.0006690683695424548, (2, 0): 0.0, (2, 1): 0.0, 0.0: 0.6759938055523282}>, 3: <CategoricalDifference {(0, 1): 0.016440963249083004, (0, 2): 0.03865572472344579, (1, 0): 0.0, (1, 2): 0.032529512721613495, (2, 0): 0.0, (2, 1): 0.0, 0.0: 0.9123738327325555}>}

<CategoricalPayoff {0: 0.25263244, 1: 0.5301747, 2: 0.21719287}>


In [5]:
def importance_standard_abs(val):
    """Score and rank players by the total absolute size of their values.

    Args:
        val: object exposing ``np_values(include_offset=..., include_gp=...)``.
            The returned array is assumed to have one row per player and one
            column per model output -- TODO confirm against ``dvals``.

    Returns:
        A pair ``(importance, srt)``. ``importance`` is the sum of absolute
        values along axis 1; ``srt`` holds the indices of ``importance``
        ordered from the largest score to the smallest.
    """
    raw_values = val.np_values(include_offset=False, include_gp=False)
    importance = np.abs(raw_values).sum(axis=1)
    # argsort is ascending, so flip it to get the most important player first
    srt = np.flip(np.argsort(importance))
    return importance, srt

In [6]:
def dval_importance(dval, n_players=4):
    """Score and rank players by the probability that they change the outcome.

    Args:
        dval: indexable collection where ``dval[i]`` exposes a
            ``probability_of_change()`` method for every player index ``i``.
        n_players: number of players to score, indexed ``0 .. n_players - 1``.
            Defaults to 4, the value previously hard-coded here.

    Returns:
        A pair ``(importance, srt)``. ``importance`` is a 1-D array with one
        probability per player; ``srt`` holds the player indices ordered from
        the largest probability to the smallest.
    """
    importance = np.array([dval[i].probability_of_change() for i in range(n_players)])
    # argsort is ascending, so reverse it to get the most important player first
    srt = np.argsort(importance)[::-1]
    return importance, srt

### Identifying cancellation errors of standard SVs

This addresses the fact that standard SVs are expectations $\mathbb{E}_S [v(S\cup i) - v(S)]$.
Simply put, an expectation can "hide" importance, because it may contain terms that cancel each other out.
E.g., in a two-feature problem with a binary classifier, a feature can have individual contributions of -1 and 1 that cancel each other out, leaving 0 as the output. However, one would hardly say that this feature is unimportant, as it always flips the outcome. Bernoulli values correctly identify that the feature is relevant by putting $Q(\xi = 0) = 0$.
This is an extreme case, but in practice it is easy to verify that cancellations lead to wrong attribution orders.

In [7]:
# Collect the indices of the training examples on which the two importance
# rankings disagree:
#   ids_for_cancellation_all   - the orderings differ in at least one position
#   ids_for_cancellation_first - the top-ranked feature differs
ids_for_cancellation_all = []
ids_for_cancellation_first = []
for idx, (std_val, cat_val) in enumerate(zip(standard_svs, cat_svs)):
    # Only the orderings (second element of each returned pair) are compared
    std_order = importance_standard_abs(std_val)[1]
    cat_order = dval_importance(cat_val)[1]
    if (std_order != cat_order).any():
        ids_for_cancellation_all.append(idx)
    if std_order[0] != cat_order[0]:
        ids_for_cancellation_first.append(idx)


In [8]:
# Number of training examples, followed by how many of them show a different
# full ordering / a different top-ranked feature between the two rankings.
tuple(map(len, (X_train, ids_for_cancellation_all, ids_for_cancellation_first)))

(120, 18, 4)