In [1]:
import itertools
import logging
import textwrap
from typing import Callable

import numpy as np
import pandas as pd

from attribution_game import AttributionGame
from multivariate_bernoulli import MultivariateBernoulli

In [2]:
%load_ext autoreload
%autoreload 2

# Shapley values for explanation games
We can compute the equations for the Shapley values for the two person game given by the *conditional*, *marginal*, and *joint marginal* distributions. For a two-person game, the *marginal* payout Shapley values are the same as the *joint marginal*.

## Conditional payout
The conditional payout considers:

$$
v_{\text{conditional}} = \pmb{E}_{X \sim D_{\text{data}}}\left[ f(\pmb{\tau}(\pmb{x}, X, S)) | X_{s_1} = x_{s_1}, X_{s_2} = x_{s_2}, ..., X_{s_k} = x_{s_k} \right] - \pmb{E}_{X \sim D_{data}}\left [ f(X) \right]
$$

In [3]:
def shapley_values(n_players: int, payout: Callable):
    """Computes the exact shapley values for a payout function v(S)"""
    values = np.zeros(n_players)
    for perm in itertools.permutations(range(n_players)):
        coalition = set()
        past_payout = payout(coalition)
        for player in perm:
            logging.debug(f'adding player {player} to coalition {coalition}')
            coalition.add(player)
            payout_after_adding = payout(coalition)
            values[player] +=  (payout_after_adding - past_payout)
            logging.debug(f'Payout with: {payout_after_adding: .2f} | payout without: {past_payout:.2f}')
            past_payout = payout_after_adding
            
    values = values / np.math.factorial(n_players)
    return values

def compare_games(model, model_input, distribution):
    """Utility function to compare explanations generated by different payout attributions."""
    game = AttributionGame(model=model, model_input=model_input, distribution=distribution)
    phis = dict()
    phis['marginal'] = shapley_values(n_players=distribution.M, payout=game.marginal_expectation_payout)
    phis['joint_marginal'] = shapley_values(n_players=distribution.M, payout=game.joint_marginal_expectation_payout)
    phis['uniform'] = shapley_values(n_players=distribution.M, payout=game.uniform_expectation_payout)
    phis['logical_inverse_only'] = shapley_values(n_players=distribution.M, payout=game.logical_inverse_expectation_payout)
    phis['conditional'] = shapley_values(n_players=distribution.M, payout=game.conditional_expectation_payout)
    
    res_table = pd.DataFrame(
        phis.values(), 
        index=phis.keys(), 
        columns=[f'phi_{name}' for name in distribution.variable_names])
    res_table.index.name = 'game'
    res_table.insert(0, 'phi_0', [game.expected_fx, game.expected_fx, game.expected_fx_joint_marginal, 
                                  game.expected_fx_uniform, game.expected_fx_logical_inverse])
    res_table.insert(0, 'f(x)', model(**model_input))

    return res_table

# Hiring a mover: two-variable simple example

In [4]:
mu = np.array([
    [ # female
        0.1, # not_good_lifting
        0.0, # good_lifting
    ],
    [ # male
        0.4, # not_good_lifting
        0.5, # good_lifting
    ],
])


# normalize to 1
mu = mu / mu.sum()

# now we can create our new X
X = MultivariateBernoulli(mu, ['is_male', 'is_good_lifter'])

# here we create two functions
def fn_male_lifter(**kwargs):
    return 1.0 if (kwargs['is_male'] and kwargs['is_good_lifter']) else 0.0

def fn_is_male(**kwargs):
    return 1.0 if kwargs['is_male'] else 0.0

In [5]:
# construct summary table of the variable
x_table = X.to_table()

# add other distributions
x_table['p_joint_marginal(x)'] = X.joint_marginal().iter_probs()
x_table['p_uniform(x)'] = 1/X.M
x_table['p_counterfactual(x)'] = 0; x_table.iloc[0, -1] = 1.0

# add functions
def eval_function(variable: MultivariateBernoulli, fn: Callable):
    return [fn(**dict(zip(variable.variable_names, values)))
            for values in variable.iter_inputs()]

x_table['f_male_lifter(x)'] = eval_function(X, fn_male_lifter)
x_table['f_is_male(x)'] = eval_function(X, fn_is_male)

print('Example summary:')
display(x_table)

print()
print()

print('fn_complex correlations (uniform distribution):')
display(x_table.corr().iloc[-1, :2].to_frame())

print()
print()

print('Correlations in X:')
display(X.corr())

Example summary:


Unnamed: 0,is_male,is_good_lifter,p(x),p_joint_marginal(x),p_uniform(x),p_counterfactual(x),f_male_lifter(x),f_is_male(x)
0,0,0,0.1,0.05,0.5,1.0,0.0,0.0
1,0,1,0.0,0.05,0.5,0.0,0.0,0.0
2,1,0,0.4,0.45,0.5,0.0,0.0,1.0
3,1,1,0.5,0.45,0.5,0.0,1.0,1.0




fn_complex correlations (uniform distribution):


Unnamed: 0,f_is_male(x)
is_male,1.0
is_good_lifter,0.0




Correlations in X:


Unnamed: 0,is_male,is_good_lifter
is_male,1.0,0.333333
is_good_lifter,0.333333,1.0


In [6]:
# Explain the male model
all_true_input = {k: True for k in X.variable_names}
comparison_table = compare_games(fn_is_male, all_true_input, X)
display(comparison_table.style.format('{:.4f}'))

Unnamed: 0_level_0,f(x),phi_0,phi_is_male,phi_is_good_lifter
game,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
marginal,1.0,0.9,0.1,0.0
joint_marginal,1.0,0.9,0.1,0.0
uniform,1.0,0.9,0.5,0.0
logical_inverse_only,1.0,0.5,1.0,0.0
conditional,1.0,0.0,0.05,0.05


In [8]:
# Explain the male_lifter model
all_true_input = {k: True for k in X.variable_names}
comparison_table = compare_games(fn_male_lifter, all_true_input, X)
display(comparison_table.style.format('{:.4f}'))

Unnamed: 0_level_0,f(x),phi_0,phi_is_male,phi_is_good_lifter
game,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
marginal,1.0,0.5,0.05,0.45
joint_marginal,1.0,0.5,0.075,0.475
uniform,1.0,0.45,0.375,0.375
logical_inverse_only,1.0,0.25,0.5,0.5
conditional,1.0,0.0,0.0278,0.4722


# Three variables and the joint-marginal distribution: hiring a mover
Let's consider the example problem of explaining an automated system used to assist in hiring decisions for a moving company. The system outputs a score between 0 and 1 representing the perceived likelihood of a particular hire being good (turning out well). The system considers three features: $X_0 :=$ "gender is male", $X_1 :=$ "good weightlifting ability", and $X_2 :=$ "age 30+". These features are Bernoulli random variables.

### Defining the 3d toy problem distribution

In [9]:
mu = np.array([
    [ # female
        [ # not_good_lifting
            0.01, # under 30
            0.04, # 30+
        ], 
        [ # good_lifting
            0.00, # under 30
            0.00, # 30+
        ]    
    ],
    [ # male
        [ # not_good_lifting
            0.04, # under 30
            0.15, # 30+
        ],   
        [ # good_lifting
            0.70, # under 30
            0.06, # 30+
        ]    
    ]
])


# normalize to 1
mu = mu / mu.sum()

# now we can create our new X
X = MultivariateBernoulli(mu, ['is_male', 'is_good_lifter', 'is_30'])

# create a new complex function for 3-variable
def fn_complex(**kwargs):
    if kwargs['is_good_lifter']:
        if kwargs['is_30']:
            res = 0.1
        else:
            res = 0.6
    else:
        if kwargs['is_30']:
            res = 0.3
        else:
            res = 0.0
    return res + (0.3 * kwargs['is_male'])

In [10]:
# construct summary table of the variable
x_table = X.to_table()

# add other distributions
x_table['p_joint_marginal(x)'] = X.joint_marginal().iter_probs()
x_table['p_uniform(x)'] = 1/X.M
x_table['p_logical_counterfactual(x)'] = 0; x_table.iloc[0, -1] = 1.0

# create counterfactual distribution by zeroing out inputs that result in low scores
X_counterfactual = X.joint_marginal()
X_counterfactual.mu = np.array([
    [
        [0, 0],
        [0, 0],
    ],
    [
        [0, .15],
        [.7, 0]
    ]
])
X_counterfactual.mu = X_counterfactual.mu / X_counterfactual.mu.sum()

# add this distribution to the table, too
x_table['p_counterfactual_distribution(x)'] = X_counterfactual.iter_probs()

# add functions
# x_table['f_is_male(x)'] = eval_function(X, fn_is_male)
x_table['f_complex(x)'] = eval_function(X, fn_complex)

print('Example summary:')
display(x_table)

print()
print()

print('fn_complex correlations (uniform distribution):')
display(x_table.corr().iloc[-1, :3].to_frame())

print()
print()

print('Correlations in X:')
display(X.corr())

Example summary:


Unnamed: 0,is_male,is_good_lifter,is_30,p(x),p_joint_marginal(x),p_uniform(x),p_logical_counterfactual(x),p_counterfactual_distribution(x),f_complex(x)
0,0,0,0,0.01,0.009,0.333333,1.0,0.0,0.0
1,0,0,1,0.04,0.003,0.333333,0.0,0.0,0.3
2,0,1,0,0.0,0.0285,0.333333,0.0,0.0,0.6
3,0,1,1,0.0,0.0095,0.333333,0.0,0.0,0.1
4,1,0,0,0.04,0.171,0.333333,0.0,0.0,0.3
5,1,0,1,0.15,0.057,0.333333,0.0,0.176471,0.6
6,1,1,0,0.7,0.5415,0.333333,0.0,0.823529,0.9
7,1,1,1,0.06,0.1805,0.333333,0.0,0.0,0.4




fn_complex correlations (uniform distribution):


Unnamed: 0,f_complex(x)
is_male,0.547723
is_good_lifter,0.365148
is_30,-0.182574




Correlations in X:


Unnamed: 0,is_male,is_good_lifter,is_30
is_male,1.0,0.408248,-0.291397
is_good_lifter,0.408248,1.0,-0.702959
is_30,-0.291397,-0.702959,1.0


In [11]:
# Explain the complex model
all_true_input = {k: True for k in X.variable_names}
comparison_table = compare_games(fn_complex, all_true_input, X)

# add explanation for counterfactual distribution
counterfactual_game = AttributionGame(fn_complex, all_true_input, X_counterfactual)
phi_counterfactual = shapley_values(X_counterfactual.M, counterfactual_game.marginal_expectation_payout)
comparison_table.loc['counterfactual_distribution', :] = [counterfactual_game.fx_on_input, counterfactual_game.expected_fx] + phi_counterfactual.tolist()

display(comparison_table.style.format('{:.4f}'))

Unnamed: 0_level_0,f(x),phi_0,phi_is_male,phi_is_good_lifter,phi_is_30
game,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
marginal,0.4,0.768,0.015,-0.028,-0.355
joint_marginal,0.4,0.768,0.015,0.024,-0.303
uniform,0.4,0.664,0.15,0.0,-0.15
logical_inverse_only,0.4,0.4,0.3,0.2,-0.1
conditional,0.4,0.0,0.0157,-0.0233,-0.3604
counterfactual_distribution,0.4,0.8471,0.0,-0.0353,-0.4118


# Appendix: looking at properties of the data distribution

In [12]:
mu_male = X.prob({'is_male': True})
mu_lift = X.prob({'is_good_lifter': True})
mu_experienced = X.prob({'is_30': True})
print(f'probability male: {mu_male:.3f} | probability lifter: {mu_lift:.3f} | probability 30+: {mu_experienced:.3f}')

# prob lifting if male, lifting if female
mu_lift_if_male = X.conditional({'is_male': True}).prob({'is_good_lifter': True})
mu_lift_if_female = X.conditional({'is_male': False}).prob({'is_good_lifter': True})
print(f'probability lifter: {mu_lift_if_male:.3f} if male | {mu_lift_if_female:.3f} if female')

# prob lifting if over 30, if over 30 and male, if over 30 and female
mu_lift_if_30 = X.conditional({'is_30': True}).prob({'is_good_lifter': True})
mu_lift_if_30_male = X.conditional({'is_male': True, 'is_30': True}).prob({'is_good_lifter': True})
mu_lift_if_30_female = X.conditional({'is_male': False, 'is_30': True}).prob({'is_good_lifter': True})
print(f'probability lifter: {mu_lift_if_30:.3f} if 30+ | {mu_lift_if_30_male:.3f} if male and 30+ | {mu_lift_if_30_female:.3f} if female and 30+')

probability male: 0.950 | probability lifter: 0.760 | probability 30+: 0.250
probability lifter: 0.800 if male | 0.000 if female
probability lifter: 0.240 if 30+ | 0.286 if male and 30+ | 0.000 if female and 30+
