## Tests for additive model with an interaction term (Y = X1 + X2 * X3) and three Gaussian inputs
Exact permutation method

In [6]:
import numpy as np
import pandas as pd
from numpy.testing import assert_array_almost_equal as aaae

from econsa_shapley import get_shapley
from econsa_shapley import _r_condmvn
from simulation_of_variance import simulate_variance

In [33]:
    def additive_model(x):
        return x[:, 0] + x[:, 1] * x[:, 2]
    
    var_1 = 1
    var_2 = 1
    var_3 = 1
    
    rho = 0.65
    covariance = rho * np.sqrt(var_1) * np.sqrt(var_3)
    np.random.seed(123)
    n_inputs = 3
    mean = np.zeros(n_inputs)
    n_sim = 10 ** 6
    
    cov = np.array(
        [[var_1, 0, covariance],
        [0, var_2, 0],
        [covariance, 0, var_3]]
        )
    
    var_y = simulate_variance(
        model=additive_model,
        cov=cov,
        mean=mean,
        n_sim=n_sim)
    
    true_shapley_1 = ((var_1 * (1 - ((rho ** 2) / 2))) + (((var_2 * var_3) * (rho ** 2)) / 6)) / var_y
    true_shapley_2 = (((var_2 * var_3) * (3 + (rho ** 2))) / 6) / var_y
    true_shapley_3 = (((var_1 * (rho ** 2)) / 2) + (((var_2 * var_3) * (3 - (2 * (rho ** 2)))) / 6)) / var_y

true_shapley_1 + true_shapley_2 + true_shapley_3

1.0007924940855344

In [4]:
def test_get_shapley_additive_three_inputs():
    """Check exact-permutation Shapley effects against closed-form values.

    The model under test is ``x[:, 0] + x[:, 1] * x[:, 2]``. Inputs have zero
    mean and a covariance matrix whose only non-zero off-diagonal entry links
    the first and the third input. The closed-form effects are divided by an
    output variance obtained from ``simulate_variance`` and compared with the
    "Shapley effects" column returned by ``get_shapley`` to 4 decimals.
    """
    # NOTE(review): `cp` is not imported in the visible part of this file --
    # presumably `import chaospy as cp` lives elsewhere; confirm.

    def additive_model(x):
        # First column plus the product of the second and third columns.
        first, second, third = x[:, 0], x[:, 1], x[:, 2]
        return first + second * third

    def x_all(n):
        # Draw n samples from the joint distribution of all inputs.
        joint = cp.MvNormal(mean, cov)
        return joint.sample(n)

    def x_cond(n, subset_j, subsetj_conditional, xjc):
        # With a conditioning set, delegate to the conditional sampler.
        if subsetj_conditional is not None:
            return _r_condmvn(
                n,
                mean=mean,
                cov=cov,
                dependent_ind=subset_j,
                given_ind=subsetj_conditional,
                x_given=xjc,
            )
        # Otherwise sample the marginal of subset_j: pick the matching
        # columns, then the matching rows, of the covariance matrix.
        sub_cov = np.array(cov).take(subset_j, axis=1)[subset_j]
        marginal = cp.MvNormal(mean[subset_j], sub_cov)
        return marginal.sample(n)

    # Seed first: everything random below draws from this state.
    np.random.seed(123)

    n_inputs = 3
    n_sim = 10 ** 6
    mean = np.zeros(n_inputs)
    var_1 = var_2 = var_3 = 1

    # rho is a correlation coefficient and therefore lies in [-1, 1]. It enters
    # the covariance matrix at positions [0, 2] and [2, 0], i.e. it couples the
    # first and the third input; all other off-diagonal entries are zero.
    rho = 0.3
    covariance = rho * np.sqrt(var_1) * np.sqrt(var_3)
    half_rho_sq = 0.5 * rho ** 2

    cov = np.array(
        [
            [var_1, 0, covariance],
            [0, var_2, 0],
            [covariance, 0, var_3],
        ]
    )

    var_y = simulate_variance(model=additive_model, cov=cov, mean=mean, n_sim=n_sim)

    # Closed-form effects, each normalised by the simulated output variance.
    true_shapley_1 = (
        var_1 * (1 - half_rho_sq) + (var_2 * var_3) * (1 / 6) * (rho ** 2)
    ) / var_y
    true_shapley_2 = ((1 / 6) * (var_2 * var_3) * (3 + rho ** 2)) / var_y
    true_shapley_3 = (
        0.5 * (var_1 * rho ** 2)
        + (1 / 6) * (var_2 * var_3) * (3 - 2 * rho ** 2)
    ) / var_y
    effects = [true_shapley_1, true_shapley_2, true_shapley_3]

    # Settings forwarded positionally to get_shapley.
    method = "exact"
    n_perms = None
    n_output = 10 ** 4
    n_outer = 10 ** 3
    n_inner = 10 ** 2

    col = [f"X{i}" for i in range(1, n_inputs + 1)]
    names = ["Shapley effects", "std. errors", "CI_min", "CI_max"]

    # One row per statistic, transposed so that inputs become the index.
    # Only the "Shapley effects" column is compared below.
    expected = pd.DataFrame(
        data=[effects, [0, 0, 0], effects, effects],
        index=names,
        columns=col,
    ).T

    calculated = get_shapley(
        method,
        additive_model,
        x_all,
        x_cond,
        n_perms,
        n_inputs,
        n_output,
        n_outer,
        n_inner,
    )

    aaae(calculated["Shapley effects"], expected["Shapley effects"], 4)

# Run the test directly when this cell executes; a failed comparison raises.
test_get_shapley_additive_three_inputs()

In [38]:
# Two length-2 arrays, each stored as a single cell of a one-column frame.
first_row = np.zeros(shape=2)
second_row = np.ones(shape=2)
# Wrapping every array in its own list makes it one cell rather than one row
# of scalars.
pd.DataFrame(data=[[row] for row in (first_row, second_row)])

Unnamed: 0,0
0,"[0.0, 0.0]"
1,"[1.0, 1.0]"
