## Tests for Ishigami function
Exact permutation. Analytical Shapley values obtained from Plischke, Rabitti, and Borgonovo (2020), p. 8. Inputs are independent.

In [1]:
import numpy as np
import pandas as pd
import chaospy as cp
from numpy.testing import assert_array_almost_equal as aaae

from econsa_shapley import get_shapley
from econsa_shapley import _r_condmvn
from econsa_sampling import cond_mvn

In [81]:
# Function inputs.
n_inputs = 7
perms = np.random.permutation(n_inputs)
j = 4
subset_j = perms[:j]
subsetj_conditional = perms[j:]
mean = np.zeros(n_inputs)
cov = np.identity(n_inputs)
cov

array([[1., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 0., 1.]])

In [82]:
# What does x_cond from the test cases spit out?
def x_cond(n, subset_j, subsetj_conditional, xjc):
    if subsetj_conditional is None:
        cov_int = np.array(cov)  # Get an array from cov.
        cov_int = cov_int.take(subset_j, axis=1)  # Take only columns/rows (depending on axis) that correspond to the indices
        #provided by subset_j.
        cov_int = cov_int[subset_j]
        distribution = cp.MvNormal(mean[subset_j], cov_int)
        return distribution.sample(n)
    else:
        return _r_condmvn(
            n,
            mean=mean,
            cov=cov,
            dependent_ind=subset_j,
            given_ind=subsetj_conditional,
            x_given=xjc,
            )
    
n_outer = 10
xjc_sampled = np.array(x_cond(n_outer, subset_j=subset_j, subsetj_conditional=None, xjc=None)).T
xjc_sampled

array([[-0.44109489, -0.43804072, -0.19616541,  0.11204452],
       [-0.75655787, -1.85433115,  0.35997685, -2.0784411 ],
       [ 1.01969651,  1.83732029, -0.99752845,  0.69722418],
       [ 0.2742608 ,  0.18682276,  0.29409172, -0.62021494],
       [ 0.42377777, -0.72768265, -0.95456416,  0.80159265],
       [-0.48690291, -0.57972813, -0.70220058,  0.76191355],
       [ 0.30159425,  0.11761762, -0.86000833, -0.68097673],
       [-0.83248662,  0.28567526,  1.5261132 ,  1.27037114],
       [-0.88631203, -0.94830386, -0.67047187,  0.31163852],
       [ 0.80705216,  0.42154777, -0.39314096, -1.12634342]])

In [83]:
n_inner = 10
#n_inputs = 7
mean = np.zeros(n_inputs)
cov = np.identity(n_inputs)

for j in range(1, n_inputs):
    
    perms = np.random.permutation(n_inputs)
    j = 4
    sj = perms[:j]
    sjc = perms[j:]
    for length in range(1, n_outer):
        
        xjc = xjc_sampled[length, ]
        
        sample_inputs = np.array(x_cond(n_inner, sj, sjc, xjc.flat)).T.reshape(n_inner, -1,)

ValueError: lengths of given_value and given_ind must be the same

In [78]:
sample_inputs

array([[-2.83754094,  1.06031703, -0.93791004,  0.25151254],
       [-0.54452563,  0.78519974, -0.14471513, -0.16336214],
       [-0.8596926 , -1.46110175,  1.64818679, -0.24599251],
       [-0.59044271, -1.42073883, -0.18828122, -0.19149145]])

In [69]:
cov = np.identity(n_inputs)
cov_int = np.array(cov)
cov_int

array([[1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1.]])

In [70]:
subset_j

array([0, 1, 2, 6])

In [71]:
cov_int = cov_int.take(subset_j, axis=1)
cov_int

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 1.],
       [0., 0., 0., 0.]])

In [72]:
cov_int = cov_int[subset_j]
cov_int

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]])

In [73]:
distribution = cp.MvNormal(mean[subset_j], cov_int)

In [74]:
distribution.sample(3)

array([[ 0.06172266,  0.48953322, -0.72340657],
       [-0.56113574,  1.70222466, -0.03065108],
       [ 1.11076468,  0.36046205, -1.37229961],
       [ 2.3194623 ,  0.64655772, -1.22032668]])

In [7]:
n_inner = 4
test_ = x_cond(n_inner, subset_j, subsetj_conditional, xjc.flat)
#sample_inputs = np.array(x_cond(n_inner, subset_j, subsetj_conditional, xjc.flat)).T.reshape(n_inner, -1, )
#sample_inputs

ValueError: lengths of given_value and given_ind must be the same

In [13]:
    cond_mean, cond_var = cond_mvn(
        mean,
        cov,
        dependent_ind=dependent_ind,
        given_ind=given_ind,
        given_value=x_given,
    )
    distribution = cp.MvNormal(cond_mean, cond_var)

    return distribution.sample(n)

NameError: name 'dependent_ind' is not defined

In [None]:
def test_get_shapley_ishigami():
    def ishigami_function(x):
        return math.sin(x[:, 1]) * (1 + 0.1 * (x[:, 3] ** 4)) + 7 * (math.sin(x[:, 2]) ** 2)

    def x_all(n):
        distribution = cp.Uniform(lower, upper)
        return distribution.sample(n)

    def x_cond(n, subset_j, subsetj_conditional, xjc):
        
        # Wrong, since we need samples for the variables corresponding to the indices in sj(c)!
        if subsetj_conditional is None:
            
            distribution = cp.Uniform(lower, upper)
        return distribution.sample(n)
            return # ndarray with dim n_sim x no. of elements in subset_j, exactly for those input variables.
        else:
            return # ndarray  with dim n_sim x no. of elements in subsetj_conditional.

    np.random.seed(123)
    n_inputs = 3
    mean = np.zeros(3)
    # Lower and upper bound of the uniform distribution.
    lower = -math.pi
    upper = math.pi
    variance = (1/12) * ((upper - lower) ** 2)
    cov = np.array([[1.0, 0, 0], [0, 1.0, 1.8], [0, 1.8, 4.0]])
    method = "exact"
    n_perms = None
    n_output = 10 ** 4
    n_outer = 10 ** 3
    n_inner = 10 ** 2

    col = ["X" + str(i) for i in np.arange(n_inputs) + 1]
    names = ["Shapley effects", "std. errors", "CI_min", "CI_max"]

    expected = pd.DataFrame(
        data=[
            [0.101309, 0.418989, 0.479701],
            [0.00241549, 0.16297, 0.163071],
            [0.096575, 0.0995681, 0.160083],
            [0.106044, 0.73841, 0.79932],
        ],
        index=names,
        columns=col,
    ).T

    calculated = get_shapley(
        method,
        gaussian_model,
        x_all,
        x_cond,
        n_perms,
        n_inputs,
        n_output,
        n_outer,
        n_inner,
    )

    aaae(calculated, expected)

In [21]:
# Example.
n_inputs = 7
n_sim = 10
perms = np.random.permutation(n_inputs)
j = 4
subset_j = perms[:j]
subsetj_conditional = perms[j:]
lower = 0
upper = 1
out = np.zeros((n_sim, n_inputs))
for i in subset_j:
    out[:, i] = cp.Uniform(lower, upper).sample(n_sim).T
out

array([[0.38377533, 0.        , 0.27999478, 0.        , 0.        ,
        0.47232937, 0.55148138],
       [0.16606031, 0.        , 0.49768564, 0.        , 0.        ,
        0.43442878, 0.46587021],
       [0.85391958, 0.        , 0.50341559, 0.        , 0.        ,
        0.36058198, 0.2781392 ],
       [0.53300291, 0.        , 0.50630399, 0.        , 0.        ,
        0.52169385, 0.53213383],
       [0.91287332, 0.        , 0.24418302, 0.        , 0.        ,
        0.0477079 , 0.12106805],
       [0.70604899, 0.        , 0.68216086, 0.        , 0.        ,
        0.84896816, 0.66360917],
       [0.57615931, 0.        , 0.23240094, 0.        , 0.        ,
        0.59900273, 0.65371224],
       [0.99222253, 0.        , 0.48436874, 0.        , 0.        ,
        0.73214416, 0.43902138],
       [0.02985574, 0.        , 0.72488381, 0.        , 0.        ,
        0.63781945, 0.09538967],
       [0.00731334, 0.        , 0.94950301, 0.        , 0.        ,
        0.25366271, 0.1

In [19]:
def x_cond(n, subset_j, subsetj_conditional, xjc):
        distribution = cp.Uniform(lower, upper)
        return distribution.sample(n)

In [None]:
def x_cond(n, subset_j, subsetj_conditional, xjc):
        
        # Wrong, since we need samples for the variables corresponding to the indices in sj(c)!
        if subsetj_conditional is None:
            
            distribution = cp.Uniform(lower, upper)
        return distribution.sample(n)
            return # ndarray with dim n_sim x no. of elements in subset_j, exactly for those input variables.
        else:
            return # ndarray  with dim n_sim x no. of elements in subsetj_conditional.

In [None]:
# Call the function.
x_cond(
    n=n_sim,
    subset_j=subset_j,
    subsetj_conditional=subsetj_conditional,
    
      )