## Tests for linear case with three Gaussian inputs
Exact permutation

In [19]:
    var_1 = 1
    var_2 = 1
    var_3 = 4
    
    # rho is the correlation coefficient, and thus, in range [-1,1]. Correlation between X2 and X3 only, i.e. rho = Corr[X2, X3].
    rho = 0.3
    covariance = rho * np.sqrt(var_2) * np.sqrt(var_3)
    beta_1 = 1
    beta_2 = 1
    beta_3 = 1

    component_1 = beta_1 ** 2 * var_1
    component_2 = beta_2 ** 2 * var_2
    component_3 = beta_3 ** 2 * var_3
    var_y = component_1 + component_2 + component_3 + 2 * covariance * beta_2 * beta_3
    share = 0.5 * (rho ** 2)
    true_shapley_1 = (component_1)/(component_1 + component_2 + component_3 + 2 * covariance * beta_2 * beta_3)
    true_shapley_2 = (component_2 + covariance * beta_2 * beta_3 + share * (component_3 - component_2))/(component_1 + component_2 + component_3 + 2 * covariance * beta_2 * beta_3)
    true_shapley_3 = (component_3 + covariance * beta_2 * beta_3 + share * (component_2 - component_1))/(component_1 + component_2 + component_3 + 2 * covariance * beta_2 * beta_3)

In [20]:
true_shapley_1 + true_shapley_2 + true_shapley_3

1.0187499999999998

In [2]:
    var_1 = 16
    var_2 = 4
    var_3 = 9
    
    # rho is the correlation coefficient, and thus, in range [-1,1]. Correlation between X2 and X3 only, i.e. rho = Corr[X2, X3].
    rho = 0.3
    covariance = rho * 2 * 3
    beta_1 = 1.3
    beta_2 = 1.5
    beta_3 = 2.5

    component_1 = beta_1 ** 2 * var_1
    component_2 = beta_2 ** 2 * var_2
    component_3 = beta_3 ** 2 * var_3
    var_y = component_1 + component_2 + component_3 + 2 * covariance * beta_2 * beta_3
    share = 0.5 * (rho**2)
    true_shapley_1 = (component_1)/var_y
    true_shapley_2 = (component_2 + covariance * beta_2 * beta_3 + share * (component_3 - component_2))/var_y
    true_shapley_3 = (component_3 + covariance * beta_2 * beta_3 + share * (component_2 - component_1))/var_y
    
    true_shapley_1 + true_shapley_2 + true_shapley_3 - 1.0

0.012425087437375915

In [6]:
    def linear_model(x):
        beta = np.array([[beta_1], [beta_2], [beta_3]])
        return x.dot(beta)
    np.random.seed(123)
    n_inputs = 3
    mean = np.zeros(n_inputs)
    n_sim = 10 ** 4
    
    var_1 = 16
    var_2 = 4
    var_3 = 9
    
    # rho is the correlation coefficient, and thus, in range [-1,1]. Correlation between X2 and X3 only, i.e. rho = Corr[X2, X3].
    rho = 0.3
    covariance = rho * 2 * 3
    beta_1 = 1.3
    beta_2 = 1.5
    beta_3 = 2.5

    cov = np.array(
        [[var_1, 0, 0],
        [0, var_2, covariance],
        [0, covariance, var_3]]
        )
    
    component_1 = beta_1 ** 2 * var_1
    component_2 = beta_2 ** 2 * var_2
    component_3 = beta_3 ** 2 * var_3
    
    var_y = simulate_variance(
        model=linear_model,
        cov=cov,
        mean=mean,
        n_sim=n_sim)
    
    share = 0.5 * (rho**2)
    
    true_shapley_1 = (component_1)/var_y
    true_shapley_2 = (component_2 + covariance * beta_2 * beta_3 + share * (component_3 - component_2))/var_y
    true_shapley_3 = (component_3 + covariance * beta_2 * beta_3 + share * (component_2 - component_1))/var_y
    
    true_shapley_1 + true_shapley_2 + true_shapley_3 - 1.0

0.04037756533463899

In [14]:
def test_get_shapley_linear_three_inputs():
    def linear_model(x):
        beta = np.array([[beta_1], [beta_2], [beta_3]])
        return x.dot(beta)

    def x_all(n):
        return cp.MvNormal(mean, cov).sample(n)

    def x_cond(n, subset_j, subsetj_conditional, xjc):
        if subsetj_conditional is None:
            cov_int = np.array(cov).take(subset_j, axis=1)[subset_j]
            distribution = cp.MvNormal(mean[subset_j], cov_int)
            return distribution.sample(n)
        else:
            return _r_condmvn(
                n,
                mean=mean,
                cov=cov,
                dependent_ind=subset_j,
                given_ind=subsetj_conditional,
                x_given=xjc,
            )

    np.random.seed(123)
    n_inputs = 3
    mean = np.zeros(n_inputs)
    var_1 = 16
    var_2 = 4
    var_3 = 9
    
    # rho is the correlation coefficient, and thus, in range [-1,1]. Correlation between X2 and X3 only, i.e. rho = Corr[X2, X3].
    rho = 0.3
    covariance = rho * np.sqrt(var_2) * np.sqrt(var_3)
    beta_1 = 1.3
    beta_2 = 1.5
    beta_3 = 2.5
    #beta = (beta_1, beta_2, beta)

    component_1 = beta_1 ** 2 * var_1
    component_2 = beta_2 ** 2 * var_2
    component_3 = beta_3 ** 2 * var_3
    var_y = component_1 + component_2 + component_3 + 2 * covariance * beta_2 * beta_3
    share = 0.5 * (rho**2)
    true_shapley_1 = (component_1)/var_y
    true_shapley_2 = (component_2 + covariance * beta_2 * beta_3 + share * (component_3 - component_2))/var_y
    true_shapley_3 = (component_3 + covariance * beta_2 * beta_3 + share * (component_2 - component_1))/var_y

    cov = np.array(
        [[var_1, 0, 0],
        [0, var_2, covariance],
        [0, covariance, var_3]]
        )

    method = "exact"
    n_perms = None
    n_output = 10 ** 4
    n_outer = 10 ** 3
    n_inner = 10 ** 2

    col = ["X" + str(i) for i in np.arange(n_inputs) + 1]
    names = ["Shapley effects", "std. errors", "CI_min", "CI_max"]

    expected = pd.DataFrame(
        data=[
            [true_shapley_1, true_shapley_2],
            [0, 0],
            [true_shapley_1, true_shapley_2],
            [true_shapley_1, true_shapley_2],
        ],
        index=names,
        columns=col,
    ).T

    calculated = get_shapley(
        method,
        linear_model,
        x_all,
        x_cond,
        n_perms,
        n_inputs,
        n_output,
        n_outer,
        n_inner,
    )

    aaae(calculated["Shapley effects"], expected["Shapley effects"], 4)