# Plot Shapley effects against $N_O$ while keeping $N_V$ and $N_I$ constant at $10^{7}$ and $3$ respectively.
Since `n_inputs` is small, we use the exact method, which evaluates all permutations of the inputs instead of sampling them.

In [8]:
import numpy as np
import pandas as pd

## Define function inputs.

In [None]:
def linear_model(x):
    """Evaluate the linear model at ``x``.

    Returns ``x.dot(beta)``. ``beta`` is not a parameter: it is looked up in
    the surrounding (module) scope when the function is called, so it must be
    defined before the model is evaluated.
    """
    prediction = x.dot(beta)
    return prediction

## Define a function that returns Shapley effects for a range of $N_O$.

In [None]:
def range_shapley_estimates(n_inputs, model, beta, cov, n_output, n_inner, n_output_range):
    """Estimate Shapley effects for a range of outer sample sizes.

    Inputs are sampled from a zero-mean multivariate normal distribution with
    variance-covariance matrix ``cov``. The Shapley effects are estimated with
    the exact method (``method="exact"``, ``n_perms=None``), once per element
    of ``n_output_range``, and returned together with the analytical values.

    Args:
        n_inputs (int): Number of input variables.
        model (callable): Model to calculate the Shapley effects for. Must be
            ``linear_model`` (two or three inputs) or ``additive_model``
            (three inputs); analytical Shapley effects are implemented for
            these cases only.
        beta (array): Model parameters used in the analytical Shapley effects
            of the linear model. Not used for ``additive_model``.
        cov (ndarray): Variance-covariance matrix of the model inputs.
        n_output (int): Number of samples drawn for the estimation of the
            output variance (= model variance).
        n_inner (int): Number of samples drawn for the estimation of the
            conditional variance.
        n_output_range (list): Outer sample sizes. ``get_shapley`` is called
            once per element, with the element passed as its ``n_outer``
            argument.

    Returns:
        estimated_shapleys (ndarray): The estimated Shapley effects for each
            element in ``n_output_range``. Dimensions of the ndarray are
            ``(n_inputs, len(n_output_range))``.
        true_shapley (ndarray): Analytical Shapley effects, one per input.

    Raises:
        ValueError: If no analytical Shapley effects are implemented for the
            given combination of ``model`` and ``n_inputs``.
    """
    cov = np.asarray(cov)
    mean = np.zeros(n_inputs)

    def x_all(n):
        # Joint sample of all inputs.
        return cp.MvNormal(mean, cov).sample(n)

    def x_cond(n, subset_j, subsetj_conditional, xjc):
        # Without a conditioning set, sample the subset from its marginal.
        if subsetj_conditional is None:
            cov_int = cov.take(subset_j, axis=1)[subset_j]
            return cp.MvNormal(mean[subset_j], cov_int).sample(n)
        return _r_condmvn(
            n,
            mean=mean,
            cov=cov,
            dependent_ind=subset_j,
            given_ind=subsetj_conditional,
            x_given=xjc,
        )

    # Resolve the analytical reference first so that unsupported set-ups fail
    # with a clear error before any sampling is attempted.
    if model is linear_model:
        # NOTE(review): linear_model reads a module-level `beta`; the caller
        # must keep it equal to the `beta` passed here, otherwise estimates
        # and analytical values refer to different models.
        true_shapley = _linear_true_shapley(n_inputs, beta, cov)
    elif model is additive_model:
        true_shapley = _additive_true_shapley(n_inputs, cov)
    else:
        raise ValueError(
            "Analytical Shapley effects are available for linear_model and "
            "additive_model only."
        )

    np.random.seed(123)
    method = "exact"
    # The exact method does not sample permutations.
    n_perms = None

    estimates = []
    for n_outer in n_output_range:
        calculated = get_shapley(
            method,
            model,
            x_all,
            x_cond,
            n_perms,
            n_inputs,
            n_output,
            int(n_outer),
            n_inner,
        )
        # Assumes get_shapley returns a table with a "Shapley effects" column
        # holding one value per input -- TODO confirm against get_shapley.
        estimates.append(np.asarray(calculated["Shapley effects"], dtype=float))

    if estimates:
        estimated_shapleys = np.column_stack(estimates)
    else:
        estimated_shapleys = np.empty((n_inputs, 0))

    return estimated_shapleys, true_shapley


def _linear_true_shapley(n_inputs, beta, cov):
    """Return the analytical Shapley effects of the linear model (2 or 3 inputs)."""
    if n_inputs == 2:
        beta_1, beta_2 = beta[0], beta[1]
        var_1, var_2 = cov[0, 0], cov[1, 1]
        covariance = cov[0, 1]
        rho = covariance / (np.sqrt(var_1) * np.sqrt(var_2))

        component_1 = beta_1 ** 2 * var_1
        component_2 = beta_2 ** 2 * var_2
        cross = covariance * beta_1 * beta_2
        var_y = component_1 + 2 * cross + component_2
        share = 0.5 * (rho ** 2)
        return np.array(
            [
                (component_1 * (1 - share) + cross + component_2 * share) / var_y,
                (component_2 * (1 - share) + cross + component_1 * share) / var_y,
            ]
        )

    if n_inputs == 3:
        # Only cov[1, 2] enters the formulas; any other off-diagonal entry of
        # `cov` is ignored.
        beta_1, beta_2, beta_3 = beta[0], beta[1], beta[2]
        var_1, var_2, var_3 = cov[0, 0], cov[1, 1], cov[2, 2]
        covariance = cov[1, 2]
        rho = covariance / (np.sqrt(var_2) * np.sqrt(var_3))

        component_1 = beta_1 ** 2 * var_1
        component_2 = beta_2 ** 2 * var_2
        component_3 = beta_3 ** 2 * var_3
        cross = covariance * beta_2 * beta_3
        var_y = component_1 + component_2 + component_3 + 2 * cross
        share = 0.5 * (rho ** 2)
        return np.array(
            [
                component_1 / var_y,
                (component_2 + cross + share * (component_3 - component_2)) / var_y,
                (component_3 + cross + share * (component_2 - component_3)) / var_y,
            ]
        )

    raise ValueError(
        "In the linear case, analytical Shapley effects are defined for two "
        "or three inputs only."
    )


def _additive_true_shapley(n_inputs, cov):
    """Return the analytical Shapley effects of the additive model (3 inputs)."""
    if n_inputs != 3:
        raise ValueError(
            "In the additive case, analytical Shapley effects are defined for "
            "three inputs only."
        )

    var_1, var_2, var_3 = cov[0, 0], cov[1, 1], cov[2, 2]
    # Correlation between the first and third input: the normalisation uses
    # var_1 and var_3, so the numerator must be cov[0, 2].
    rho_sq = (cov[0, 2] / (np.sqrt(var_1) * np.sqrt(var_3))) ** 2
    # Presumably the model is Y = X1 + X2 * X3 with zero-mean inputs, which
    # gives this output variance -- TODO confirm against additive_model.
    product = var_2 * var_3
    var_y = var_1 + product

    return np.array(
        [
            (var_1 * (1 - rho_sq / 2) + product * rho_sq / 6) / var_y,
            (product * (3 + rho_sq) / 6) / var_y,
            (var_1 * rho_sq / 2 + product * (3 - 2 * rho_sq) / 6) / var_y,
        ]
    )

In [4]:
# Variances of the two inputs and their correlation.
var_1, var_2 = 16, 4
rho = 0.3

# Covariance implied by the correlation and the two standard deviations.
covariance = rho * np.sqrt(var_1) * np.sqrt(var_2)

# Symmetric 2x2 variance-covariance matrix of the inputs.
cov = np.array(((var_1, covariance), (covariance, var_2)))

## Plot against $N_O$ and mark analytical Shapley effects.