In [1]:
from scipy.stats import chi2, norm
import numpy as np
from sklearn.metrics import mutual_info_score

def conditional_mutual_information(X, Y, Z):
    """
    Calculate conditional mutual information I(X;Y|Z)
    """
    unique_Z = np.unique(Z)
    cmi = 0
    for z in unique_Z:
        indices = Z == z
        X_z = X[indices]
        Y_z = Y[indices]
        cmi += mutual_info_score(X_z, Y_z) * len(X_z) / len(Z)
    return cmi

def asymptotic_test(X, Y, Z):
    """
    Perform asymptotic test of conditional independence
    """
    n = len(Z)
    cmi = conditional_mutual_information(X, Y, Z)
    chi2_val = 2 * n * cmi
    dof = (len(np.unique(X)) - 1) * (len(np.unique(Y)) - 1) * len(np.unique(Z))
    p_value = 1 - chi2.cdf(chi2_val, df=dof)
    return chi2_val, p_value

def conditional_permute(X, Z):
    """
    Conditionally permute X given Z. X is permuted within each subgroup defined by the unique values of Z.
    """
    permuted_X = np.empty_like(X)
    for z in np.unique(Z):
        indices = Z == z
        values = X[indices]
        permuted_X[indices] = np.random.permutation(values)
    return permuted_X

def conditional_permutation_test(X, Y, Z, B=1000):
    """
    Perform conditional independence test based on permutations.
    """
    original_cmi = conditional_mutual_information(X, Y, Z)
    cmi_permutations = []
    for _ in range(B):
        permuted_X = conditional_permute(X, Z)
        cmi_perm = conditional_mutual_information(permuted_X, Y, Z)
        cmi_permutations.append(cmi_perm)

    p_value = (1 + sum(cmi_perm <= original_cmi for cmi_perm in cmi_permutations)) / (1 + B)
    return original_cmi, p_value

def generate_samples_conditional_dependence(n=1000):
    """
    Generate samples where X and Y are conditionally dependent given Z.
    Following Model 1: Z~ ∼ N(0, 1), discretize Z~, then X~ ∼ N(Z/2, 1) and Y~ ∼ N(Z/2, 1), and discretize X~ and Y~.
    """
    Z_tilde = np.random.normal(0, 1, n)
    Z = np.where(Z_tilde < 0, -1, 1)  # Discretize Z~
    X_tilde = np.random.normal(Z / 2, 1, n)
    Y_tilde = np.random.normal(Z / 2, 1, n)
    X = np.where(X_tilde < 0, -1, 1)  # Discretize X~
    Y = np.where(Y_tilde < 0, -1, 1)  # Discretize Y~
    return X, Y, Z

def generate_samples_conditional_independence(n=1000):
    """
    Generate samples where X and Y are conditionally independent given Z.
    Following Model 3: X~ and Y~ independently from N(0, 1), Z~ ∼ N((X + Y) / 2, 1), then discretize all.
    """
    X_tilde = np.random.normal(0, 1, n)
    Y_tilde = np.random.normal(0, 1, n)
    Z_tilde = np.random.normal((X_tilde + Y_tilde) / 2, 1, n)
    X = np.where(X_tilde < 0, -1, 1)  # Discretize X~
    Y = np.where(Y_tilde < 0, -1, 1)  # Discretize Y~
    Z = np.where(Z_tilde < 0, -1, 1)  # Discretize Z~
    return X, Y, Z

# Generate samples for both scenarios
X_cd, Y_cd, Z_cd = generate_samples_conditional_dependence()
X_ci, Y_ci, Z_ci = generate_samples_conditional_independence()

# Apply the tests
results = {
    "Conditional Dependence": {
        "Asymptotic Test": asymptotic_test(X_cd, Y_cd, Z_cd),
        "Permutation Test": conditional_permutation_test(X_cd, Y_cd, Z_cd)
    },
    "Conditional Independence": {
        "Asymptotic Test": asymptotic_test(X_ci, Y_ci, Z_ci),
        "Permutation Test": conditional_permutation_test(X_ci, Y_ci, Z_ci)
    }
}

print(results)


{'Conditional Dependence': {'Asymptotic Test': (0.38629002895058484, 0.8243624209869187), 'Permutation Test': (0.00019314501447529242, 0.16983016983016982)}, 'Conditional Independence': {'Asymptotic Test': (6.568886441535309, 0.037461437038060375), 'Permutation Test': (0.0032844432207676543, 0.9560439560439561)}}


In [3]:
import pprint
pprint.pprint(results)

{'Conditional Dependence': {'Asymptotic Test': (0.38629002895058484,
                                                0.8243624209869187),
                            'Permutation Test': (0.00019314501447529242,
                                                 0.16983016983016982)},
 'Conditional Independence': {'Asymptotic Test': (6.568886441535309,
                                                  0.037461437038060375),
                              'Permutation Test': (0.0032844432207676543,
                                                   0.9560439560439561)}}


# Conditional Dependence Scenario

    Asymptotic Test: The test statistic value is approximately 0.386 with a p-value of 0.824. This high p-value indicates that there is not enough evidence to reject the null hypothesis of conditional independence between XX and YY given ZZ. In other words, the test does not provide strong evidence against the notion that XX and YY are conditionally independent, despite the fact that we expected them to be conditionally dependent in this scenario.

    Permutation Test: The test statistic (conditional mutual information) is about 0.00019 with a p-value of approximately 0.170. Similar to the asymptotic test, this p-value does not strongly reject the null hypothesis of conditional independence. However, it's notably lower than the p-value from the asymptotic test, suggesting a slight inclination towards detecting dependency, yet still not conclusive.

# Conditional Independence Scenario

    Asymptotic Test: The test statistic value is approximately 6.569 with a p-value of 0.037. This result is significant at common significance levels (e.g., α=0.05α=0.05), suggesting that there is evidence to reject the null hypothesis of conditional independence. This is an interesting outcome because it implies a detection of conditional dependency when, according to the scenario setup, XX and YY should be conditionally independent given ZZ. This could be due to the model capturing underlying patterns or noise in the data that falsely suggest dependency.

    Permutation Test: The test statistic here is approximately 0.00328 with a p-value of about 0.956. Contrary to the asymptotic test, this very high p-value strongly suggests that there is no evidence against the null hypothesis of conditional independence in this scenario. This aligns with our expectation that XX and YY are conditionally independent given ZZ in this scenario.

# Conclusions

    The asymptotic test seems to have provided a counterintuitive result in the conditional independence scenario, detecting dependency where we expected none. This might be due to the sensitivity of the asymptotic test to the sample size, distribution characteristics, or specific patterns in the simulated data.

    The permutation test results appear more aligned with expectations, especially in the conditional independence scenario, where it did not find evidence against the null hypothesis.
    In the conditional dependence scenario, it suggested a slight, albeit not strong, indication of dependency, which could be due to the inherent randomness and complexity of detecting conditional dependencies in discretized data.

    These outcomes underscore the importance of using multiple testing approaches to assess conditional independence, as different tests may have varying sensitivities and specificities to the underlying data structures. They also highlight the complexities involved in statistical testing, where factors like sample size, distribution assumptions, and the method of discretization can significantly influence test outcomes.