In [2]:
from sklearn.metrics import mutual_info_score
#from sklearn.feature_selection import chi2
from scipy.stats import chi2_contingency
from scipy.stats import chi2
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde, pearsonr
import seaborn as sns
import pandas as pd
import warnings
from sklearn.preprocessing import KBinsDiscretizer
warnings.filterwarnings("ignore")

# Mathematical Underpinnings - Lab 4

Tests to verify hipoteses of independence (from Lab 4):

In [3]:
def indep_test_asymptotic(X, Y, stat):

    if stat == "mi":

        stat_value = 2*len(X)*mutual_info_score(X, Y)

    if stat == "chi2":

        test_res = (chi2_contingency(pd.crosstab(X, Y)))
        stat_value = test_res.statistic

    df = (len(np.unique(X)) - 1)*(len(np.unique(Y)) - 1)

    p_value = 1 - chi2.cdf(stat_value, df=df)

    return stat_value, p_value

def indep_test_permutation(X, Y, B):

    stat_value = mutual_info_score(X, Y)

    condition_p_value = 0
    for b in range(B):
        X_b = np.random.permutation(X)

        stat_value_b = mutual_info_score(X_b, Y)

        if stat_value <= stat_value_b:
            condition_p_value += 1

    p_value = (1 + condition_p_value)/(1 + B)

    return 2*len(X)*stat_value, p_value

## Task 1

In [4]:
# a function which computes CMI
def conditional_mutual_information(X, Y, Z):
    cmi = 0
    n = len(Z)
    for z in np.unique(Z):
        proba = np.sum(Z == z)/n
        cmi += proba*mutual_info_score(X[Z == z], Y[Z == z])
    return cmi

### a)

In [5]:
# CI test based on CMI and asymptotics
def cond_indep_test_asymptotic(X, Y, Z):
    stat_value = 2*len(X)*conditional_mutual_information(X, Y, Z)
    df = (len(np.unique(X)) - 1)*(len(np.unique(Y)) - 1) *len(np.unique(Z)) 
    p_value = 1 - chi2.cdf(stat_value, df=df)

    return stat_value, p_value

### b)

In [6]:
# CI test based on CMI and permutations
def conditional_petmutation(X, Z):
    X_b = np.zeros(len(X))
    for z in np.unique(Z):
        X_b[Z==z] = np.random.permutation(X[Z == z])
    return X_b

def cond_indep_test_permutation(X, Y, Z, B):
    stat_value = conditional_mutual_information(X, Y, Z)    
    condition_p_value = 0
    for _ in range(B):
        X_b = conditional_petmutation(X, Z)
        stat_value_b = conditional_mutual_information(X_b, Y, Z)
        if stat_value <= stat_value_b:
            condition_p_value += 1
    p_value = (1 + condition_p_value)/(1 + B)

    return 2*len(X)*stat_value, p_value

### c)

conditionaly independent

In [15]:
Z = np.random.normal(0, 1, 1000)
X = np.random.normal(Z, np.ones(1000)) 
Y = np.random.normal(Z*2, np.ones(1000))
discretizer = KBinsDiscretizer(n_bins=10, encode='ordinal', strategy='uniform',subsample=None)
Xt = discretizer.fit_transform(X.reshape(-1, 1)).flatten()
Yt = discretizer.fit_transform(Y.reshape(-1, 1)).flatten()
Zt = discretizer.fit_transform(Z.reshape(-1, 1)).flatten()
print(f"Asymptotic test based on conditional mutual information: {cond_indep_test_asymptotic(Xt, Yt, Zt)}")
print(f"Conditional permutation test: {cond_indep_test_permutation(Xt, Yt, Zt, 100)}")

Asymptotic test based on conditional mutual information: (172.03812256635607, 1.0)
Conditional permutation test: (172.03812256635607, 0.0891089108910891)


conditionaly dependent

In [34]:
X = np.random.normal(0, 1, size=1000)
Y = np.random.normal(0, 1, size=1000)
Z = X + Y
discretizer = KBinsDiscretizer(n_bins=10, encode='ordinal', strategy='uniform',subsample=None)
Xt = discretizer.fit_transform(X.reshape(-1, 1)).flatten()
Yt = discretizer.fit_transform(Y.reshape(-1, 1)).flatten()
Zt = discretizer.fit_transform(Z.reshape(-1, 1)).flatten()
print(f"Asymptotic test based on conditional mutual information: {cond_indep_test_asymptotic(Xt, Yt, Zt)}")
print(f"Conditional permutation test: {cond_indep_test_permutation(Xt, Yt, Zt, 100)}")

Asymptotic test based on conditional mutual information: (1501.0516097882858, 0.0)
Conditional permutation test: (1501.0516097882858, 0.009900990099009901)


## Task 2

In [29]:

def sample_from_model1():
    n = 1000
    Z_c = np.random.normal(0, 1, n)
    Z = np.array([-1 if Z_c[i] < 0 else 1 for i in range(n)])
    X_c = np.random.normal(Z/2, np.ones(n))
    X = np.array([-1 if X_c[i] < 0 else 1 for i in range(n)])
    Y_c = np.random.normal(Z/2, np.ones(n))
    Y = np.array([-1 if Y_c[i] < 0 else 1 for i in range(n)])
    return X, Y, Z

def sample_from_model2():
    n = 1000
    X_c = np.random.normal(0, 1, n)
    X = np.array([-1 if X_c[i] < 0 else 1 for i in range(n)])
    Z_c = np.random.normal(X/2, np.ones(n))
    Z = np.array([-1 if Z_c[i] < 0 else 1 for i in range(n)])
    Y_c = np.random.normal(Z/2, np.ones(n))
    Y = np.array([-1 if Y_c[i] < 0 else 1 for i in range(n)])
    return X, Y, Z

def sample_from_model3():
    n = 1000
    X_c = np.random.normal(0,1,n)
    X = np.array([-1 if X_c[i] < 0 else 1 for i in range(n)])
    Y_c = np.random.normal(0,1,n)
    Y = np.array([-1 if Y_c[i] < 0 else 1 for i in range(n)])
    Z_c = np.random.normal((X+Y)/2, np.ones(n))
    Z = np.array([-1 if Z_c[i] < 0 else 1 for i in range(n)])
    return X, Y, Z

### a)

answer:   
Model 1 and 2 - conditional idependence  
Model 3 - independence  


### b)

In [31]:
X1, Y1, Z1 = sample_from_model1()
X2, Y2, Z2 = sample_from_model2()
X3, Y3, Z3 = sample_from_model3()

In [38]:
print(f"Model 1 MI: {mutual_info_score(X1, Y1)}")
print(f"Model 1 CMI: {conditional_mutual_information(X1, Y1, Z1)}\n")
print(f"Model 2 MI: {mutual_info_score(X2, Y2)}")
print(f"Model 2 CMI: {conditional_mutual_information(X2, Y2, Z2)}\n")
print(f"Model 3 MI: {mutual_info_score(X3, Y3)}")
print(f"Model 3 CMI: {conditional_mutual_information(X3, Y3, Z3)}")

Model 1 MI: 0.005739884023151176
Model 1 CMI: 0.0011645847928807192

Model 2 MI: 0.011053690733280719
Model 2 CMI: 8.855297177055708e-05

Model 3 MI: 0.0008469862681012574
Model 3 CMI: 0.0063871159739739055


### c)

In [34]:
print('Model 1')
print(f"Asymptotic test based on mutual information: {indep_test_asymptotic(X1, Y1,'mi')}")
print(f"Permutation test {indep_test_permutation(X1, Y1, 100)}")
print(f"Asymptotic test based on conditional mutual information: {cond_indep_test_asymptotic(X1, Y1, Z1)}")
print(f"Conditional permutation test {cond_indep_test_permutation(X1, Y1, Z1, 100)}")

Model 1
Asymptotic test based on mutual information: (11.479768046302352, 0.0007035791046886564)
Permutation test (11.479768046302352, 0.009900990099009901)
Asymptotic test based on conditional mutual information: (2.3291695857614383, 0.3120522014335675)
Conditional permutation test (2.3291695857614383, 0.3069306930693069)


In [35]:
print('Model 2')
print(f"Asymptotic test based on mutual information: {indep_test_asymptotic(X2, Y2,'mi')}")
print(f"Permutation test {indep_test_permutation(X2, Y2, 100)}")
print(f"Asymptotic test based on conditional mutual information: {cond_indep_test_asymptotic(X2, Y2, Z2)}")
print(f"Conditional permutation test {cond_indep_test_permutation(X2, Y2, Z2, 100)}")

Model 2
Asymptotic test based on mutual information: (22.10738146656144, 2.5781647922107354e-06)
Permutation test (22.10738146656144, 0.009900990099009901)
Asymptotic test based on conditional mutual information: (0.17710594354111417, 0.9152546267937769)
Conditional permutation test (0.17710594354111417, 0.9405940594059405)


In [36]:
print('Model 3')
print(f"Asymptotic test based on mutual information: {indep_test_asymptotic(X3, Y3,'mi')}")
print(f"Permutation test {indep_test_permutation(X3, Y3, 100)}")
print(f"Asymptotic test based on conditional mutual information: {cond_indep_test_asymptotic(X3, Y3, Z3)}")
print(f"Conditional permutation test {cond_indep_test_permutation(X3, Y3, Z3, 100)}")

Model 3
Asymptotic test based on mutual information: (1.6939725362025149, 0.1930781282234395)
Permutation test (1.6939725362025149, 0.27722772277227725)
Asymptotic test based on conditional mutual information: (12.774231947947811, 0.0016831033222041158)
Conditional permutation test (12.774231947947811, 0.009900990099009901)
