# Mathematical Underpinnings - Lab 6

In [26]:
from sklearn.metrics import mutual_info_score
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from tqdm import tqdm
import scipy

import warnings
warnings.filterwarnings("ignore")

## Useful functions

In [27]:
def discetize_2bins(X):
    X_discrete = 1*(X >= 0)
    return X_discrete

In [28]:
def conditional_permutation(X, Z):

    z_values = np.unique(Z)
    n_z_values = len(z_values)
    n = len(Z)

    X_b = np.zeros(n)

    for i in range(n_z_values):

        z_value_tmp = z_values[i]

        X_b[Z == z_value_tmp] = np.random.permutation(X[Z == z_value_tmp])

    return X_b

In [29]:
def conditional_mutual_information(X, Y, Z):

    z_values = np.unique(Z)
    n_z_values = len(z_values)
    n = len(Z)

    cmi = 0

    for i in range(n_z_values):

        z_value_tmp = z_values[i]
        z_condition = (Z == z_value_tmp)

        X_z = X[z_condition]
        Y_z = Y[z_condition]

        mi_XY_z = mutual_info_score(X_z, Y_z)
        p_z = np.sum(z_condition)/n

        cmi += p_z*mi_XY_z

    return cmi

In [30]:
# II(X;Y;Z)
def interaction_information(X, Y, Z):
    return conditional_mutual_information(X, Y, Z) - mutual_info_score(X, Y)

In [31]:
# II(X;Y;Z1;Z2)
def interaction_information2(X, Y, Z1, Z2):
    Z_1_and_2 = 2*Z2 + Z1
    return interaction_information(X, Y, Z_1_and_2) - interaction_information(X, Y, Z1) - interaction_information(X, Y, Z2)

## Task 1

In [32]:
def secmi2(X, Y, Z):
    sum_ = mutual_info_score(X, Y)
    for i in range(Z.shape[1]):
        sum_ += interaction_information(X, Y, Z[:, i])
    return sum_

def secmi3(X, Y, Z):
    sum_ = mutual_info_score(X, Y)
    for i in range(Z.shape[1]):
        sum_ += interaction_information(X, Y, Z[:, i])

        for j in range(i + 1, Z.shape[1]):
            sum_ += interaction_information2(X, Y, Z[:, i], Z[:, j])

    return sum_

### a)

In [33]:
def cond_indep_test_permutation(X, Y, Z, B, stat):

    n_col_Z = Z.shape[1]
    Z_1dim = np.dot(Z, 2**np.linspace(0, n_col_Z-1, n_col_Z))

    if stat == "cmi":
        stat_value = conditional_mutual_information(X, Y, Z_1dim)
    if stat == "secmi2":
        stat_value = secmi2(X, Y, Z)
    if stat == "secmi3":
        stat_value = secmi3(X, Y, Z)

    condition_p_value = 0
    for b in range(B):
        X_b = conditional_permutation(X, Z_1dim)

        if stat == "cmi":
            stat_value_b = conditional_mutual_information(X_b, Y, Z_1dim)
        if stat == "secmi2":
            stat_value_b = secmi2(X_b, Y, Z)
        if stat == "secmi3":
            stat_value_b = secmi3(X_b, Y, Z)

        if stat_value <= stat_value_b:
            condition_p_value += 1

    p_value = (1 + condition_p_value)/(1 + B)

    return 2*len(X)*stat_value, p_value

### b)

In [34]:
n = 100

Y = np.random.normal(0, 1, n)
Y_ = discetize_2bins(Y)

Z1 = np.random.normal(Y / 2, 1)
Z1_ = discetize_2bins(Z1)

Z2 = np.random.normal(Y / 2, 1)
Z2_ = discetize_2bins(Z2)

Z3 = np.random.normal(Y / 2, 1)
Z3_ = discetize_2bins(Z3)

X = np.random.normal(Z1 / 2, 1)
X_ = discetize_2bins(X)

In [35]:
B = 100

In [36]:
_, p_cmi = cond_indep_test_permutation(
    X, 
    Y, 
    np.vstack([Z1_, Z2_]).T,
    B,
    "cmi"
)

_, p_secmi2 = cond_indep_test_permutation(
    X, 
    Y, 
    np.vstack([Z1_, Z2_]).T,
    B,
    "secmi2"
)

_, p_secmi3 = cond_indep_test_permutation(
    X, 
    Y, 
    np.vstack([Z1_, Z2_]).T,
    B,
    "secmi3"
)

print("CMI", p_cmi)
print("SECMI2", p_secmi2)
print("SECMI3", p_secmi3)

# we expect:
# independent

# large p-value -> don't reject the null hypothesis -> independent

CMI 1.0
SECMI2 1.0
SECMI3 1.0


In [41]:
_, p_cmi = cond_indep_test_permutation(
    X, 
    Y, 
    np.vstack([Z2_, Z3_]).T,
    B,
    "cmi"
)

_, p_secmi2 = cond_indep_test_permutation(
    X, 
    Y, 
    np.vstack([Z2_, Z3_]).T,
    B,
    "secmi2"
)

_, p_secmi3 = cond_indep_test_permutation(
    X, 
    Y, 
    np.vstack([Z2_, Z3_]).T,
    B,
    "secmi3"
)

print("CMI", p_cmi)
print("SECMI2", p_secmi2)
print("SECMI3", p_secmi3)

# we expect:
# dependent

# large p-value -> don't reject the null hypothesis -> independent

CMI 1.0
SECMI2 1.0
SECMI3 1.0


In [42]:
counter_cmi = 0
counter_secmi2 = 0
counter_secmi3 = 0

for i in range(100):

    Y = np.random.normal(0, 1, n)
    Y_ = discetize_2bins(Y)

    Z1 = np.random.normal(Y / 2, 1)
    Z1_ = discetize_2bins(Z1)

    Z2 = np.random.normal(Y / 2, 1)
    Z2_ = discetize_2bins(Z2)

    Z3 = np.random.normal(Y / 2, 1)
    Z3_ = discetize_2bins(Z3)

    X = np.random.normal(Z1 / 2, 1)
    X_ = discetize_2bins(X)

    _, p_cmi = cond_indep_test_permutation(
        X, 
        Y, 
        np.vstack([Z1_, Z2_]).T,
        B,
        "cmi"
    )
    counter_cmi += p_cmi < 0.05

    _, p_secmi2 = cond_indep_test_permutation(
        X, 
        Y, 
        np.vstack([Z1_, Z2_]).T,
        B,
        "secmi2"
    )
    counter_secmi2 += p_secmi2 < 0.05

    _, p_secmi3 = cond_indep_test_permutation(
        X, 
        Y, 
        np.vstack([Z1_, Z2_]).T,
        B,
        "secmi3"
    )
    counter_secmi3 += p_secmi3 < 0.05

print("CMI - hypothesis rejections", counter_cmi)
print("SECMI2 - hypothesis rejections", counter_secmi2)
print("SECMI3 - hypothesis rejections", counter_secmi3)
# no hypothesis rejections -> something wrong? too small sample?

CMI - hypothesis rejections 0
SECMI2 - hypothesis rejections 0
SECMI3 - hypothesis rejections 0


In [43]:
counter_cmi = 0
counter_secmi2 = 0
counter_secmi3 = 0

for i in range(100):

    Y = np.random.normal(0, 1, n)
    Y_ = discetize_2bins(Y)

    Z1 = np.random.normal(Y / 2, 1)
    Z1_ = discetize_2bins(Z1)

    Z2 = np.random.normal(Y / 2, 1)
    Z2_ = discetize_2bins(Z2)

    Z3 = np.random.normal(Y / 2, 1)
    Z3_ = discetize_2bins(Z3)

    X = np.random.normal(Z1 / 2, 1)
    X_ = discetize_2bins(X)

    _, p_cmi = cond_indep_test_permutation(
        X, 
        Y, 
        np.vstack([Z2_, Z3_]).T,
        B,
        "cmi"
    )
    counter_cmi += p_cmi < 0.05

    _, p_secmi2 = cond_indep_test_permutation(
        X, 
        Y, 
        np.vstack([Z2_, Z3_]).T,
        B,
        "secmi2"
    )
    counter_secmi2 += p_secmi2 < 0.05

    _, p_secmi3 = cond_indep_test_permutation(
        X, 
        Y, 
        np.vstack([Z2_, Z3_]).T,
        B,
        "secmi3"
    )
    counter_secmi3 += p_secmi3 < 0.05

print("CMI - hypothesis rejections", counter_cmi)
print("SECMI2 - hypothesis rejections", counter_secmi2)
print("SECMI3 - hypothesis rejections", counter_secmi3)

CMI - hypothesis rejections 0
SECMI2 - hypothesis rejections 0
SECMI3 - hypothesis rejections 0


### c)

In [44]:
n = 100

X = scipy.stats.binom.rvs(1, 0.5, size=n)
Z1 = scipy.stats.binom.rvs(1, 0.5, size=n)
Z2 = scipy.stats.binom.rvs(1, 0.5, size=n)
Z3 = scipy.stats.binom.rvs(1, 0.5, size=n)

y1 = scipy.stats.binom.rvs(1, p=0.8, size=100)
y2 = scipy.stats.binom.rvs(1, p=0.2, size=100)
Y = np.where((X + Z1 + Z2) % 2 == 1, y1, y2)

In [45]:
# H01
# we expect:
# conditional dependence
# CIFE2 should not work
# CMI, CIFE3 should


counter_cmi = 0
counter_secmi2 = 0
counter_secmi3 = 0

for i in range(100):

    _, p_cmi = cond_indep_test_permutation(
        X, 
        Y, 
        np.vstack([Z1_, Z2_]).T,
        B,
        "cmi"
    )
    counter_cmi += p_cmi < 0.05

    _, p_secmi2 = cond_indep_test_permutation(
        X, 
        Y, 
        np.vstack([Z1_, Z2_]).T,
        B,
        "secmi2"
    )
    counter_secmi2 += p_secmi2 < 0.05

    _, p_secmi3 = cond_indep_test_permutation(
        X, 
        Y, 
        np.vstack([Z1_, Z2_]).T,
        B,
        "secmi3"
    )
    counter_secmi3 += p_secmi3 < 0.05

print("CMI - hypothesis rejections", counter_cmi)
print("SECMI2 - hypothesis rejections", counter_secmi2)
print("SECMI3 - hypothesis rejections", counter_secmi3)

CMI - hypothesis rejections 0
SECMI2 - hypothesis rejections 0
SECMI3 - hypothesis rejections 0


In [46]:
# H02
# we expect:
# conditional independence

counter_cmi = 0
counter_secmi2 = 0
counter_secmi3 = 0

for i in range(100):

    _, p_cmi = cond_indep_test_permutation(
        X, 
        Y, 
        np.vstack([Z2_, Z3_]).T,
        B,
        "cmi"
    )
    counter_cmi += p_cmi < 0.05

    _, p_secmi2 = cond_indep_test_permutation(
        X, 
        Y, 
        np.vstack([Z2_, Z3_]).T,
        B,
        "secmi2"
    )
    counter_secmi2 += p_secmi2 < 0.05

    _, p_secmi3 = cond_indep_test_permutation(
        X, 
        Y, 
        np.vstack([Z2_, Z3_]).T,
        B,
        "secmi3"
    )
    counter_secmi3 += p_secmi3 < 0.05

print("CMI - hypothesis rejections", counter_cmi)
print("SECMI2 - hypothesis rejections", counter_secmi2)
print("SECMI3 - hypothesis rejections", counter_secmi3)

CMI - hypothesis rejections 0
SECMI2 - hypothesis rejections 0
SECMI3 - hypothesis rejections 0


## Task 2
 
in R