# Mathematical Underpinnings - Lab 6

In [1]:
from sklearn.metrics import mutual_info_score
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from tqdm import tqdm

## Useful functions

In [2]:
def discetize_2bins(X):
    """Binarize `X` by sign.

    Parameters
    ----------
    X : scalar or numpy array
        Values to discretize.

    Returns
    -------
    Same shape as `X`: 1 where the entry is >= 0, and 0 otherwise.
    """
    is_non_negative = X >= 0
    # Multiplying by 1 turns the boolean mask into integers.
    return 1*is_non_negative

In [3]:
def conditional_permutation(X, Z):
    """Permute `X` separately inside every stratum of `Z`.

    Parameters
    ----------
    X : 1-D numpy array
        Values to shuffle.
    Z : 1-D numpy array of the same length
        Stratum label of every observation.

    Returns
    -------
    Float array of length len(Z) in which, for every distinct value of `Z`,
    the entries of `X` at the matching positions are randomly reordered
    among themselves (using the global numpy random state).
    """
    X_shuffled = np.zeros(len(Z))

    # np.unique returns the labels sorted, so the strata are visited in a fixed order.
    for level in np.unique(Z):
        in_stratum = (Z == level)
        X_shuffled[in_stratum] = np.random.permutation(X[in_stratum])

    return X_shuffled

In [4]:
def conditional_mutual_information(X, Y, Z):
    """Plug-in estimate of the conditional mutual information I(X;Y|Z).

    The estimate is the average of the within-stratum mutual information
    I(X;Y | Z=z), weighted by the empirical frequency of each value z.

    Parameters
    ----------
    X, Y : 1-D numpy arrays of discrete labels
    Z : 1-D numpy array of the same length
        Discrete conditioning variable.

    Returns
    -------
    The weighted sum of `mutual_info_score` over the strata of `Z`
    (0 when `Z` is empty).
    """
    n_samples = len(Z)
    total = 0

    for level in np.unique(Z):
        stratum = (Z == level)
        # Empirical probability P(Z = level).
        weight = np.sum(stratum)/n_samples
        total += weight*mutual_info_score(X[stratum], Y[stratum])

    return total

In [5]:
# II(X;Y;Z)
def interaction_information(X, Y, Z):
    """Interaction information II(X;Y;Z) = I(X;Y|Z) - I(X;Y).

    Both terms are plug-in estimates computed from the discrete samples
    `X`, `Y` and `Z` (1-D arrays of equal length).
    """
    cmi_given_z = conditional_mutual_information(X, Y, Z)
    marginal_mi = mutual_info_score(X, Y)
    return cmi_given_z - marginal_mi

In [6]:
# II(X;Y;Z1;Z2)
def interaction_information2(X, Y, Z1, Z2):
    """Four-way interaction information II(X;Y;Z1;Z2).

    Computed as II(X;Y;(Z1,Z2)) - II(X;Y;Z1) - II(X;Y;Z2), where the pair
    (Z1, Z2) is collapsed into a single discrete variable.
    """
    # Distinct (Z1, Z2) pairs get distinct codes when each variable is binary
    # (e.g. coded 0/1 or -1/1); other codings may collide.
    joint_code = Z1 + 2*Z2

    ii_joint = interaction_information(X, Y, joint_code)
    ii_first = interaction_information(X, Y, Z1)
    ii_second = interaction_information(X, Y, Z2)
    return ii_joint - ii_first - ii_second

## Task 1

In [7]:
def secmi2(X, Y, Z):
    """Second-order statistic: I(X;Y) + sum_k II(X;Y;Z_k).

    `Z` is a 2-D array whose columns Z_k are the conditioning variables.
    """
    n_cond = Z.shape[1]
    first_order_terms = (
        interaction_information(X, Y, Z[:, k]) for k in range(n_cond)
    )
    # Start the running sum from the marginal mutual information.
    return sum(first_order_terms, mutual_info_score(X, Y))
def secmi3(X, Y, Z):
    """Third-order statistic: secmi2(X, Y, Z) + sum_{a<b} II(X;Y;Z_a;Z_b).

    `Z` is a 2-D array whose columns are the conditioning variables; every
    unordered pair of distinct columns contributes one four-way term.
    """
    n_cond = Z.shape[1]
    column_pairs = [(a, b) for a in range(n_cond) for b in range(a + 1, n_cond)]

    score = secmi2(X, Y, Z)
    for a, b in column_pairs:
        score += interaction_information2(X, Y, Z[:, a], Z[:, b])
    return score

### a)

In [8]:
def cond_indep_test_permutation(X, Y, Z, B, stat):
    """Conditional-permutation test of X independent of Y given Z.

    Parameters
    ----------
    X, Y : 1-D numpy arrays of discrete labels
    Z : 2-D numpy array, one column per conditioning variable
    B : int
        Number of conditional permutations of `X`.
    stat : {"cmi", "secmi2", "secmi3"}
        Test statistic to use.

    Returns
    -------
    (scaled_stat, p_value) where `scaled_stat` is the observed statistic
    multiplied by 2*len(X) and `p_value` is
    (1 + #{permuted statistic >= observed statistic}) / (1 + B).

    Raises
    ------
    ValueError
        If `stat` is not one of the supported names.
    """
    supported_stats = ("cmi", "secmi2", "secmi3")
    if stat not in supported_stats:
        # Fail early with a clear message instead of hitting an unbound
        # local variable further down.
        raise ValueError(
            "stat must be one of " + str(supported_stats) + ", got " + repr(stat)
        )

    # Collapse the rows of Z into one code per observation (weights 1, 2, 4, ...).
    # Distinct rows get distinct codes when every column is binary
    # (0/1 or -1/1) -- other codings may collide.
    n_col_Z = Z.shape[1]
    Z_1dim = np.dot(Z, 2**np.linspace(0, n_col_Z-1, n_col_Z))

    def compute_stat(X_sample):
        # Single place that maps the statistic name to its estimator.
        if stat == "cmi":
            return conditional_mutual_information(X_sample, Y, Z_1dim)
        if stat == "secmi2":
            return secmi2(X_sample, Y, Z)
        return secmi3(X_sample, Y, Z)

    stat_value = compute_stat(X)

    n_at_least_as_large = 0
    for _ in range(B):
        # Shuffling X inside each stratum of Z keeps the (X, Z) relationship
        # while breaking any dependence between X and Y given Z.
        X_b = conditional_permutation(X, Z_1dim)
        if stat_value <= compute_stat(X_b):
            n_at_least_as_large += 1

    # The +1 terms count the observed sample itself, so p_value is never 0.
    p_value = (1 + n_at_least_as_large)/(1 + B)

    return 2*len(X)*stat_value, p_value

### b)

In [9]:
def sample_from_model1(n = 100):
    Y = 2*(np.random.randn(n) > 0) - 1
    Z1 = 2*(Y/2 + np.random.randn(n) > 0) - 1
    Z2 = 2*(Y/2 + np.random.randn(n) > 0) - 1
    Z3 = 2*(Y/2 + np.random.randn(n) > 0) - 1
    X = 2*(Z1/2 + np.random.randn(n) > 0) - 1
    return X,Y, np.stack((Z1,Z2,Z3), axis = -1)

In [10]:
# Test 1: condition on the first two columns of Z, i.e. (Z1, Z2).
# In sample_from_model1, X is generated from Z1 and independent noise only,
# so the rejection rate here estimates the type I error of each test.
n_tests = 100
alpha = 5/100
rejections = {"cmi": 0, "secmi2": 0, "secmi3": 0}

for _ in range(n_tests):
    X, Y, Z = sample_from_model1(n = 500)
    Z_cond = Z[:, 0:2]
    # The same sample is tested with every statistic, in a fixed order.
    for stat_name in rejections:
        _, p_value = cond_indep_test_permutation(X, Y, Z_cond, B = 50, stat = stat_name)
        rejections[stat_name] += p_value <= alpha

cmi_test = rejections["cmi"]
secmi2_test = rejections["secmi2"]
secmi3_test = rejections["secmi3"]
print("Test 1 CMI: " + str(cmi_test/n_tests))
print("Test 1 secmi2: " + str(secmi2_test/n_tests))
print("Test 1 secmi3: " + str(secmi3_test/n_tests))

Test 1 CMI: 0.02
Test 1 secmi2: 0.05
Test 1 secmi3: 0.03


In [11]:
# Test 2: condition on the last two columns of Z, i.e. (Z2, Z3).
# Z1 -- the only variable X is generated from in sample_from_model1 -- is
# left out, so the rejection rate here estimates the power of each test.
# n_tests and alpha are the values set in the previous cell.
rejections = {"cmi": 0, "secmi2": 0, "secmi3": 0}

for _ in range(n_tests):
    X, Y, Z = sample_from_model1(n = 500)
    Z_cond = Z[:, 1:3]
    # The same sample is tested with every statistic, in a fixed order.
    for stat_name in rejections:
        _, p_value = cond_indep_test_permutation(X, Y, Z_cond, B = 50, stat = stat_name)
        rejections[stat_name] += p_value <= alpha

cmi_test = rejections["cmi"]
secmi2_test = rejections["secmi2"]
secmi3_test = rejections["secmi3"]
print("Test 2 CMI: " + str(cmi_test/n_tests))
print("Test 2 secmi2: " + str(secmi2_test/n_tests))
print("Test 2 secmi3: " + str(secmi3_test/n_tests))

Test 2 CMI: 0.63
Test 2 secmi2: 0.62
Test 2 secmi3: 0.57


### c)

In [12]:
def sample_from_model2(n = 100):
    """Draw `n` observations from model 2 (all variables coded -1/1).

    X, Z1, Z2 and Z3 are independent fair coin flips. Y equals 1 with
    probability 0.8 when an odd number of X, Z1, Z2 equal 1, and with
    probability 0.2 otherwise; Z3 does not influence Y.

    Returns
    -------
    X, Y : arrays of shape (n,)
    Z : array of shape (n, 3) with columns Z1, Z2, Z3
    """
    # Draw the underlying 0/1 bits first (same draw order as before:
    # X, Z1, Z2, Z3).
    x_bit, z1_bit, z2_bit, z3_bit = (np.random.randint(0, 2, n) for _ in range(4))

    # The parity must be taken on the 0/1 bits and on the whole sum.
    # The earlier expression `X + Z1 + Z2 % 2 == 1` applied `%` to Z2 only
    # (always 1 for -1/1 values), which reduced to `X != Z1` and ignored Z2;
    # and the sum of three -1/1 values is always odd, so parenthesising the
    # -1/1 sum would not have worked either.
    odd_parity = (x_bit + z1_bit + z2_bit) % 2 == 1

    p = 0.2 * np.ones(n)
    p[odd_parity] = 0.8
    Y = 2*np.random.binomial(1, p) - 1

    # Recode the bits to -1/1 for the returned variables.
    X, Z1, Z2, Z3 = (2*bits - 1 for bits in (x_bit, z1_bit, z2_bit, z3_bit))
    return X, Y, np.stack((Z1, Z2, Z3), axis = -1)

## Task 2
 
in R