# Mathematical Underpinnings - Lab 6

In [4]:
from sklearn.metrics import mutual_info_score
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from tqdm import tqdm

## Useful functions

In [16]:
def discetize_2bins(X):
    """Binarize X by sign.

    Parameters
    ----------
    X : array-like or scalar supporting ``>=`` and ``*``
        Values to discretize.

    Returns
    -------
    Same shape as X, integer-valued: 1 where ``X >= 0`` and 0 elsewhere.
    """
    is_nonnegative = X >= 0
    # Multiplying the boolean mask by 1 converts True/False to 1/0.
    return is_nonnegative * 1

In [6]:
def conditional_permutation(X, Z):
    """Permute X separately inside every group of samples sharing a Z value.

    Parameters
    ----------
    X : 1-D numpy array
        Values to shuffle.
    Z : 1-D numpy array, same length as X
        Stratum label of every sample.

    Returns
    -------
    numpy float array of length ``len(Z)``: within each stratum the values
    of X are randomly reordered (via ``np.random.permutation``), so no value
    ever moves to a different stratum.
    """
    X_b = np.zeros(len(Z))

    # Strata are visited in the sorted order returned by np.unique.
    for z_level in np.unique(Z):
        in_stratum = (Z == z_level)
        X_b[in_stratum] = np.random.permutation(X[in_stratum])

    return X_b

In [7]:
def conditional_mutual_information(X, Y, Z):
    """Plug-in estimate of the conditional mutual information I(X;Y|Z).

    For every distinct value z of Z, the mutual information between X and Y
    is computed (with ``mutual_info_score``) on the samples where ``Z == z``
    and weighted by the empirical frequency of z.

    Parameters
    ----------
    X, Y : 1-D numpy arrays of discrete labels
    Z : 1-D numpy array of discrete labels, same length as X and Y

    Returns
    -------
    The frequency-weighted sum of the per-stratum mutual informations
    (the integer 0 when Z is empty).
    """
    n_samples = len(Z)
    cmi = 0

    for z_level in np.unique(Z):
        stratum = (Z == z_level)
        # Empirical P(Z = z_level).
        weight = np.sum(stratum)/n_samples
        cmi += weight*mutual_info_score(X[stratum], Y[stratum])

    return cmi

In [8]:
# II(X;Y;Z)
def interaction_information(X, Y, Z):
    """Interaction information II(X;Y;Z) = I(X;Y|Z) - I(X;Y).

    X, Y and Z are equally long arrays of discrete labels; the result is the
    difference between the conditional and the marginal mutual information
    estimates.
    """
    cmi_given_z = conditional_mutual_information(X, Y, Z)
    mi_marginal = mutual_info_score(X, Y)
    return cmi_given_z - mi_marginal

In [9]:
# II(X;Y;Z1;Z2)
def interaction_information2(X, Y, Z1, Z2):
    """Third-order interaction information II(X;Y;Z1;Z2).

    Computed as II(X;Y;(Z1,Z2)) - II(X;Y;Z1) - II(X;Y;Z2), where (Z1,Z2) is
    the joint variable whose value identifies the pair (Z1, Z2).

    Parameters
    ----------
    X, Y, Z1, Z2 : 1-D numpy arrays of discrete labels, all the same length

    Returns
    -------
    The interaction information estimate described above.
    """
    # Encode the pair (Z1, Z2) as one label. The former `2*Z2 + Z1` is only
    # injective when Z1 takes values in {0, 1}; a mixed-radix code over the
    # per-variable level indices is injective for any discrete labels and, for
    # binary inputs, induces the same strata in the same order.
    z1_levels, z1_codes = np.unique(Z1, return_inverse=True)
    _, z2_codes = np.unique(Z2, return_inverse=True)
    Z_1_and_2 = len(z1_levels)*z2_codes.reshape(-1) + z1_codes.reshape(-1)

    ii_joint = interaction_information(X, Y, Z_1_and_2)
    ii_z1 = interaction_information(X, Y, Z1)
    ii_z2 = interaction_information(X, Y, Z2)
    return ii_joint - ii_z1 - ii_z2

## Task 1

In [10]:
def secmi2(X,Y,Z):
    """Second-order statistic: I(X;Y) + sum_i II(X;Y;Z_i).

    Parameters
    ----------
    X, Y : 1-D numpy arrays of discrete labels
    Z : indexable collection of conditioning variables; ``Z[i]`` is the
        sample vector of the i-th variable (e.g. the rows of a 2-D array).

    Returns
    -------
    Mutual information of X and Y plus the interaction information of
    (X, Y) with every single conditioning variable.
    """
    conditioning_vars = [Z[k] for k in range(len(Z))]

    first_order = 0
    for z_single in conditioning_vars:
        first_order += interaction_information(X, Y, z_single)

    return first_order + mutual_info_score(X, Y)
    
def secmi3(X,Y,Z):
    """Third-order statistic:
    I(X;Y) + sum_i II(X;Y;Z_i) + sum_{i<j} II(X;Y;Z_i;Z_j).

    Parameters
    ----------
    X, Y : 1-D numpy arrays of discrete labels
    Z : indexable collection of conditioning variables; ``Z[i]`` is the
        sample vector of the i-th variable (e.g. the rows of a 2-D array).

    Returns
    -------
    Mutual information of X and Y plus all first-order and all pairwise
    (unordered, i < j) interaction-information terms.
    """
    conditioning_vars = [Z[k] for k in range(len(Z))]

    # Terms with a single conditioning variable.
    first_order = 0
    for z_single in conditioning_vars:
        first_order += interaction_information(X, Y, z_single)

    # Terms with every unordered pair of conditioning variables.
    second_order = 0
    for a, z_a in enumerate(conditioning_vars):
        for z_b in conditioning_vars[a + 1:]:
            second_order += interaction_information2(X, Y, z_a, z_b)

    return first_order + second_order + mutual_info_score(X, Y)

### a)

In [11]:
def cond_indep_test_permutation(X, Y, Z, B, stat):
    """Conditional permutation test of X independent of Y given Z.

    X is shuffled B times inside the strata defined by the rows of Z, the
    chosen statistic is recomputed on every shuffled sample, and the p-value
    is the share of statistics (observed one included) that are at least as
    large as the observed statistic.

    Parameters
    ----------
    X, Y : 1-D numpy arrays of discrete labels, length n
    Z : array of shape (n, k) with one conditioning variable per column
        (a 1-D array is treated as a single column)
    B : int
        Number of conditional permutations.
    stat : {"cmi", "secmi2", "secmi3"}
        Test statistic to use.

    Returns
    -------
    (statistic, p_value) : the observed statistic multiplied by ``2*len(X)``
    and ``(1 + #{b : stat_b >= stat_observed}) / (1 + B)``.

    Raises
    ------
    ValueError
        If `stat` is not one of the supported names.
    """
    valid_stats = ("cmi", "secmi2", "secmi3")
    # Fail early: previously an unknown name left the statistic undefined and
    # surfaced later as an UnboundLocalError.
    if stat not in valid_stats:
        raise ValueError(f"stat must be one of {valid_stats}, got {stat!r}")

    Z = np.asarray(Z)
    if Z.ndim == 1:
        Z = Z.reshape(-1, 1)

    # One integer label per distinct row of Z (mixed-radix code over the
    # per-column level indices, column 0 least significant). For binary
    # columns this yields the same strata, in the same order, as the former
    # sum_k Z[:, k] * 2**k code, but it stays injective for non-binary columns.
    Z_1dim = np.zeros(Z.shape[0], dtype=np.int64)
    radix = 1
    for column in Z.T:
        levels, codes = np.unique(column, return_inverse=True)
        Z_1dim += radix*codes.reshape(-1)
        radix *= len(levels)

    Z_columns = Z.T

    def _statistic(X_sample):
        # Evaluate the selected statistic for a (possibly permuted) X sample.
        if stat == "cmi":
            return conditional_mutual_information(X_sample, Y, Z_1dim)
        if stat == "secmi2":
            return secmi2(X_sample, Y, Z_columns)
        return secmi3(X_sample, Y, Z_columns)

    stat_value = _statistic(X)

    n_at_least_as_large = 0
    for _b in range(B):
        X_b = conditional_permutation(X, Z_1dim)
        if stat_value <= _statistic(X_b):
            n_at_least_as_large += 1

    # The "+1" terms count the observed sample itself, so p_value > 0.
    p_value = (1 + n_at_least_as_large)/(1 + B)

    return 2*len(X)*stat_value, p_value

### b)

In [31]:
n = 100

# NOTE: no RNG seed is set in the visible cells, so every run draws new data.

# Binary class label Y: sign of a standard normal draw.
Y_c = np.random.normal(loc=0, scale=1, size=n)
Y = discetize_2bins(Y_c)

# Z1, Z2, Z3: signs of unit-variance normals centred at 2*Y-1
# (-1 when Y == 0, +1 when Y == 1), drawn in this order.
Z1_c, Z2_c, Z3_c = [np.random.normal(2*Y-1, 1, n) for _draw in range(3)]
Z1, Z2, Z3 = [discetize_2bins(latent) for latent in (Z1_c, Z2_c, Z3_c)]

# X is generated from Z1 only.
X_c = np.random.normal(loc=2*Z1-1, scale=1, size=n)
X = discetize_2bins(X_c)

In [32]:
# Z1 and Z2
# Repeat the permutation test N times on the same data (B = 50 permutations
# each) and count, per statistic, how often the p-value falls below 0.05.
N = 100
resultZ1Z2 = dict.fromkeys(("cmi", "secmi2", "secmi3"), 0)
for i in range(N):
    for stat in resultZ1Z2:
        p_value = cond_indep_test_permutation(X, Y, np.transpose([Z1, Z2]), 50, stat)[1]
        resultZ1Z2[stat] += p_value < 0.05
for stat in resultZ1Z2:
    print(f"Conditional permutation test based on {stat} null hypothesis rejection:  {resultZ1Z2[stat]}")

Conditional permutation test based on cmi null hypothesis rejection:  0
Conditional permutation test based on secmi2 null hypothesis rejection:  0
Conditional permutation test based on secmi3 null hypothesis rejection:  0


In [33]:
# Z2 and Z3
# Same experiment as for Z1 and Z2, now conditioning on (Z2, Z3); N comes from
# the preceding cell.
resultZ2Z3 = dict.fromkeys(("cmi", "secmi2", "secmi3"), 0)
for i in range(N):
    for stat in resultZ2Z3:
        p_value = cond_indep_test_permutation(X, Y, np.transpose([Z2, Z3]), 50, stat)[1]
        resultZ2Z3[stat] += p_value < 0.05
for stat in resultZ2Z3:
    print(f"Conditional permutation test based on {stat} null hypothesis rejection:  {resultZ2Z3[stat]}")

Conditional permutation test based on cmi null hypothesis rejection:  94
Conditional permutation test based on secmi2 null hypothesis rejection:  99
Conditional permutation test based on secmi3 null hypothesis rejection:  92


### c)

In [21]:
# Five independent fair-coin vectors, drawn in the order X, Z1, Z2, Z3, P.
# P is not referenced by any later visible cell.
X, Z1, Z2, Z3, P = [np.random.binomial(1, 0.5, n) for _draw in range(5)]

# Y is 1 with probability 0.8 when X + Z1 + Z2 is odd and 0.2 when it is even.
Y = np.zeros(n)
for i in range(n):
    is_odd = (X[i] + Z1[i] + Z2[i]) % 2 == 1
    Y[i] = np.random.binomial(1, 0.8 if is_odd else 0.2)


In [28]:
#Z1 and Z2
# Repeat the permutation test N times on the same data (B = 50 permutations
# each) and count, per statistic, how often the p-value falls below 0.05.
N = 100
resultZ1Z2 = dict.fromkeys(("cmi", "secmi2", "secmi3"), 0)
for i in range(N):
    for stat in resultZ1Z2:
        p_value = cond_indep_test_permutation(X, Y, np.transpose([Z1, Z2]), 50, stat)[1]
        resultZ1Z2[stat] += p_value < 0.05
for stat in resultZ1Z2:
    print(f"Conditional permutation test based on {stat} null hypothesis rejection:  {resultZ1Z2[stat]}")

Conditional permutation test based on cmi null hypothesis rejection:  100
Conditional permutation test based on secmi2 null hypothesis rejection:  0
Conditional permutation test based on secmi3 null hypothesis rejection:  100


In [29]:
# Z2 and Z3
# Same experiment as for Z1 and Z2, now conditioning on (Z2, Z3); N comes from
# the preceding cell.
resultZ2Z3 = dict.fromkeys(("cmi", "secmi2", "secmi3"), 0)
for i in range(N):
    for stat in resultZ2Z3:
        p_value = cond_indep_test_permutation(X, Y, np.transpose([Z2, Z3]), 50, stat)[1]
        resultZ2Z3[stat] += p_value < 0.05
for stat in resultZ2Z3:
    print(f"Conditional permutation test based on {stat} null hypothesis rejection:  {resultZ2Z3[stat]}")

Conditional permutation test based on cmi null hypothesis rejection:  0
Conditional permutation test based on secmi2 null hypothesis rejection:  0
Conditional permutation test based on secmi3 null hypothesis rejection:  0


## Task 2
 
This task is solved in R (not included in this notebook).