# Mathematical Underpinnings - Lab 6

In [37]:
from sklearn.metrics import mutual_info_score
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from tqdm import tqdm

## Useful functions

In [38]:
def discetize_2bins(X):
    """Binarise ``X`` around zero.

    Entries satisfying ``X >= 0`` are mapped to 1 and all remaining entries
    to 0. The comparison is applied as ``X >= 0``, so ``X`` must support an
    element-wise comparison against a scalar (e.g. a NumPy array).
    """
    is_non_negative = X >= 0
    # Multiplying the boolean mask by 1 converts it to integers.
    return 1 * is_non_negative

In [39]:
def conditional_permutation(X, Z):
    """Shuffle ``X`` separately within every group of equal ``Z`` values.

    For each distinct value of ``Z`` the corresponding entries of ``X`` are
    randomly permuted among themselves (via ``np.random.permutation``), so
    the values of ``X`` seen together with each value of ``Z`` are unchanged.

    Returns a new float array of length ``len(Z)``; ``X`` is not modified.
    """
    X_b = np.zeros(len(Z))

    for stratum in np.unique(Z):
        in_stratum = (Z == stratum)
        X_b[in_stratum] = np.random.permutation(X[in_stratum])

    return X_b

In [40]:
def conditional_mutual_information(X, Y, Z):
    """Estimate I(X; Y | Z) as a weighted sum over the values of ``Z``.

    For every distinct value ``z`` of ``Z`` the mutual information between
    the entries of ``X`` and ``Y`` with ``Z == z`` is computed with
    ``mutual_info_score`` and weighted by the empirical frequency of ``z``.

    Returns 0 when ``Z`` is empty (no groups to sum over).
    """
    n_samples = len(Z)
    total = 0

    for level in np.unique(Z):
        mask = (Z == level)
        # Empirical P(Z = level).
        weight = np.sum(mask)/n_samples
        total += weight*mutual_info_score(X[mask], Y[mask])

    return total

In [41]:
# II(X;Y;Z) = I(X;Y|Z) - I(X;Y)
def interaction_information(X, Y, Z):
    """Return the interaction information of ``X``, ``Y`` and ``Z``.

    Computed as the conditional mutual information of ``X`` and ``Y`` given
    ``Z`` minus the (unconditional) mutual information of ``X`` and ``Y``.
    """
    conditional_part = conditional_mutual_information(X, Y, Z)
    marginal_part = mutual_info_score(X, Y)
    return conditional_part - marginal_part

In [42]:
# II(X;Y;Z1;Z2)
def interaction_information2(X, Y, Z1, Z2):
    """Return the four-way interaction information of ``X``, ``Y``, ``Z1``, ``Z2``.

    Computed as II(X;Y;(Z1,Z2)) - II(X;Y;Z1) - II(X;Y;Z2), where the pair
    (Z1, Z2) is collapsed into a single variable ``Z1 + 2*Z2``.

    NOTE(review): that code is a distinct label per (Z1, Z2) pair when both
    take values in {0, 1}; other encodings of the inputs may collide.
    """
    joint_code = Z1 + 2*Z2

    joint_term = interaction_information(X, Y, joint_code)
    first_term = interaction_information(X, Y, Z1)
    second_term = interaction_information(X, Y, Z2)

    return joint_term - first_term - second_term

## Task 1

In [43]:
def secmi2(X, Y, Z):
    """Second-order statistic: I(X;Y) plus per-column interaction terms.

    Returns ``I(X;Y) + sum_i II(Y;X;Z[:, i])`` where the sum runs over the
    columns of the 2-D array ``Z``.
    """
    n_cond = Z.shape[1]

    score = mutual_info_score(X, Y)
    for col in range(n_cond):
        score += interaction_information(Y, X, Z[:, col])

    return score

def secmi3(X, Y, Z):
    """Third-order statistic: ``secmi2`` plus pairwise interaction terms.

    Adds ``II(Y;X;Z[:, i];Z[:, j])`` for every pair of columns ``i < j`` of
    the 2-D array ``Z`` to ``secmi2(X, Y, Z)``.
    """
    n_cond = Z.shape[1]

    score = secmi2(X, Y, Z)
    for first in range(n_cond):
        # Start at first + 1 so each unordered pair is visited exactly once.
        for second in range(first + 1, n_cond):
            score += interaction_information2(Y, X, Z[:, first], Z[:, second])

    return score

### a)

In [52]:
def cond_indep_test_permutation(X, Y, Z, B, stat):
    """Permutation test of conditional independence of ``X`` and ``Y`` given ``Z``.

    The chosen statistic is computed on the observed data and on ``B``
    copies in which ``X`` has been permuted within the groups defined by the
    rows of ``Z``. The p-value is the proportion of permuted statistics that
    are at least as large as the observed one, with the usual +1 correction.

    Parameters
    ----------
    X, Y : 1-D arrays of discrete observations.
    Z : 2-D array, one column per conditioning variable.
        NOTE(review): rows are collapsed to a single label with weights
        ``2**k``, which gives one label per distinct row when the entries
        are 0/1 - confirm before using other encodings.
    B : int
        Number of permutations.
    stat : {"cmi", "secmi2", "secmi3"}
        Which statistic to use.

    Returns
    -------
    tuple
        ``(2*len(X)*stat_value, p_value)``.

    Raises
    ------
    ValueError
        If ``stat`` is not one of the supported names.
    """
    supported = ("cmi", "secmi2", "secmi3")
    if stat not in supported:
        # Fail early with a clear message instead of falling through every
        # branch and hitting an unbound local variable later.
        raise ValueError(f"Unknown stat {stat!r}; expected one of {supported}")

    n_col_Z = Z.shape[1]
    Z_1dim = np.dot(Z, 2**np.linspace(0, n_col_Z-1, n_col_Z))

    def compute_stat(X_sample):
        # Single dispatch point shared by the observed and permuted samples.
        if stat == "cmi":
            return conditional_mutual_information(X_sample, Y, Z_1dim)
        if stat == "secmi2":
            return secmi2(X_sample, Y, Z)
        return secmi3(X_sample, Y, Z)

    stat_value = compute_stat(X)

    n_at_least_as_large = 0
    for _ in range(B):
        X_b = conditional_permutation(X, Z_1dim)
        if stat_value <= compute_stat(X_b):
            n_at_least_as_large += 1

    p_value = (1 + n_at_least_as_large)/(1 + B)

    return 2*len(X)*stat_value, p_value

### b)

In [53]:
def generate_sample_b(n):
    """Draw ``n`` observations of binary variables ``(X, Y, Z1, Z2, Z3)``.

    ``Y`` is a thresholded standard normal draw. ``Z1``, ``Z2`` and ``Z3``
    are each a thresholded normal draw with mean ``Y/2`` and unit standard
    deviation, and ``X`` is generated the same way from ``Z1`` alone.
    """
    def noisy_binary_copy(parent):
        # Normal noise centred at parent/2, then thresholded at zero.
        return discetize_2bins(np.random.normal(parent/2, 1, n))

    Y = discetize_2bins(np.random.normal(size=n))
    Z1, Z2, Z3 = [noisy_binary_copy(Y) for _ in range(3)]
    X = noisy_binary_copy(Z1)

    return X, Y, Z1, Z2, Z3

In [54]:
def experiment(stat, N=100, alpha=0.05, n=100, B=50, sample_gen_fnc=None):
    """Count rejections of two conditional-independence hypotheses.

    In each of ``N`` repetitions a sample ``(X, Y, Z1, Z2, Z3)`` is drawn
    with ``sample_gen_fnc(n)`` and two permutation tests are run:

    1. ``X`` vs ``Y`` given ``(Z1, Z2)``
    2. ``X`` vs ``Y`` given ``(Z2, Z3)``

    A hypothesis counts as rejected when its p-value is below ``alpha``.

    Parameters
    ----------
    stat : str
        Statistic name passed on to ``cond_indep_test_permutation``.
    N : int
        Number of repetitions.
    alpha : float
        Significance level.
    n : int
        Sample size passed to ``sample_gen_fnc``.
    B : int
        Number of permutations per test.
    sample_gen_fnc : callable
        Called as ``sample_gen_fnc(n)``; must return ``(X, Y, Z1, Z2, Z3)``.

    Returns
    -------
    tuple
        ``(h1_rejected, h2_rejected)``.

    Raises
    ------
    TypeError
        If ``sample_gen_fnc`` is not callable (including the default ``None``).
    """
    if not callable(sample_gen_fnc):
        raise TypeError(
            "sample_gen_fnc must be a callable taking the sample size and "
            f"returning (X, Y, Z1, Z2, Z3); got {sample_gen_fnc!r}"
        )

    rejections = [0, 0]
    for _ in range(N):
        X, Y, Z1, Z2, Z3 = sample_gen_fnc(n)

        # One conditioning set per hypothesis, tested in the same order.
        conditioning_sets = (np.array([Z1, Z2]).T, np.array([Z2, Z3]).T)
        for idx, Z in enumerate(conditioning_sets):
            p = cond_indep_test_permutation(X, Y, Z, B=B, stat=stat)[1]
            if p < alpha:
                rejections[idx] += 1

    return rejections[0], rejections[1]
# Number of simulated data sets per statistic.
N = 100

# Run the three statistics (in the order cmi, secmi2, secmi3), then report.
rejected_cmi, rejected_secmi2, rejected_secmi3 = [
    experiment(stat_name, N=N, sample_gen_fnc=generate_sample_b)
    for stat_name in ("cmi", "secmi2", "secmi3")
]

print("Hypothesis 1")
print(f"CMI rejected: {rejected_cmi[0]} out of {N}")
print(f"SECMI2 rejected: {rejected_secmi2[0]} out of {N}")
print(f"SECMI3 rejected: {rejected_secmi3[0]} out of {N}")

print("Hypothesis 2")
print(f"CMI rejected: {rejected_cmi[1]} out of {N}")
print(f"SECMI2 rejected: {rejected_secmi2[1]} out of {N}")
print(f"SECMI3 rejected: {rejected_secmi3[1]} out of {N}")


Hypothesis 1
CMI rejected: 5 out of 100
SECMI2 rejected: 4 out of 100
SECMI3 rejected: 4 out of 100
Hypothesis 2
CMI rejected: 6 out of 100
SECMI2 rejected: 2 out of 100
SECMI3 rejected: 3 out of 100


### c)

In [55]:
def generate_sample_c(n=1000):
    X = np.random.binomial(1, 0.5, n)
    Z1 = np.random.binomial(1, 0.5, n)
    Z2 = np.random.binomial(1, 0.5, n)
    Z3 = np.random.binomial(1, 0.5, n)
    X_Z1_Z2 = (X + Z1 + Z2) % 2
    ind_0 = np.where(X_Z1_Z2 == 0)[0]
    ind_1 = np.where(X_Z1_Z2 == 1)[0]
    Y = np.zeros(n)
    Y[ind_0] = np.random.binomial(1, 0.2, len(ind_0))
    Y[ind_1] = np.random.binomial(1, 0.8, len(ind_1))
    return X, Y, Z1, Z2, Z3

In [57]:
# Compute every result for generate_sample_c before printing anything, so the
# report cannot show a value left over from an earlier experiment.
rejected_cmi = experiment("cmi", N=N, sample_gen_fnc=generate_sample_c)
rejected_secmi2 = experiment("secmi2", N=N, sample_gen_fnc=generate_sample_c)
rejected_secmi3 = experiment("secmi3", N=N, sample_gen_fnc=generate_sample_c)

print("Hypothesis 1")
print(f"CMI rejected: {rejected_cmi[0]} out of {N}")
print(f"SECMI2 rejected: {rejected_secmi2[0]} out of {N}")
print(f"SECMI3 rejected: {rejected_secmi3[0]} out of {N}")

print("Hypothesis 2")
print(f"CMI rejected: {rejected_cmi[1]} out of {N}")
print(f"SECMI2 rejected: {rejected_secmi2[1]} out of {N}")
print(f"SECMI3 rejected: {rejected_secmi3[1]} out of {N}")

Hypothesis 1
CMI rejected: 100 out of 100
SECMI2 rejected: 5 out of 100
SECMI3 rejected: 100 out of 100
Hypothesis 2
CMI rejected: 2 out of 100
SECMI2 rejected: 3 out of 100
SECMI3 rejected: 5 out of 100


## Task 2
 
This task is solved in R.