# Mathematical Underpinnings - Lab 6

In [1]:
from sklearn.metrics import mutual_info_score
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from tqdm import tqdm

## Useful functions

In [2]:
def discretize_2bins(X):
    """Discretize values into two bins by sign.

    Args:
        X: NumPy array (or numeric scalar) of values to discretize.

    Returns:
        An object of the same shape as ``X`` holding ``-1`` where the value
        is negative and ``+1`` where it is zero or positive.
    """
    is_nonnegative = X >= 0
    # A boolean in {False, True} becomes {-1, +1} after scaling and shifting.
    return is_nonnegative * 2 - 1

In [3]:
def conditional_permutation(X, Z):
    """Shuffle ``X`` separately inside every group of equal ``Z`` values.

    Args:
        X: 1-D NumPy array of samples to permute.
        Z: 1-D NumPy array of the same length holding the stratum of each
            sample.

    Returns:
        A copy of ``X`` in which the entries belonging to each distinct value
        of ``Z`` have been randomly permuted among themselves (using the
        global ``np.random`` state). ``X`` itself is left untouched.
    """
    shuffled = np.copy(X)

    for level in np.unique(Z):
        in_stratum = Z == level
        # Values are always read from the original X, so each stratum is
        # permuted independently of the others.
        shuffled[in_stratum] = np.random.permutation(X[in_stratum])

    return shuffled

In [4]:
def conditional_mutual_information(X, Y, Z):
    """Plug-in estimate of the conditional mutual information I(X; Y | Z).

    The estimate is the weighted sum over the distinct values ``z`` of ``Z``
    of ``p(z) * I(X; Y | Z = z)``, where ``p(z)`` is the empirical frequency
    of ``z`` and the inner term is ``mutual_info_score`` evaluated on the
    samples with ``Z == z``.

    Args:
        X: 1-D NumPy array of discrete samples.
        Y: 1-D NumPy array of discrete samples, same length as ``X``.
        Z: 1-D NumPy array of discrete conditioning values, same length.

    Returns:
        The estimated conditional mutual information (``0`` if ``Z`` is
        empty).
    """
    n_samples = len(Z)

    def weighted_term(level):
        # Contribution of one stratum: empirical p(z) times I(X; Y | Z = z).
        in_stratum = Z == level
        weight = np.sum(in_stratum) / n_samples
        return weight * mutual_info_score(X[in_stratum], Y[in_stratum])

    return sum(weighted_term(level) for level in np.unique(Z))

In [5]:
# II(X;Y;Z) = I(X;Y|Z) - I(X;Y)
def interaction_information(X, Y, Z):
    """Estimate the three-way interaction information II(X; Y; Z).

    Args:
        X: 1-D NumPy array of discrete samples.
        Y: 1-D NumPy array of discrete samples, same length as ``X``.
        Z: 1-D NumPy array of discrete samples, same length as ``X``.

    Returns:
        The conditional mutual information of ``X`` and ``Y`` given ``Z``
        minus their unconditional mutual information.
    """
    marginal_mi = mutual_info_score(X, Y)
    conditional_mi = conditional_mutual_information(X, Y, Z)
    return conditional_mi - marginal_mi

In [6]:
# II(X;Y;Z1;Z2)
def interaction_information2(X, Y, Z1, Z2):
    """Estimate the four-way interaction information II(X; Y; Z1; Z2).

    Computed as ``II(X; Y; (Z1, Z2)) - II(X; Y; Z1) - II(X; Y; Z2)``, where
    ``(Z1, Z2)`` is the joint variable formed by the two conditioning
    variables.

    Args:
        X: 1-D NumPy array of discrete samples.
        Y: 1-D NumPy array of discrete samples, same length as ``X``.
        Z1: 1-D NumPy array of discrete samples, same length as ``X``.
        Z2: 1-D NumPy array of discrete samples, same length as ``X``.

    Returns:
        The estimated four-way interaction information.
    """
    # Give every distinct (Z2, Z1) pair its own integer label. The former
    # arithmetic code 2*Z2 + Z1 is only injective for two-valued variables
    # (e.g. Z1=2, Z2=0 and Z1=0, Z2=1 both map to 2). Putting Z2 first keeps
    # the label order of that code for two-valued inputs, so results for the
    # binary variables used in this file are unchanged.
    pairs = np.column_stack((Z2, Z1))
    _, joint_labels = np.unique(pairs, axis=0, return_inverse=True)
    # Some NumPy releases return the inverse with an extra axis; flatten it.
    joint_labels = joint_labels.reshape(-1)

    return (
        interaction_information(X, Y, joint_labels)
        - interaction_information(X, Y, Z1)
        - interaction_information(X, Y, Z2)
    )

## Task 1

In [7]:
def secmi2(X, Y, Z):
    """Second-order SECMI statistic.

    Adds to the mutual information of ``X`` and ``Y`` the three-way
    interaction information with each single column of ``Z``.

    Args:
        X: 1-D NumPy array of discrete samples.
        Y: 1-D NumPy array of discrete samples, same length as ``X``.
        Z: 2-D NumPy array with one conditioning variable per column.

    Returns:
        ``I(X; Y) + sum_i II(Y; X; Z[:, i])``.
    """
    n_conditioning = Z.shape[1]
    second_order_terms = (
        interaction_information(Y, X, Z[:, col]) for col in range(n_conditioning)
    )
    # Start the running total at I(X; Y) and add the terms column by column.
    return sum(second_order_terms, mutual_info_score(X, Y))

def secmi3(X, Y, Z):
    """Third-order SECMI statistic.

    Extends ``secmi2`` with the four-way interaction information of ``X``,
    ``Y`` and every unordered pair of distinct columns of ``Z``.

    Args:
        X: 1-D NumPy array of discrete samples.
        Y: 1-D NumPy array of discrete samples, same length as ``X``.
        Z: 2-D NumPy array with one conditioning variable per column.

    Returns:
        ``secmi2(X, Y, Z) + sum_{i<j} II(Y; X; Z[:, i]; Z[:, j])``.
    """
    n_conditioning = Z.shape[1]
    # Every pair (first, second) with first < second, in row-major order.
    column_pairs = (
        (first, second)
        for first in range(n_conditioning)
        for second in range(first + 1, n_conditioning)
    )
    third_order_terms = (
        interaction_information2(Y, X, Z[:, first], Z[:, second])
        for first, second in column_pairs
    )
    return sum(third_order_terms, secmi2(X, Y, Z))

### a)

In [8]:
def cond_indep_test_permutation(X, Y, Z, stat, B=50):
    """Permutation test of conditional independence of X and Y given Z.

    The chosen statistic is computed on the data and on ``B`` copies in which
    ``X`` has been permuted within the strata defined by the joint value of
    all columns of ``Z``. The p-value is the share of permuted statistics
    that are at least as large as the observed one, with the usual ``+1``
    correction in numerator and denominator.

    Args:
        X: 1-D NumPy array of discrete samples.
        Y: 1-D NumPy array of discrete samples, same length as ``X``.
        Z: 2-D array with one conditioning variable per column; a 1-D array
            is treated as a single column.
        stat: Statistic to use: ``"cmi"``, ``"secmi2"`` or ``"secmi3"``.
        B: Number of permutations.

    Returns:
        A tuple ``(2 * len(X) * statistic, p_value)``.

    Raises:
        ValueError: If ``stat`` is not one of the supported names.
    """
    valid_stats = ("cmi", "secmi2", "secmi3")
    if stat not in valid_stats:
        # Raise explicitly: an ``assert`` is stripped under ``python -O`` and
        # an invalid name would then surface later as an unbound variable.
        raise ValueError(
            "stat must be one of {}, got {!r}".format(valid_stats, stat)
        )

    Z = np.asarray(Z)
    if Z.ndim == 1:
        Z = Z.reshape(-1, 1)

    if Z.shape[1] == 0:
        # No conditioning variables: every sample is in the same stratum.
        strata = np.zeros(Z.shape[0], dtype=int)
    else:
        # One integer label per distinct row of Z. The former binary-weight
        # code sum_i 2**i * Z[:, i] merges different rows as soon as a column
        # has more than two levels. Reversing the columns makes the last
        # column the most significant one, which reproduces the label order
        # of that code for the +-1 / 0-1 coded columns used in this file.
        _, strata = np.unique(Z[:, ::-1], axis=0, return_inverse=True)
        # Some NumPy releases return the inverse with an extra axis.
        strata = strata.reshape(-1)

    # Pick the statistic once instead of re-testing `stat` per permutation.
    if stat == "cmi":
        def statistic(x):
            return conditional_mutual_information(x, Y, strata)
    elif stat == "secmi2":
        def statistic(x):
            return secmi2(x, Y, Z)
    else:
        def statistic(x):
            return secmi3(x, Y, Z)

    stat_value = statistic(X)

    n_at_least_as_large = 0
    for _ in range(B):
        X_b = conditional_permutation(X, strata)
        if stat_value <= statistic(X_b):
            n_at_least_as_large += 1

    p_value = (1 + n_at_least_as_large) / (1 + B)

    return 2 * len(X) * stat_value, p_value

### b)

In [9]:
def sample_from_model_1b(n):
    """Draw ``n`` samples from the model of task 1b.

    ``Y`` is the sign of a standard normal draw, each of the three columns of
    ``Z`` is the sign of a unit-variance normal draw with mean 1 shifted by
    ``Y``, and ``X`` is the sign of a standard normal draw shifted by the
    first column of ``Z``. All outputs take values in ``{-1, +1}``.

    Args:
        n: Number of samples.

    Returns:
        A tuple ``(X, Y, Z)`` with ``X`` and ``Y`` of shape ``(n,)`` and
        ``Z`` of shape ``(n, 3)``.
    """
    Y = discretize_2bins(np.random.normal(size=n))

    z_noise = np.random.multivariate_normal([1, 1, 1], np.eye(3), size=n)
    # Broadcasting the column vector adds the same Y to all three columns.
    Z = discretize_2bins(z_noise + Y[:, np.newaxis])

    x_noise = np.random.normal(size=n)
    X = discretize_2bins(x_noise + Z[:, 0])

    return X, Y, Z

In [10]:
def experiment_1b(stat, N=100, alpha=0.05, n_samples=100):
    """Count rejections of two conditional-independence hypotheses (model 1b).

    In every repetition a fresh data set is drawn with
    ``sample_from_model_1b`` and ``cond_indep_test_permutation`` is run
    twice: once conditioning on the first two columns of ``Z`` and once on
    the last two.

    Args:
        stat: Statistic name passed to ``cond_indep_test_permutation``.
        N: Number of repetitions.
        alpha: Significance level; a test rejects when its p-value is
            strictly below ``alpha``.
        n_samples: Size of each simulated data set (previously fixed at 100).

    Returns:
        A tuple ``(rejections given Z[:, 0:2], rejections given Z[:, 1:3])``.
    """
    rejections_z12 = 0
    rejections_z23 = 0

    for _ in range(N):
        X, Y, Z = sample_from_model_1b(n_samples)

        _, p_value_z12 = cond_indep_test_permutation(X, Y, Z[:, 0:2], stat)
        _, p_value_z23 = cond_indep_test_permutation(X, Y, Z[:, 1:3], stat)

        if p_value_z12 < alpha:
            rejections_z12 += 1
        if p_value_z23 < alpha:
            rejections_z23 += 1

    # how many times has each null hypothesis been rejected?
    return rejections_z12, rejections_z23

In [11]:
# Model 1b with the "cmi" statistic and default settings; the result is
# (rejections given Z[:, 0:2], rejections given Z[:, 1:3]).
experiment_1b("cmi")

(4, 39)

In [12]:
# Model 1b with the "secmi2" statistic and default settings; the result is
# (rejections given Z[:, 0:2], rejections given Z[:, 1:3]).
experiment_1b("secmi2")

(1, 62)

In [13]:
# Model 1b with the "secmi3" statistic and default settings; the result is
# (rejections given Z[:, 0:2], rejections given Z[:, 1:3]).
experiment_1b("secmi3")

(3, 55)

### c)

In [32]:
def sample_from_model_1c(n):
    """Draw ``n`` samples from the model of task 1c.

    ``X`` and the three columns of ``Z`` are independent fair coin flips.
    ``Y`` is a Bernoulli draw whose success probability is 0.8 when
    ``X + Z[:, 0] + Z[:, 1]`` is odd and 0.2 when it is even.

    Args:
        n: Number of samples.

    Returns:
        A tuple ``(X, Y, Z)`` with integer ``X`` of shape ``(n,)``, float
        ``Y`` of shape ``(n,)`` and integer ``Z`` of shape ``(n, 3)``.
    """
    X = np.random.binomial(n=1, p=0.5, size=n)
    Z = np.random.binomial(n=1, p=0.5, size=(n, 3))

    parity = (X + Z[:, 0] + Z[:, 1]) % 2
    is_odd = parity == 1
    is_even = parity == 0

    Y = np.zeros(n)
    # Draw order matters for reproducibility: odd-parity samples first.
    Y[is_odd] = np.random.binomial(n=1, p=0.8, size=np.sum(is_odd))
    Y[is_even] = np.random.binomial(n=1, p=0.2, size=np.sum(is_even))

    return X, Y, Z

In [33]:
def experiment_1c(stat, N=100, alpha=0.05, n_samples=100):
    """Count rejections of two conditional-independence hypotheses (model 1c).

    In every repetition a fresh data set is drawn with
    ``sample_from_model_1c`` and ``cond_indep_test_permutation`` is run
    twice: once conditioning on the first two columns of ``Z`` and once on
    the last two.

    Args:
        stat: Statistic name passed to ``cond_indep_test_permutation``.
        N: Number of repetitions.
        alpha: Significance level; a test rejects when its p-value is
            strictly below ``alpha``.
        n_samples: Size of each simulated data set (previously fixed at 100).

    Returns:
        A tuple ``(rejections given Z[:, 0:2], rejections given Z[:, 1:3])``.
    """
    rejections_z12 = 0
    rejections_z23 = 0

    for _ in range(N):
        X, Y, Z = sample_from_model_1c(n_samples)

        _, p_value_z12 = cond_indep_test_permutation(X, Y, Z[:, 0:2], stat)
        _, p_value_z23 = cond_indep_test_permutation(X, Y, Z[:, 1:3], stat)

        if p_value_z12 < alpha:
            rejections_z12 += 1
        if p_value_z23 < alpha:
            rejections_z23 += 1

    # how many times has each null hypothesis been rejected?
    return rejections_z12, rejections_z23

In [34]:
# Model 1c with the "cmi" statistic and default settings; the result is
# (rejections given Z[:, 0:2], rejections given Z[:, 1:3]).
experiment_1c("cmi")

(100, 4)

In [35]:
# Model 1c with the "secmi2" statistic and default settings; the result is
# (rejections given Z[:, 0:2], rejections given Z[:, 1:3]).
experiment_1c("secmi2")

(3, 4)

In [36]:
# Model 1c with the "secmi3" statistic and default settings; the result is
# (rejections given Z[:, 0:2], rejections given Z[:, 1:3]).
experiment_1c("secmi3")

(100, 7)