# Mathematical Underpinnings - Lab 6

In [29]:
from sklearn.metrics import mutual_info_score
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from tqdm import tqdm

## Useful functions

In [30]:
def discretize_2bins(X):
    """Binarize values by sign: 1 where ``X >= 0`` and 0 elsewhere.

    Parameters
    ----------
    X : object supporting ``X >= 0`` (the notebook passes NumPy arrays).

    Returns
    -------
    Integer 0/1 indicator(s), computed elementwise for arrays.
    """
    is_non_negative = X >= 0
    # Multiplying the boolean mask by 1 turns True/False into 1/0.
    return 1 * is_non_negative

In [31]:
def conditional_permutation(X, Z):
    """Permute ``X`` independently within each stratum of ``Z``.

    For every distinct value ``z`` of ``Z`` the entries of ``X`` at the
    positions where ``Z == z`` are shuffled among themselves, so the joint
    distribution of ``(X, Z)`` is kept while any link between ``X`` and other
    variables inside a stratum is broken.

    Parameters
    ----------
    X : array-like, first dimension of length n
        Values to permute.
    Z : array-like of shape (n,)
        Stratum label of every observation.

    Returns
    -------
    numpy.ndarray
        Permuted copy of ``X`` with the same shape and dtype as ``X``
        (the input is not modified).

    Notes
    -----
    Randomness comes from the global NumPy RNG (``np.random.permutation``);
    strata are processed in the sorted order returned by ``np.unique``.
    """
    X = np.asarray(X)
    Z = np.asarray(Z)

    # Start from a copy rather than a float zero buffer so the dtype of X
    # (int, bool, str, ...) is preserved instead of being coerced to float64.
    X_b = X.copy()

    for z_value in np.unique(Z):
        in_stratum = (Z == z_value)
        X_b[in_stratum] = np.random.permutation(X[in_stratum])

    return X_b

In [32]:
def conditional_mutual_information(X, Y, Z):
    """Plug-in estimate of the conditional mutual information I(X; Y | Z).

    The estimate is the weighted sum over the observed values ``z`` of ``Z``
    of ``P(Z = z) * I(X; Y | Z = z)``, where ``P(Z = z)`` is the empirical
    frequency of ``z`` and the inner term is ``mutual_info_score`` evaluated
    on the observations with ``Z == z``.

    Parameters
    ----------
    X, Y : arrays of length n with discrete labels.
    Z : array of length n with the (one-dimensional) conditioning labels.

    Returns
    -------
    The estimated I(X; Y | Z), in the units used by ``mutual_info_score``.
    """
    n_samples = len(Z)
    total = 0

    for level in np.unique(Z):
        in_level = (Z == level)

        # Mutual information of X and Y restricted to this stratum of Z.
        mi_within = mutual_info_score(X[in_level], Y[in_level])
        # Empirical probability of the stratum.
        weight = np.sum(in_level) / n_samples

        total += weight * mi_within

    return total

In [33]:
# II(X;Y;Z) = I(X;Y|Z) - I(X;Y)
def interaction_information(X, Y, Z):
    """Interaction information of ``X``, ``Y`` and a one-dimensional ``Z``.

    Returns the difference between the conditional mutual information
    I(X; Y | Z) and the unconditional mutual information I(X; Y).
    """
    mi_given_Z = conditional_mutual_information(X, Y, Z)
    mi_marginal = mutual_info_score(X, Y)
    return mi_given_Z - mi_marginal

In [34]:
# II(X;Y;Z1;Z2)
def interaction_information2(X, Y, Z1, Z2):
    """Four-way interaction information II(X; Y; Z1; Z2).

    Computed as ``II(X;Y;(Z1,Z2)) - II(X;Y;Z1) - II(X;Y;Z2)``, where
    ``(Z1, Z2)`` is the pair treated as a single discrete variable.

    Parameters
    ----------
    X, Y : arrays of length n with discrete labels.
    Z1, Z2 : one-dimensional arrays of length n with discrete labels
        (any number of levels, not only binary).

    Returns
    -------
    The estimated four-way interaction information.
    """
    # Encode the pair (Z1, Z2) as one label with a mixed-radix code built from
    # level indices. The previous ``2*Z2 + Z1`` is only injective when Z1 is
    # in {0, 1}; e.g. (Z1=2, Z2=0) and (Z1=0, Z2=1) both mapped to 2.
    # For binary 0/1 inputs this yields the same strata in the same order.
    Z1_levels, Z1_codes = np.unique(Z1, return_inverse=True)
    _, Z2_codes = np.unique(Z2, return_inverse=True)
    Z_1_and_2 = Z2_codes.reshape(-1) * len(Z1_levels) + Z1_codes.reshape(-1)

    ii_joint = interaction_information(X, Y, Z_1_and_2)
    return ii_joint - interaction_information(X, Y, Z1) - interaction_information(X, Y, Z2)

## Task 1

In [35]:
def secmi2(X, Y, Z):
    """Second-order statistic: I(X;Y) plus the sum of II(X;Y;Z_i) over columns.

    Parameters
    ----------
    X, Y : arrays of length n with discrete labels.
    Z : 2-D array of shape (n, k); each column is one conditioning variable.

    Returns
    -------
    ``mutual_info_score(X, Y)`` plus ``interaction_information(Y, X, Z[:, i])``
    summed over all columns ``i`` of ``Z``.
    """
    columns = [Z[:, idx] for idx in range(Z.shape[1])]

    total = mutual_info_score(X, Y)
    for column in columns:
        total += interaction_information(Y, X, column)
    return total

def secmi3(X, Y, Z):
    """Third-order statistic: ``secmi2`` plus the pairwise four-way terms.

    Adds ``interaction_information2(Y, X, Z[:, i], Z[:, j])`` for every
    unordered pair of distinct columns ``i < j`` of ``Z`` to ``secmi2(X, Y, Z)``.

    Parameters
    ----------
    X, Y : arrays of length n with discrete labels.
    Z : 2-D array of shape (n, k); each column is one conditioning variable.
    """
    n_columns = Z.shape[1]
    # All index pairs (a, b) with a < b, in row-major order.
    column_pairs = [(a, b) for a in range(n_columns) for b in range(a + 1, n_columns)]

    total = secmi2(X, Y, Z)
    for a, b in column_pairs:
        total += interaction_information2(Y, X, Z[:, a], Z[:, b])
    return total

### a)

In [36]:
def _joint_strata(Z):
    """Map every row of the 2-D array ``Z`` to one integer stratum label."""
    Z_1dim = np.zeros(Z.shape[0], dtype=np.int64)
    radix = 1
    for col in range(Z.shape[1]):
        levels, codes = np.unique(Z[:, col], return_inverse=True)
        # Mixed-radix code over level indices: injective for any number of
        # levels per column, and equal to sum(2**col * Z[:, col]) when every
        # column takes both values 0 and 1.
        Z_1dim += radix * codes.reshape(-1)
        radix *= len(levels)
    return Z_1dim


def cond_indep_test_permutation(X, Y, Z, B, stat):
    """Conditional permutation test of independence of ``X`` and ``Y`` given ``Z``.

    The chosen statistic is computed on the data and on ``B`` copies in which
    ``X`` is permuted within the strata defined by the rows of ``Z``.

    Parameters
    ----------
    X, Y : arrays of length n with discrete labels.
    Z : 2-D array-like of shape (n, k) with discrete conditioning variables.
    B : int
        Number of conditional permutations.
    stat : {"cmi", "secmi2", "secmi3"}
        Test statistic: ``conditional_mutual_information`` on the joint
        strata of ``Z``, ``secmi2`` or ``secmi3``.

    Returns
    -------
    (float, float)
        ``2 * len(X) * statistic`` on the original data, and the p-value
        ``(1 + #{b : statistic_b >= statistic}) / (1 + B)``.

    Raises
    ------
    ValueError
        If ``stat`` is not one of the supported names.
    """
    # Fail early with a clear message instead of an UnboundLocalError later.
    if stat not in ("cmi", "secmi2", "secmi3"):
        raise ValueError(
            f"Unknown stat {stat!r}; expected one of 'cmi', 'secmi2', 'secmi3'."
        )

    Z = np.asarray(Z)
    Z_1dim = _joint_strata(Z)

    # Select the statistic once; it is evaluated B + 1 times below.
    if stat == "cmi":
        def statistic(X_any):
            return conditional_mutual_information(X_any, Y, Z_1dim)
    elif stat == "secmi2":
        def statistic(X_any):
            return secmi2(X_any, Y, Z)
    else:
        def statistic(X_any):
            return secmi3(X_any, Y, Z)

    stat_value = statistic(X)

    condition_p_value = 0
    for _ in range(B):
        X_b = conditional_permutation(X, Z_1dim)
        if stat_value <= statistic(X_b):
            condition_p_value += 1

    # The +1 terms count the observed sample itself, so the p-value is never 0.
    p_value = (1 + condition_p_value) / (1 + B)

    return 2 * len(X) * stat_value, p_value

### b)

In [37]:
def model_b(n):
    """Draw ``n`` observations (X, Y, Z) from the model of task 1 b).

    * ``Y`` is the sign indicator of a standard normal draw.
    * Each of the 3 columns of ``Z`` is the sign indicator of an independent
      normal draw with mean 1 and unit variance, shifted by ``Y``.
    * ``X`` is the sign indicator of a standard normal draw shifted by
      ``Z[:, 0]``; it is built only from that column and fresh noise.

    Returns
    -------
    X : (n,) array, Y : (n,) array, Z : (n, 3) array, all with 0/1 entries.
    """
    Y = discretize_2bins(np.random.normal(size=n))

    latent_Z = np.random.multivariate_normal([1, 1, 1], np.eye(3), size=n)
    # Y as a column vector broadcasts across the three columns of Z.
    Z = discretize_2bins(latent_Z + Y[:, np.newaxis])

    latent_X = np.random.normal(size=n) + Z[:, 0]
    X = discretize_2bins(latent_X)
    return X, Y, Z

def experiment_b(stat, N=100, alpha=0.05, n=100, B=50):
    """Count rejections of two conditional-independence hypotheses under ``model_b``.

    For each of ``N`` simulated datasets of size ``n`` the permutation test is
    run twice: conditioning on ``Z[:, 0:2]`` (hypothesis 1) and on
    ``Z[:, 1:3]`` (hypothesis 2). A hypothesis is rejected when its p-value is
    below ``alpha``.

    Parameters
    ----------
    stat : statistic name passed to ``cond_indep_test_permutation``.
    N : number of simulated datasets.
    alpha : significance level.
    n : sample size of each dataset.
    B : number of permutations per test.

    Returns
    -------
    (int, int) : number of rejections of hypothesis 1 and of hypothesis 2.
    """
    rejections = [0, 0]
    for _ in range(N):
        X, Y, Z = model_b(n)
        conditioning_sets = (Z[:, 0:2], Z[:, 1:3])
        for k, Z_subset in enumerate(conditioning_sets):
            _, p_value = cond_indep_test_permutation(X, Y, Z_subset, B, stat)
            if p_value < alpha:
                rejections[k] += 1
    return rejections[0], rejections[1]

In [38]:
# model_b, CMI statistic, default settings of experiment_b.
# Hypothesis 1 conditions on Z[:, 0:2], hypothesis 2 on Z[:, 1:3].
h1, h2 = experiment_b("cmi")
print(
    f"Hypothesis 1 was rejected {h1} times.",
    f"Hypothesis 2 was rejected {h2} times.",
    sep="\n",
)

Hypothesis 1 was rejected 3 times.
Hypothesis 2 was rejected 6 times.


In [39]:
# model_b, secmi2 statistic, default settings of experiment_b.
# Hypothesis 1 conditions on Z[:, 0:2], hypothesis 2 on Z[:, 1:3].
h1, h2 = experiment_b("secmi2")
print(
    f"Hypothesis 1 was rejected {h1} times.",
    f"Hypothesis 2 was rejected {h2} times.",
    sep="\n",
)

Hypothesis 1 was rejected 3 times.
Hypothesis 2 was rejected 2 times.


In [40]:
# model_b, secmi3 statistic, default settings of experiment_b.
# Hypothesis 1 conditions on Z[:, 0:2], hypothesis 2 on Z[:, 1:3].
h1, h2 = experiment_b("secmi3")
print(
    f"Hypothesis 1 was rejected {h1} times.",
    f"Hypothesis 2 was rejected {h2} times.",
    sep="\n",
)

Hypothesis 1 was rejected 3 times.
Hypothesis 2 was rejected 1 times.


### c)

In [41]:
def model_c(n):
    """Draw ``n`` observations (X, Y, Z) from the model of task 1 c).

    * ``X`` and the 3 columns of ``Z`` are independent Bernoulli(0.5) draws.
    * ``Y`` depends on the parity of ``X + Z[:, 0] + Z[:, 1]``: it is
      Bernoulli(0.2) where the sum is even and Bernoulli(0.8) where it is odd.

    Returns
    -------
    X : (n,) integer array, Y : (n,) float array of 0.0/1.0,
    Z : (n, 3) integer array.
    """
    X = np.random.binomial(1, 0.5, n)
    Z = np.random.binomial(1, 0.5, (n, 3))

    parity = (X + Z[:, 0] + Z[:, 1]) % 2
    is_even = parity == 0
    is_odd = parity == 1

    Y = np.zeros(n)
    # Even-parity rows are drawn first, then odd-parity rows.
    Y[is_even] = np.random.binomial(1, 0.2, np.sum(is_even))
    Y[is_odd] = np.random.binomial(1, 0.8, np.sum(is_odd))
    return X, Y, Z

def experiment_c(stat, N=100, alpha=0.05, n=100, B=50):
    """Count rejections of two conditional-independence hypotheses under ``model_c``.

    For each of ``N`` simulated datasets of size ``n`` the permutation test is
    run twice: conditioning on ``Z[:, 0:2]`` (hypothesis 1) and on
    ``Z[:, 1:3]`` (hypothesis 2). A hypothesis is rejected when its p-value is
    below ``alpha``.

    Parameters
    ----------
    stat : statistic name passed to ``cond_indep_test_permutation``.
    N : number of simulated datasets.
    alpha : significance level.
    n : sample size of each dataset.
    B : number of permutations per test.

    Returns
    -------
    (int, int) : number of rejections of hypothesis 1 and of hypothesis 2.
    """
    rejections = [0, 0]
    for _ in range(N):
        X, Y, Z = model_c(n)
        conditioning_sets = (Z[:, 0:2], Z[:, 1:3])
        for k, Z_subset in enumerate(conditioning_sets):
            _, p_value = cond_indep_test_permutation(X, Y, Z_subset, B, stat)
            if p_value < alpha:
                rejections[k] += 1
    return rejections[0], rejections[1]

In [42]:
# model_c, CMI statistic, default settings of experiment_c.
# Hypothesis 1 conditions on Z[:, 0:2], hypothesis 2 on Z[:, 1:3].
h1, h2 = experiment_c("cmi")
print(
    f"Hypothesis 1 was rejected {h1} times.",
    f"Hypothesis 2 was rejected {h2} times.",
    sep="\n",
)

Hypothesis 1 was rejected 100 times.
Hypothesis 2 was rejected 5 times.


In [None]:
# model_c, secmi2 statistic, default settings of experiment_c.
# Hypothesis 1 conditions on Z[:, 0:2], hypothesis 2 on Z[:, 1:3].
h1, h2 = experiment_c("secmi2")
print(
    f"Hypothesis 1 was rejected {h1} times.",
    f"Hypothesis 2 was rejected {h2} times.",
    sep="\n",
)

In [None]:
# model_c, secmi3 statistic, default settings of experiment_c.
# Hypothesis 1 conditions on Z[:, 0:2], hypothesis 2 on Z[:, 1:3].
h1, h2 = experiment_c("secmi3")
print(
    f"Hypothesis 1 was rejected {h1} times.",
    f"Hypothesis 2 was rejected {h2} times.",
    sep="\n",
)

## Task 2
 
This task is solved in R.