# Mathematical Underpinnings - Lab 6

In [1]:
from sklearn.metrics import mutual_info_score
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from tqdm import tqdm

## Useful functions

In [2]:
def discetize_2bins(X):
    """Binarise ``X`` by sign: 1 where ``X >= 0``, 0 elsewhere."""
    is_non_negative = X >= 0
    # Multiplying the boolean mask by 1 turns True/False into 1/0.
    return 1 * is_non_negative

In [3]:
def conditional_permutation(X, Z):
    """Shuffle ``X`` independently inside every stratum of ``Z``.

    For each distinct value of ``Z`` (visited in sorted order) the entries of
    ``X`` at positions where ``Z`` takes that value are randomly permuted
    among those same positions using NumPy's global random state.

    Returns a new float array of length ``len(Z)``; ``X`` is not modified.
    """
    X_b = np.zeros(len(Z))

    for level in np.unique(Z):
        in_stratum = Z == level
        X_b[in_stratum] = np.random.permutation(X[in_stratum])

    return X_b

In [4]:
def conditional_mutual_information(X, Y, Z):
    """Plug-in estimate of the conditional mutual information I(X; Y | Z).

    The estimate is the sum over the observed values ``z`` of ``Z`` of
    ``P(Z = z) * I(X; Y | Z = z)``, where ``P(Z = z)`` is the empirical
    frequency of ``z`` and the inner term is ``mutual_info_score`` evaluated
    on the observations with ``Z == z``.
    """
    n_samples = len(Z)
    total = 0

    for level in np.unique(Z):
        in_stratum = Z == level
        # Empirical probability of this stratum.
        weight = np.sum(in_stratum) / n_samples
        total += weight * mutual_info_score(X[in_stratum], Y[in_stratum])

    return total

In [5]:
# II(X;Y;Z)
def interaction_information(X, Y, Z):
    """Interaction information II(X;Y;Z) = I(X;Y|Z) - I(X;Y)."""
    conditional_part = conditional_mutual_information(X, Y, Z)
    marginal_part = mutual_info_score(X, Y)
    return conditional_part - marginal_part

In [6]:
# II(X;Y;Z1;Z2)
def interaction_information2(X, Y, Z1, Z2):
    """Third-order interaction information II(X;Y;Z1;Z2).

    Computed as ``II(X;Y;(Z1,Z2)) - II(X;Y;Z1) - II(X;Y;Z2)``, where
    ``(Z1, Z2)`` is the joint variable encoded as a single discrete label.

    Parameters
    ----------
    X, Y : 1-D arrays of discrete labels.
    Z1, Z2 : 1-D arrays of discrete labels with the same length as ``X``.

    Returns
    -------
    The estimated interaction information.
    """
    # Re-code each variable to ordinal levels 0..k-1 before combining, so the
    # joint label is injective for any discrete values. For inputs taking both
    # values 0 and 1 this reduces to 2*Z2 + Z1.
    z1_levels, z1_codes = np.unique(Z1, return_inverse=True)
    _, z2_codes = np.unique(Z2, return_inverse=True)
    Z_1_and_2 = z2_codes * len(z1_levels) + z1_codes

    joint_term = interaction_information(X, Y, Z_1_and_2)
    return joint_term - interaction_information(X, Y, Z1) - interaction_information(X, Y, Z2)

## Task 1

In [13]:
def secmi2(X, Y, Z):
    """Second-order statistic: I(X;Y) plus II(X;Y;Z_i) for every column Z_i of Z.

    ``Z`` is a 2-D array with one conditioning variable per column.
    """
    total = mutual_info_score(X, Y)
    # Rows of Z.T are the columns of Z, visited left to right.
    for z_column in Z.T:
        total += interaction_information(X, Y, z_column)
    return total

def secmi3(X, Y, Z):
    """Third-order statistic: ``secmi2`` plus II(X;Y;Z_i;Z_j) for all column pairs i < j.

    ``Z`` is a 2-D array with one conditioning variable per column.
    """
    # I(X;Y) and the per-column interaction terms are exactly secmi2.
    total = secmi2(X, Y, Z)

    # triu_indices with k=1 enumerates the pairs (i, j), i < j, in row-major order.
    n_columns = Z.shape[1]
    for i, j in zip(*np.triu_indices(n_columns, k=1)):
        total += interaction_information2(X, Y, Z[:, i], Z[:, j])
    return total

### a)

In [14]:
def cond_indep_test_permutation(X, Y, Z, B, stat):
    """Conditional permutation test of H0: X is independent of Y given Z.

    Parameters
    ----------
    X, Y : 1-D array-likes of discrete labels.
    Z : 2-D array-like with one discrete conditioning variable per column
        (a 1-D input is treated as a single column).
    B : int
        Number of conditional permutations of ``X``.
    stat : {"cmi", "secmi2", "secmi3"}
        Test statistic to use.

    Returns
    -------
    (statistic, p_value)
        ``statistic`` is ``2 * len(X)`` times the chosen statistic on the
        observed data; ``p_value`` is ``(1 + #{b : stat_b >= stat_obs}) / (1 + B)``.

    Raises
    ------
    ValueError
        If ``stat`` is not one of the supported statistic names.
    """
    # Each entry takes (x, joint label of Z, Z as columns) and returns the statistic.
    statistics = {
        "cmi": lambda x, z_joint, z_columns: conditional_mutual_information(x, Y, z_joint),
        "secmi2": lambda x, z_joint, z_columns: secmi2(x, Y, z_columns),
        "secmi3": lambda x, z_joint, z_columns: secmi3(x, Y, z_columns),
    }
    if stat not in statistics:
        raise ValueError(
            f"Unknown stat {stat!r}; expected one of {sorted(statistics)}"
        )
    compute_stat = statistics[stat]

    X = np.asarray(X)
    Y = np.asarray(Y)
    Z = np.asarray(Z)
    if Z.ndim == 1:
        Z = Z.reshape(-1, 1)

    # Collapse the rows of Z into one integer label per observation using a
    # mixed-radix code over the ordinal levels of each column. The code is
    # injective for any discrete values; for columns taking both values 0 and 1
    # it equals sum_j Z[:, j] * 2**j.
    Z_1dim = np.zeros(Z.shape[0], dtype=np.int64)
    radix = 1
    for j in range(Z.shape[1]):
        levels, codes = np.unique(Z[:, j], return_inverse=True)
        Z_1dim += radix * codes.reshape(-1)
        radix *= len(levels)

    stat_value = compute_stat(X, Z_1dim, Z)

    # Count permutations whose statistic is at least as large as the observed one.
    condition_p_value = 0
    for _ in range(B):
        X_b = conditional_permutation(X, Z_1dim)
        if stat_value <= compute_stat(X_b, Z_1dim, Z):
            condition_p_value += 1

    # The +1 terms count the observed sample itself, so the p-value is never 0.
    p_value = (1 + condition_p_value) / (1 + B)

    return 2 * len(X) * stat_value, p_value

### b)

In [47]:
n = 100

# No random seed is set in this cell, so the sample differs between runs.

# Y: sign of a standard normal draw.
Y = discetize_2bins(np.random.normal(0, 1, n))

# Z1, Z2, Z3 (drawn in this order): sign of a N(2Y - 1, 1) draw each.
Z1, Z2, Z3 = (
    discetize_2bins(np.random.normal(2*Y - 1, 1, n)) for _ in range(3)
)

# X: sign of a N(2*Z1 - 1, 1) draw; it is generated from Z1 only.
X = discetize_2bins(np.random.normal(2*Z1 - 1, 1, n))

In [48]:

# One run of each test, conditioning on (Z1, Z2).
for stat in ["cmi", "secmi2", "secmi3"]:
    p_value = cond_indep_test_permutation(X, Y, np.transpose([Z1, Z2]), B=50, stat=stat)[1]
    print(f"(Z1, Z2) Stat: {stat}, p-value: {p_value}")

print("\n")

# One run of each test, conditioning on (Z2, Z3).
for stat in ["cmi", "secmi2", "secmi3"]:
    p_value = cond_indep_test_permutation(X, Y, np.transpose([Z2, Z3]), B=50, stat=stat)[1]
    print(f"(Z2, Z3) Stat: {stat}, p-value: {p_value}")

(Z1, Z2) Stat: cmi, p-value: 0.4117647058823529
(Z1, Z2) Stat: secmi2, p-value: 0.3137254901960784
(Z1, Z2) Stat: secmi3, p-value: 0.35294117647058826


(Z2, Z3) Stat: cmi, p-value: 0.0196078431372549
(Z2, Z3) Stat: secmi2, p-value: 0.09803921568627451
(Z2, Z3) Stat: secmi3, p-value: 0.0392156862745098


In [49]:
N = 100

# NOTE(review): X, Y, Z1, Z2 are not redrawn inside the loop, so the N
# repetitions differ only through the random permutations inside the test -
# confirm this is the intended notion of "rejection rate".
for stat in ["cmi", "secmi2", "secmi3"]:
    p_values = [
        cond_indep_test_permutation(X, Y, np.transpose([Z1, Z2]), B=50, stat=stat)[1]
        for _ in range(N)
    ]
    # Number of repetitions rejecting H0 at the 5% level.
    rejected = sum(p < 0.05 for p in p_values)
    print(f"(Z1, Z2) Stat: {stat}, rejection rate: {rejected/N}")



(Z1, Z2) Stat: cmi, rejection rate: 0.0
(Z1, Z2) Stat: secmi2, rejection rate: 0.0
(Z1, Z2) Stat: secmi3, rejection rate: 0.0


In [50]:
# Uses N from the previous cell.
# NOTE(review): X, Y, Z2, Z3 are not redrawn inside the loop, so the N
# repetitions differ only through the random permutations inside the test -
# confirm this is the intended notion of "rejection rate".
for stat in ["cmi", "secmi2", "secmi3"]:
    p_values = [
        cond_indep_test_permutation(X, Y, np.transpose([Z2, Z3]), B=50, stat=stat)[1]
        for _ in range(N)
    ]
    # Number of repetitions rejecting H0 at the 5% level.
    rejected = sum(p < 0.05 for p in p_values)
    print(f"(Z2, Z3) Stat: {stat}, rejection rate: {rejected/N}")

(Z2, Z3) Stat: cmi, rejection rate: 0.6
(Z2, Z3) Stat: secmi2, rejection rate: 0.08
(Z2, Z3) Stat: secmi3, rejection rate: 0.5


### c)

In [51]:
n = 100

# No random seed is set in this cell, so the sample differs between runs.

# X, Z1, Z2, Z3 (drawn in this order): 1 when a Uniform(0, 1) draw exceeds 1/2.
X, Z1, Z2, Z3 = (
    (np.random.uniform(0, 1, n) > 1/2).astype(int) for _ in range(4)
)

# Parity of X + Z1 + Z2 for every observation (Z3 does not enter).
sum_2 = [(x + z1 + z2) % 2 for x, z1, z2 in zip(X, Z1, Z2)]

# P(Y = 1) is 0.2 for even parity and 0.8 for odd parity.
prob = [0.8 if parity else 0.2 for parity in sum_2]
Y = np.random.binomial(1, prob)


In [52]:
N = 100

# NOTE(review): the data are not redrawn inside the loops, so the N
# repetitions differ only through the random permutations inside the test -
# confirm this is the intended notion of "rejection rate".

# Conditioning on (Z1, Z2).
for stat in ["cmi", "secmi2", "secmi3"]:
    p_values = [
        cond_indep_test_permutation(X, Y, np.transpose([Z1, Z2]), B=50, stat=stat)[1]
        for _ in range(N)
    ]
    rejected = sum(p < 0.05 for p in p_values)
    print(f"(Z1, Z2) Stat: {stat}, rejection rate: {rejected/N}")

print("\n")

# Conditioning on (Z2, Z3).
for stat in ["cmi", "secmi2", "secmi3"]:
    p_values = [
        cond_indep_test_permutation(X, Y, np.transpose([Z2, Z3]), B=50, stat=stat)[1]
        for _ in range(N)
    ]
    rejected = sum(p < 0.05 for p in p_values)
    print(f"(Z2, Z3) Stat: {stat}, rejection rate: {rejected/N}")

(Z1, Z2) Stat: cmi, rejection rate: 1.0
(Z1, Z2) Stat: secmi2, rejection rate: 0.0
(Z1, Z2) Stat: secmi3, rejection rate: 1.0


(Z2, Z3) Stat: cmi, rejection rate: 0.0
(Z2, Z3) Stat: secmi2, rejection rate: 0.0
(Z2, Z3) Stat: secmi3, rejection rate: 0.0


## Task 2

in R