# Mathematical Underpinnings - Lab 6

In [None]:
from sklearn.metrics import mutual_info_score
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from tqdm import tqdm

## Useful functions

In [None]:
def discetize_2bins(X):
    X_discrete = 1*(X >= 0)
    return X_discrete

In [None]:
def conditional_permutation(X, Z):

    z_values = np.unique(Z)
    n_z_values = len(z_values)
    n = len(Z)

    X_b = np.zeros(n)

    for i in range(n_z_values):

        z_value_tmp = z_values[i]

        X_b[Z == z_value_tmp] = np.random.permutation(X[Z == z_value_tmp])

    return X_b

In [None]:
def conditional_mutual_information(X, Y, Z):

    z_values = np.unique(Z)
    n_z_values = len(z_values)
    n = len(Z)

    cmi = 0

    for i in range(n_z_values):

        z_value_tmp = z_values[i]
        z_condition = (Z == z_value_tmp)

        X_z = X[z_condition]
        Y_z = Y[z_condition]

        mi_XY_z = mutual_info_score(X_z, Y_z)
        p_z = np.sum(z_condition)/n

        cmi += p_z*mi_XY_z

    return cmi

In [None]:
# II(X;Y;Z)
def interaction_information(X, Y, Z):
    return conditional_mutual_information(X, Y, Z) - mutual_info_score(X, Y)

In [None]:
# II(X;Y;Z1;Z2)
def interaction_information2(X, Y, Z1, Z2):
    Z_1_and_2 = 2*Z2 + Z1
    return interaction_information(X, Y, Z_1_and_2) - interaction_information(X, Y, Z1) - interaction_information(X, Y, Z2)

## Task 1

In [None]:
def secmi2(X, Y, Z):
    n_col_Z = Z.shape[1]
    Z_1dim = np.dot(Z, 2**np.linspace(0, n_col_Z-1, n_col_Z))
    return mutual_info_score(X, Y) + conditional_mutual_information(X, Y, Z_1dim)


def secmi3(X, Y, Z):
    n_col_Z = Z.shape[1]
    Z_1dim = np.dot(Z, 2**np.linspace(0, n_col_Z-1, n_col_Z))
    secmi2 = mutual_info_score(X, Y) + conditional_mutual_information(X, Y, Z_1dim)
    II = 0
    for j in range(Z.shape[1]):
      for i in range(j):
        II += interaction_information2(X, Y, Z[:, i], Z[:, j])
    return secmi2 + II

### a)

In [None]:
def cond_indep_test_permutation(X, Y, Z, B, stat):

    n_col_Z = Z.shape[1]
    Z_1dim = np.dot(Z, 2**np.linspace(0, n_col_Z-1, n_col_Z))

    if stat == "cmi":
        stat_value = conditional_mutual_information(X, Y, Z_1dim)
    if stat == "secmi2":
        stat_value = secmi2(X, Y, Z)
    if stat == "secmi3":
        stat_value = secmi3(X, Y, Z)

    condition_p_value = 0
    for b in range(B):
        X_b = conditional_permutation(X, Z_1dim)

        if stat == "cmi":
            stat_value_b = conditional_mutual_information(X_b, Y, Z_1dim)
        if stat == "secmi2":
            stat_value_b = secmi2(X_b, Y, Z)
        if stat == "secmi3":
            stat_value_b = secmi3(X_b, Y, Z)

        if stat_value <= stat_value_b:
            condition_p_value += 1

    p_value = (1 + condition_p_value)/(1 + B)

    return 2*len(X)*stat_value, p_value

### b)

In [None]:
def sample_from_model(n=1000):
    Y = np.random.normal(0, 1, n)
    Y[Y<0] = -1
    Y[Y>=0] = 1

    Z1 = np.expand_dims(np.random.normal(Y/2, 1, n), 1)
    Z1[Z1<0] = -1
    Z1[Z1>=0] = 1

    Z2 = np.expand_dims(np.random.normal(Y/2, 1, n), 1)
    Z2[Z2<0] = -1
    Z2[Z2>=0] = 1

    Z3 = np.expand_dims(np.random.normal(Y/2, 1, n), 1)
    Z3[Z3<0] = -1
    Z3[Z3>=0] = 1

    X = np.random.normal(Z1.T[0]/2, 1, n)
    X[X<0] = -1
    X[X>=0] = 1
    return X, Y, Z1, Z2, Z3

In [None]:
X, Y, Z1, Z2, Z3 = sample_from_model()

In [None]:
Z = np.concatenate((Z1, Z2), 1)
s2_stat, s2_p = cond_indep_test_permutation(X, Y, Z, 20, "secmi2")
s3_stat, s3_p = cond_indep_test_permutation(X, Y, Z, 20, "secmi3")

# print(f"Secmi2 statiscti: {s2_stat}, p-val {s2_p}")
# print(f"Secmi3 statiscti: {s3_stat}, p-val {s3_p}")

Secmi2 statiscti: 5.255193959762449, p-val 0.43137254901960786
Secmi3 statiscti: 9.456559499577857, p-val 0.21568627450980393


In [None]:
Z = np.concatenate((Z2, Z3), 1)
s2_stat, s2_p = cond_indep_test_permutation(X, Y, Z, 20, "secmi2")
s3_stat, s3_p = cond_indep_test_permutation(X, Y, Z, 20, "secmi3")

# print(f"Secmi2 statiscti: {s2_stat}, p-val {s2_p}")
# print(f"Secmi3 statiscti: {s3_stat}, p-val {s3_p}")

Secmi2 statiscti: 10.615579106957195, p-val 0.11764705882352941
Secmi3 statiscti: 19.894305754327103, p-val 0.0392156862745098


In [None]:
def perform_testing(sample_fun):
  s2_p_1_list = []
  s3_p_1_list = []

  s2_p_0_list = []
  s3_p_0_list = []


  for i in tqdm(range(100), "Calculating..."):
    X, Y, Z1, Z2, Z3 = sample_fun()
    Z = np.concatenate((Z1, Z2), 1)
    s2_stat, s2_p_1 = cond_indep_test_permutation(X, Y, Z, 20, "secmi2")
    s3_stat, s3_p_1 = cond_indep_test_permutation(X, Y, Z, 20, "secmi3")

    Z = np.concatenate((Z2, Z3), 1)
    s2_stat, s2_p_0 = cond_indep_test_permutation(X, Y, Z, 20, "secmi2")
    s3_stat, s3_p_0 = cond_indep_test_permutation(X, Y, Z, 20, "secmi3")

    s2_p_1_list.append(s2_p_1 < 0.05)
    s3_p_1_list.append(s3_p_1 < 0.05)

    s2_p_0_list.append(s2_p_0 < 0.05)
    s3_p_0_list.append(s3_p_0 < 0.05)

  return s2_p_1_list, s3_p_1_list, s2_p_0_list, s3_p_0_list

In [None]:
s2_p_1_list, s3_p_1_list, s2_p_0_list, s3_p_0_list = perform_testing(sample_from_model)

Calculating...: 100%|██████████| 100/100 [01:52<00:00,  1.12s/it]


In [None]:

print(f"Ratio of H0 rejection in case of secmi2 using Z2 and Z3 (should reject): {np.sum(s2_p_0_list)/100}")
print(f"Ratio of H0 rejection in case of using Z2 and Z3 (should reject): {np.sum(s3_p_0_list)/100}")

print(f"Ratio of H0 rejection in case of using Z1 and Z2 (should not reject): {np.sum(s2_p_1_list)/100}")
print(f"Ratio of H0 rejection in case of using Z1 and Z2 (should not reject): {np.sum(s3_p_1_list)/100}")

Ratio of H0 rejection in case of secmi2 using Z2 and Z3 (should reject): 0.95
Ratio of H0 rejection in case of using Z2 and Z3 (should reject): 0.95
Ratio of H0 rejection in case of using Z1 and Z2 (should not reject): 0.01
Ratio of H0 rejection in case of using Z1 and Z2 (should not reject): 0.03


### c)

In [None]:
def sample_from_model2(n=1000):
  X = np.random.binomial(1, 1/2, n)
  Z1 = np.random.binomial(1, 1/2, (1, n))
  Z2 = np.random.binomial(1, 1/2, (1, n))
  Z3 = np.random.binomial(1, 1/2, (1, n))

  _Y = X + Z1 + Z2
  Y = []
  for y in _Y.T:
    prob_of_1 = 0.8 if y % 2 == 1 else 0.2
    Y.append(np.random.binomial(1, prob_of_1, 1)[0])

  Y = np.array(Y)
  Y[Y==0] = -1
  X[X==0] = -1
  Z1[Z1==0] = -1
  Z2[Z2==0] = -1
  Z3[Z3==0] = -1

  return X, Y, Z1.T, Z2.T, Z3.T

In [None]:
s2_p_1_list, s3_p_1_list, s2_p_0_list, s3_p_0_list = perform_testing(sample_from_model2)

Calculating...: 100%|██████████| 100/100 [01:51<00:00,  1.12s/it]


In [None]:
print(f"Ratio of H0 rejection in case of secmi2 using Z2 and Z3: {np.sum(s2_p_0_list)/100}")
print(f"Ratio of H0 rejection in case of using Z2 and Z3: {np.sum(s3_p_0_list)/100}")

print(f"Ratio of H0 rejection in case of using Z1 and Z2: {np.sum(s2_p_1_list)/100}")
print(f"Ratio of H0 rejection in case of using Z1 and Z2: {np.sum(s3_p_1_list)/100}")

Ratio of H0 rejection in case of secmi2 using Z2 and Z3: 0.05
Ratio of H0 rejection in case of using Z2 and Z3: 0.05
Ratio of H0 rejection in case of using Z1 and Z2: 1.0
Ratio of H0 rejection in case of using Z1 and Z2: 1.0


## Task 2

in R