# Mathematical Underpinnings - Lab 6

In [None]:
from sklearn.metrics import mutual_info_score
import numpy as np
import pandas as pd
from tqdm import tqdm

## Useful functions

In [12]:
def discretize_2bins(X):
    X_discrete = np.where(X >= 0, 1, -1)
    return X_discrete

In [3]:
def conditional_permutation(X, Z):

    z_values = np.unique(Z)
    n_z_values = len(z_values)
    n = len(Z)

    X_b = np.zeros(n)

    for i in range(n_z_values):

        z_value_tmp = z_values[i]

        X_b[Z == z_value_tmp] = np.random.permutation(X[Z == z_value_tmp])

    return X_b

In [4]:
def conditional_mutual_information(X, Y, Z):

    z_values = np.unique(Z)
    n_z_values = len(z_values)
    n = len(Z)

    cmi = 0

    for i in range(n_z_values):

        z_value_tmp = z_values[i]
        z_condition = (Z == z_value_tmp)

        X_z = X[z_condition]
        Y_z = Y[z_condition]

        mi_XY_z = mutual_info_score(X_z, Y_z)
        p_z = np.sum(z_condition)/n

        cmi += p_z*mi_XY_z

    return cmi

In [5]:
# II(X;Y;Z)
def interaction_information(X, Y, Z):
    return conditional_mutual_information(X, Y, Z) - mutual_info_score(X, Y)

In [6]:
# II(X;Y;Z1;Z2)
def interaction_information2(X, Y, Z1, Z2):
    Z_1_and_2 = 2*Z2 + Z1
    return interaction_information(X, Y, Z_1_and_2) - interaction_information(X, Y, Z1) - interaction_information(X, Y, Z2)

## Task 1

In [8]:
def secmi2(X, Y, Z):
    value = mutual_info_score(X, Y)
    for i in range(Z.shape[1]):
        value += conditional_mutual_information(X, Y, Z[:,i])
    return value

def secmi3(X, Y, Z):
    value = secmi2(X, Y, Z)
    for i in range(Z.shape[1]):
        for j in range(i+1, Z.shape[1]):
            value += interaction_information2(X, Y, Z[:,i], Z[:,j])
    return value

### a)

In [14]:
def cond_indep_test_permutation(X, Y, Z, B, stat):

    n_col_Z = Z.shape[1]
    Z_1dim = np.dot(Z, 2**np.linspace(0, n_col_Z-1, n_col_Z))

    if stat == "cmi":
        stat_value = conditional_mutual_information(X, Y, Z_1dim)
    if stat == "secmi2":
        stat_value = secmi2(X, Y, Z)
    if stat == "secmi3":
        stat_value = secmi3(X, Y, Z)

    condition_p_value = 0
    for b in range(B):
        X_b = conditional_permutation(X, Z_1dim)

        if stat == "cmi":
            stat_value_b = conditional_mutual_information(X_b, Y, Z_1dim)
        if stat == "secmi2":
            stat_value_b = secmi2(X_b, Y, Z)
        if stat == "secmi3":
            stat_value_b = secmi3(X_b, Y, Z)

        if stat_value <= stat_value_b:
            condition_p_value += 1

    p_value = (1 + condition_p_value)/(1 + B)

    return 2*len(X)*stat_value, p_value

### b)

In [48]:
def sample_model_1(n = 100, seed = None):
    if seed is not None:
        np.random.seed(seed)
    Y = discretize_2bins(np.random.randn(n))
    Z = np.zeros((n, 3))
    for i in range(3):
        Z[:,i] = discretize_2bins(np.random.randn(n) + Y)
    X = discretize_2bins(np.random.randn(n) + Z[:, 0])
    return X, Y, Z

In [50]:
B = 100
N = 100

Only first conditional independence is true, i.e., $X \perp \!\!\! \perp Y | (Z_1, Z_2)$.

The second hypothesis is false, i.e., $X \not\perp \!\!\! \perp Y | (Z_2, Z_3)$.

In [51]:
result = []
for i in tqdm(range(N)):
    X, Y, Z = sample_model_1(100, i)
    for stat in ["cmi", "secmi2", "secmi3"]:
        for condition in ["Z1,Z2", "Z2,Z3"]:
            Z_c = Z[:, [int(var[1])-1 for var in condition.split(",")]]
            stat_value, p_value = cond_indep_test_permutation(X, Y, Z_c, B, stat)
            result.append({"stat": stat, "condition": condition, "stat_value": stat_value, "p_value": p_value})
            

100%|██████████| 100/100 [02:41<00:00,  1.61s/it]


In [53]:
res_df = pd.DataFrame(result)

In [54]:
res_df["is_rejected"] = res_df["p_value"] < 0.05

In [55]:
res_df.groupby(["condition", "stat"])["is_rejected"].sum()

condition  stat  
Z1,Z2      cmi        5
           secmi2     4
           secmi3     5
Z2,Z3      cmi       68
           secmi2    85
           secmi3    85
Name: is_rejected, dtype: int64

All tests more often reject the null hypothesis for the second considered case - as expected. It's also visible that tests based on SECMI2 and SECMI3 are more powerful than the one based on CMI. 

### c)

In [52]:
def sample_model_2(n=100, seed=None):
    if seed is not None:
        np.random.seed(seed)
    X = np.random.binomial(1, 0.5, n)
    Z = np.zeros((n, 3))
    for i in range(3):
        Z[:,i] = np.random.binomial(1, 0.5, n)
    condition = ((X + Z[:,0] + Z[:,1]) % 2 == 1)
    Y = np.random.binomial(1, np.where(condition, 0.8, 0.2), n)
    return X, Y, Z

In this model, the first conditional independence is not true, i.e., $X \not\perp \!\!\! \perp Y | (Z_1, Z_2)$ as knowing $Z_1$ and $Z_2$ values gives us exact information about probability of $Y=1$ given $X=x$ -- value of $Y$ is dependent on $X$. 

However, the second conditional independence is true, i.e., $X \perp \!\!\! \perp Y | (Z_2, Z_3)$ as knowing $Z_2$ only gives us no additional information about probability of $Y=1$ given $X=x$.

In [56]:
result2 = []
for i in tqdm(range(N)):
    X, Y, Z = sample_model_2(100, i)
    for stat in ["cmi", "secmi2", "secmi3"]:
        for condition in ["Z1,Z2", "Z2,Z3"]:
            Z_c = Z[:, [int(var[1]) - 1 for var in condition.split(",")]]
            stat_value, p_value = cond_indep_test_permutation(X, Y, Z_c, B, stat)
            result2.append({"stat": stat, "condition": condition, "stat_value": stat_value, "p_value": p_value})

100%|██████████| 100/100 [02:40<00:00,  1.61s/it]


In [57]:
res_df2 = pd.DataFrame(result2)

In [58]:
res_df2["is_rejected"] = res_df2["p_value"] < 0.05

In [59]:
res_df2.groupby(["condition", "stat"])["is_rejected"].sum()

condition  stat  
Z1,Z2      cmi       100
           secmi2      2
           secmi3     99
Z2,Z3      cmi         6
           secmi2      4
           secmi3      2
Name: is_rejected, dtype: int64

All tests correctly more often do not reject the null hypothesis for the second considered case. However, for the first condition, tests based on CMI and SECMI3 work as expected, but the test based on SECMI2 is not able to reject the null hypothesis in most experiments. It is due to the fact that SECMI2 is able to capture only two-way interactions, and in this case, the 3-way interaction is crucial.

## Task 2
 
in R