In [2]:
from sklearn.metrics import mutual_info_score
#from sklearn.feature_selection import chi2
from scipy.stats import chi2_contingency
from scipy.stats import chi2
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde, pearsonr
import seaborn as sns
import pandas as pd

# Mathematical Underpinnings - Lab 4

Tests to verify hypotheses of independence (from Lab 4):

In [3]:
def indep_test_asymptotic(X, Y, stat):
    """Asymptotic chi-square test of independence between two discrete samples.

    Parameters
    ----------
    X, Y : array-like of equal length
        Observed labels of the two variables.
    stat : {"mi", "chi2"}
        Which statistic to use. "mi" gives ``2 * n * MI(X, Y)`` with MI taken
        from ``mutual_info_score``; "chi2" gives the statistic reported by
        ``chi2_contingency`` on the contingency table of X and Y.

    Returns
    -------
    stat_value : float
        Value of the chosen statistic.
    p_value : float
        Upper-tail probability of a chi-square distribution with
        ``(#levels(X) - 1) * (#levels(Y) - 1)`` degrees of freedom.

    Raises
    ------
    ValueError
        If ``stat`` is not one of the supported names.
    """
    if stat == "mi":
        stat_value = 2*len(X)*mutual_info_score(X, Y)
    elif stat == "chi2":
        table = pd.crosstab(np.asarray(X), np.asarray(Y))
        # Positional access works whether SciPy returns a plain tuple or a
        # result object, so this does not depend on the SciPy version.
        stat_value = chi2_contingency(table)[0]
    else:
        raise ValueError(f"unknown stat {stat!r}; expected 'mi' or 'chi2'")

    df = (len(np.unique(X)) - 1)*(len(np.unique(Y)) - 1)

    # sf evaluates the upper tail directly; 1 - cdf rounds to exactly 0.0
    # once the statistic is large.
    p_value = chi2.sf(stat_value, df=df)

    return stat_value, p_value

def indep_test_permutation(X, Y, B, stat="mi"):
    """Permutation test of independence between two discrete samples.

    The null distribution is obtained by recomputing the statistic on B
    random permutations of X while Y is kept fixed.

    Parameters
    ----------
    X, Y : array-like of equal length
        Observed labels of the two variables.
    B : int
        Number of permutations.
    stat : {"mi", "chi2"}, default "mi"
        Statistic to permute. "mi" uses ``mutual_info_score``; "chi2" uses the
        statistic reported by ``chi2_contingency`` on the contingency table.

    Returns
    -------
    stat_value : float
        ``2 * n * MI(X, Y)`` for "mi", the chi-square statistic for "chi2".
    p_value : float
        ``(1 + #{permuted statistic >= observed}) / (1 + B)``.

    Raises
    ------
    ValueError
        If ``stat`` is not one of the supported names.
    """
    if stat == "mi":
        statistic = mutual_info_score
        # Permuted and observed values are compared unscaled; the 2n factor
        # is applied only to the reported statistic.
        scale = 2*len(X)
    elif stat == "chi2":
        def statistic(x, y):
            return chi2_contingency(pd.crosstab(np.asarray(x), np.asarray(y)))[0]
        scale = 1
    else:
        raise ValueError(f"unknown stat {stat!r}; expected 'mi' or 'chi2'")

    stat_value = statistic(X, Y)

    condition_p_value = 0
    for _ in range(B):
        X_b = np.random.permutation(X)

        if stat_value <= statistic(X_b, Y):
            condition_p_value += 1

    # The +1 terms count the observed sample as one of the permutations,
    # so the p-value is never exactly zero.
    p_value = (1 + condition_p_value)/(1 + B)

    return scale*stat_value, p_value

## Task 1

In [4]:
def cmi(X, Y, Z):
    """Plug-in estimate of the conditional mutual information I(X; Y | Z).

    Computed as the sum over observed values z of Z of the empirical
    frequency of ``Z == z`` times ``mutual_info_score`` of X and Y restricted
    to the samples with ``Z == z``.

    Parameters
    ----------
    X, Y, Z : array-like of equal length
        Observed labels of the three discrete variables.

    Returns
    -------
    float
        The estimate; 0 when the inputs are empty.

    Raises
    ------
    ValueError
        If the three inputs do not have the same length.
    """
    # Boolean-mask indexing below needs arrays; plain lists would compare to a
    # scalar bool instead of producing a mask.
    X, Y, Z = np.asarray(X), np.asarray(Y), np.asarray(Z)
    n = len(Z)
    if len(X) != n or len(Y) != n:
        raise ValueError("X, Y and Z must have the same length")

    stat = 0
    for z_value in np.unique(Z):
        in_stratum = Z == z_value
        stat += np.sum(in_stratum)/n * mutual_info_score(X[in_stratum], Y[in_stratum])
    return stat

### a)

In [5]:
def cond_indep_test_asymptotic(X, Y, Z):
    """Asymptotic chi-square test of conditional independence of X and Y given Z.

    Parameters
    ----------
    X, Y, Z : array-like of equal length
        Observed labels of the three discrete variables.

    Returns
    -------
    stat_value : float
        ``2 * n * cmi(X, Y, Z)``.
    p_value : float
        Upper-tail probability of a chi-square distribution with
        ``(#levels(X) - 1) * (#levels(Y) - 1) * #levels(Z)`` degrees of freedom.
    """
    stat_value = 2 * len(X) * cmi(X, Y, Z)
    df = (len(np.unique(X)) - 1) * (len(np.unique(Y)) - 1) * len(np.unique(Z))
    # sf evaluates the upper tail directly; 1 - cdf rounds to exactly 0.0
    # once the statistic is large.
    p_value = chi2.sf(stat_value, df=df)

    return stat_value, p_value

### b)

In [6]:
def cond_perm(X, Z):
    """Permute X independently within each group of equal Z values.

    Parameters
    ----------
    X : array-like
        Values to shuffle.
    Z : array-like, same length as X
        Group labels; values of X are only exchanged between positions that
        share the same label.

    Returns
    -------
    numpy.ndarray
        A new array with the same dtype as ``X`` in which, for every label z,
        the entries at positions with ``Z == z`` are a random permutation of
        the corresponding entries of X. The input is not modified.
    """
    X = np.asarray(X)
    Z = np.asarray(Z)
    # Start from a copy so the result keeps X's dtype (integer or string
    # labels are not cast to float) and unmatched positions keep their value.
    X_b = X.copy()
    for z in np.unique(Z):
        in_stratum = Z == z
        X_b[in_stratum] = np.random.permutation(X[in_stratum])
    return X_b

def cond_indep_test_permutation(X, Y, Z, B = 1000):
    """Permutation test of conditional independence of X and Y given Z.

    X is reshuffled B times with ``cond_perm`` (i.e. within groups of equal
    Z) and ``cmi`` is recomputed on each reshuffled sample.

    Parameters
    ----------
    X, Y, Z : array-like of equal length
        Observed labels of the three discrete variables.
    B : int, default 1000
        Number of conditional permutations.

    Returns
    -------
    stat_value : float
        ``cmi(X, Y, Z)`` on the observed data (not multiplied by 2n).
    p_value : float
        ``(1 + #{permuted cmi >= observed cmi}) / (1 + B)``.
    """
    observed_cmi = cmi(X, Y, Z)

    n_at_least_observed = 0
    for _ in range(B):
        permuted_cmi = cmi(cond_perm(X, Z), Y, Z)
        n_at_least_observed += 1 if observed_cmi <= permuted_cmi else 0

    return observed_cmi, (1 + n_at_least_observed)/(1 + B)

### c)

Conditionally independent case: $X$ and $Y$ are each generated from $Z$ only, so $X \perp Y \mid Z$.

In [7]:
# Z is the sign of a standard normal draw; X and Y are each the sign of a
# separate normal draw centred at Z/2, so they are linked only through Z.
Z = np.where(np.random.normal(0, 1, 1000) < 0, -1, 1)
X = np.where(np.random.normal(Z/2, 1, 1000) < 0, -1, 1)
Y = np.where(np.random.normal(Z/2, 1, 1000) < 0, -1, 1)

# Each test returns (statistic, p-value).
print("Asymptotic test ", cond_indep_test_asymptotic(X, Y, Z))
print("Permutation test ", cond_indep_test_permutation(X, Y, Z))

Asymptotic test  (0.8904258263055527, 0.6406878506392288)
Permutation test  (0.00044521291315277633, 0.6363636363636364)


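Conditionally dependent case: $X$ and $Y$ are generated independently and $Z$ depends on both, so conditioning on $Z$ makes them dependent.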

In [8]:
# X and Y are signs of two separate standard normal draws; Z is the sign of
# a normal draw centred at (X + Y)/2, so it depends on both of them.
X = np.where(np.random.normal(0, 1, 1000) < 0, -1, 1)
Y = np.where(np.random.normal(0, 1, 1000) < 0, -1, 1)
Z = np.where(np.random.normal((X + Y)/2, 1, 1000) < 0, -1, 1)

# Each test returns (statistic, p-value).
print("Asymptotic test ", cond_indep_test_asymptotic(X, Y, Z))
print("Permutation test ", cond_indep_test_permutation(X, Y, Z))

Asymptotic test  (24.34761771071448, 5.163949545972102e-06)
Permutation test  (0.012173808855357241, 0.000999000999000999)


## Task 2

In [1]:
def sample_from_model1(n=1000, rng=None):
    """Sample from model 1, where Z drives both X and Y (X <- Z -> Y).

    Z is the sign of a standard normal draw; X and Y are each the sign of a
    separate normal draw with mean Z/2 and unit variance.

    Parameters
    ----------
    n : int, default 1000
        Number of observations.
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of randomness. When omitted, the global ``np.random`` state is
        used.

    Returns
    -------
    X, Y, Z : numpy.ndarray
        Three arrays of length ``n`` with values in {-1, 1}.
    """
    rng = np.random if rng is None else rng
    Z = np.where(rng.normal(0, 1, n) < 0, -1, 1)
    X = np.where(rng.normal(Z/2, 1, n) < 0, -1, 1)
    Y = np.where(rng.normal(Z/2, 1, n) < 0, -1, 1)
    return X, Y, Z

def sample_from_model2(n=1000, rng=None):
    """Sample from model 2, a chain X -> Z -> Y.

    X is the sign of a standard normal draw, Z is the sign of a normal draw
    with mean X/2, and Y is the sign of a normal draw with mean Z/2 (all with
    unit variance).

    Parameters
    ----------
    n : int, default 1000
        Number of observations.
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of randomness. When omitted, the global ``np.random`` state is
        used.

    Returns
    -------
    X, Y, Z : numpy.ndarray
        Three arrays of length ``n`` with values in {-1, 1}.
    """
    rng = np.random if rng is None else rng
    X = np.where(rng.normal(0, 1, n) < 0, -1, 1)
    Z = np.where(rng.normal(X/2, 1, n) < 0, -1, 1)
    Y = np.where(rng.normal(Z/2, 1, n) < 0, -1, 1)
    return X, Y, Z

def sample_from_model3(n=1000, rng=None):
    """Sample from model 3, where X and Y both drive Z (X -> Z <- Y).

    X and Y are signs of two separate standard normal draws; Z is the sign of
    a normal draw with mean (X + Y)/2 and unit variance.

    Parameters
    ----------
    n : int, default 1000
        Number of observations.
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of randomness. When omitted, the global ``np.random`` state is
        used.

    Returns
    -------
    X, Y, Z : numpy.ndarray
        Three arrays of length ``n`` with values in {-1, 1}.
    """
    rng = np.random if rng is None else rng
    X = np.where(rng.normal(0, 1, n) < 0, -1, 1)
    Y = np.where(rng.normal(0, 1, n) < 0, -1, 1)
    Z = np.where(rng.normal((X + Y)/2, 1, n) < 0, -1, 1)
    return X, Y, Z

### a)

answer:

### b)

In [None]:
# For one sample from each model, compare the marginal dependence MI(X, Y)
# with the conditional dependence CMI(X, Y | Z).
for model_id, sampler in enumerate(
    (sample_from_model1, sample_from_model2, sample_from_model3), start=1
):
    X, Y, Z = sampler()
    mi_XY = mutual_info_score(X, Y)
    mi_XY_Z = cmi(X, Y, Z)

    print(f"Model {model_id} -> MI(X,Y): {mi_XY}, CMI(X,Y|Z): {mi_XY_Z}")

### c)