In [14]:
from sklearn.metrics import mutual_info_score
#from sklearn.feature_selection import chi2
from scipy.stats import chi2_contingency
from scipy.stats import chi2
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde, pearsonr
import seaborn as sns
import pandas as pd

# Mathematical Underpinnings - Lab 4

Tests to verify hypotheses of independence (from Lab 4):

In [1]:
def indep_test_asymptotic(X, Y, stat):
    """Asymptotic test of independence between two discrete samples.

    The chosen statistic is referred to a chi-squared distribution with
    (|X| - 1) * (|Y| - 1) degrees of freedom, where |X| and |Y| are the
    numbers of distinct observed values.

    Parameters
    ----------
    X, Y : array-like of equal length
        Paired observations of two discrete variables.
    stat : {"mi", "chi2"}
        "mi"   -- use 2 * n * mutual_info_score(X, Y).
        "chi2" -- use the statistic returned by scipy.stats.chi2_contingency
                  on the contingency table of X and Y (SciPy defaults, so
                  Yates' continuity correction is applied when the table has
                  one degree of freedom).

    Returns
    -------
    (stat_value, p_value) : tuple of float

    Raises
    ------
    ValueError
        If `stat` is not one of the supported names.
    """
    if stat == "mi":
        stat_value = 2 * len(X) * mutual_info_score(X, Y)
    elif stat == "chi2":
        # Position 0 holds the statistic for both the older plain-tuple return
        # value and the newer result object, so this works across SciPy versions.
        stat_value = chi2_contingency(pd.crosstab(X, Y))[0]
    else:
        # Previously an unknown name fell through and crashed later with an
        # UnboundLocalError on `stat_value`.
        raise ValueError(f"unknown stat {stat!r}; expected 'mi' or 'chi2'")

    df = (len(np.unique(X)) - 1) * (len(np.unique(Y)) - 1)

    # Survival function instead of 1 - cdf: avoids cancellation in the far tail.
    p_value = chi2.sf(stat_value, df=df)

    return stat_value, p_value

In [2]:
def indep_test_permutation(X, Y, B, stat="mi"):
    """Permutation test of independence between two discrete samples.

    X is shuffled B times with np.random.permutation; the p-value is the
    proportion of shuffles whose statistic is at least the observed one,
    with the usual +1 in numerator and denominator.

    Parameters
    ----------
    X, Y : array-like of equal length
        Paired observations of two discrete variables.
    B : int
        Number of permutations.
    stat : {"mi", "chi2"}, default "mi"
        "mi"   -- compare mutual_info_score values; the returned statistic is
                  scaled to 2 * n * MI.
        "chi2" -- compare and return the statistic from
                  scipy.stats.chi2_contingency on the contingency table.

    Returns
    -------
    (stat_value, p_value) : tuple of float

    Raises
    ------
    ValueError
        If `stat` is not one of the supported names.
    """
    # `stat` used to be accepted but ignored (MI was always used).
    if stat == "mi":
        def compute_stat(x, y):
            return mutual_info_score(x, y)
    elif stat == "chi2":
        def compute_stat(x, y):
            return chi2_contingency(pd.crosstab(x, y))[0]
    else:
        raise ValueError(f"unknown stat {stat!r}; expected 'mi' or 'chi2'")

    stat_value = compute_stat(X, Y)

    condition_p_value = 0
    for _ in range(B):
        X_b = np.random.permutation(X)
        if stat_value <= compute_stat(X_b, Y):
            condition_p_value += 1

    p_value = (1 + condition_p_value) / (1 + B)

    # Comparisons are done on the raw statistic; only the reported MI value is
    # rescaled, exactly as before.
    scale = 2 * len(X) if stat == "mi" else 1
    return scale * stat_value, p_value

## Task 1

In [24]:
# a function which computes CMI
def cond_mutual_score(X, Y, Z):
    """Return the conditional mutual information of X and Y given Z.

    For every distinct value of Z, the mutual information of X and Y is
    computed on the rows where Z takes that value and weighted by the share
    of rows in that group; the weighted terms are added up.
    X, Y and Z must support boolean-mask indexing (e.g. NumPy arrays).
    """
    n_total = len(X)
    total = 0
    for level in np.unique(Z):
        in_stratum = Z == level
        x_part = X[in_stratum]
        y_part = Y[in_stratum]
        total += mutual_info_score(x_part, y_part) * len(x_part) / n_total
    return total

### a)

In [25]:
# CI test based on CMI and asymptotics
def cond_indep_test_asymptotic(X, Y, Z):
    """Asymptotic conditional-independence test of X and Y given Z.

    The statistic is 2 * n * cond_mutual_score(X, Y, Z). It is referred to a
    chi-squared distribution with (|X| - 1) * (|Y| - 1) * |Z| degrees of
    freedom, where |.| counts the distinct observed values.

    Returns
    -------
    (stat_value, p_value) : tuple of float
    """
    stat_value = 2 * len(X) * cond_mutual_score(X, Y, Z)
    df = (len(np.unique(X)) - 1) * (len(np.unique(Y)) - 1) * len(np.unique(Z))
    # Survival function instead of 1 - cdf: for large statistics the
    # subtraction cancels to exactly 0.0, while sf keeps the tail value.
    p_value = chi2.sf(stat_value, df=df)
    return stat_value, p_value

### b)

In [26]:
def cond_permute(X, Z):
    """Return a copy of X shuffled separately within each group of Z.

    Entries only move between positions that share the same value of Z.
    The input X is left untouched.
    """
    shuffled = X.copy()
    for level in np.unique(Z):
        in_stratum = Z == level
        shuffled[in_stratum] = np.random.permutation(shuffled[in_stratum])
    return shuffled

In [27]:
# CI test based on CMI and permutations
def cond_indep_test_permutation(X, Y, Z, B):
    """Permutation conditional-independence test of X and Y given Z.

    X is shuffled within the groups of Z (via cond_permute) B times. The
    p-value is (1 + number of shuffles whose CMI is at least the observed
    CMI) / (1 + B). The returned statistic is 2 * n * observed CMI.
    """
    observed_cmi = cond_mutual_score(X, Y, Z)

    n_at_least = 0
    for _ in range(B):
        permuted_cmi = cond_mutual_score(cond_permute(X, Z), Y, Z)
        if permuted_cmi >= observed_cmi:
            n_at_least += 1

    return 2 * len(X) * observed_cmi, (1 + n_at_least) / (1 + B)

### c)

In [21]:
def discretize(A_dash, thr=0):
    """Map values to +1 where they are >= thr and to -1 elsewhere."""
    at_or_above = A_dash >= thr
    return np.where(at_or_above, 1, -1)

In [11]:
# Sample size used by the data-generating cells below.
n=1000

### conditionally independent

In [22]:
# Z is drawn first and binarised to +/-1.
Z_dash = np.random.normal(0, 1, n)
Z = discretize(Z_dash)

# X depends on Z only: independent Gaussian noise shifted by Z / 2.
X_dash = np.random.normal(0, 1, n) + Z / 2
X = discretize(X_dash)

# Y is built the same way, with its own noise; Z is the only shared input.
Y_dash = np.random.normal(0, 1, n) + Z / 2
Y = discretize(Y_dash)

In [32]:
# Asymptotic CMI-based test on the current X, Y, Z sample.
stat_value, p_value = cond_indep_test_asymptotic(X,Y,Z)
print("1. Asymptotic test of conditional independence:")
print(f"- Statistic value: {stat_value:.4f}\n- p value: {p_value:.4f}\n")

1. Asymptotic test of conditional independence:
- Statistic value: 0.6252
- p value: 0.7315



In [33]:
# Permutation CMI-based test on the current X, Y, Z sample, 100 shuffles.
stat_value, p_value = cond_indep_test_permutation(X, Y, Z, B=100)
print("2. Conditional permutation test:")
print(f"- Statistic value: {stat_value:.4f}\n- p value: {p_value:.4f}\n")

2. Conditional permutation test:
- Statistic value: 0.6252
- p value: 0.7426



### conditionally dependent

In [34]:
# X and Y are generated from separate noise draws, with no shared input.
X_dash = np.random.normal(0, 1, n)
X = discretize(X_dash)

Y_dash = np.random.normal(0, 1, n)
Y = discretize(Y_dash)

# Z depends on both X and Y: Gaussian noise shifted by (X + Y) / 2.
Z_dash = np.random.normal(0, 1, n) + (X + Y) / 2
Z = discretize(Z_dash)

In [35]:
# Asymptotic CMI-based test on the current X, Y, Z sample.
stat_value, p_value = cond_indep_test_asymptotic(X,Y,Z)
print("1. Asymptotic test of conditional independence:")
print(f"- Statistic value: {stat_value:.4f}\n- p value: {p_value:.4f}\n")

1. Asymptotic test of conditional independence:
- Statistic value: 33.6657
- p value: 0.0000



In [36]:
# Permutation CMI-based test on the current X, Y, Z sample, 100 shuffles.
stat_value, p_value = cond_indep_test_permutation(X, Y, Z, B=100)
print("2. Conditional permutation test:")
print(f"- Statistic value: {stat_value:.4f}\n- p value: {p_value:.4f}\n")

2. Conditional permutation test:
- Statistic value: 33.6657
- p value: 0.0099



## Task 2

In [60]:
def sample_from_model1(n=1000):
    """Draw n samples (X, Y, Z) in which Z feeds into both X and Y.

    Z is a binarised standard normal; X and Y are binarised normals with
    mean Z / 2 and unit scale, each with its own noise draw.
    """
    Z = discretize(np.random.normal(0, 1, n))
    shift = Z / 2
    X = discretize(np.random.normal(shift, 1, n))
    Y = discretize(np.random.normal(shift, 1, n))
    return X, Y, Z

def sample_from_model2(n=1000):
    """Draw n samples (X, Y, Z) from the chain X -> Z -> Y.

    X is a binarised standard normal, Z a binarised normal with mean X / 2,
    and Y a binarised normal with mean Z / 2 (all with unit scale).

    Parameters
    ----------
    n : int, default 1000
        Number of samples. Previously the function read a notebook-level
        global `n`; it is now an explicit argument, matching
        sample_from_model1.
    """
    X_dash = np.random.normal(0, 1, n)
    X = discretize(X_dash)
    Z_dash = np.random.normal(X / 2, 1, n)
    Z = discretize(Z_dash)
    Y_dash = np.random.normal(Z / 2, 1, n)
    Y = discretize(Y_dash)
    return X, Y, Z

def sample_from_model3(m=1000):
    """Draw m samples (X, Y, Z) in which X and Y both feed into Z.

    X and Y are binarised standard normals drawn separately; Z is a
    binarised normal with mean (X + Y) / 2 and unit scale.

    Parameters
    ----------
    m : int, default 1000
        Number of samples. The parameter used to be ignored in favour of a
        notebook-level global `n`; it now controls the sample size.
    """
    X_dash = np.random.normal(0, 1, m)
    X = discretize(X_dash)
    Y_dash = np.random.normal(0, 1, m)
    Y = discretize(Y_dash)
    Z_dash = np.random.normal((X + Y) / 2, 1, m)
    Z = discretize(Z_dash)
    return X, Y, Z

### a)

answer:
- Model1:
    - X and Y are **dependent**
    - X and Y are conditionally **independent** given Z
- Model2:
    - X and Y are **dependent**
    - X and Y are conditionally **independent** given Z
- Model3:
    - X and Y are **independent**
    - X and Y are conditionally **dependent** given Z

### b)

In [61]:
# One sample from each of the three models, using their default sizes.
X1, Y1, Z1 = sample_from_model1()
X2, Y2, Z2 = sample_from_model2()
X3, Y3, Z3 = sample_from_model3()

#### Model1

In [52]:
# Marginal MI of (X1, Y1) next to the CMI of (X1, Y1) given Z1.
mi = mutual_info_score(X1, Y1)
cmi = cond_mutual_score(X1, Y1, Z1)
print(f"- MI: {mi:.4f}\n- CMI: {cmi:.4f}")

- MI: 0.0050
- CMI: 0.0026


#### Model2

In [53]:
# Marginal MI of (X2, Y2) next to the CMI of (X2, Y2) given Z2.
mi = mutual_info_score(X2, Y2)
cmi = cond_mutual_score(X2, Y2, Z2)
print(f"- MI: {mi:.4f}\n- CMI: {cmi:.4f}")

- MI: 0.0104
- CMI: 0.0006


#### Model3

In [54]:
# Marginal MI of (X3, Y3) next to the CMI of (X3, Y3) given Z3.
mi = mutual_info_score(X3, Y3)
cmi = cond_mutual_score(X3, Y3, Z3)
print(f"- MI: {mi:.4f}\n- CMI: {cmi:.4f}")

- MI: 0.0000
- CMI: 0.0130


### c)

In [56]:
def print_values(stat_value, p_value):
    """Print a statistic and its p-value on one line, four decimals each."""
    line = f"statistic value: {stat_value:.4f}, p-value: {p_value:.4f}"
    print(line)

def run_tests(X, Y, Z, B=100):
    """Run and print all four tests for one (X, Y, Z) sample.

    In order: asymptotic and permutation tests of independence of X and Y,
    then asymptotic and permutation tests of conditional independence of
    X and Y given Z. B is the number of permutations for both permutation
    tests.
    """
    print("1. Independence tests")
    print("1.1. Asymptotic test of independence with mutual information:")
    print_values(*indep_test_asymptotic(X, Y, 'mi'))

    print("1.2. Permutation test of independence:")
    print_values(*indep_test_permutation(X, Y, B, stat="mi"))

    print("\n\n2. Conditional independence tests: ")
    print("2.1. Asymptotic test of conditional independence with mutual information:")
    print_values(*cond_indep_test_asymptotic(X, Y, Z))

    print("2.2. Permutation test of conditional independence:")
    print_values(*cond_indep_test_permutation(X, Y, Z, B))


### Model1

In [62]:
# All four tests on the model-1 sample, with 100 permutations.
run_tests(X1, Y1, Z1, B=100)

1. Independence tests
1.1. Asymptotic test of independence with mutual information:
statistic value: 21.7094, p-value: 0.0000
1.2. Permutation test of independence:
statistic value: 21.7094, p-value: 0.0099


2. Conditional independence tests: 
2.1. Asymptotic test of conditional independence with mutual information:
statistic value: 0.8838, p-value: 0.6428
2.2. Permutation test of conditional independence:
statistic value: 0.8838, p-value: 0.7129


Conclusions:
- [Independence tests] *p-value* < 0.05, **reject** hypothesis of independence
- [Conditional independence tests] *p-value* > 0.05, **fail to reject** hypothesis of conditional independence

### Model2

In [58]:
# All four tests on the model-2 sample, with 100 permutations.
run_tests(X2, Y2, Z2, B=100)

1. Independence tests
1.1. Asymptotic test of independence with mutual information:
statistic value: 20.7841, p-value: 0.0000
1.2. Permutation test of independence:
statistic value: 20.7841, p-value: 0.0099


2. Conditional independence tests: 
2.1. Asymptotic test of conditional independence with mutual information:
statistic value: 1.1160, p-value: 0.5724
2.2. Permutation test of conditional independence:
statistic value: 1.1160, p-value: 0.5347


Conclusions:
- [Independence tests] *p-value* < 0.05, **reject** hypothesis of independence
- [Conditional independence tests] *p-value* > 0.05, **fail to reject** hypothesis of conditional independence

### Model3

In [59]:
# All four tests on the model-3 sample, with 100 permutations.
run_tests(X3, Y3, Z3, B=100)

1. Independence tests
1.1. Asymptotic test of independence with mutual information:
statistic value: 0.0676, p-value: 0.7949
1.2. Permutation test of independence:
statistic value: 0.0676, p-value: 0.8218


2. Conditional independence tests: 
2.1. Asymptotic test of conditional independence with mutual information:
statistic value: 26.0859, p-value: 0.0000
2.2. Permutation test of conditional independence:
statistic value: 26.0859, p-value: 0.0099


Conclusions:
- [Independence tests] *p-value* > 0.05 => **fail to reject** hypothesis of independence
- [Conditional independence tests] *p-value* < 0.05 => **reject** hypothesis of conditional independence