# Chains, forks and colliders

# 1. Chains

In [1]:
from numpy.random import seed, binomial
from numpy import corrcoef

# Seed NumPy's legacy global generator so every draw below is reproducible.
seed(1234)

# Number of simulated observations per variable.
n = 10000

# Chain x -> y -> z: each variable is a 0/1 draw whose success probability
# is selected by the value of its parent.
x = binomial(1, 0.5, n)

# y: success probability 0.3 where x == 1 and 0.7 where x == 0.
y_given_x1 = binomial(1, 0.3, n)
y_given_x0 = binomial(1, 0.7, n)
y = x * y_given_x1 + (1 - x) * y_given_x0

# z: success probability 0.1 where y == 1 and 0.9 where y == 0.
z_given_y1 = binomial(1, 0.1, n)
z_given_y0 = binomial(1, 0.9, n)
z = y * z_given_y1 + (1 - y) * z_given_y0

# Unconditional correlation between the two ends of the chain
# (off-diagonal entry of the 2x2 correlation matrix).
corrcoef(x, z)[0, 1]

0.3127827977799799

Conditioning on y = 1 or y = 0, the correlation between x and z is
close to zero, whereas the unconditional correlation between x and z is
not.

In [2]:
# Correlation between x and z within each stratum of y (y == 1 first,
# then y == 0).
for y_level in (1, 0):
    in_stratum = y == y_level
    print(corrcoef(x[in_stratum], z[in_stratum])[0, 1])

0.0012156033890064705
-0.0192420175747101

# 2. Forks

In [3]:
# Fork y <- z -> x: z is drawn first, and both y and x take their success
# probability from z (0.3 where z == 1, 0.7 where z == 0), each with its
# own independent draws.
z = binomial(1, 0.5, n)

y_given_z1 = binomial(1, 0.3, n)
y_given_z0 = binomial(1, 0.7, n)
y = z * y_given_z1 + (1 - z) * y_given_z0

x_given_z1 = binomial(1, 0.3, n)
x_given_z0 = binomial(1, 0.7, n)
x = z * x_given_z1 + (1 - z) * x_given_z0

# Unconditional correlation between the two children of z.
corrcoef(y, x)[0, 1]

0.17360501378519805

Conditioning on z = 1 or z = 0, the correlation between x and y is
close to zero, whereas the unconditional correlation between x and y is
not.

In [4]:
# Correlation between x and y within each stratum of z (z == 1 first,
# then z == 0).
for z_level in (1, 0):
    in_stratum = z == z_level
    print(corrcoef(x[in_stratum], y[in_stratum])[0, 1])

0.0007579216031402372
0.024683306278260306

# 3. Colliders

In [5]:
# Collider x -> z <- y: x and y are drawn independently of each other, and
# z depends on both of them.
x = binomial(1, 0.5, n)
y = binomial(1, 0.5, n)

# 1 only where x and y are both 1.
both_one = x * y

# z: success probability 0.8 where x == y == 1, otherwise 0.2.
z_otherwise = binomial(1, 0.2, n)
z_given_both = binomial(1, 0.8, n)
z = (1 - both_one) * z_otherwise + both_one * z_given_both

# Unconditional correlation between the two parents of z.
corrcoef(y, x)[0, 1]

-0.0011859375518880448

The unconditional correlation between x and y is close to zero, whereas
the correlation conditional on z = 1 or z = 0 is not.

In [6]:
# Correlation between x and y within each stratum of the collider z
# (z == 1 first, then z == 0).
for z_level in (1, 0):
    in_stratum = z == z_level
    print(corrcoef(x[in_stratum], y[in_stratum])[0, 1])

0.29547733031171775
-0.29978176886553437