In [1]:
import numpy as np
from scipy import stats

In [4]:
class BookSCM:
    """Structural causal model (SCM) with two binary variables, A and B.

    A is driven by uniform noise ``u_0`` and B by A plus scaled Gaussian
    noise ``u_1``:

        A := 1[u_0 > 0.61]
        B := 1[A + 0.5 * u_1 > 0.2]

    Parameters
    ----------
    random_seed : int or None, optional
        Seed applied to NumPy's global random state at the start of every
        ``sample`` call. ``None`` (the default) leaves the global state
        untouched, so successive calls produce different draws.
    """

    def __init__(self, random_seed=None):
        self.random_seed = random_seed
        # Frozen noise distributions; draws are taken in ``sample``.
        self.u_0 = stats.uniform()
        self.u_1 = stats.norm()

    def sample(self, sample_size=100):
        """Draw ``sample_size`` joint observations of (A, B) from the SCM.

        Parameters
        ----------
        sample_size : int, optional
            Number of observations to draw (default 100).

        Returns
        -------
        tuple of numpy.ndarray
            ``(a, b)``: two boolean arrays of shape ``(sample_size,)``.
        """
        # Compare against None explicitly: 0 is a valid seed, but a plain
        # truthiness test would treat it as "no seed given".
        if self.random_seed is not None:
            # The frozen distributions draw from NumPy's global random state
            # by default, so reseeding it makes every call repeatable.
            np.random.seed(self.random_seed)

        u_0 = self.u_0.rvs(sample_size)
        u_1 = self.u_1.rvs(sample_size)

        a = u_0 > .61
        # The boolean ``a`` is promoted to 0.0 / 1.0 in the arithmetic below.
        b = (a + .5 * u_1) > .2
        return a, b

In [5]:
# Instantiate the SCM with a fixed seed so that sampling is repeatable.
scm = BookSCM(random_seed=45)

In [6]:
# Unpack both returned arrays, A first and then B. Repeating the same target
# name would discard A and leave `buy_book_b` undefined.
buy_book_a, buy_book_b = scm.sample(100)

In [9]:
# Display the shapes of both sampled arrays as a quick sanity check.
buy_book_a.shape, buy_book_b.shape

((100,), (100,))

In [12]:
# Observational regime: A is pure noise and B depends linearly on A.
SAMPLE_SIZE = 100

# Reseed NumPy's global generator so the draws below are repeatable.
np.random.seed(45)
u_0 = np.random.standard_normal(SAMPLE_SIZE)
u_1 = np.random.standard_normal(SAMPLE_SIZE)

# Structural assignments: A := U_0 and B := 5 * A + U_1.
a = u_0
b = u_1 + 5 * a

# Pearson correlation between A and B, together with its p-value.
r, p = stats.pearsonr(a, b)

print(f"Mean of B before any intervention: {b.mean():.3f}")
print(f"Variance of B before any intervention: {b.var():.3f}")
print(f'Correlation between A and B:\nr = {r:.3f}; p ={p:.3f}\n')

Mean of B before any intervention: -0.620
Variance of B before any intervention: 22.667
Correlation between A and B:
r = 0.978; p =0.000



In [15]:
# Preview the constant vector used to fix A at 1.5 for every sample.
np.full(SAMPLE_SIZE, 1.5)

array([1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5,
       1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5,
       1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5,
       1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5,
       1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5,
       1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5,
       1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5,
       1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5])

In [14]:
# Intervention do(A = 1.5): A is set to a constant for every sample, while B
# keeps the same structural equation and the noise `u_1` drawn in an earlier cell.
a = np.full(SAMPLE_SIZE, 1.5)
b = u_1 + 5 * a

print(f'Mean of B after the intervention on A: {b.mean():.3f}')
print(f'Variance of B after the intervention on A:{b.var():.3f}\n')

Mean of B after the intervention on A: 7.575
Variance of B after the intervention on A:1.003



In [1]:
# maximal information coefficient (MIC)
# Hilbert-Schmidt independence criterion (HSIC)