In [None]:
import numpy as np
from numpy.random import randn
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LinearRegression

# We need a lot of samples to plot the conditional distribution:
n_samples = 1000


def simulate(seed):
    """Draw one reproducible data set of ``n_samples`` rows from the linear model.

    Structural equations (each ``e*`` is independent standard-normal noise):

        x1, x2, x3 ~ N(0, 1)
        x5 = x1 + x2 + e5
        x4 = x5 + e4
        x6 = 0.8 * x5 + x2 + x3 + e6
        x7 = 2 * x6 + e7

    Parameters
    ----------
    seed : int
        Seed for a private ``numpy.random.Generator``. Calling the function
        twice with the same seed returns identical data.

    Returns
    -------
    pandas.DataFrame
        Columns ``x1`` .. ``x7`` (in that order) with ``n_samples`` rows.
    """
    # Use a local, seeded generator. Assigning the seed to
    # `np.random.default_rng` would overwrite that factory function and leave
    # every draw unseeded, so runs would not be reproducible.
    rng = np.random.default_rng(seed)

    def noise():
        # One fresh vector of independent standard-normal draws.
        return rng.standard_normal(n_samples)

    x1 = noise()
    x2 = noise()
    x3 = noise()
    x5 = x1 + x2 + noise()
    x4 = x5 + noise()
    x6 = 0.8 * x5 + x2 + x3 + noise()
    x7 = 2 * x6 + noise()
    return pd.DataFrame(
        {"x1": x1, "x2": x2, "x3": x3, "x4": x4, "x5": x5, "x6": x6, "x7": x7}
    )

def linear_coeff(X,Y):
    """Fit a scikit-learn ``LinearRegression`` of ``Y`` on ``X`` and return its slopes.

    Parameters
    ----------
    X : array-like
        Regressors, passed unchanged to ``LinearRegression.fit``.
    Y : array-like
        Regression target(s), passed unchanged to ``LinearRegression.fit``.

    Returns
    -------
    The ``coef_`` attribute of the fitted model. The intercept is fitted
    (default estimator settings) but not returned.
    """
    # `fit` returns the fitted estimator, so its coefficients can be read
    # straight off the chained call.
    return LinearRegression().fit(X, Y).coef_


In [121]:
# Estimated x5 coefficient of the regression of x7, collected once per
# simulated data set and per adjustment set.
unadjusted_coeff, parents_coeff, X2X3_coeff, X2X3X4_coeff = [], [], [], []

# Seeds 1 .. 999, i.e. 999 simulated data sets.
for i in range(1, 1000):
    df = simulate(i)
    Y = df[["x7"]].to_numpy()

    # No adjustment: x5 is the only regressor.
    unadjusted_coeff.append(linear_coeff(df["x5"].to_numpy().reshape(-1, 1), Y))

    # Adjust for the parents of x5 (x1 and x2); x5 is the last regressor.
    X5_parents = df[["x1", "x2", "x5"]].to_numpy()
    parents_coeff.append(linear_coeff(X5_parents, Y)[:, -1])

    # Adjust for x2 and x3; x5 is again the last regressor.
    X235 = df[["x2", "x3", "x5"]].to_numpy()
    X2X3_coeff.append(linear_coeff(X235, Y)[:, -1])

    # Adjust for x2, x3 and x4; x5 is again the last regressor.
    X2345 = df[["x2", "x3", "x4", "x5"]].to_numpy()
    X2X3X4_coeff.append(linear_coeff(X2345, Y)[:, -1])

In [122]:
# Mean and standard deviation (np.std, not the variance) of the x5 coefficient
# estimated without any adjustment
print(np.mean(unadjusted_coeff), np.std(unadjusted_coeff))

2.2705492081250083 0.06224298552474189


In [123]:
# Mean and standard deviation of the x5 coefficient when the parents of X5
# (X1 and X2) are used as adjustment
print(np.mean(parents_coeff), np.std(parents_coeff))

1.6046955308327764 0.09475652355644011


In [118]:
# Mean and standard deviation of the x5 coefficient when X2 and X3 are used
# as adjustment
print(np.mean(X2X3_coeff), np.std(X2X3_coeff))

1.5992266833778408 0.05020348490675183


In [120]:
# Mean and standard deviation of the x5 coefficient when X2, X3 and X4 are
# used as adjustment
print(np.mean(X2X3X4_coeff), np.std(X2X3X4_coeff))

1.596606679171312 0.0866883931867985
