# Chapter 16: Difficulties of Unconfoundedness in Observational Studies for Causal Effects

In [1]:
import numpy as np
import pandas as pd
import scipy as sp
import statsmodels.api as sm
import statsmodels.formula.api as smf
# viz
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
# Notebook-wide matplotlib defaults: typeface for all text and a square
# default figure size.
font = dict(family='IBM Plex Sans Condensed', weight='normal', size=10)
plt.rc('font', **font)
plt.rcParams.update({'figure.figsize': (10, 10)})
%matplotlib inline

from utils import *


In [2]:
# Seed NumPy's global RNG so that Restart & Run All reproduces the same
# draws. The lambdas below (and later cells) call np.random.normal, which
# reads this global state.
np.random.seed(0)

n = int(1e6)

# Data-generating process for the M-bias example:
#   U1, U2 : independent standard-normal variables
#   X      : depends on both U1 and U2 (plus noise)
#   Y      : depends on U2 only (plus noise)
# NOTE(review): `simulate` comes from utils; presumably it evaluates each
# lambda in order, feeding earlier columns in by argument name, and returns
# a DataFrame with one column per keyword -- confirm against utils.simulate.
df = simulate(
        U1 = lambda: np.random.normal(size = n),
        U2 = lambda: np.random.normal(size = n),
        X = lambda U1, U2: U1 + U2 + np.random.normal(size=n),
        Y = lambda U2: U2 + np.random.normal(size=n),
    )

df.head()


Unnamed: 0,U1,U2,X,Y
0,0.868786,3.271211,2.870715,2.776858
1,0.102776,-1.424613,-0.343647,-2.323463
2,-0.4733,-0.808196,-2.437951,-0.117681
3,-0.524105,-0.641949,-0.149231,-0.537228
4,-0.183823,0.54047,-0.029903,1.374849


## M-bias

### continuous treatment


In [3]:

# Continuous treatment driven by U1 only. Y is generated from U2 alone, so
# the true coefficient of Z on Y is zero.
df['Z'] = df.U1 + np.random.normal(size=n)

# Compare the Z slope without and with adjustment for X.
# .iloc[1] picks the Z coefficient by position (index 0 is the intercept);
# plain `params[1]` relies on positional fallback of Series.__getitem__,
# which is deprecated in pandas 2.x.
(
    smf.ols("Y ~ Z", df).fit().params.iloc[1],
    smf.ols("Y ~ Z + X", df).fit().params.iloc[1],
)


(-0.0005873841517624718, -0.19992316827000112)

### binary treatment

In [4]:
# Dichotomise the treatment into a separate frame instead of overwriting
# df.Z in place: once Z is boolean, re-running `df.Z >= 0` would yield all
# True (a constant regressor), so the in-place version is not idempotent.
df_binary = df.assign(Z=df.Z >= 0)

# Z slope without and with adjustment for X. A boolean Z is encoded as a
# single indicator term, so the coefficient is selected by position
# (index 0 is the intercept) rather than by label.
(
    smf.ols("Y ~ Z", df_binary).fit().params.iloc[1],
    smf.ols("Y ~ Z + X", df_binary).fit().params.iloc[1],
)


(0.0010055221102859783, -0.4154833890606614)

## Z-bias

In [5]:
# Seed the global NumPy RNG so this section is reproducible even when it is
# run on its own; the lambdas below draw from np.random.normal.
np.random.seed(1)

n = int(1e6)

# Data-generating process for the Z-bias example:
#   U : affects both Z and Y
#   X : affects Z only
#   Z : treatment, X + U + noise
#   Y : U + noise -- does not depend on Z, so the true effect of Z is zero
# NOTE(review): `simulate` comes from utils; presumably it builds a DataFrame
# with one column per keyword -- confirm against utils.simulate.
df = simulate(
        U = lambda: np.random.normal(size = n),
        X = lambda: np.random.normal(size = n),
        Z = lambda X, U: X + U + np.random.normal(size=n),
        Y = lambda U: U + np.random.normal(size=n),
    )


In [7]:
# Z slope without and with adjustment for X. Y is generated without Z, so
# both estimates would be zero if they were unbiased.
# .iloc[1] selects the Z coefficient by position (index 0 is the intercept);
# integer `params[1]` on a label-indexed Series is deprecated in pandas 2.x.
(
    smf.ols("Y ~ Z", df).fit().params.iloc[1],
    smf.ols("Y ~ Z + X", df).fit().params.iloc[1],
)


(0.33315108130802534, 0.4997989461297992)

The true effect of Z on Y is zero here, so the adjusted comparison (≈ 0.50) is more biased than the unadjusted one (≈ 0.33).

### stronger association

In [8]:
# Strengthen the X -> Z association (coefficient 2 instead of 1); U and the
# noise term enter as before.
df['Z'] = 2 * df.X + df.U + np.random.normal(size=n)

# Z slope without and with adjustment for X, selected by position
# (index 0 is the intercept); integer `params[1]` on a label-indexed Series
# is deprecated in pandas 2.x.
(
    smf.ols("Y ~ Z", df).fit().params.iloc[1],
    smf.ols("Y ~ Z + X", df).fit().params.iloc[1],
)


(0.16699612964603475, 0.4998217107196198)

In [9]:
# Much stronger X -> Z association (coefficient 10); U and the noise term
# enter as before.
df['Z'] = 10 * df.X + df.U + np.random.normal(size=n)

# Z slope without and with adjustment for X, selected by position
# (index 0 is the intercept); integer `params[1]` on a label-indexed Series
# is deprecated in pandas 2.x.
(
    smf.ols("Y ~ Z", df).fit().params.iloc[1],
    smf.ols("Y ~ Z + X", df).fit().params.iloc[1],
)


(0.00990024072283937, 0.500804991941852)