# Chapter 12: The Doubly Robust or the Augmented Inverse Probability Score Weighting Estimator for the Average Causal Effect

In [1]:
from joblib import Parallel, delayed

import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn as skl

font = {'family' : 'IBM Plex Sans Condensed',
               'weight' : 'normal',
               'size'   : 10}
plt.rc('font', **font)
plt.rcParams['figure.figsize'] = (6, 6)
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

np.random.seed(42)
%load_ext autoreload
%autoreload 1

%load_ext watermark
%watermark --iversions



statsmodels      : 0.14.0
seaborn          : 0.12.2
sklearn          : 1.2.2
matplotlib       : 3.7.2
matplotlib_inline: 0.1.6
pandas           : 2.0.3
numpy            : 1.24.3



In [2]:
def OS_est(z, y, x, omod, pmod, lb=0, ub=1):
    pscore = pmod.fit(x, z).predict_proba(x)[:, 1]
    pscore = np.clip(pscore, lb, ub)
    # fitted potential outcomes
    outcome1 = omod.fit(x[z == 1, :], y[z == 1]).predict(x)
    outcome0 = omod.fit(x[z == 0, :], y[z == 0]).predict(x)
    # omod
    ace_reg = (outcome1 - outcome0).mean()
    # ipw
    y_treat = (y * z / pscore).mean()
    y_control = (y * (1 - z) / (1 - pscore)).mean()
    one_treat = (z / pscore).mean()
    one_control = ((1 - z) / (1 - pscore)).mean()
    ace_ipw0 = y_treat - y_control
    ace_ipw = y_treat / one_treat - y_control / one_control
    # aipw
    r_treat, r_control = (
        (z * (y - outcome1) / pscore).mean(),
        ((1 - z) * (y - outcome0) / (1 - pscore)).mean(),
    )
    ace_dr = ace_reg + r_treat - r_control
    return np.array([ace_reg, ace_ipw0, ace_ipw, ace_dr])

In [3]:
from sklearn.linear_model import LogisticRegression, LinearRegression

lmod, omod = LogisticRegression(penalty=None), LinearRegression()

In [15]:
def OS_ATE(z, y, x, omod=omod, pmod=lmod, n_boot=2 * 1e2, truncps=(0, 1)):
    point_est = OS_est(z, y, x, omod, pmod, *truncps)
    n = len(z)

    # nonparametric bootstrap
    def bootfn(*args):
        # draw indices
        ids = np.random.choice(np.arange(n), size=n, replace=True)
        return OS_est(z[ids], y[ids], x[ids, :], omod, pmod)

    boot_est = Parallel(n_jobs=-1)(delayed(bootfn)(i) for i in range(int(n_boot)))
    boot_est = np.vstack(boot_est)
    # return boot_est
    boot_se = boot_est.std(axis=0)

    res = pd.DataFrame(
        [point_est, boot_se],
        index=["point_est", "boot_se"],
        columns=["omod", "ipw0", "ipw", "aipw"],
    )
    return res

In [18]:
def simu11(n=500):
    x = np.random.normal(size=(n, 2))
    x1 = np.c_[np.ones(n), x]
    beta_z = np.array([0, 1, 1])
    pscore = 1 / (1 + np.exp(-x1 @ beta_z))
    z = np.random.binomial(1, pscore)
    beta_y1, beta_y0 = np.array([1, 2, 1]), np.array([1, 2, 1])
    y1, y0 = x1 @ beta_y1, x1 @ beta_y0
    y = z * y1 + (1 - z) * y0 + np.random.normal(size=n)
    ce = OS_ATE(z, y, x)
    return np.r_[(y1 - y0).mean(), ce.iloc[0, :], ce.iloc[1, :]]


simu11()

array([ 0.        , -0.03158827, -0.10764208, -0.16381696, -0.09518803,
        0.10472859,  0.16304548,  0.16203658,  0.11038457])

In [19]:
def simu01(n=500):
    x = np.random.normal(size=(n, 2))
    x1 = np.c_[np.ones(n), x, np.exp(x)]
    beta_z = np.array([-1, 0, 0, 1, -1])
    pscore = 1 / (1 + np.exp(-x1 @ beta_z))
    z = np.random.binomial(1, pscore)
    beta_y1, beta_y0 = np.array([1, 2, 1, 0, 0]), np.array([1, 1, 1, 0, 0])
    y1, y0 = x1 @ beta_y1, x1 @ beta_y0
    y = z * y1 + (1 - z) * y0 + np.random.normal(size=n)
    ce = OS_ATE(z, y, x)
    return np.r_[(y1 - y0).mean(), ce.iloc[0, :], ce.iloc[1, :]]


simu01()

array([ 0.04582345, -0.22549646, -0.46072515, -0.52516757, -0.17942783,
        0.12001089,  0.25216989,  0.24312848,  0.13680289])

In [20]:
def simu10(n=500):
    x = np.random.normal(size=(n, 2))
    x1 = np.c_[np.ones(n), x, np.exp(x)]
    beta_z = np.array([0, 1, 1, 0, 0])
    pscore = 1 / (1 + np.exp(-x1 @ beta_z))
    z = np.random.binomial(1, pscore)
    beta_y1, beta_y0 = np.array([1, 0, 0, 0.2, -0.1]), np.array([1, 0, 0, -0.2, 0.1])
    y1, y0 = x1 @ beta_y1, x1 @ beta_y0
    y = z * y1 + (1 - z) * y0 + np.random.normal(size=n)
    ce = OS_ATE(z, y, x)
    return np.r_[(y1 - y0).mean(), ce.iloc[0, :], ce.iloc[1, :]]


simu10()

array([0.37558253, 0.30140973, 0.34046859, 0.33410275, 0.34599118,
       0.1020255 , 0.10175789, 0.09951637, 0.10378403])

In [21]:
def simu00(n=500):
    x = np.random.normal(size=(n, 2))
    x1 = np.c_[np.ones(n), x, np.exp(x)]
    beta_z = np.array([-1, 0, 0, 1, -1])
    pscore = 1 / (1 + np.exp(-x1 @ beta_z))
    z = np.random.binomial(1, pscore)
    beta_y1, beta_y0 = np.array([1, 0, 0, 0.2, -0.1]), np.array([1, 0, 0, -0.2, 0.1])
    y1, y0 = x1 @ beta_y1, x1 @ beta_y0
    y = z * y1 + (1 - z) * y0 + np.random.normal(size=n)
    ce = OS_ATE(z, y, x)
    return np.r_[(y1 - y0).mean(), ce.iloc[0, :], ce.iloc[1, :]]


simu00()

array([0.35452903, 0.05032412, 0.19088308, 0.1505216 , 0.22355156,
       0.1339793 , 0.16197421, 0.13353598, 0.17576791])

In [22]:
def simstudy(f, n, truth=0):
    est = [f() for _ in range(n)]
    est = np.vstack(est)

    bias = est[:, 1:5] - truth
    return pd.DataFrame(
        [bias.mean(axis=0), bias.std(axis=0), est[:, 5:].mean(axis=0)],
        index=["bias", "true se", "est se"],
        columns=["omod", "ipw0", "ipw", "aipw"],
    )

Both well specified

In [23]:
simstudy(simu11, 500)

Unnamed: 0,omod,ipw0,ipw,aipw
bias,0.003401,-0.011995,0.007092,0.004379
true se,0.103634,0.328331,0.287792,0.119786
est se,0.10414,0.266542,0.232376,0.119668


bad pscore

In [24]:
simstudy(simu01, 500)

Unnamed: 0,omod,ipw0,ipw,aipw
bias,0.006553,-0.786128,-0.746743,-0.002292
true se,0.129372,0.762854,0.535186,0.209539
est se,0.126473,0.548638,0.386802,0.187415


bad omod

In [25]:
simstudy(simu10, 500, truth=0.2 * np.exp(1 / 2))

Unnamed: 0,omod,ipw0,ipw,aipw
bias,-0.052806,-0.002581,6e-06,0.000859
true se,0.114374,0.164134,0.15416,0.155509
est se,0.112447,0.148546,0.135525,0.137609


both bad

In [26]:
simstudy(simu00, 500, truth=0.2 * np.exp(1 / 2))

Unnamed: 0,omod,ipw0,ipw,aipw
bias,-0.072704,0.089918,-0.071039,0.133596
true se,0.125404,0.250922,0.188995,0.258324
est se,0.125969,0.226154,0.157942,0.222932


AIPW has the worst bias and variance when both are bad, verifying the Kang and Schafer (2007) result.

## application

In [27]:
from sklearn.preprocessing import MinMaxScaler

In [28]:
nhanes_bmi = pd.read_csv("nhanes_bmi.csv").iloc[:, 1:]
nhanes_bmi.head()

Unnamed: 0,BMI,School_meal,age,ChildSex,black,mexam,pir200_plus,WIC,Food_Stamp,fsdchbi,AnyIns,RefSex,RefAge
0,15.18,0,6,0,0,0,1,0,0,0,1,1,51
1,17.93,0,6,1,0,1,0,1,0,1,1,1,27
2,15.15,1,5,1,0,1,0,0,0,0,0,0,24
3,15.69,1,11,0,0,0,0,0,0,0,1,1,44
4,37.4,0,14,0,0,1,0,0,0,0,0,0,48


In [29]:
z, y, x = (
    nhanes_bmi.School_meal,
    nhanes_bmi.BMI,
    MinMaxScaler().fit_transform(X=nhanes_bmi.iloc[:, 2:].values),
)

In [30]:
(causaleffects := OS_ATE(z.values, y.values, x, n_boot=1e3))

Unnamed: 0,omod,ipw0,ipw,aipw
point_est,-0.016954,-1.516536,-0.155755,-0.019291
boot_se,0.227012,0.484243,0.246763,0.230823


In [32]:
(causaleffects2 := OS_ATE(z.values, y.values, x, n_boot=1e3, truncps=(0.1, 0.9)))

Unnamed: 0,omod,ipw0,ipw,aipw
point_est,-0.016954,-0.713539,-0.053634,-0.043381
boot_se,0.225728,0.490854,0.239052,0.229507
