# Chapter 12: The Doubly Robust or the Augmented Inverse Probability Score Weighting Estimator for the Average Causal Effect

In [1]:
from joblib import Parallel, delayed

import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn as skl

font = {'family' : 'IBM Plex Sans Condensed',
               'weight' : 'normal',
               'size'   : 10}
plt.rc('font', **font)
plt.rcParams['figure.figsize'] = (6, 6)
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

np.random.seed(42)
%load_ext autoreload
%autoreload 1

%load_ext watermark
%watermark --iversions



seaborn          : 0.12.2
matplotlib       : 3.7.2
matplotlib_inline: 0.1.6
statsmodels      : 0.14.0
sklearn          : 1.2.2
numpy            : 1.24.3
pandas           : 2.0.3



In [2]:
def OS_est(
    z,
    y,
    x,
    omod,
    pmod,
    lb = 0, ub = 1
):
    pscore = pmod.fit(x, z).predict_proba(x)[:, 1]
    pscore = np.clip(pscore, lb, ub)
    # fitted potential outcomes
    outcome1 = omod.fit(x[z == 1, :], y[z == 1]).predict(x)
    outcome0 = omod.fit(x[z == 0, :], y[z == 0]).predict(x)
    # omod
    ace_reg = (outcome1 - outcome0).mean()
    # ipw
    y_treat = (y * z / pscore).mean()
    y_control = (y * (1 - z) / (1 - pscore)).mean()
    one_treat = (z / pscore).mean()
    one_control = ((1 - z) / (1 - pscore)).mean()
    ace_ipw0 = y_treat - y_control
    ace_ipw = y_treat / one_treat - y_control / one_control
    # aipw
    r_treat, r_control = (
        (z * (y - outcome1) / pscore).mean(),
        ((1 - z) * (y - outcome0) / (1 - pscore)).mean(),
    )
    ace_dr = ace_reg + r_treat - r_control
    return np.array([ace_reg, ace_ipw0, ace_ipw, ace_dr])


In [3]:
from sklearn.linear_model import LogisticRegression, LinearRegression
lmod, omod = LogisticRegression(penalty = None), LinearRegression()


In [4]:
def OS_ATE(z, y, x, omod=omod, pmod=lmod, n_boot=2 * 1e2, truncps = (0, 1)):
    point_est = OS_est(z, y, x, omod, pmod, *truncps)
    # nonparametric bootstrap
    samp = lambda x: skl.utils.resample(x, replace=True, n_samples=len(x))
    boot_est = Parallel(n_jobs=-1)(
        delayed(OS_est)(samp(z), samp(y), samp(x), omod, pmod) for _ in range(int(n_boot))
    )
    boot_est = np.vstack(boot_est)
    # return boot_est
    boot_se = boot_est.std(axis=0)

    res = pd.DataFrame(
        [point_est, boot_se],
        index=["point_est", "boot_se"],
        columns=["omod", "ipw0", "ipw", "aipw"],
    )
    return res


In [5]:
def simu11(n = 500):
    x = np.random.normal(size = (n, 2))
    x1 = np.c_[np.ones(n), x]
    beta_z = np.array([0, 1, 1])
    pscore = 1/(1+np.exp(-x1 @ beta_z))
    z = np.random.binomial(1, pscore)
    beta_y1, beta_y0 = np.array([1, 2, 1]), np.array([1, 2, 1])
    y1, y0 = x1 @ beta_y1, x1 @ beta_y0
    y = z * y1 + (1-z) * y0 + np.random.normal(size = n)
    ce = OS_ATE(z, y, x)
    return np.r_[(y1 - y0).mean(), ce.iloc[0, :], ce.iloc[1, :]]

simu11()


array([ 0.        ,  0.03910975, -0.08010026, -0.04221151,  0.03727168,
        0.22095922,  0.22120737,  0.22121034,  0.22118967])

In [6]:
def simu01(n = 500):
    x = np.random.normal(size = (n, 2))
    x1 = np.c_[np.ones(n), x, np.exp(x)]
    beta_z = np.array([-1, 0, 0, 1, -1])
    pscore = 1/(1+np.exp(-x1 @ beta_z))
    z = np.random.binomial(1, pscore)
    beta_y1, beta_y0 = np.array([1, 2, 1, 0, 0]), np.array([1, 1, 1, 0, 0])
    y1, y0 = x1 @ beta_y1, x1 @ beta_y0
    y = z * y1 + (1-z) * y0 + np.random.normal(size = n)
    ce = OS_ATE(z, y, x)
    return np.r_[(y1 - y0).mean(), ce.iloc[0, :], ce.iloc[1, :]]

simu01()


array([ 0.01557261, -0.0198599 , -0.96285837, -0.91644986, -0.03406536,
        0.20497719,  0.20478523,  0.20483473,  0.20474512])

In [7]:
def simu10(n = 500):
    x = np.random.normal(size = (n, 2))
    x1 = np.c_[np.ones(n), x, np.exp(x)]
    beta_z = np.array([0, 1, 1, 0, 0])
    pscore = 1/(1+np.exp(-x1 @ beta_z))
    z = np.random.binomial(1, pscore)
    beta_y1, beta_y0 = np.array([1, 0, 0, 0.2, -0.1]), np.array([1, 0, 0, -0.2, 0.1])
    y1, y0 = x1 @ beta_y1, x1 @ beta_y0
    y = z * y1 + (1-z) * y0 + np.random.normal(size = n)
    ce = OS_ATE(z, y, x)
    return np.r_[(y1 - y0).mean(), ce.iloc[0, :], ce.iloc[1, :]]

simu10()


array([0.27131018, 0.35487477, 0.45546237, 0.45953136, 0.43687853,
       0.10453037, 0.10446448, 0.10446066, 0.10451951])

In [8]:
def simu00(n = 500):
    x = np.random.normal(size = (n, 2))
    x1 = np.c_[np.ones(n), x, np.exp(x)]
    beta_z = np.array([-1, 0, 0, 1, -1])
    pscore = 1/(1+np.exp(-x1 @ beta_z))
    z = np.random.binomial(1, pscore)
    beta_y1, beta_y0 = np.array([1, 0, 0, 0.2, -0.1]), np.array([1, 0, 0, -0.2, 0.1])
    y1, y0 = x1 @ beta_y1, x1 @ beta_y0
    y = z * y1 + (1-z) * y0 + np.random.normal(size = n)
    ce = OS_ATE(z, y, x)
    return np.r_[(y1 - y0).mean(), ce.iloc[0, :], ce.iloc[1, :]]

simu00()


array([0.35174471, 0.44211447, 0.56073027, 0.54395967, 0.58812764,
       0.10104513, 0.10104914, 0.1009669 , 0.10102134])

In [9]:
def simstudy(f, n, truth = 0):
    est = [f() for _ in range(n)]
    est = np.vstack(est)

    bias = (est[:, 1:5] - truth)
    return pd.DataFrame([
        bias.mean(axis = 0),
        bias.std(axis = 0),
        est[:, 5:].mean(axis = 0)
    ], index = ['bias', 'true se', 'est se'], columns = ['omod', 'ipw0', 'ipw', 'aipw'])


Both well specified

In [10]:
simstudy(simu11, 500)


Unnamed: 0,omod,ipw0,ipw,aipw
bias,0.006143,0.025104,0.045253,0.011817
true se,0.106274,0.284053,0.254586,0.12442
est se,0.219485,0.219487,0.219486,0.219486


bad pscore

In [11]:
simstudy(simu01, 500)


Unnamed: 0,omod,ipw0,ipw,aipw
bias,0.002886,-0.833264,-0.776827,0.012587
true se,0.123799,0.878363,0.567909,0.24394
est se,0.193665,0.193679,0.193675,0.193682


bad omod

In [12]:
simstudy(simu10, 500, truth = 0.2 * np.exp(1/2))


Unnamed: 0,omod,ipw0,ipw,aipw
bias,-0.048562,-0.00201,-0.001819,-0.003366
true se,0.108144,0.153458,0.139331,0.140287
est se,0.099832,0.099836,0.09983,0.099833


both bad

In [13]:
simstudy(simu00, 500, truth = 0.2 * np.exp(1/2))


Unnamed: 0,omod,ipw0,ipw,aipw
bias,-0.069452,0.126688,-0.063211,0.172261
true se,0.129646,0.555496,0.194594,0.572
est se,0.104664,0.104672,0.104665,0.104674


AIPW has the worst bias and variance when both are bad, verifying the Kang and Schafer (2007) result.

## application

In [14]:
from sklearn.preprocessing import MinMaxScaler


In [15]:
nhanes_bmi = pd.read_csv('nhanes_bmi.csv').iloc[:, 1:]
nhanes_bmi.head()


Unnamed: 0,BMI,School_meal,age,ChildSex,black,mexam,pir200_plus,WIC,Food_Stamp,fsdchbi,AnyIns,RefSex,RefAge
0,15.18,0,6,0,0,0,1,0,0,0,1,1,51
1,17.93,0,6,1,0,1,0,1,0,1,1,1,27
2,15.15,1,5,1,0,1,0,0,0,0,0,0,24
3,15.69,1,11,0,0,0,0,0,0,0,1,1,44
4,37.4,0,14,0,0,1,0,0,0,0,0,0,48


In [16]:
z, y, x = nhanes_bmi.School_meal, nhanes_bmi.BMI, MinMaxScaler().fit_transform(X = nhanes_bmi.iloc[:, 2:].values)


In [17]:
(causaleffects := OS_ATE(z.values, y.values, x, n_boot=1e3))


Unnamed: 0,omod,ipw0,ipw,aipw
point_est,-0.016954,-1.516536,-0.155755,-0.019291
boot_se,0.225514,0.225703,0.225484,0.225478


In [18]:
(causaleffects := OS_ATE(z.values, y.values, x, n_boot=1e3))


Unnamed: 0,omod,ipw0,ipw,aipw
point_est,-0.016954,-1.516536,-0.155755,-0.019291
boot_se,0.228794,0.228678,0.228683,0.22869


In [19]:
(causaleffects2 := OS_ATE(z.values, y.values, x, n_boot=1e3, truncps = (0.1, 0.9)))


Unnamed: 0,omod,ipw0,ipw,aipw
point_est,-0.016954,-0.713539,-0.053634,-0.043381
boot_se,0.23514,0.235259,0.23509,0.235095
