# Python code: Simulated example from "Double Robust Variance Estimation"

Code to replicate the analysis on a simulated data set from Shook-Sa BE, Zivich PN, Lee C, Xue K, Ross RK, Edwards JK, Stringer JSA, Cole SR. "Double Robust Variance Estimation" Submitted 2024.

Paul Zivich (2024/04/23)

In [1]:
import numpy as np
import scipy as sp
import pandas as pd
import formulaic as formula
from formulaic import model_matrix
import delicatessen as deli
from delicatessen import MEstimator
from delicatessen.estimating_equations import ee_regression

from efuncs import ee_aipw_plugin, ee_aipw_wreg, ee_tmle, bound_unit
from helper import inf_func_inference

# Report the library versions this analysis was run with, one per line
print("Versions")
print("====================")
print(*(f"{label} {module.__version__}"
        for label, module in (("NumPy:       ", np),
                              ("SciPy:       ", sp),
                              ("Pandas:      ", pd),
                              ("formulaic:   ", formula),
                              ("Delicatessen:", deli))),
      sep="\n")

Versions
====================
NumPy:        1.25.2
SciPy:        1.11.2
Pandas:       1.4.1
formulaic:    0.5.2
Delicatessen: 2.2


## Loading Data and Setup

In [2]:
# Observed (simulated) data set used throughout the example
d = pd.read_csv("data/exampledata.csv")

# Intervention copies of the data: identical to `d` except that the
# action column X is overwritten with a constant for every row
d1 = d.assign(X=1)    # everyone set to X=1
d0 = d.assign(X=0)    # everyone set to X=0

# Action (X) and outcome (Y) columns pulled out as NumPy arrays
a, y = (np.asarray(d[column]) for column in ("X", "Y"))

# Accumulator for the per-estimator result rows (filled in by later cells)
rows = list()

## Model Specifications

In [3]:
# Design matrix for the propensity score model: main effects of Z1, Z2, Z3
# plus the Z1:Z2 and Z1:Z3 interactions, built from the observed data
ps_model = model_matrix("Z1 + Z2 + Z1:Z2 + Z3 + Z1:Z3", d)

# Design matrices for the outcome model. The same formula is applied to the
# observed data (d) and to the two intervention copies (d1: X=1, d0: X=0)
# so the three matrices share an identical column layout.
out_spec = "X*Z1*Z2"
out_model, out1_model, out0_model = (
    model_matrix(out_spec, frame) for frame in (d, d1, d0)
)

## Classic AIPW

In [4]:
# Classic AIPW estimator

def psi_aipw_plugin(theta):
    """Estimating function for the classic (plug-in) AIPW estimator.

    Thin wrapper that binds the module-level outcome ``y``, action ``a`` and
    design matrices to ``ee_aipw_plugin`` so that the result is a function of
    the parameter vector only, as ``MEstimator`` requires.

    Parameters
    ----------
    theta : array-like
        Current values of the full parameter vector.

    Returns
    -------
    Whatever ``ee_aipw_plugin`` returns for these inputs (the stacked
    estimating functions evaluated at ``theta``).
    """
    design = {
        "PSM": ps_model,      # Propensity score model
        "OM": out_model,      # Outcome model on the observed data
        "OM1": out1_model,    # Outcome model with X set to 1
        "OM0": out0_model,    # Outcome model with X set to 0
    }
    return ee_aipw_plugin(theta=theta, y=y, a=a, **design)


# Starting values for the root-finder, laid out as:
#   one leading 0 (the parameter reported as "Est" below),
#   two causal means started at 2000 (near the observed outcome mean),
#   one 0 per propensity score design column,
#   2000 for the first outcome-model column (presumably the intercept)
#   and 0 for each remaining outcome-model column
init_vals = [
    0., 2000., 2000.,
    *([0.] * ps_model.shape[1]),
    2000.,
    *([0.] * (out_model.shape[1] - 1)),
]

# Fit the M-estimator and get the empirical sandwich confidence intervals
estr = MEstimator(psi_aipw_plugin, init=init_vals)
estr.estimate(maxiter=20000)
ci = estr.confidence_intervals()

# Influence-function-based inference, evaluated at the fitted parameters
ifvar = inf_func_inference(
    theta=estr.theta, y=y, a=a,
    PSM=ps_model, OM=out_model, OM1=out1_model, OM0=out0_model,
)

# Result row: label, point estimate, sandwich SE and CI limits for the first
# parameter, then the first three entries returned by inf_func_inference
rows.append([
    "Classic-AIPW",
    estr.theta[0],
    estr.variance[0, 0]**0.5,
    *(ci[0, k] for k in range(2)),
    *(ifvar[k] for k in range(3)),
])

## Weighted Regression AIPW

In [5]:
# Weighted-regression AIPW estimator

def psi_aipw_wreg(theta):
    """Estimating function for the weighted-regression AIPW estimator.

    Thin wrapper that binds the module-level outcome ``y``, action ``a`` and
    design matrices to ``ee_aipw_wreg`` so that the result is a function of
    the parameter vector only, as ``MEstimator`` requires.

    Parameters
    ----------
    theta : array-like
        Current values of the full parameter vector.

    Returns
    -------
    Whatever ``ee_aipw_wreg`` returns for these inputs (the stacked
    estimating functions evaluated at ``theta``).
    """
    design = {
        "PSM": ps_model,      # Propensity score model
        "OM": out_model,      # Outcome model on the observed data
        "OM1": out1_model,    # Outcome model with X set to 1
        "OM0": out0_model,    # Outcome model with X set to 0
    }
    return ee_aipw_wreg(theta=theta, y=y, a=a, **design)


# Starting values for the root-finder, laid out as:
#   one leading 0 (the parameter reported as "Est" below),
#   two causal means started at 2000 (near the observed outcome mean),
#   one 0 per propensity score design column,
#   2000 for the first outcome-model column (presumably the intercept)
#   and 0 for each remaining outcome-model column
init_vals = [
    0., 2000., 2000.,
    *([0.] * ps_model.shape[1]),
    2000.,
    *([0.] * (out_model.shape[1] - 1)),
]

# Fit the M-estimator and get the empirical sandwich confidence intervals
estr = MEstimator(psi_aipw_wreg, init=init_vals)
estr.estimate(maxiter=20000)
ci = estr.confidence_intervals()

# Influence-function-based inference, evaluated at the fitted parameters
ifvar = inf_func_inference(
    theta=estr.theta, y=y, a=a,
    PSM=ps_model, OM=out_model, OM1=out1_model, OM0=out0_model,
)

# Result row: label, point estimate, sandwich SE and CI limits for the first
# parameter, then the first three entries returned by inf_func_inference
rows.append([
    "WR-AIPW",
    estr.theta[0],
    estr.variance[0, 0]**0.5,
    *(ci[0, k] for k in range(2)),
    *(ifvar[k] for k in range(3)),
])

## TMLE

In [6]:
# Targeted maximum likelihood estimator

def psi_tmle(theta):
    """Estimating function for the targeted maximum likelihood estimator.

    Thin wrapper that binds the module-level outcome ``y``, action ``a`` and
    design matrices to ``ee_tmle`` so that the result is a function of the
    parameter vector only, as ``MEstimator`` requires.

    Parameters
    ----------
    theta : array-like
        Current values of the full parameter vector.

    Returns
    -------
    Whatever ``ee_tmle`` returns for these inputs (the stacked estimating
    functions evaluated at ``theta``).
    """
    design = {
        "PSM": ps_model,      # Propensity score model
        "OM": out_model,      # Outcome model on the observed data
        "OM1": out1_model,    # Outcome model with X set to 1
        "OM0": out0_model,    # Outcome model with X set to 0
    }
    return ee_tmle(theta=theta, y=y, a=a, **design)


# Starting values for the root-finder. Unlike the AIPW cells, these are not
# placed near the observed outcome mean:
#   three leading 0s (the first is the parameter reported as "Est" below),
#   one 0 per propensity score design column,
#   0.5 for the first outcome-model column and 0 for each remaining one
#     (NOTE(review): 0.5 presumably reflects an outcome rescaled to the unit
#      interval inside ee_tmle -- confirm against efuncs),
#   two trailing 0s for the extra parameters ee_tmle expects
#     (presumably the targeting-step coefficients -- confirm against efuncs)
init_vals = [
    0., 0., 0.,
    *([0.] * ps_model.shape[1]),
    0.5,
    *([0.] * (out_model.shape[1] - 1)),
    0., 0.,
]

# Fit the M-estimator and get the empirical sandwich confidence intervals
estr = MEstimator(psi_tmle, init=init_vals)
estr.estimate(maxiter=20000)
ci = estr.confidence_intervals()

# Influence-function-based inference, evaluated at the fitted parameters.
# unbound_y=True is passed only for the TMLE fit.
ifvar = inf_func_inference(
    theta=estr.theta, y=y, a=a,
    PSM=ps_model, OM=out_model, OM1=out1_model, OM0=out0_model,
    unbound_y=True,
)

# Result row: label, point estimate, sandwich SE and CI limits for the first
# parameter, then the first three entries returned by inf_func_inference
rows.append([
    "TMLE",
    estr.theta[0],
    estr.variance[0, 0]**0.5,
    *(ci[0, k] for k in range(2)),
    *(ifvar[k] for k in range(3)),
])

## Results

In [7]:
# Assemble the stored rows into a table indexed by estimator name.
# "ES-*" columns hold the empirical sandwich results and "IF-*" columns the
# influence-function results appended in the cells above.
results = pd.DataFrame(
    rows,
    columns=[
        "Estimator", "Est",
        "ES-SE", "ES-LCL", "ES-UCL",
        "IF-SE", "IF-LCL", "IF-UCL",
    ],
).set_index("Estimator")

# Display rounded to whole numbers (the stored table keeps full precision)
results.round(0)

Estimator,Est,ES-SE,ES-LCL,ES-UCL,IF-SE,IF-LCL,IF-UCL
Classic-AIPW,-19.0,58.0,-132.0,94.0,58.0,-132.0,95.0
WR-AIPW,-16.0,57.0,-129.0,96.0,58.0,-129.0,97.0
TMLE,-19.0,58.0,-132.0,94.0,58.0,-132.0,95.0


END