In [1]:
import numpy as np
import sklearn
import sklearn.pipeline as skpipe
# learners
import celer as cel
from lightgbm import LGBMClassifier, LGBMRegressor

# this module
from aipyw import AIPyW
from aipyw.dgp import dgp_binary, dgp_discrete, hainmueller

# Seed NumPy's legacy global RNG so repeated runs can produce the same draws.
# NOTE(review): this only helps if the aipyw.dgp generators draw from the global
# np.random state, and it presumably does not reach joblib worker processes
# used further down — confirm.
np.random.seed(42)

In [2]:
import matplotlib.pyplot as plt
import seaborn as sns

# Basic Demo


## Discrete Treatments

In [3]:
# Simulate a discrete-treatment dataset: outcome Y, treatment arm D, covariates X.
# NOTE(review): treat_effects presumably holds one effect per arm, with arm 0 as
# the 0.0 baseline — confirm against dgp_discrete.
Y, D, X = dgp_discrete(
    n=100_000,
    p=4,
    treat_effects=np.array([0.0, 0.4, 0.5, 0.55]),
)
# Display the array shapes as a quick sanity check.
Y.shape, D.shape, X.shape

((100000,), (100000,), (100000, 10))

In [4]:
# Naive (unadjusted) difference in mean outcome between each treated arm and arm 0.
tuple(Y[D == arm].mean() - Y[D == 0].mean() for arm in (1, 2, 3))

(np.float64(-2.459176579755844),
 np.float64(0.9312130397763212),
 np.float64(2.8166707333716685))

The naive difference-in-means estimates are badly biased: the true effects relative to arm 0 are 0.4, 0.5 and 0.55.

In [5]:
%%time
# AIPyW with riesz_method="linear"; no nuisance models are passed, so whatever
# AIPyW uses by default applies.
doubledouble3 = AIPyW(riesz_method="linear")
doubledouble3.fit(X, D, Y)
# summary() returns a dict keyed by arm contrast ("1 vs 0", ...) with 'effect' and 'se'.
doubledouble3.summary()

CPU times: user 59 s, sys: 217 ms, total: 59.3 s
Wall time: 3.92 s


{'1 vs 0': {'effect': np.float64(0.3440241782297518),
  'se': np.float64(0.00031759006244523954)},
 '2 vs 0': {'effect': np.float64(0.43320743886748597),
  'se': np.float64(0.00032485666524239936)},
 '3 vs 0': {'effect': np.float64(0.47547790412244395),
  'se': np.float64(0.0003325522904819936)},
 '2 vs 1': {'effect': np.float64(0.08918326063773419),
  'se': np.float64(0.00024092384230735961)},
 '3 vs 1': {'effect': np.float64(0.13145372589269216),
  'se': np.float64(0.0002638624008049813)},
 '3 vs 2': {'effect': np.float64(0.042270465254957965),
  'se': np.float64(0.0002683575335978814)}}

In [6]:
%%time
# AIPyW with riesz_method="balancing" and bal_obj="quadratic".
# NOTE: this rebinds `doubledouble3`, so the fit from the previous cell is no
# longer reachable after this cell runs.
doubledouble3 = AIPyW(riesz_method="balancing", bal_obj="quadratic")
doubledouble3.fit(X, D, Y)
# summary() returns a dict keyed by arm contrast ("1 vs 0", ...) with 'effect' and 'se'.
doubledouble3.summary()

CPU times: user 7.52 s, sys: 15.7 ms, total: 7.54 s
Wall time: 532 ms


{'1 vs 0': {'effect': np.float64(0.3992624833348587),
  'se': np.float64(1.1131571660979226e-05)},
 '2 vs 0': {'effect': np.float64(0.4999967519495474),
  'se': np.float64(1.2429734209405745e-05)},
 '3 vs 0': {'effect': np.float64(0.55194785160569),
  'se': np.float64(1.3086491593015786e-05)},
 '2 vs 1': {'effect': np.float64(0.10073426861468866),
  'se': np.float64(1.26949544159626e-05)},
 '3 vs 1': {'effect': np.float64(0.15268536827083137),
  'se': np.float64(1.13142418777214e-05)},
 '3 vs 2': {'effect': np.float64(0.05195109965614269),
  'se': np.float64(1.3205131515576768e-05)}}

## Hainmueller (2012) Simulation study

Binary treatment, continuous outcome, 2 groups. We parametrize the degree of overlap and the functional forms of the outcome and treatment (propensity score) models. The true effect is zero, so the RMSE is easy to calculate: it is the root mean square of the estimates themselves.

In [7]:
def one_rep(n_samples, overlap_design, pscore_design, outcome_design, riesz_method, **kwargs):
    """Run a single simulation replication and return the '1 vs 0' effect estimate.

    Draws one dataset from ``hainmueller``, fits ``AIPyW`` with LightGBM
    nuisance models, and returns ``summary()["1 vs 0"]["effect"]``.

    Parameters
    ----------
    n_samples, overlap_design, pscore_design, outcome_design
        Passed through unchanged to ``hainmueller``.
    riesz_method
        Passed to the ``AIPyW`` constructor as ``riesz_method``.
    **kwargs
        Additional keyword arguments for the ``AIPyW`` constructor.
    """
    # Simulate one dataset for the requested design.
    sim_y, sim_d, sim_X = hainmueller(
        n_samples=n_samples,
        overlap_design=overlap_design,
        pscore_design=pscore_design,
        outcome_design=outcome_design,
    )

    # Nuisance learners: a regressor for the outcome, a classifier for the propensity.
    # n_jobs=1 presumably leaves parallelism to the caller — TODO confirm intent.
    outcome_learner = LGBMRegressor(verbose=-1, n_jobs=1)
    propensity_learner = LGBMClassifier(verbose=-1, n_jobs=1)

    estimator = AIPyW(
        propensity_model=propensity_learner,
        outcome_model=outcome_learner,
        riesz_method=riesz_method,
        **kwargs,
    )
    estimator.fit(sim_X, sim_d, sim_y, n_rff=100)

    contrasts = estimator.summary()
    return contrasts["1 vs 0"]["effect"]

### Favorable case: good overlap, linear pscore and outcome

In [8]:
%%time
one_rep(
    n_samples=10_000,
    overlap_design=2,
    pscore_design=1,
    outcome_design=1,
    riesz_method="ipw",
)

CPU times: user 1.62 s, sys: 0 ns, total: 1.62 s
Wall time: 519 ms


np.float64(-0.0033346200181945534)

In [9]:
%%time
one_rep(
    n_samples=10_000,
    overlap_design=2,
    pscore_design=1,
    outcome_design=1,
    riesz_method="linear",
)

CPU times: user 601 ms, sys: 7.79 ms, total: 609 ms
Wall time: 337 ms


np.float64(0.006689004288240835)

In [10]:
%%time
one_rep(
    n_samples=10_000,
    overlap_design=2,
    pscore_design=1,
    outcome_design=1,
    riesz_method="kernel",
)

CPU times: user 23.5 s, sys: 20.1 ms, total: 23.5 s
Wall time: 1.8 s


np.float64(-0.0032402673454760135)

In [11]:
%%time
one_rep(
    n_samples=10_000,
    overlap_design=2,
    pscore_design=1,
    outcome_design=1,
    riesz_method="balancing",
)

CPU times: user 1.68 s, sys: 7.81 ms, total: 1.69 s
Wall time: 401 ms


np.float64(0.028061806731204706)

### Hard case: poor overlap, non-linear pscore and outcome

In [12]:
%%time
one_rep(
    n_samples=10_000,
    overlap_design=1,
    pscore_design=3,
    outcome_design=3,
    riesz_method="ipw",
)

CPU times: user 403 ms, sys: 3.99 ms, total: 407 ms
Wall time: 406 ms


np.float64(1.3048733100150471)

In [13]:
%%time
one_rep(
    n_samples=10_000,
    overlap_design=1,
    pscore_design=3,
    outcome_design=3,
    riesz_method="linear",
)

CPU times: user 558 ms, sys: 7.84 ms, total: 566 ms
Wall time: 317 ms


np.float64(-0.16985757185669997)

In [14]:
%%time
one_rep(
    n_samples=10_000,
    overlap_design=1,
    pscore_design=3,
    outcome_design=3,
    riesz_method="kernel",
)

CPU times: user 20.8 s, sys: 16.2 ms, total: 20.9 s
Wall time: 1.64 s


np.float64(-0.29089672973480046)

In [17]:
%%time
one_rep(
    n_samples=10_000,
    overlap_design=1,
    pscore_design=3,
    outcome_design=3,
    riesz_method="balancing",
    bal_obj="entropy",
)

CPU times: user 283 ms, sys: 12 ms, total: 295 ms
Wall time: 295 ms


np.float64(-5.099867468024708)

### All together: RMSE across all designs

In [14]:
from joblib import Parallel, delayed

def compute_ate_rmse_parallel(
    n_samples,
    overlap_design,
    pscore_design,
    outcome_design,
    riesz_method,
    n_replications=100,
    n_jobs=-1,
    **kwargs,
):
    """Estimate the RMSE of the ATE estimator over repeated simulations.

    Runs ``one_rep`` ``n_replications`` times through joblib and returns the
    root mean squared error of the resulting estimates against a true effect
    of zero.

    Parameters
    ----------
    n_samples, overlap_design, pscore_design, outcome_design, riesz_method
        Passed positionally to ``one_rep`` for every replication.
    n_replications : int, default 100
        Number of independent calls to ``one_rep``.
    n_jobs : int, default -1
        Passed to ``joblib.Parallel``.
    **kwargs
        Extra keyword arguments forwarded to ``one_rep`` (for example
        ``bal_obj="entropy"`` together with ``riesz_method="balancing"``).

    Returns
    -------
    float
        RMSE of the replication estimates around zero.
    """
    # NOTE(review): replications are not explicitly seeded here, so results
    # presumably vary from run to run — confirm whether that is acceptable.
    ate_estimates = Parallel(n_jobs=n_jobs)(
        delayed(one_rep)(
            n_samples,
            overlap_design,
            pscore_design,
            outcome_design,
            riesz_method,
            **kwargs,
        )
        for _ in range(n_replications)
    )
    # The simulation design has a true effect of zero, so the error is the estimate itself.
    true_ate = 0
    rmse = np.sqrt(np.mean((np.array(ate_estimates) - true_ate) ** 2))
    return rmse

In [15]:
from itertools import product
# Sweep every (overlap, pscore, outcome) design code in 1..3 for each Riesz method.
params = np.arange(1, 4)
param_list = list(product(params, params, params, ["ipw", "linear", "kernel"]))

# Results are keyed as "<overlap>_<pscore>_<outcome>_<method>".
res_dict = {}
for param in param_list:
    key = "_".join(map(str, param))
    res_dict[key] = compute_ate_rmse_parallel(10_000, *param)

In [37]:
import pandas as pd
# Readable labels for the integer design codes used as res_dict key parts.
# The code -> label order reproduces the label lists this cell used before.
# NOTE(review): confirm the label meanings against aipyw.dgp.hainmueller.
overlap_labels = {1: "poor", 2: "good", 3: "medium"}
pscore_labels = {1: "linear", 2: "quad", 3: "trig"}
outcome_labels = {1: "linear", 2: "quad", 3: "nl"}
rmse_methods = ["ipw", "linear", "kernel"]

# Build one row per design by looking each result up under its explicit key
# ("<overlap>_<pscore>_<outcome>_<method>"). A missing or renamed entry raises
# KeyError instead of silently shifting values onto the wrong design labels.
rmse_rows = []
for o_code, p_code, y_code in product(overlap_labels, pscore_labels, outcome_labels):
    row = {
        method: res_dict[f"{o_code}_{p_code}_{y_code}_{method}"]
        for method in rmse_methods
    }
    row["overlap_design"] = overlap_labels[o_code]
    row["pscore_design"] = pscore_labels[p_code]
    row["outcome_design"] = outcome_labels[y_code]
    rmse_rows.append(row)

# Columns: ipw, linear, kernel, overlap_design, pscore_design, outcome_design.
res_df = pd.DataFrame(rmse_rows)
res_df

Unnamed: 0,ipw,linear,kernel,overlap_design,pscore_design,outcome_design
0,0.137666,0.073477,0.037029,poor,linear,linear
1,0.155213,0.07099,0.031021,poor,linear,quad
2,17.547401,88.536906,20.961078,poor,linear,nl
3,0.110289,0.060276,0.039207,poor,quad,linear
4,0.113453,0.03733,0.022676,poor,quad,quad
5,58.048174,16.401004,8.909979,poor,quad,nl
6,0.026082,0.015245,0.012045,poor,trig,linear
7,0.023168,0.011092,0.01216,poor,trig,quad
8,15.719334,36.621279,7.64987,poor,trig,nl
9,0.047198,0.029076,0.016379,good,linear,linear
