In [1]:
import numpy as np
import pandas as pd
import sklearn
import sklearn.pipeline as skpipe
from sklearn.preprocessing import MinMaxScaler, PolynomialFeatures
import celer as cel
import matplotlib.pyplot as plt
import statsmodels.api as sm

# dml dgps
from doubleml import datasets
# this module
from aipyw import AIPyW
from aipyw.dgp import dgp_binary, dgp_discrete

from xgboost import XGBClassifier, XGBRegressor
# Seed NumPy's legacy global random state with a fixed value. Presumably the
# simulated data and fold splits below draw from it -- TODO confirm that the
# DGP helpers and AIPyW actually use the global RNG.
np.random.seed(42)

In [2]:
# Show the AIPyW constructor signature and docstrings (IPython `?` help syntax).
AIPyW?

[0;31mInit signature:[0m [0mAIPyW[0m[0;34m([0m[0my[0m[0;34m,[0m [0mw[0m[0;34m,[0m [0mX[0m[0;34m,[0m [0momod[0m[0;34m,[0m [0mpmod[0m[0;34m,[0m [0mnf[0m[0;34m=[0m[0;36m2[0m[0;34m,[0m [0mpslb[0m[0;34m=[0m[0;32mNone[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m     
Augmented Propensity Score Weighting for many discrete treatments.

Class to fit the Augmented IPW estimator using arbitrary scikit learners.
Extends the standard binary treatement estimator to K discrete treatments.
For details on the influence function, see Cattaneo (2010) JoE.
[0;31mInit docstring:[0m
Initialise an aipyw class that holds data and models.

Args:
                                y (N X 1 Numpy Array): Response vector
                                w (N X 1 Numpy Array): Treatment vector (integer valued)
                                X (N X K Numpy Array): Covariate Matrix
                                omod (sklearn model object): Model object with 

# Demo

## Binary Treatment

In [3]:
#######################################################################
# Simulate 1,000 observations from the DoubleML IRM design as plain arrays.
# theta=1, i.e. the true treatment effect is 1.
X, y, w = datasets.make_irm_data(1_000, theta=1, return_type="array")
# Naive contrast: mean outcome of the treated minus mean outcome of the
# controls. It ignores the covariates, so it is biased.
np.mean(y[w == 1]) - np.mean(y[w == 0])

1.6645691739681896

In [4]:
# Rescale each covariate to the unit interval, then expand it into a degree-2
# polynomial basis (the "sieve"). The transformers are referenced by their
# explicitly imported names so this cell does not rely on the
# sklearn.preprocessing submodule having been loaded as a side effect of
# another import.
ppl = skpipe.Pipeline([
    ('minmax', MinMaxScaler()),
    ('sieve', PolynomialFeatures(2)),
])
XX = ppl.fit_transform(X)
#######################################################################
# Initialise the estimator with the data and the two model objects:
#   omod - elastic net whose penalty is chosen by 5-fold CV over three
#          l1_ratio values and 20 alphas (presumably the outcome model)
#   pmod - celer logistic regression with C=1 (presumably the propensity model)
doubledouble = AIPyW(
    y, w, XX,
    omod=cel.ElasticNetCV(l1_ratio=[.5, .7, .9], n_alphas=20, cv=5, n_jobs=8),
    pmod=cel.LogisticRegression(C=1),
)
# Fit the models.
doubledouble.fit()
# Report the estimate table. On this data summary() also prints a poor-overlap
# warning because some estimated propensity scores fall below 0.02.
doubledouble.summary()


Poor overlap - some pscores are < 0.02; Either call summary() with a trimming threshold as lb 
 or change the estimand to ATT.
                                   ATE        SE  95% CI-LB  95% CI-UB
Treat level 1 - Treat level 0  1.17469  0.085676   1.006766   1.342614


## Discrete Treatments

In [6]:
# Simulate one million observations with a four-level discrete treatment; level 0
# is the baseline with effect 0.0. (p=4 is presumably the covariate dimension --
# TODO confirm against aipyw.dgp.)
# NOTE: this rebinds X, which held the binary-treatment covariates above.
Y, D, X = dgp_discrete(n=10**6, p=4, treat_effects=np.array([0.0, 0.4, 0.5, 0.55]))

In [7]:
# Naive contrasts: mean outcome at treatment levels 1, 2 and 3, each minus the
# mean outcome at the baseline level 0, returned as a 3-tuple.
tuple(Y[D == level].mean() - Y[D == 0].mean() for level in (1, 2, 3))

(-0.6615591580053521, 1.8474127990739704, 1.7700781434662345)

The naive estimates are badly biased.

In [8]:
# Gradient-boosted learners for the discrete-treatment example: a regressor
# and a classifier, both with the same learning rate and thread count.
regmod = XGBRegressor(
    learning_rate=0.1,
    n_jobs=6,
)
psmod = XGBClassifier(
    learning_rate=0.1,
    n_jobs=6,
)

In [9]:

%%time
doubledouble2 = AIPyW(Y, D, X,
                    omod = regmod,
                    pmod = psmod,
)
doubledouble2.fit()
doubledouble2.summary()

Poor overlap - some pscores are < 0.02; Either call summary() with a trimming threshold as lb 
 or change the estimand to ATT.
                                    ATE        SE  95% CI-LB  95% CI-UB
Treat level 1 - Treat level 0  0.376561  0.001700   0.373228   0.379894
Treat level 2 - Treat level 0  0.775688  0.001568   0.772615   0.778761
Treat level 3 - Treat level 0  0.881498  0.001797   0.877977   0.885020
CPU times: user 1min 23s, sys: 1.29 s, total: 1min 25s
Wall time: 20.9 s
