In [74]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [78]:
def cuped_generator(
    sample_size = 500,
    t_proportion = 0.5,
    effect_size = 0.5,
    seed = 123
):
    rng = np.random.default_rng(seed=seed)

    t = rng.binomial(n=1,p=t_proportion,size=(sample_size,))
    pre = rng.normal(loc=5, scale=2, size=(sample_size,))
    post = (
        pre + 
        rng.normal(loc=3, size=(sample_size,)) + 
        t*effect_size
    )
    pre_normal = pre - np.mean(pre)
    df = pd.DataFrame(
        {
            'Treatment': t,
            "Pre_trigger" : pre,
            "Post_trigger" : post,
            "Pre_normalized" : pre_normal
        }
    )
    return df

In [88]:
data = cuped_generator(seed=10)
reg = smf.ols("Post_trigger ~ Treatment",data).fit()
reg.get_robustcov_results('HC2').summary(slim=True)

0,1,2,3
Dep. Variable:,Post_trigger,R-squared:,0.013
Model:,OLS,Adj. R-squared:,0.011
No. Observations:,500,F-statistic:,6.354
Covariance Type:,HC2,Prob (F-statistic):,0.012

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,7.8951,0.139,56.672,0.000,7.621,8.169
Treatment,0.5008,0.199,2.521,0.012,0.110,0.891


In [89]:
cuped_lm = sm.OLS(data['Post_trigger'], data['Pre_trigger']).fit()
theta = cuped_lm.params[0]
data['Post_cuped'] = data['Post_trigger'] - theta*data['Pre_normalized']
cuped_reg = smf.ols("Post_cuped ~ Treatment", data).fit()
cuped_reg.get_robustcov_results('HC2').summary(slim=True)

0,1,2,3
Dep. Variable:,Post_cuped,R-squared:,0.02
Model:,OLS,Adj. R-squared:,0.018
No. Observations:,500,F-statistic:,10.25
Covariance Type:,HC2,Prob (F-statistic):,0.00145

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,7.9225,0.100,79.243,0.000,7.726,8.119
Treatment,0.4454,0.139,3.202,0.001,0.172,0.719


In [90]:
reg_adj = smf.ols(
    formula="Post_trigger ~ Treatment + Pre_trigger + Treatment:Pre_normalized",
    data=data).fit()
reg_adj.get_robustcov_results('HC2').summary(slim=True)

0,1,2,3
Dep. Variable:,Post_trigger,R-squared:,0.784
Model:,OLS,Adj. R-squared:,0.782
No. Observations:,500,F-statistic:,628.7
Covariance Type:,HC2,Prob (F-statistic):,1.56e-168

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,3.1339,0.183,17.153,0.000,2.775,3.493
Treatment,0.4660,0.093,5.004,0.000,0.283,0.649
Pre_trigger,0.9722,0.034,28.842,0.000,0.906,1.038
Treatment:Pre_normalized,0.0203,0.046,0.444,0.657,-0.070,0.110
