In [1]:
import numpy as np
import pprint
import sys
import warnings

sys.path.append('../src')
from simulation.user_sessions import simulate_users
from experiments.pre_test_analysis import required_sample_size, estimate_cuped_stats
from experiments.experiment import experiment

In [2]:

# Pre-test analysis: estimate the CUPED statistics, simulate the user
# sessions, and derive the required sample size for each test design.
#
# Tunable inputs are named once here instead of being repeated as bare
# literals in the calls below.
SEED = 42                  # arbitrary fixed seed
CUPED_ESTIMATE_N = 10_000  # sample size used to estimate the CUPED statistics
N_WEEKS = 4                # presumably the period length in weeks for both calls -- TODO confirm
N_SIM_USERS = 100_000      # first argument of simulate_users; presumably a user count -- TODO confirm
TRUE_EFFECT = 0.03         # exp_effect handed to the simulation

# NOTE(review): this only makes the run reproducible if the project code draws
# from NumPy's legacy global RNG -- confirm, or thread a seed through explicitly.
np.random.seed(SEED)

cuped_corrcoef, cuped_theta = estimate_cuped_stats(CUPED_ESTIMATE_N, N_WEEKS)

print(f'Estimated CUPED correlation coefficient: {cuped_corrcoef:.3f}')
print(f'Estimated CUPED theta: {cuped_theta:.3f}\n')

pre_c_users, post_c_users, pre_t_users, post_t_users = simulate_users(
    N_SIM_USERS, N_WEEKS, exp_effect=TRUE_EFFECT
)

# Stack control on top of treatment, then sum along axis 1.
# Assumes rows are users and columns are periods -- TODO confirm.
pre_exp_users_sum = np.vstack([pre_c_users, pre_t_users]).sum(axis=1)

t_test_n, t_test_cuped_n, seq_test_n, seq_test_cuped_n = required_sample_size(
    pre_exp_users_sum, cuped_corr_coef=cuped_corrcoef
)
print(f'T-test required sample size: {t_test_n}')
print(f'T-test with CUPED required sample size: {t_test_cuped_n}')
print(f'Group sequential test required sample size: {seq_test_n}')
print(f'Group sequential test with CUPED required sample size: {seq_test_cuped_n}')

Estimated CUPED correlation coefficient: 0.752
Estimated CUPED theta: 0.885

T-test required sample size: 20382
T-test with CUPED required sample size: 8862
Group sequential test required sample size: 21037
Group sequential test with CUPED required sample size: 9147


I will be using a pre-estimated CUPED $\theta$ rather than calculating it from the actual experiment data, for two reasons: (1) treatment effects are heterogeneous, so a covariance estimated with the treatment group included gives a biased $\theta$; (2) re-calculating $\theta$ at each look in sequential testing would increase the false positive rate.

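I estimated these statistics using a sample size of 10000. A correlation coefficient of ~.75 between pre- and post-experiment sessions over a 4 week period seems reasonable. It yields a ~56% decrease in required sample size when using CUPED, consistent with CUPED reducing variance by a factor of $1 - \rho^2$ (here $\rho^2 \approx 0.752^2 \approx 0.57$). The required sample size for the group sequential test is only slightly higher than for the t-test because I will be using the O'Brien-Fleming method.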

In [6]:
# Build the experiment from the simulated sessions and the pre-computed
# sample sizes, run all four test designs, and print their results.

# Safety margin applied to both CUPED sample sizes: an extra 10% on top of the
# calculated minimum. Named once so the two call sites cannot drift apart.
CUPED_SAMPLE_INFLATION = 1.1

exp = experiment(
    pre_c_user_sessions=pre_c_users,
    post_c_user_sessions=post_c_users,
    pre_t_user_sessions=pre_t_users,
    post_t_user_sessions=post_t_users,
    t_test_n=t_test_n,
    t_test_cuped_n=round(t_test_cuped_n * CUPED_SAMPLE_INFLATION),
    seq_test_n=seq_test_n,
    seq_test_cuped_n=round(seq_test_cuped_n * CUPED_SAMPLE_INFLATION),
    cuped_theta=cuped_theta,  # fixed, pre-estimated theta
)

# NOTE(review): this silences every warning category raised while the
# experiment runs. Once the expected warning is known, narrow the filter
# (category= / module=) so unrelated warnings still surface.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    # The steps run in this fixed order; each presumably depends on the
    # previous one -- verify against the experiment class.
    exp.create_test_groups()
    exp.create_test_entries()
    exp.create_test_entry_data()
    exp.run_experiment()

# Printing happens outside the suppression scope: it only reads the results.
pprint.pprint(exp.results)

{'seq': {'p_value': 0.014, 'sample_size': 42074, 'significance': 1},
 'seq_cuped': {'p_value': 0.011, 'sample_size': 20124, 'significance': 1},
 'ttest': {'p_value': 0.01, 'sample_size': 40764, 'significance': 1},
 'ttest_cuped': {'p_value': 0.0, 'sample_size': 19496, 'significance': 1}}


The experiment is simulated with a 3% true treatment effect. I add an extra 10% to the calculated minimum sample size for the CUPED models to account for their larger estimated variance.