# CUPED

Variance reduction using CUPED (Controlled-experiment Using Pre-Experiment Data).

In [None]:
import warnings

import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

sys.path.append('../')

from abacus.auto_ab.abtest import ABTest
from abacus.auto_ab.params import ABTestParams
from abacus.auto_ab.params import DataParams, HypothesisParams

%load_ext autoreload
%autoreload 2

## Case: no difference between groups

In [None]:
# Load the example dataset and preview its first rows.
csv_path = './data/ab_data.csv'
df = pd.read_csv(csv_path)
df.head()

In [None]:
# Column mapping and group labels: 'A' is the control group, 'B' the
# treatment group; `height_now` is the target and `height_prev` the covariate.
data_kwargs = {
    'id_col': 'id',
    'group_col': 'groups',
    'control_name': 'A',
    'treatment_name': 'B',
    'target': 'height_now',
    'covariate': 'height_prev',
    'is_grouped': True,
}
data_params = DataParams(**data_kwargs)

# Hypothesis settings for a continuous metric compared by its mean.
# NOTE(review): alpha / beta are presumably the type I / type II error
# rates -- confirm against HypothesisParams.
hypothesis_kwargs = {
    'alpha': 0.05,
    'beta': 0.2,
    'alternative': 'greater',
    'metric_type': 'continuous',
    'metric_name': 'mean',
}
hypothesis_params = HypothesisParams(**hypothesis_kwargs)

# Bundle both parameter sets into the object passed to ABTest.
ab_params = ABTestParams(data_params, hypothesis_params)

In [None]:
# Build the A/B test object from the loaded data and the parameters above.
ab_test = ABTest(df, ab_params)

In [None]:
# Mean and variance of the control / treatment samples held by `ab_test`.
samples = ab_test.params.data_params
control_sample, treatment_sample = samples.control, samples.treatment
print('Control mean = {:.3f}'.format(np.mean(control_sample)))
print('Treatment mean = {:.3f}'.format(np.mean(treatment_sample)))
print('Control var = {:.3f}'.format(np.var(control_sample)))
print('Treatment var = {:.3f}'.format(np.var(treatment_sample)))

In [None]:
# Apply CUPED; the returned object is kept separately as `ab_test1`.
# NOTE(review): presumably a new test object holding the covariate-adjusted
# target -- confirm against the abacus implementation of `cuped()`.
ab_test1 = ab_test.cuped()

In [None]:
# Same summary as for `ab_test`, now for the object returned by `cuped()`.
cuped_samples = ab_test1.params.data_params
summary_rows = (
    ('Control mean = {:.3f}', np.mean, cuped_samples.control),
    ('Treatment mean = {:.3f}', np.mean, cuped_samples.treatment),
    ('Control var = {:.3f}', np.var, cuped_samples.control),
    ('Treatment var = {:.3f}', np.var, cuped_samples.treatment),
)
for template, statistic, sample in summary_rows:
    print(template.format(statistic(sample)))

In [None]:
# Run the three tests on `ab_test` (not on the `ab_test1` returned by
# `cuped()`); each result is printed right after its test finishes.
boot_result = ab_test.test_boot_confint()
print(f"bootstrap_test: {boot_result}")

mannwhitney_result = ab_test.test_mannwhitney()
print(f"mannwhitney_test: {mannwhitney_result}")

welch_result = ab_test.test_welch()
print(f"welch_test: {welch_result}")

In [None]:
# Plot for the test object built directly from the loaded data.
ab_test.plot()

In [None]:
# Plot for the object returned by `cuped()`.
ab_test1.plot()

## Case: differences between groups

### Increase values of one group

In [None]:
# Reload the dataset so this case starts from the unmodified data.
df = pd.read_csv('./data/ab_data.csv')

# One increment per row, drawn from U(0, 2) and sorted ascending.
# NOTE: no seed is set in this cell, so the increments differ between runs.
incs = np.sort(np.random.uniform(0.0, 2, df.shape[0]))

# Keep `height_now` where the group is 'A' and add the increment to all other
# rows. The result is assigned back to the column rather than calling
# `.where(..., inplace=True)` on `df['height_now']`: that form mutates a
# temporary Series, which raises a chained-assignment warning in pandas 2.x
# and leaves `df` unchanged once Copy-on-Write is enabled.
df['height_now'] = df['height_now'].where(
    df['groups'] == 'A',
    df['height_now'] + incs,
)

In [None]:
# Preview the first rows after `height_now` has been modified.
df.head()

In [None]:
# Data description: 'A' is the control group, 'B' the treatment group;
# `height_now` is the target and `height_prev` the covariate.
data_params = DataParams(
    id_col='id',
    group_col='groups',
    control_name='A', treatment_name='B',
    target='height_now', covariate='height_prev',
    is_grouped=True,
)

# Hypothesis settings: continuous metric, compared by its mean.
# NOTE(review): alpha / beta are presumably the type I / type II error
# rates -- confirm against HypothesisParams.
hypothesis_params = HypothesisParams(
    alpha=0.05, beta=0.2,
    alternative='greater',
    metric_type='continuous', metric_name='mean',
)

# Combine both parameter sets for ABTest.
ab_params = ABTestParams(data_params, hypothesis_params)

In [None]:
# Build the A/B test object from the modified data and the parameters above.
ab_test = ABTest(df, ab_params)

In [None]:
# Mean and variance of the control / treatment samples held by `ab_test`.
raw_samples = ab_test.params.data_params
control_values = raw_samples.control
treatment_values = raw_samples.treatment

control_mean, treatment_mean = np.mean(control_values), np.mean(treatment_values)
control_var, treatment_var = np.var(control_values), np.var(treatment_values)

print('Control mean = {:.3f}'.format(control_mean))
print('Treatment mean = {:.3f}'.format(treatment_mean))
print('Control var = {:.3f}'.format(control_var))
print('Treatment var = {:.3f}'.format(treatment_var))

In [None]:
# Run the three tests on `ab_test`, i.e. before `cuped()` is called;
# each result is printed right after its test finishes.
bootstrap_outcome = ab_test.test_boot_confint()
print(f"bootstrap_test: {bootstrap_outcome}")

mannwhitney_outcome = ab_test.test_mannwhitney()
print(f"mannwhitney_test: {mannwhitney_outcome}")

welch_outcome = ab_test.test_welch()
print(f"welch_test: {welch_outcome}")

In [None]:
# Apply CUPED; the returned object is kept separately as `ab_test1`.
# NOTE(review): presumably a new test object holding the covariate-adjusted
# target -- confirm against the abacus implementation of `cuped()`.
ab_test1 = ab_test.cuped()

In [None]:
# Mean and variance of both samples for the object returned by `cuped()`.
adjusted = ab_test1.params.data_params
for template, statistic, sample in (
    ('Control mean = {:.3f}', np.mean, adjusted.control),
    ('Treatment mean = {:.3f}', np.mean, adjusted.treatment),
    ('Control var = {:.3f}', np.var, adjusted.control),
    ('Treatment var = {:.3f}', np.var, adjusted.treatment),
):
    print(template.format(statistic(sample)))

In [None]:
# Run the tests on `ab_test1`, the object returned by `cuped()`.
# Previously this cell called the tests on `ab_test` again, which only
# repeated the pre-CUPED results printed earlier and never tested the
# CUPED-adjusted data.
print(f"bootstrap_test: {ab_test1.test_boot_confint()}")
print(f"mannwhitney_test: {ab_test1.test_mannwhitney()}")
print(f"welch_test: {ab_test1.test_welch()}")

In [None]:
# Plot for the test object built from the modified data (before `cuped()`).
ab_test.plot()

In [None]:
# Plot for the object returned by `cuped()`.
ab_test1.plot()