# **CUPED (Controlled-experiment Using Pre-Experiment Data)**

# Publication "Reducing variance in A/B testing with CUPED"

In [1]:
# Ноутбук-первоисточник https://github.com/mtrencseni/playground/blob/master/CUPED.ipynb

In [2]:
import scipy
import numpy as np
from math import sqrt
from scipy import stats
from numpy.random import normal
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
def get_cuped_adjusted(A_before, B_before, A_after, B_after):
    cv = np.cov([A_after + B_after, A_before + B_before])
    theta = cv[0, 1] / cv[1, 1]
    mean_before = np.mean(A_before + B_before)
    A_after_adjusted = [after - (before - mean_before) * theta for after, before in zip(A_after, A_before)]
    B_after_adjusted = [after - (before - mean_before) * theta for after, before in zip(B_after, B_before)]
    return A_after_adjusted, B_after_adjusted

In [4]:
# # Аналогично
# def get_cuped_adjusted(A_before, B_before, A_after, B_after):
#     theta = np.cov([A_after + B_after, A_before + B_before], bias=True)[0][1] / np.var([A_before + B_before])
#     mean_before = np.mean(A_before + B_before)
#     A_after_adjusted = [after - (before - mean_before) * theta for after, before in zip(A_after, A_before)]
#     B_after_adjusted = [after - (before - mean_before) * theta for after, before in zip(B_after, B_before)]
#     return A_after_adjusted, B_after_adjusted

In [5]:
def get_AB_samples(before_mean, before_sigma, eps_sigma, treatment_lift, N):
    A_before = list(normal(loc=before_mean, scale=before_sigma, size=N))
    B_before = list(normal(loc=before_mean, scale=before_sigma, size=N))
    A_after  = [x + normal(loc=0, scale=eps_sigma) for x in A_before]
    B_after  = [x + normal(loc=0, scale=eps_sigma) + treatment_lift for x in B_before]
    return A_before, B_before, A_after, B_after

In [6]:
def lift(A, B):
    return np.mean(B) - np.mean(A)

In [7]:
def p_value(A, B):
    return stats.ttest_ind(A, B)[1]

In [8]:
N = 1000
before_mean = 100 
before_sigma = 50
eps_sigma = 20
treatment_lift = 2

In [9]:
A_before, B_before, A_after, B_after = get_AB_samples(before_mean, before_sigma, eps_sigma, treatment_lift, N)
A_after_adjusted, B_after_adjusted = get_cuped_adjusted(A_before, B_before, A_after, B_after)

In [10]:
print('A mean before = %05.1f, A mean after = %05.1f' % (np.mean(A_before), np.mean(A_after)))
print('B mean before = %05.1f, B mean after = %05.1f' % (np.mean(B_before), np.mean(B_after)))
print('Traditional    A/B test evaluation, lift = %.3f, p-value = %.3f' % (lift(A_after, B_after), p_value(A_after, B_after)))
print('CUPED adjusted A/B test evaluation, lift = %.3f, p-value = %.3f' % (lift(A_after_adjusted, B_after_adjusted), p_value(A_after_adjusted, B_after_adjusted)))

A mean before = 099.1, A mean after = 098.5
B mean before = 099.9, B mean after = 101.4
Traditional    A/B test evaluation, lift = 2.890, p-value = 0.231
CUPED adjusted A/B test evaluation, lift = 2.062, p-value = 0.024
