In [1]:
import os
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import scipy.stats as stats

In [2]:
# Apply seaborn's default theme to matplotlib figures drawn later in the notebook.
sns.set()

In [3]:
# power calc:
def calc_pre_experiment_power(delta, se, alpha):
    """Approximate the power of a two-sided z-test before running an experiment.

    Evaluates Phi(delta / se - z), where z is the standard-normal quantile at
    1 - alpha / 2 and Phi is the standard-normal CDF.

    Args:
        delta: Absolute effect size to detect.
        se: Standard error of the estimated difference.
        alpha: Two-sided significance level.

    Returns:
        The probability mass of the standard normal below delta / se - z.
    """
    critical_z = stats.norm.ppf(1 - (alpha / 2))
    # Distance of the standardized effect beyond the rejection threshold.
    shifted_z = (delta / se) - critical_z
    return stats.norm.cdf(x=shifted_z)

In [4]:
def calc_delta(relative_minimum_detectable_effect_size, baseline_conversion_rate):
    """Convert a relative minimum detectable effect into an absolute one.

    Args:
        relative_minimum_detectable_effect_size: Effect expressed as a
            fraction of the baseline (e.g. 0.3 for 30%).
        baseline_conversion_rate: Baseline conversion rate.

    Returns:
        The product of the baseline rate and the relative effect.
    """
    return baseline_conversion_rate * relative_minimum_detectable_effect_size

In [24]:
def calc_sigma(baseline_conversion_rate, return_square=False):
    """Spread of a Bernoulli outcome with the given conversion rate.

    Args:
        baseline_conversion_rate: Conversion rate p.
        return_square: When truthy, return the variance p * (1 - p);
            otherwise return its square root.

    Returns:
        The variance or the standard deviation, depending on return_square.
    """
    variance = baseline_conversion_rate * (1 - baseline_conversion_rate)
    return variance if return_square else np.sqrt(variance)

In [6]:
def calc_standard_error(sample_size, sigma=None, baseline_conversion_rate=None):
    """Standard error of a difference between two groups of equal size.

    Computes sigma * sqrt(2 / sample_size). Sigma is either passed directly or
    derived from baseline_conversion_rate via calc_sigma.

    Args:
        sample_size: Number of observations in each group.
        sigma: Standard deviation of the outcome. Mutually exclusive with
            baseline_conversion_rate.
        baseline_conversion_rate: Conversion rate from which sigma is derived.
            Mutually exclusive with sigma.

    Returns:
        The standard error as a float.

    Raises:
        ValueError: If neither or both of sigma and baseline_conversion_rate
            are given.
    """
    # Exactly one source for sigma is allowed. Passing neither used to slip
    # through the old check and fail later with an unrelated TypeError.
    if (sigma is None) == (baseline_conversion_rate is None):
        raise ValueError(
            "Exactly one of sigma or baseline_conversion_rate must be specified"
        )

    if sigma is None:
        sigma = calc_sigma(baseline_conversion_rate)

    return sigma * np.sqrt(2 / sample_size)

In [23]:
def calc_sample_size(power, alpha, relative_minimum_detectable_effect_size, baseline_conversion_rate):
    """Sample size needed to detect a relative effect on a conversion rate.

    Evaluates n = 2 * variance * (z_power + z_alpha)**2 / delta**2, where
    variance comes from calc_sigma(..., return_square=True), delta from
    calc_delta, z_alpha is the normal quantile at 1 - alpha / 2 and z_power
    the normal quantile at `power`.

    Args:
        power: Desired power.
        alpha: Two-sided significance level.
        relative_minimum_detectable_effect_size: Effect as a fraction of the
            baseline rate.
        baseline_conversion_rate: Baseline conversion rate.

    Returns:
        The (unrounded) sample size. The formula carries the factor 2 of a
        two-group comparison, so this is presumably per variant.
    """
    variance = calc_sigma(baseline_conversion_rate=baseline_conversion_rate, return_square=True)
    effect = calc_delta(
        relative_minimum_detectable_effect_size=relative_minimum_detectable_effect_size,
        baseline_conversion_rate=baseline_conversion_rate,
    )

    alpha_quantile = stats.norm.ppf(1 - (alpha / 2))
    power_quantile = stats.norm.ppf(power)

    numerator = 2 * (variance * (power_quantile + alpha_quantile) ** 2)
    return numerator / (effect ** 2)

In [8]:
def false_positive_risk(alpha, power, success_rate):
    """False Positive Risk of a statistically significant result.

    This is the probability that the null hypothesis is true given that the
    experiment was statistically significant.

    Args:
        alpha: Significance level applied to the test.
        power: Power of the test.
        success_rate: Prior probability that an experiment has a real effect.

    Returns:
        alpha * pi / (alpha * pi + power * (1 - pi)) with pi = 1 - success_rate.
    """
    prior_null = 1 - success_rate
    false_positive_mass = alpha * prior_null
    true_positive_mass = power * (1 - prior_null)
    return false_positive_mass / (false_positive_mass + true_positive_mass)

In [67]:
def set_alpha(desired_fpr, power, success_rate, two_sided=True):
    """Significance level that yields a desired False Positive Risk.

    Solves the expression used in false_positive_risk for alpha:
    alpha = fpr * power * success_rate / ((1 - success_rate) * (1 - fpr)).

    Args:
        desired_fpr: Target False Positive Risk.
        power: Power of the test.
        success_rate: Prior probability that an experiment has a real effect.
        two_sided: When truthy, the solved alpha is doubled before returning.

    Returns:
        The solved alpha, doubled if two_sided is truthy.
    """
    numerator = desired_fpr * power * success_rate
    denominator = (1 - success_rate) * (1 - desired_fpr)
    solved_alpha = numerator / denominator
    return 2 * solved_alpha if two_sided else solved_alpha

Unified checkout Opp sizing: https://docs.google.com/spreadsheets/d/1O7Wq6YedPIrHsQ75m_jGYF8tPYNTFhq4kgj0VhKEsjE/edit#gid=2144011426

In [90]:
# Design inputs for the first sample-size scenario.
# Baseline conversion rate (0.8%).
baseline_conv = 0.8/100
# Relative minimum detectable effect: 30% of the baseline.
relative_mde = 0.3
# Two-sided significance level and target power.
alpha = 0.05
power = 0.8
weekly_n = 6250 # TODO(review): the sample-size formula appears to give a per-variant n - confirm this weekly traffic figure is per variant too

In [91]:
# Required sample size for the design inputs above.
sample_size = calc_sample_size(power=power, 
                               alpha=alpha, 
                               relative_minimum_detectable_effect_size=relative_mde, 
                               baseline_conversion_rate=baseline_conv)

In [92]:
sample_size

21628.02415687305

In [93]:
# Weeks of traffic needed to reach the required sample size, at weekly_n users per week.
num_weeks_required = sample_size/weekly_n

In [94]:
num_weeks_required

3.460483865099688

In [19]:
# So ~3.5 weeks (4 full weeks) are required, assuming weekly_n is per-variant traffic... this is good

In [95]:
# False positive risk at the planned alpha and power, with a 5% prior success rate.
# alpha is halved here - presumably because only a significant result in one direction counts; confirm.
fpr = false_positive_risk(alpha=alpha/2, power=power, success_rate=0.05)

In [96]:
fpr

0.372549019607843

In [97]:
# Given the low success rate, the FPR is quite high. Can we bring it down by choosing a stricter p-value cut-off (i.e. alpha)?
# Of course, this assumes the 80% power level is actually achieved.
# NOTE(review): power is hard-coded to 0.8 here rather than reusing the `power` variable.
required_alpha = set_alpha(desired_fpr=0.1, power=0.8, success_rate=0.05, two_sided=True)

In [98]:
required_alpha

0.009356725146198832

In [99]:
# So the required alpha is much lower. What sample size does that stricter alpha need?
sample_size_rev = calc_sample_size(power=power, 
                                   alpha=required_alpha, 
                                   relative_minimum_detectable_effect_size=relative_mde, 
                                   baseline_conversion_rate=baseline_conv)

In [100]:
sample_size_rev

32614.94647790076

In [101]:
# Weeks of traffic needed at the stricter alpha, at weekly_n users per week.
num_weeks_required_rev = sample_size_rev/weekly_n

In [102]:
num_weeks_required_rev

5.218391436464122

In [None]:
# So ~5.2 weeks (6 full weeks) if we wanted a 10% risk of a false positive, assuming weekly_n is per-variant traffic.

In [104]:
# Second scenario: what power does a given sample provide?
# NOTE: this cell rebinds alpha, baseline_conv and relative_mde from the first scenario.
# NOTE(review): 159/3032 and the 3060/3032 group sizes look like observed experiment counts - confirm their source.
alpha = 0.05
baseline_conv = 159/3032
relative_mde = 0.3
# Average of the two group sizes, used as the per-group n.
n = (3060 + 3032)/2

In [105]:
# Absolute effect corresponding to the relative MDE on this baseline.
delta_wtf = calc_delta(relative_minimum_detectable_effect_size=relative_mde, baseline_conversion_rate=baseline_conv)

In [106]:
delta_wtf

0.015732189973614773

In [107]:
# Standard error for n users per group, with sigma derived from the baseline rate.
se_wtf = calc_standard_error(sample_size=n, baseline_conversion_rate=baseline_conv)

In [108]:
se_wtf

0.005711987212758526

In [109]:
# Power to detect delta_wtf given se_wtf and alpha.
power_wtf = calc_pre_experiment_power(delta=delta_wtf, se=se_wtf, alpha=alpha)

In [110]:
power_wtf

0.7864829397738686

In [103]:
# Relative difference between 6.93 and 5.24. 5.24 matches 159/3032 expressed in percent;
# 6.93 is presumably the other group's conversion rate in percent - confirm.
(6.93 - 5.24)/5.24

0.32251908396946555

In [69]:
# False positive risk when power is only 3% and the prior success rate is 33%.
false_positive_risk(alpha=0.05/2, power=0.03, success_rate=0.33)

0.6285178236397748

In [66]:
# Hand computation of the set_alpha formula for desired_fpr=0.05, power=0.03, success_rate=0.33 (two-sided).
2*(0.05)*(0.03)*(0.33)/(0.95*(1-0.33))

0.0015553809897879028

In [70]:
# Same inputs through set_alpha, to compare against the hand computation.
set_alpha(desired_fpr=0.05, power=0.03, success_rate=0.33, two_sided=True)

0.0015553809897879028

In [14]:
# if https://experiments.shopify.com/experiments/sq_starter_plan_segmentation_v2#retail_user_token_funnel_conversion_and_activation_rate_(retail_-_pos_pro_upgrade)_31455
# was an experiment for just 1st transaction:

In [17]:
# Sample size: average of the two group sizes (presumably control and exposure - confirm).
(92821 + 93004)/2

92912.5

In [19]:
# Let's round the average group size to a convenient figure.
# NOTE: this rebinds n from the earlier scenario.
n = 92900

In [20]:
# Baseline conversion rate, assumed from the control group (could be inaccurate).
baseline_conv = 0.0012

In [44]:
# We want to be able to detect a conversion rate of 0.1% in the exposure, so the minimum detectable effect size would need to be:
# NOTE(review): the denominator is 0.001, but calc_delta multiplies the relative effect by the
# baseline (0.0012). Relative to that baseline the change is (0.0012 - 0.001)/0.0012, about 16.7%.
# Confirm which is intended.
(0.0012 - 0.001)/0.001

0.19999999999999987

In [22]:
# Or 19.99%.  Let's call it 19%

In [50]:
# So the absolute effect implied by a 19% relative change on the baseline is:
delta_ = calc_delta(relative_minimum_detectable_effect_size=0.19, baseline_conversion_rate=baseline_conv)

In [51]:
delta_

0.00022799999999999999

In [52]:
# Standard deviation of the conversion outcome at the baseline rate.
sigma_ = calc_sigma(baseline_conversion_rate=baseline_conv)

In [53]:
sigma_

0.03462022530255977

In [54]:
# Standard error for n users per group, using the sigma computed above.
se_ = calc_standard_error(sigma=sigma_, sample_size=n)

In [55]:
se_

0.0001606338360333663

In [60]:
# Power to detect delta_ at this sample size with alpha = 0.05.
pre_experiment_power = calc_pre_experiment_power(delta=delta_, se=se_, alpha=0.05)

In [61]:
pre_experiment_power

0.2943962049349005

In [62]:
# Hand check of the quantity inside calc_pre_experiment_power, with rounded inputs:
# delta/se - z = (delta/sigma)*sqrt(n/2) - 1.96.
(0.000228/0.0346)*np.sqrt(92900/2) - 1.96

-0.5397931357822179

In [63]:
# Normal CDF at the hand-computed value; should roughly match pre_experiment_power.
stats.norm.cdf(-0.5398)

0.29466748353969463

In [64]:
# Standard-normal quantile for 80% power.
stats.norm.ppf(0.8)

0.8416212335729143

In [66]:
# Hand evaluation of the calc_sample_size formula, 2*sigma^2*(z_power + z_alpha)^2/delta^2, for 80% power.
# NOTE(review): 0.00112 is used for sigma^2, but 0.0346**2 is about 0.0012 - possibly a typo; confirm.
2*(0.00112/(0.000228**2))*(0.8416+1.96)**2

338213.22203754995