Scratch notebook to work on power calc functions

In [5]:
import os
import sys
import pandas as pd
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from matplotlib import style
import importlib

In [2]:
pd.options.display.max_columns=500
pd.options.display.max_colwidth=500
pd.options.display.max_rows=500

In [3]:
style.use('ggplot')

In [8]:
sys.path.append('../ab_testing_utils/')

In [11]:
import conversion_rate_utils

In [14]:
def compute_sample_size(p0, mde, alpha=0.05, beta=0.2, tails="Two"):
    """
    Returns the sample size for a two-tailed AB test comparing conversion
    rates.
    The sample size equation is for binomial distributions only.
    Parameters
    ----------
    p0 : float
        Baseline conversion rate
    mde : float or int
        Minimum detectable effect. This is the 'sensitivity' of the test or
        the relative difference in conversion rates that you want to be able
        to detect.
    alpha : float
        The chances of a Type I error. Tests are normally run to a 95%
        significance meaning an alpha of 1 - 0.95 = 0.05. Default = 0.05.
    beta : float
        The chances of a Type II error. For sample sizing, a beta of 0.2 is
        acceptable and provides the test with 80% statistical power as is
        standard.
    tails : str
        One or two tails to specify what type of hypothesis test this is.
    Returns
    -------
    Minimum number of observations required per variant.
    """

    # Conditional alpha value based on whether one or two tail test
    if tails == "Two":
        computed_alpha = alpha / 2
    else:
        computed_alpha = alpha

    p1 = p0 * (1 + mde)
    N = (
        (stats.norm.ppf(1 - computed_alpha) + stats.norm.ppf(1 - beta)) ** 2
        * (p0 * (1 - p0) + p1 * (1 - p1))
        / ((p0 - p1) ** 2)
    )
    return int(N)

In [15]:
compute_sample_size(0.1, 0.05)

57759

In [17]:
df = pd.DataFrame()

In [18]:
test_my_code = conversion_rate_utils.ConversionExperiment(df=df)

In [19]:
test_my_code.calc_sample_size(power=0.8, alpha=0.05, relative_minimum_detectable_effect_size=0.05, baseline_conversion_rate=0.1)

56511.93408731341

In [20]:
np.sin(np.arcsin(np.sqrt(0.1)) - 0.0117394/2)**2

0.09650582327777069

In [21]:
0.1 - 0.09650582327777069

0.0034941767222293196

In [22]:
# So my code seems to differ a bit.  The code above agrees mostly with pwr.2p.test(h=ES.h(p1=0.1, p2=0.105), sig.level=0.05, power=0.8, alternative="two.sided")
# Why? 
# mine is computing N as 2 * (sigma_squared * (z_power + z_alpha) ** 2) / delta**2
# Is this different than above? Let's walk through it and see where we diverge:

In [24]:
# My calc:
sigma_squared = 0.1 * (1 - 0.1)
delta_ = 0.1*0.05

In [25]:
sigma_squared

0.09000000000000001

In [26]:
delta_

0.005000000000000001

In [28]:
z_alpha = stats.norm.ppf(1 - (0.05)/2)
z_power = stats.norm.ppf(0.8)

In [29]:
z_alpha

1.959963984540054

In [30]:
z_power

0.8416212335729143

In [31]:
2 * (sigma_squared * (z_alpha + z_power) ** 2) / (delta_)**2

56511.93408731341

In [32]:
# Other calc:
# p1 = p0 * (1 + mde)
# N = (
#     (stats.norm.ppf(1 - computed_alpha) + stats.norm.ppf(1 - beta)) ** 2
#     * (p0 * (1 - p0) + p1 * (1 - p1))
#     / ((p0 - p1) ** 2)
# )
p1 = 0.1 * (1 + 0.05)

In [33]:
p1

0.10500000000000001

In [34]:
# This should be the same as my: (correct)
0.1 + delta_

0.10500000000000001

In [35]:
(stats.norm.ppf(1 - 0.05/2) + stats.norm.ppf(0.8)) ** 2

7.848879734349088

In [36]:
# Compare to my intermediate term:
(z_alpha + z_power) ** 2

7.848879734349088

In [37]:
# The same

In [38]:
# The calc above then gets multiplied by:
0.1 * (1 - 0.1) + p1 * (1 - p1)

0.18397500000000003

In [41]:
2*sigma_squared

0.18000000000000002

In [42]:
# So this is slightly higher than 2 times sigma squared? Why? Is this the difference?
# The result is divided by:
(0.1 - p1) ** 2

2.5000000000000045e-05

In [43]:
delta_**2

2.500000000000001e-05

In [44]:
# which is the same as my delta_**2 term, so the only difference arises from the 2*sigma_squared term.  I think this is possibly an approximation on my part

In [48]:
# Yeah, it's probably because of this:
int(np.round(16 * sigma_squared /delta_**2))

57600

In [51]:
# Let's try this.  Say p1 = 0.5, p2 = 0.55
# Then relative_mde = (0.55 - 0.5)/0.5 = 0.1
# My calc gives:
test_my_code.calc_sample_size(power=0.999, alpha=0.001, relative_minimum_detectable_effect_size=0.1, baseline_conversion_rate=0.5)

8142.817179335286

In [52]:
compute_sample_size(0.5, 0.1, alpha=0.001, beta=1-0.999)

8102

In [54]:
# Ok so this is fine. I'm going to stick with my code.  I have a better theoretical justification for why this is sensible. 
def create_mde_table(monthly_num_obs, baseline_conversion_rate, n_variants=2, alpha=0.05, power=0.8):
    mde_range = np.arange(0.001, 2.001, 0.001)
    
    sample_sizes = [test_my_code.calc_sample_size(power=power, 
                                                  alpha=alpha, 
                                                  relative_minimum_detectable_effect_size=mde, 
                                                  baseline_conversion_rate=baseline_conversion_rate) * n_variants for mde in mde_range]
    new_conversion_rates = [baseline_conversion_rate + test_my_code.calc_delta(baseline_conversion_rate=baseline_conversion_rate, 
                                                                              relative_minimum_detectable_effect_size=mde) for mde in mde_range]
    
    df_ = pd.DataFrame(mde_range, new_conversion_rates, sample_sizes).transpose()
    df_.columns = ['mde', 'new_conversion_rate', 'total_sample_size']
    df_['weeks'] = df_['total_sample_size'] / (monthly_num_obs / 4)
    df_['monthly_additional_conversions'] = df_['new_conversion_rate'] * monthly_num_obs
    
    return df_

In [56]:
# df_mde = create_mde_table(monthly_num_obs=5000, baseline_conversion_rate=0.1, n_variants=2, alpha=0.05, power=0.8)

In [57]:
mde_range = np.arange(0.001, 2.001, 0.001)

In [58]:
sample_sizes = [test_my_code.calc_sample_size(power=0.8, 
                                              alpha=0.05, 
                                              relative_minimum_detectable_effect_size=mde, 
                                              baseline_conversion_rate=0.1) * 2 for mde in mde_range]

In [59]:
# Should add upper and lower bounds to get a sense of which conversion rates will not be distinguishable from the baseline at the given power, alpha, and mde. 
new_conversion_rates = [0.1 + test_my_code.calc_delta(baseline_conversion_rate=0.1, 
                                                                          relative_minimum_detectable_effect_size=mde) for mde in mde_range]

In [61]:
len(mde_range)

2000

In [62]:
len(sample_sizes)

2000

In [63]:
len(new_conversion_rates)

2000

In [64]:
mde_range

array([1.000e-03, 2.000e-03, 3.000e-03, ..., 1.998e+00, 1.999e+00,
       2.000e+00])

In [71]:
df_ = pd.DataFrame()

In [73]:
df_['mde'] = mde_range

In [75]:
df_['new_conversion_rate'] = new_conversion_rates

In [76]:
df_['total_sample_size'] = sample_sizes

In [78]:
df_.loc[(df_['mde'] == 1)]

Unnamed: 0,mde,new_conversion_rate,total_sample_size
999,1.0,0.2,282.55967


In [81]:
# Yeah this is right. 
test_my_code.calc_sample_size(power=0.8, 
                              alpha=0.05, 
                              relative_minimum_detectable_effect_size=1, 
                              baseline_conversion_rate=0.1)

141.27983521828358