Based on [this post](https://deliveroo.engineering/2018/12/07/monte-carlo-power-analysis.html) from the Deliveroo engineering blog.

In [10]:
import numpy as np
import pandas as pd
import altair as alt

from scipy.stats import norm, binom, mannwhitneyu
from statsmodels.stats.weightstats import ttest_ind


# t-test

In [11]:
# Monte Carlo power analysis for Welch's t-test.
#
# Sample data would be actual data measured over a fixed period of time prior to our
# experiment. For illustration purposes here we have generated data from a normal
# distribution.
seed = 42 # Fixed seed so the simulated power curve is reproducible on re-run
rng = np.random.default_rng(seed)

sample_mean = 21.50
sample_sd = 12.91
sample_data = norm.rvs(loc=sample_mean, scale=sample_sd, size=20000, random_state=rng)

sample_sizes = range(250, 20000 + 1, 250) # Sample sizes we will test over
alpha = 0.05 # Our fixed alpha
sims = 20 # The number of simulations we will run per sample size
# The minimum relative effect we will test for (3%). We could try multiple relative
# effects if we are not sure what our minimum relative effect should be
relative_effect = 1.03
# Alternative hypothesis passed to statsmodels' ttest_ind
# ("two-sided", "larger" or "smaller")
alternative = "two-sided"

# One row per sample size: [sample size, estimated power]
power_dist = np.empty((len(sample_sizes), 2))
for i, N in enumerate(sample_sizes):
    control_data = sample_data[:N]
    # Multiply the control data by the relative effect. This scales every observation,
    # so the variant's mean moves up or down depending on the relative effect (and its
    # spread is scaled by the same factor).
    variant_data = control_data * relative_effect

    significance_results = []
    for j in range(sims):
        # Randomly allocate each observation to the control (1) or the variant (0)
        rv = binom.rvs(1, 0.5, size=N, random_state=rng)
        control_sample = control_data[rv == 1]
        variant_sample = variant_data[rv == 0]

        # Use Welch's t-test, which does not assume equal variances
        test_result = ttest_ind(control_sample, variant_sample,
                                alternative=alternative, usevar='unequal')

        # Test for significance (index 1 of the result is the p-value)
        significance_results.append(test_result[1] <= alpha)
    # The power is the proportion of simulations with a significant result,
    # as we are assuming the alternative hypothesis is true
    power_dist[i] = [N, np.mean(significance_results)]

In [12]:
# Wrap the simulated (sample size, power) pairs in a DataFrame so Altair can plot them.
power_dist = pd.DataFrame(power_dist, columns=['sample_size', 'power'])
source = power_dist

# Encodings shared by the scatter points and the smoothed trend line.
sample_size_axis = alt.X('sample_size', axis=alt.Axis(title='Sample size'))
power_axis = alt.Y('power', axis=alt.Axis(title='Power'))
tbase = alt.Chart().encode(x=sample_size_axis, y=power_axis)

# Horizontal rule drawn at the calculated field `a` (set to 0.8 on the layered chart).
threshold_axis = alt.Y('a:Q', axis=alt.Axis(title=''))
hline = alt.Chart().mark_rule().encode(y=threshold_axis)

# Layer raw points, a LOESS-smoothed curve and the threshold rule over one dataset.
points = tbase.mark_point()
trend = tbase.transform_loess('sample_size', 'power').mark_line()
power_chart = alt.layer(points, trend, hline, data=source)
power_chart.transform_calculate(a="0.8")

# t and U test

In [13]:
# Monte Carlo power analysis comparing Welch's t-test with the Mann-Whitney U-test.
#
# Sample data would be actual data measured over a fixed period of time prior to our
# experiment. For illustration purposes here we have generated data from a normal
# distribution.
seed = 42 # Fixed seed so the simulated power curves are reproducible on re-run
rng = np.random.default_rng(seed)

sample_mean = 21.50
sample_sd = 12.91
sample_data = norm.rvs(loc=sample_mean, scale=sample_sd, size=20000, random_state=rng)

sample_sizes = range(250, 20000 + 1, 250) # Sample sizes we will test over
alpha = 0.05 # Our fixed alpha
sims = 20 # The number of simulations we will run per sample size
# The minimum relative effect we will test for (3%). We could try multiple relative
# effects if we are not sure what our minimum relative effect should be
relative_effect = 1.03
# Alternative hypothesis, using the statsmodels spelling
# ("two-sided", "larger" or "smaller")
alternative = "two-sided"
# scipy's mannwhitneyu names the one-sided alternatives differently from statsmodels'
# ttest_ind, so translate once here; otherwise a one-sided setting would be rejected
# by one of the two tests.
scipy_alternative = {
    "two-sided": "two-sided",
    "larger": "greater",
    "smaller": "less",
}[alternative]

# One row per sample size: [sample size, t-test power, U-test power]
power_dist = np.empty((len(sample_sizes), 3))
for i, N in enumerate(sample_sizes):
    control_data = sample_data[:N]
    # Multiply the control data by the relative effect. This scales every observation,
    # so the variant's mean moves up or down depending on the relative effect (and its
    # spread is scaled by the same factor).
    variant_data = control_data * relative_effect

    significance_tresults = []
    significance_uresults = []
    for j in range(sims):
        # Randomly allocate each observation to the control (1) or the variant (0)
        rv = binom.rvs(1, 0.5, size=N, random_state=rng)
        control_sample = control_data[rv == 1]
        variant_sample = variant_data[rv == 0]

        # Use Welch's t-test, which does not assume equal variances
        ttest_result = ttest_ind(control_sample, variant_sample,
                                 alternative=alternative, usevar='unequal')
        # Use Mann-Whitney U-test on the same allocation
        utest_result = mannwhitneyu(control_sample, variant_sample,
                                    alternative=scipy_alternative)

        # Test for significance (index 1 of each result is the p-value)
        significance_tresults.append(ttest_result[1] <= alpha)
        significance_uresults.append(utest_result[1] <= alpha)

    # The power is the proportion of simulations with a significant result,
    # as we are assuming the alternative hypothesis is true
    power_dist[i] = [N, np.mean(significance_tresults), np.mean(significance_uresults)]

In [14]:
# Reshape the simulation results to long format: one row per (sample size, test) pair,
# with the estimated power in the `power` column.
power_dist = pd.DataFrame(power_dist, columns=['sample_size', 'tpower', 'upower'])
power_dist = power_dist.melt(id_vars='sample_size', value_vars=['tpower', 'upower'], var_name='test', value_name='power')
# Human-readable legend labels for the two tests
labels = {'tpower':'MC Simulation t-test',
            'upower': 'MC Simulation MWW U-test'}
# Assign the relabelled column back explicitly. Calling replace(..., inplace=True) on
# a column selection is a chained in-place update, which pandas warns about and which
# does not modify the DataFrame under Copy-on-Write.
power_dist['test'] = power_dist['test'].replace(labels)

In [15]:
# Preview the first five rows of the long-format results.
power_dist.head(n=5)

Unnamed: 0,sample_size,test,power
0,250.0,MC Simulation t-test,0.05
1,500.0,MC Simulation t-test,0.15
2,750.0,MC Simulation t-test,0.1
3,1000.0,MC Simulation t-test,0.05
4,1250.0,MC Simulation t-test,0.15


In [16]:
# Scatter of simulated power against sample size, coloured by statistical test.
sample_size_axis = alt.X('sample_size', axis=alt.Axis(title='Sample size'))
power_axis = alt.Y('power', axis=alt.Axis(title='Power'))
test_colour = alt.Color('test', legend=alt.Legend(title=""))
dots = alt.Chart(power_dist).mark_point().encode(
    x=sample_size_axis,
    y=power_axis,
    color=test_colour,
)

# Horizontal rule drawn at the calculated field `a`, which is fixed to 0.8.
threshold_axis = alt.Y('a:Q', axis=alt.Axis(title=''))
hline = (
    alt.Chart(power_dist)
    .mark_rule()
    .encode(y=threshold_axis)
    .transform_calculate(a='0.8')
)

# One LOESS-smoothed curve per test, layered over the points and the threshold rule.
trend = dots.transform_loess('sample_size', 'power', groupby=['test']).mark_line()
dots + trend + hline

# Program

In [1]:
import numpy as np
import pandas as pd
import altair as alt

from scipy.stats import norm, binom, mannwhitneyu
from statsmodels.stats.weightstats import ttest_ind

from deliveroo.toolbox import get_orders_data
from deliveroo.toolbox import get_power_dist
from deliveroo.toolbox import get_power_graph

In [11]:
# Draw 12,000 synthetic observations from a normal distribution (mean 20, sd 10).
newdata = norm.rvs(size=12000, scale=10, loc=20)
# Run the toolbox power simulation on the synthetic data.
# NOTE(review): the positional arguments (250, 12000, 250) look like the minimum sample
# size, maximum sample size and step -- confirm against deliveroo.toolbox.
data = get_power_dist(newdata, 250, 12000, 250)
# Display the result using the toolbox's plotting helper.
get_power_graph(data)