This notebook contains the code used in my article on p-value peeking in A/B testing. Two scenarios are described here; together they show how continuous monitoring can lead to picking the wrong web version, one that is declared statistically significant when, in reality, it is not.

In [1]:
import pandas as pd
from scipy import stats
import numpy as np
import math  
from statsmodels.stats.power import TTestIndPower
from numpy.random.mtrand import seed 
 
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
 
import plotly.graph_objects as go
import plotly.express as px 

  import pandas.util.testing as tm


# Sample size calculation

In [2]:
# Experiment design inputs.
alpha = 0.05  # significance level
power = 0.8   # desired statistical power
mde = 0.1     # minimum detectable effect
cr_a = 0.25   # conversion rate of version A; not used by the power analysis below

# Solve the two-sample t-test power equation for the one unknown, the number
# of observations per group (ratio=1 means both groups are the same size).
# NOTE(review): statsmodels documents `effect_size` as a standardized effect
# size; `mde` is passed as-is and `cr_a` plays no part here - confirm that
# this is the intended definition of the minimum detectable effect.
power_obj = TTestIndPower()
n_obs = power_obj.solve_power(
    effect_size=mde,
    alpha=alpha,
    power=power,
    ratio=1,
    alternative='two-sided',
)

print('Sample size needed in each group: {:.3f}'.format(n_obs))

Sample size needed in each group: 1570.733


# Scenario I: Valid A/B testing

In [3]:
# Scenario I: version B converts better than version A by a relative lift of
# `effect`, so there is a real difference for the test to detect.
effect = 0.1
cr_b = (1 + effect) * cr_a
# Simulate twice the computed per-group sample size so the curve continues
# past the "Minimum sample size" marker.
observations = math.ceil(2 * n_obs)


seed(16)  # 14
conversions_a = np.random.binomial(1, cr_a, observations)
conversions_b = np.random.binomial(1, cr_b, observations)

# "Peek" after every additional visitor: run Welch's t-test on the first i
# observations of each group. The slice is [:i] (not [1:i]) so the very first
# visitor is included and exactly i observations per group are tested; the
# upper bound is inclusive so the full sample is tested as well.
sample_sizes = list(range(10, observations + 1))
pval = [
    stats.ttest_ind(conversions_a[:i], conversions_b[:i], equal_var=False).pvalue
    for i in sample_sizes
]

df = pd.DataFrame(pval, columns=['pvalues'])
df['one-pvalue'] = 1 - df.pvalues
# Keep the real per-group sample size next to each p-value so the x-axis shows
# sample sizes rather than 0-based row numbers.
df['sample_size'] = sample_sizes

# Plot 1 - p-value against the sample size, mark the planned sample size and
# shade the band where the test would be declared significant (p < 0.05).
fig = px.line(df, x="sample_size", y="one-pvalue")
fig.add_vline(x=n_obs, line_width=3, line_dash="dash", line_color="green",
              annotation_text="Minimum sample size",
              annotation_position="bottom left")
fig.add_hrect(y0=1, y1=0.95, line_width=0, fillcolor="red", opacity=0.2)

# Fonts, axis titles and a centred figure title.
fig.update_layout(
    font_family="Courier New",
    font_color="blue",
    title_font_family="Times New Roman",
    title_font_color="red",
    legend_title_font_color="green",
    xaxis_title="Sample size",
    yaxis_title=" 1 - p-value",
    title={
        'text': " Sample size vs 1 - p_value ",
        'y': 0.95,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top'}
)

fig.show()

# Scenario II: Continuous Monitoring

In [4]:
# Scenario II: an A/A-style test. `effect` is 0, so both versions share the
# same true conversion rate and any "significant" result is a false positive.
seed(14)
effect = 0
cr_b = (1 + effect) * cr_a
# Simulate twice the computed per-group sample size so the curve continues
# past the "Minimum sample size" marker.
observations = math.ceil(2 * n_obs)

conversions_a = np.random.binomial(1, cr_a, observations)
conversions_b = np.random.binomial(1, cr_b, observations)

# "Peek" after every additional visitor: run Welch's t-test on the first i
# observations of each group. The slice is [:i] (not [1:i]) so the very first
# visitor is included and exactly i observations per group are tested; the
# upper bound is inclusive so the full sample is tested as well.
sample_sizes = list(range(10, observations + 1))
pval = [
    stats.ttest_ind(conversions_a[:i], conversions_b[:i], equal_var=False).pvalue
    for i in sample_sizes
]

df = pd.DataFrame(pval, columns=['pvalues'])
df['one-pvalue'] = 1 - df.pvalues
# Keep the real per-group sample size next to each p-value so the x-axis shows
# sample sizes rather than 0-based row numbers.
df['sample_size'] = sample_sizes

# Plot 1 - p-value against the sample size, mark the planned sample size and
# shade the band where the test would be declared significant (p < 0.05).
fig = px.line(df, x="sample_size", y="one-pvalue")
fig.add_vline(x=n_obs, line_width=3, line_dash="dash", line_color="green",
              annotation_text="Minimum sample size",
              annotation_position="bottom left")
fig.add_hrect(y0=1, y1=0.95, line_width=0, fillcolor="red", opacity=0.2)

# Fonts, axis titles and a centred figure title.
fig.update_layout(
    font_family="Courier New",
    font_color="blue",
    title_font_family="Times New Roman",
    title_font_color="red",
    legend_title_font_color="green",
    xaxis_title="Sample size",
    yaxis_title=" 1 - p-value",
    title={
        'text': " Sample size vs 1 - p_value ",
        'y': 0.95,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top'}
)

fig.show()