Interactive version of the graph from [A/B Tests for Engineers](https://win-vector.com/2023/10/15/a-b-tests-for-engineers/).

To see the interaction, download the [`ab_test` directory](https://github.com/WinVector/Examples/tree/main/ab_test) and run this Jupyter notebook in JupyterLab or VS Code.

[Jupyter Widgets documentation](https://ipywidgets.readthedocs.io/en/latest/index.html)

In [None]:
# import our modules
import numpy as np
import pandas as pd
from IPython.display import display, clear_output
from scipy.stats import norm
import matplotlib.pyplot as plt
from sig_pow_visual import binomial_diff_sig_pow_visual
from plotnine import *
from data_algebra import RecordSpecification

import ipywidgets as widgets

In [None]:
# experiment design targets
significance = 0.02  # desired false positive rate for the test
power = 0.9  # desired true positive rate for the test
# assumed effect and the design values that go with the targets above
r = 0.1  # the assumed large effect size (difference in conversion rates)
n = 557  # the experiment size
t = 0.061576  # the decision threshold matching the specified power and significance


In [None]:
# Tabulate the expected behavior of the experiment over a grid of candidate
# decision thresholds running from 0 to r inclusive.
n_b_steps = 100
behaviors = pd.DataFrame({
    # linspace fixes the number of points (n_b_steps + 1) and lands exactly on r;
    # arange with a float step and a padded stop can gain or lose an element
    # to rounding.
    'threshold': np.linspace(0, r, n_b_steps + 1)
})
# standard deviation used for the observed difference at experiment size n
# NOTE(review): presumably the variance bound for a difference of two rates - confirm against the article
stdev = np.sqrt(0.5 / n)
# chance of exceeding the threshold when the true difference is 0
behaviors['false positive rate'] = norm.sf(
    x=behaviors['threshold'].to_numpy(), loc=0, scale=stdev
)
# norm.cdf(x=r, loc=threshold) is, by symmetry of the normal, the chance of
# exceeding the threshold when the true difference is r
behaviors['true positive rate'] = norm.cdf(
    x=r, loc=behaviors['threshold'].to_numpy(), scale=stdev
)

In [None]:
# Build a record transform that takes the two rate columns of `behaviors`
# to one (measure, value) pair per row, keyed by threshold; make_graph facets
# the result on 'measure' and plots 'value' against 'threshold'.
# Named record_map (not `map`) so the builtin map() is not shadowed for the
# rest of the notebook session.
record_map = RecordSpecification(
    pd.DataFrame({
        'measure': ["false positive rate", "true positive rate"],
        'value': ["false positive rate", "true positive rate"],
    }),
    record_keys=['threshold'],
    control_table_keys=['measure'],
).map_from_rows()

behaviors_kv = record_map.transform(behaviors)

In [None]:
def make_graph(threshold):
    """Print the three diagnostic plots for one decision threshold.

    Reads the notebook-level ``n``, ``r``, ``behaviors`` and ``behaviors_kv``.
    The plots are: the output of ``binomial_diff_sig_pow_visual`` for this
    threshold, the false/true positive rate curves with the threshold marked,
    and the true-vs-false positive rate curve with the nearest tabulated
    threshold highlighted.

    :param threshold: decision threshold; anything ``float()`` accepts.
    :return: None (plots are emitted via ``print``).
    """
    # convert to what were the function arguments
    threshold = float(threshold)
    stdev = np.sqrt(0.5 / n)
    effect_size = r
    # area to the right of the threshold under the mean-0 distribution
    sig_area = norm.sf(x=threshold, loc=0, scale=stdev)  # .sf() = 1 - .cdf()
    # by symmetry this equals the area to the left of the threshold under the
    # distribution centered at effect_size
    mpow_area = norm.sf(x=effect_size, loc=threshold, scale=stdev)
    title='Shaded area under the tails give you significance and 1-power '
    subtitle = f' H0: significance (false positive rate) = {sig_area:.3f} = right area\n H1: 1-power (false negative rate) = {mpow_area:.3f} = left area'
    # find nearest tabulated threshold; argmin yields a position, so select
    # with .iloc rather than label-based .loc
    row_dist = np.abs(behaviors["threshold"].to_numpy() - threshold)
    nearest_pos = int(np.argmin(row_dist))
    selected_rows = behaviors.iloc[[nearest_pos]].reset_index(drop=True)
    # NOTE(review): printing is relied on to render each plot; confirm this
    # holds for the installed plotnine version
    print(
        binomial_diff_sig_pow_visual(
            stdev=stdev,
            effect_size=effect_size,
            threshold=threshold,
            title=title,
            subtitle=subtitle
        )
    )
    print(
        ggplot(
                data=behaviors_kv,
                mapping=aes(x='threshold', y='value'),
            )
            + geom_line()
            + ylim(0, 1)
            + geom_vline(xintercept=threshold, size=2, color="blue")
            + facet_wrap("measure", ncol=1)
    )
    print(
        ggplot(
                data=behaviors,
                mapping=aes(x='false positive rate', y='true positive rate'),
            )
            # highlight the tabulated row closest to the chosen threshold
            + geom_point(
                data=selected_rows,
                size=3,
                color="blue",
            )
            + geom_line()
            + coord_fixed()
            + ylim(0.5, 1)
            + xlim(0, 0.5)
    )


In [None]:
def on_year_selected(change):
    # Observer callback for the dropdown. The `change` argument is not used;
    # the current value is read straight from `menu`.
    with output:
        if menu.value == '':
            clear_output(wait=False)
            return
        # wait=True defers clearing until the replacement output arrives
        clear_output(wait=True)
        make_graph(menu.value)


# dropdown of candidate decision thresholds, plus the area the plots go into
menu = widgets.Dropdown(
    description='Decision threshold:',
    options=[0.0, 0.03, 0.061576, 0.09, 0.1],
    value=0.061576,
    disabled=False,
)
output = widgets.Output()
display(menu, output)

# draw once for the initial selection, then redraw on every value change
on_year_selected(menu.value)
menu.observe(on_year_selected, names='value')