In [168]:
%matplotlib inline

from pathlib import Path
import random
import decimal

import pandas as pd
import numpy as np

from scipy import stats
import statsmodels
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.stats.power import NormalIndPower
from statsmodels.stats.power import TTestIndPower
from statsmodels.stats.proportion import proportions_ztest
from scipy.stats import binom

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import seaborn as sns
import plotly.graph_objects as go


In [169]:
# Empty results table for individually simulated A/B tests.
# A later cell appends one row per simulated test to this frame.
df_ind = pd.DataFrame(
    columns=[
        'n',
        'stat_sig',
        'obs_effect',
        'effect',
        'p-value',
        'exageration_ratio',
    ]
)


In [170]:
# Simulate ONE A/B test and append its outcome to df_ind.
# Uncomment the seed below to make the draw repeatable.
#np.random.seed(42)

# --- Experiment configuration ---
n_obs = 100000  # observations (trials) per variant; a single scalar shared by both arms
conv_control = 0.2  # conversion rate of the control arm
lift = 0.05  # relative lift applied to the control rate to get the treatment rate
conv_variant = conv_control * (1+lift)
alpha = 0.05  # significance threshold for the z-test

# --- Control arm: one draw from Binomial(n_obs, conv_control) ---
tests = 1
n, p = n_obs, conv_control
samples = np.random.binomial(n, p, tests)
trials_A = n
success_A = samples[0]

# --- Treatment arm: one draw from Binomial(n_obs, conv_variant) ---
tests = 1
n, p = n_obs, conv_variant
samples = np.random.binomial(n, p, tests)
trials_B = n
success_B = samples[0]

# --- Two-proportion z-test, treatment listed first, alternative='larger' ---
nobs = np.array([trials_B, trials_A])
count = np.array([success_B, success_A])
z_stat, p_value = proportions_ztest(count, nobs, alternative='larger')

# Both arms have the same number of trials, so the ratio of successes
# equals the ratio of observed conversion rates.
obs_effect = (success_B / success_A) -1

# 'effect' and the exaggeration ratio are only recorded for significant tests.
if p_value < alpha:
    stat_sig = 1
    effect = (success_B / success_A) -1
    # observed lift divided by the true relative lift implied by the two rates
    exageration = effect / ((conv_variant - conv_control)/conv_control)
else:
    stat_sig = 0
    effect = None
    exageration = None

# --- Store the outcome as the next row of the accumulator frame ---
record = [n_obs, stat_sig, obs_effect, effect, p_value, exageration]
df_ind.loc[len(df_ind)] = record

display(df_ind)


Unnamed: 0,n,stat_sig,obs_effect,effect,p-value,exageration_ratio
0,100000.0,1.0,0.055394,0.055394,4.982396e-10,1.107872


In [193]:
# Monte Carlo simulation of many identical A/B tests.
# Produces:
#   df         - one row per simulated test
#   df_grouped - per-sample-size summary (share of significant tests, median
#                significant lift, median exaggeration ratio)

# Set seed for reproducibility
#np.random.seed(42)

# tests parameters
n_tests = 10000 # number of A/B tests to simulate
n_obs = 10000 # sample size per variant (one scalar, shared by control and treatment)
conv_control = 0.2 # conversion rate of the control arm
lift = 0.05 # relative lift applied to the control rate
conv_variant = conv_control * (1+lift)
alpha = 0.05 # significance threshold for the z-test

# True relative lift implied by the two conversion rates.
# Loop-invariant, so it is computed once instead of once per simulated test.
true_lift = (conv_variant - conv_control)/conv_control

# Both arms use the same number of trials in every simulated test.
trials_A = trials_B = n_obs
nobs = np.array([trials_B, trials_A])

# Draw the number of conversions for every test in one call per arm.
successes_A = np.random.binomial(n_obs, conv_control, n_tests)
successes_B = np.random.binomial(n_obs, conv_variant, n_tests)

# Collect rows in a plain list and build the DataFrame once at the end;
# appending with df.loc[len(df)] inside the loop is very slow for thousands of rows.
records = []
for success_A, success_B in zip(successes_A, successes_B):

    # one-sided two-proportion z-test, treatment listed first
    count = np.array([success_B, success_A])
    z_stat, p_value = proportions_ztest(count, nobs, alternative='larger')
    stat_sig = 1 if p_value < alpha else 0

    # equal trials per arm, so the ratio of successes equals the ratio of rates
    obs_effect = (success_B / success_A) - 1

    # 'effect' and the exaggeration ratio are only defined for significant tests;
    # NaN (rather than None) keeps the columns numeric and is skipped by median().
    effect = obs_effect if stat_sig == 1 else np.nan
    exageration = effect / true_lift if stat_sig == 1 else np.nan

    records.append([n_obs, stat_sig, obs_effect, effect, p_value, exageration])

# Create DataFrame with one row per simulated test
df = pd.DataFrame(
    records,
    columns=['n', 'stat_sig', 'obs_effect', 'effect', 'p-value', 'exageration_ratio'],
)

# aggregate the results of the n_tests simulated tests for each sample size
# NOTE: 'effect' is aggregated with the median although its label says "Avg";
# the label is kept unchanged so existing references to the column keep working.
# The grouping key 'n' is the index while rename() runs, so it keeps its name
# after reset_index(); the no-op 'n' and 'lift' entries were dropped from the mapping.
df_grouped = (
    df.groupby('n')
    .agg({
        'stat_sig': 'mean',
        'effect': 'median',
        'exageration_ratio': 'median',
    })
    .rename(columns={
        'stat_sig': 'Actual Power',
        'effect': 'Avg Observed Lift (Signif. Tests)',
        'exageration_ratio': 'Exageration Ratio',
    })
    .reset_index()
)

df_grouped

Unnamed: 0,n,Actual Power,Avg Observed Lift (Signif. Tests),Exageration Ratio
0,10000.0,0.5446,0.068691,1.373818


In [201]:
# Overlaid histograms of the observed effect for significant vs. non-significant
# simulated tests, with reference lines for the true lift and for the mean
# observed effect among significant tests.
# Reads `df` and `lift` from the simulation cell; `go` comes from the imports cell.

# Split the observed effects by test outcome once and reuse the two series below
obs_effect_not_sig = df.loc[df['stat_sig'] == 0, 'obs_effect']
obs_effect_sig = df.loc[df['stat_sig'] == 1, 'obs_effect']

# Compute the mean observed effect for stat_sig == 1
mean_effect_sig = obs_effect_sig.mean()
# The mean is NaN when no test was significant; then there is nothing to mark
show_mean = pd.notna(mean_effect_sig)

# Create the figure
fig = go.Figure()

# Define colors
colors = {0: "rgba(255, 50, 50, 0.6)", 1: "rgba(0, 100, 255, 0.5)"}

# One histogram per outcome: (legend label, values, fill colour, outline colour, opacity)
histogram_specs = [
    ("Not Significant", obs_effect_not_sig, colors[0], "red", 0.6),
    ("Significant", obs_effect_sig, colors[1], "blue", 0.7),
]
for label, values, fill_color, outline_color, trace_opacity in histogram_specs:
    fig.add_trace(go.Histogram(
        x=values,
        name=label,
        # Same bin group for both traces so the overlaid bars use compatible bins
        # and their heights can be compared directly.
        bingroup="obs_effect",
        marker=dict(color=fill_color, line=dict(color=outline_color, width=1.2)),
        opacity=trace_opacity,
        hovertemplate=(
            f"<b>Category:</b> {label}<br>"
            "<b>Bin Range:</b> %{x}<br>"
            "<b>Count:</b> %{y}<extra></extra>"
        ),
    ))

# Vertical reference lines spanning the full plot height (yref='paper'):
# (x position, line colour, dash style, draw?)
reference_lines = [
    (mean_effect_sig, "black", "dash", show_mean),  # mean observed effect (significant only)
    (lift, "gray", "dot", True),                    # real lift
]
for x_pos, line_color, dash_style, draw in reference_lines:
    if not draw:
        continue
    fig.add_shape(
        type="line",
        x0=x_pos, x1=x_pos,
        y0=0, y1=1,
        xref='x', yref='paper',
        line=dict(color=line_color, width=2, dash=dash_style),
    )

# Settings shared by both annotations
annotation_style = dict(
    xref="x",
    yref="paper",
    showarrow=True,
    arrowhead=2,
    ay=-40,
    font=dict(size=12, color="black"),
    bordercolor="black",
    borderwidth=1,
)

# Add annotation for the real lift
fig.add_annotation(
    x=lift,
    y=0.9,  # Position annotation at 90% height
    text=f"<b>Real Lift:</b><br>{lift:.4f}",
    ax=-40,  # Offset annotation to the left of the line
    bgcolor="rgba(200, 200, 200, 0.8)",  # Light gray background for readability
    **annotation_style,
)

# Add annotation for the mean observed effect (only when it exists)
if show_mean:
    fig.add_annotation(
        x=mean_effect_sig,
        y=0.95,  # Position at 95% height
        text=f"<b>Avg Obs. Effect (Signif.):</b><br>{mean_effect_sig:.4f}",
        ax=40,  # Offset annotation to the right of the line
        bgcolor="rgba(255, 255, 255, 0.8)",  # Light background for readability
        **annotation_style,
    )

# Axis settings shared by x and y: no gridlines, no zero line, black axis line
axis_style = dict(
    title_font=dict(size=16),
    tickfont=dict(size=12),
    showgrid=False,
    zeroline=False,
    showline=True,
    linewidth=1.2,
    linecolor="black",
)

# Update layout for better aesthetics
fig.update_layout(
    width=900,
    height=550,
    title=dict(
        text="Histogram of Observed Effects",
        font=dict(size=20, family="Arial Bold"),
        x=0.5  # Center the title
    ),
    xaxis=dict(title="Observed Effect", **axis_style),
    yaxis=dict(title="Count", **axis_style),
    plot_bgcolor='rgba(0,0,0,0)',  # Remove background color (transparent)
    paper_bgcolor='rgba(0,0,0,0)',  # Remove outer background color (transparent)
    bargap=0.1,  # Spacing between bars
    bargroupgap=0.05,
    barmode="overlay",  # Overlay bars for better comparison
    legend=dict(
        title="Statistical Significance",
        font=dict(size=14),
        bgcolor="rgba(240,240,240,0.8)",  # Light grey background for clarity
        bordercolor="black",
        borderwidth=1
    )
)

# Show the improved plot
fig.show()
