# A/B Experiment Readout: Treatment Effect, CUPED, MDE & Power

This notebook performs a full experiment readout on a synthetic A/B experiment dataset,
following industry-standard experimentation practices used in Big Tech.

It covers:
- Raw treatment effect
- CUPED variance reduction
- Minimum Detectable Effect (MDE)
- Statistical power
- Guardrail metric validation


In [19]:
df.describe()
df.groupby("treatment")[["pre_metric", "outcome", "guardrail"]].mean()

Unnamed: 0_level_0,pre_metric,outcome,guardrail
treatment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,100.452278,260.244645,49.954931
1,100.035604,270.012559,49.843101


In [20]:
control = df[df["treatment"] == 0]["outcome"]
treatment = df[df["treatment"] == 1]["outcome"]

ate = treatment.mean() - control.mean()
t_stat, p_value = stats.ttest_ind(treatment, control)

ate, t_stat, p_value


(np.float64(9.767913825922562),
 np.float64(31.061036518744437),
 np.float64(2.6906773862723555e-202))

In [21]:
theta = np.cov(df["outcome"], df["pre_metric"])[0, 1] / np.var(df["pre_metric"])

df["outcome_cuped"] = df["outcome"] - theta * (
    df["pre_metric"] - df["pre_metric"].mean()
)

control_cuped = df[df["treatment"] == 0]["outcome_cuped"]
treatment_cuped = df[df["treatment"] == 1]["outcome_cuped"]

ate_cuped = treatment_cuped.mean() - control_cuped.mean()
t_stat_cuped, p_value_cuped = stats.ttest_ind(
    treatment_cuped, control_cuped
)

ate_cuped, t_stat_cuped, p_value_cuped


(np.float64(10.02128780754174), np.float64(50.58369446813159), np.float64(0.0))

In [22]:
alpha = 0.05
power = 0.8

std = df["outcome"].std()
n_per_group = df["treatment"].value_counts().min()

z_alpha = stats.norm.ppf(1 - alpha / 2)
z_beta = stats.norm.ppf(power)

mde = (z_alpha + z_beta) * std * np.sqrt(2 / n_per_group)
mde

np.float64(0.9294964611754545)

In [23]:
abs(ate), mde, abs(ate) > mde

(np.float64(9.767913825922562), np.float64(0.9294964611754545), np.True_)

In [24]:
guardrail_control = df[df["treatment"] == 0]["guardrail"]
guardrail_treatment = df[df["treatment"] == 1]["guardrail"]

stats.ttest_ind(guardrail_treatment, guardrail_control)

TtestResult(statistic=np.float64(-1.1123453170832687), pvalue=np.float64(0.26601645135659113), df=np.float64(9998.0))

In [25]:
import numpy as np
import pandas as pd
from scipy import stats
import statsmodels.api as sm

# NOTE:
# VS Code runs notebooks with `notebooks/` as the working directory
# CSV is located at: notebooks/data/ab_synthetic.csv

DATA_PATH = "data/ab_synthetic.csv"

df = pd.read_csv(DATA_PATH)
df.head()


Unnamed: 0,user_id,treatment,pre_metric,outcome,guardrail
0,1,0,70.242766,235.984178,53.91841
1,2,1,77.496278,252.57102,50.037693
2,3,1,107.776378,264.347561,53.274066
3,4,1,76.522533,247.020998,44.928415
4,5,0,122.252686,277.695381,50.716931
