# 03 · A/B Test (Simulated)
Random assignment, conversion uplift, frequentist tests + bootstrap CI.

In [None]:
import numpy as np, pandas as pd
from statsmodels.stats.proportion import proportions_ztest
from pathlib import Path
DATA = Path("../data")
rng = np.random.default_rng(42)

# Load the transaction log and randomise every user seen in it into one of
# two equally likely arms.
tx = pd.read_csv(DATA / "transactions.csv", parse_dates=["date"])
active_users = tx["user_id"].unique()
variant_draws = rng.choice(
    ["control", "treatment"],
    size=len(active_users),
    p=[0.5, 0.5],
)
assign = pd.DataFrame({"user_id": active_users, "variant": variant_draws})

# Per-user conversion flag and mean revenue over the trailing 60 days,
# measured back from the latest transaction date in the data.
cutoff = tx["date"].max() - pd.Timedelta(days=60)
recent_tx = tx.loc[tx["date"] >= cutoff]
user_level = recent_tx.groupby("user_id").agg(
    # 1 when the user has at least one non-null cart in the window.
    conversion=("cart_id", lambda carts: int(carts.nunique() > 0)),
    # NOTE(review): this is the mean of `line_revenue` over rows; if rows are
    # line items rather than orders it is not a true average order value - confirm.
    aov=("line_revenue", "mean"),
)
user_level = user_level.reset_index().fillna({"aov": 0.0})

# Left join keeps every assigned user; users with no activity in the window
# get conversion 0 and aov 0.0.
df = assign.merge(user_level, on="user_id", how="left")
df = df.fillna({"conversion": 0, "aov": 0.0})

# Inject small uplift for demonstration:
# treatment users who did not convert get one Bernoulli(uplift_rate) chance
# to be flipped to converted; control users and already-converted treatment
# users are left untouched.
mask = df["variant"].eq("treatment")
uplift_rate = 0.10
df["conversion_sim"] = df["conversion"].astype(int)
# Boolean array over the treatment rows only: True where the user has not converted.
flip = (df.loc[mask, "conversion_sim"] == 0).values
# Start from "converted" for every treatment row, then overwrite only the
# non-converted positions with their draws. The draw array has exactly
# flip.sum() entries, so it must be placed by boolean indexing; feeding it to
# np.where next to the full-length mask fails to broadcast.
treated = np.ones(flip.size, dtype=int)
treated[flip] = rng.binomial(1, uplift_rate, size=flip.sum())
df.loc[mask, "conversion_sim"] = treated

# Two-proportion z-test, one-sided: H1 is "treatment CR > control CR".
conv = df.groupby("variant")["conversion_sim"].agg(["sum","count"])
# Select groups by label instead of relying on groupby's sort order, so that
# index 0 is always control and index 1 is always treatment (a missing arm
# raises a KeyError here rather than producing a silently wrong report).
conv = conv.loc[["control", "treatment"]]
count = conv["sum"].values
nobs = conv["count"].values
# With alternative="larger" the test's H1 is p(first sample) > p(second sample),
# so treatment must be passed first; this also makes z positive for an uplift.
stat, pval = proportions_ztest(count[::-1], nobs[::-1], alternative="larger")
print({
    "control_cr": float(count[0]/nobs[0]),
    "treatment_cr": float(count[1]/nobs[1]),
    "z": float(stat),
    "p_value": float(pval)
})

# Bootstrap percentile CI for uplift (95% by default)
def bootstrap_diff(a, b, B=5000, seed=0, level=95.0):
    """Return a percentile-bootstrap confidence interval for mean(b) - mean(a).

    Each of the ``B`` replicates resamples ``a`` and ``b`` independently, with
    replacement and at their original sizes, and records the difference of
    the resampled means.

    Args:
        a: Baseline sample (NumPy array or any sequence of numbers).
        b: Comparison sample (NumPy array or any sequence of numbers).
        B: Number of bootstrap replicates; must be at least 1.
        seed: Seed for the function's private generator, so repeated calls
            with the same inputs return the same interval.
        level: Confidence level in percent, strictly between 0 and 100.

    Returns:
        A length-2 ``np.ndarray`` holding the lower and upper bounds.

    Raises:
        ValueError: If either sample is empty, ``B < 1``, or ``level`` is
            outside the open interval (0, 100).
    """
    # Coerce up front so plain lists work; `.size` below needs an ndarray.
    a = np.asarray(a)
    b = np.asarray(b)
    if a.size == 0 or b.size == 0:
        raise ValueError("bootstrap_diff requires two non-empty samples")
    if B < 1:
        raise ValueError("B must be at least 1")
    if not 0.0 < level < 100.0:
        raise ValueError("level must be strictly between 0 and 100")

    rng = np.random.default_rng(seed)
    diffs = np.empty(B, dtype=float)
    for i in range(B):
        # Draw order (a, then b) is kept fixed so a given seed keeps
        # reproducing the same interval.
        aa = rng.choice(a, size=a.size, replace=True)
        bb = rng.choice(b, size=b.size, replace=True)
        diffs[i] = bb.mean() - aa.mean()

    tail = (100.0 - level) / 2.0
    return np.percentile(diffs, [tail, 100.0 - tail])

# Pull each arm's simulated conversions as plain arrays (a = control,
# b = treatment) and report the bootstrap interval for treatment - control.
is_control = df["variant"] == "control"
is_treatment = df["variant"] == "treatment"
a = df.loc[is_control, "conversion_sim"].to_numpy()
b = df.loc[is_treatment, "conversion_sim"].to_numpy()
ci = bootstrap_diff(a, b)
lower, upper = (float(bound) for bound in ci)
print({"uplift_ci_95": [lower, upper]})