In [None]:
import numpy as np
import pandas as pd

# Build a synthetic A/B-test dataset: one row per user with a group
# assignment, country, signup date, engagement outcomes and guardrail metrics.
# The global NumPy RNG is seeded so reruns produce the same data; keep the
# draws below in this order, since each one advances the shared RNG state.
np.random.seed(42)

# Number of users
n = 10000

# Assign groups (50/50 split; 'B' is the treatment group)
groups = np.random.choice(['A', 'B'], size=n, p=[0.5, 0.5])

# Per-user boolean mask, reused below to select group-specific parameters.
is_group_a = groups == 'A'

# Countries (uniform over the five codes)
countries = np.random.choice(['DE', 'IN', 'US', 'UK', 'FR'], size=n)

# Signup dates (uniform over the days of January 2024)
signup_dates = pd.to_datetime(
    np.random.choice(pd.date_range("2024-01-01", "2024-01-31"), size=n)
)

# Base engagement rates
base_engagement_A = 0.21
base_engagement_B = 0.235  # treatment lift

# One Bernoulli draw per user, with the success probability chosen by group.
day7_engaged = np.random.binomial(
    1, np.where(is_group_a, base_engagement_A, base_engagement_B)
)

# Feature usage (higher in treatment)
feature_used = np.random.binomial(1, np.where(is_group_a, 0.18, 0.26))

# Session time: normal with a group-specific mean (5 for A, 6.5 for B) and a
# shared standard deviation of 1.2, rounded to 2 decimals.
# The mean must be a per-user array: the previous code referenced the loop
# variable `g` of the comprehensions above, which is not defined at this scope
# (NameError) and could never have varied per user anyway.
avg_session_time = np.round(
    np.random.normal(np.where(is_group_a, 5.0, 6.5), 1.2, n), 2
)

# Guardrail metrics (drawn identically for both groups)
error_rate = np.round(np.random.uniform(0.005, 0.02, n), 3)
# astype(int) truncates the fractional milliseconds.
page_load_ms = np.random.normal(330, 25, n).astype(int)

# Build DataFrame
df = pd.DataFrame({
    "user_id": range(1, n + 1),
    "group": groups,
    "country": countries,
    "signup_date": signup_dates,
    "day7_engaged": day7_engaged,
    "feature_used": feature_used,
    "avg_session_time": avg_session_time,
    "error_rate": error_rate,
    "page_load_ms": page_load_ms,
})

# Last expression of the notebook cell: displays the first rows.
df.head()
