In [3]:
from pathlib import Path

import numpy as np
import pandas as pd

# --- Synthetic A/B test dataset ----------------------------------------------
# Simulates one row per user for a two-arm experiment (A = control,
# B = treatment). All draws use NumPy's legacy global RNG, so the ORDER of the
# random calls below determines the generated values: reordering or inserting
# a draw changes the dataset.
np.random.seed(42)

# Number of users
n = 10000

# Assign groups (50/50 split)
groups = np.random.choice(['A', 'B'], size=n, p=[0.5, 0.5])
is_control = groups == 'A'  # boolean mask reused by every per-group draw

# Countries (uniform over the five country codes)
countries = np.random.choice(['DE', 'IN', 'US', 'UK', 'FR'], size=n)

# Signup dates (uniform over the days of January 2024)
signup_dates = pd.to_datetime(
    np.random.choice(pd.date_range("2024-01-01", "2024-01-31"), size=n)
)

# Per-group simulation parameters
base_engagement_A = 0.21
base_engagement_B = 0.235  # treatment lift
feature_rate_A = 0.18
feature_rate_B = 0.26  # higher in treatment
session_mean_A = 5
session_mean_B = 6.5
session_sd = 1.2

# Day-7 engagement: one Bernoulli draw per user at that user's group rate.
# Passing the per-user probabilities as an array draws element by element in
# user order, i.e. it consumes the RNG in the same order as a per-user loop
# of scalar calls, without the Python-level overhead.
# The int64 cast keeps the column dtype the same on every platform.
day7_engaged = np.random.binomial(
    1, np.where(is_control, base_engagement_A, base_engagement_B)
).astype(np.int64)

# Feature usage (higher in treatment)
feature_used = np.random.binomial(
    1, np.where(is_control, feature_rate_A, feature_rate_B)
).astype(np.int64)

# Average session time: normal around the group mean, rounded to 2 decimals
avg_session_time = np.round(
    np.random.normal(
        np.where(is_control, session_mean_A, session_mean_B), session_sd
    ),
    2,
)

# Guardrail metrics (drawn from the same distribution for both groups)
error_rate = np.round(np.random.uniform(0.005, 0.02, n), 3)
# NOTE: astype(int) truncates toward zero rather than rounding.
page_load_ms = np.random.normal(330, 25, n).astype(int)

# Build DataFrame
df = pd.DataFrame({
    "user_id": range(1, n+1),
    "group": groups,
    "country": countries,
    "signup_date": signup_dates,
    "day7_engaged": day7_engaged,
    "feature_used": feature_used,
    "avg_session_time": avg_session_time,
    "error_rate": error_rate,
    "page_load_ms": page_load_ms
})

df.head()


Unnamed: 0,user_id,group,country,signup_date,day7_engaged,feature_used,avg_session_time,error_rate,page_load_ms
0,1,A,UK,2024-01-28,0,0,5.43,0.008,396
1,2,B,IN,2024-01-31,1,0,7.07,0.016,341
2,3,B,FR,2024-01-31,0,0,7.92,0.017,326
3,4,B,US,2024-01-09,0,0,6.77,0.009,368
4,5,A,US,2024-01-06,1,0,5.97,0.011,307


In [7]:
# Persist the generated dataset; the analysis section reloads it from this path.
# DataFrame.to_csv does not create missing parent directories, so make sure the
# target folder exists first instead of failing when ../data is absent.
output_path = Path("../data/ab_test_data.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=False)


In [4]:
# Sanity check on the generated frame: displays (number of rows, number of columns).
df.shape

(10000, 9)

# A/B Test Evaluation — Smart Alert Tooltip
### Product Analytics Portfolio — Project 01

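This notebook analyzes whether the new **Smart Alert Tooltip** increased **Day-7 Engagement** for IoT device managers. The data is synthetic: it is simulated by the generation cell at the top of this notebook and saved to `../data/ab_test_data.csv`.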

---

## 1. Import Libraries


In [5]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats
import statsmodels.api as sm


In [6]:
# Reload the dataset from disk so the analysis starts from the saved file.
# The cell that writes ../data/ab_test_data.csv must have run first (running
# the notebook top to bottom guarantees this); otherwise read_csv raises
# FileNotFoundError.
# CSV stores dates as text, so parse signup_date back into datetime64 here
# instead of leaving it as plain strings.
df = pd.read_csv("../data/ab_test_data.csv", parse_dates=["signup_date"])
df.head()


FileNotFoundError: [Errno 2] No such file or directory: '../data/ab_test_data.csv'