In [None]:
# Step 1: Import Required Libraries
import pandas as pd
import numpy as np
import random

# Synthetic A/B-test dataset: one row per simulated user in a Tier 2/3 city.
# All draws use NumPy's global legacy RNG, so the ORDER of the random calls
# below determines the generated values -- keep it stable when editing.

# Fix the global NumPy seed so every run produces the same dataset.
np.random.seed(42)

# Candidate cities a user can be assigned to (sampled uniformly, with replacement).
tier2_3_cities = [
    'Agra', 'Udaipur', 'Patna', 'Kanpur', 'Guwahati',
    'Nagpur', 'Kochi', 'Varanasi', 'Raipur', 'Jodhpur',
]

# Population size and sequential IDs "U0001" .. "U1000".
num_users = 1000
user_ids = [f"U{i:04d}" for i in range(1, num_users + 1)]

# Demographics. randint's upper bound is exclusive, so ages fall in 18..49.
cities = np.random.choice(tier2_3_cities, size=num_users)
ages = np.random.randint(18, 50, size=num_users)
genders = np.random.choice(['Male', 'Female'], size=num_users)

# Experiment arm per user: independent, equal-probability pick of 'A' or 'B'.
groups = np.random.choice(['A', 'B'], size=num_users)

# Order counts are Poisson distributed with a mean of 2.5 orders per user.
orders = np.random.poisson(lam=2.5, size=num_users)

# Average order value (AOV). A full-length sample is drawn for each arm and
# the one matching the user's group is kept:
#   Group A -> Normal(mean=120, sd=25)  standard experience
#   Group B -> Normal(mean=150, sd=30)  personalised recommendations
# NOTE: the AOV is drawn independently of `orders`, so users with zero orders
# still carry a non-zero AvgOrderValue.
standard_aov = np.random.normal(120, 25, size=num_users)
personalized_aov = np.random.normal(150, 30, size=num_users)
aov_base = np.where(groups == 'A', standard_aov, personalized_aov)

# Spend is orders times the (unrounded) AOV; `engaged` is 1 for users who
# placed at least one order and 0 otherwise.
total_spend = aov_base * orders
engaged = (orders > 0).astype(int)

# Assemble the final table; monetary columns are rounded to 2 decimals.
columns = {
    'UserID': user_ids,
    'City': cities,
    'Age': ages,
    'Gender': genders,
    'Group': groups,
    'Orders': orders,
    'AvgOrderValue': np.round(aov_base, 2),
    'TotalSpend': np.round(total_spend, 2),
    'Engaged': engaged,
}
data = pd.DataFrame(columns)

# Show the first five rows (rendered as the cell output in a notebook).
data.head(5)
