### Synthetic Event Generator for Brandazon (2023–2025)

This script creates a realistic multi-platform behavioral dataset for the fictional e-commerce brand "Brandazon," simulating user activity across mobile and web environments.

#### Key Components

- **User Profile Generation**: Each user gets a sequential `userId` and a random signup date; Faker supplies the name and email.
- **Products & Campaigns**:
  - Special product: `Labubu` (used in A/B test).
  - 99 additional products across categories: Toys, Electronics, Home, Beauty.
  - Campaigns include seasonal drops and influencer pushes.
- **Platform Context**:
  - Platforms: `web`, `mobile`, or `both`.
  - Web: generates `page` events with browser + OS context.
  - Mobile: generates `screen` events with device + OS info.

#### Event Types Simulated

- `identify`: Trait assignment + A/B group (A or B).
- `page` / `screen`: First event of every session.
- `track`: Behavioral events like:
  - `Product Viewed`
  - `Promotion Clicked` (for A/B test)
  - `Order Completed`

#### A/B Test Logic

Users in Group B may be exposed to a `labubu_drop_code` promotion:
- If exposed, they have a higher chance of purchasing the Labubu product.
- Labubu-related events are tagged with experiment metadata.

#### Simulation Logic

- Each user has 2–8 sessions.
- Each session includes:
  - A page/screen view
  - Optional product views, purchases, or promotion clicks
  - Randomized event timing across a 90-day period post-signup
- Timestamps are spread between January 2023 and July 2025.

#### Output

- All events are collected into a single pandas DataFrame.
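- Saved as `brandazon_all_events.csv` (flat file for analysis, one row per event; the nested `traits`, `properties`, and `context` objects are written as stringified dictionaries and need parsing before use).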
- Final output includes tens of thousands of realistic Segment-style rows.

This synthetic dataset is ideal for modeling customer journeys, campaign attribution, churn prediction, and A/B test impact.

In [None]:
import pandas as pd
import numpy as np
import random
from faker import Faker
from datetime import timedelta

# Config
fake = Faker()
# Seed every random source the script draws from. Faker keeps its own random
# generator, separate from the global `random` module, so seeding `random`
# and numpy alone leaves names, emails, product words and anonymous IDs
# different on every run.
random.seed(42)
np.random.seed(42)
fake.seed_instance(42)
n_users = 5000

# The product used by the A/B test; it is also the first catalogue entry.
labubu_product = {
    "product_id": "prod_labubu",
    "name": "Labubu",
    "category": "Toys"
}
# Catalogue: Labubu plus 99 generated products (prod_0001 .. prod_0099).
products = [
    labubu_product,
    *[
        {
            "product_id": f"prod_{i:04d}",
            "name": fake.word(),
            "category": random.choice(['Toys','Electronics','Home','Beauty'])
        }
        for i in range(1, 100)
    ]
]
# Pools that the event generators sample from.
campaigns = ["summer_sale", "labubu_drop_code", "winter_clearance", "influencer_push"]
browsers = ["Chrome", "Safari", "Edge", "Firefox"]
devices = ["iPhone 15", "Galaxy S23", "Pixel 8"]
oses_web = ["Windows", "MacOS", "Linux"]
oses_mobile = ["iOS", "Android"]
screens = ["Home", "Product Detail", "Cart", "Checkout", "Profile", "Deals", "Labubu Drop"]

# Utility
def make_session_id(user_id, idx):
    """Build the session identifier ``<user_id>_sess<idx>``."""
    return "{}_sess{}".format(user_id, idx)

def random_date(start, end):
    """Return a random timestamp in the closed interval [start, end].

    The offset is drawn in whole seconds over the full span. Drawing a day
    offset and a seconds-of-day offset separately would let the result
    overshoot ``end`` by up to a day whenever the last day is picked.

    Args:
        start: Lower bound (e.g. a ``pd.Timestamp``).
        end: Upper bound; must not be earlier than ``start``.

    Returns:
        ``start`` plus a random ``pd.Timedelta`` no larger than ``end - start``.

    Raises:
        ValueError: If ``end`` is earlier than ``start`` (from ``random.randint``).
    """
    span_secs = int((end - start).total_seconds())
    return start + pd.Timedelta(seconds=random.randint(0, span_secs))

def ab_group():
    """Pick an experiment arm, "A" or "B", uniformly at random."""
    arms = ("A", "B")
    return random.choice(arms)

def set_if_val(d, k, v):
    """Store ``v`` under key ``k`` in ``d`` unless ``v`` is None.

    Falsy values other than None (0, "", False) are still stored.
    """
    if v is None:
        return
    d[k] = v

# Identify Event
def generate_identify(user_id, platform, ab_group, signup_time):
    """Build the identify event emitted when a user signs up.

    Args:
        user_id: Identifier stored as ``userId``.
        platform: "web" or "mobile"; selects the OS pool and whether a
            browser or a device model is attached to the context.
        ab_group: Experiment arm recorded as the ``variation_id`` trait.
        signup_time: Timestamp stored on the event.

    Returns:
        A dict with ``anonymousId``, ``userId``, ``timestamp``, ``type``,
        ``traits`` and ``context`` keys.
    """
    on_web = platform == "web"

    # Random draws happen in the same order as the keys are listed, so the
    # seeded output stays stable.
    profile = dict(
        name=fake.name(),
        email=fake.email(),
        plan=random.choice(['free','plus','premium']),
        logins=random.randint(1, 30),
        experiment_id="labubu_ab",
        variation_id=ab_group,
    )

    os_pool = oses_web if on_web else oses_mobile
    ctx = dict(
        platform=platform,
        session_id=make_session_id(user_id, 0),  # identify belongs to session 0
        os=random.choice(os_pool),
    )
    if on_web:
        ctx["browser"] = random.choice(browsers)
    elif platform == "mobile":
        ctx["device_model"] = random.choice(devices)

    event = {"anonymousId": fake.uuid4(), "userId": user_id}
    event["timestamp"] = signup_time
    event["type"] = "identify"
    event["traits"] = profile
    event["context"] = ctx
    return event

# Page Event (web only)
def generate_page(user_id, ts, session_id, campaign, page_name="Home"):
    """Build a web ``page`` event.

    Args:
        user_id: Identifier stored as ``userId``.
        ts: Event timestamp.
        session_id: Session the page view belongs to.
        campaign: Campaign name added to the context; omitted when None.
        page_name: Page title; lower-cased with spaces turned into
            underscores to form the URL path.

    Returns:
        A dict with ``anonymousId``, ``userId``, ``timestamp``, ``type``,
        ``name``, ``properties`` and ``context`` keys.
    """
    # Browser is drawn before OS to keep the seeded random stream unchanged.
    picked_browser = random.choice(browsers)
    picked_os = random.choice(oses_web)
    ctx = {"platform": "web", "browser": picked_browser, "os": picked_os, "session_id": session_id}
    if campaign is not None:
        ctx["campaign"] = campaign

    slug = "_".join(page_name.lower().split(" "))
    properties = {"url": f"https://brandazon.com/{slug}"}

    event = {"anonymousId": fake.uuid4(), "userId": user_id, "timestamp": ts}
    event["type"] = "page"
    event["name"] = page_name
    event["properties"] = properties
    event["context"] = ctx
    return event

# Screen Event (mobile only)
def generate_screen(user_id, ts, session_id, campaign, screen_name="Home"):
    """Build a mobile ``screen`` event.

    Args:
        user_id: Identifier stored as ``userId``.
        ts: Event timestamp.
        session_id: Session the screen view belongs to.
        campaign: Campaign name added to the context; omitted when None.
        screen_name: Screen title. "Product Detail" and "Labubu Drop" carry
            the Labubu product id and category in their properties.

    Returns:
        A dict with ``anonymousId``, ``userId``, ``timestamp``, ``type``,
        ``name``, ``properties`` and ``context`` keys.
    """
    # Draw order (device, OS, major version, minor version) is kept fixed so
    # the seeded random stream is unchanged.
    model = random.choice(devices)
    mobile_os = random.choice(oses_mobile)
    major = random.randint(1,5)
    minor = random.randint(0,9)
    ctx = {
        "platform": "mobile",
        "device_model": model,
        "os": mobile_os,
        "app_version": f"{major}.{minor}",
        "session_id": session_id,
    }
    if campaign is not None:
        ctx["campaign"] = campaign

    # NOTE(review): every "Product Detail" screen is attributed to Labubu,
    # not to an arbitrary catalogue product -- confirm this is intended.
    properties = {}
    if screen_name == "Product Detail" or screen_name == "Labubu Drop":
        for field in ("product_id", "category"):
            if labubu_product[field] is not None:
                properties[field] = labubu_product[field]

    event = {"anonymousId": fake.uuid4(), "userId": user_id, "timestamp": ts}
    event["type"] = "screen"
    event["name"] = screen_name
    event["properties"] = properties
    event["context"] = ctx
    return event

# Track Event (web or mobile)
def generate_track(user_id, ts, session_id, platform, ab_group, campaign, event_name, product=None, price=None, quantity=None, experiment=False):
    """Build a ``track`` event for either platform.

    Args:
        user_id: Identifier stored as ``userId``.
        ts: Event timestamp.
        session_id: Session the event belongs to.
        platform: "web" or "mobile"; selects the OS pool and whether a
            browser or a device model is attached.
        ab_group: Experiment arm; written to the context only when
            ``experiment`` is true.
        campaign: Campaign name added to the context; omitted when None.
        event_name: Value of the ``event`` field.
        product: Optional dict with ``product_id``, ``name`` and ``category``.
        price: Optional price property.
        quantity: Optional quantity property.
        experiment: When true, tag the context with the ``labubu_ab``
            experiment id and the user's variation.

    Returns:
        A dict with ``anonymousId``, ``userId``, ``timestamp``, ``type``,
        ``event``, ``properties`` and ``context`` keys. Product, price and
        quantity properties are only filled for "Product Viewed" and
        "Order Completed" events; other events get empty properties.
    """
    on_web = platform == "web"
    ctx = {"platform": platform, "session_id": session_id}
    ctx["os"] = random.choice(oses_web if on_web else oses_mobile)
    if on_web:
        ctx["browser"] = random.choice(browsers)
    elif platform == "mobile":
        ctx["device_model"] = random.choice(devices)
    if campaign is not None:
        ctx["campaign"] = campaign
    if experiment:
        ctx.update(experiment_id="labubu_ab", variation_id=ab_group)

    # Key insertion order is kept stable: product fields, then price, then
    # quantity; None values are skipped.
    properties = {}
    if event_name in ("Product Viewed", "Order Completed"):
        candidates = []
        if product:
            candidates.extend(
                (field, product[field]) for field in ("product_id", "name", "category")
            )
        candidates.append(("price", price))
        candidates.append(("quantity", quantity))
        for key, value in candidates:
            if value is not None:
                properties[key] = value

    event = {"anonymousId": fake.uuid4(), "userId": user_id, "timestamp": ts}
    event["type"] = "track"
    event["event"] = event_name
    event["properties"] = properties
    event["context"] = ctx
    return event

# Main Data Generation
# For every user: one identify event, then 2-8 sessions, each opened by a
# page/screen view and optionally followed by promotion, product-view, order
# and secondary-navigation events. Events are appended in generation order;
# the list is not sorted by timestamp.
events = []
start = pd.Timestamp('2023-01-01')
end = pd.Timestamp('2025-07-01')

for i in range(n_users):
    user_id = f"user_{i+1:06d}"
    # Sign-ups stop 90 days before `end` to leave room for the session window.
    signup = random_date(start, end - pd.Timedelta(days=90))
    user_ab = ab_group()
    # Platform mix: 35% web-only, 35% mobile-only, 30% both.
    platform = random.choices(["web", "mobile", "both"], [0.35,0.35,0.3])[0]
    platform_list = ["web","mobile"] if platform=="both" else [platform]
    # Identify (first session)
    events.append(generate_identify(user_id, random.choice(platform_list), user_ab, signup))

    n_sessions = random.randint(2, 8)
    for sess_idx in range(n_sessions):
        session_id = make_session_id(user_id, sess_idx)
        # Each session's offset from signup is drawn independently (0-90 days
        # plus 0-1440 minutes), so session index order is not chronological
        # and late sessions can land slightly after `end`.
        sess_date = signup + pd.Timedelta(days=random.randint(0, 90), minutes=random.randint(0, 1440))
        sess_platform = random.choice(platform_list)
        # Always start session with a page or screen event
        if sess_platform == "web":
            events.append(generate_page(user_id, sess_date, session_id, random.choice(campaigns), page_name="Home"))
        else:
            events.append(generate_screen(user_id, sess_date, session_id, random.choice(campaigns), screen_name="Home"))

        # Track: A/B test for Labubu, only group B gets exposed
        # (50% chance per session; the click lands 1-15 minutes in).
        if user_ab == "B" and random.random() < 0.5:
            labubu_event_time = sess_date + pd.Timedelta(minutes=random.randint(1,15))
            events.append(generate_track(
                user_id, labubu_event_time, session_id, sess_platform, user_ab, "labubu_drop_code",
                event_name="Promotion Clicked", product=labubu_product, experiment=True
            ))
            # Higher chance to buy Labubu if exposed
            # (60% of clicks convert, 5 minutes after the click, at a fixed 99.99).
            if random.random() < 0.6:
                events.append(generate_track(
                    user_id, labubu_event_time + pd.Timedelta(minutes=5), session_id, sess_platform, user_ab, "labubu_drop_code",
                    event_name="Order Completed", product=labubu_product, price=99.99, quantity=1, experiment=True
                ))
        # Typical commerce flow
        # The view (70%) and the order (25%) are drawn independently, each with
        # its own random product, so an order need not follow a view.
        if random.random() < 0.7:
            prod = random.choice(products)
            pv_time = sess_date + pd.Timedelta(minutes=random.randint(1,20))
            events.append(generate_track(
                user_id, pv_time, session_id, sess_platform, user_ab, random.choice(campaigns),
                event_name="Product Viewed", product=prod
            ))
        if random.random() < 0.25:
            prod = random.choice(products)
            oc_time = sess_date + pd.Timedelta(minutes=random.randint(2,25))
            # Price is drawn per order (10-200), not fixed per product.
            events.append(generate_track(
                user_id, oc_time, session_id, sess_platform, user_ab, random.choice(campaigns),
                event_name="Order Completed", product=prod, price=round(random.uniform(10,200),2), quantity=random.randint(1,3)
            ))
        # Secondary navigation
        # (50% chance: a fixed "Product" page at +10 min on web, or a random
        # screen at +12 min on mobile).
        if sess_platform == "web" and random.random() < 0.5:
            events.append(generate_page(user_id, sess_date + pd.Timedelta(minutes=10), session_id, random.choice(campaigns), page_name="Product"))
        if sess_platform == "mobile" and random.random() < 0.5:
            events.append(generate_screen(user_id, sess_date + pd.Timedelta(minutes=12), session_id, random.choice(campaigns), screen_name=random.choice(screens)))

print(f"Generated {len(events):,} events.")

# Convert to DataFrame and save
# NOTE(review): `traits`, `properties` and `context` are dicts, so they are
# presumably written to the CSV as stringified dictionaries -- confirm that
# downstream readers parse them.
df = pd.DataFrame(events)
df.to_csv("brandazon_all_events.csv", index=False)
print("CSV generated: brandazon_all_events.csv")

Generated 77,085 events.
CSV generated: brandazon_all_events.csv
