# In [3]:
import os
import random
import string
from datetime import datetime, timedelta

import pandas as pd

random.seed(42)

# Names of every table this script generates. The list seeds the node set of
# the table-level transition graph, so tables without outgoing edges still get
# a row there.
ALL_TABLES = [
    "customers",
    "customer_addresses",
    "customer_payment_methods",
    "restaurants",
    "restaurant_locations",
    "menu_items",
    "menu_item_options",
    "drivers",
    "driver_vehicles",
    "orders",
    "order_items",
    "order_discounts",
    "order_status_events",
    "verification_requests",
    "verification_results",
    "manual_reviews",
    "kitchen_tickets",
    "delivery_requests",
    "delivery_assignments",
    "delivery_status_events",
    "support_tickets",
    "payment_authorizations",
    "payment_captures",
]



# ============================================================
# ID generation helpers (all IDs >= 9 chars, non-trivial)
# ============================================================

def generate_id(prefix, store=None, key=None, length=9):
    """
    Build an ID made of `prefix` followed by `length` random characters
    drawn from uppercase ASCII letters and digits.

    When both `store` and `key` are given, the ID is memoised: a value already
    recorded under `key` is returned unchanged (whatever prefix it was created
    with), otherwise the new ID is recorded under `key` before being returned.
    Without a store/key pair every call yields a fresh ID.
    """
    memoise = store is not None and key is not None
    if memoise and key in store:
        return store[key]

    alphabet = string.ascii_uppercase + string.digits
    suffix = "".join([random.choice(alphabet) for _ in range(length)])
    new_id = prefix + suffix

    if memoise:
        store[key] = new_id
    return new_id

def generate_simple_id(prefix, length=9):
    """Return a fresh random ID for `prefix` that is not recorded in any store."""
    return generate_id(prefix, store=None, key=None, length=length)

# Persistent ID stores for entities that must be referenceable.
# Each maps the integer loop index used at creation time to the generated ID
# string: customers/restaurants/menu items/drivers by their own index,
# vehicles by the owning driver's index, and the order-scoped stores
# (orders, kitchen tickets, delivery requests/assignments, support tickets)
# by the order index.
customer_ids = {}
restaurant_ids = {}
menu_item_ids = {}
driver_ids = {}
vehicle_ids = {}
order_ids = {}
kitchen_ticket_ids = {}
delivery_request_ids = {}
delivery_assignment_ids = {}
support_ticket_ids = {}

# First-use timestamps for entities (Option 2: order-centric).
# Each maps an entity ID string to a datetime derived from the orders that
# reference it; they are filled in by the order loop and later used to stamp
# the static tables.
customer_first_ts = {}
restaurant_first_ts = {}
menu_item_first_ts = {}
driver_first_ts = {}
vehicle_first_ts = {}

# ============================================================
# Synthetic configuration
# ============================================================

# Entity counts. The index ranges 0..N-1 double as keys into the ID stores.
NUM_CUSTOMERS = 2000
NUM_RESTAURANTS = 200
NUM_MENU_ITEMS = 600
NUM_DRIVERS = 800
NUM_ORDERS = 10000  # full size

# Value pools sampled when generating customers, restaurants, drivers and
# menu items. Region and category also drive the verification rules.
REGIONS = ["NORTH", "SOUTH", "EAST", "WEST", "CENTRAL"]
CATEGORIES = ["FOOD", "APPAREL", "ELECTRONICS", "GROCERY"]

# Earliest possible order start; each order starts at a random minute offset
# of up to 30 days from this date.
BASE_DATE = datetime(2025, 1, 1)

# Output directory for the per-table CSV files.
os.makedirs("data", exist_ok=True)

# ============================================================
# 1. Customers, addresses, payment methods
# ============================================================

# Row accumulators (one dict per row) for the three customer-side tables.
# NOTE: the statement order below fixes the sequence of draws from the seeded
# `random` module; reordering statements changes every generated value.
customers = []
customer_addresses = []
customer_payment_methods = []

for cid in range(NUM_CUSTOMERS):
    # Record the ID under the integer index so the order loop can pick a
    # customer by position.
    customer_id = generate_id("C", customer_ids, cid)
    region = random.choice(REGIONS)
    credit_rating = random.randint(1, 5)
    outstanding_amount = round(random.uniform(0, 200), 2)

    customers.append({
        "customer_id": customer_id,
        "region": region,
        "credit_rating": credit_rating,
        "outstanding_amount": outstanding_amount,
    })

    # Exactly one address per customer, in the customer's own region.
    addr_id = generate_simple_id("A")
    customer_addresses.append({
        "address_id": addr_id,
        "customer_id": customer_id,
        "region": region,
        "address_line": f"{random.randint(1,999)} Example Street",
        "city": "Metropolis",
    })

    # One to three payment methods per customer; a wallet_balance is drawn
    # for every method regardless of its method_type.
    num_methods = random.randint(1, 3)
    for _ in range(num_methods):
        pm_id = generate_simple_id("P")
        wallet_balance = round(random.uniform(0, 200), 2)
        customer_payment_methods.append({
            "payment_method_id": pm_id,
            "customer_id": customer_id,
            "method_type": random.choice(["CARD", "WALLET", "BANK"]),
            "wallet_balance": wallet_balance,
        })

# Row order of df_customers matches the integer index used in customer_ids.
df_customers = pd.DataFrame(customers)
df_customer_addresses = pd.DataFrame(customer_addresses)
df_customer_payment_methods = pd.DataFrame(customer_payment_methods)

# ============================================================
# 2. Restaurants, locations, menu items, options
# ============================================================

# Row accumulators (one dict per row) for the restaurant-side tables.
# NOTE: the statement order below fixes the sequence of draws from the seeded
# `random` module; reordering statements changes every generated value.
restaurants = []
restaurant_locations = []
menu_items = []
menu_item_options = []

for rid in range(NUM_RESTAURANTS):
    restaurant_id = generate_id("R", restaurant_ids, rid)
    r_region = random.choice(REGIONS)
    restaurants.append({
        "restaurant_id": restaurant_id,
        "name": f"Restaurant_{rid}",
        "region": r_region,
    })
    # Exactly one location per restaurant, in the restaurant's own region.
    loc_id = generate_simple_id("L")
    restaurant_locations.append({
        "location_id": loc_id,
        "restaurant_id": restaurant_id,
        "region": r_region,
        "address_line": f"{random.randint(1,999)} Food Street",
        "city": "Metropolis",
    })

for mid in range(NUM_MENU_ITEMS):
    menu_item_id = generate_id("M", menu_item_ids, mid)
    # Each item is attached to a uniformly random restaurant, so some
    # restaurants can end up with no menu items at all.
    restaurant_idx = random.randint(0, NUM_RESTAURANTS - 1)
    restaurant_id = restaurant_ids[restaurant_idx]
    # Weights follow the order of CATEGORIES: FOOD is the most likely.
    category = random.choices(
        CATEGORIES,
        weights=[0.6, 0.15, 0.15, 0.1],
        k=1
    )[0]
    price = round(random.uniform(5, 100), 2)
    menu_items.append({
        "menu_item_id": menu_item_id,
        "restaurant_id": restaurant_id,
        "name": f"Item_{mid}",
        "category": category,
        "price": price,
    })
    # Zero to three options per item; option names may repeat for one item.
    num_opts = random.randint(0, 3)
    for _ in range(num_opts):
        opt_id = generate_simple_id("O")
        menu_item_options.append({
            "option_id": opt_id,
            "menu_item_id": menu_item_id,
            "option_name": random.choice(
                ["EXTRA_CHEESE", "LARGE_SIZE", "SPICY", "NO_ONION"]
            ),
        })

df_restaurants = pd.DataFrame(restaurants)
df_restaurant_locations = pd.DataFrame(restaurant_locations)
df_menu_items = pd.DataFrame(menu_items)
df_menu_item_options = pd.DataFrame(menu_item_options)

# ============================================================
# 3. Drivers & vehicles
# ============================================================

# Row accumulators (one dict per row) for drivers and their vehicles.
# NOTE: the statement order below fixes the sequence of draws from the seeded
# `random` module; reordering statements changes every generated value.
drivers = []
driver_vehicles = []

for did in range(NUM_DRIVERS):
    driver_id = generate_id("DVR", driver_ids, did)
    region = random.choice(REGIONS)
    drivers.append({
        "driver_id": driver_id,
        "name": f"Driver_{did}",
        "region": region,
    })
    # One vehicle per driver, stored under the driver's index so that
    # vehicle_ids[i] always belongs to driver_ids[i].
    vehicle_id = generate_id("V", vehicle_ids, did)
    driver_vehicles.append({
        "vehicle_id": vehicle_id,
        "driver_id": driver_id,
        "vehicle_type": random.choice(["BIKE", "CAR", "SCOOTER"]),
        "plate_number": f"PLT{random.randint(1000,9999)}",
    })

df_drivers = pd.DataFrame(drivers)
df_driver_vehicles = pd.DataFrame(driver_vehicles)

# ============================================================
# 4. Orders and process tables
# ============================================================

# Row accumulators (one dict per row) for the order core tables.
orders = []
order_items = []
order_discounts = []

# Row accumulators for the per-order process tables filled by the order loop.
order_status_events = []
kitchen_tickets = []
delivery_requests = []
delivery_assignments = []
delivery_status_events = []
payment_authorizations = []
payment_captures = []
support_tickets = []
verification_requests = []
verification_results = []
manual_reviews = []

# One row per order for event_traces.csv (tables touched plus join path).
event_traces_rows = []

def choose_payment_method_for_customer(cust_id):
    """
    Pick one payment method row for a customer at random.

    Parameters:
        cust_id: customer ID to match against the "customer_id" column of
            df_customer_payment_methods.

    Returns:
        A pandas Series holding the chosen row, or None when the customer has
        no payment methods.
    """
    methods = df_customer_payment_methods[
        df_customer_payment_methods["customer_id"] == cust_id
    ]
    if methods.empty:
        return None
    # Draw the row position from the `random` module instead of
    # DataFrame.sample: sample() uses NumPy's RNG unless random_state is
    # given, and the script-level random.seed(...) does not seed NumPy, so
    # the choice would differ from run to run.
    return methods.iloc[random.randrange(len(methods))]

# Event names persisted to order_status_events and delivery_status_events
# respectively. Payment events are written to neither; they only feed the
# payment tables.
ORDER_EVENT_NAMES = frozenset({
    "CustomerCreated", "OrderPlaced", "VerificationStarted",
    "VerificationAutoApproved", "VerificationNeedsManualReview",
    "VerificationFailed", "ManualReviewCompleted",
    "OrderConfirmed", "OrderRejected",
    "SupportTicketOpened", "VoucherIssued",
})
DELIVERY_EVENT_NAMES = frozenset({
    "KitchenStarted", "KitchenCompleted",
    "DriverAssigned", "DriverAtRestaurant",
    "PickupComplete", "OnTheWay", "Delivered",
})

# The join path is identical for every order, so render it once.
JOIN_PATH = str([
    "customers.customer_id",
    "customer_addresses.customer_id",
    "customer_payment_methods.customer_id",
    "orders.customer_id",
    "order_status_events.order_id",
    "order_items.order_id",
    "order_discounts.order_id",
    "verification_requests.order_id",
    "verification_results.verification_request_id",
    "manual_reviews.verification_result_id",
    "kitchen_tickets.order_id",
    "delivery_requests.order_id",
    "delivery_assignments.delivery_request_id",
    "delivery_status_events.order_id",
    "support_tickets.order_id",
    "payment_authorizations.order_id",
    "payment_captures.payment_auth_id",
    "restaurants.restaurant_id",
    "restaurant_locations.restaurant_id",
    "menu_items.restaurant_id",
    "menu_item_options.menu_item_id",
    "drivers.driver_id",
    "driver_vehicles.driver_id",
])

# Generate one order per iteration: pick customer/restaurant/items, run the
# verification rules, lay out the event timeline, then emit rows for every
# process table the order touches plus its event_traces row.
for oid in range(NUM_ORDERS):
    order_id = generate_id("O", order_ids, oid)
    customer_idx = random.randint(0, NUM_CUSTOMERS - 1)
    customer_id = customer_ids[customer_idx]
    # df_customers rows are in creation order, so position == customer index.
    cust_row = df_customers.iloc[customer_idx]
    region = cust_row["region"]
    credit_rating = int(cust_row["credit_rating"])
    outstanding_amount = float(cust_row["outstanding_amount"])

    restaurant_idx = random.randint(0, NUM_RESTAURANTS - 1)
    restaurant_id = restaurant_ids[restaurant_idx]

    restaurant_menu = df_menu_items[df_menu_items["restaurant_id"] == restaurant_id]
    if restaurant_menu.empty:
        # Restaurant received no menu items: fall back to the whole catalogue.
        restaurant_menu = df_menu_items
    # DataFrame.sample uses NumPy's RNG unless random_state is given, and the
    # script-level random.seed(...) does not seed NumPy. Deriving the state
    # from `random` keeps the item selection reproducible.
    items_for_order = restaurant_menu.sample(
        random.randint(1, min(3, len(restaurant_menu))),
        random_state=random.randint(0, 2**32 - 1),
    ).reset_index(drop=True)

    # The first sampled item decides the category used by the rules below.
    main_item = items_for_order.iloc[0]
    category = main_item["category"]

    pm = choose_payment_method_for_customer(customer_id)
    if pm is not None:
        wallet_balance = float(pm["wallet_balance"])
        payment_method_id = pm["payment_method_id"]
    else:
        wallet_balance = 0.0
        payment_method_id = None

    # Rounded like the other monetary fields so float-summation noise
    # (e.g. 30.299999999999997) does not leak into the CSV.
    # NOTE(review): the amount sums one unit of each item, while order_items
    # rows carry a random quantity of 1-3 - confirm this mismatch is intended.
    order_amount = round(float(items_for_order["price"].sum()), 2)
    discount_amount = round(order_amount * 0.1, 2) if random.random() < 0.2 else 0.0
    net_amount = round(order_amount - discount_amount, 2)

    start_time = BASE_DATE + timedelta(minutes=random.randint(0, 60 * 24 * 30))

    # ---------------- Verification rules ----------------
    # Rules are evaluated in order and the first match wins.
    # NOTE(review): RULE_4 and RULE_5 can never fire because RULE_1 already
    # rejects every APPAREL order. Kept as-is to preserve the generated
    # labels - confirm whether the specific rules should be checked first.
    fail_reason = None

    if category == "APPAREL":
        fail_reason = "RULE_1_CATEGORY_APPAREL"
    if region == "NORTH" and fail_reason is None:
        fail_reason = "RULE_2_REGION_NORTH"
    if wallet_balance < 10 and fail_reason is None:
        fail_reason = "RULE_3_LOW_WALLET"
    if category == "APPAREL" and region == "NORTH" and fail_reason is None:
        fail_reason = "RULE_4_APPAREL_NORTH"
    if category == "APPAREL" and wallet_balance < 20 and fail_reason is None:
        fail_reason = "RULE_5_APPAREL_WALLET"
    if (
        category == "ELECTRONICS"
        and outstanding_amount > 100
        and credit_rating <= 2
        and fail_reason is None
    ):
        fail_reason = "RULE_6_ELECTRONICS_DEBT_LOW_RATING"

    # Orders that pass every rule may still be routed to manual review.
    needs_manual = False
    if fail_reason is None:
        if credit_rating == 3:
            needs_manual = True
        if 10 <= outstanding_amount < 100:
            needs_manual = True
        if 10 <= wallet_balance < 20:
            needs_manual = True

    # ---------------- Base events before branching ----------------
    events = ["CustomerCreated", "OrderPlaced", "VerificationStarted"]

    # Keep a handle on the row so created_at can be filled in once the
    # timeline is known, without re-scanning the whole list for every order.
    verification_request_id = generate_simple_id("VRQ")
    verification_request = {
        "verification_request_id": verification_request_id,
        "order_id": order_id,
        "created_at": None,  # filled later
    }
    verification_requests.append(verification_request)

    verification_result_id = generate_simple_id("VRS")
    verification_outcome = None

    manual_review_id = None
    manual_review_outcome = None

    final_order_status = None

    # Branch 1: Verification
    if fail_reason is not None:
        events.append("VerificationFailed")
        verification_outcome = "FAILED"
        events.append("OrderRejected")
        final_order_status = "REJECTED"
    elif needs_manual:
        events.append("VerificationNeedsManualReview")
        manual_review_id = generate_simple_id("MRV")
        events.append("ManualReviewCompleted")
        # 90% of manual reviews end in approval.
        if random.random() < 0.9:
            manual_review_outcome = "APPROVED"
            events.append("OrderConfirmed")
            final_order_status = "CONFIRMED"
            verification_outcome = "APPROVED_AFTER_REVIEW"
        else:
            manual_review_outcome = "REJECTED"
            events.append("OrderRejected")
            final_order_status = "REJECTED"
            verification_outcome = "REJECTED_AFTER_REVIEW"
    else:
        events.append("VerificationAutoApproved")
        verification_outcome = "AUTO_APPROVED"
        events.append("OrderConfirmed")
        final_order_status = "CONFIRMED"

    # ---------------- Initial timestamps (5-min increments) ----------------
    event_times = []
    current_time = start_time
    for _ in events:
        event_times.append(current_time)
        current_time = current_time + timedelta(minutes=5)

    delivered_time = None
    payment_auth_time = None
    payment_capture_time = None
    delivery_request_id = None
    delivery_assignment_id = None

    # If confirmed, extend with Kitchen + Delivery + Support + Payment
    if final_order_status == "CONFIRMED":
        more_events = [
            "KitchenStarted",
            "KitchenCompleted",
            "DriverAssigned",
            "DriverAtRestaurant",
            "PickupComplete",
            "OnTheWay",
            "Delivered",
        ]
        # 20% of confirmed orders raise a support ticket after delivery.
        has_support_issue = random.random() < 0.2
        if has_support_issue:
            more_events += ["SupportTicketOpened", "VoucherIssued"]
        more_events += ["PaymentAuthorized", "PaymentCaptured"]

        for ev in more_events:
            events.append(ev)
            event_times.append(current_time)
            current_time = current_time + timedelta(minutes=5)

    # Event names are unique within an order, so a name -> position map is
    # safe. It is built once, after the event list is final.
    name_to_index = {name: idx for idx, name in enumerate(events)}

    # ---------------- Adjust for Delivered / Support / Payment ----------------
    if "Delivered" in name_to_index:
        delivered_idx = name_to_index["Delivered"]
        delivered_time = event_times[delivered_idx]

        # Support events are re-anchored relative to delivery.
        if "SupportTicketOpened" in name_to_index:
            sto_idx = name_to_index["SupportTicketOpened"]
            event_times[sto_idx] = delivered_time + timedelta(minutes=10)
            if "VoucherIssued" in name_to_index:
                vou_idx = name_to_index["VoucherIssued"]
                event_times[vou_idx] = delivered_time + timedelta(minutes=30)

        # Payment is authorized a day after delivery and captured an hour later.
        if "PaymentAuthorized" in name_to_index:
            pa_idx = name_to_index["PaymentAuthorized"]
            payment_auth_time = delivered_time + timedelta(days=1)
            event_times[pa_idx] = payment_auth_time

        if "PaymentCaptured" in name_to_index and payment_auth_time is not None:
            pc_idx = name_to_index["PaymentCaptured"]
            payment_capture_time = payment_auth_time + timedelta(hours=1)
            event_times[pc_idx] = payment_capture_time

        # Enforce strictly increasing timestamps
        for i in range(1, len(event_times)):
            if event_times[i] <= event_times[i - 1]:
                event_times[i] = event_times[i - 1] + timedelta(seconds=1)

    # ---------------- Derive key times ----------------
    t_customer_created = event_times[name_to_index["CustomerCreated"]]
    t_order_placed = event_times[name_to_index["OrderPlaced"]]
    t_ver_start = event_times[name_to_index["VerificationStarted"]]

    # First-use timestamps for entities (order-centric). Orders are not
    # generated in chronological order, so every entity keeps the EARLIEST
    # timestamp seen so far; otherwise a restaurant or menu item could be
    # stamped later than an order that already uses it.
    customer_first_ts[customer_id] = min(
        customer_first_ts.get(customer_id, t_customer_created),
        t_customer_created,
    )

    restaurant_seen = t_customer_created - timedelta(hours=4)
    restaurant_first_ts[restaurant_id] = min(
        restaurant_first_ts.get(restaurant_id, restaurant_seen),
        restaurant_seen,
    )

    menu_item_seen = t_order_placed - timedelta(hours=1)
    for mid in items_for_order["menu_item_id"]:
        menu_item_first_ts[mid] = min(
            menu_item_first_ts.get(mid, menu_item_seen),
            menu_item_seen,
        )

    # ---------------- orders ----------------
    orders.append({
        "order_id": order_id,
        "customer_id": customer_id,
        "restaurant_id": restaurant_id,
        "region": region,
        "order_amount": order_amount,
        "discount_amount": discount_amount,
        "net_amount": net_amount,
        "created_at": t_order_placed.isoformat(),
        "payment_method_id": payment_method_id,
    })

    # ---------------- order_items ----------------
    added_at = t_order_placed + timedelta(minutes=1)
    for _, mi in items_for_order.iterrows():
        order_item_id = generate_simple_id("OI")
        order_items.append({
            "order_item_id": order_item_id,
            "order_id": order_id,
            "menu_item_id": mi["menu_item_id"],
            "quantity": random.randint(1, 3),
            "unit_price": mi["price"],
            "added_at": added_at.isoformat(),
        })

    # ---------------- order_discounts ----------------
    if discount_amount > 0:
        discount_id = generate_simple_id("DSC")
        applied_at = t_order_placed + timedelta(minutes=2)
        order_discounts.append({
            "discount_id": discount_id,
            "order_id": order_id,
            "amount": discount_amount,
            "reason": "PROMO",
            "applied_at": applied_at.isoformat(),
        })

    # ---------------- verification_requests & results ----------------
    verification_request["created_at"] = t_ver_start.isoformat()

    # The result is stamped with the event that concluded verification.
    if "VerificationFailed" in name_to_index:
        verification_result_time = event_times[name_to_index["VerificationFailed"]]
    elif "ManualReviewCompleted" in name_to_index:
        verification_result_time = event_times[name_to_index["ManualReviewCompleted"]]
    elif "VerificationAutoApproved" in name_to_index:
        verification_result_time = event_times[name_to_index["VerificationAutoApproved"]]
    else:
        verification_result_time = t_ver_start + timedelta(minutes=10)

    verification_results.append({
        "verification_result_id": verification_result_id,
        "verification_request_id": verification_request_id,
        "outcome": verification_outcome,
        "reason": fail_reason if fail_reason is not None else (
            "MANUAL_REVIEW" if needs_manual else "AUTO"
        ),
        "completed_at": verification_result_time.isoformat(),
    })

    # ---------------- manual_reviews ----------------
    if manual_review_id is not None:
        manual_review_time = event_times[name_to_index["ManualReviewCompleted"]]
        manual_reviews.append({
            "manual_review_id": manual_review_id,
            "verification_result_id": verification_result_id,
            "outcome": manual_review_outcome,
            "reviewed_at": manual_review_time.isoformat(),
        })

    # ---------------- Kitchen, Delivery, Support, Payment ----------------
    if final_order_status == "CONFIRMED":
        # Kitchen tickets
        kt_id = generate_id("KT", kitchen_ticket_ids, oid)
        t_kitchen_start = event_times[name_to_index["KitchenStarted"]]
        t_kitchen_done = event_times[name_to_index["KitchenCompleted"]]
        kitchen_tickets.append({
            "kitchen_ticket_id": kt_id,
            "order_id": order_id,
            "status": "COMPLETED",
            "created_at": t_kitchen_start.isoformat(),
            "started_at": t_kitchen_start.isoformat(),
            "completed_at": t_kitchen_done.isoformat(),
        })

        # Delivery requests & assignments
        delivery_request_id = generate_id("DRQ", delivery_request_ids, oid)
        t_driver_assigned = event_times[name_to_index["DriverAssigned"]]
        # The request is raised five minutes before a driver is assigned.
        t_delivery_req = t_driver_assigned - timedelta(minutes=5)
        delivery_requests.append({
            "delivery_request_id": delivery_request_id,
            "order_id": order_id,
            "requested_at": t_delivery_req.isoformat(),
        })

        # vehicle_ids shares the driver's index, so this is the driver's own vehicle.
        drv_idx = random.randint(0, NUM_DRIVERS - 1)
        driver_id = driver_ids[drv_idx]
        vehicle_id = vehicle_ids[drv_idx]

        driver_seen = t_driver_assigned - timedelta(hours=2)
        driver_first_ts[driver_id] = min(
            driver_first_ts.get(driver_id, driver_seen),
            driver_seen,
        )

        vehicle_seen = driver_first_ts[driver_id] + timedelta(minutes=30)
        vehicle_first_ts[vehicle_id] = min(
            vehicle_first_ts.get(vehicle_id, vehicle_seen),
            vehicle_seen,
        )

        delivery_assignment_id = generate_id("DAS", delivery_assignment_ids, oid)
        delivery_assignments.append({
            "delivery_assignment_id": delivery_assignment_id,
            "delivery_request_id": delivery_request_id,
            "driver_id": driver_id,
            "vehicle_id": vehicle_id,
            "assigned_at": t_driver_assigned.isoformat(),
        })

    # ---------------- order_status_events & delivery_status_events ----------------
    for ev_name, ev_time in zip(events, event_times):
        if ev_name in ORDER_EVENT_NAMES:
            ose_id = generate_simple_id("OSE")
            order_status_events.append({
                "order_status_event_id": ose_id,
                "order_id": order_id,
                "event_name": ev_name,
                "timestamp": ev_time.isoformat(),
            })

        if ev_name in DELIVERY_EVENT_NAMES and final_order_status == "CONFIRMED":
            dse_id = generate_simple_id("DSE")
            delivery_status_events.append({
                "delivery_status_event_id": dse_id,
                "order_id": order_id,
                "delivery_assignment_id": delivery_assignment_id,
                "event_name": ev_name,
                "timestamp": ev_time.isoformat(),
            })

    # ---------------- Support tickets & payments ----------------
    if final_order_status == "CONFIRMED":
        if "SupportTicketOpened" in name_to_index:
            t_support_open = event_times[name_to_index["SupportTicketOpened"]]
            # Resolved when the voucher is issued; falls back to the open time.
            t_support_resolved = event_times[
                name_to_index.get("VoucherIssued", name_to_index["SupportTicketOpened"])
            ]
            st_id = generate_id("ST", support_ticket_ids, oid)
            support_tickets.append({
                "support_ticket_id": st_id,
                "order_id": order_id,
                "status": "RESOLVED",
                "opened_at": t_support_open.isoformat(),
                "resolved_at": t_support_resolved.isoformat(),
                "voucher_amount": round(random.uniform(1, 10), 2),
            })

        pa_idx = name_to_index["PaymentAuthorized"]
        pc_idx = name_to_index["PaymentCaptured"]
        payment_auth_time = event_times[pa_idx]
        payment_capture_time = event_times[pc_idx]

        payment_auth_id = generate_simple_id("PAU")
        payment_authorizations.append({
            "payment_auth_id": payment_auth_id,
            "order_id": order_id,
            "amount": net_amount,
            "authorized_at": payment_auth_time.isoformat(),
        })
        payment_capture_id = generate_simple_id("PAC")
        payment_captures.append({
            "payment_capture_id": payment_capture_id,
            "payment_auth_id": payment_auth_id,
            "amount": net_amount,
            "captured_at": payment_capture_time.isoformat(),
        })

    # ---------------- event_traces row (Key_ID = order_id) ----------------
    # Entity side (always present for an order)
    tables_trace = ["customers", "customer_addresses"]
    if payment_method_id is not None:
        tables_trace.append("customer_payment_methods")

    tables_trace += [
        "restaurants",
        "restaurant_locations",
        "menu_items",
        "menu_item_options",  # structurally reachable via menu_items
    ]

    # Order core
    tables_trace += ["orders", "order_status_events", "order_items"]
    if discount_amount > 0:
        tables_trace.append("order_discounts")

    # Verification pipeline
    tables_trace += ["verification_requests", "verification_results"]
    if manual_review_id is not None:
        tables_trace.append("manual_reviews")

    # Downstream flow only if the order is actually confirmed
    if final_order_status == "CONFIRMED":
        tables_trace += [
            "kitchen_tickets",
            "delivery_requests",
            "delivery_assignments",
            "delivery_status_events",
            "drivers",
            "driver_vehicles",
        ]
        if "SupportTicketOpened" in name_to_index:
            tables_trace.append("support_tickets")
        tables_trace += ["payment_authorizations", "payment_captures"]

    # Enforce uniqueness while keeping first-seen order.
    tables_trace_unique = list(dict.fromkeys(tables_trace))

    event_traces_rows.append({
        "Key_Selector": "Order_ID",
        "Key_ID": order_id,
        "Event_Trace": str(tables_trace_unique),   # TABLE NAMES, not event names
        "Join_Path": JOIN_PATH,
    })

# ============================================================
# Build DataFrames for process tables
# ============================================================

# Materialize each row accumulator (a list of per-row dicts) as a DataFrame;
# the dict keys become the column names.
df_orders = pd.DataFrame(orders)
df_order_items = pd.DataFrame(order_items)
df_order_discounts = pd.DataFrame(order_discounts)
df_order_status_events = pd.DataFrame(order_status_events)
df_kitchen_tickets = pd.DataFrame(kitchen_tickets)
df_delivery_requests = pd.DataFrame(delivery_requests)
df_delivery_assignments = pd.DataFrame(delivery_assignments)
df_delivery_status_events = pd.DataFrame(delivery_status_events)
df_payment_authorizations = pd.DataFrame(payment_authorizations)
df_payment_captures = pd.DataFrame(payment_captures)
df_support_tickets = pd.DataFrame(support_tickets)
df_verification_requests = pd.DataFrame(verification_requests)
df_verification_results = pd.DataFrame(verification_results)
df_manual_reviews = pd.DataFrame(manual_reviews)
# One row per order: tables touched by the order plus the join path.
df_event_traces = pd.DataFrame(event_traces_rows)

# ============================================================
# Assign timestamps for static tables based on first-use (Option 2)
# ============================================================

def dt_to_iso(dt):
    """Return the ISO-8601 string for a datetime instance, or None for any other value."""
    if not isinstance(dt, datetime):
        return None
    return dt.isoformat()

# customers: first-use time of each customer, BASE_DATE if never used.
cust_created_map = {
    customer_key: customer_first_ts.get(customer_key, BASE_DATE)
    for customer_key in df_customers["customer_id"]
}
df_customers["created_at"] = df_customers["customer_id"].map(
    lambda key: dt_to_iso(cust_created_map[key])
)

# customer_addresses: same instant as the owning customer.
df_customer_addresses["created_at"] = df_customer_addresses["customer_id"].map(
    lambda key: dt_to_iso(cust_created_map.get(key, BASE_DATE))
)

# customer_payment_methods: ten minutes after the owning customer.
# NOTE(review): the order loop stamps OrderPlaced five minutes after
# CustomerCreated, so this offset puts the payment method's created_at after
# the customer's first order - confirm that is intended.
df_customer_payment_methods["created_at"] = df_customer_payment_methods["customer_id"].map(
    lambda key: dt_to_iso(cust_created_map.get(key, BASE_DATE) + timedelta(minutes=10))
)

# restaurants: first-use time, or the day before BASE_DATE if never used.
rest_created_map = {
    restaurant_key: restaurant_first_ts.get(restaurant_key, BASE_DATE - timedelta(days=1))
    for restaurant_key in df_restaurants["restaurant_id"]
}
df_restaurants["created_at"] = df_restaurants["restaurant_id"].map(
    lambda key: dt_to_iso(rest_created_map[key])
)

# restaurant_locations: same instant as the owning restaurant.
df_restaurant_locations["created_at"] = df_restaurant_locations["restaurant_id"].map(
    lambda key: dt_to_iso(rest_created_map.get(key, BASE_DATE - timedelta(days=1)))
)

# menu_items: first-use time, else one hour after the owning restaurant.
menu_created_map = {
    item_key: menu_item_first_ts.get(
        item_key,
        rest_created_map.get(owner_key, BASE_DATE - timedelta(days=1)) + timedelta(hours=1),
    )
    for item_key, owner_key in zip(
        df_menu_items["menu_item_id"], df_menu_items["restaurant_id"]
    )
}
df_menu_items["created_at"] = df_menu_items["menu_item_id"].map(
    lambda key: dt_to_iso(menu_created_map[key])
)

# menu_item_options: same instant as the owning menu item.
df_menu_item_options["created_at"] = df_menu_item_options["menu_item_id"].map(
    lambda key: dt_to_iso(menu_created_map.get(key, BASE_DATE))
)

# drivers: first-use time, or the day before BASE_DATE if never assigned.
driver_onboard_map = {
    driver_key: driver_first_ts.get(driver_key, BASE_DATE - timedelta(days=1))
    for driver_key in df_drivers["driver_id"]
}
df_drivers["onboarded_at"] = df_drivers["driver_id"].map(
    lambda key: dt_to_iso(driver_onboard_map[key])
)

# driver_vehicles: thirty minutes after the owning driver was onboarded.
df_driver_vehicles["registered_at"] = df_driver_vehicles["driver_id"].map(
    lambda key: dt_to_iso(
        driver_onboard_map.get(key, BASE_DATE - timedelta(days=1)) + timedelta(minutes=30)
    )
)

# ============================================================
# transition_graph.csv  (TABLE-LEVEL GRAPH)
# ============================================================

# Explicit edges that describe how tables can be joined / flow
# Directed (from_table, to_table) pairs describing how tables join / flow.
table_edges = [
    # Customer side
    ("customers", "customer_addresses"),
    ("customers", "customer_payment_methods"),
    ("customers", "orders"),

    # Restaurant / menu side
    ("restaurants", "restaurant_locations"),
    ("restaurants", "menu_items"),
    ("menu_items", "menu_item_options"),

    # Order core
    ("orders", "order_status_events"),
    ("orders", "order_items"),
    ("orders", "order_discounts"),
    ("orders", "verification_requests"),
    ("orders", "kitchen_tickets"),
    ("orders", "delivery_requests"),
    ("orders", "support_tickets"),
    ("orders", "payment_authorizations"),

    # Verification
    ("verification_requests", "verification_results"),
    ("verification_results", "manual_reviews"),

    # Delivery
    ("delivery_requests", "delivery_assignments"),
    ("delivery_assignments", "delivery_status_events"),

    # Drivers / vehicles
    ("drivers", "driver_vehicles"),
    ("delivery_assignments", "drivers"),
    ("delivery_assignments", "driver_vehicles"),

    # Payments
    ("payment_authorizations", "payment_captures"),
]

# Adjacency map seeded with every known table, so tables without outgoing
# edges still appear with an empty target list.
transition_graph = {table_name: set() for table_name in ALL_TABLES}
for source_table, target_table in table_edges:
    transition_graph[source_table].add(target_table)

# One CSV row per table; targets are rendered as a sorted list literal.
tg_rows = [
    {"From": source_table, "To_List": str(sorted(targets))}
    for source_table, targets in transition_graph.items()
]

df_transition_graph = pd.DataFrame(tg_rows)
df_transition_graph.to_csv("transition_graph.csv", index=False)

# (saving stays the same)


# ============================================================
# Save all CSVs
# ============================================================

# The two graph-level artefacts live next to the script.
df_event_traces.to_csv("event_traces.csv", index=False)
df_transition_graph.to_csv("transition_graph.csv", index=False)

# Per-table CSVs go under data/, written in this order.
data_files = [
    ("customers.csv", df_customers),
    ("customer_addresses.csv", df_customer_addresses),
    ("customer_payment_methods.csv", df_customer_payment_methods),
    ("restaurants.csv", df_restaurants),
    ("restaurant_locations.csv", df_restaurant_locations),
    ("menu_items.csv", df_menu_items),
    ("menu_item_options.csv", df_menu_item_options),
    ("drivers.csv", df_drivers),
    ("driver_vehicles.csv", df_driver_vehicles),
    ("orders.csv", df_orders),
    ("order_items.csv", df_order_items),
    ("order_discounts.csv", df_order_discounts),
    ("order_status_events.csv", df_order_status_events),
    ("kitchen_tickets.csv", df_kitchen_tickets),
    ("delivery_requests.csv", df_delivery_requests),
    ("delivery_assignments.csv", df_delivery_assignments),
    ("delivery_status_events.csv", df_delivery_status_events),
    ("payment_authorizations.csv", df_payment_authorizations),
    ("payment_captures.csv", df_payment_captures),
    ("support_tickets.csv", df_support_tickets),
    ("verification_requests.csv", df_verification_requests),
    ("verification_results.csv", df_verification_results),
    ("manual_reviews.csv", df_manual_reviews),
]
for file_name, frame in data_files:
    frame.to_csv(os.path.join("data", file_name), index=False)

print("Done. Generated event_traces.csv, transition_graph.csv, and all data/*.csv files.")


# Output: Done. Generated event_traces.csv, transition_graph.csv, and all data/*.csv files.
