In [12]:
import pandas as pd
import numpy as np
import random
import os


# Seed both NumPy's global RNG and the stdlib `random` module so that any
# draw from either source is reproducible after a kernel restart.
SEED = 42
np.random.seed(SEED)
random.seed(SEED)

# Parameters
NUM_CUSTOMERS = 500
NUM_ORDERS = 3000
NUM_PRODUCTS = 100
START_DATE = pd.to_datetime("2024-01-01")
END_DATE = pd.to_datetime("2024-03-31")
# Hourly grid of candidate timestamps, both endpoints included.
# Lowercase "h" is the supported hourly alias; uppercase "H" is deprecated
# in pandas >= 2.2 and emits a FutureWarning.
DATE_RANGE = pd.date_range(start=START_DATE, end=END_DATE, freq="h")

# The generated CSVs are written under data/; create the directory up front
# because DataFrame.to_csv does not create missing parent directories.
os.makedirs("data", exist_ok=True)


# Customers

In [13]:
# Basic info: sequential ids plus a randomly assigned zip code and city.
# Random draws are made in the same order as the columns appear so the
# seeded output is stable.
zip_draws = np.random.choice(
    ["1011", "1012", "1013", "1014", "1015", "1016"], size=NUM_CUSTOMERS
)
city_draws = np.random.choice(
    ["Amsterdam", "Rotterdam", "Utrecht"], size=NUM_CUSTOMERS
)
basic_customers = pd.DataFrame({
    "customer_id": range(1, NUM_CUSTOMERS + 1),
    "zip_code": zip_draws,
    "city": city_draws,
})

# Profile info: signup day (2022-2023), preferred delivery window,
# a loyalty score bounded to [1, 5], and a has-kids flag (30% True).
signup_calendar = pd.date_range("2022-01-01", "2023-12-31")
signup_draws = pd.to_datetime(np.random.choice(signup_calendar, size=NUM_CUSTOMERS))
slot_draws = np.random.choice(
    ["08:00-10:00", "10:00-12:00", "18:00-20:00"], size=NUM_CUSTOMERS
)
loyalty_draws = np.clip(np.random.normal(loc=3, scale=1, size=NUM_CUSTOMERS), 1, 5)
kids_draws = np.random.choice([True, False], size=NUM_CUSTOMERS, p=[0.3, 0.7])
customer_profiles = pd.DataFrame({
    "customer_id": basic_customers["customer_id"],
    "signup_date": signup_draws,
    "preferred_slot": slot_draws,
    "loyalty_score": loyalty_draws,
    "has_kids": kids_draws,
})

# Final customer dataset: join the two frames on the shared id and persist.
customers = pd.merge(basic_customers, customer_profiles, on="customer_id")
customers.to_csv("data/customer_profiles.csv", index=False)


# Orders

In [14]:
# One row per order: a sequential id, a customer sampled (with replacement)
# from the existing customer ids, and a timestamp sampled from DATE_RANGE.
orders = pd.DataFrame({
    "order_id": range(1, NUM_ORDERS + 1),
    "customer_id": np.random.choice(customers["customer_id"], size=NUM_ORDERS),
    "order_timestamp": np.random.choice(DATE_RANGE, size=NUM_ORDERS)
})

# Delivery slots: two-hour windows starting at 08, 10, ..., 18.
# A single start hour is drawn per order and the end is derived from it, so
# every slot is a valid window (drawing start and end independently produced
# values such as "18:00-10:00"). The draw uses the seeded NumPy RNG so it is
# reproducible, and hours are zero-padded to match the "08:00-10:00" format
# used for customers' preferred_slot.
slot_hours = list(range(8, 20, 2))
slot_starts = np.random.choice(slot_hours, size=NUM_ORDERS)
orders["delivery_slot"] = [
    f"{int(start):02d}:00-{int(start) + 2:02d}:00" for start in slot_starts
]

# Delivery duration and basket value: normal draws clipped to plausible bounds.
orders["delivery_duration_min"] = np.random.normal(loc=35, scale=10, size=NUM_ORDERS).clip(15, 90)
orders["basket_value_eur"] = np.random.normal(loc=45, scale=15, size=NUM_ORDERS).clip(5, 150)

# Add location from customers. validate= makes the merge fail loudly if
# customer_id were ever duplicated on the customer side, instead of silently
# multiplying order rows.
orders = orders.merge(
    customers[["customer_id", "zip_code", "city"]],
    on="customer_id",
    how="left",
    validate="many_to_one",
)
orders.to_csv("data/orders.csv", index=False)


# Products

In [15]:
# Product catalogue: sequential ids, a generated name per id, and a
# randomly assigned category.
product_ids = range(1, NUM_PRODUCTS + 1)
product_names = [f"Product_{i}" for i in product_ids]
product_categories = np.random.choice(
    ["Fruit", "Dairy", "Bakery", "Meat", "Drinks"], size=NUM_PRODUCTS
)
products = pd.DataFrame({
    "product_id": product_ids,
    "product_name": product_names,
    "category": product_categories,
})
products.to_csv("data/products.csv", index=False)



# Product Orders

In [16]:
# Order lines: each row pairs a randomly chosen order with a randomly chosen
# product, a quantity in 1..4 (randint's upper bound is exclusive), and a
# unit price drawn from a normal distribution clipped to [0.5, 10].
n_order_lines = 6000
line_order_ids = np.random.choice(orders["order_id"], size=n_order_lines)
line_product_ids = np.random.choice(products["product_id"], size=n_order_lines)
line_quantities = np.random.randint(1, 5, size=n_order_lines)
line_unit_prices = np.clip(
    np.random.normal(loc=3.5, scale=1.0, size=n_order_lines), 0.5, 10
)
product_orders = pd.DataFrame({
    "order_id": line_order_ids,
    "product_id": line_product_ids,
    "quantity": line_quantities,
    "unit_price": line_unit_prices,
})
product_orders.to_csv("data/product_orders.csv", index=False)



# Warehouse Operations

In [20]:
# Daily warehouse log: one row per calendar day between START_DATE and
# END_DATE, each with a randomly chosen warehouse, a Poisson-distributed
# order count, and an average packing time clipped to [2, 10] minutes.
warehouse_dates = pd.date_range(start=START_DATE, end=END_DATE)
n_days = len(warehouse_dates)
daily_warehouse_ids = np.random.choice(["WH1", "WH2", "WH3"], size=n_days)
daily_orders_processed = np.random.poisson(lam=120, size=n_days)
daily_packing_minutes = np.clip(
    np.random.normal(loc=5.5, scale=1.0, size=n_days), 2, 10
)
warehouse_operations = pd.DataFrame({
    "date": warehouse_dates,
    "warehouse_id": daily_warehouse_ids,
    "total_orders_processed": daily_orders_processed,
    "avg_packing_time_min": daily_packing_minutes,
})
warehouse_operations.to_csv("data/warehouse_operations.csv", index=False)