In [1]:
import pandas as pd
import numpy as np
from faker import Faker
import random
from datetime import datetime, timedelta

# Shared Faker generator; the 'en_IN' locale is chosen so generated names look Indian
fake = Faker(locale='en_IN')

# Configuration
# Table sizes and the calendar window every generated date must fall inside.
NUM_CUSTOMERS = 1000
NUM_RIDERS = 200
START_DATE = datetime(2022, 1, 1)
END_DATE = datetime(2026, 1, 10)

# Cities in which restaurants are generated.
CITIES = [
    'Delhi', 'Mumbai', 'Pune', 'Bengaluru', 'Indore', 'Bhopal', 'Jaipur',
    'Ahmedabad', 'Surat', 'Chennai', 'Hyderabad', 'Kolkata', 'Noida',
    'Gurugram', 'Faridabad', 'Lucknow', 'Chandigarh', 'Kochi',
]

# Cuisine labels a restaurant can be tagged with.
CUISINES = ['North Indian', 'South Indian', 'Chinese', 'Italian', 'Street Food', 'Bakery', 'Mughlai', 'Continental']

# Menu per cuisine as (item name, base price) pairs.
# Every entry of CUISINES needs a menu here: a cuisine without one silently
# falls back to the 'Street Food' menu during order generation, so an
# 'Italian' restaurant would end up selling Chole Bhature.
DISHES = {
    'North Indian': [('Butter Chicken', 350), ('Paneer Tikka', 250), ('Dal Makhani', 200), ('Naan', 40)],
    'South Indian': [('Masala Dosa', 120), ('Idli Sambhar', 80), ('Vada', 60), ('Uttapam', 100)],
    'Chinese': [('Hakka Noodles', 180), ('Manchurian', 160), ('Spring Rolls', 120), ('Dimsums', 200)],
    'Italian': [('Margherita Pizza', 300), ('Pasta Alfredo', 280), ('Lasagna', 350), ('Garlic Bread', 120)],
    'Street Food': [('Chole Bhature', 90), ('Pav Bhaji', 110), ('Gol Gappa', 50), ('Aloo Tikki', 60)],
    'Bakery': [('Chocolate Cake', 450), ('Pastry', 80), ('Brownie', 120), ('Cookies', 150)],
    'Mughlai': [('Chicken Biryani', 320), ('Mutton Korma', 420), ('Seekh Kebab', 280), ('Shahi Paneer', 260)],
    'Continental': [('Grilled Chicken', 380), ('Caesar Salad', 220), ('Fish and Chips', 400), ('Club Sandwich', 180)],
}

# 1. GENERATE CUSTOMERS
def _new_customer(customer_id):
    """Return one customer row for the given id.

    The registration date is drawn first, then the name, and it lies between
    START_DATE and 30 days before END_DATE.
    """
    registered_on = fake.date_between(
        start_date=START_DATE,
        end_date=END_DATE - timedelta(days=30),
    )
    return {
        'customer_id': customer_id,
        'customer_name': fake.name(),
        'reg_date': registered_on,
    }


# Customer ids run from 1 to NUM_CUSTOMERS inclusive.
customers_list = [_new_customer(cid) for cid in range(1, NUM_CUSTOMERS + 1)]
df_customers = pd.DataFrame(customers_list)

# 2. GENERATE RIDERS
def _new_rider(rider_id):
    """Return one rider row for the given id.

    The sign-up date is drawn first, then the name, and it lies between
    START_DATE and 60 days before END_DATE.
    """
    signed_up_on = fake.date_between(
        start_date=START_DATE,
        end_date=END_DATE - timedelta(days=60),
    )
    return {
        'rider_id': rider_id,
        'rider_name': fake.name(),
        'sign_up_date': signed_up_on,
    }


# Rider ids run from 1 to NUM_RIDERS inclusive.
riders_list = [_new_rider(rid) for rid in range(1, NUM_RIDERS + 1)]
df_riders = pd.DataFrame(riders_list)

# 3. GENERATE RESTAURANTS
restaurants_list = []
for city in CITIES:
    # Each city gets between 5 and 8 restaurants (both ends inclusive).
    outlets_in_city = random.randint(5, 8)
    for _ in range(outlets_in_city):
        cuisine = random.choice(CUISINES)
        outlet_name = f"{fake.company()} {cuisine}"
        hours = random.choice(['11:00 AM - 11:00 PM', '10:00 AM - 10:00 PM', '12:00 PM - 12:00 AM'])
        restaurants_list.append({
            # Ids are sequential across all cities, starting at 1.
            'restaurant_id': len(restaurants_list) + 1,
            'restaurant_name': outlet_name,
            'city': city,
            'opening_hours': hours,
            # Helper column read during order generation; dropped before export.
            'cuisine_type': cuisine,
        })

# Next unused restaurant id, matching where a running counter would end up.
res_id = len(restaurants_list) + 1
df_restaurants = pd.DataFrame(restaurants_list)

# 4. GENERATE ORDERS & DELIVERIES
# One order row and one matching delivery row are produced per iteration.
NUM_ORDERS = 5000

# Order times are drawn as minutes after midnight: 480 = 08:00, 1400 = 23:20.
EARLIEST_ORDER_MINUTE = 480
LATEST_ORDER_MINUTE = 1400

# Delivery takes 15-60 minutes after the order. Only the time of day is
# stored, so the delay is capped to stay on the order's own calendar day;
# otherwise a late-evening order would get a delivery_time just after
# midnight that reads as *earlier* than its order_time.
# The cap is only valid while LATEST_ORDER_MINUTE + MIN_DELIVERY_MINUTES
# does not exceed LAST_MINUTE_OF_DAY.
MIN_DELIVERY_MINUTES = 15
MAX_DELIVERY_MINUTES = 60
LAST_MINUTE_OF_DAY = 24 * 60 - 1

ORDER_STATUSES = ['Delivered', 'Cancelled']
STATUS_WEIGHTS = [0.85, 0.15]  # 85% Delivered, 15% Cancelled

orders_list = []
deliveries_list = []
delivery_id_counter = 1

# Higher tier cities multiplier
city_multipliers = {city: random.uniform(1.0, 1.4) if city in ['Mumbai', 'Delhi', 'Bengaluru', 'Gurugram'] else 1.0 for city in CITIES}

# Snapshot the lookup tables as plain Python objects once, so the loop does
# not pay for DataFrame.sample / boolean-mask filtering on every order.
customer_records = df_customers.to_dict('records')
restaurant_records = df_restaurants.to_dict('records')
rider_signups = list(zip(df_riders['rider_id'].tolist(), df_riders['sign_up_date'].tolist()))

for order_id in range(1, NUM_ORDERS + 1):
    # Select Random Customer and Restaurant
    cust_row = random.choice(customer_records)
    res_row = random.choice(restaurant_records)

    # Logic: Order date must not be earlier than the customer's registration
    order_date = fake.date_between(start_date=cust_row['reg_date'], end_date=END_DATE)
    order_minute = random.randint(EARLIEST_ORDER_MINUTE, LATEST_ORDER_MINUTE)
    order_moment = datetime.combine(order_date, datetime.min.time()) + timedelta(minutes=order_minute)
    order_time = order_moment.time()

    # Pricing Logic: cuisines without a menu fall back to Street Food
    menu = DISHES.get(res_row['cuisine_type'], DISHES['Street Food'])
    item_name, base_price = random.choice(menu)

    # Base price scaled by the city multiplier plus +/-10% random noise
    multiplier = city_multipliers[res_row['city']]
    total_amount = round(base_price * multiplier * random.uniform(0.9, 1.1), 2)

    order_status = random.choices(ORDER_STATUSES, weights=STATUS_WEIGHTS)[0]

    orders_list.append({
        'order_id': order_id,
        'customer_id': cust_row['customer_id'],
        'restaurant_id': res_row['restaurant_id'],
        'order_item': item_name,
        'order_date': order_date,
        'order_time': order_time,
        'order_status': order_status,
        'total_amount': total_amount
    })

    # 5. GENERATE DELIVERIES (One for each order)
    # Rider must have signed up on or before the order date
    eligible_rider_ids = [rid for rid, signed_up in rider_signups if signed_up <= order_date]
    if eligible_rider_ids:
        rider_id = random.choice(eligible_rider_ids)
    else:
        # No rider had signed up yet on this date; fall back to any rider id
        # so the delivery row still references a rider.
        rider_id = random.randint(1, NUM_RIDERS)

    longest_delay = min(MAX_DELIVERY_MINUTES, LAST_MINUTE_OF_DAY - order_minute)
    delivery_delay = random.randint(MIN_DELIVERY_MINUTES, longest_delay)
    delivery_time = (order_moment + timedelta(minutes=delivery_delay)).time()

    deliveries_list.append({
        'delivery_id': delivery_id_counter,
        'order_id': order_id,
        'delivery_status': order_status, # Mirrors order status for logic
        # Cancelled orders are never delivered, so they carry no delivery time
        'delivery_time': delivery_time if order_status == 'Delivered' else None,
        'rider_id': rider_id
    })
    delivery_id_counter += 1

# Materialise the order and delivery rows collected above
df_orders = pd.DataFrame(orders_list)
df_deliveries = pd.DataFrame(deliveries_list)

# 'cuisine_type' was only a helper for order generation; keep it out of the export
df_restaurants = df_restaurants.drop('cuisine_type', axis=1)

# Write every table to its own CSV file, without the DataFrame index
csv_exports = (
    ('customers.csv', df_customers),
    ('restaurants.csv', df_restaurants),
    ('orders.csv', df_orders),
    ('deliveries.csv', df_deliveries),
    ('riders.csv', df_riders),
)
for csv_name, frame in csv_exports:
    frame.to_csv(csv_name, index=False)

# Print the first rows of every table under its banner.
# Banners after the first start with a newline to leave a blank line between tables.
previews = (
    ("--- CUSTOMERS ---", df_customers),
    ("\n--- RESTAURANTS ---", df_restaurants),
    ("\n--- ORDERS ---", df_orders),
    ("\n--- DELIVERIES ---", df_deliveries),
    ("\n--- RIDERS ---", df_riders),
)
for banner, frame in previews:
    print(banner)
    print(frame.head())

--- CUSTOMERS ---
   customer_id    customer_name    reg_date
0            1    George Varkey  2022-11-19
1            2  Yashasvi Sunder  2025-05-12
2            3   Champak Bakshi  2023-08-17
3            4     Urvashi Mane  2024-04-23
4            5      Amruta Gaba  2023-05-28

--- RESTAURANTS ---
   restaurant_id             restaurant_name   city        opening_hours
0              1          Jani-Doshi Chinese  Delhi  12:00 PM - 12:00 AM
1              2  Mahajan Group South Indian  Delhi  11:00 AM - 11:00 PM
2              3        Minhas-Magar Mughlai  Delhi  12:00 PM - 12:00 AM
3              4   Seth and Sons Continental  Delhi  10:00 AM - 10:00 PM
4              5           Rau Group Italian  Delhi  10:00 AM - 10:00 PM

--- ORDERS ---
   order_id  customer_id  restaurant_id   order_item  order_date order_time  \
0         1          673             47  Dal Makhani  2025-08-13   22:48:00   
1         2          554             27      Dimsums  2025-10-11   20:18:00   
2     