In [3]:
import pandas as pd
import random
from faker import Faker

# Initialize Faker with Indian locale
fake = Faker("en_IN")

# Seed both random sources so a fresh "Restart & Run All" draws the same
# random sequence. The stdlib `random` module and Faker keep separate
# generators, so each one is seeded explicitly.
# NOTE(review): date columns are generated relative to "today", so their
# values can still shift when the notebook is run on a different day.
random_seed = 42
random.seed(random_seed)
Faker.seed(random_seed)

# Define constants
# Number of rows to generate for each table.
num_customers = 5_000
num_restaurants = 1_000
num_riders = 2_000
num_orders = 1_500_000
num_deliveries = 1_500_000

# Value pools for the categorical columns.
cities = [
    "Mumbai", "Delhi", "Bangalore", "Hyderabad", "Chennai", "Kolkata", "Pune",
    "Ahmedabad", "Jaipur", "Lucknow", "Daltonganj", "Indore", "Ranchi",
]
order_statuses = ["Completed", "Pending", "Cancelled"]
delivery_statuses = ["Delivered", "Failed", "In Transit"]
order_items = [
    "Biryani", "Pizza", "Burger", "Pasta", "Dosa", "Sandwich", "Rolls",
    "Chowmein", "Paneer Tikka", "Butter Chicken", "Idli Sambar", "Vada Pav",
    "Samosa", "Chole Bhature", "Pav Bhaji", "Momos", "Tandoori Chicken",
    "Dal Makhani", "Fish Curry", "Rogan Josh", "Palak Paneer", "Chicken Tikka",
    "Egg Curry", "Malai Kofta", "Aloo Paratha", "Gulab Jamun", "Rasgulla",
    "Dhokla", "Jalebi", "Chana Masala", "Paneer Butter Masala", "Kadhi Pakora",
    "Rajma Chawal", "Baingan Bharta", "Puri Sabzi", "Litti Chokha", "Misal Pav",
    "Keema Paratha", "Seekh Kebab", "Mutton Curry", "Hyderabadi Biryani",
    "Pesarattu", "Chicken 65", "Aloo Tikki", "Gajar Ka Halwa", "Shahi Paneer",
    "Navratan Korma", "Dal Tadka", "Bhindi Masala",
]

genders = ["Male", "Female"]

# Generate Customers Data
# Each column is built on its own line, in the same sequence as it appears in
# the frame, so the random sources are consumed in an unchanged order.
customer_names = [fake.name() for _ in range(num_customers)]
customer_ages = [random.randint(18, 65) for _ in range(num_customers)]
customer_genders = [random.choice(genders) for _ in range(num_customers)]
customer_registration_dates = [
    fake.date_between(start_date="-3y", end_date="today")
    for _ in range(num_customers)
]

customers = pd.DataFrame(
    {
        "customer_id": range(1, num_customers + 1),  # sequential ids starting at 1
        "customer_name": customer_names,
        "age": customer_ages,
        "gender": customer_genders,
        "registration_date": customer_registration_dates,
    }
)
customers.to_csv("customers.csv", index=False)

# Generate Restaurants Data
def _format_opening_hours(open_hour, close_hour):
    """Render opening hours as a 12-hour-clock string.

    Args:
        open_hour: Morning opening hour (an AM hour; 6-10 as used below).
        close_hour: Evening closing hour on the 12-hour clock (9-12 as used
            below), where 12 means midnight.

    Returns:
        A string such as "08:00AM to 10:00PM", or "08:00AM to 12:00AM" when
        the restaurant closes at midnight.
    """
    # The closing hour stays on the 12-hour clock: adding 12 while keeping a
    # "PM" suffix would yield invalid times like "21:00PM" or "24:00PM".
    # Midnight is written 12:00AM, since 12:00PM would denote noon.
    close_suffix = "AM" if close_hour == 12 else "PM"
    return "{:02d}:00AM to {:02d}:00{}".format(open_hour, close_hour, close_suffix)


restaurants = pd.DataFrame({
    "restaurant_id": range(1, num_restaurants + 1),
    "restaurant_name": [fake.company() for _ in range(num_restaurants)],
    "city": [random.choice(cities) for _ in range(num_restaurants)],
    # Opens between 6 and 10 AM, closes between 9 PM and midnight.
    "opening_hours": [
        _format_opening_hours(random.randint(6, 10), random.randint(9, 12))
        for _ in range(num_restaurants)
    ],
})
restaurants.to_csv("restaurants.csv", index=False)

# Generate Riders Data
# Names are drawn first and sign-up dates second, matching the column order.
rider_names = [fake.name() for _ in range(num_riders)]
rider_sign_up_dates = [
    fake.date_between(start_date="-2y", end_date="today")
    for _ in range(num_riders)
]

riders = pd.DataFrame(
    {
        "rider_id": range(1, num_riders + 1),  # sequential ids starting at 1
        "rider_name": rider_names,
        "sign_up": rider_sign_up_dates,
    }
)
riders.to_csv("riders.csv", index=False)

# Generate Orders Data
def _build_orders():
    """Assemble the orders table with one row per order_id in 1..num_orders.

    Columns are generated in the same sequence as they appear in the frame.
    Building them inside a function keeps the per-column lists local, so they
    are not left behind as notebook-level variables.

    Returns:
        A DataFrame with columns order_id, customer_id, restaurant_id,
        order_item, order_date, order_time, order_status, total_amount, rating.
    """
    rows = range(num_orders)

    # Foreign keys are drawn from the full id range of each parent table.
    customer_ids = [random.randint(1, num_customers) for _ in rows]
    restaurant_ids = [random.randint(1, num_restaurants) for _ in rows]
    items = [random.choice(order_items) for _ in rows]
    dates = [fake.date_between(start_date="-1y", end_date="today") for _ in rows]
    times = [fake.time(pattern="%H:%M:%S") for _ in rows]
    statuses = [random.choice(order_statuses) for _ in rows]
    # Amounts fall between 100 and 2000, rounded to two decimals.
    amounts = [round(random.uniform(100, 2000), 2) for _ in rows]
    ratings = [random.randint(1, 5) for _ in rows]

    return pd.DataFrame(
        {
            "order_id": range(1, num_orders + 1),
            "customer_id": customer_ids,
            "restaurant_id": restaurant_ids,
            "order_item": items,
            "order_date": dates,
            "order_time": times,
            "order_status": statuses,
            "total_amount": amounts,
            "rating": ratings,
        }
    )


orders = _build_orders()
orders.to_csv("orders.csv", index=False)

# Generate Deliveries Data
def _build_deliveries():
    """Assemble the deliveries table with one row per delivery_id.

    Columns are generated in the same sequence as they appear in the frame.
    Building them inside a function keeps the per-column lists local, so they
    are not left behind as notebook-level variables.

    Returns:
        A DataFrame with columns delivery_id, order_id, delivery_status,
        delivery_time, rider_id.
    """
    rows = range(num_deliveries)

    # NOTE(review): order_id is drawn independently for every row, so a given
    # order may appear in several deliveries or in none. Confirm this is intended.
    order_ids = [random.randint(1, num_orders) for _ in rows]
    statuses = [random.choice(delivery_statuses) for _ in rows]
    times = [fake.time(pattern="%H:%M:%S") for _ in rows]
    rider_ids = [random.randint(1, num_riders) for _ in rows]

    return pd.DataFrame(
        {
            "delivery_id": range(1, num_deliveries + 1),
            "order_id": order_ids,
            "delivery_status": statuses,
            "delivery_time": times,
            "rider_id": rider_ids,
        }
    )


deliveries = _build_deliveries()
deliveries.to_csv("deliveries.csv", index=False)

# Signal that all five CSV exports have finished.
print("CSV files have been successfully generated!")


CSV files have been successfully generated!
