In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random

In [2]:
# Size of the synthetic dataset: how many order rows to generate, and how many
# distinct delivery-person IDs those orders are spread across.
rows = 100_000
delivery_persons = 2000

# Cities an order can be attributed to.
cities = [
    'Delhi',
    'Mumbai',
    'Bangalore',
    'Hyderabad',
    'Chennai',
    'Kolkata',
    'Pune',
    'Ahmedabad',
]

# Window that order timestamps are drawn from. Both bounds are midnight, so the
# window runs from 2025-06-01 00:00:00 to 2025-06-30 00:00:00.
start_date = datetime(year=2025, month=6, day=1)
end_date = datetime(year=2025, month=6, day=30)

# Generate data

In [3]:
# Seed both generators used in this notebook (the stdlib `random` module and
# NumPy's global RNG) so the generated data is repeatable.
random.seed(42)
np.random.seed(42)

# Sequential order identifiers: ORD100000, ORD100001, ...
order_ids = [f"ORD{number}" for number in range(100000, 100000 + rows)]

# The NumPy draws below all consume the same global random stream, so the
# order of these statements determines the values each column receives.
delivery_person_ids = np.random.randint(
    low=1000, high=1000 + delivery_persons, size=rows
)  # upper bound is exclusive
order_values = np.random.uniform(100, 2000, size=rows).round(2)  # 2 decimal places
order_quantities = np.random.randint(low=1, high=6, size=rows)  # integers 1 through 5
cities_sample = np.random.choice(cities, size=rows)  # one city per order

In [4]:
# Length of the order window in whole seconds. Computed once here instead of
# once per generated row inside the comprehension below.
order_window_seconds = int((end_date - start_date).total_seconds())

# One order time per row: start_date plus a random whole-second offset.
# random.randint includes both endpoints, so an order can fall exactly on
# start_date or exactly on end_date.
order_timestamps = [
    start_date + timedelta(seconds=random.randint(0, order_window_seconds))
    for _ in range(rows)
]

# Delays in whole minutes: order -> pickup (5 to 30) and pickup -> delivery
# (10 to 60). All three lists are drawn one after another from the same
# `random` stream, so reordering these statements changes the generated data.
pickup_offsets = [timedelta(minutes=random.randint(5, 30)) for _ in range(rows)]
delivery_offsets = [timedelta(minutes=random.randint(10, 60)) for _ in range(rows)]

# Each stage's time is the previous stage's time plus its delay, so for every
# row order time < pickup time < delivery time.
pickup_timestamps = [
    ordered_at + delay
    for ordered_at, delay in zip(order_timestamps, pickup_offsets)
]
delivery_timestamps = [
    picked_up_at + delay
    for picked_up_at, delay in zip(pickup_timestamps, delivery_offsets)
]

In [9]:
# One star rating per order, drawn with unequal weights (4 and 5 stars are the
# most likely). The draw uses NumPy's global RNG, so the values depend on the
# seed and on every NumPy draw executed before this cell.
rating_values = [1, 2, 3, 4, 5]
rating_weights = [0.05, 0.1, 0.2, 0.35, 0.3]  # one weight per rating value
ratings = np.random.choice(rating_values, size=rows, p=rating_weights)

# Creating DataFrame

In [10]:
# Gather the per-order sequences under their column names; the insertion order
# of this mapping is the column order of the resulting frame.
order_columns = {
    'delivery_person_id': delivery_person_ids,
    'order_id': order_ids,
    'order_value': order_values,
    'order_quantity': order_quantities,
    'city': cities_sample,
    'order_timestamp': order_timestamps,
    'order_pickup_timestamp': pickup_timestamps,
    'delivery_timestamp': delivery_timestamps,
    'order_rating': ratings,
}

# One row per generated order.
df = pd.DataFrame(data=order_columns)

In [11]:
# Preview the first rows of the assembled frame as a quick visual check.
df.head()

Unnamed: 0,delivery_person_id,order_id,order_value,order_quantity,city,order_timestamp,order_pickup_timestamp,delivery_timestamp,order_rating
0,2126,ORD100000,1181.01,4,Ahmedabad,2025-06-06 09:42:36,2025-06-06 10:07:36,2025-06-06 10:31:36,1
1,2459,ORD100001,1422.54,2,Mumbai,2025-06-02 05:08:22,2025-06-02 05:33:22,2025-06-02 06:29:22,5
2,1860,ORD100002,268.19,5,Kolkata,2025-06-14 08:25:56,2025-06-14 08:36:56,2025-06-14 09:09:56,5
3,2294,ORD100003,1766.92,3,Ahmedabad,2025-06-12 21:19:10,2025-06-12 21:48:10,2025-06-12 22:39:10,3
4,2130,ORD100004,1124.13,2,Mumbai,2025-06-11 20:03:33,2025-06-11 20:29:33,2025-06-11 21:07:33,2


In [12]:
# Dimensions of the frame as (row count, column count).
df.shape

(100000, 9)

In [13]:
# Write the generated orders to a CSV file, using a path relative to the
# current working directory. index=False keeps the positional row index out of
# the file.
output_csv = "swiggy_orders.csv"
df.to_csv(output_csv, index=False)
