In [1]:
import pandas as pd
import random
import numpy as np
from datetime import datetime, timedelta

In [2]:
# Number of synthetic booking rows to generate.
num_rows = 100_000

In [3]:
# First calendar day (inclusive) from which booking dates are drawn.
start_date = datetime(year=2025, month=2, day=1)

In [4]:
# Last calendar day (inclusive) from which booking dates are drawn.
end_date = datetime(year=2025, month=2, day=28)

In [5]:
# Vehicle categories; each generated booking picks one of these at random.
vehicle_types = [
    "Auto",
    "Prime Plus",
    "Prime Sedan",
    "Mini",
    "Bike",
    "eBike",
    "Prime SUV",
]

In [6]:
# Localities a ride can start from (five per row; order is significant because
# random.choice indexes into this list).
pickup_locations = [
    "Connaught Place", "Saket", "Dwarka", "Rohini", "Pitampura",
    "Vasant Kunj", "Karol Bagh", "Lajpat Nagar", "Noida Sector 18", "Gurgaon Cyber City",
    "Greater Kailash", "Mayur Vihar", "Nehru Place", "South Extension", "Kalkaji",
    "Uttam Nagar", "Rajouri Garden", "Punjabi Bagh", "Tilak Nagar", "Indirapuram",
    "Paschim Vihar", "Chandni Chowk", "Preet Vihar", "Shahdara", "Ghaziabad",
    "Faridabad", "Sushant Lok", "DLF Phase 1", "DLF Phase 2", "DLF Phase 3",
    "Green Park", "Hauz Khas", "Ashok Vihar", "Model Town", "Mehrauli",
    "Okhla", "Jasola", "Sarita Vihar", "Yamuna Vihar", "Janakpuri",
    "Laxmi Nagar", "Patparganj", "Vivek Vihar", "Nangloi", "Najafgarh",
    "Badarpur", "Mahipalpur", "Dwarka Mor", "Gagan Vihar", "Chhatarpur",
]

In [7]:
# Drop-offs use the same localities as pickups; a shallow copy keeps the two
# lists independent of each other.
drop_locations = list(pickup_locations)

In [8]:
# Reasons attached to bookings whose status is "Cancelled by Customer".
customer_cancel_reasons = [
    "Driver is not moving towards pickup location",
    "Driver asked to cancel",
    "AC is not working",
    "Change of plans",
    "Wrong Address",
]

In [9]:
# Reasons attached to bookings whose status is "Cancelled by Driver".
driver_cancel_reasons = [
    "Personal & Car related issues",
    "Customer related issue",
    "The customer was coughing/sick",
    "More than permitted people in there",
]

In [10]:
# Reasons attached to bookings whose status is "Incomplete".
incomplete_ride_reasons = [
    "Customer Demand",
    "Vehicle Breakdown",
    "Other Issue",
]

In [14]:
# Generate data
# Builds `data`: one list per synthetic booking, with fields in the same order
# as the DataFrame column names defined in the next cell.

# Seed the stdlib RNG so a fresh kernel + Run All reproduces the same dataset.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

# Loop-invariant: whole days between the first and last allowed booking date.
date_span_days = (end_date - start_date).days

# Draw booking numbers WITHOUT replacement so every "Booking ID" is unique.
# Independent randint() draws from the 900,000 six-digit values collide
# thousands of times at 100,000 rows. The range only grows past six digits
# when num_rows is larger than the six-digit space itself.
booking_number_stop = max(1_000_000, 100_000 + num_rows)
booking_numbers = random.sample(range(100_000, booking_number_stop), num_rows)

# For each pickup, the drop-offs that differ from it, so no ride starts and ends
# at the same place. Falls back to the full list if filtering leaves nothing.
drops_by_pickup = {
    pickup: [drop for drop in drop_locations if drop != pickup] or drop_locations
    for pickup in pickup_locations
}

data = []
for booking_number in booking_numbers:
    date = start_date + timedelta(days=random.randint(0, date_span_days))
    # Build the time-of-day directly rather than formatting and re-parsing a string.
    time = datetime.min.replace(hour=random.randint(0, 23), minute=random.randint(0, 59)).time()
    booking_id = f"BK{booking_number}"
    customer_id = f"CUST{random.randint(10000, 99999)}"
    vehicle_type = random.choice(vehicle_types)
    pickup_location = random.choice(pickup_locations)
    drop_location = random.choice(drops_by_pickup[pickup_location])

    # 62% success rate for booking status; the remainder is split evenly
    # between the three failure statuses.
    is_successful = random.random() < 0.62
    booking_status = "Success" if is_successful else random.choice(["Cancelled by Customer", "Cancelled by Driver", "Incomplete"])

    # Ride metrics and ratings exist only for successful bookings; otherwise None.
    avg_vtat = round(random.uniform(2, 10), 2) if is_successful else None
    avg_ctat = round(random.uniform(3, 12), 2) if is_successful else None
    ride_distance = round(random.uniform(2, 25), 2) if is_successful else None
    # Booking value is the distance scaled by a random per-unit rate in [8, 20].
    booking_value = round(ride_distance * random.uniform(8, 20), 2) if is_successful else None

    # 0/1 indicator per failure status, each paired with a reason that is only
    # filled in when its indicator is 1.
    cancelled_by_customer = 1 if booking_status == "Cancelled by Customer" else 0
    reason_cancel_customer = random.choice(customer_cancel_reasons) if cancelled_by_customer else None

    cancelled_by_driver = 1 if booking_status == "Cancelled by Driver" else 0
    reason_cancel_driver = random.choice(driver_cancel_reasons) if cancelled_by_driver else None

    incomplete_rides = 1 if booking_status == "Incomplete" else 0
    reason_incomplete = random.choice(incomplete_ride_reasons) if incomplete_rides else None

    driver_rating = round(random.uniform(3, 5), 1) if is_successful else None
    customer_rating = round(random.uniform(3, 5), 1) if is_successful else None

    data.append([
        date, time, booking_id, booking_status, customer_id, vehicle_type, pickup_location, drop_location,
        avg_vtat, avg_ctat, cancelled_by_customer, reason_cancel_customer, cancelled_by_driver, reason_cancel_driver,
        incomplete_rides, reason_incomplete, booking_value, ride_distance, driver_rating, customer_rating
    ])

In [15]:
# Create DataFrame
# Column headers, listed in the same order as the fields of each generated row.
columns = [
    # when / which booking
    "Date", "Time", "Booking ID", "Booking Status", "Customer ID",
    # what / where
    "Vehicle Type", "Pickup Location", "Drop Location",
    # turnaround metrics
    "Avg VTAT", "Avg CTAT",
    # failure indicators and their reasons
    "Cancelled Rides by Customer", "Reason for cancelling by Customer",
    "Cancelled Rides by Driver", "Reason for cancelling by Driver",
    "Incomplete Rides", "Incomplete Rides Reason",
    # ride outcome
    "Booking Value", "Ride Distance", "Driver Ratings", "Customer Rating",
]

In [16]:
# Assemble the generated rows into a single frame, labelling fields by position.
df = pd.DataFrame(data=data, columns=columns)

In [None]:
# Save to CSV
# index=False keeps the frame's row index out of the written file.
file_path = "/mnt/data/delhi_ncr_rides_data.csv"
df.to_csv(path_or_buf=file_path, index=False)

# Bare last expression so the notebook displays where the file was written.
file_path