# Synthetic data generation

In [3]:
import random
from datetime import datetime, timedelta
from pathlib import Path

import numpy as np
import pandas as pd
from geopy.distance import geodesic

In [4]:
# Random pickup / drop coordinates are sampled inside this bounding box (Delhi).
min_lat = 28.40
max_lat = 28.88
min_lon = 76.84
max_lon = 77.34

def generate_random_coordinates(n, lat_bounds=None, lon_bounds=None, rng=None):
    """Sample ``n`` random pickup and drop-off points inside a bounding box.

    Parameters
    ----------
    n : int
        Number of pickup/drop pairs to draw.
    lat_bounds, lon_bounds : tuple of (low, high), optional
        Latitude / longitude limits to sample within. When omitted, the
        module-level ``(min_lat, max_lat)`` / ``(min_lon, max_lon)`` are used.
    rng : optional
        Object exposing ``uniform(low, high, size)``, e.g. the result of
        ``np.random.default_rng(seed)``. Defaults to the global ``np.random``
        state, which is what this function always used before.

    Returns
    -------
    tuple
        ``(pickup_lat, pickup_lon, drop_lat, drop_lon)`` - four NumPy arrays
        holding ``n`` values each.
    """
    # The module globals are only looked up when no explicit bounds are given.
    lat_low, lat_high = (min_lat, max_lat) if lat_bounds is None else lat_bounds
    lon_low, lon_high = (min_lon, max_lon) if lon_bounds is None else lon_bounds
    sampler = np.random if rng is None else rng

    # Keep the draw order fixed (pickup lat, pickup lon, drop lat, drop lon)
    # so that a seeded run always yields the same arrays.
    pickup_lat = sampler.uniform(lat_low, lat_high, n)
    pickup_lon = sampler.uniform(lon_low, lon_high, n)
    drop_lat = sampler.uniform(lat_low, lat_high, n)
    drop_lon = sampler.uniform(lon_low, lon_high, n)
    return pickup_lat, pickup_lon, drop_lat, drop_lon

# Seed both random sources used in this notebook (NumPy and the stdlib `random`
# module) before the first draw, so a fresh Restart & Run All regenerates the
# same synthetic dataset every time.
SEED = 42
np.random.seed(SEED)
random.seed(SEED)

n = 500  # number of simulated orders
pickup_lat, pickup_lon, drop_lat, drop_lon = generate_random_coordinates(n)


In [5]:
# Random per-order features: vehicle, weather, order time and weekday.
# The lists below are the values each categorical feature can take.

vehicle_types = [
    'bike',
    'scooter',
    'car',
]
weather_conditions = [
    'clear',
    'rainy',
    'foggy',
]
days_of_week = [
    'Monday',
    'Tuesday',
    'Wednesday',
    'Thursday',
    'Friday',
    'Saturday',
    'Sunday',
]

def random_time():
    """Return a random order time as a zero-padded ``"HH:MM"`` string.

    The hour is drawn from 6..22 (both inclusive) and the minute from the
    quarter-hour marks 0, 15, 30 and 45.
    """
    hh = random.randint(6, 22)
    mm = random.choice([0, 15, 30, 45])
    return "{:02d}:{:02d}".format(hh, mm)

def _pick_many(options, count):
    """Return ``count`` independent ``random.choice`` picks from ``options``."""
    return [random.choice(options) for _ in range(count)]


# One value per order; the features are drawn one after another
# (vehicles, then weather, then weekday, then order time).
vehicles = _pick_many(vehicle_types, n)
weathers = _pick_many(weather_conditions, n)
days = _pick_many(days_of_week, n)
order_times = [random_time() for _ in range(n)]

In [6]:
# Pickup-to-drop distance in kilometres for every order, rounded to 2 decimals.
# Note: geopy's `geodesic` measures along an ellipsoidal Earth model; it is not
# the spherical haversine / great-circle formula (geopy offers that separately
# as `great_circle`).

distances = [
    round(geodesic((p_lat, p_lon), (d_lat, d_lon)).km, 2)
    for p_lat, p_lon, d_lat, d_lon in zip(pickup_lat, pickup_lon, drop_lat, drop_lon)
]


In [7]:
# Simulate each order's delivery duration (minutes) from its distance,
# vehicle type and weather.

base_speed = {'bike': 35, 'scooter': 30, 'car': 25}  # km/h
weather_delay = {'clear': 1.0, 'rainy': 1.25, 'foggy': 1.4}  # multiplier applied to the travel time

delivery_times = []
for km, vehicle, weather in zip(distances, vehicles, weathers):
    # distance / speed gives hours; convert to minutes, then stretch by the weather factor
    travel_min = (km / base_speed[vehicle]) * 60 * weather_delay[weather]
    # add Gaussian noise (mean 2, std 1)
    noisy_min = travel_min + np.random.normal(2, 1)
    # clamp to a 5-minute minimum and keep two decimals
    delivery_times.append(round(max(noisy_min, 5), 2))


In [8]:
# Assemble the simulated orders into one table and export it as CSV.

df = pd.DataFrame({
    'pickup_lat': pickup_lat,
    'pickup_lon': pickup_lon,
    'drop_lat': drop_lat,
    'drop_lon': drop_lon,
    'vehicle_type': vehicles,
    'order_time': order_times,
    'day_of_week': days,
    'weather': weathers,
    'distance_km': distances,
    'delivery_time_min': delivery_times
})

# Build the output location from the current user's home directory instead of a
# hard-coded /Users/<name>/... path, so the notebook also runs on other machines.
output_csv = Path.home() / 'RouteWise' / 'data' / 'simulated_delivery_data.csv'
output_csv.parent.mkdir(parents=True, exist_ok=True)  # to_csv does not create missing folders
df.to_csv(output_csv, index=False)
df.head()


Unnamed: 0,pickup_lat,pickup_lon,drop_lat,drop_lon,vehicle_type,order_time,day_of_week,weather,distance_km,delivery_time_min
0,28.621325,77.234181,28.807159,77.281354,car,06:45,Thursday,foggy,21.11,72.99
1,28.679671,76.994958,28.556067,76.941465,scooter,12:15,Thursday,clear,14.66,29.1
2,28.828712,77.16875,28.764905,77.025425,bike,21:30,Friday,clear,15.68,27.58
3,28.837595,76.944271,28.483177,76.919257,car,06:00,Tuesday,clear,39.36,95.23
4,28.479233,77.242343,28.661209,77.199906,scooter,14:15,Tuesday,rainy,20.59,53.11
