In [32]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta


def init_locations(n_drivers):
    lat = np.random.uniform(-90, 90, n_drivers)
    long = np.random.uniform(-180, 180, n_drivers)
    locations = np.vstack((lat, long))
    return locations

def apply_random_movement(locations, size, weights=None, max_distance=10):
    if not weights:
        weights = np.ones(size)
    
    delta = (np.hstack((weights, weights)) * np.random.normal(0, max_distance/4, 2*size)).reshape((2, -1))
    new_loc = delta + locations
    return np.vstack((
        np.clip(new_loc[0, :], -90, 90), 
        np.clip(new_loc[1, :], -180, 180)
    ))

def rand_wait_time(n_batch, max_wait=120):
    # Random wait time between 1 and 60 seconds  
    wait_time = np.random.uniform(1, max_wait, n_batch)  
    wait_time = wait_time.cumsum()
    return wait_time

def update_location(locations, selected, weights):
    n_batch = selected.shape[0]

    X = locations[:, selected]
    X = apply_random_movement(X, n_batch, weights)
    
    return X
    
def generate_drivers_data(n_drivers, driver_batch_size, n_max_iterations):
    # Set a start time
    time = datetime(2024, 1, 1, 8, 0, 0)

    last_stamp = { i: -1 for i in range(n_drivers) }
    locations = init_locations(n_drivers)

    for it in range(n_max_iterations):
        selected = np.random.choice(n_drivers, driver_batch_size, replace=False)
        
        wait_time = rand_wait_time(driver_batch_size)

        weights = [last_stamp[i]-it for i in selected]
        locations[:, selected] = update_location(locations, selected, weights)
        stack = []
        
        for i, idx in enumerate(selected):
            time += timedelta(seconds=wait_time[i])
            timestamp = time.strftime('%Y-%m-%d %H:%M:%S')

            row = {
                'Timestamp': timestamp, 
                'Driver_ID': idx,
                'Latitude': locations[0, idx], 
                'Longitude': locations[1, idx], 
                }
            stack.append(row)
            
            last_stamp[idx] = it


        yield pd.DataFrame(stack)


# prints 20 * 200 rows
for df in generate_drivers_data(n_drivers=100, driver_batch_size=20, n_max_iterations=200):
    print(df)

              Timestamp  Driver_ID   Latitude   Longitude
0   2024-01-01 08:00:47         71  27.379591  120.525272
1   2024-01-01 08:02:42         33 -82.660451   11.530386
2   2024-01-01 08:05:36         19 -75.166257   17.784298
3   2024-01-01 08:08:36         44  72.513418   99.934166
4   2024-01-01 08:12:13         30 -52.970621 -152.943634
5   2024-01-01 08:17:31         42 -53.211990  -47.499672
6   2024-01-01 08:23:05         78 -47.431277  138.448543
7   2024-01-01 08:29:28         25  14.631660  -65.868271
8   2024-01-01 08:37:27         55  74.260979  -42.278410
9   2024-01-01 08:47:10         85 -90.000000 -143.819844
10  2024-01-01 08:58:34         52  58.357250   85.670226
11  2024-01-01 09:11:42         65  65.755665   -1.075780
12  2024-01-01 09:25:56         75  72.734484  -12.536688
13  2024-01-01 09:41:27         91 -31.659893  142.910462
14  2024-01-01 09:57:03         39  24.032251  106.378085
15  2024-01-01 10:14:02         38 -59.249074   15.966219
16  2024-01-01