In [None]:
import pandas as pd
import numpy as np
import os
from dataclasses import dataclass
from typing import List, Tuple, Literal
from itertools import product
from pathlib import Path

In [None]:
# Optional (Google Colab only): uncomment the next two lines to mount Google Drive.
#from google.colab import drive
#drive.mount('/content/drive')

In [None]:
# Directory that receives the files written by this notebook. An empty path
# resolves to the current working directory.
outdir = Path("")

# Create the directory (including missing parents) when it does not exist yet,
# then make it the working directory so that relative file names used later
# are resolved inside it.
os.makedirs(outdir, exist_ok=True)
os.chdir(outdir)

In [None]:
@dataclass
class SimulationConfig:
    """Parameter bundle describing one synthetic data-generation run.

    Attributes:
        seed: Seed for the random number generator used by the data creator.
        sim_duration: Length of the simulated horizon; arrivals are generated
            only while the running arrival clock is below this value.
        lambda_p: Arrival rate (lambda) of the passenger Poisson process.
        passenger_priority_probability_set: Probabilities for which a binary
            passenger priority column is generated (one column per value).
        passenger_patience_before: Mean number of periods a passenger waits
            before matching.
        lambda_d: Arrival rate (lambda) of the driver Poisson process.
        driver_priority_probability_set: Probabilities for which a binary
            driver priority column is generated (one column per value).
        driver_patience_before: Mean number of periods a driver waits before
            matching.
    """

    # General
    seed: int
    sim_duration: float

    # Passenger side
    lambda_p: float
    passenger_priority_probability_set: List[float]
    passenger_patience_before: float

    # Driver side
    lambda_d: float
    driver_priority_probability_set: List[float]
    driver_patience_before: float

In [None]:
class SimulationDataCreator:
    """Generates synthetic passenger and driver datasets for assignment simulations.

    Every random draw is taken from a private ``numpy.random.RandomState``
    seeded with ``config.seed``. That generator produces the same stream as
    ``np.random.seed(config.seed)`` followed by the module-level
    ``np.random.*`` functions, but it belongs to the instance: constructing
    another creator, or any other use of ``np.random``, cannot change the data
    this instance generates, and NumPy's global random state is left untouched.
    """

    def __init__(self, config: "SimulationConfig") -> None:
        """Stores the configuration and seeds the instance-local generator.

        Args:
            config (SimulationConfig): Run parameters. Only its attributes are
                read, so any object exposing the same fields works.
        """
        self.config = config
        # Instance-local RNG instead of np.random.seed(): no shared global state.
        self._rng = np.random.RandomState(config.seed)

    def _generate_arrival_times(self, lam: float) -> np.ndarray:
        """Generates the arrival times of a Poisson process over the simulation horizon.

        Interarrival gaps are drawn from an exponential distribution with mean
        ``1 / lam`` and accumulated while the running time is below
        ``config.sim_duration``; the draw that crosses the horizon is dropped.

        The cumulative times are rounded once, at the end. Rounding each gap
        before summing lets the rounding errors accumulate, so late arrival
        times drift away from the simulated clock and can end up past
        ``sim_duration``.

        Args:
            lam (float): The rate parameter (lambda) for the Poisson process.

        Returns:
            np.ndarray: Non-decreasing arrival times rounded to 3 decimals;
            empty when no arrival falls inside the horizon.

        Raises:
            ZeroDivisionError: If ``lam`` is 0 and at least one draw is needed.
        """
        horizon = self.config.sim_duration
        arrival_times = []
        clock = 0.0
        while clock < horizon:
            clock += self._rng.exponential(1 / lam)
            if clock < horizon:
                arrival_times.append(clock)

        return np.around(np.array(arrival_times, dtype=float), 3)

    def _generate_locations(self, size: int, scale: float = 2000) -> List[Tuple[float, float]]:
        """Generates (x, y) coordinates for arrivals.

        Each coordinate is an exponentially distributed magnitude with mean
        ``scale`` that keeps its sign or is negated with equal probability.
        All x values are drawn first (magnitudes, then signs), followed by all
        y values, which is the same draw order as a one-at-a-time loop.

        Args:
            size (int): Number of locations to generate.
            scale (float): Mean absolute value of each coordinate.

        Returns:
            List[Tuple[float, float]]: ``size`` coordinate pairs rounded to
            3 decimals.
        """

        def signed_offsets() -> np.ndarray:
            """Draws ``size`` magnitudes and flips the sign where the uniform draw is <= 0.5."""
            magnitudes = self._rng.exponential(scale, size)
            sign_draws = self._rng.rand(size)
            return np.around(np.where(sign_draws > 0.5, magnitudes, -magnitudes), 3)

        x_coords = signed_offsets()
        y_coords = signed_offsets()
        return list(zip(x_coords, y_coords))

    def _generate_priorities(self, size: int, priority_set: List[float]) -> pd.DataFrame:
        """Generates a DataFrame of binary priority status columns.

        One independent set of uniform draws is made per probability, so the
        columns are not nested (a row flagged at 0.05 is not necessarily
        flagged at 0.3).

        Args:
            size (int): Number of entities.
            priority_set (List[float]): List of priority probabilities.

        Returns:
            pd.DataFrame: One 0/1 integer column named ``priority_<p>`` per
            probability ``p``.
        """
        columns = {
            f"priority_{p}": (self._rng.uniform(size=size) < p).astype(int)
            for p in priority_set
        }
        return pd.DataFrame(columns)

    def _generate_waits(self, size: int, patience: float) -> np.ndarray:
        """Generates waiting periods before matching.

        Args:
            size (int): Number of entities.
            patience (float): Mean waiting period before matching.

        Returns:
            np.ndarray: Integer Poisson draws with mean ``patience``.
        """
        # Poisson draws are already integers, so no rounding is needed.
        return self._rng.poisson(lam=patience, size=size)

    def _generate_max_distances(self, size: int, mean_distance: float = 3) -> np.ndarray:
        """Generates maximum willing distance.

        Args:
            size (int): Number of entities.
            mean_distance (float): Mean of the Poisson distribution the
                distances are drawn from. Defaults to 3.

        Returns:
            np.ndarray: Integer Poisson draws with mean ``mean_distance``.
        """
        return self._rng.poisson(lam=mean_distance, size=size)

    def _generate_entities(self, lam: float, patience: float, priority_set: List[float], entity_type: Literal["passenger", "driver"]) -> pd.DataFrame:
        """Generic method to generate entities (passengers or drivers).

        Args:
            lam (float): Arrival rate parameter (lambda).
            patience (float): Mean waiting period before matching.
            priority_set (List[float]): List of priority probabilities.
            entity_type (Literal["passenger", "driver"]): Type of entity to generate.

        Returns:
            pd.DataFrame: One row per arrival with columns ``id``,
            ``arrival_time``, ``loc``, ``waits_until_matching``, the priority
            columns, and empty (NaN) result columns ``status`` and
            ``assignment_time``. Passengers additionally get
            ``willing_distance``, ``assignment_distance`` and
            ``assigned_driver``; drivers get ``assigned_passenger``.

        Raises:
            ValueError: If ``entity_type`` is neither "passenger" nor "driver".
        """
        if entity_type not in ('passenger', 'driver'):
            raise ValueError(
                f"entity_type must be 'passenger' or 'driver', got {entity_type!r}"
            )
        is_passenger = entity_type == 'passenger'

        # The order of these calls fixes the order in which random numbers are
        # consumed, and therefore the data produced for a given seed.
        arrival_times = self._generate_arrival_times(lam)
        size = len(arrival_times)
        locations = self._generate_locations(size)
        waits = self._generate_waits(size, patience)
        max_distance = self._generate_max_distances(size) if is_passenger else None
        priorities = self._generate_priorities(size, priority_set)

        id_prefix = 'p' if is_passenger else 'd'
        id_width = len(str(size))  # zero-pad ids to a common width
        data = {
            'id': np.array([f"{id_prefix}_{i:0{id_width}d}" for i in range(size)]),
            'arrival_time': arrival_times,
            'loc': locations,
            # Last period the entity waits: arrival rounded up to a whole
            # period plus the drawn number of waiting periods.
            'waits_until_matching': np.ceil(arrival_times) + waits,
        }
        if is_passenger:
            data['willing_distance'] = max_distance

        df = pd.concat([pd.DataFrame(data), priorities], axis=1)

        # Result columns, left empty here (all NaN).
        df['status'] = pd.Series(dtype='str')
        df['assignment_time'] = pd.Series(dtype='float64')
        if is_passenger:
            df['assignment_distance'] = pd.Series(dtype='float64')
            df['assigned_driver'] = pd.Series(dtype='str')
        else:
            df['assigned_passenger'] = pd.Series(dtype='str')

        return df

    def create_data(self) -> Tuple[pd.DataFrame, pd.DataFrame]:
        """Generates and returns passenger and driver datasets.

        Passengers are generated first and drivers second, both from this
        instance's random stream. Calling the method again continues the
        stream, so it returns different data than the first call.

        Returns:
            Tuple[pd.DataFrame, pd.DataFrame]: ``(passenger_df, driver_df)``.
        """
        passenger_df = self._generate_entities(
            lam=self.config.lambda_p,
            patience=self.config.passenger_patience_before,
            priority_set=self.config.passenger_priority_probability_set,
            entity_type='passenger'
        )

        driver_df = self._generate_entities(
            lam=self.config.lambda_d,
            patience=self.config.driver_patience_before,
            priority_set=self.config.driver_priority_probability_set,
            entity_type='driver'
        )

        return passenger_df, driver_df

In [None]:
# ---------------------------------------------------------------------------
# Parameter grid
# ---------------------------------------------------------------------------
# Passenger side
lambda_p_set = [10, 20, 30]  # expected number of passengers arriving per batching period
passenger_priority_probability_set = [0.05, 0.1, 0.2, 0.3]
patience_before_passenger_set = [3, 5, 10]  # expected number of batching periods a passenger waits before assignment

# Driver side
lambda_d_ratio_set = [0.25, 0.5, 1, 2]  # driver/passenger arrival ratio
driver_priority_probability_set = [0.05, 0.1, 0.2, 0.3]
patience_before_driver_set = [3, 5, 10]  # expected number of batching periods a driver waits before assignment

# Simulation control: every parameter set is generated once per seed.
sim_duration = 500
seeds = [231, 313, 42]


def _loc_as_text(point):
    """Renders an (x, y) pair as the text "(x, y)" for CSV export."""
    return f"({point[0]}, {point[1]})"


# ---------------------------------------------------------------------------
# Generate one passenger file and one driver file per (parameter set, seed)
# ---------------------------------------------------------------------------
param_records = []
set_count = 0  # stays 0 if the grid is empty

parameter_grid = product(
    lambda_p_set,
    lambda_d_ratio_set,
    patience_before_passenger_set,
    patience_before_driver_set,
)

for set_count, (lambda_p,
                lambda_d_ratio,
                passenger_patience_before,
                driver_patience_before) in enumerate(parameter_grid, start=1):

    # Driver arrival rate is expressed relative to the passenger rate.
    lambda_d = lambda_p * lambda_d_ratio

    param_record = {
        "set_id": set_count,
        "lambda_p": lambda_p,
        "lambda_d": lambda_d,
        "passenger_patience_before": passenger_patience_before,
        "driver_patience_before": driver_patience_before,
    }
    param_records.append(param_record)

    print(f"[Set {set_count:03d}] λp={lambda_p}, λd={lambda_d}, "
          f"PassengerPatience={passenger_patience_before}, DriverPatience={driver_patience_before}")

    for count, seed in enumerate(seeds, start=1):
        config = SimulationConfig(
            seed=seed,
            sim_duration=sim_duration,
            lambda_p=lambda_p,
            passenger_priority_probability_set=passenger_priority_probability_set,
            passenger_patience_before=passenger_patience_before,
            lambda_d=lambda_d,
            driver_priority_probability_set=driver_priority_probability_set,
            driver_patience_before=driver_patience_before
        )
        sim_data_creator = SimulationDataCreator(config)
        passenger_df, driver_df = sim_data_creator.create_data()

        # Export copies so the in-memory frames keep their tuple locations;
        # the CSV stores each location as the text "(x, y)".
        passenger_df_copy = passenger_df.copy()
        driver_df_copy = driver_df.copy()
        passenger_df_copy['loc'] = passenger_df_copy['loc'].apply(_loc_as_text)
        driver_df_copy['loc'] = driver_df_copy['loc'].apply(_loc_as_text)

        # File names encode the parameter set id and the 1-based seed index.
        passenger_df_copy.to_csv(f'pdf_{set_count:03d}_{count:03d}.csv', index=False)
        driver_df_copy.to_csv(f'ddf_{set_count:03d}_{count:03d}.csv', index=False)

# Lookup table mapping each set_id to its parameter values.
pd.DataFrame(param_records).to_csv("param_set_list.csv", index=False)