# TaxiCab Simulation

In [2]:
import os
import numpy as np

In [3]:
# change working path
os.chdir('../')
os.getcwd()

'/home/box/dev/ptsa'

In [36]:
def taxi_id_number(num_taxis):
    arr = np.arange(num_taxis)
    np.random.shuffle(arr)
    for i in range(num_taxis):
        yield arr[i]

In the case of this simple taxi simulation, we compartmentalize our taxis into differ‐
ent shifts, and we also use a generator to indicate shifts. We schedule more taxis in
the middle of the day than in the evening or overnight shifts by setting different
probabilities for starting a shift at a given time:

In [37]:
def shift_info():
    start_times_and_freqs = [(0, 8), (8, 30), (16, 15)]
    indices = np.arange(len(start_times_and_freqs))
    while True:
        idx = np.random.choice(indices, p = [0.25, 0.5, 0.25])
        start = start_times_and_freqs[idx]
        yield (start[0], start[0] + 7.5, start[1])

Pay attention to start_times_and_freqs . This is our first bit of code that will con‐
tribute to making this a time series simulation. We are indicating that different parts
of the day have different likelihoods of having a taxi assigned to the shift. Addition‐
ally, different times of the day have a different mean number of trips.

Now we create a more complex generator that will use the preceding generators to
establish individual taxi parameters as well as create individual taxi timelines:

In [58]:
def taxi_process(taxi_id_generator, shift_info_generator):
    taxi_id = next(taxi_id_generator)
    shift_start, shift_end, shift_mean_trips = next(shift_info_generator)
    actual_trips = round(np.random.normal(loc = shift_mean_trips,
                                          scale = 2))
    average_trip_time = 6.5 / shift_mean_trips * 60
    # convert mean trip time to minutes
    between_events_time = 1.0 / (shift_mean_trips - 1) * 60
    # this is an effivient city where cabs are seldom unused
    time = shift_start
    yield TimePoint(taxi_id, 'start shift    ', time)
    deltaT = np.random.poisson(average_trip_time) / 60
    time += deltaT
    for i in range(actual_trips):
        yield TimePoint(taxi_id, 'pick up    ', time)
        deltaT = np.random.poisson(average_trip_time) / 60
        time += deltaT
        yield TimePoint(taxi_id, 'drop of    ', time)
        deltaT = np.random.poisson(between_events_time) / 60
        time += deltaT
    deltaT = np.random.poisson(between_events_time) / 60
    time += deltaT
    yield TimePoint(taxi_id, 'end shift   ', time)

The taxi generator yields TimePoint s, which are defined as follows:

In [59]:
from dataclasses import dataclass

@dataclass
class TimePoint:
    taxi_id: int
    name: str
    time: float
        
    def __lt__(self, other):
        return self.time < other.time
    
    def __str__(self):
        return f'id:{self.taxi_id}, name:{self.name}, time:{self.time}'


A priority queue is an abstract data type into which objects can be inserted in any
order but which will emit objects in a specified order based on their priority.

We have a simulation class to run these taxi generators and keep them assembled.
This is not merely a dataclass because it has quite a bit of functionality, even in the
initializer, to arrange the inputs into a sensible array of information and processing.
Note that the only public-facing functionality is the run() function:

In [60]:
import queue

class Simulator:
    def __init__(self, num_taxis):
        self._time_points = queue.PriorityQueue()
        taxi_id_generator = taxi_id_number(num_taxis)
        shift_info_generator = shift_info()
        self._taxis = [taxi_process(taxi_id_generator, shift_info_generator)
                                    for i in range(num_taxis)]
        self._prepare_run()
    
    def _prepare_run(self):
        for t in self._taxis[0]:
            print(t)
            while True:
                try:
                    e = next(t)
                    self._time_points.put(e)
                except:
                    break
    
    def run(self):
        sim_time = 0
        while sim_time < 24:
            if self._time_points.empty():
                break
            p = self._time_points.get()
            sim_time = p.time
            print(p)



First, we create the number of taxi generators that we need to represent the right
number of taxis. Then we run through each of these taxis while it still has TimePoint s
and push all these TimePoint s into a priority queue. The priority of the object is
determined for a custom class such as TimePoint by our implementation of a Time
Point ’s __lt__ , where we compare start time. So, as the TimePoint s are pushed into
the priority queue, it will prepare them to be emitted in temporal order.


In [71]:
sim = Simulator(1000)
sim.run()

id:232, name:start shift    , time:8
id:232, name:pick up    , time:8.233333333333333
id:232, name:drop of    , time:8.433333333333332
id:232, name:pick up    , time:8.483333333333333
id:232, name:drop of    , time:8.566666666666666
id:232, name:pick up    , time:8.583333333333334
id:232, name:drop of    , time:8.783333333333333
id:232, name:pick up    , time:8.816666666666666
id:232, name:drop of    , time:8.95
id:232, name:pick up    , time:9.016666666666666
id:232, name:drop of    , time:9.216666666666665
id:232, name:pick up    , time:9.233333333333333
id:232, name:drop of    , time:9.516666666666666
id:232, name:pick up    , time:9.533333333333333
id:232, name:drop of    , time:9.666666666666666
id:232, name:pick up    , time:9.683333333333334
id:232, name:drop of    , time:9.933333333333334
id:232, name:pick up    , time:9.966666666666667
id:232, name:drop of    , time:10.233333333333334
id:232, name:pick up    , time:10.250000000000002
id:232, name:drop of    , time:10.416666666