In [None]:
import math 
import pandas as pd
import numpy as np
from networkx.algorithms import bipartite
import networkx as nx
from ortools.linear_solver import pywraplp as OR
import matplotlib.pyplot as plt
from matplotlib.ticker import (MultipleLocator, AutoMinorLocator)
import copy
import pickle
from bokeh import palettes
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.tile_providers import get_provider, Vendors
from bokeh.models import (GraphRenderer, Circle, MultiLine, StaticLayoutProvider,
                          HoverTool, TapTool, EdgesAndLinkedNodes, NodesAndLinkedEdges,
                          ColumnDataSource, LabelSet, NodesOnly)
from bipartite_matching import *
output_notebook()

## Retrospective Minimum Fleet-size Problem to Bipartite Matching Formulation

Consider a minimum fleet-size problem over some time horizon. Over this time period, we know all the ride requests we will receive. Each ride request has a start/end location, start time, and duration. A retrospective management question is to find the minimum number of vehicles needed to cover all the rides. How can we create a bipartite matching formulation to solve this problem?

In [None]:
# Read the taxi trips and the NYC street nodes/arcs (Manhattan subset),
# then discard the identifier / saved-index column of each file.
trips_df = pd.read_csv('data/2013-09-01_trip_data_manhattan.csv')
trips_df = trips_df.drop(columns='id')
nodes_df = pd.read_csv('data/nyc_nodes_manhattan.csv')
nodes_df = nodes_df.drop(columns='Unnamed: 0')
arcs_df = pd.read_csv('data/nyc_links_manhattan.csv')
arcs_df = arcs_df.drop(columns='Unnamed: 0')

# Read the travel-time matrix: the first CSV column becomes the row index
# and the column labels are cast to float.
times_df = pd.read_csv('data/times.csv', index_col=0)
times_df.columns = times_df.columns.astype(float)

In [None]:
# Ten example trips, identified by their row number in trips_df
ex_trips = [68326, 69501, 70802, 68619, 69802, 70142, 68751, 69558, 70296, 68272]

# Select those rows. `iloc` is positional; trips_df is read without an index
# column, so row positions coincide with the index labels that are later used
# as trip ids.
trips = trips_df.iloc[ex_trips]
# Display the selected trips
trips

In [None]:
# Containers for the drop-off (DO) nodes, pick-up (PU) nodes and candidate edges
DO_nodes, PU_nodes, edges = [], [], []
# Lookup from each trip's PU node to the DO node of the same trip
PUtoDO = {}

# Each trip contributes one PU node and one DO node,
# encoded as (location_id, time, trip_id, 'PU' / 'DO')
for trip_id, trip in trips.iterrows():
    pickup_time = trip['start_time']
    dropoff_time = pickup_time + trip['trip_time']
    pickup = (int(trip['start_node']), pickup_time, trip_id, 'PU')
    dropoff = (int(trip['end_node']), dropoff_time, trip_id, 'DO')
    DO_nodes.append(dropoff)
    PU_nodes.append(pickup)
    PUtoDO[pickup] = dropoff

# Order both sides chronologically (the sort is stable, as with sorted())
DO_nodes.sort(key=lambda node: node[1])
PU_nodes.sort(key=lambda node: node[1])

# Maximum allowable time between a drop-off and the next pick-up
max_waiting_time = 10 # delta

# Connect a drop-off to a later pick-up when the vehicle can get there in time
for dropoff in DO_nodes:
    for pickup in PU_nodes:
        # PU nodes are sorted by time, so every remaining pick-up is too late as well
        if pickup[1] > dropoff[1] + max_waiting_time:
            break
        # Only pick-ups that happen at or after the drop-off are candidates
        if pickup[1] >= dropoff[1]:
            gap = pickup[1] - dropoff[1]
            travel_time = times_df.at[dropoff[0], pickup[0]]
            # Feasible if the drive fits in the gap and the idle time is at most delta
            if gap - max_waiting_time <= travel_time <= gap:
                edges.append((dropoff, pickup))

In [None]:
# Assemble the bipartite graph: DO nodes form part 0 and PU nodes part 1,
# recorded in the node attribute "bipartite"
B = nx.Graph()
for part_id, part_nodes in enumerate((DO_nodes, PU_nodes)):
    B.add_nodes_from(part_nodes, bipartite=part_id)
# Every edge joins a DO node to a PU node
B.add_edges_from(edges)

In [None]:
# Maximum-cardinality matching, with the DO nodes as one side of the bipartition
match = nx.bipartite.maximum_matching(B, DO_nodes)
# The returned dict lists each matched pair twice (once per endpoint), hence the halving
print('Size of max cardinality matching:', len(match) // 2)

In [None]:
# Trace the optimal taxi trajectories implied by the matching
opt_paths = match_to_path(match, trips)

In [None]:
# Plot the bipartite graph together with the matching and the taxi paths
# NOTE(review): the meaning of the trailing True flag is defined in bipartite_matching — confirm
plot_ex_bipartite(B, match, opt_paths, True)

In [None]:
# Build the street network from the node/arc tables (weighted by 'trip_time'),
# then plot the bipartite graph using that network
G = street_network(nodes_df, arcs_df, weight = 'trip_time')
plot_bipartite_graph2(B, match, opt_paths, G, nodes_df)

Only 4 taxis are needed to serve all 10 trips (shown as dotted lines above) according to our matching obtained above.

In [None]:
# Plot the taxi paths obtained from the matching on the map of the street network G
plot_taxi_route(G, opt_paths, nodes_df)

## Matching with Infinite Allowable Waiting Time
## show pairwise incompatible trips

Now, simply change the maximum allowable waiting time to infinity; that is, we assume that drivers can wait at the new pickup location forever.

In [None]:
# Containers for the drop-off (DO) nodes, pick-up (PU) nodes and candidate edges
DO_nodes, PU_nodes, edges = [], [], []
# Lookup from each trip's PU node to the DO node of the same trip
PUtoDO = {}

# Each trip contributes one PU node and one DO node,
# encoded as (location_id, time, trip_id, 'PU' / 'DO')
for trip_id, trip in trips.iterrows():
    pickup_time = trip['start_time']
    dropoff_time = pickup_time + trip['trip_time']
    pickup = (int(trip['start_node']), pickup_time, trip_id, 'PU')
    dropoff = (int(trip['end_node']), dropoff_time, trip_id, 'DO')
    DO_nodes.append(dropoff)
    PU_nodes.append(pickup)
    PUtoDO[pickup] = dropoff

# Order both sides chronologically (the sort is stable, as with sorted())
DO_nodes.sort(key=lambda node: node[1])
PU_nodes.sort(key=lambda node: node[1])

# Drivers may now wait indefinitely at the next pick-up location
max_waiting_time = np.inf # delta

# Connect a drop-off to a later pick-up when the vehicle can get there in time
for dropoff in DO_nodes:
    for pickup in PU_nodes:
        # With an infinite delta this cut-off never triggers; kept for symmetry
        # with the finite-delta construction
        if pickup[1] > dropoff[1] + max_waiting_time:
            break
        # Only pick-ups that happen at or after the drop-off are candidates
        if pickup[1] >= dropoff[1]:
            gap = pickup[1] - dropoff[1]
            travel_time = times_df.at[dropoff[0], pickup[0]]
            # Feasible if the drive fits in the gap (the idle-time bound is vacuous here)
            if gap - max_waiting_time <= travel_time <= gap:
                edges.append((dropoff, pickup))

In [None]:
# Rebuild the bipartite graph from the new edge set: DO nodes form part 0 and
# PU nodes part 1, recorded in the node attribute "bipartite"
B = nx.Graph()
for part_id, part_nodes in enumerate((DO_nodes, PU_nodes)):
    B.add_nodes_from(part_nodes, bipartite=part_id)
# Every edge joins a DO node to a PU node
B.add_edges_from(edges)

In [None]:
# Maximum-cardinality matching, with the DO nodes as one side of the bipartition
match = nx.bipartite.maximum_matching(B, DO_nodes)
# The returned dict lists each matched pair twice (once per endpoint), hence the halving
print('Size of max cardinality matching:', len(match) // 2)

In [None]:
# Trace the optimal taxi trajectories implied by the new matching
opt_paths = match_to_path(match, trips)

In [None]:
# Plot the bipartite graph together with the matching and the taxi paths
# NOTE(review): the meaning of the trailing True flag is defined in bipartite_matching — confirm
plot_ex_bipartite(B, match, opt_paths, True)

In [None]:
# Plot the bipartite graph using the street network G built in an earlier cell
plot_bipartite_graph2(B, match, opt_paths, G, nodes_df)

Notice that the size of the max cardinality matching is unchanged, though the number of edges in the bipartite graph has increased.

In [None]:
# Rebuild the street network (weighted by 'trip_time') and plot the
# corresponding taxi paths on the map
G = street_network(nodes_df, arcs_df, weight = 'trip_time')
plot_taxi_route(G, opt_paths, nodes_df)

### Pairwise Incompatible Trips

Any two trips $N_i = (p_i, T^p_{i}, d_i, T^d_{i})$ and $N_j = (p_j, T^p_{j}, d_j, T^d_{j})$ are considered *compatible* with each other if a single vehicle can feasibly cover both, i.e., if it can travel either from $d_i$ to $p_j$ or from $d_j$ to $p_i$ in time for the next pickup.

*Theorem:* The maximum size of a set of trips that are pairwise incompatible is equal to the minimum number of vehicles needed to cover all of the trips.

In [None]:
# A set of pairwise incompatible trips, given by trip id (hard-coded here, not computed)
incomp_trips = [68272, 68326,  68619, 68751]
# Plot the graph with these trips passed in for highlighting
plot_pairwise_incompatible(B, match, opt_paths, G, nodes_df, incomp_trips)

By the theorem, the maximum size of a set of pairwise incompatible trips should be equal to the minimum number of vehicles needed to cover all the trips, which is 4.  

In the plot above, the set of pairwise incompatible trips is highlighted in red. Notice that this is a maximum such set: we cannot find a larger set in which every pair of trips is incompatible with each other.

## Retrospective Minimum Fleet-size Problem At Scale

Now, filter the trips by time window of interest. The following example selects all the trips from 5 pm to 5:30 pm.

In [None]:
# Time window of interest; 1020 and 1050 correspond to 5:00 pm and 5:30 pm
# when read as minutes after midnight
start_time = 1020
end_time = 1050

# Keep the trips that both start and finish inside the window
starts_in_window = trips_df['start_time'] >= start_time
ends_in_window = trips_df['start_time'] + trips_df['trip_time'] <= end_time
trips = trips_df.loc[starts_in_window & ends_in_window].copy()

# Re-base start times so that the window opens at time 0
trips['start_time'] = trips['start_time'] - start_time

In [None]:
# Containers for the drop-off (DO) nodes, pick-up (PU) nodes and candidate edges
DO_nodes, PU_nodes, edges = [], [], []
# Lookup from each trip's PU node to the DO node of the same trip
PUtoDO = {}

# Each trip contributes one PU node and one DO node,
# encoded as (location_id, time, trip_id, 'PU' / 'DO')
for trip_id, trip in trips.iterrows():
    pickup_time = trip['start_time']
    dropoff_time = pickup_time + trip['trip_time']
    pickup = (int(trip['start_node']), pickup_time, trip_id, 'PU')
    dropoff = (int(trip['end_node']), dropoff_time, trip_id, 'DO')
    DO_nodes.append(dropoff)
    PU_nodes.append(pickup)
    PUtoDO[pickup] = dropoff

# Order both sides chronologically (the sort is stable, as with sorted())
DO_nodes.sort(key=lambda node: node[1])
PU_nodes.sort(key=lambda node: node[1])

# Maximum allowable time between a drop-off and the next pick-up
max_waiting_time = 10

# Connect a drop-off to a later pick-up when the vehicle can get there in time
for dropoff in DO_nodes:
    for pickup in PU_nodes:
        # PU nodes are sorted by time, so every remaining pick-up is too late as well
        if pickup[1] > dropoff[1] + max_waiting_time:
            break
        # Only pick-ups that happen at or after the drop-off are candidates
        if pickup[1] >= dropoff[1]:
            gap = pickup[1] - dropoff[1]
            travel_time = times_df.at[dropoff[0], pickup[0]]
            # Feasible if the drive fits in the gap and the idle time is at most delta
            if gap - max_waiting_time <= travel_time <= gap:
                edges.append((dropoff, pickup))

# Assemble the bipartite graph: DO nodes form part 0 and PU nodes part 1,
# recorded in the node attribute "bipartite"
B2 = nx.Graph()
for part_id, part_nodes in enumerate((DO_nodes, PU_nodes)):
    B2.add_nodes_from(part_nodes, bipartite=part_id)
# Every edge joins a DO node to a PU node
B2.add_edges_from(edges)

In [None]:
# Maximum-cardinality matching, with the DO nodes as one side of the bipartition
match2 = nx.bipartite.maximum_matching(B2, DO_nodes)

# The returned dict lists each matched pair twice (once per endpoint), so its
# length is always even and integer division gives the exact number of pairs.
# Keeping the counts as ints avoids printing them as floats (e.g. "57.0") and
# matches how the matching size is reported in the earlier cells.
num_matched = len(match2) // 2
num_trips = len(DO_nodes)
print('size of max cardinality matching:', num_matched)
print('total number of trips:', num_trips)

# Each matched (drop-off, pick-up) pair chains two trips onto one vehicle,
# so it saves exactly one taxi
num_taxi = num_trips - num_matched
print('min number of taxis needed to cover all trips:', num_taxi)

In [None]:
# Trace the optimal taxi trajectories implied by the at-scale matching
opt_paths2 = match_to_path(match2, trips)
# Rebuild the street network (third argument 'trip_time' is passed as `weight`
# in the earlier cells) and plot only the first 10 taxi paths on the map
G = street_network(nodes_df, arcs_df, 'trip_time')
plot_taxi_route(G, opt_paths2[:10], nodes_df)