In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
import osmnx as ox
from pyproj import CRS, Transformer
from shapely.geometry import LineString, Point, MultiLineString
import math
from tqdm.notebook import trange, tqdm
from time import sleep
from datetime import datetime
from sqlalchemy import create_engine
from itertools import combinations

In [2]:
# Load Manhattan road network with Uber link speed (not a complete link network, contains missing links)
graph_proj_v2 = ox.load_graphml('manhattan_v2.graphml')
# Split into node / edge tables; the edge table's `maxspeed` is read below to override OSM speeds.
nodes_proj_new,edges_proj_new = ox.graph_to_gdfs(graph_proj_v2)

In [3]:
# Create graph of Manhattan Road Network
# NOTE(review): the file is named '.osm' but is read with the GraphML loader — confirm the file format.
graph = ox.load_graphml('manhattan.osm')
# Project to EPSG:2263 so node coordinates match the projected request / vehicle coordinates used later.
graph_proj = ox.project_graph(graph,to_crs=CRS.from_epsg(2263))
# Create graph with travel time
graph_proj_speed = ox.add_edge_speeds(graph_proj)
graph_proj = ox.add_edge_travel_times(graph_proj_speed)
# Node / edge tables of the complete network; `edges_proj.maxspeed` is overwritten in the next cell.
nodes_proj,edges_proj = ox.graph_to_gdfs(graph_proj)

In [4]:
# Replace osm maxspeed with Uber speed
# Index the Uber edges by (u, v) once instead of filtering the whole table for every edge.
# setdefault keeps the first row per pair, which is the row `.values[0]` selected before.
uber_speed_by_pair = {}
for u, v, speed in zip(edges_proj_new['u'], edges_proj_new['v'], edges_proj_new['maxspeed']):
    uber_speed_by_pair.setdefault((u, v), speed)

_NO_UBER_SPEED = object()  # sentinel: distinguishes "no Uber edge" from a stored NaN/None speed
for index, row in edges_proj.iterrows():
    speed = uber_speed_by_pair.get((row.u, row.v), _NO_UBER_SPEED)
    if speed is not _NO_UBER_SPEED:
        edges_proj.loc[index, ['maxspeed']] = speed
    elif isinstance(row.maxspeed, list):
        # OSM may carry several maxspeed values for one edge; keep the first one.
        edges_proj.loc[index, ['maxspeed']] = row.maxspeed[0]
    # Otherwise the edge keeps its existing OSM maxspeed (nothing to write).


In [5]:
# Update network and calculate missing link speeds
# Rebuild the graph from the edited edge table so the replaced `maxspeed` values are used.
graph_proj = ox.graph_from_gdfs(nodes_proj,edges_proj)
graph_proj_speed = ox.add_edge_speeds(graph_proj)
# Travel times are recomputed from the updated speeds; later shortest paths use `travel_time` as weight.
graph_proj = ox.add_edge_travel_times(graph_proj_speed)
nodes_proj,edges_proj = ox.graph_to_gdfs(graph_proj)

In [6]:
# Import requests and vehicle locations
# Vehicles are taken from the drop-off file (their position/time is a previous drop-off).
man_veh = pd.read_csv('Manhattan_dropoff_14_80000-80030.csv',index_col='index')
# Requests are taken from the pick-up file.
man_req = pd.read_csv('Manhattan_pickup_14_80000-80030.csv',index_col='index')

In [7]:
# The following sections contain functions used to prepare and process input data for Google OR-TOOLS
# convert request dataframe into a geodataframe with projected coordinates and linestring
def convert_req_coord(requests):
    """Project request coordinates to EPSG:2263 and attach a straight pickup->drop-off line.

    Parameters
    ----------
    requests : pandas.DataFrame
        Must provide `pickup_latitude`, `pickup_longitude`, `dropoff_latitude`,
        `dropoff_longitude` (EPSG:4326) and `pickup_datetime`.

    Returns
    -------
    geopandas.GeoDataFrame
        Indexed like `requests`, with columns `pickup_x`, `pickup_y`, `dropoff_x`,
        `dropoff_y`, a LineString geometry whose two points are the projected pickup and
        drop-off rounded to 2 decimals, and the original `pickup_datetime`.
    """
    transformer = Transformer.from_crs("epsg:4326", "epsg:2263")
    # Transform whole columns at once instead of enlarging a DataFrame cell by cell.
    # Argument order (latitude, longitude) is the same as in the former per-row calls.
    pickup_x, pickup_y = transformer.transform(
        requests['pickup_latitude'].values, requests['pickup_longitude'].values)
    dropoff_x, dropoff_y = transformer.transform(
        requests['dropoff_latitude'].values, requests['dropoff_longitude'].values)
    request_proj = pd.DataFrame(
        {'pickup_x': pickup_x, 'pickup_y': pickup_y,
         'dropoff_x': dropoff_x, 'dropoff_y': dropoff_y},
        index=requests.index)

    # Only the geometry uses rounded coordinates; the x/y columns keep full precision.
    pickups = np.around(request_proj[['pickup_x', 'pickup_y']].values, 2).tolist()
    dropoffs = np.around(request_proj[['dropoff_x', 'dropoff_y']].values, 2).tolist()
    request_geom = [LineString([start, end]) for start, end in zip(pickups, dropoffs)]

    request_geom_df = gpd.GeoDataFrame(request_proj, geometry=request_geom, crs="EPSG:2263")
    request_geom_df['pickup_datetime'] = requests['pickup_datetime']
    return request_geom_df

In [8]:
# Plot requests or vehicles on top of the road network
def plot_mahattan_projected(graph,gdf):
    """Draw the network of `graph` and overlay the points described by `gdf`.

    Exactly one overlay is drawn, chosen by the first of these columns found in `gdf`:
    `pickup_x` (projected pickup / drop-off coordinates), `pickup_node` (pickup / drop-off
    network nodes) or `veh_node` (vehicle network nodes). Returns the matplotlib Axes.
    """
    fig, ax = plt.subplots(figsize=(30, 20))
    network_nodes, network_edges = ox.graph_to_gdfs(graph)
    network_nodes.plot(ax=ax, color='#7DD9FC', markersize=0.5, zorder=10)
    network_edges.plot(ax=ax, linewidth=1, color='#B4B3B3', alpha=0.7, zorder=5)

    available = gdf.columns
    if 'pickup_x' in available:
        # Raw projected coordinates: scatter them directly.
        ax.scatter(gdf['pickup_x'], gdf['pickup_y'], marker=">", zorder=15)
        ax.scatter(gdf['dropoff_x'], gdf['dropoff_y'], marker="*", color="orange", zorder=15)
        return ax
    if 'pickup_node' in available:
        # Map-matched requests: plot the matching network nodes.
        network_nodes.loc[gdf['pickup_node']].plot(ax=ax, marker=">", zorder=15)
        network_nodes.loc[gdf['dropoff_node']].plot(ax=ax, marker="*", color="orange", zorder=15)
        return ax
    if 'veh_node' in available:
        network_nodes.loc[gdf['veh_node']].plot(ax=ax, marker=".", color="magenta", zorder=15)
    return ax

In [9]:
# Convert projected coordinates to nodes and find shortest paths
# Input graph must be projected and contain edge travel times
def map_matching_paths(graph,req_gdf):
    """Snap requests to network nodes and compute their fastest paths.

    Parameters
    ----------
    graph : networkx graph
        Projected graph whose edges carry `travel_time`.
    req_gdf : DataFrame-like
        Needs `pickup_x`, `pickup_y`, `dropoff_x`, `dropoff_y` and `pickup_datetime`.
        It is not modified; the function works on a copy.

    Returns
    -------
    (route_line_geom_request, kept_requests)
        `route_line_geom_request` has one row per kept request with `pickup_node`,
        `dropoff_node`, the path geometry, `pickup_datetime`, `trip_time` (sum of edge
        `travel_time` along the path) and `trip_length_ft` (geometry length).
        `kept_requests` is the copy of `req_gdf` (with a `dropp` column equal to 0) without
        the requests whose pickup and drop-off snap to the same node.
    """
    nodes, _ = ox.graph_to_gdfs(graph)
    # Work on a copy so the caller's frame does not silently gain a 'dropp' column.
    req_gdf = req_gdf.copy()
    req_gdf['dropp'] = 0

    route_line_request = []
    nodes_pair = []
    nodes_time = []
    for index, row in req_gdf.iterrows():
        # Convert projected coordinates to the nearest network nodes.
        pickup_node = ox.get_nearest_node(graph, (row['pickup_y'], row['pickup_x']), method='euclidean')
        dropoff_node = ox.get_nearest_node(graph, (row['dropoff_y'], row['dropoff_x']), method='euclidean')
        if pickup_node == dropoff_node:
            # Zero-length trip on the network: flag it so it is dropped below.
            req_gdf.loc[index, 'dropp'] = 1
            continue
        path = nx.shortest_path(graph, source=pickup_node, target=dropoff_node,
                                weight='travel_time', method='dijkstra')
        route_line_request.append(LineString(list(nodes.loc[path].geometry.values)))
        nodes_pair.append([pickup_node, dropoff_node])
        time_r = ox.utils_graph.get_route_edge_attributes(graph, path, attribute='travel_time')
        nodes_time.append(sum(time_r))

    req_gdf = req_gdf[req_gdf.dropp != 1]
    route_line_geom_request = gpd.GeoDataFrame(
        nodes_pair, geometry=route_line_request,
        columns=['pickup_node', 'dropoff_node'], index=req_gdf.index)
    route_line_geom_request['pickup_datetime'] = req_gdf['pickup_datetime']
    route_line_geom_request['trip_time'] = nodes_time
    route_line_geom_request['trip_length_ft'] = route_line_geom_request.geometry.length
    return (route_line_geom_request, req_gdf)

In [10]:
# Load vehicle position and time into a dataframe
def convert_veh_coord(veh_data,graph):
    """Return a copy of `veh_data` enriched with projected position and nearest network node.

    The vehicle position is read from `dropoff_latitude` / `dropoff_longitude` and projected
    from EPSG:4326 to EPSG:2263. The copy gets `dropoff_datetime` parsed with
    `pd.to_datetime`, plus the new columns `veh_node`, `veh_x` and `veh_y`.
    """
    to_projected = Transformer.from_crs("epsg:4326", "epsg:2263")

    matched = []  # one (node, x, y) triple per vehicle, in row order
    for _, vehicle in veh_data.iterrows():
        x, y = to_projected.transform(vehicle['dropoff_latitude'], vehicle['dropoff_longitude'])
        nearest = ox.get_nearest_node(graph, (y, x), method='euclidean')
        matched.append((nearest, x, y))

    vehicles = veh_data.copy(deep=True)
    vehicles['dropoff_datetime'] = pd.to_datetime(veh_data['dropoff_datetime'])
    vehicles['veh_node'] = [node for node, _, _ in matched]
    vehicles['veh_x'] = [x for _, x, _ in matched]
    vehicles['veh_y'] = [y for _, _, y in matched]
    return vehicles

In [11]:
# Create time windows of a vehicle and requests based on waiting time and traveltime delay
def create_time_windows(request_data,veh_data,waiting_time,delay_time,veh_window=90):
    """Build the time windows for one vehicle and one or more requests.

    All times are expressed in whole seconds relative to the earliest of the vehicle time
    and the request pickup times.

    Parameters
    ----------
    request_data : pandas.DataFrame or pandas.Series
        Several requests (DataFrame) or a single request (Series); needs
        `pickup_datetime` and `trip_time`.
    veh_data : object with a `dropoff_datetime` attribute (e.g. a vehicle row)
    waiting_time : number
        Added to the earliest pickup time to get the latest pickup time.
    delay_time : number
        Added to the earliest drop-off time to get the latest drop-off time.
    veh_window : number, optional
        Width of the vehicle's own start window (default 90, the value that used to be
        hard-coded).

    Returns
    -------
    (time_win, df)
        `time_win` is a list of `(earliest, latest)` tuples: the vehicle window first,
        then a pickup window and a drop-off window per request, in row order.
        `df` is a deep copy of `request_data` whose `pickup_datetime` holds the relative
        pickup time in seconds.
    """
    epoch = pd.Timestamp("1970-01-01", tz='UTC')
    one_second = pd.Timedelta('1s')
    time_veh = (pd.to_datetime(veh_data.dropoff_datetime, utc=True) - epoch) // one_second
    time_req = (pd.to_datetime(request_data.pickup_datetime, utc=True) - epoch) // one_second

    several = isinstance(request_data, pd.DataFrame)
    req_times = time_req.to_list() if several else [time_req]
    # Reference instant: whichever of the vehicle / request times comes first.
    min_t = min([time_veh] + req_times)

    df = request_data.copy(deep=True)
    if several:
        df['pickup_datetime'] = [t - min_t for t in req_times]
        rows = [row for _, row in df.iterrows()]
    else:
        df['pickup_datetime'] = time_req - min_t
        rows = [df]

    # Vehicle window comes first so its position matches the depot (node 0).
    veh_start = time_veh - min_t
    time_win = [(veh_start, veh_start + veh_window)]
    for row in rows:
        pickup_earliest = int(row['pickup_datetime'])
        dropoff_earliest = pickup_earliest + int(row['trip_time'])
        time_win.append((pickup_earliest, pickup_earliest + waiting_time))
        time_win.append((dropoff_earliest, dropoff_earliest + delay_time))

    return (time_win, df)

In [12]:
# Create list of nodes, start with vehicle location as first row
def combine_veh_req_nodes(request_data,veh_data,pair_table=None):
    """Build the distance and time matrices for one vehicle and its request(s).

    Node order is: vehicle node, then (pickup node, drop-off node) for each request.

    Parameters
    ----------
    request_data : pandas.DataFrame or pandas.Series
        Several requests or a single request; needs `pickup_node` and `dropoff_node`.
    veh_data : object with a `veh_node` attribute (e.g. a vehicle row)
    pair_table : pandas.DataFrame, optional
        Table with columns `u`, `v`, `length`, `travel_time` holding one row per ordered
        node pair. Defaults to the notebook-level `df_all_nodes`.

    Returns
    -------
    (data_dist, data_time) : two square lists of lists
        Entry [i][j] is the `length` / `travel_time` from node i to node j, 0 when both are
        the same node. Column 0 is all zeros, i.e. going back to the vehicle's start node
        costs nothing.

    Raises
    ------
    ValueError
        If a needed pair is missing from the table or appears with conflicting values.
    """
    table = df_all_nodes if pair_table is None else pair_table

    nodes_all = [veh_data.veh_node]
    if isinstance(request_data, pd.DataFrame):
        for _, row in request_data.iterrows():
            nodes_all.extend([row['pickup_node'], row['dropoff_node']])
    else:
        nodes_all.extend([request_data['pickup_node'], request_data['dropoff_node']])

    # Narrow the table to the handful of relevant rows with two vectorised passes, then
    # index them by (u, v) so each matrix cell is a dictionary lookup.
    relevant = table[table['u'].isin(nodes_all) & table['v'].isin(nodes_all)]
    lookup = {}
    for u, v, dist, secs in zip(relevant['u'].tolist(), relevant['v'].tolist(),
                                relevant['length'].tolist(), relevant['travel_time'].tolist()):
        previous = lookup.setdefault((u, v), (dist, secs))
        if previous != (dist, secs):
            raise ValueError('conflicting rows for node pair ({}, {})'.format(u, v))

    data_dist = []
    data_time = []
    for node1 in nodes_all:
        row_dist = []
        row_time = []
        for position, node2 in enumerate(nodes_all):
            if position == 0 or node1 == node2:
                # Column 0 is forced to zero (no cost to return to the start node).
                row_dist.append(0)
                row_time.append(0)
                continue
            if (node1, node2) not in lookup:
                raise ValueError('no distance/time stored for node pair ({}, {})'.format(node1, node2))
            dist, secs = lookup[(node1, node2)]
            row_dist.append(dist)
            row_time.append(secs)
        data_dist.append(row_dist)
        data_time.append(row_time)

    return (data_dist, data_time)

In [13]:
#create list of nodes, start with vehicle location as first row
#now using sqlite database to store data
def create_matrix_all_nodes(request_data,veh_data,graph_proj,engine=None):
    """Compute fastest-path length and time for every ordered node pair and store them in SQL.

    The nodes are the distinct pickup / drop-off nodes of `request_data` (only when it is a
    DataFrame) followed by the distinct `veh_node` values of `veh_data`. For each ordered
    pair of different nodes one row (`u`, `v`, `length`, `travel_time`) is appended to the
    table `data`. Nothing is returned.

    Parameters
    ----------
    request_data : pandas.DataFrame
        Needs `pickup_node` and `dropoff_node`.
    veh_data : pandas.DataFrame
        Needs `veh_node`.
    graph_proj : networkx graph
        Projected graph whose edges carry `travel_time`.
    engine : SQLAlchemy engine, optional
        Target database; defaults to the notebook-level `disk_engine`.
    """
    engine = disk_engine if engine is None else engine
    nodes_proj, _ = ox.graph_to_gdfs(graph_proj)

    # Keep first-seen order while testing membership in O(1).
    nodes_all = []
    seen = set()

    def _remember(node):
        """Append `node` to `nodes_all` the first time it is seen."""
        if node not in seen:
            seen.add(node)
            nodes_all.append(node)

    if isinstance(request_data, pd.DataFrame):
        for _, row in request_data.iterrows():
            _remember(row['pickup_node'])
            _remember(row['dropoff_node'])
    for _, row in veh_data.iterrows():
        _remember(row.veh_node)

    # The context manager closes the progress bar even if a path lookup raises.
    with tqdm(total=len(nodes_all)) as pbar:
        for node1 in nodes_all:
            records = []
            for node2 in nodes_all:
                if node1 == node2:
                    continue
                path = nx.shortest_path(graph_proj, source=node1, target=node2, weight='travel_time')
                # Length of the polyline through the path's nodes, truncated to an integer.
                route_geom = LineString(list(nodes_proj.loc[path].geometry.values))
                time_list = ox.utils_graph.get_route_edge_attributes(graph_proj, path, attribute='travel_time')
                records.append({'u': node1, 'v': node2,
                                'length': int(route_geom.length),
                                'travel_time': sum(time_list)})
            if records:
                # Build the frame once per source node (DataFrame.append no longer exists
                # in pandas 2.x) and write it with a single INSERT batch.
                batch = pd.DataFrame(records, columns=['u', 'v', 'length', 'travel_time'])
                # Rows used to be written one at a time, each with index 0; keep that so the
                # stored 'index' column stays identical and drop_duplicates still works
                # on re-runs.
                batch.index = [0] * len(batch)
                batch.to_sql('data', engine, if_exists='append')
            pbar.update(1)

In [14]:
# Create the demand list: 0 for the vehicle start, then +1 (pickup) and -1 (drop-off) per request
def create_demand(request_data):
    """Return the per-node demand list matching the vehicle-first node ordering.

    A DataFrame contributes one (+1, -1) pair per row; anything else is treated as a
    single request.
    """
    request_count = len(request_data.index) if isinstance(request_data, pd.DataFrame) else 1
    return [0] + [1, -1] * request_count

In [15]:
# Create request pairs with new index
def create_pickup_delivery(request_data):
    """Return [pickup_index, delivery_index] pairs in the vehicle-first node numbering.

    Index 0 is the depot (vehicle), so request k (0-based) occupies indices 2k+1 and 2k+2.
    A DataFrame yields one pair per row; anything else is treated as a single request.
    """
    if not isinstance(request_data, pd.DataFrame):
        return [[1, 2]]
    return [[2 * k + 1, 2 * k + 2] for k in range(len(request_data.index))]

In [16]:
# Google OR-TOOLS function
from __future__ import print_function
from ortools.constraint_solver import routing_enums_pb2
from ortools.constraint_solver import pywrapcp

def create_data_model(distance_matrix,time_matrix,time_windows,requests,veh,req_demand,veh_capacity):
    """Stores the data for the problem.

    Parameters
    ----------
    distance_matrix, time_matrix : list of lists
        Square matrices indexed by node (node 0 is the depot).
    time_windows : list of (earliest, latest) tuples, one per node.
    requests : list of [pickup_index, delivery_index] pairs.
    veh : int
        Number of vehicles.
    req_demand : list
        Demand per node.
    veh_capacity : number, or list/tuple of numbers
        A single capacity applied to every vehicle, or an explicit per-vehicle list.

    Returns
    -------
    dict with keys `distance_matrix`, `time_matrix`, `time_windows`, `pickups_deliveries`,
    `num_vehicles`, `depot` (always 0), `demands` and `vehicle_capacities`.
    """
    # The capacity dimension needs one entry per vehicle, so a scalar capacity is
    # replicated `veh` times (for a single vehicle this is the same one-element list
    # as before).
    if isinstance(veh_capacity, (list, tuple)):
        capacities = list(veh_capacity)
    else:
        capacities = [veh_capacity] * veh
    return {
        'distance_matrix': distance_matrix,
        'time_matrix': time_matrix,
        'time_windows': time_windows,
        'pickups_deliveries': requests,
        'num_vehicles': veh,
        'depot': 0,
        'demands': req_demand,
        'vehicle_capacities': capacities,
    }

In [17]:
def print_solution(data, manager, routing, solution):
    """Walk the solved routes and collect route and passenger statistics.

    The name is kept for existing callers; nothing is printed.

    Parameters
    ----------
    data : dict
        Needs `num_vehicles`, `demands`, `time_windows` and `distance_matrix`;
        `pickups_deliveries` (list of [pickup_node, delivery_node]) is used to match each
        drop-off with its own pickup.
    manager, routing, solution
        The OR-Tools index manager, routing model and solved assignment.

    Returns
    -------
    tuple
        (node_all, total_time, total_distance, check_share, passenger_delay,
        [total_waiting_t, total_trip_dist, total_trip_time]) where

        - node_all: visited nodes in order, including each route's end node;
        - total_time: sum over vehicles of the minimum 'Time' cumul at the route end;
        - total_distance: sum of `distance_matrix` entries along the routes;
        - check_share: highest load reached on any route;
        - passenger_delay: sum over drop-offs of arrival minus earliest drop-off time;
        - total_waiting_t: sum over pickups of arrival minus earliest pickup time;
        - total_trip_dist / total_trip_time: sum over requests of the distance / time
          between that request's own pickup and drop-off.
    """
    time_dimension = routing.GetDimensionOrDie('Time')
    # delivery node -> pickup node, so interleaved (shared) trips are measured correctly.
    pickup_of = {pair[1]: pair[0] for pair in data.get('pickups_deliveries', [])}

    node_all = []
    total_time = 0
    total_distance = 0
    check_share = 0
    passenger_delay = 0
    total_waiting_t = 0
    total_trip_dist = 0
    total_trip_time = 0

    for vehicle_id in range(data['num_vehicles']):
        index = routing.Start(vehicle_id)
        route_load = 0
        route_distance = 0
        boarded = {}          # pickup node -> (time, distance) when the passenger boarded
        last_pickup = (0, 0)  # fallback when a drop-off has no known pickup pairing

        while not routing.IsEnd(index):
            node_index = manager.IndexToNode(index)
            arrival = solution.Min(time_dimension.CumulVar(index))
            earliest = data['time_windows'][node_index][0]
            demand = data['demands'][node_index]
            route_load += demand

            if demand == -1:
                passenger_delay += arrival - earliest
                boarded_t, boarded_d = boarded.get(pickup_of.get(node_index), last_pickup)
                total_trip_time += arrival - boarded_t
                total_trip_dist += route_distance - boarded_d
            if demand == 1:
                total_waiting_t += arrival - earliest
                last_pickup = (arrival, route_distance)
                boarded[node_index] = last_pickup

            check_share = max(check_share, route_load)
            node_all.append(node_index)

            index = solution.Value(routing.NextVar(index))
            # Use the distance matrix directly: the model's arc cost is the travel-time
            # callback, so GetArcCostForVehicle would add up seconds, not distance (and
            # it expects routing indices, not node indices).
            route_distance += data['distance_matrix'][node_index][manager.IndexToNode(index)]

        node_all.append(manager.IndexToNode(index))
        total_time += solution.Min(time_dimension.CumulVar(index))
        total_distance += route_distance

    return (node_all, total_time, total_distance, check_share, passenger_delay,
            [total_waiting_t, total_trip_dist, total_trip_time])

In [18]:
def vehicle_routing(data_dist,data_time,time_win,req_pair,veh_no,req_demand,veh_capacity):
    """Build and solve a pickup-and-delivery routing model with time windows and capacity.

    Parameters
    ----------
    data_dist, data_time : list of lists
        Distance and time matrices indexed by node (node 0 is the depot / vehicle start).
    time_win : list of (earliest, latest) tuples, one per node; entry 0 is applied to the
        vehicle start nodes.
    req_pair : list of [pickup_node, delivery_node] pairs.
    veh_no : number of vehicles.
    req_demand : demand per node.
    veh_capacity : vehicle capacity, forwarded to `create_data_model`.

    Returns
    -------
    tuple of 7 items
        On success: (solution, result_nodes, result_time, result_dist, check_share,
        passenger_delay, total_waiting_t) — the last six are the values returned by
        `print_solution` (its sixth value is a list and is returned under the name
        `total_waiting_t`).
        When no solution is found: (solution, [0], [0], [0], [0], [0], [0]) with a falsy
        `solution`.
    """
    # Instantiate the data problem.
    data = create_data_model(data_dist,data_time,time_win,req_pair,veh_no,req_demand,veh_capacity)
    # Create the routing index manager.
    manager = pywrapcp.RoutingIndexManager(len(data['distance_matrix']),data['num_vehicles'], data['depot'])

    # Create Routing Model.
    routing = pywrapcp.RoutingModel(manager)
    # Create and register a transit callback.
    def time_callback(from_index, to_index):
        """Returns the travel time between the two nodes."""
        
        from_node = manager.IndexToNode(from_index)
        to_node = manager.IndexToNode(to_index)
        return data['time_matrix'][from_node][to_node]

    transit_callback_index = routing.RegisterTransitCallback(time_callback)
    # Define cost of each arc.
    # The arc cost is the travel-time callback, so the solver minimises time, not distance.
    routing.SetArcCostEvaluatorOfAllVehicles(transit_callback_index)
    # Convert from routing variable Index to time matrix NodeIndex.
    # Add Time Windows constraint.
    time = 'Time'
    routing.AddDimension(
        transit_callback_index,
        60,  # allow waiting time
        3600,  # maximum time per vehicle
        False,  # Don't force start cumul to zero.
        time)
    time_dimension = routing.GetDimensionOrDie(time)
    
    # Add time window constraints for each location except depot.
    for location_idx, time_window in enumerate(data['time_windows']):    
        if location_idx == 0:
            continue
        index = manager.NodeToIndex(location_idx)
        time_dimension.CumulVar(index).SetRange(time_window[0], time_window[1])
    # Add time window constraints for each vehicle start node.
    # Every vehicle start uses time window 0 (the vehicle's own window).
    for vehicle_id in range(data['num_vehicles']):
        index = routing.Start(vehicle_id)
        time_dimension.CumulVar(index).SetRange(data['time_windows'][0][0],
                                                data['time_windows'][0][1])
    #set time value
    # NOTE(review): 99 is an unexplained weight on the route time span — confirm its intent.
    time_dimension.SetSpanCostCoefficientForAllVehicles(99)
    
    # Define distance of each arc (added).
    def distance_callback(from_index, to_index):
        """Returns the distance-matrix entry between the two nodes."""
        # Convert from routing variable Index to distance matrix NodeIndex.
        from_node = manager.IndexToNode(from_index)
        to_node = manager.IndexToNode(to_index)
        return data['distance_matrix'][from_node][to_node]

    transit_callback_index_distance = routing.RegisterTransitCallback(distance_callback)
    
    # Add Distance dimension (added).
    dimension_name = 'Distance'
    routing.AddDimension(
        transit_callback_index_distance,
        0,  # no slack
        300000000,  # vehicle maximum travel distance
        True,  # start cumul to zero
        dimension_name)
    # `distance_dimension` is fetched but not referenced again in this function.
    distance_dimension = routing.GetDimensionOrDie(dimension_name)
    
    
    
    # Define Transportation Requests.
    for request in data['pickups_deliveries']:
        pickup_index = manager.NodeToIndex(request[0])
        delivery_index = manager.NodeToIndex(request[1])
        routing.AddPickupAndDelivery(pickup_index, delivery_index)
        # Pickup and delivery of a request must be served by the same vehicle.
        routing.solver().Add(
            routing.VehicleVar(pickup_index) == routing.VehicleVar(
                delivery_index))
        # Precedence is expressed on the time dimension (not the distance dimension):
        # the pickup must not happen after the delivery.
        routing.solver().Add(
            time_dimension.CumulVar(pickup_index) <=
            time_dimension.CumulVar(delivery_index))
 
    # Add Capacity constraint.
    def demand_callback(from_index):
        """Returns the demand of the node."""
        from_node = manager.IndexToNode(from_index)
        return data['demands'][from_node]
    demand_callback_index = routing.RegisterUnaryTransitCallback(
        demand_callback)
    routing.AddDimensionWithVehicleCapacity(
        demand_callback_index,
        0,  # null capacity slack
        data['vehicle_capacities'],  # vehicle maximum capacities
        True,  # start cumul to zero
        'Capacity')
 
    # Instantiate route start and end times to produce feasible times.
    for i in range(data['num_vehicles']):
        routing.AddVariableMinimizedByFinalizer(
            time_dimension.CumulVar(routing.Start(i)))
        routing.AddVariableMinimizedByFinalizer(
            time_dimension.CumulVar(routing.End(i)))



    # Setting first solution heuristic.
    search_parameters = pywrapcp.DefaultRoutingSearchParameters()
    search_parameters.first_solution_strategy = (
        routing_enums_pb2.FirstSolutionStrategy.PATH_CHEAPEST_ARC)
    # Solve the problem.
    solution = routing.SolveWithParameters(search_parameters)
    if solution:
        # The sixth value from print_solution is a list of three passenger statistics.
        result_nodes,result_time,result_dist,check_share,passenger_delay,total_waiting_t = print_solution(data, manager, routing, solution)
        return(solution,result_nodes,result_time,result_dist,check_share,passenger_delay,total_waiting_t)
    else:
        # No feasible solution: placeholders keep the 7-item shape; callers test `solution`.
        return(solution,[0],[0],[0],[0],[0],[0])


In [19]:
def data_preparation(request_raw,veh_raw,graph):
    """Turn raw request and vehicle tables into map-matched inputs.

    Requests are projected, snapped to the network and routed; vehicles are projected and
    snapped. Returns `(routed_requests, vehicles)`; the cleaned projected request frame
    produced along the way is not returned.
    """
    # Project the requests, then route them; only the routed frame is kept.
    routed_requests, _ = map_matching_paths(graph, convert_req_coord(request_raw))
    # Project the vehicles and attach their nearest network node.
    vehicles = convert_veh_coord(veh_raw, graph)
    return (routed_requests, vehicles)

In [20]:
def vrp_input_preparation(req_data,veh_data,waiting_t,delay_t,graph):
    """Assemble every input `vehicle_routing` needs for one vehicle and its request(s).

    Returns `(data_dist, data_time, time_win, req_pair, req_demand)`. The `graph`
    argument is accepted but not used here.
    """
    # Time windows for the vehicle and the given requests (the relabelled frame is unused).
    windows, _ = create_time_windows(req_data, veh_data, waiting_t, delay_t)
    # Distance and time matrices in vehicle-first node order.
    distances, durations = combine_veh_req_nodes(req_data, veh_data)
    # Pickup/delivery index pairs and the matching demand list.
    pairs = create_pickup_delivery(req_data)
    demands = create_demand(req_data)
    return (distances, durations, windows, pairs, demands)

In [35]:
# Start of the process: prepare requests / vehicles and load the stored node-pair table
request_data, vehicle_data = data_preparation(man_req,man_veh,graph_proj)
disk_engine = create_engine('sqlite:///nyc_all_nodes.db')

# One-off step: uncomment to (re)compute all node-pair paths and append them to the database.
# create_matrix_all_nodes(request_data,vehicle_data,graph_proj)
# The former adjacent literals 'SELECT *' 'FROM data' concatenated to 'SELECT *FROM data'.
df_all_nodes = pd.read_sql_query('SELECT * FROM data', disk_engine)
df_all_nodes = df_all_nodes.astype('int64')
# Re-running the one-off step appends the same rows again; keep one copy of each.
df_all_nodes = df_all_nodes.drop_duplicates()


In [23]:
# Set up constraints
vehicle_no = 1    # number of vehicles per routing problem (passed as `veh_no`)
capacity = 4      # vehicle capacity (passed as `veh_capacity`)
waiting_t = 300   # seconds added to the earliest pickup time to get the latest pickup time
delay_t = 600     # seconds added to the earliest drop-off time to get the latest drop-off time

In [None]:
# Creating RV-graph

# Create the RV-graph edges: one edge per (vehicle, request) pair for which the
# single-vehicle routing problem has a solution.
now1 = datetime.now()
request_copy = request_data.copy(deep=True)
# 'vrp' is 1 when at least one vehicle can serve the request. It is only ever raised,
# so a later vehicle that cannot serve the request no longer resets it to 0.
request_copy['vrp'] = 0
rv_records = []

with tqdm(total=vehicle_data.shape[0]) as pbar:
    for index_veh, row_veh in vehicle_data.iterrows():
        for index, row in request_data.iterrows():
            data_dist, data_time, time_win, req_pair, req_demand = vrp_input_preparation(
                request_data.loc[index], row_veh, waiting_t, delay_t, graph_proj)
            (solution, result_nodes, result_time, result_dist,
             check_share, passenger_delay, total_waiting_t) = vehicle_routing(
                data_dist, data_time, time_win, req_pair, vehicle_no, req_demand, capacity)
            if not solution:
                continue
            request_copy.loc[index, 'vrp'] = 1
            rv_records.append({
                'pickup_node': int(row.pickup_node),
                'veh_node': int(row_veh.veh_node),
                'result_time': int(result_time),
                'result_dist': int(result_dist),
                'req_index': int(index),
                'veh_index': int(index_veh),
                'passengers_delay': int(passenger_delay),
            })
        pbar.update(1)

# Build the edge table once; the explicit column list keeps it well-formed when empty.
edges_rv = pd.DataFrame(rv_records, columns=[
    'pickup_node', 'veh_node', 'result_time', 'result_dist',
    'req_index', 'veh_index', 'passengers_delay'])
now2 = datetime.now()
print(now2-now1)

In [None]:
# Create the RR-graph edges (request1-request2): request 1 plays the role of the vehicle,
# starting at its own pickup node at its own pickup time.
now1 = datetime.now()
request_copy_v2 = request_data.copy(deep=True)
# 'vrp' is 1 when the request can be combined with at least one other request. It is only
# ever raised, so a later infeasible pairing no longer resets it to 0.
request_copy_v2['vrp'] = 0
rr_records = []

with tqdm(total=request_data.shape[0]) as pbar:
    for index_veh, row_veh in request_data.iterrows():
        # Virtual vehicle located at request 1's pickup.
        row_veh_convert = pd.DataFrame(columns=['dropoff_datetime', 'veh_node'])
        row_veh_convert.loc[0, 'dropoff_datetime'] = row_veh.pickup_datetime
        row_veh_convert.loc[0, 'veh_node'] = row_veh['pickup_node']
        virtual_vehicle = row_veh_convert.iloc[0]

        for index, row in request_data.iterrows():
            if index == index_veh:
                continue
            # Two-request problem: request 1 first, then the candidate request 2.
            request_data_2 = pd.DataFrame(columns=['pickup_node', 'dropoff_node', 'geometry',
                                                   'pickup_datetime', 'trip_time', 'trip_length_ft'])
            request_data_2.loc[0] = row_veh
            request_data_2.loc[1] = row

            data_dist, data_time, time_win, req_pair, req_demand = vrp_input_preparation(
                request_data_2, virtual_vehicle, waiting_t, delay_t, graph_proj)
            (solution, result_nodes, result_time, result_dist,
             check_share, passenger_delay, total_waiting_t) = vehicle_routing(
                data_dist, data_time, time_win, req_pair, vehicle_no, req_demand, capacity)
            if not solution:
                continue
            request_copy_v2.loc[index, 'vrp'] = 1
            rr_records.append({
                'pickup2_node': int(row.pickup_node),
                # int() of the scalar value, not of a one-element Series.
                'pickup1_node': int(virtual_vehicle.veh_node),
                'result_time': int(result_time),
                'result_dist': int(result_dist),
                'req2_index': int(index),
                'req1_index': int(index_veh),
                # Highest load reached on the route; 2 means the two trips overlap.
                'share': check_share,
                'passengers_delay': int(passenger_delay),
            })
        pbar.update(1)

# Build the edge table once; the explicit column list keeps it well-formed when empty.
edges_rr = pd.DataFrame(rr_records, columns=[
    'pickup2_node', 'pickup1_node', 'result_time', 'result_dist',
    'req2_index', 'req1_index', 'share', 'passengers_delay'])
now2 = datetime.now()
print(now2-now1)

In [None]:
#save output of rv-graph
# NOTE(review): the file names say wait-5 / delay-7, while the constraints cell above sets
# waiting_t=300 and delay_t=600, and the RTV section below reads the *_wait-3_delay-5.csv
# files — confirm which parameter set these files belong to.
edges_rv = edges_rv.astype('int64')
edges_rv.to_csv('edges_rv_wait-5_delay-7.csv',index=True)
edges_rr = edges_rr.astype('int64')
# Keep only request pairs whose route reached a load of 2 (both passengers on board at once).
edges_rr = edges_rr.loc[edges_rr['share']==2]
edges_rr.to_csv('edges_rr_wait-5_delay-7.csv',index=True)

In [84]:
# Create RTV-graph
# Edge tables are read back from disk (wait-3 / delay-5 run), not taken from the cells above.
df_rv = pd.read_csv("edges_rv_wait-3_delay-5.csv",index_col=0)
df_rr = pd.read_csv("edges_rr_wait-3_delay-5.csv",index_col=0)
# Keep only request pairs whose route reached a load of 2.
df_rr = df_rr.loc[df_rr['share']==2]
# Empty undirected graphs, filled in the following cells.
rv_graph_50 = nx.Graph()
rr_graph = nx.Graph()
RTV_graph = nx.Graph()
# Node table for the RTV graph: one row per request / vehicle node with its coordinates.
RTV_node=pd.DataFrame(columns=['node_index','x','y','type'])

In [85]:
# Constraints for the RTV stage; these rebind the names set earlier in the notebook.
# 180 s / 300 s presumably correspond to the wait-3 / delay-5 files loaded above — confirm.
vehicle_no = 1
capacity = 4
waiting_t = 180
delay_t = 300

In [86]:
# Vehicle subset used to build the RTV graph. The 50% sample (man_veh_50) is
# commented out below, so despite its name df_rv_50 currently refers to the
# full df_rv (same object, not a copy).
#use 50% vehicle
# df_rv_50 = df_rv.loc[df_rv['veh_index'].isin(man_veh_50.index)]
df_rv_50 = df_rv

In [87]:
# Add one graph node per request and per vehicle that appears in the RV edge
# list, positioned at the projected coordinates of its road-network node, and
# record the same nodes in the flat RTV_node table.
now1 = datetime.now()

# Rows are collected in a plain list and turned into a DataFrame once at the
# end: growing a DataFrame with .append() per row is quadratic, and
# DataFrame.append no longer exists in pandas >= 2.0.
rtv_node_records = []

# Request nodes go into all three graphs.
for index, row in df_rv_50.iterrows():
    req_point = nodes_proj.loc[row.pickup_node]
    x_req, y_req = req_point.x, req_point.y
    rv_graph_50.add_node(row['req_index'], pos=(x_req, y_req))
    rr_graph.add_node(row['req_index'], pos=(x_req, y_req))
    RTV_graph.add_node(row['req_index'], pos=(x_req, y_req), node_type='request')
    rtv_node_records.append({'node_index': row['req_index'], 'x': x_req, 'y': y_req, 'type': 'request'})

# Vehicle nodes go into the RV and RTV graphs only. They are added after all
# requests, so a vehicle whose index equals a request index overwrites that
# node's attributes.
# NOTE(review): confirm request and vehicle indices cannot collide.
for index, row in df_rv_50.iterrows():
    veh_point = nodes_proj.loc[row.veh_node]
    x_veh, y_veh = veh_point.x, veh_point.y
    rv_graph_50.add_node(row['veh_index'], pos=(x_veh, y_veh))
    RTV_graph.add_node(row['veh_index'], pos=(x_veh, y_veh), node_type='vehicle')
    rtv_node_records.append({'node_index': row['veh_index'], 'x': x_veh, 'y': y_veh, 'type': 'vehicle'})

# Rebuilt from scratch (one row per RV edge endpoint, duplicates included) so
# re-running this cell does not pile rows on top of a previous run.
RTV_node = pd.DataFrame(rtv_node_records, columns=['node_index', 'x', 'y', 'type'])

In [None]:
# Collapse repeated node rows and renumber the index from 0.
RTV_node = RTV_node.drop_duplicates().reset_index(drop=True)

In [88]:
# Request-vehicle edges: one per row of the RV table, carrying the routing
# result (travel time, distance, passenger delay) as edge attributes.
rv_graph_50.add_edges_from(
    (
        rv_row.req_index,
        rv_row.veh_index,
        {'t_time': rv_row.result_time, 'dist': rv_row.result_dist, 'delay': rv_row.passengers_delay},
    )
    for _, rv_row in df_rv_50.iterrows()
)

# Request-request edges: one per row of the RR table, same attributes.
rr_graph.add_edges_from(
    (
        rr_row.req1_index,
        rr_row.req2_index,
        {'t_time': rr_row.result_time, 'dist': rr_row.result_dist, 'delay': rr_row.passengers_delay},
    )
    for _, rr_row in df_rr.iterrows()
)

In [89]:
# SQLAlchemy engine for the SQLite file that receives the per-vehicle trip
# rows; the RTV-graph cell below appends to its 'data' table through it.
disk_engine_T = create_engine('sqlite:///T_wait-3_delay-5_v2.db')

In [None]:
# Build the RTV trips. For every vehicle in the RV graph, enumerate the request
# combinations ("trips") of size 1 to 4 that vehicle_routing reports as
# servable by that vehicle, and append one row per (vehicle, trip) to the
# SQLite table 'data'. T_1 .. T_4 collect the distinct trips of each size
# across all vehicles.

# Column layout of the rows written to SQL.
T_SQL_COLUMNS = ['veh_index', 'request_index', 'size', 'r1', 'r2', 'r3', 'r4',
                 'delay', 'total_time', 'dist', 'pas_wait_t', 'pas_wait_dist', 'pas_trip_t']


def _solve_trip(request_ids, vehicle):
    """Run the single-vehicle routing problem for `vehicle` serving `request_ids`.

    `request_ids` is a list of labels into request_data; `vehicle` is a label
    into vehicle_data. Uses the notebook-level waiting_t, delay_t, graph_proj,
    vehicle_no and capacity. Returns the tuple produced by vehicle_routing
    unchanged: (solution, result_nodes, result_time, result_dist, check_share,
    passenger_delay, performance_data).
    """
    data_dist, data_time, time_win, req_pair, req_demand = vrp_input_preparation(
        request_data.loc[request_ids], vehicle_data.loc[vehicle], waiting_t, delay_t, graph_proj)
    return vehicle_routing(data_dist, data_time, time_win, req_pair, vehicle_no, req_demand, capacity)


def _trip_record(vehicle, members, passenger_delay, result_time, result_dist, performance_data):
    """Build the SQL row (as a dict) for a shared trip of len(members) requests.

    The trip id is the members joined with '_' in the order given; r1..rN hold
    the members in that same order. performance_data[0..2] are stored as
    pas_wait_t, pas_wait_dist and pas_trip_t respectively.
    """
    record = {
        'veh_index': vehicle,
        'request_index': '_'.join(str(member) for member in members),
        'size': len(members),
        'delay': passenger_delay,
        'total_time': result_time,
        'dist': result_dist,
        'pas_wait_t': performance_data[0],
        'pas_wait_dist': performance_data[1],
        'pas_trip_t': performance_data[2],
    }
    for position, member in enumerate(members, start=1):
        record['r{}'.format(position)] = member
    return record


# Stamp the start here so the printed duration covers this cell only, not the
# time elapsed since an earlier cell set now1.
now1 = datetime.now()

vehicles = df_rv_50.veh_index.unique()
pbar = tqdm(total=vehicles.size)

# Distinct trips across all vehicles. Rows are accumulated in lists (with a set
# of already-seen ids per size) and turned into DataFrames after the loop.
# Testing `x in series` would look at the Series index rather than its values,
# and per-row DataFrame.append is quadratic and gone in pandas >= 2.0.
T1_rows, T2_rows, T3_rows, T4_rows = [], [], [], []
seen_T1, seen_T2, seen_T3, seen_T4 = set(), set(), set(), set()

for vehicle in vehicles:
    trip_records = []

    # ---- size 1: every request adjacent to this vehicle in the RV graph ----
    T1_per_veh = []
    for edge in rv_graph_50.edges.data(nbunch=vehicle):
        request = edge[1]  # edge is (vehicle, request, attribute dict)
        if request not in seen_T1:
            seen_T1.add(request)
            T1_rows.append({'request_index': request, 'request1': request})

        delay = edge[2]['delay']
        trip_time = edge[2]['t_time']
        trip_dist = edge[2]['dist']
        T1_per_veh.append(request)
        trip_records.append({'veh_index': vehicle, 'request_index': str(request), 'size': 1, 'r1': request,
                             'delay': delay, 'total_time': trip_time, 'dist': trip_dist})

    # ---- size 2: pairs of this vehicle's requests linked in the RR graph ----
    T2_per_veh = []          # sorted [a, b] lists, in discovery order
    feasible_pairs = set()   # the same pairs as tuples, for O(1) lookups
    for r1 in T1_per_veh:
        for r2 in T1_per_veh:
            if r1 == r2:
                continue
            pair = tuple(sorted((r1, r2)))
            if pair in feasible_pairs or not rr_graph.has_edge(r1, r2):
                continue

            solution, result_nodes, result_time, result_dist, check_share, passenger_delay, performance_data = _solve_trip([r1, r2], vehicle)
            if solution and check_share == 2:
                # NOTE(review): the pair id keeps the (r1, r2) iteration order,
                # whereas size-3/4 ids use sorted order -- confirm downstream
                # consumers do not expect a canonical order here.
                record = _trip_record(vehicle, [r1, r2], passenger_delay, result_time, result_dist, performance_data)
                check_req_index = record['request_index']
                if check_req_index not in seen_T2:
                    seen_T2.add(check_req_index)
                    T2_rows.append({'request_index': check_req_index, 'request1': r1, 'request2': r2})
                trip_records.append(record)
                feasible_pairs.add(pair)
                T2_per_veh.append(list(pair))

    # ---- size 3: extend each feasible pair by one more request ----
    T3_per_veh = []
    feasible_triples = set()
    for sub_T2 in T2_per_veh:
        for sub_T1 in T1_per_veh:
            if sub_T1 in sub_T2:
                continue
            test_array = sorted([sub_T1] + sub_T2)
            if tuple(test_array) in feasible_triples:
                continue
            # Only call the solver when every 2-request sub-trip was itself
            # accepted for this vehicle. This is the pruning the sub-combination
            # check is there for: a `continue` inside the combinations loop
            # never skips the candidate, so the test must gate the solve.
            if not all(pair in feasible_pairs for pair in combinations(test_array, 2)):
                continue

            solution, result_nodes, result_time, result_dist, check_share, passenger_delay, performance_data = _solve_trip(test_array, vehicle)
            if solution and check_share == 3:
                record = _trip_record(vehicle, test_array, passenger_delay, result_time, result_dist, performance_data)
                check_req_index = record['request_index']
                if check_req_index not in seen_T3:
                    seen_T3.add(check_req_index)
                    T3_rows.append({'request_index': check_req_index, 'request1': test_array[0],
                                    'request2': test_array[1], 'request3': test_array[2]})
                T3_per_veh.append(test_array)
                feasible_triples.add(tuple(test_array))
                trip_records.append(record)

    # ---- size 4: extend each feasible triple by a request seen in any triple ----
    flat_list_T3 = list({item for sublist in T3_per_veh for item in sublist})
    T4_per_veh = []
    feasible_quads = set()
    for sub_T3 in T3_per_veh:
        for com_T1 in flat_list_T3:
            if com_T1 in sub_T3:
                continue
            test_array = sorted([com_T1] + sub_T3)
            if tuple(test_array) in feasible_quads:
                continue
            # Same pruning as above, one level up: all four 3-request
            # sub-trips must already be feasible for this vehicle.
            if not all(triple in feasible_triples for triple in combinations(test_array, 3)):
                continue

            solution, result_nodes, result_time, result_dist, check_share, passenger_delay, performance_data = _solve_trip(test_array, vehicle)
            if solution and check_share == 4:
                record = _trip_record(vehicle, test_array, passenger_delay, result_time, result_dist, performance_data)
                check_req_index = record['request_index']
                if check_req_index not in seen_T4:
                    seen_T4.add(check_req_index)
                    T4_rows.append({'request_index': check_req_index, 'request1': test_array[0], 'request2': test_array[1],
                                    'request3': test_array[2], 'request4': test_array[3]})
                T4_per_veh.append(test_array)
                feasible_quads.add(tuple(test_array))
                trip_records.append(record)

    # One write per vehicle. dtype=object keeps integer request ids from being
    # upcast to float by the NaNs that pad the unused r2..r4 columns.
    T_sql = pd.DataFrame(trip_records, columns=T_SQL_COLUMNS, dtype=object)
    T_sql.to_sql('data', disk_engine_T, if_exists='append')
    pbar.update(1)
pbar.close()

T_1 = pd.DataFrame(T1_rows, columns=['request_index', 'request1'])
T_2 = pd.DataFrame(T2_rows, columns=['request_index', 'request1', 'request2'])
T_3 = pd.DataFrame(T3_rows, columns=['request_index', 'request1', 'request2', 'request3'])
T_4 = pd.DataFrame(T4_rows, columns=['request_index', 'request1', 'request2', 'request3', 'request4'])

now2 = datetime.now()
print(now2-now1)

In [None]:
# Save RTV graph for future optimization
# NOTE(review): `T` is not assigned in any earlier visible cell -- it is first
# read from CSV in a later cell, while the RTV loop writes its rows to the
# SQLite table 'data' via disk_engine_T. Confirm where `T` is meant to come
# from before relying on this cell in a fresh Restart & Run All.
T.to_csv('rtv_allveh_wait-5_delay-10_first-run.csv')

In [None]:
# Spot check: print the 'dist' attribute of every RV edge touching node 6087.
for _, _, edge_attrs in rv_graph_50.edges.data(nbunch=6087):
    print(edge_attrs['dist'])

In [None]:
# Alternative optimization method: Greedy
# Load a previously saved trip table and order it by delay so the greedy pass
# in the next cell visits the lowest-delay trips first.
# NOTE(review): this reads 'rtv_50veh_first-run.csv', which is not the file
# name written by the save cell above -- confirm the intended input.

T=pd.read_csv('rtv_50veh_first-run.csv')
T_sorted = T.sort_values(by=['delay'])
T_greedy=pd.DataFrame()
# Work on a copy: the greedy pass adds 'assigned_veh' / 'assigned_trip' columns,
# and a plain alias would write them into request_data, which the routing
# cells also read.
req_greedy = request_data.copy()

In [None]:
# Greedy assignment: walk the trips from largest (4 requests) to smallest (1),
# each size in ascending-delay order (the order of T_sorted), and accept a trip
# when none of its requests is assigned yet and its vehicle is still unused.
# 0 in 'assigned_veh' means "not assigned".
req_greedy['assigned_veh'] = 0
req_greedy['assigned_trip'] = 0
# Trip ids are strings such as "12_57"; hold them in an object column so the
# writes below do not rely on the deprecated implicit int64 -> object upcast
# (deprecated since pandas 2.1).
req_greedy['assigned_trip'] = req_greedy['assigned_trip'].astype(object)

# Vehicles that may no longer be picked. Seeded from the freshly reset column
# so the test is the same as "veh_index not in assigned_veh", i.e. a vehicle
# whose index is 0 is never selected.
# NOTE(review): confirm no vehicle uses index 0, as it collides with the sentinel.
used_vehicles = set(req_greedy['assigned_veh'].tolist())

# Accepted trip rows; turned into T_greedy once at the end instead of calling
# DataFrame.append per row (quadratic, and removed in pandas >= 2.0).
greedy_rows = []

for trip_size in (4, 3, 2, 1):
    member_cols = ['r{}'.format(k) for k in range(1, trip_size + 1)]
    for index, row in T_sorted.loc[T_sorted['size'] == trip_size].iterrows():
        members = [row[col] for col in member_cols]
        requests_free = (req_greedy.loc[members, 'assigned_veh'] == 0).all()
        if requests_free and row.veh_index not in used_vehicles:
            req_greedy.loc[members, 'assigned_veh'] = row.veh_index
            req_greedy.loc[members, 'assigned_trip'] = row.request_index
            used_vehicles.add(row.veh_index)
            greedy_rows.append(row)

# Rows keep their T_sorted index labels. Rebuilding (rather than extending)
# T_greedy makes the cell safe to re-run.
T_greedy = pd.DataFrame(greedy_rows)


In [None]:
# Number of requests per assigned vehicle (0 = request left unassigned).
req_greedy.loc[:, 'assigned_veh'].value_counts()

In [None]:
# Distinct request indices that appear in the RV edge list, in first-seen order.
request_rv_filtered = df_rv['req_index'].drop_duplicates().tolist()

In [94]:
#Process Optimization result from an imported csv
# For every accepted (vehicle, trip) row of the ILP output, re-run the routing
# problem with that row's own waiting / ride-time limits and store the vehicle
# distance and passenger metrics back on the row.
df = pd.read_csv('all_ilp_200914.csv')
capacity = 4
vehicle_no = 1
count_none = 0       # number of rows for which no route was found
not_feasible = []    # index labels of those rows

# Make sure the result columns exist even if no row turns out to be feasible,
# so later cells can always reference them. Columns already present in the CSV
# are left untouched.
for result_col in ('veh_dist', 'pas_wait_time', 'pas_wait_dist', 'pas_trip_time'):
    if result_col not in df.columns:
        df[result_col] = np.nan

for index,row in df.iterrows():
    if row.vehicle == "rejected":
        continue  # request was not assigned to any vehicle

    # 'trip' is an underscore-joined list of request indices, e.g. "12_57".
    trip_list = [int(x) for x in str(row.trip).split("_")]
    # A single-request trip is looked up by scalar label, a shared trip by list.
    if len(trip_list)==1:
        trip_each = trip_list[0]
    else:
        trip_each = trip_list
    vehicle = float(row.vehicle)
    # Scaled by 60 -- presumably minutes in the CSV to seconds for the router;
    # TODO confirm units. Note these overwrite the notebook-level waiting_t /
    # delay_t used by the RTV cell.
    waiting_t = row.waiting_time*60
    delay_t = row.ride_time*60

    data_dist,data_time,time_win,req_pair,req_demand = vrp_input_preparation(request_data.loc[trip_each],vehicle_data.loc[vehicle],waiting_t,delay_t,graph_proj)
    solution,result_nodes,result_time,result_dist,check_share,passenger_delay,performance_data = vehicle_routing(data_dist,data_time,time_win,req_pair,vehicle_no,req_demand,capacity)
    if solution:
        total_waiting_t = performance_data[0]
        total_trip_dist = performance_data[1]
        total_trip_time = performance_data[2]
        df.loc[index,'veh_dist'] = result_dist
        df.loc[index,'pas_wait_time'] = total_waiting_t
        df.loc[index,'pas_wait_dist'] = total_trip_dist
        df.loc[index,'pas_trip_time'] = total_trip_time
    else:
        count_none += 1
        not_feasible.append(index)

In [103]:
# Delay remaining once the passenger waiting time is taken out of 'delay'.
df['ride_time_delay'] = df['delay'].sub(df['pas_wait_time'])

In [None]:
# Per-scenario totals of every numeric column. A positional argument to
# GroupBy.sum() binds to `numeric_only`, not to a column name, so the flag is
# spelled out here.
# NOTE(review): if only 'trip_size_all' is wanted, select that column before
# calling .sum() instead.
df.groupby(['fleet_size','waiting_time','ride_time','capacity'],as_index=False).sum(numeric_only=True)