In [1]:
from datetime import datetime, timedelta
import numpy as np
import pandas as pd
import json
import os
import math
import heapq
import seaborn as sns
import matplotlib.pyplot as plt
import csv

import random

In [None]:
# Resolve the working directory once; the data-file paths used by the loading cells are built from it.
cwd = os.getcwd()
print(cwd)

# Load Data

In [None]:
# Road-network inputs, both read from the working directory:
#   adjacency -- adjacency[node][neighbor] is a list of edge records carrying
#                'hour', 'day_type' and 'time' (the fields djikstras reads)
#   nodes     -- nodes[node] holds that node's 'lat' and 'lon'
with open(f'{cwd}/adjacency 2.json') as adjacency_f:
    adjacency = json.load(adjacency_f)
    
with open(f'{cwd}/node_data.json') as node_f:
    nodes = json.load(node_f)

#nodes_df = pd.DataFrame(nodes).T

# Functions

In [None]:
def haversine_distance(lat1, lon1, lat2, lon2):
    '''
    Haversine (great-circle) distance between two points

    Parameters: lat1, lon1, lat2, lon2 -- coordinates in decimal degrees
    Returns: distance in km (the unit of the earth radius used below)
    '''

    #earth radius in km
    radius = 6371.0

    #convert lat lon into radians from degrees
    phi1, lam1, phi2, lam2 = (math.radians(v) for v in (lat1, lon1, lat2, lon2))

    #haversine from wikipedia
    dlon = lam2 - lam1
    dlat = phi2 - phi1
    a = math.sin(dlat / 2)**2 + math.cos(phi1) * math.cos(phi2) * math.sin(dlon / 2)**2

    #floating-point rounding can push a marginally outside [0, 1] for (near-)antipodal
    #points, which would make sqrt(1 - a) raise ValueError; clamp before taking roots
    a = min(1.0, max(0.0, a))
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))

    return radius * c

In [None]:
def get_nearest_node(lat, lon):
    '''
    Linear scan over the global `nodes` mapping for the entry whose stored
    'lat'/'lon' is closest (Haversine distance) to the query point.
    Returns: key of the closest node; '' if `nodes` is empty. On ties the node seen first wins.
    TODO: Find more efficient nearest node method, how account for time if very far from nearest node?
    '''
    best_id, best_dist = '', float('inf')
    for node_id, coords in nodes.items():
        candidate = haversine_distance(coords['lat'], coords['lon'], lat, lon)
        if candidate < best_dist:
            best_id, best_dist = node_id, candidate
    return best_id

In [None]:
def get_datetime(time_string):
    '''
    Parse a "MM/DD/YYYY HH:MM:SS" timestamp string into a datetime object
    Raises: ValueError (from strptime) if the string does not match that layout
    '''
    return datetime.strptime(time_string, "%m/%d/%Y %H:%M:%S")

In [None]:
def djikstras(source, dest, dt, day_type, start_time):
    '''
    Dijkstra's algorithm over the global `adjacency` graph, stopping early once `dest` is popped

    Only edges whose 'hour' equals dt.hour and whose 'day_type' equals `day_type` are
    traversable; the edge weight is the edge's 'time' field.

    Parameters:
        source, dest -- node ids (keys of `nodes`)
        dt           -- datetime whose hour selects which edges are usable
        day_type     -- value matched against each edge's 'day_type'
        start_time   -- currently unused; kept so existing callers keep working
    Returns: (distance, parent)
        distance -- best known travel time per node, float('inf') if never reached;
                    because of the early stop only nodes settled before `dest` are final
        parent   -- predecessor of each reached node on its best known route
    '''
    hour = dt.hour
    infinity = float('inf')

    #initialize all distances to infinity
    distance = {node: infinity for node in nodes}
    distance[source] = 0
    parent = {}

    pq = [(0, source)]
    while pq:
        node_dist, node = heapq.heappop(pq)
        if node == dest: #early stopping
            break
        #a node is pushed again every time its distance improves; skip the outdated
        #entries instead of re-scanning all of that node's edges for nothing
        if node_dist > distance[node]:
            continue
        for neighbor, edges in adjacency.get(node, {}).items():
            for edge in edges:
                if edge['hour'] != hour or edge['day_type'] != day_type:
                    continue
                candidate = node_dist + edge['time']
                #.get(): a neighbor that is absent from `nodes` counts as not yet reached
                if candidate < distance.get(neighbor, infinity):
                    distance[neighbor] = candidate
                    parent[neighbor] = node
                    heapq.heappush(pq, (candidate, neighbor))
    return distance, parent

In [None]:
def get_path(parent, source_node, dest_node):
    '''
    Walk the parent links backwards from the destination until the source is reached
    Returns: list of nodes on the route, destination first and source last
    Raises: KeyError if a node on the way back has no recorded parent
    '''
    trail = [dest_node]
    cursor = dest_node
    while cursor != source_node:
        cursor = parent[cursor]
        trail.append(cursor)
    return trail

In [None]:
def get_driver(p, dt, day_type, start_time):
    '''
    Pop the earliest-available driver off the global driver_q heap (Task 1: ordered by time)
    and route that driver from its current node to the passenger's pickup node.

    Side effects: resets p.total_time to (wait + pickup travel), moves d.current_node to the
    pickup node and adds the pickup travel time to d.pickup_time.
    Returns: (driver, hours the passenger waited for the driver + pickup travel time)
    '''
    p.total_time = 0
    available_time, _, d = heapq.heappop(driver_q) #(datetime, id, driver object)

    #hours the passenger has to wait until that driver becomes available (0 if already free)
    if p.appear < available_time:
        wait_time = (available_time - p.appear).total_seconds()/3600
    else:
        wait_time = 0
    p.total_time += wait_time

    travel_times, _ = djikstras(d.current_node, p.source_node, dt, day_type, start_time)
    pickup_time = travel_times[p.source_node]

    d.current_node = p.source_node
    d.pickup_time += pickup_time
    p.total_time += pickup_time
    return d, pickup_time + wait_time

In [None]:
def check_active(current_time, d):
    '''
    Decide at random whether a driver stays in the queue after a ride.

    A driver with no logged ride time always stays. Otherwise the exit probability is the
    fraction of an 8 hour shift elapsed since d.appear (1.0 from 8 hours on). The outcome
    is appended to driver_order.txt.
    Returns: True if the driver stays in the queue, False if the driver exits
    '''
    if d.ride_time == 0:
        return True

    max_active_time = timedelta(hours=8) #reference passenger appear for current time
    time_active = current_time - d.appear

    if time_active >= max_active_time:
        probability = 1.0
    else:
        # probability grows linearly with the hours active
        probability = time_active.total_seconds() / 3600 / 8

    exits = random.random() < probability
    message = f"Driver {d.idx}: exited the queue \n" if exits else f"Driver {d.idx}: still in queue \n"
    with open('driver_order.txt', 'a') as file:
        file.write(message)
    return not exits

In [None]:
def ride():
    '''
    Main function for going through passenger and driver queues

    Passengers are served in the order of the global passenger_q list; each one is matched
    with the driver popped by get_driver() from the global driver_q heap. Stops when either
    queue runs out, then retires every driver still queued.

    Returns: (completed_passengers, completed_drivers, passenger_times, driver_profits)
        passenger_times -- p.total_time of each served passenger (wait + pickup + ride)
        driver_profits  -- d.get_profit() of each retired driver
    '''
    completed_passengers = []
    completed_drivers = []
    passenger_times = []
    driver_profits = []
    day_type = 'weekday'
    #an empty passenger queue simply retires the drivers below instead of raising IndexError
    start_time = passenger_q[0][0] if passenger_q else None

    for _, _, p in passenger_q: #(datetime, id, passenger object)
        if not driver_q:
            break

        d, pickup_time = get_driver(p, p.appear, day_type, start_time)

        #pickup_time already contains the wait for the driver, so the pickup moment of
        #THIS ride is measured from this passenger's appearance; one clock shared by all
        #rides would add up the durations of trips driven in parallel by other drivers
        current_time = p.appear
        if math.isfinite(pickup_time): #timedelta(hours=inf) raises OverflowError
            current_time = p.appear + timedelta(hours=pickup_time)

        ride_times, _ = djikstras(p.source_node, p.dest_node, current_time, day_type, start_time)
        ride_time = ride_times[p.dest_node]
        p.total_time += ride_time
        completed_passengers.append(p)
        passenger_times.append(p.total_time)

        #the driver ends the ride at the drop-off; get_driver() routes the next pickup
        #from current_node, so it has to move together with source_node/source
        d.current_node = p.dest_node
        d.source_node = p.dest_node
        d.source = p.dest
        d.ride_time += ride_time

        if math.isfinite(p.total_time):
            current_time = current_time + timedelta(hours=ride_time)

        if check_active(current_time, d):
            #driver is available again at the drop-off time
            heapq.heappush(driver_q, (current_time, d.idx, d))
        else:
            completed_drivers.append(d)
            driver_profits.append(d.get_profit())

    #retire every driver that is still queued
    while driver_q:
        _, _, d = heapq.heappop(driver_q)
        completed_drivers.append(d)
        driver_profits.append(d.get_profit())

    return completed_passengers, completed_drivers, passenger_times, driver_profits

# Classes (Driver, Passenger)

In [None]:
class driver:
    '''
    A driver in the simulation: when and where they appear, the graph node they are
    currently at, and the hours logged driving passengers and driving to pickups.
    '''
    def __init__(self, index, appear, source_lat, source_lon):
        self.idx = index
        self.appear = get_datetime(appear)
        self.source = (source_lat, source_lon)
        self.ride_time = 0 #total time logged driving passengers
        self.pickup_time = 0 #total time logged driving to passengers
        start_node = get_nearest_node(source_lat, source_lon)
        self.source_node = start_node
        self.current_node = start_node #update as performing routes

    def get_profit(self):
        '''
        Profit = time spent driving passengers minus time spent driving to pickups.
        Also stored on the instance as self.profit.
        '''
        profit = self.ride_time - self.pickup_time
        self.profit = profit
        return profit

In [None]:
class passenger:
    '''
    A ride request: when it appears, its pickup and drop-off coordinates, and the graph
    nodes nearest to each of them.
    '''
    def __init__(self, index, appear, source_lat, source_lon, dest_lat, dest_lon):
        origin = (source_lat, source_lon)
        target = (dest_lat, dest_lon)

        self.idx = index
        self.appear = get_datetime(appear)
        self.source = origin
        self.dest = target
        self.total_time = 0 #total time logged for passenger (time appear to time drop off)
        self.source_node = get_nearest_node(*origin)
        self.dest_node = get_nearest_node(*target)

# Run Task 1

In [None]:
# #set up passenger q, NOTE: EXTREMELY SLOW (???), LIMIT QUEUE SIZE TO 10 FOR TESTING
# passenger_q = []
# for index, row in passengers.iloc[:20,:].iterrows():
#     p = passenger(row['Date/Time'], row['Source Lat'], row['Source Lon'], row['Dest Lat'], row['Dest Lon'])
#     heapq.heappush(passenger_q, (p.appear, p))
    
# #set up driver q, NOTE: EXTREMELY SLOW (???), LIMIT QUEUE SIZE TO 10 FOR TESTING
# driver_q = []
# for index, row in drivers.iloc[:2,:].iterrows():
#     d = driver(row['Date/Time'], row['Source Lat'], row['Source Lon'])
#     heapq.heappush(driver_q, (d.appear, d))

In [None]:
# Build the passenger queue from passengers.csv, in file order.
# Columns read: 0 = appear time string, 1-2 = source lat/lon, 3-4 = dest lat/lon.
passenger_q = []

#newline='' is how the csv module expects files handed to csv.reader to be opened
with open(f'{cwd}/passengers.csv', 'r', newline='') as csvfile:
    reader_variable = csv.reader(csvfile, delimiter=",")
    next(reader_variable, None) #skip the header row through the reader itself
    data_rows = (row for row in reader_variable if row) #blank lines come back as []
    for index, row in enumerate(data_rows):
        p = passenger(index, row[0], float(row[1]), float(row[2]), float(row[3]), float(row[4]))
        passenger_q.append((p.appear, p.idx, p))

In [None]:
# Dump the nearest-node lookup result of every passenger, one passenger per line.
file_path = 'passenger_brute_force.txt'

with open(file_path, 'w') as file:
    #the loop variable must not be called `passenger`: that would rebind the passenger
    #class to a queue tuple and break every later passenger(...) construction
    for _, _, queued_passenger in passenger_q:
        file.write('start: '+str(queued_passenger.source_node) +'   dest: '+ str(queued_passenger.dest_node) + '\n')

In [None]:
# Build the driver queue from drivers.csv.
# Columns read: 0 = appear time string, 1-2 = source lat/lon.
driver_q = []

#newline='' is how the csv module expects files handed to csv.reader to be opened
with open(f'{cwd}/drivers.csv', 'r', newline='') as csvfile:
    reader_variable = csv.reader(csvfile, delimiter=",")
    next(reader_variable, None) #skip the header row through the reader itself
    data_rows = (row for row in reader_variable if row) #blank lines come back as []
    for index, row in enumerate(data_rows):
        d = driver(index, row[0], float(row[1]), float(row[2]))
        driver_q.append((d.appear, d.idx, d))

#min-heap on (appear time, id): get_driver() pops the earliest-available driver first
heapq.heapify(driver_q)

In [None]:
#check_active() draws from `random`; seed it so that Restart & Run All reproduces the same numbers
RANDOM_SEED = 0
random.seed(RANDOM_SEED)

#check_active() only ever appends to driver_order.txt, so start this run with an empty log
with open('driver_order.txt', 'w'):
    pass

completed_passengers, completed_drivers, passenger_times, driver_profits = ride()

In [None]:
file_path = 'passenger_times.txt'

# Write each value in passenger_times to the file, one per line
with open(file_path, 'w') as file:
    file.writelines(f'{value}\n' for value in passenger_times)

file_path = 'driver_profits.txt'

# Write each value in driver_profits to the file, one per line
with open(file_path, 'w') as file:
    file.writelines(f'{value}\n' for value in driver_profits)

# sum()/len() raises ZeroDivisionError on an empty list (e.g. nothing was simulated),
# so report NaN for the mean in that case
mean_passenger_time = sum(passenger_times)/len(passenger_times) if passenger_times else float('nan')
mean_driver_profit = sum(driver_profits)/len(driver_profits) if driver_profits else float('nan')

print('mean passenger times for t1: ',mean_passenger_time)
print('mean driver profit for t1',mean_driver_profit)
print('number of passengers worked t1', len(passenger_times))
print('sanity check for number of drivers t1',len(driver_profits))
print('stdev of passenger times t1',np.std(passenger_times))
print('stdev of driver profits t1',np.std(driver_profits))

# Dijkstra's Test
Single passenger and single driver

In [None]:
#test passenger
p1_test = passengers.iloc[0,:]
p1 = passenger(p1_test['Date/Time'], p1_test['Source Lat'], p1_test['Source Lon'], p1_test['Dest Lat'], p1_test['Dest Lon'])
print(vars(p1))

In [None]:
#test driver
d1_test = drivers.iloc[0,:]
d1 = driver(d1_test['Date/Time'], d1_test['Source Lat'], d1_test['Source Lon'])
print(vars(d1))

https://docs.python.org/3/library/heapq.html
`heapq` — the standard-library priority queue (PQ) implementation used here

https://builtin.com/software-engineering-perspectives/dijkstras-algorithm
Dijkstra's algorithm with `heapq` as the PQ implementation

In [None]:
#djikstras() also needs the departure datetime (its hour selects the edges), the day type
#and a start time; use the passenger's appearance time and the 'weekday' type that ride() uses
time, parent = djikstras(p1.source_node, p1.dest_node, p1.appear, 'weekday', p1.appear)
print('route time (hours):', time[p1.dest_node]) #get time from source to destination
path = get_path(parent, p1.source_node, p1.dest_node) #retrieve path from parent dict (destination first)
nodes_df = pd.DataFrame(nodes).T #one row per node id, columns taken from the node records
path_coords = nodes_df.reindex(path[::-1]) #path coordinates in travel order (to verify graphically)
print('path trajectory:')
print(path_coords.head(5))

In [None]:
#plot nodes and path coords
plt.figure(figsize=(20, 20))
sns.scatterplot(nodes_df, x='lon', y='lat', size=1, alpha=0.6, linewidth=0, label='node', color='grey')
sns.lineplot(path_coords, x='lon', y='lat', color='red', alpha=1, label='path')
plt.plot([p1.source[1]], [p1.source[0]], marker='*', color='orange', markersize=20, label='p source')
plt.plot([d1.source[1]], [d1.source[0]], marker='^', color='orange', markersize=20, label='d source')
plt.plot([p1.dest[1]], [p1.dest[0]], marker='*', color='blue', markersize=20, label='p dest')
plt.legend()