In [1]:
from data.graph_loader import GraphLoader
from data.gtfs_loader import GTFSLoader
import pandas as pd
import geopandas as gpd
import osmnx as ox
import networkx as nx

def openMap(m, html="map.html"):
    """Save an interactive map to an HTML file and open it in the default browser.

    Args:
        m: Map object exposing ``save(path)``; in this notebook it is the
            value returned by ``GeoDataFrame.explore()``.
        html: Path of the HTML file to write. Defaults to ``"map.html"``
            relative to the current working directory.
    """
    # Kept as function-local imports: only this helper needs them.
    import webbrowser
    from pathlib import Path

    m.save(html)

    # webbrowser.open() is specified for URLs; passing a bare file name is
    # documented as unsupported and non-portable, so hand it a file:// URI
    # built from the absolute path instead.
    webbrowser.open(Path(html).resolve().as_uri())

In [2]:
# Obtain the walking graph through the project's GraphLoader.
# The recorded output below shows it being read from data/graphs/ZMG_walk.pkl.
# NOTE(review): the second argument is presumably the boundary used when the
# graph has to be built from scratch — confirm in GraphLoader.create_graph_walk.
graph_loader = GraphLoader()
graph_walk = graph_loader.create_graph_walk("data/graphs/ZMG_walk", "data/osm/ZMG_enclosure_2km.geojson")


Loading graph from data/graphs/ZMG_walk.pkl


In [3]:
# Load the GTFS feed from data/gtfs through the project's GTFSLoader.
# stops_df is derived from the same folder plus transit_df; create_graph_transit
# later reads its stop_id, stop_name, stop_lat, stop_lon, geometry,
# routes_by_stop and next_stop_id columns.
# The DataFrame.info() dumps recorded below are emitted during these calls.
gtfs_loader = GTFSLoader()
transit_df = gtfs_loader.load_transit_dataframe("data/gtfs")
stops_df = gtfs_loader.load_stops_dataframe("data/gtfs", transit_df)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12231 entries, 0 to 12230
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   stop_id    12231 non-null  object
 1   stop_name  12231 non-null  object
 2   stop_lat   12231 non-null  object
 3   stop_lon   12231 non-null  object
dtypes: object(4)
memory usage: 382.3+ KB
None
<class 'pandas.core.frame.DataFrame'>
Index: 10345 entries, 57 to 12230
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   stop_id    10345 non-null  object
 1   stop_name  10345 non-null  object
 2   stop_lat   10345 non-null  object
 3   stop_lon   10345 non-null  object
dtypes: object(4)
memory usage: 404.1+ KB
None
<class 'pandas.core.frame.DataFrame'>
Index: 10345 entries, 57 to 12230
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   stop_id    10345 non-null  object 
 1

In [4]:
def create_graph_transit(transit_df, stops_df):
    """Assemble the directed transit network from the per-stop table.

    Each row of ``stops_df`` becomes one node keyed by its ``stop_id``. The
    row's ``next_stop_id`` mapping (successor stop id -> dict of edge
    attributes) becomes that node's outgoing edges.

    Args:
        transit_df: Not read by this function; kept so existing callers
            keep working.
        stops_df: Table with the columns ``stop_id``, ``geometry``,
            ``stop_name``, ``stop_lon``, ``stop_lat``, ``routes_by_stop``
            and ``next_stop_id``.

    Returns:
        An ``nx.DiGraph`` whose nodes carry the attributes ``pos``,
        ``stop_name``, ``x``, ``y`` and ``routes``.
    """
    transit_graph = nx.DiGraph()

    for _, row in stops_df.iterrows():
        origin = row['stop_id']
        transit_graph.add_node(
            origin,
            pos=row['geometry'],
            stop_name=row['stop_name'],
            x=row['stop_lon'],
            y=row['stop_lat'],
            routes=row['routes_by_stop'],
        )

        # One outgoing edge per successor; the stored dict becomes the edge attributes.
        successors = row['next_stop_id']
        for successor, attributes in successors.items():
            transit_graph.add_edge(origin, successor, **attributes)

    return transit_graph

# Build the directed transit graph once; the routing cells below run on it.
graph_transit = create_graph_transit(transit_df, stops_df)

   

In [5]:
# Transit stops as a GeoDataFrame in EPSG:4326, using the existing 'geometry' column.
stops_gdf = gpd.GeoDataFrame(stops_df, geometry='geometry', crs='EPSG:4326')

# Node table of the walking graph (nodes=True, edges=False: no edge table is requested).
walk_gdf = ox.graph_to_gdfs(
    graph_walk,
    nodes=True,
    edges=False,
    fill_edge_geometry=True,
)
# The same node table wrapped in a plain pandas DataFrame.
walk_df = pd.DataFrame(walk_gdf)

In [32]:
import heapq
from shapely import Point, LineString
#src and dst are Point
def euclidean_heuristic(src:Point, dst:Point, speed_kph:float=55.0, metric_crs="EPSG:32613") -> int:
    """Estimate the travel time from ``src`` to ``dst`` in whole seconds.

    The two points are given in EPSG:4326, reprojected to ``metric_crs`` so
    their straight-line distance is in metres, and that distance is divided
    by an assumed travel speed.

    Args:
        src: Start point in EPSG:4326.
        dst: Target point in EPSG:4326.
        speed_kph: Assumed transit speed in km/h. For the estimate to be a
            valid A* lower bound, no connection may be faster than this.
        metric_crs: Projected CRS used for the distance. Defaults to
            EPSG:32613 (WGS 84 / UTM zone 13N).

    Returns:
        The estimated time in seconds, truncated to an integer.

    Raises:
        ValueError: If ``speed_kph`` is not positive.
    """
    if speed_kph <= 0:
        raise ValueError("speed_kph must be positive")

    speed_mps = speed_kph / 3.6  # km/h -> m/s

    # Reproject both points together, then measure in the projected plane.
    pts_m = gpd.GeoSeries([src, dst], crs="EPSG:4326").to_crs(metric_crs)
    distance_meters = pts_m.iloc[0].distance(pts_m.iloc[1])

    # Truncate instead of round(): rounding up could push the estimate above
    # the true lower bound, which breaks A* admissibility.
    return int(distance_meters / speed_mps)
 

def dijkstra(graph:"nx.Graph", src, dst, heuristic=None):
    """Find a least-cost path from ``src`` to ``dst``.

    With ``heuristic=None`` this is Dijkstra's algorithm. With a heuristic,
    nodes are expanded in order of ``cost so far + estimated remaining cost``
    (A*). Expanded nodes are never reopened, so the result is only guaranteed
    to be optimal when the heuristic never overestimates and is consistent.

    Edge costs are read from the ``'weight'`` edge attribute (1 when absent)
    and are expected to be non-negative.

    Args:
        graph: networkx-style graph. The function uses ``graph.neighbors(n)``,
            ``graph[u][v]`` and, when a heuristic is active,
            ``graph.nodes[n]['pos']``.
        src: Start node.
        dst: Target node.
        heuristic: ``None`` for plain Dijkstra; the string ``"euclidean"``
            (case-insensitive) to use ``euclidean_heuristic``; or a callable
            ``h(node_pos, dst_pos)`` returning the estimated remaining cost.
            Any other string disables the heuristic.

    Returns:
        ``(path, cost)`` where ``path`` is the list of nodes from ``src`` to
        ``dst`` and ``cost`` the accumulated weight, or ``(None, None)`` when
        ``dst`` cannot be reached from ``src``.
    """
    # Resolve the heuristic option once instead of re-parsing it for every edge.
    if heuristic is None:
        estimate = None
    elif callable(heuristic):
        estimate = heuristic
    elif heuristic.lower() == "euclidean":
        estimate = euclidean_heuristic
    else:
        estimate = None  # unrecognised name: behave like plain Dijkstra

    dst_pos = graph.nodes[dst]['pos'] if estimate is not None else None
    # The estimate depends only on the node, so compute it at most once per
    # node; the euclidean one reprojects coordinates and is costly.
    estimate_cache = {}

    # Best known cost from src, and the predecessor on that best route.
    cost = {src: 0}
    previous = {src: None}
    # Nodes whose outgoing edges have already been relaxed.
    visited = set()

    # Heap entries are (priority, insertion order, node). The insertion
    # counter breaks priority ties, so node ids are never compared and do
    # not need to be orderable.
    pushes = 0
    queue = [(0, pushes, src)]

    while queue:
        _, _, current = heapq.heappop(queue)
        if current in visited:
            continue  # stale entry left behind by a later, cheaper push
        if current == dst:
            break  # destination settled; no need to expand it
        visited.add(current)

        for neighbor in graph.neighbors(current):
            if neighbor in visited:
                continue
            tentative_cost = cost[current] + graph[current][neighbor].get('weight', 1)
            known_cost = cost.get(neighbor)
            if known_cost is not None and known_cost <= tentative_cost:
                continue  # no improvement over the route already known

            cost[neighbor] = tentative_cost
            previous[neighbor] = current

            if estimate is None:
                remaining = 0
            else:
                if neighbor not in estimate_cache:
                    estimate_cache[neighbor] = estimate(graph.nodes[neighbor]['pos'], dst_pos)
                remaining = estimate_cache[neighbor]

            pushes += 1
            heapq.heappush(queue, (tentative_cost + remaining, pushes, neighbor))
    else:
        # Queue exhausted without ever popping dst: it is unreachable.
        return None, None

    # Walk the predecessor links back from dst, then flip into forward order.
    path = []
    node = dst
    while node is not None:
        path.append(node)
        node = previous[node]
    path.reverse()

    return path, cost[dst]


In [33]:
# Pick a random (start, destination) pair of stops that networkx can connect.
# The networkx route is the reference the custom search is compared against.
import random

# Dedicated, seeded generator so Restart & Run All selects the same pair.
rng = random.Random(42)
# The node list does not change inside the loop, so build it once.
transit_nodes = list(graph_transit.nodes)

path = []

while path == []:
    # sample() draws two different stops, so the route is never the trivial
    # single-node path from a stop to itself.
    start, destination = rng.sample(transit_nodes, 2)

    try:
        path = nx.shortest_path(graph_transit, source=start, target=destination, weight='weight')
    except nx.NetworkXNoPath:
        # Pair is not connected in the directed graph: draw again.
        path = []

print("Shortest path from", start, "to", destination, ":", path)




Shortest path from mxd_mxc_919944283 to mxd_mxc_AMG_T11C02_STP_142 : ['mxd_mxc_919944283', 'mxd_mxc_1423117333', 'mxd_mxc_1843048711', 'mxd_mxc_1425469446', 'mxc_C66V2_STP_01', 'mxc_C66V2_STP_02', 'mxc_C19_STP_03', 'mxc_C02_STP_04', 'mxc_C02_STP_05', 'mxc_C02_STP_06', 'mxc_C02_STP_07', 'mxc_C02_STP_08', 'mxc_C02_STP_09', 'mxc_C02_STP_10', 'mxc_C36_STP_20', 'mxc_C02_STP_12', 'mxc_C02_STP_13', 'mxc_C02_STP_14', 'mxc_C122_STP_58', 'mxc_C02_STP_16', 'mxc_C108V2_STP_09', 'mxc_C108V2_STP_10', 'mxc_C37V1_STP_42', 'mxc_C108V2_STP_12', 'mxc_C38V1_STP_32', 'mxc_C108V2_STP_14', 'mxc_C38V1_STP_34', 'mxc_C38V1_STP_35', 'mxc_C02_STP_29', 'mxc_C02_STP_30', 'mxc_C02_STP_31', 'mxc_C38V1_STP_39', 'mxc_C02_STP_33', 'mxc_C02_STP_34', 'mxc_C02_STP_35', 'mxc_C02_STP_36', 'mxc_C38V1_STP_44', 'mxc_C38V1_STP_45', 'mxc_C02_STP_39', 'mxc_C38V1_STP_47', 'mxc_C38V1_STP_48', 'mxc_C02_STP_42', 'mxc_C02_STP_43', 'mxc_C06_STP_62', 'mxc_C17_STP_40', 'mxc_C02_STP_46', 'mxc_C13V1_STP_53', 'mxc_C06_STP_67', 'mxc_C02_STP_4

In [34]:
# Render every transit stop on an interactive map and open it in the browser.
m = stops_gdf.explore()
openMap(m)

In [35]:
# Run the custom search with the euclidean heuristic on the pair chosen above.
dijkstra_path, dijkstra_cost = dijkstra(graph_transit, start, destination, heuristic="euclidean")
print("Shortest path from", start, "to", destination, ":", dijkstra_path)

# dijkstra() returns (None, None) when no route exists; the division below
# would then raise TypeError.
# NOTE(review): dividing by 60 presumably converts seconds to minutes —
# confirm the unit of the 'weight' edge attribute.
print("Cost of the path:", dijkstra_cost/60)

# Compare the custom route with the networkx reference route node by node.
print("Paths are the same:", path == dijkstra_path)

Shortest path from mxd_mxc_919944283 to mxd_mxc_AMG_T11C02_STP_142 : ['mxd_mxc_919944283', 'mxd_mxc_1423117333', 'mxd_mxc_1843048711', 'mxd_mxc_1425469446', 'mxc_C66V2_STP_01', 'mxc_C66V2_STP_02', 'mxc_C19_STP_03', 'mxc_C02_STP_04', 'mxc_C02_STP_05', 'mxc_C02_STP_06', 'mxc_C02_STP_07', 'mxc_C02_STP_08', 'mxc_C02_STP_09', 'mxc_C02_STP_10', 'mxc_C36_STP_20', 'mxc_C02_STP_12', 'mxc_C02_STP_13', 'mxc_C02_STP_14', 'mxc_C122_STP_58', 'mxc_C02_STP_16', 'mxc_C108V2_STP_09', 'mxc_C108V2_STP_10', 'mxc_C37V1_STP_42', 'mxc_C108V2_STP_12', 'mxc_C38V1_STP_32', 'mxc_C108V2_STP_14', 'mxc_C38V1_STP_34', 'mxc_C38V1_STP_35', 'mxc_C02_STP_29', 'mxc_C02_STP_30', 'mxc_C02_STP_31', 'mxc_C38V1_STP_39', 'mxc_C02_STP_33', 'mxc_C02_STP_34', 'mxc_C02_STP_35', 'mxc_C02_STP_36', 'mxc_C38V1_STP_44', 'mxc_C38V1_STP_45', 'mxc_C02_STP_39', 'mxc_C38V1_STP_47', 'mxc_C38V1_STP_48', 'mxc_C02_STP_42', 'mxc_C02_STP_43', 'mxc_C06_STP_62', 'mxc_C17_STP_40', 'mxc_C02_STP_46', 'mxc_C13V1_STP_53', 'mxc_C06_STP_67', 'mxc_C02_STP_4

In [36]:
# Keep only the stops that belong to the computed route and show them on a map.
# isin() is a membership filter: rows keep the order of stops_gdf, not the
# order in which the route visits them.
path_gdf = stops_gdf.loc[stops_gdf['stop_id'].isin(dijkstra_path)]
m = path_gdf.explore()
openMap(m)

In [37]:
# Spot check: straight-line distance between two specific stops.
# Both positions go into one GeoSeries so a single to_crs() call reprojects them together.
pts_m = gpd.GeoSeries(
    [graph_transit.nodes[stop_id]["pos"] for stop_id in ("mxa_T07_STP_188", "mxa_T07_STP_187")],
    crs="EPSG:4326",
).to_crs(32613)

distance_meters = pts_m.iloc[0].distance(pts_m.iloc[1])
print("Distance in meters:", distance_meters)


Distance in meters: 193.45865111250075
