### Packages

In [1]:
import osmnx as ox
import os
import warnings
import geopandas as gpd
import pandas as pd
import numpy as np
import networkx as nx
#nx.shortest_path

#ox.distance.nearest_edges
# Suppress every warning and relax the pandas display limits for interactive inspection.
warnings.filterwarnings(action = 'ignore')
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', None)

### Directory and parameters

In [4]:
# Root folder for input and intermediate data, relative to this notebook.
data_dir = '../data'

# Geographic (lon/lat) CRS and the projected CRS used for metre-based operations.
CRS, CRS_METER = 'EPSG:4326', 'EPSG:3857'

# Route pattern processed by this notebook.
route_pattern_id = 'G801'

### Load Data

In [9]:
def create_point_gdf(df, lon, lat, crs = 'EPSG:4326'):
    """
    Build a point GeoDataFrame from a DataFrame.

    `lon` and `lat` name the columns of `df` that hold the x and y
    coordinates; `crs` is the coordinate reference system assigned to
    the result.
    """
    point_geometry = gpd.points_from_xy(df[lon], df[lat])
    return gpd.GeoDataFrame(df, geometry = point_geometry, crs = crs)

In [14]:
def read_stop_sequence(stop_seq_csv, crs, route_pattern_id):
    """
    Read the stop-sequence CSV and return the stops of one route pattern.

    Column names are lower-cased, `shape_id` is added as a copy of
    `pattern_id`, the table is turned into a point GeoDataFrame in `crs`,
    and only rows whose `shape_id` equals `route_pattern_id` are kept,
    sorted by `stop_sequence`.
    """
    raw = pd.read_csv(stop_seq_csv).rename(columns = str.lower)
    raw = raw.assign(shape_id = raw['pattern_id'])

    kept_columns = ['route_id', 'pattern_id', 'shape_id', 'stopid',
                    'stop_sequence', 'longitude', 'latitude', 'geodescription']
    stops = create_point_gdf(raw[kept_columns], lon = 'longitude', lat = 'latitude', crs = crs)

    on_pattern = stops['shape_id'] == route_pattern_id
    return stops.loc[on_pattern].sort_values(by = ['stop_sequence'])

In [25]:
# Load the clipped street sub-graph for this route pattern and add edge bearings.
graph_path = os.path.join(data_dir, 'interim', f'{route_pattern_id}_SUBGRAPH_HARDCLIPPED.graphml')
G = ox.add_edge_bearings(ox.load_graphml(graph_path))

# Node and edge tables; the edge index levels are moved into regular columns.
nodes, edges = ox.graph_to_gdfs(G)
edges = edges.reset_index()

In [26]:
# Stops of the selected route pattern, ordered by stop sequence.
stop_sequence_csv = os.path.join(data_dir, 'stop_sequence', 'bus_net_stop_sequence.csv')
route_stop_seq = read_stop_sequence(stop_sequence_csv, CRS, route_pattern_id)

# Shape points of the selected route pattern.
shapes_path = os.path.join(data_dir, 'interim', f'{route_pattern_id}_ROUTE_SHAPES.geojson')
shapes = gpd.read_file(shapes_path)

### Attach origin and destination edges to each pair of consecutive shape points

In [27]:
# Pair every shape point with the point that follows it; the last point has no successor and is dropped.
shapes = shapes.assign(
    shape_pt_lat_next = shapes['shape_pt_lat'].shift(-1),
    shape_pt_lon_next = shapes['shape_pt_lon'].shift(-1),
).dropna(subset = ['shape_pt_lat_next', 'shape_pt_lon_next'])

lon_from, lat_from = shapes['shape_pt_lon'], shapes['shape_pt_lat']
lon_to, lat_to = shapes['shape_pt_lon_next'], shapes['shape_pt_lat_next']

# Nearest graph edge to each end of the pair, and the bearing from the point to its successor.
shapes['orig_edge'] = ox.nearest_edges(G, lon_from, lat_from)
shapes['dest_edge'] = ox.nearest_edges(G, lon_to, lat_to)
shapes['shape_bearing'] = ox.bearing.calculate_bearing(lat_from, lon_from, lat_to, lon_to)

In [28]:
# Preview the first five rows of the prepared shape table.
shapes.head(n = 5)

Unnamed: 0,shape_id,shape_pt_lat,shape_pt_lon,shape_pt_sequence,shape_dist_traveled,geometry,shape_pt_lat_next,shape_pt_lon_next,orig_edge,dest_edge,shape_bearing
0,G801,38.945938,-76.977158,10001,,POINT (-76.97716 38.94594),38.945808,-76.97699,"(7800984659, 7521666505, 0)","(7800984659, 7521666505, 0)",134.854798
1,G801,38.945808,-76.97699,10002,,POINT (-76.97699 38.94581),38.945675,-76.976822,"(7800984659, 7521666505, 0)","(7521666505, 7800984659, 0)",135.50831
2,G801,38.945675,-76.976822,10003,,POINT (-76.97682 38.94568),38.945541,-76.976654,"(7521666505, 7800984659, 0)","(7800984659, 7521666505, 0)",135.722798
3,G801,38.945541,-76.976654,10004,,POINT (-76.97665 38.94554),38.945408,-76.976486,"(7800984659, 7521666505, 0)","(7800984659, 7521666505, 0)",135.508202
4,G801,38.945408,-76.976486,10005,,POINT (-76.97649 38.94541),38.945274,-76.976318,"(7800984659, 7521666505, 0)","(7800984659, 7521666505, 0)",135.72269


### Compare bearing to correct the direction of OSM edges

In [29]:
# Split each matched edge tuple into its end nodes, in stored and in reversed order.
for _side in ('orig', 'dest'):
    _edge = shapes[f'{_side}_edge']
    shapes[f'{_side}_u'] = _edge.str[0]
    shapes[f'{_side}_v'] = _edge.str[1]
    shapes[f'rev_{_side}_u'] = _edge.str[1]
    shapes[f'rev_{_side}_v'] = _edge.str[0]

# Reversed edge tuples; the third element (edge key) is fixed at 0.
for _side in ('orig', 'dest'):
    shapes[f'rev_{_side}_edge'] = [
        (u, v, 0) for u, v in zip(shapes[f'rev_{_side}_u'], shapes[f'rev_{_side}_v'])
    ]

In [30]:
# Bearing lookup keyed on (u, v). The edge table can hold several rows for the
# same (u, v) pair (parallel edges with different keys); merging on (u, v) alone
# would then multiply shape rows, so the lookup is reduced to one row per pair.
edge_bearings = edges[['u', 'v', 'bearing']].drop_duplicates(subset = ['u', 'v'])

# Bearing of the matched origin edge as stored in the graph.
shapes = shapes.merge(edge_bearings.rename(columns = {'u': 'orig_u', 'v': 'orig_v', 'bearing': 'orig_bearing'}),
                      on = ['orig_u', 'orig_v'],
                      how = 'left')

# Bearing of the opposite-direction edge; stays NaN when the graph has no such edge.
shapes = shapes.merge(edge_bearings.rename(columns = {'u': 'rev_orig_u', 'v': 'rev_orig_v', 'bearing': 'rev_orig_bearing'}),
                      on = ['rev_orig_u', 'rev_orig_v'],
                      how = 'left')

In [32]:
def get_bearing_difference(b1, b2):
    """
    Return the smallest angle, in degrees, between two bearings.

    The absolute difference is wrapped into [0, 360) and then folded so
    that the result never exceeds 180.
    """
    gap = abs(b1 - b2) % 360
    return 360 - gap if gap > 180 else gap

In [33]:
bearing_gap = np.vectorize(get_bearing_difference)

# Angular gap between the shape direction and the origin edge, in each direction.
shapes['bearing_diff_orig'] = bearing_gap(shapes['shape_bearing'], shapes['orig_bearing'])
shapes['bearing_diff_rev_orig'] = bearing_gap(shapes['shape_bearing'], shapes['rev_orig_bearing'])

# Keep whichever direction is better aligned with the shape.
orig_is_aligned = shapes['bearing_diff_orig'] <= shapes['bearing_diff_rev_orig']
shapes['corrected_orig_edge'] = np.where(orig_is_aligned, shapes['orig_edge'], shapes['rev_orig_edge'])

# No reverse bearing available: fall back to the matched edge.
shapes.loc[shapes['bearing_diff_rev_orig'].isna(), 'corrected_orig_edge'] = shapes['orig_edge']

In [34]:
# Bearing lookup keyed on (u, v). The edge table can hold several rows for the
# same (u, v) pair (parallel edges with different keys); merging on (u, v) alone
# would then multiply shape rows, so the lookup is reduced to one row per pair.
edge_bearings = edges[['u', 'v', 'bearing']].drop_duplicates(subset = ['u', 'v'])

# Bearing of the matched destination edge as stored in the graph.
shapes = shapes.merge(edge_bearings.rename(columns = {'u': 'dest_u', 'v': 'dest_v', 'bearing': 'dest_bearing'}),
                      on = ['dest_u', 'dest_v'],
                      how = 'left')

# Bearing of the opposite-direction edge; stays NaN when the graph has no such edge.
shapes = shapes.merge(edge_bearings.rename(columns = {'u': 'rev_dest_u', 'v': 'rev_dest_v', 'bearing': 'rev_dest_bearing'}),
                      on = ['rev_dest_u', 'rev_dest_v'],
                      how = 'left')

In [35]:

bearing_gap = np.vectorize(get_bearing_difference)

# Angular gap between the shape direction and the destination edge, in each direction.
shapes['bearing_diff_dest'] = bearing_gap(shapes['shape_bearing'], shapes['dest_bearing'])
shapes['bearing_diff_rev_dest'] = bearing_gap(shapes['shape_bearing'], shapes['rev_dest_bearing'])

# Keep whichever direction is better aligned with the shape.
dest_is_aligned = shapes['bearing_diff_dest'] <= shapes['bearing_diff_rev_dest']
shapes['corrected_dest_edge'] = np.where(dest_is_aligned, shapes['dest_edge'], shapes['rev_dest_edge'])

# No reverse bearing available: fall back to the matched edge.
shapes.loc[shapes['bearing_diff_rev_dest'].isna(), 'corrected_dest_edge'] = shapes['dest_edge']

In [36]:
# 1 when both ends of the pair resolve to the same corrected edge, else 0.
same_edge = shapes['corrected_orig_edge'] == shapes['corrected_dest_edge']
shapes['single_edge'] = same_edge.astype(int)

# Path endpoints: start node of the origin edge, end node of the destination edge.
shapes['corrected_u'] = shapes['corrected_orig_edge'].str[0]
shapes['corrected_v'] = shapes['corrected_dest_edge'].str[1]

### Get the shortest path for each consecutive shape-point pair, in sequence, and keep unique segments

In [40]:
# Unique (start node, end node) pairs, numbered in order of first appearance.
od_pairs = shapes[['corrected_u', 'corrected_v']]
od_shapes = od_pairs.drop_duplicates()
od_shapes['order'] = [rank for rank in range(1, len(od_shapes) + 1)]

# Node list of the shortest path for every pair (stored in column 'u' before being exploded).
od_shapes['u'] = ox.shortest_path(G, od_shapes['corrected_u'], od_shapes['corrected_v'])

In [41]:
# One row per path node; 'v' is the node on the following row.
od_shapes = od_shapes.explode('u')
od_shapes['v'] = od_shapes['u'].shift(-1)

# The last node of each path would pair with the first node of the next path, so tag and drop it.
ends_its_path = od_shapes['order'] != od_shapes['order'].shift(-1)
od_shapes['remove_tag'] = ends_its_path.astype(int)
od_shapes = od_shapes.loc[od_shapes['remove_tag'] != 1].drop_duplicates(subset = ['u', 'v'])

### Order the edges into a continuous path using a forward search

In [43]:
# (u, v) node pairs of the de-duplicated path segments, in table order.
path = list(zip(od_shapes.u, od_shapes.v))

# Candidate edges still available to extend the chain; the chain starts at the first edge.
searchables = path[1:].copy()
segment = [path[0]]
i = path[0]


def find_next(m, s):
    """Return the first edge in `s` whose start node equals the end node of `m`, or None if there is none."""
    for n in s:
        if m[1] == n[0]:
            return n


while len(searchables) > 0:
    #print(f'Current Segment: {i}')
    #print(f'Available options: {searchables}')
    j = find_next(i, searchables)
    if j is not None:
        # Extend the chain with the matching edge and continue from it.
        segment.append(j)
        i = j
        # NOTE(review): this drops the FIRST candidate, which is not necessarily `j`;
        # `j` can stay in `searchables` and be matched again later. Confirm this is intended
        # (the commented-out `remove` below suggests removing the matched edge was tried).
        searchables = searchables[1:]
        # searchables.remove(i)
        #print(f'Next Segment: {j}')
    else:
        # Dead end: discard the last edge of the chain and retry from the edge before it.
        # NOTE(review): `segment[-1]` raises IndexError if this empties `segment`.
        segment = segment[:-1]
        i = segment[-1]

### Construct route path

In [125]:
# One row per ordered edge of the route.
route_path = pd.DataFrame({
    'u': [start for start, _ in segment],
    'v': [end for _, end in segment],
})

# 'edge_code' is "<u>-<v>"; 'edge_order' is the 1-based position along the route.
route_path['edge_code'] = route_path['u'].astype(str) + '-' + route_path['v'].astype(str)
route_path['edge_order'] = list(range(1, len(route_path) + 1))

In [126]:
# Attach edge geometries (re-projected to CRS_METER) while keeping the route's edge order,
# then keep one row per distinct geometry.
edge_geoms_m = edges[['u', 'v', 'geometry']].to_crs(CRS_METER)
route_path = edge_geoms_m.merge(route_path, on = ['u', 'v'], how = 'right')
route_path = route_path.drop_duplicates('geometry')

In [128]:
FEET_PER_METER = 3.28084

# Lengths measured directly in EPSG:3857 (Web Mercator) are inflated by roughly
# 1 / cos(latitude), so the geometries are re-projected to the UTM zone estimated
# from the data before measuring. route_path itself stays in its current CRS.
edge_geoms_utm = route_path.geometry.to_crs(route_path.estimate_utm_crs())
route_path['edge_len_ft'] = edge_geoms_utm.length * FEET_PER_METER

# Direction-independent edge label: "<smaller node id>-<larger node id>".
u_text, v_text = route_path['u'].astype(str), route_path['v'].astype(str)
route_path['node_list'] = np.where(route_path['u'] < route_path['v'],
                                   u_text + '-' + v_text,
                                   v_text + '-' + u_text)

### Merge stop sequence

In [134]:
def project_point_on_route_line(pt, route_line_geom):
    """
    Return the point on `route_line_geom` at the position `pt` projects to.
    """
    distance_along = route_line_geom.project(pt)
    return route_line_geom.interpolate(distance_along)

def project_stops_on_route_line(stop_seq, route_line):
    """
    Return a copy of `stop_seq`, re-projected to CRS_METER, in which every
    stop geometry is replaced by its projection onto `route_line`.
    """
    snapped = stop_seq.copy().to_crs(CRS_METER)
    snapped['geometry'] = snapped['geometry'].apply(
        lambda stop_pt: project_point_on_route_line(stop_pt, route_line)
    )
    return snapped

In [148]:
# Snap the stops onto the dissolved route line.
route_line = route_path.dissolve().geometry[0]
route_stop_seq = project_stops_on_route_line(route_stop_seq, route_line)

# Buffer every edge by 5 CRS units so that the snapped stops fall inside an edge polygon.
route_path_buffer = route_path.assign(geometry = route_path.buffer(5))

# Assign each stop to an edge buffer that contains it; keep one match per stop.
stops_on_edges = gpd.sjoin(route_stop_seq, route_path_buffer, how = 'left', predicate = 'within')
stops_on_edges = stops_on_edges.drop_duplicates('stop_sequence')
route_stop_seq_with_path = stops_on_edges[['stop_sequence', 'stopid', 'pattern_id', 'edge_code', 'edge_len_ft']]

# Edges in route order, carrying the stops located on them (if any).
route_path_with_stops = route_path.merge(route_stop_seq_with_path, on = ['edge_code', 'edge_len_ft'], how = 'left')

### Create new columns

In [149]:
# Carry the most recent stop sequence forward over edges that hold no stop.
# `.ffill()` replaces the deprecated `fillna(method = 'ffill')`.
# NOTE(review): `astype(int)` raises if the leading edges precede the first stop (NaN remains).
stop_seq_filled = route_path_with_stops['stop_sequence'].astype(float).ffill()
route_path_with_stops['from_stop_seq'] = stop_seq_filled.astype(int)

route_path_with_stops['to_stop_seq'] = route_path_with_stops['from_stop_seq'] + 1

# Shift the row with the largest index label back by one stop, so that it runs
# from the previous stop to its own stop instead of past it.
last_label = route_path_with_stops.index.max()
route_path_with_stops.loc[last_label:, 'from_stop_seq'] -= 1
route_path_with_stops.loc[last_label:, 'to_stop_seq'] -= 1

In [151]:
# Stop-id lookups keyed by stop sequence, one for each end of a link.
stop_lookup = route_stop_seq[['stopid', 'stop_sequence']]
from_stops = stop_lookup.rename(columns = {'stopid': 'from_stopid', 'stop_sequence': 'from_stop_seq'})
to_stops = stop_lookup.rename(columns = {'stopid': 'to_stopid', 'stop_sequence': 'to_stop_seq'})

# Inner joins: rows whose from/to sequence has no matching stop are dropped.
route_path_with_stops = (route_path_with_stops
                         .merge(from_stops, on = 'from_stop_seq')
                         .merge(to_stops, on = 'to_stop_seq'))

# Split "<u>-<v>" back into its two node ids (as strings).
node_pair = route_path_with_stops['edge_code'].str.split('-', expand = True)
route_path_with_stops['from_node'] = node_pair[0]
route_path_with_stops['to_node'] = node_pair[1]

route_path_with_stops['link_code'] = (route_path_with_stops['from_stopid'].astype(str)
                                      + '-'
                                      + route_path_with_stops['to_stopid'].astype(str))
route_path_with_stops['pattern_id'] = route_pattern_id

In [155]:
# Columns exported for the final link table, re-projected to the geographic CRS.
final_columns = ['pattern_id', 'stop_sequence', 'stopid',
                 'edge_order', 'edge_code', 'edge_len_ft',
                 'from_node', 'to_node', 'geometry']

# The fill is keyed on the real column name 'pattern_id'; the previous key
# 'PATTERN_ID' matched no column, so the fill was silently skipped.
final_net_link = (route_path_with_stops[final_columns]
                  .to_crs(CRS)
                  .fillna({'pattern_id': route_pattern_id}))

In [157]:
# Write the final link table next to the other intermediate outputs.
final_links_path = os.path.join(data_dir, 'interim', f'{route_pattern_id}_FINAL_BUS_NET_LINKS.geojson')
final_net_link.to_file(final_links_path)