In [2]:
# This file matches each Manhattan bus stop (from the GTFS stops.txt) to the
# nearest node of its route's shapefile graph and exports the result to
# stops_nodes.csv.

In [97]:
from os import listdir
from rtree.index import Index as RTreeIndex
from shapely.geometry import LineString
from shapely.geometry import MultiLineString
from shapely.geometry import Point
from shapely.ops import snap
from tqdm import tqdm
import geopandas as gpd
import json
import matplotlib
import matplotlib.cm as cm
import matplotlib.colors as colors
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import numpy as npm
import osmnx as ox
import pandas as pd
import re
import shapely

import warnings
warnings.filterwarnings('ignore')
%matplotlib inline

In [98]:
# Coordinate reference system tag attached to every route graph in get_mg()
# (EPSG:4326, i.e. WGS 84 longitude/latitude).
CRS_LATLON = 'EPSG:4326'
# Root of the data directory, given as a relative path.
DATA_DIR = '../../data'
# Root for exported artifacts (JSON, shapefiles, CSV) underneath DATA_DIR.
EXPORTS_DIR = f'{DATA_DIR}/exports'

In [99]:
def peek(df):
    """Print the number of rows in ``df`` and display its first three rows."""
    row_count = len(df)
    print(row_count)
    preview = df.head(3)
    display(preview)

In [136]:
# Load the GTFS stops table keyed by stop_id. The 'node' column starts out
# empty and is populated later by fill_nodes().
stops_df = (
    pd.read_csv(f'{DATA_DIR}/gtfs/manhattan/stops.txt')
    .set_index('stop_id')
)
stops_df['node'] = None
peek(stops_df)

1808


Unnamed: 0_level_0,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,node
stop_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
400001,4 AV/E 10 ST,,40.731342,-73.990292,,,0,,
400002,4 AV/E 12 ST,,40.732608,-73.989958,,,0,,
400003,4 AV/E 13 ST,,40.733936,-73.98972,,,0,,


In [101]:
# Start from an empty mapping, then replace it with the exported JSON contents.
trip_stop_sequence_dict = {}
sequence_path = f'{EXPORTS_DIR}/json/manhattan/trip_stop_sequence_dict.json'
with open(sequence_path, 'r') as json_file:
    trip_stop_sequence_dict = json.load(json_file)

In [116]:
# Collect the ids of all routes that have exported files. Several files can
# share one stem (presumably the shapefile sidecar files -- confirm), so the
# stems are de-duplicated through a set. Only ids of the form "M<digits>" are
# kept, and the result is sorted lexicographically (M1, M10, M100, ...).
route_files = listdir(f'{EXPORTS_DIR}/shp/manhattan/routes')
route_stems = {name.split('.')[0] for name in route_files}
# Raw string: '\d' inside a plain string literal is an invalid escape sequence.
route_ids = sorted(stem for stem in route_stems if re.match(r'M\d+$', stem))

In [118]:
def get_mg(route_id):
    """Load the exported shapefile of one route as a NetworkX graph.

    Args:
        route_id: Route identifier; used as the shapefile's base name.

    Returns:
        The graph read with ``simplify=False``, with ``graph['crs']`` set to
        ``CRS_LATLON``.
    """
    # NOTE(review): nx.read_shp was removed in NetworkX 3.0 -- confirm the
    # pinned NetworkX version before re-running this notebook.
    shapefile_path = f'{EXPORTS_DIR}/shp/manhattan/routes/{route_id}.shp'
    graph = nx.read_shp(shapefile_path, simplify=False)
    graph.graph['crs'] = CRS_LATLON
    return graph

In [119]:
# One route graph per route id, index-aligned with route_ids.
route_mgs = list(map(get_mg, route_ids))

In [160]:
def closest_node(station, nodes):
    """Returns the closest node of the given nodes to the given station.

    Args:
        station: Geometry exposing ``distance(other)``; fill_nodes() passes a
            shapely ``Point``.
        nodes: Iterable of point geometries exposing ``x`` and ``y``.

    Returns:
        A new ``Point`` built from the x/y of the nearest node. When several
        nodes are equally close, the first one in iteration order wins.

    Raises:
        ValueError: If ``nodes`` is empty.
    """
    # No distance cutoff is applied: the nearest candidate is always returned,
    # however far away it is. An empty candidate list is reported explicitly
    # instead of failing later on a ``None`` attribute access.
    nearest = min(nodes, key=station.distance, default=None)
    if nearest is None:
        raise ValueError('closest_node() requires at least one candidate node')
    return Point(nearest.x, nearest.y)

In [171]:
def fill_nodes(route_id, route_mg):
    """Snap every stop of one route to its nearest node in the route graph.

    Stop ids are gathered from all ``trip_stop_sequence_dict`` entries whose
    key starts with ``'<route_id>,'``. Each stop is matched to the closest node
    of ``route_mg`` and that node's WKT is written into the module-level
    ``stops_df``: once in the shared ``'node'`` column and once in a column
    named after the route.

    Args:
        route_id: Route identifier; also the name of the per-route column.
        route_mg: Route graph whose nodes are indexable as ``(x, y)``.

    Returns:
        True if the route has at least one stop, False otherwise. The
        per-route column is created (filled with None) in both cases.
    """
    print(route_id)
    key_prefix = f'{route_id},'
    # Each matching entry is a dict whose values are stop-id sequences, so two
    # levels are flattened; the set removes stops shared between sequences.
    stop_ids = sorted({
        stop_id
        for key in trip_stop_sequence_dict.keys()
        if key.startswith(key_prefix)
        for stop_sequence in trip_stop_sequence_dict[key].values()
        for stop_id in stop_sequence
    })
    stop_rows = [stops_df.loc[stop_id] for stop_id in stop_ids]
    stop_coords = [(row['stop_lon'], row['stop_lat']) for row in stop_rows]

    print(len(stop_ids))

    stops_df[route_id] = None
    if not stop_ids:
        return False

    graph_points = [Point(node[0], node[1]) for node in route_mg.nodes()]
    # Resolve every stop first, then write, so a failure leaves stops_df rows
    # untouched.
    snapped_points = [
        closest_node(Point(lon, lat), graph_points)
        for lon, lat in stop_coords
    ]

    for stop_id, node_point in zip(stop_ids, snapped_points):
        node_wkt = node_point.wkt
        stops_df.loc[stop_id, 'node'] = node_wkt
        stops_df.loc[stop_id, route_id] = node_wkt
    return True

In [172]:
# Snap the stops of every route and remember the routes that had any stops.
valid_routes = []
for route_id, route_mg in zip(route_ids, route_mgs):
    if fill_nodes(route_id, route_mg):
        valid_routes.append(route_id)

M1
134
M10
93
M100
0
M101
158
M102
119
M103
111
M104
81
M106
33
M11
120
M116
42
M12
41
M15
125
M191
0
M2
146
M20
81
M21
41
M22
43
M3
159
M31
57
M35
22
M4
153
M42
34
M5
125
M50
32
M55
67
M57
44
M66
26
M7
119
M72
38
M8
43
M9
56
M96
24


In [173]:
# Keep only the stops that were matched to a node, then drop the GTFS columns
# and the helper 'node' column that are not wanted in the export.
has_node = stops_df['node'].notna()
dropped_columns = [
    'location_type',
    'node',
    'parent_station',
    'stop_desc',
    'stop_url',
    'zone_id',
]
valid_stops_df = stops_df.loc[has_node].drop(columns=dropped_columns)
peek(valid_stops_df)

1780


Unnamed: 0_level_0,stop_name,stop_lat,stop_lon,M1,M10,M100,M101,M102,M103,M104,...,M5,M50,M55,M57,M66,M7,M72,M8,M9,M96
stop_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
400001,4 AV/E 10 ST,40.731342,-73.990292,POINT (-73.99037 40.73135299999986),,,,,,,...,,,,,,,,,,
400002,4 AV/E 12 ST,40.732608,-73.989958,POINT (-73.990059 40.73262099999981),,,,,,,...,,,,,,,,,,
400003,4 AV/E 13 ST,40.733936,-73.98972,POINT (-73.98985 40.73393799999985),,,,,,,...,,,,,,,,,,


In [174]:
# Written under EXPORTS_DIR (== f'{DATA_DIR}/exports'), the shared root used
# for exported artifacts.
valid_stops_df.to_csv(f'{EXPORTS_DIR}/csv/manhattan/stops_nodes.csv')