In [1]:
# This file plots the elevation gradients of bus route road segments with a
# red/blue diverging color map to indicate uphill (red) and downhill (blue)
# slopes.

In [2]:
from os import listdir
from rtree.index import Index as RTreeIndex
from shapely.geometry import LineString
from shapely.geometry import MultiLineString
from shapely.geometry import Point
from shapely.ops import snap
from tqdm import tqdm
import geopandas as gpd
import json
import matplotlib
import matplotlib.cm as cm
import matplotlib.colors as colors
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import numpy as npm
import osmnx as ox
import pandas as pd
import re
import shapely

import warnings
warnings.filterwarnings('ignore')
%matplotlib inline



In [3]:
CRS_LATLON = 'EPSG:4326'
DATA_DIR = '../../data'
EXPORTS_DIR = f'{DATA_DIR}/exports'

In [4]:
def peek(df):
    print(len(df))
    display(df.iloc[0:3])

In [5]:
stops_df = pd.read_csv(f'{DATA_DIR}/gtfs/manhattan/stops.txt')
stops_df = stops_df.set_index('stop_id')
stops_df['node'] = None
peek(stops_df)

1808


Unnamed: 0_level_0,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,node
stop_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
400001,4 AV/E 10 ST,,40.731342,-73.990292,,,0,,
400002,4 AV/E 12 ST,,40.732608,-73.989958,,,0,,
400003,4 AV/E 13 ST,,40.733936,-73.98972,,,0,,


In [6]:
trip_stop_sequence_dict = {}
with open(f'{EXPORTS_DIR}/json/manhattan/trip_stop_sequence_dict.json', 'r') as fp:
    trip_stop_sequence_dict = json.load(fp)

In [7]:
route_ids = listdir(f'{EXPORTS_DIR}/shp/manhattan/routes')
route_ids = [f.split('.')[0] for f in route_ids]
route_ids = list(set([f for f in route_ids if re.match('M\d+$', f)]))
route_ids.sort()

In [8]:
def get_mg(route_id):
    route_mg = nx.read_shp(f'{EXPORTS_DIR}/shp/manhattan/routes/{route_id}.shp', simplify=False)
    route_mg.graph['crs'] = CRS_LATLON
    return route_mg

In [9]:
route_mgs = [get_mg(route_id) for route_id in route_ids]

In [10]:
def closest_node(station, nodes):
    """Returns the closest node of the given nodes to the given station."""
    min_node = None
    min_dist = 100
    for node in nodes:
        dist = station.distance(node)
        if dist < min_dist:
            min_dist = dist
            min_node = node
    return Point(min_node.x, min_node.y)

In [11]:
def fill_nodes(route_id, route_mg):
    route_keys = [k for k in trip_stop_sequence_dict.keys() if k.startswith(f'{route_id},') and 'SDon' not in k]
    route_sdon_keys = [k for k in trip_stop_sequence_dict.keys() if k.startswith(f'{route_id},') and 'SDon' in k]
    
    route_stops = [trip_stop_sequence_dict[k] for k in route_keys]
    route_stops = [item for sublist in route_stops for item in sublist]
    
    route_sdon_stops = [trip_stop_sequence_dict[k] for k in route_sdon_keys]
    route_sdon_stops = [item for sublist in route_sdon_stops for item in sublist]
    
    route_set = set([])
    route_set_stops = []
    for route_stop in route_stops:
        if route_stop not in route_set:
            route_set_stops.append(route_stop)
            
    route_sdon_set = set([])
    route_sdon_set_stops = []
    for route_sdon_stop in route_sdon_stops:
        if route_sdon_stop not in route_sdon_set:
            route_sdon_set_stops.append(route_sdon_stop)
    
    route_rows = [stops_df.loc[s] for s in route_set_stops]
    route_lats = [r['stop_lat'] for r in route_rows]
    route_lons = [r['stop_lon'] for r in route_rows]
    
    route_sdon_rows = [stops_df.loc[s] for s in route_sdon_set_stops]
    route_sdon_lats = [r['stop_lat'] for r in route_sdon_rows]
    route_sdon_lons = [r['stop_lon'] for r in route_sdon_rows]
    
    route_stops = route_set_stops
    route_sdon_stops = route_sdon_set_stops
    
    print(f'{route_id}: {len(route_stops) + len(route_sdon_stops)}')
    
    stops_df[route_id] = None
    if len(route_stops) + len(route_sdon_stops) == 0:
        return False
    
    route_nodes = []
    route_mg_nodes = [Point(n[0], n[1]) for n in route_mg.nodes()]
    for i in range(len(route_stops)):
        stop = Point(route_lons[i], route_lats[i])
        route_nodes.append(closest_node(stop, route_mg_nodes))
    
    for i in range(len(route_nodes)):
        stops_df.loc[route_stops[i], 'node'] = route_nodes[i].wkt
        stops_df.loc[route_stops[i], route_id] = route_nodes[i].wkt
        #stops_df.loc[route_stops[i], 'node'] =   shapely.wkt.dumps(route_nodes[i], rounding_precision=5)
        #stops_df.loc[route_stops[i], route_id] = shapely.wkt.dumps(route_nodes[i], rounding_precision=5)
        
    route_sdon_nodes = []
    route_sdon_mg_nodes = [Point(n[0], n[1]) for n in route_mg.nodes()]
    for i in range(len(route_sdon_stops)):
        stop = Point(route_sdon_lons[i], route_sdon_lats[i])
        route_sdon_nodes.append(closest_node(stop, route_sdon_mg_nodes))
    
    for i in range(len(route_sdon_nodes)):
        stops_df.loc[route_sdon_stops[i], 'node'] = route_sdon_nodes[i].wkt
        stops_df.loc[route_sdon_stops[i], route_id] = route_sdon_nodes[i].wkt
        #stops_df.loc[route_sdon_stops[i], 'node'] =   shapely.wkt.dumps(route_sdon_nodes[i], rounding_precision=5)
        #stops_df.loc[route_sdon_stops[i], route_id] = shapely.wkt.dumps(route_sdon_nodes[i], rounding_precision=5)
        
    return True

In [12]:
valid_routes = []
for i, route_id in enumerate(route_ids):
    if fill_nodes(route_id, route_mgs[i]):
        valid_routes.append(route_id)

M1: 264
M10: 186
M100: 0
M101: 202
M102: 238
M103: 334
M104: 162
M106: 66
M11: 234
M116: 84
M12: 82
M15: 372
M191: 0
M2: 192
M20: 162
M21: 82
M22: 86
M3: 318
M31: 114
M35: 88
M4: 302
M42: 68
M5: 198
M50: 64
M55: 134
M57: 88
M66: 52
M7: 238
M72: 76
M8: 86
M9: 112
M96: 48


In [13]:
valid_stops_df = stops_df[~stops_df['node'].isna()]
valid_stops_df = valid_stops_df.drop(columns=[
    'location_type',
    'node',
    'parent_station',
    'stop_desc',
    'stop_url',
    'zone_id',
])
peek(valid_stops_df)

1590


Unnamed: 0_level_0,stop_name,stop_lat,stop_lon,M1,M10,M100,M101,M102,M103,M104,...,M5,M50,M55,M57,M66,M7,M72,M8,M9,M96
stop_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
400001,4 AV/E 10 ST,40.731342,-73.990292,POINT (-73.99037 40.73135),,,,,,,...,,,,,,,,,,
400002,4 AV/E 12 ST,40.732608,-73.989958,POINT (-73.99006 40.73262),,,,,,,...,,,,,,,,,,
400003,4 AV/E 13 ST,40.733936,-73.98972,POINT (-73.98985 40.73394),,,,,,,...,,,,,,,,,,


In [14]:
valid_stops_df.to_csv(f'{DATA_DIR}/exports/csv/manhattan/stops_nodes.csv')