In [1]:
# This file plots the elevation gradients of bus route road segments with a
# red/blue diverging color map to indicate uphill (red) and downhill (blue)
# slopes.

In [65]:
import geopandas as gpd
import json
import matplotlib
import matplotlib.cm as cm
import matplotlib.colors as colors
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import numpy as npm
import osmnx as ox
import pandas as pd
from tqdm import tqdm

import warnings
warnings.filterwarnings('ignore')
%matplotlib inline

In [3]:
CRS = 'EPSG:4326'
DATA_DIR = '../../data'
EXPORTS_DIR = f'{DATA_DIR}/exports'

In [4]:
def peek(df):
    print(len(df))
    display(df.head())

In [6]:
routes_gdf = gpd.read_file(f'{DATA_DIR}/shp/manhattan/bus_routes_nyc_may2020.shp')
stops_gdf = gpd.read_file(f'{DATA_DIR}/shp/manhattan/bus_stops_nyc_may2020.shp')

In [7]:
routes_df = pd.read_csv(f'{DATA_DIR}/gtfs/manhattan/routes.txt')

In [8]:
calendar_df = pd.read_csv(f'{DATA_DIR}/gtfs/manhattan/calendar.txt')
calendar_df

Unnamed: 0,service_id,monday,tuesday,wednesday,thursday,friday,saturday,sunday,start_date,end_date
0,MQ_C1-Sunday,0,0,0,0,0,0,1,20210627,20210829
1,MQ_C1-Weekday-SDon,1,1,1,1,1,0,0,20210628,20210903
2,MQ_C1-Saturday,0,0,0,0,0,1,0,20210703,20210904
3,MV_C1-Sunday,0,0,0,0,0,0,1,20210627,20210829
4,MV_C1-Weekday-SDon,1,1,1,1,1,0,0,20210628,20210903
5,MV_C1-Saturday,0,0,0,0,0,1,0,20210703,20210904
6,MV_C1-Saturday-BM,0,0,0,0,1,0,0,20210702,20210903
7,OF_C1-Sunday,0,0,0,0,0,0,1,20210627,20210829
8,OF_C1-Weekday-SDon,1,1,1,1,1,0,0,20210628,20210903
9,OF_C1-Saturday,0,0,0,0,0,1,0,20210703,20210904


In [9]:
stop_times_df = pd.read_csv(f'{DATA_DIR}/gtfs/manhattan/stop_times.txt')
stop_times_df

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,pickup_type,drop_off_type
0,MQ_C1-Weekday-032400_M57_451,05:24:00,05:24:00,400745,1,0,0
1,MQ_C1-Weekday-032400_M57_451,05:25:30,05:25:30,402233,2,0,0
2,MQ_C1-Weekday-032400_M57_451,05:26:44,05:26:44,403986,3,0,0
3,MQ_C1-Weekday-032400_M57_451,05:28:10,05:28:10,405562,4,0,0
4,MQ_C1-Weekday-032400_M57_451,05:29:09,05:29:09,405353,5,0,0
...,...,...,...,...,...,...,...
934104,OH_C1-Weekday-SDon-004200_M101_1,01:21:39,01:21:39,405181,50,0,0
934105,OH_C1-Weekday-SDon-004200_M101_1,01:22:18,01:22:18,402707,51,0,0
934106,OH_C1-Weekday-SDon-004200_M101_1,01:23:12,01:23:12,403438,52,0,0
934107,OH_C1-Weekday-SDon-004200_M101_1,01:24:00,01:24:00,903042,53,0,0


In [13]:
trips_df = pd.read_csv(f'{DATA_DIR}/gtfs/manhattan/trips.txt')
trips_df.head()

Unnamed: 0,route_id,service_id,trip_id,trip_headsign,direction_id,shape_id
0,M57,MQ_C1-Weekday,MQ_C1-Weekday-032400_M57_451,WEST SIDE BROADWAY-72 ST CROSSTOWN,1,M570110
1,M57,MQ_C1-Weekday,MQ_C1-Weekday-034000_M57_451,EAST SIDE YORK-60 ST CROSSTOWN,0,M570111
2,M57,MQ_C1-Weekday,MQ_C1-Weekday-037300_M57_451,WEST SIDE BROADWAY-72 ST CROSSTOWN,1,M570109
3,M57,MQ_C1-Weekday,MQ_C1-Weekday-041700_M57_451,EAST SIDE YORK-60 ST CROSSTOWN,0,M570111
4,M57,MQ_C1-Weekday,MQ_C1-Weekday-045500_M57_451,WEST SIDE BROADWAY-72 ST CROSSTOWN,1,M570109


In [14]:
timetable_df = stop_times_df.merge(trips_df, on='trip_id', how='inner')
timetable_df = timetable_df[timetable_df['service_id'].str.contains('Weekday')]
timetable_df = timetable_df.sort_values(by=['route_id', 'direction_id', 'trip_id', 'stop_sequence'])
timetable_df = timetable_df.set_index(['route_id', 'service_id', 'direction_id'])
peek(timetable_df)

505068


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,pickup_type,drop_off_type,trip_headsign,shape_id
route_id,service_id,direction_id,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
M1,OF_C1-Weekday,0,OF_C1-Weekday-033500_M1_101,05:35:00,05:35:00,400001,1,0,0,HARLEM 147 ST via MADISON AV,M010006
M1,OF_C1-Weekday,0,OF_C1-Weekday-033500_M1_101,05:35:36,05:35:36,400002,2,0,0,HARLEM 147 ST via MADISON AV,M010006
M1,OF_C1-Weekday,0,OF_C1-Weekday-033500_M1_101,05:36:14,05:36:14,400003,3,0,0,HARLEM 147 ST via MADISON AV,M010006
M1,OF_C1-Weekday,0,OF_C1-Weekday-033500_M1_101,05:37:02,05:37:02,404120,4,0,0,HARLEM 147 ST via MADISON AV,M010006
M1,OF_C1-Weekday,0,OF_C1-Weekday-033500_M1_101,05:37:43,05:37:43,404936,5,0,0,HARLEM 147 ST via MADISON AV,M010006


In [15]:
routes = list(set(multiindex[0] for multiindex in timetable_df.index))
routes.sort()

In [16]:
services = list(set(multiindex[0:2] for multiindex in timetable_df.index))
services.sort()

In [66]:
def hash_stop_sequence(stop_sequence):
    return ','.join([str(i) for i in stop_sequence])

def hash_multi_index(multi_index):
    return ','.join([str(i) for i in multi_index])

trip_stop_sequence_dict = {}
trip_manifest = {}

for multi_index in tqdm(timetable_df.index):
    stop_dict = {}
    stop_sequence_dict = {}
    stop_sequence_set = set([''])
    stop_sequence = []
    stop_sequence_next_index = 0
    hash_to_index_dict = {}
    multi_index_hash = hash_multi_index(multi_index)
    
    if multi_index_hash in trip_manifest:
        continue
    
    rows = list((timetable_df.loc[multi_index][['trip_id', 'stop_sequence', 'stop_id']]).iterrows())
    for i, row in enumerate(rows):
        trip_id, stop_index, stop_id = row[1]
        
        # Last row.
        if i == len(rows) - 1:
            stop_sequence.append(stop_id)
        
        if (stop_index == 1 or i == len(rows) - 1) and len(stop_sequence) > 0:
            stop_sequence_hash = hash_stop_sequence(stop_sequence)
            if stop_sequence_hash not in stop_sequence_set:
                stop_sequence_set.add(stop_sequence_hash)
                stop_sequence_dict[stop_sequence_next_index] = stop_sequence
                hash_to_index_dict[stop_sequence_hash] = stop_sequence_next_index
                stop_sequence_next_index += 1
                
            if multi_index_hash not in trip_manifest:
                trip_manifest[multi_index_hash] = {}
                
            stop_sequence_index = hash_to_index_dict[stop_sequence_hash]
            trip_manifest[multi_index_hash][trip_id] = stop_sequence_index
            stop_sequence = []
            
        stop_sequence.append(stop_id)
    trip_stop_sequence_dict[hash_multi_index(multi_index)] = stop_sequence_dict

100%|██████████| 505068/505068 [00:31<00:00, 16247.51it/s]


In [68]:
with open(f'{EXPORTS_DIR}/json/manhattan/trip_stop_sequence_dict.json', 'w') as fp:
    json.dump(trip_stop_sequence_dict, fp)
with open(f'{EXPORTS_DIR}/json/manhattan/trip_manifest.json', 'w') as fp:
    json.dump(trip_manifest, fp)

In [None]:
stops_df = pd.read_csv(f'{DATA_DIR}/gtfs/manhattan/stops.txt')
stops_df.head()

In [None]:
routes_df = pd.read_csv(f'{DATA_DIR}/gtfs/manhattan/routes.txt')
routes_df.head()

In [None]:
shapes_df = pd.read_csv(f'{DATA_DIR}/gtfs/manhattan/shapes.txt')
shapes_df.head()

In [None]:
routes_df.head()

In [None]:
m11_route_gdf.head()

In [None]:
stops_gdf.head()

In [None]:
m11_route_gdf = routes_gdf[routes_gdf['route_id'] == 'M11'].iloc[0:1]

In [None]:
fig, ax = plt.subplots(figsize=(10, 10))

m11_gdf.plot(ax=ax)

plt.show()

In [None]:
m11_geom = m11_gdf.iloc[0].geometry

In [None]:
list(m11_geom.geoms)