In [20]:
import networkx as nx
import pandas as pd
from tqdm import tqdm

In [21]:
stops = pd.read_csv('../data/raw/stops.txt')
trips = pd.read_csv('../data/raw/trips.txt')
stop_times = pd.read_csv('../data/raw/stop_times.txt')

In [22]:
stop_times

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled
0,45825045,9:15:00,9:15:00,14155,1,,0,0,
1,45825045,9:16:20,9:16:20,3807,2,,0,0,0.3546
2,45825045,9:17:13,9:17:13,6904,3,,0,0,0.5903
3,45825045,9:18:36,9:18:36,1163,4,,0,0,0.9613
4,45825045,9:19:49,9:19:49,7723,5,,0,0,1.2849
...,...,...,...,...,...,...,...,...,...
4176079,45958486,26:25:00,26:25:00,14535,1,,0,0,
4176080,45958486,26:27:05,26:27:05,14536,2,,0,0,1.6279
4176081,45958486,26:28:50,26:28:50,14537,3,,0,0,2.4244
4176082,45958486,26:30:43,26:30:43,14538,4,,0,0,3.2976


In [23]:
stops

Unnamed: 0,stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,stop_timezone,wheelchair_boarding
0,262,662,Danforth Rd at Kennedy Rd,,43.714379,-79.260939,,,,,,1
1,263,929,Davenport Rd at Bedford Rd,,43.674448,-79.399659,,,,,,1
2,264,940,Davenport Rd at Dupont St,,43.675511,-79.401938,,,,,,2
3,265,1871,Davisville Ave at Cleveland St,,43.702088,-79.378112,,,,,,1
4,266,11700,Disco Rd at Attwell Dr,,43.701362,-79.594843,,,,,,1
...,...,...,...,...,...,...,...,...,...,...,...,...
9425,24602,16351,THE QUEENSWAY AT MILTON ST,,43.627673,-79.496938,,,,,,1
9426,24603,16353,Lawrence Ave East at Tremont Cres (East) East ...,,43.735770,-79.349610,,,,,,1
9427,24604,16354,Queen St East at Bay St West Side,,43.651860,-79.382223,,,,,,1
9428,24605,16355,Willowdale Ave at Spring Garden Ave,,43.766461,-79.400824,,,,,,1


In [24]:
G = nx.DiGraph()

In [25]:
for _ ,route in tqdm(stop_times.groupby('trip_id'), total=stop_times['trip_id'].nunique()):
    route = route.sort_values('stop_sequence')
    route_id = trips[trips['trip_id'] == route['trip_id'].iloc[0]]['route_id'].iloc[0]
    # Sliding window of pairs of stops
    for i in range(len(route)-1):
        stop1 = route.iloc[i]
        stop2 = route.iloc[i+1]
        for stop in [stop1, stop2]:
            if stop['stop_id'] not in G.nodes:
                stop_data = stops[stops['stop_id'] == stop['stop_id']].iloc[0]
                G.add_node(
                    stop_data['stop_id'],
                    code=stop_data['stop_code'],
                    name=stop_data['stop_name'],
                    lat=stop_data['stop_lat'],
                    lon=stop_data['stop_lon'],
                    wheelchair=stop_data['wheelchair_boarding']
                )

        G.add_edge(stop1['stop_id'], stop2['stop_id'], route=route_id, trip=route['trip_id'].iloc[0])



100%|██████████| 125581/125581 [10:34<00:00, 198.02it/s] 


In [26]:
nx.write_graphml(G, '../data/processed/ttc.graphml')

In [27]:
G = nx.read_graphml('../data/processed/ttc.graphml')

In [28]:
# Sanity check - College Streetcar has route_id 69413
# Pretty print the stops on the route
college = []
for edge in G.edges:
    if G.edges[edge]['route'] == 69413:
        # If last entry in the list is the same as edge[0], skip it
        if len(college) == 0 or college[-1] != edge[0]:
            college.append(edge[0])
        college.append(edge[1])

In [29]:
G.nodes['10206']

{'code': 9698,
 'name': 'Gerrard St East at Victoria Park Ave West Side',
 'lat': 43.68687,
 'lon': -79.287143,
 'wheelchair': 1}

In [30]:
# Visualize the route using folium
import folium
# Initialize map of Toronto
m = folium.Map(location=[43.6532, -79.3832], zoom_start=12)
# Add route to map
folium.PolyLine(locations=[(G.nodes[node]['lat'], G.nodes[node]['lon']) for node in college]).add_to(m)
# Add stops to map
for node in college:
    folium.Marker(location=(G.nodes[node]['lat'], G.nodes[node]['lon']), popup=G.nodes[node]['name']).add_to(m)

m

In [31]:
college[0]

'8504'