In [8]:
import pandas as pd
import re
import json
from datetime import datetime

In [9]:
# Load the routes table. Later cells read the route_id, route_short_name,
# route_long_name, route_type and route_color columns from this frame.
df = pd.read_csv("./files/routes.txt")
# Bare expression so the notebook renders the frame as the cell output.
df

Unnamed: 0,route_id,agency_id,route_short_name,route_long_name,route_type,route_color,route_text_color
0,06220922-ST,GO,ST,Stouffville,2,794500,FFFFFF
1,06220922-RH,GO,RH,Richmond Hill,2,0099c7,FFFFFF
2,06220922-MI,GO,MI,Milton,2,f57f25,FFFFFF
3,06220922-LW,GO,LW,Lakeshore West,2,98002e,FFFFFF
4,06220922-LE,GO,LE,Lakeshore East,2,ff0d00,FFFFFF
...,...,...,...,...,...,...,...
86,09221222-19,GO,19,Mississauga / North York,3,f57f25,000000
87,09221222-18,GO,18,Lakeshore West,3,98002e,FFFFFF
88,09221222-16,GO,16,Hamilton / Toronto Express,3,98002e,FFFFFF
89,09221222-15,GO,15,Brantford / Aldershot,3,98002e,FFFFFF


In [10]:
# Per-route lookup keyed by the route's short name. Each entry carries the
# route's descriptive columns plus two lists that start out empty.
routeInfos = {}

for row_number, route_row in df.iterrows():
    routeInfos[route_row["route_short_name"]] = {
        "index": row_number,
        "route_id": route_row["route_id"],
        "route_long_name": route_row["route_long_name"],
        "route_type": route_row["route_type"],
        "route_color": route_row["route_color"],
        # Placeholders on every route entry, created empty here.
        "stops_on_route": [],
        "edges_travel": [],
    }


In [11]:
# Load the stop-time table. stop_id mixes alphabetic codes ("UN") with
# zero-padded numeric codes ("02759"); with pandas' default chunked type
# inference a large file can end up with some ids parsed as integers, which
# drops the leading zeros and breaks the string concatenation used to build
# edge keys later on. Pinning the id columns to str keeps them uniform.
timetabledf = pd.read_csv(
    "./files/stop_times.txt",
    dtype={"trip_id": str, "stop_id": str},
)
# Bare expression so the notebook renders the frame as the cell output.
timetabledf

  timetabledf = pd.read_csv("./files/stop_times.txt")


Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,pickup_type,drop_off_type,stop_headsign
0,20220831-LE-9019,13:10:00,13:10:00,UN,28,0,0,
1,20220831-LE-9019,12:56:00,12:56:00,DA,19,0,0,Danforth GO 12:56 - Union Station 13:10
2,20220831-LE-9019,12:51:00,12:51:00,SC,17,0,0,Scarborough GO 12:51 - Union Station 13:10
3,20220831-LE-9019,12:47:00,12:47:00,EG,16,0,0,Eglinton GO 12:47 - Union Station 13:10
4,20220831-LE-9019,12:42:00,12:42:00,GU,15,0,0,Guildwood GO 12:42 - Union Station 13:10
...,...,...,...,...,...,...,...,...
2560952,20220912-16-16480,14:04:00,14:04:00,02759,5,0,0,
2560953,20220912-16-16480,14:03:00,14:03:00,01001,4,0,0,
2560954,20220912-16-16480,14:02:00,14:02:00,01006,3,0,0,
2560955,20220912-16-16480,14:01:00,14:01:00,01002,2,0,0,


In [12]:
# Nothing has been visited yet: previous stop, arrival time and route are unset.
laststopid = lastarrivaltime = lastrouteid = None

# Edge registry, filled in by the stop-time loop.
edges = dict()

In [13]:
def fix_route(route):
    """Return "KI" when the route code is "GT"; any other value is returned unchanged."""
    return "KI" if route == "GT" else route

In [14]:
id_finder = r"[0-9]{8}-(.*)-"
trip_route_pattern = re.compile(id_finder)


def _gtfs_time_to_seconds(timestr):
    """Convert an ``HH:MM:SS`` string to seconds since the start of the service day.

    Hours of 24 and above (service running past midnight) are kept as-is
    rather than wrapped back to 00-06, so the difference between two stops of
    the same trip stays correct when the trip crosses midnight.

    Raises ValueError if the string does not have three integer fields.
    """
    hours, minutes, seconds = (int(part) for part in timestr.split(":"))
    return hours * 3600 + minutes * 60 + seconds


# Trackers for the previously processed stop_times row. They are reset here so
# the cell produces the same result when it is re-run on its own.
lasttripid = None
laststopid = None
lastarrivaltime = None  # seconds since the start of the service day
unknown_routes = set()

# NOTE(review): assumes the rows of one trip are contiguous and ordered by
# stop_sequence (either direction), as in the displayed sample - confirm.
for _, row in timetabledf.iterrows():

    # Find the route identifier embedded in the trip ID
    trip_id = row["trip_id"]
    search = trip_route_pattern.search(trip_id) if isinstance(trip_id, str) else None
    if search is None:
        # str() so a missing (NaN) trip id cannot crash the message itself
        print("Skipping For Trip Id " + str(trip_id))
        continue
    route = fix_route(search.group(1))

    # Get the route struct; report an unknown route code once instead of
    # aborting the whole run with a KeyError.
    routeStruct = routeInfos.get(route)
    if routeStruct is None:
        if route not in unknown_routes:
            unknown_routes.add(route)
            print("Skipping Unknown Route " + route)
        continue

    # Stop ids are used as string key fragments below
    stop = str(row["stop_id"])
    arrivaltime = _gtfs_time_to_seconds(row["arrival_time"])

    # Only add stops the route has not listed yet
    if stop not in routeStruct["stops_on_route"]:
        routeStruct["stops_on_route"].append(stop)

    # Create an edge between this stop and the previous one, but only inside
    # the same trip: linking the last row of one trip to the first row of the
    # next would join unrelated stops with a meaningless duration.
    if trip_id == lasttripid and stop != laststopid:

        # An edge is undirected: it may already be stored under either key
        uid = laststopid + "-" + stop
        altuid = stop + "-" + laststopid

        if uid in edges:
            edgekey = uid
        elif altuid in edges:
            edgekey = altuid
        else:
            edgekey = uid
            # abs() keeps the duration positive whichever way the rows are ordered
            timemins = round(abs(lastarrivaltime - arrivaltime) / 60)
            edges[uid] = {'node1': laststopid, 'node2': stop, 'time_minutes': timemins, 'used_by': [route]}

        # Record which routes travel along this edge
        usedby = edges[edgekey]['used_by']
        if route not in usedby:
            usedby.append(route)

        # Record the edge on the route as well. The earlier code had an
        # unfinished `routeStruct[""]` lookup here that raised KeyError;
        # presumably it was meant to fill "edges_travel" - confirm.
        if edgekey not in routeStruct["edges_travel"]:
            routeStruct["edges_travel"].append(edgekey)

    # Remember this row for the next iteration (every row, not just new stops,
    # so that edges always join truly consecutive stops of a trip)
    lasttripid = trip_id
    laststopid = stop
    lastarrivaltime = arrivaltime

with open("route_info.json", "w") as f:
    json.dump(routeInfos, f, indent=4)

In [15]:
# Write the edge map to disk as JSON indented by four spaces.

with open("route_edges.json", "w") as edges_file:
    edges_file.write(json.dumps(edges, indent=4))