In [5]:
import requests
import json
import os
from dotenv import load_dotenv
import osmnx as ox

In [6]:
import asyncio
import aiohttp
    

In [7]:
# Load variables from a .env file into the process environment so the
# os.getenv("MTA_API_KEY") lookup in the next cell can find the API key.
load_dotenv()  # take environment variables

True

In [8]:
# API key used in every MTA BusTime request URL; None when the variable is unset.
MTA_API_KEY = os.environ.get("MTA_API_KEY")

In [9]:
async def get_bus_route(route_id):
    """Fetch the stops-for-route response for one MTA NYCT bus route.

    Parameters
    ----------
    route_id : str
        Short route id such as "B52"; it is appended to the "MTA NYCT_"
        agency prefix inside the request URL.

    Returns
    -------
    dict
        The whole decoded JSON body (including its "code" field).

    Raises
    ------
    Exception
        When the "code" field of the decoded body is not 200.
    """
    url = f"https://bustime.mta.info/api/where/stops-for-route/MTA%20NYCT_{route_id}.json?key={MTA_API_KEY}&includePolylines=false&version=2"
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as http_response:
            payload = await http_response.json()

    # Guard clause: the API reports its own status inside the JSON body.
    if payload['code'] != 200:
        raise Exception(f"Error: bus route {route_id} returned response code {payload['code']}")
    return payload

In [10]:
# Smoke test: fetch one known route and pretty-print the full response so the
# JSON layout (data -> entry -> stopGroupings -> stopGroups) can be inspected.
route_id = "B52" # Example route
route_data = await get_bus_route(route_id)
print(json.dumps(route_data, indent=4))



{
    "code": 200,
    "currentTime": 1747011208413,
    "data": {
        "entry": {
            "polylines": [],
            "routeId": "MTA NYCT_B52",
            "stopGroupings": [
                {
                    "ordered": true,
                    "stopGroups": [
                        {
                            "id": "1",
                            "name": {
                                "name": "DOWNTOWN BKLYN TILLARY ST via GATES",
                                "names": [
                                    "DOWNTOWN BKLYN TILLARY ST via GATES"
                                ],
                                "type": "destination"
                            },
                            "polylines": [],
                            "stopIds": [
                                "MTA_504965",
                                "MTA_504119",
                                "MTA_304196",
                                "MTA_304197",
                                "MT

In [11]:
async def get_route_schedule(route_id):
    """Fetch the trips (with schedules) currently reported for one bus route.

    Parameters
    ----------
    route_id : str
        Short route id such as "B52"; it is appended to the "MTA NYCT_"
        agency prefix inside the request URL.

    Returns
    -------
    list
        The ``data -> list`` part of the decoded JSON body: one entry per
        trip, each carrying a ``schedule -> stopTimes`` list.

    Raises
    ------
    Exception
        When the "code" field of the decoded body is not 200.
    """
    url = f"https://bustime.mta.info/api/where/trips-for-route/MTA%20NYCT_{route_id}.json?key={MTA_API_KEY}&includeSchedule=true"
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as http_response:
            payload = await http_response.json()

    if payload['code'] != 200:
        # The argument is a route id (not a stop id), so the message names the route.
        raise Exception(f"Error: route schedule for {route_id} returned response code {payload['code']}")
    return payload['data']['list']

In [12]:
# Fetch and pretty-print the trip schedules for the example route.
# NOTE(review): despite its name, `stop_data` holds the list of trip entries
# returned by get_route_schedule (each with a "schedule" -> "stopTimes" list).
stop_data = await get_route_schedule(route_id)
print(json.dumps(stop_data, indent=4))

[
    {
        "frequency": null,
        "schedule": {
            "frequency": null,
            "nextTripId": "MTA NYCT_FP_B5-Sunday-127500_B26_219",
            "previousTripId": "MTA NYCT_FP_B5-Sunday-116200_B26_219",
            "stopTimes": [
                {
                    "arrivalTime": 73020,
                    "departureTime": 73020,
                    "distanceAlongTrip": 0.08656897032969031,
                    "stopHeadsign": "",
                    "stopId": "MTA_307175"
                },
                {
                    "arrivalTime": 73126,
                    "departureTime": 73126,
                    "distanceAlongTrip": 285.61860445152655,
                    "stopHeadsign": "",
                    "stopId": "MTA_307699"
                },
                {
                    "arrivalTime": 73173,
                    "departureTime": 73173,
                    "distanceAlongTrip": 409.6188950017041,
                    "stopHeadsign": "",
          

In [13]:
def get_stop_groups(route_data):
    """Map each stop group's destination name to its list of stop ids.

    Only the first entry of ``stopGroupings`` is used.

    Parameters
    ----------
    route_data : dict
        Full response returned by ``get_bus_route`` (the stop groupings are
        read from ``route_data['data']['entry']``).

    Returns
    -------
    dict
        ``{group name: [stop id, ...]}``. Empty when the route has no stop
        groupings or when the first grouping is not flagged as ordered.
    """
    entry = route_data['data']['entry']
    groupings = entry.get('stopGroupings') or []
    if not groupings:
        # Nothing to index into; previously this raised IndexError.
        return {}

    grouping = groupings[0]
    if not grouping['ordered']:
        # routeId lives under data -> entry, not at the top level of the response.
        print(f"{entry['routeId']} not ordered")
        return {}

    return {group['name']['name']: group['stopIds'] for group in grouping['stopGroups']}

    

In [14]:
# Stop groups (destination name -> stop ids) for the example route fetched above.
test_route_lists = get_stop_groups(route_data)

In [15]:
# Load the stored stop metadata; it is looked up by stop id (e.g. 'MTA_901601').
json_path='./graph_data/bus_info_dict.json'
with open(json_path) as json_file:
    bus_info = json.load(json_file)

In [16]:
# Inspect one entry to see which fields are stored per stop.
print(bus_info['MTA_901601'])

{'name': 'Brooklyn Bridge Park /PIER 6', 'routes': ['B63'], 'direction': 'W', 'link': 'https://bustime.mta.info/m/index?q=901601', 'id': 'MTA_901601', 'latitude': 40.693078, 'longitude': -74.000877, 'osmid': 1567286111, 'diroutes': ['BAY RIDGE SHORE RD via 5 AV']}


In [43]:
# Load the list of route records (each has an 'id' field) from the archive folder.
# Note that this rebinds json_path to a different file than the cell above.
json_path='./graph_data_archive/all_routes.json'
with open(json_path) as json_file:
    all_routes = json.load(json_file)

In [44]:
# Number of route records loaded.
print(len(all_routes))

267


In [45]:
def shorten_id(route):
    """Return the piece of ``route['id']`` between the first and second underscore.

    For an id such as ``'MTA NYCT_M34+'`` this yields the short id ``'M34+'``.
    """
    pieces = route['id'].split('_')
    return pieces[1]

In [46]:
# Sanity check of shorten_id on the first route record.
shorten_id(all_routes[0])

'M34+'

In [68]:
route_data = get_bus_route('B52')
print(route_data)

<coroutine object get_bus_route at 0x3000e1240>


In [69]:
from tqdm import tqdm
#loading bar module

In [70]:
# route id -> names of the stop groups for which no matching schedule was found
unknown_routes = {}


def get_edge_times(first_stop, route_schedule, route_id, route_name):
    '''
    Given the first stop of a stop group and the candidate trip schedules of
    its route, compute the travel time between each pair of consecutive stops.

    Parameters
    ----------
    first_stop : str
        Stop id expected to be the first stop of the matching schedule.
    route_schedule : list
        Trip entries; each one is read as entry['schedule']['stopTimes'],
        a list of dicts with 'stopId', 'arrivalTime' and 'departureTime'.
    route_id : str
        Route id, used only as the key in `unknown_routes`.
    route_name : str
        Stop-group name, recorded in `unknown_routes` when nothing matches.

    Returns
    -------
    dict
        {(prev_stop_id, cur_stop_id): cur arrivalTime - prev departureTime}
        taken from the first schedule whose first stop equals `first_stop`.
        Empty when no schedule matches; in that case `route_name` is appended
        to `unknown_routes[route_id]`.
    '''
    stop_times = None
    for trip in route_schedule:
        candidate = trip['schedule']['stopTimes']
        # A trip without stop times can never match, and indexing [0] on it would raise.
        if candidate and candidate[0]['stopId'] == first_stop:
            stop_times = candidate
            break

    if stop_times is None:
        # Many routes have no trip starting at the stored first stop; remember
        # them so they can be corrected later.
        unknown_routes.setdefault(route_id, []).append(route_name)
        return {}

    # Pair every stop with its successor: the edge weight is the time from
    # leaving the previous stop to arriving at the current one.
    return {
        (prev['stopId'], cur['stopId']): cur['arrivalTime'] - prev['departureTime']
        for prev, cur in zip(stop_times, stop_times[1:])
    }
    

In [71]:
async def get_stop_group_times(stop_groups, route_id):
    '''
    Fetch the schedule of a route and compute edge travel times for each of
    its stop groups.

    Parameters
    ----------
    stop_groups : dict
        {group name: [stop id, ...]} as produced by get_stop_groups.
    route_id : str
        Short route id passed on to get_route_schedule.

    Returns
    -------
    dict
        edge_data format = {
            (stopId, stopId2) : {est travel time from stopId to stopId2 in seconds}
        }
        Always a dict: it is empty when the schedule request fails (the error
        is printed) or when no group could be matched to a schedule.
    '''
    edge_data = {}

    try:
        route_schedule = await get_route_schedule(route_id)
    except Exception as e:
        print(f"Error: {e}")
        # Hand back an empty dict rather than None so callers can always
        # feed the result straight into dict.update().
        return edge_data

    for route_name, group in stop_groups.items():
        if not group:
            # No stops means there is no first stop to match a schedule against.
            continue
        # Travel times come from the schedule whose first stop equals group[0].
        edge_data.update(get_edge_times(group[0], route_schedule, route_id, route_name))

    return edge_data


In [None]:
#getting a dict of all routes 
all_route_lists = {}
est_stop_times = {}
fails = []
for i in tqdm(range(len(all_routes)), desc="fetching routes"):
    route_id = shorten_id(all_routes[i])

    try:
        route_data = await get_bus_route(route_id)
        
    except Exception as e:
        fails.append(route_id)
        continue
    
    #extracting routes (stop groups) from data
    stop_groups = get_stop_groups(route_data)
    #adding route to dictionary
    all_route_lists.update(stop_groups)

    #getting a list of route times for each edge in the routes
    route_times = await get_stop_group_times(stop_groups, route_id)
    #print(route_times)
    est_stop_times.update(route_times)
    print(len(est_stop_times))

    
print(f"failed to fetch {len(fails)} routes: {fails}")
    

fetching routes:   0%|          | 1/267 [00:00<03:53,  1.14it/s]

{('MTA_405286', 'MTA_405587'): 58, ('MTA_405587', 'MTA_403192'): 103, ('MTA_403192', 'MTA_401818'): 97, ('MTA_401818', 'MTA_401819'): 84, ('MTA_401819', 'MTA_404280'): 78, ('MTA_404280', 'MTA_401821'): 121, ('MTA_401821', 'MTA_401822'): 126, ('MTA_401822', 'MTA_401824'): 118, ('MTA_401824', 'MTA_401826'): 116, ('MTA_401826', 'MTA_401827'): 59, ('MTA_401827', 'MTA_405336'): 132, ('MTA_405336', 'MTA_401832'): 121, ('MTA_401832', 'MTA_401833'): 62, ('MTA_401833', 'MTA_402052'): 76, ('MTA_402052', 'MTA_903027'): 89}


fetching routes:   1%|          | 3/267 [00:02<04:05,  1.08it/s]

{('MTA_405286', 'MTA_405587'): 58, ('MTA_405587', 'MTA_403192'): 103, ('MTA_403192', 'MTA_401818'): 97, ('MTA_401818', 'MTA_401819'): 84, ('MTA_401819', 'MTA_404280'): 78, ('MTA_404280', 'MTA_401821'): 121, ('MTA_401821', 'MTA_401822'): 126, ('MTA_401822', 'MTA_401824'): 118, ('MTA_401824', 'MTA_401826'): 116, ('MTA_401826', 'MTA_401827'): 59, ('MTA_401827', 'MTA_405336'): 132, ('MTA_405336', 'MTA_401832'): 121, ('MTA_401832', 'MTA_401833'): 62, ('MTA_401833', 'MTA_402052'): 76, ('MTA_402052', 'MTA_903027'): 89, ('MTA_504965', 'MTA_504119'): 75, ('MTA_504119', 'MTA_304196'): 80, ('MTA_304196', 'MTA_304197'): 68, ('MTA_304197', 'MTA_304198'): 60, ('MTA_304198', 'MTA_304199'): 64, ('MTA_304199', 'MTA_304200'): 75, ('MTA_304200', 'MTA_304201'): 52, ('MTA_304201', 'MTA_308046'): 66, ('MTA_308046', 'MTA_304203'): 47, ('MTA_304203', 'MTA_304204'): 89, ('MTA_304204', 'MTA_304205'): 81, ('MTA_304205', 'MTA_304206'): 80, ('MTA_304206', 'MTA_304207'): 65, ('MTA_304207', 'MTA_306530'): 95, ('MTA_

fetching routes:   1%|▏         | 4/267 [00:03<04:40,  1.07s/it]

{('MTA_405286', 'MTA_405587'): 58, ('MTA_405587', 'MTA_403192'): 103, ('MTA_403192', 'MTA_401818'): 97, ('MTA_401818', 'MTA_401819'): 84, ('MTA_401819', 'MTA_404280'): 78, ('MTA_404280', 'MTA_401821'): 121, ('MTA_401821', 'MTA_401822'): 126, ('MTA_401822', 'MTA_401824'): 118, ('MTA_401824', 'MTA_401826'): 116, ('MTA_401826', 'MTA_401827'): 59, ('MTA_401827', 'MTA_405336'): 132, ('MTA_405336', 'MTA_401832'): 121, ('MTA_401832', 'MTA_401833'): 62, ('MTA_401833', 'MTA_402052'): 76, ('MTA_402052', 'MTA_903027'): 89, ('MTA_504965', 'MTA_504119'): 75, ('MTA_504119', 'MTA_304196'): 80, ('MTA_304196', 'MTA_304197'): 68, ('MTA_304197', 'MTA_304198'): 60, ('MTA_304198', 'MTA_304199'): 64, ('MTA_304199', 'MTA_304200'): 75, ('MTA_304200', 'MTA_304201'): 52, ('MTA_304201', 'MTA_308046'): 66, ('MTA_308046', 'MTA_304203'): 47, ('MTA_304203', 'MTA_304204'): 89, ('MTA_304204', 'MTA_304205'): 81, ('MTA_304205', 'MTA_304206'): 80, ('MTA_304206', 'MTA_304207'): 65, ('MTA_304207', 'MTA_306530'): 95, ('MTA_

fetching routes:   2%|▏         | 5/267 [00:04<04:13,  1.03it/s]

{('MTA_405286', 'MTA_405587'): 58, ('MTA_405587', 'MTA_403192'): 103, ('MTA_403192', 'MTA_401818'): 97, ('MTA_401818', 'MTA_401819'): 84, ('MTA_401819', 'MTA_404280'): 78, ('MTA_404280', 'MTA_401821'): 121, ('MTA_401821', 'MTA_401822'): 126, ('MTA_401822', 'MTA_401824'): 118, ('MTA_401824', 'MTA_401826'): 116, ('MTA_401826', 'MTA_401827'): 59, ('MTA_401827', 'MTA_405336'): 132, ('MTA_405336', 'MTA_401832'): 121, ('MTA_401832', 'MTA_401833'): 62, ('MTA_401833', 'MTA_402052'): 76, ('MTA_402052', 'MTA_903027'): 89, ('MTA_504965', 'MTA_504119'): 75, ('MTA_504119', 'MTA_304196'): 80, ('MTA_304196', 'MTA_304197'): 68, ('MTA_304197', 'MTA_304198'): 60, ('MTA_304198', 'MTA_304199'): 64, ('MTA_304199', 'MTA_304200'): 75, ('MTA_304200', 'MTA_304201'): 52, ('MTA_304201', 'MTA_308046'): 66, ('MTA_308046', 'MTA_304203'): 47, ('MTA_304203', 'MTA_304204'): 89, ('MTA_304204', 'MTA_304205'): 81, ('MTA_304205', 'MTA_304206'): 80, ('MTA_304206', 'MTA_304207'): 65, ('MTA_304207', 'MTA_306530'): 95, ('MTA_

fetching routes:   2%|▏         | 6/267 [00:06<04:49,  1.11s/it]

{('MTA_405286', 'MTA_405587'): 58, ('MTA_405587', 'MTA_403192'): 103, ('MTA_403192', 'MTA_401818'): 97, ('MTA_401818', 'MTA_401819'): 84, ('MTA_401819', 'MTA_404280'): 78, ('MTA_404280', 'MTA_401821'): 121, ('MTA_401821', 'MTA_401822'): 126, ('MTA_401822', 'MTA_401824'): 118, ('MTA_401824', 'MTA_401826'): 116, ('MTA_401826', 'MTA_401827'): 59, ('MTA_401827', 'MTA_405336'): 132, ('MTA_405336', 'MTA_401832'): 121, ('MTA_401832', 'MTA_401833'): 62, ('MTA_401833', 'MTA_402052'): 76, ('MTA_402052', 'MTA_903027'): 89, ('MTA_504965', 'MTA_504119'): 75, ('MTA_504119', 'MTA_304196'): 80, ('MTA_304196', 'MTA_304197'): 68, ('MTA_304197', 'MTA_304198'): 60, ('MTA_304198', 'MTA_304199'): 64, ('MTA_304199', 'MTA_304200'): 75, ('MTA_304200', 'MTA_304201'): 52, ('MTA_304201', 'MTA_308046'): 66, ('MTA_308046', 'MTA_304203'): 47, ('MTA_304203', 'MTA_304204'): 89, ('MTA_304204', 'MTA_304205'): 81, ('MTA_304205', 'MTA_304206'): 80, ('MTA_304206', 'MTA_304207'): 65, ('MTA_304207', 'MTA_306530'): 95, ('MTA_

fetching routes:   3%|▎         | 7/267 [00:06<04:18,  1.01it/s]

{('MTA_405286', 'MTA_405587'): 58, ('MTA_405587', 'MTA_403192'): 103, ('MTA_403192', 'MTA_401818'): 97, ('MTA_401818', 'MTA_401819'): 84, ('MTA_401819', 'MTA_404280'): 78, ('MTA_404280', 'MTA_401821'): 121, ('MTA_401821', 'MTA_401822'): 126, ('MTA_401822', 'MTA_401824'): 118, ('MTA_401824', 'MTA_401826'): 116, ('MTA_401826', 'MTA_401827'): 59, ('MTA_401827', 'MTA_405336'): 132, ('MTA_405336', 'MTA_401832'): 121, ('MTA_401832', 'MTA_401833'): 62, ('MTA_401833', 'MTA_402052'): 76, ('MTA_402052', 'MTA_903027'): 89, ('MTA_504965', 'MTA_504119'): 75, ('MTA_504119', 'MTA_304196'): 80, ('MTA_304196', 'MTA_304197'): 68, ('MTA_304197', 'MTA_304198'): 60, ('MTA_304198', 'MTA_304199'): 64, ('MTA_304199', 'MTA_304200'): 75, ('MTA_304200', 'MTA_304201'): 52, ('MTA_304201', 'MTA_308046'): 66, ('MTA_308046', 'MTA_304203'): 47, ('MTA_304203', 'MTA_304204'): 89, ('MTA_304204', 'MTA_304205'): 81, ('MTA_304205', 'MTA_304206'): 80, ('MTA_304206', 'MTA_304207'): 65, ('MTA_304207', 'MTA_306530'): 95, ('MTA_

fetching routes:   3%|▎         | 8/267 [00:07<04:14,  1.02it/s]

{('MTA_405286', 'MTA_405587'): 58, ('MTA_405587', 'MTA_403192'): 103, ('MTA_403192', 'MTA_401818'): 97, ('MTA_401818', 'MTA_401819'): 84, ('MTA_401819', 'MTA_404280'): 78, ('MTA_404280', 'MTA_401821'): 121, ('MTA_401821', 'MTA_401822'): 126, ('MTA_401822', 'MTA_401824'): 118, ('MTA_401824', 'MTA_401826'): 116, ('MTA_401826', 'MTA_401827'): 59, ('MTA_401827', 'MTA_405336'): 132, ('MTA_405336', 'MTA_401832'): 121, ('MTA_401832', 'MTA_401833'): 62, ('MTA_401833', 'MTA_402052'): 76, ('MTA_402052', 'MTA_903027'): 89, ('MTA_504965', 'MTA_504119'): 75, ('MTA_504119', 'MTA_304196'): 80, ('MTA_304196', 'MTA_304197'): 68, ('MTA_304197', 'MTA_304198'): 60, ('MTA_304198', 'MTA_304199'): 64, ('MTA_304199', 'MTA_304200'): 75, ('MTA_304200', 'MTA_304201'): 52, ('MTA_304201', 'MTA_308046'): 66, ('MTA_308046', 'MTA_304203'): 47, ('MTA_304203', 'MTA_304204'): 89, ('MTA_304204', 'MTA_304205'): 81, ('MTA_304205', 'MTA_304206'): 80, ('MTA_304206', 'MTA_304207'): 65, ('MTA_304207', 'MTA_306530'): 95, ('MTA_

fetching routes:   3%|▎         | 9/267 [00:08<03:59,  1.08it/s]

{('MTA_405286', 'MTA_405587'): 58, ('MTA_405587', 'MTA_403192'): 103, ('MTA_403192', 'MTA_401818'): 97, ('MTA_401818', 'MTA_401819'): 84, ('MTA_401819', 'MTA_404280'): 78, ('MTA_404280', 'MTA_401821'): 121, ('MTA_401821', 'MTA_401822'): 126, ('MTA_401822', 'MTA_401824'): 118, ('MTA_401824', 'MTA_401826'): 116, ('MTA_401826', 'MTA_401827'): 59, ('MTA_401827', 'MTA_405336'): 132, ('MTA_405336', 'MTA_401832'): 121, ('MTA_401832', 'MTA_401833'): 62, ('MTA_401833', 'MTA_402052'): 76, ('MTA_402052', 'MTA_903027'): 89, ('MTA_504965', 'MTA_504119'): 75, ('MTA_504119', 'MTA_304196'): 80, ('MTA_304196', 'MTA_304197'): 68, ('MTA_304197', 'MTA_304198'): 60, ('MTA_304198', 'MTA_304199'): 64, ('MTA_304199', 'MTA_304200'): 75, ('MTA_304200', 'MTA_304201'): 52, ('MTA_304201', 'MTA_308046'): 66, ('MTA_308046', 'MTA_304203'): 47, ('MTA_304203', 'MTA_304204'): 89, ('MTA_304204', 'MTA_304205'): 81, ('MTA_304205', 'MTA_304206'): 80, ('MTA_304206', 'MTA_304207'): 65, ('MTA_304207', 'MTA_306530'): 95, ('MTA_

fetching routes:   4%|▎         | 10/267 [00:09<04:19,  1.01s/it]

{('MTA_405286', 'MTA_405587'): 58, ('MTA_405587', 'MTA_403192'): 103, ('MTA_403192', 'MTA_401818'): 97, ('MTA_401818', 'MTA_401819'): 84, ('MTA_401819', 'MTA_404280'): 78, ('MTA_404280', 'MTA_401821'): 121, ('MTA_401821', 'MTA_401822'): 126, ('MTA_401822', 'MTA_401824'): 118, ('MTA_401824', 'MTA_401826'): 116, ('MTA_401826', 'MTA_401827'): 59, ('MTA_401827', 'MTA_405336'): 132, ('MTA_405336', 'MTA_401832'): 121, ('MTA_401832', 'MTA_401833'): 62, ('MTA_401833', 'MTA_402052'): 76, ('MTA_402052', 'MTA_903027'): 89, ('MTA_504965', 'MTA_504119'): 75, ('MTA_504119', 'MTA_304196'): 80, ('MTA_304196', 'MTA_304197'): 68, ('MTA_304197', 'MTA_304198'): 60, ('MTA_304198', 'MTA_304199'): 64, ('MTA_304199', 'MTA_304200'): 75, ('MTA_304200', 'MTA_304201'): 52, ('MTA_304201', 'MTA_308046'): 66, ('MTA_308046', 'MTA_304203'): 47, ('MTA_304203', 'MTA_304204'): 89, ('MTA_304204', 'MTA_304205'): 81, ('MTA_304205', 'MTA_304206'): 80, ('MTA_304206', 'MTA_304207'): 65, ('MTA_304207', 'MTA_306530'): 95, ('MTA_

fetching routes:   4%|▍         | 11/267 [00:10<04:32,  1.06s/it]

{('MTA_405286', 'MTA_405587'): 58, ('MTA_405587', 'MTA_403192'): 103, ('MTA_403192', 'MTA_401818'): 97, ('MTA_401818', 'MTA_401819'): 84, ('MTA_401819', 'MTA_404280'): 78, ('MTA_404280', 'MTA_401821'): 121, ('MTA_401821', 'MTA_401822'): 126, ('MTA_401822', 'MTA_401824'): 118, ('MTA_401824', 'MTA_401826'): 116, ('MTA_401826', 'MTA_401827'): 59, ('MTA_401827', 'MTA_405336'): 132, ('MTA_405336', 'MTA_401832'): 121, ('MTA_401832', 'MTA_401833'): 62, ('MTA_401833', 'MTA_402052'): 76, ('MTA_402052', 'MTA_903027'): 89, ('MTA_504965', 'MTA_504119'): 75, ('MTA_504119', 'MTA_304196'): 80, ('MTA_304196', 'MTA_304197'): 68, ('MTA_304197', 'MTA_304198'): 60, ('MTA_304198', 'MTA_304199'): 64, ('MTA_304199', 'MTA_304200'): 75, ('MTA_304200', 'MTA_304201'): 52, ('MTA_304201', 'MTA_308046'): 66, ('MTA_308046', 'MTA_304203'): 47, ('MTA_304203', 'MTA_304204'): 89, ('MTA_304204', 'MTA_304205'): 81, ('MTA_304205', 'MTA_304206'): 80, ('MTA_304206', 'MTA_304207'): 65, ('MTA_304207', 'MTA_306530'): 95, ('MTA_

fetching routes:   4%|▍         | 12/267 [00:11<04:28,  1.05s/it]

{('MTA_405286', 'MTA_405587'): 58, ('MTA_405587', 'MTA_403192'): 103, ('MTA_403192', 'MTA_401818'): 97, ('MTA_401818', 'MTA_401819'): 84, ('MTA_401819', 'MTA_404280'): 78, ('MTA_404280', 'MTA_401821'): 121, ('MTA_401821', 'MTA_401822'): 126, ('MTA_401822', 'MTA_401824'): 118, ('MTA_401824', 'MTA_401826'): 116, ('MTA_401826', 'MTA_401827'): 59, ('MTA_401827', 'MTA_405336'): 132, ('MTA_405336', 'MTA_401832'): 121, ('MTA_401832', 'MTA_401833'): 62, ('MTA_401833', 'MTA_402052'): 76, ('MTA_402052', 'MTA_903027'): 89, ('MTA_504965', 'MTA_504119'): 75, ('MTA_504119', 'MTA_304196'): 80, ('MTA_304196', 'MTA_304197'): 68, ('MTA_304197', 'MTA_304198'): 60, ('MTA_304198', 'MTA_304199'): 64, ('MTA_304199', 'MTA_304200'): 75, ('MTA_304200', 'MTA_304201'): 52, ('MTA_304201', 'MTA_308046'): 66, ('MTA_308046', 'MTA_304203'): 47, ('MTA_304203', 'MTA_304204'): 89, ('MTA_304204', 'MTA_304205'): 81, ('MTA_304205', 'MTA_304206'): 80, ('MTA_304206', 'MTA_304207'): 65, ('MTA_304207', 'MTA_306530'): 95, ('MTA_

fetching routes:   5%|▌         | 14/267 [00:13<04:21,  1.03s/it]

{('MTA_405286', 'MTA_405587'): 58, ('MTA_405587', 'MTA_403192'): 103, ('MTA_403192', 'MTA_401818'): 97, ('MTA_401818', 'MTA_401819'): 84, ('MTA_401819', 'MTA_404280'): 78, ('MTA_404280', 'MTA_401821'): 121, ('MTA_401821', 'MTA_401822'): 126, ('MTA_401822', 'MTA_401824'): 118, ('MTA_401824', 'MTA_401826'): 116, ('MTA_401826', 'MTA_401827'): 59, ('MTA_401827', 'MTA_405336'): 132, ('MTA_405336', 'MTA_401832'): 121, ('MTA_401832', 'MTA_401833'): 62, ('MTA_401833', 'MTA_402052'): 76, ('MTA_402052', 'MTA_903027'): 89, ('MTA_504965', 'MTA_504119'): 75, ('MTA_504119', 'MTA_304196'): 80, ('MTA_304196', 'MTA_304197'): 68, ('MTA_304197', 'MTA_304198'): 60, ('MTA_304198', 'MTA_304199'): 64, ('MTA_304199', 'MTA_304200'): 75, ('MTA_304200', 'MTA_304201'): 52, ('MTA_304201', 'MTA_308046'): 66, ('MTA_308046', 'MTA_304203'): 47, ('MTA_304203', 'MTA_304204'): 89, ('MTA_304204', 'MTA_304205'): 81, ('MTA_304205', 'MTA_304206'): 80, ('MTA_304206', 'MTA_304207'): 65, ('MTA_304207', 'MTA_306530'): 95, ('MTA_

fetching routes:   6%|▌         | 15/267 [00:15<04:35,  1.09s/it]

{('MTA_405286', 'MTA_405587'): 58, ('MTA_405587', 'MTA_403192'): 103, ('MTA_403192', 'MTA_401818'): 97, ('MTA_401818', 'MTA_401819'): 84, ('MTA_401819', 'MTA_404280'): 78, ('MTA_404280', 'MTA_401821'): 121, ('MTA_401821', 'MTA_401822'): 126, ('MTA_401822', 'MTA_401824'): 118, ('MTA_401824', 'MTA_401826'): 116, ('MTA_401826', 'MTA_401827'): 59, ('MTA_401827', 'MTA_405336'): 132, ('MTA_405336', 'MTA_401832'): 121, ('MTA_401832', 'MTA_401833'): 62, ('MTA_401833', 'MTA_402052'): 76, ('MTA_402052', 'MTA_903027'): 89, ('MTA_504965', 'MTA_504119'): 75, ('MTA_504119', 'MTA_304196'): 80, ('MTA_304196', 'MTA_304197'): 68, ('MTA_304197', 'MTA_304198'): 60, ('MTA_304198', 'MTA_304199'): 64, ('MTA_304199', 'MTA_304200'): 75, ('MTA_304200', 'MTA_304201'): 52, ('MTA_304201', 'MTA_308046'): 66, ('MTA_308046', 'MTA_304203'): 47, ('MTA_304203', 'MTA_304204'): 89, ('MTA_304204', 'MTA_304205'): 81, ('MTA_304205', 'MTA_304206'): 80, ('MTA_304206', 'MTA_304207'): 65, ('MTA_304207', 'MTA_306530'): 95, ('MTA_

fetching routes:   6%|▌         | 16/267 [00:15<04:03,  1.03it/s]

{('MTA_405286', 'MTA_405587'): 58, ('MTA_405587', 'MTA_403192'): 103, ('MTA_403192', 'MTA_401818'): 97, ('MTA_401818', 'MTA_401819'): 84, ('MTA_401819', 'MTA_404280'): 78, ('MTA_404280', 'MTA_401821'): 121, ('MTA_401821', 'MTA_401822'): 126, ('MTA_401822', 'MTA_401824'): 118, ('MTA_401824', 'MTA_401826'): 116, ('MTA_401826', 'MTA_401827'): 59, ('MTA_401827', 'MTA_405336'): 132, ('MTA_405336', 'MTA_401832'): 121, ('MTA_401832', 'MTA_401833'): 62, ('MTA_401833', 'MTA_402052'): 76, ('MTA_402052', 'MTA_903027'): 89, ('MTA_504965', 'MTA_504119'): 75, ('MTA_504119', 'MTA_304196'): 80, ('MTA_304196', 'MTA_304197'): 68, ('MTA_304197', 'MTA_304198'): 60, ('MTA_304198', 'MTA_304199'): 64, ('MTA_304199', 'MTA_304200'): 75, ('MTA_304200', 'MTA_304201'): 52, ('MTA_304201', 'MTA_308046'): 66, ('MTA_308046', 'MTA_304203'): 47, ('MTA_304203', 'MTA_304204'): 89, ('MTA_304204', 'MTA_304205'): 81, ('MTA_304205', 'MTA_304206'): 80, ('MTA_304206', 'MTA_304207'): 65, ('MTA_304207', 'MTA_306530'): 95, ('MTA_

fetching routes:   6%|▋         | 17/267 [00:17<04:33,  1.09s/it]

{('MTA_405286', 'MTA_405587'): 58, ('MTA_405587', 'MTA_403192'): 103, ('MTA_403192', 'MTA_401818'): 97, ('MTA_401818', 'MTA_401819'): 84, ('MTA_401819', 'MTA_404280'): 78, ('MTA_404280', 'MTA_401821'): 121, ('MTA_401821', 'MTA_401822'): 126, ('MTA_401822', 'MTA_401824'): 118, ('MTA_401824', 'MTA_401826'): 116, ('MTA_401826', 'MTA_401827'): 59, ('MTA_401827', 'MTA_405336'): 132, ('MTA_405336', 'MTA_401832'): 121, ('MTA_401832', 'MTA_401833'): 62, ('MTA_401833', 'MTA_402052'): 76, ('MTA_402052', 'MTA_903027'): 89, ('MTA_504965', 'MTA_504119'): 75, ('MTA_504119', 'MTA_304196'): 80, ('MTA_304196', 'MTA_304197'): 68, ('MTA_304197', 'MTA_304198'): 60, ('MTA_304198', 'MTA_304199'): 64, ('MTA_304199', 'MTA_304200'): 75, ('MTA_304200', 'MTA_304201'): 52, ('MTA_304201', 'MTA_308046'): 66, ('MTA_308046', 'MTA_304203'): 47, ('MTA_304203', 'MTA_304204'): 89, ('MTA_304204', 'MTA_304205'): 81, ('MTA_304205', 'MTA_304206'): 80, ('MTA_304206', 'MTA_304207'): 65, ('MTA_304207', 'MTA_306530'): 95, ('MTA_

fetching routes:   7%|▋         | 18/267 [00:17<04:09,  1.00s/it]

{('MTA_405286', 'MTA_405587'): 58, ('MTA_405587', 'MTA_403192'): 103, ('MTA_403192', 'MTA_401818'): 97, ('MTA_401818', 'MTA_401819'): 84, ('MTA_401819', 'MTA_404280'): 78, ('MTA_404280', 'MTA_401821'): 121, ('MTA_401821', 'MTA_401822'): 126, ('MTA_401822', 'MTA_401824'): 118, ('MTA_401824', 'MTA_401826'): 116, ('MTA_401826', 'MTA_401827'): 59, ('MTA_401827', 'MTA_405336'): 132, ('MTA_405336', 'MTA_401832'): 121, ('MTA_401832', 'MTA_401833'): 62, ('MTA_401833', 'MTA_402052'): 76, ('MTA_402052', 'MTA_903027'): 89, ('MTA_504965', 'MTA_504119'): 75, ('MTA_504119', 'MTA_304196'): 80, ('MTA_304196', 'MTA_304197'): 68, ('MTA_304197', 'MTA_304198'): 60, ('MTA_304198', 'MTA_304199'): 64, ('MTA_304199', 'MTA_304200'): 75, ('MTA_304200', 'MTA_304201'): 52, ('MTA_304201', 'MTA_308046'): 66, ('MTA_308046', 'MTA_304203'): 47, ('MTA_304203', 'MTA_304204'): 89, ('MTA_304204', 'MTA_304205'): 81, ('MTA_304205', 'MTA_304206'): 80, ('MTA_304206', 'MTA_304207'): 65, ('MTA_304207', 'MTA_306530'): 95, ('MTA_

fetching routes:   7%|▋         | 19/267 [00:18<03:58,  1.04it/s]

{('MTA_405286', 'MTA_405587'): 58, ('MTA_405587', 'MTA_403192'): 103, ('MTA_403192', 'MTA_401818'): 97, ('MTA_401818', 'MTA_401819'): 84, ('MTA_401819', 'MTA_404280'): 78, ('MTA_404280', 'MTA_401821'): 121, ('MTA_401821', 'MTA_401822'): 126, ('MTA_401822', 'MTA_401824'): 118, ('MTA_401824', 'MTA_401826'): 116, ('MTA_401826', 'MTA_401827'): 59, ('MTA_401827', 'MTA_405336'): 132, ('MTA_405336', 'MTA_401832'): 121, ('MTA_401832', 'MTA_401833'): 62, ('MTA_401833', 'MTA_402052'): 76, ('MTA_402052', 'MTA_903027'): 89, ('MTA_504965', 'MTA_504119'): 75, ('MTA_504119', 'MTA_304196'): 80, ('MTA_304196', 'MTA_304197'): 68, ('MTA_304197', 'MTA_304198'): 60, ('MTA_304198', 'MTA_304199'): 64, ('MTA_304199', 'MTA_304200'): 75, ('MTA_304200', 'MTA_304201'): 52, ('MTA_304201', 'MTA_308046'): 66, ('MTA_308046', 'MTA_304203'): 47, ('MTA_304203', 'MTA_304204'): 89, ('MTA_304204', 'MTA_304205'): 81, ('MTA_304205', 'MTA_304206'): 80, ('MTA_304206', 'MTA_304207'): 65, ('MTA_304207', 'MTA_306530'): 95, ('MTA_

fetching routes:   7%|▋         | 20/267 [00:19<03:59,  1.03it/s]

{('MTA_405286', 'MTA_405587'): 58, ('MTA_405587', 'MTA_403192'): 103, ('MTA_403192', 'MTA_401818'): 97, ('MTA_401818', 'MTA_401819'): 84, ('MTA_401819', 'MTA_404280'): 78, ('MTA_404280', 'MTA_401821'): 121, ('MTA_401821', 'MTA_401822'): 126, ('MTA_401822', 'MTA_401824'): 118, ('MTA_401824', 'MTA_401826'): 116, ('MTA_401826', 'MTA_401827'): 59, ('MTA_401827', 'MTA_405336'): 132, ('MTA_405336', 'MTA_401832'): 121, ('MTA_401832', 'MTA_401833'): 62, ('MTA_401833', 'MTA_402052'): 76, ('MTA_402052', 'MTA_903027'): 89, ('MTA_504965', 'MTA_504119'): 75, ('MTA_504119', 'MTA_304196'): 80, ('MTA_304196', 'MTA_304197'): 68, ('MTA_304197', 'MTA_304198'): 60, ('MTA_304198', 'MTA_304199'): 64, ('MTA_304199', 'MTA_304200'): 75, ('MTA_304200', 'MTA_304201'): 52, ('MTA_304201', 'MTA_308046'): 66, ('MTA_308046', 'MTA_304203'): 47, ('MTA_304203', 'MTA_304204'): 89, ('MTA_304204', 'MTA_304205'): 81, ('MTA_304205', 'MTA_304206'): 80, ('MTA_304206', 'MTA_304207'): 65, ('MTA_304207', 'MTA_306530'): 95, ('MTA_

fetching routes:   8%|▊         | 21/267 [00:20<04:01,  1.02it/s]

{('MTA_405286', 'MTA_405587'): 58, ('MTA_405587', 'MTA_403192'): 103, ('MTA_403192', 'MTA_401818'): 97, ('MTA_401818', 'MTA_401819'): 84, ('MTA_401819', 'MTA_404280'): 78, ('MTA_404280', 'MTA_401821'): 121, ('MTA_401821', 'MTA_401822'): 126, ('MTA_401822', 'MTA_401824'): 118, ('MTA_401824', 'MTA_401826'): 116, ('MTA_401826', 'MTA_401827'): 59, ('MTA_401827', 'MTA_405336'): 132, ('MTA_405336', 'MTA_401832'): 121, ('MTA_401832', 'MTA_401833'): 62, ('MTA_401833', 'MTA_402052'): 76, ('MTA_402052', 'MTA_903027'): 89, ('MTA_504965', 'MTA_504119'): 75, ('MTA_504119', 'MTA_304196'): 80, ('MTA_304196', 'MTA_304197'): 68, ('MTA_304197', 'MTA_304198'): 60, ('MTA_304198', 'MTA_304199'): 64, ('MTA_304199', 'MTA_304200'): 75, ('MTA_304200', 'MTA_304201'): 52, ('MTA_304201', 'MTA_308046'): 66, ('MTA_308046', 'MTA_304203'): 47, ('MTA_304203', 'MTA_304204'): 89, ('MTA_304204', 'MTA_304205'): 81, ('MTA_304205', 'MTA_304206'): 80, ('MTA_304206', 'MTA_304207'): 65, ('MTA_304207', 'MTA_306530'): 95, ('MTA_

fetching routes:   8%|▊         | 22/267 [00:21<03:44,  1.09it/s]

{('MTA_405286', 'MTA_405587'): 58, ('MTA_405587', 'MTA_403192'): 103, ('MTA_403192', 'MTA_401818'): 97, ('MTA_401818', 'MTA_401819'): 84, ('MTA_401819', 'MTA_404280'): 78, ('MTA_404280', 'MTA_401821'): 121, ('MTA_401821', 'MTA_401822'): 126, ('MTA_401822', 'MTA_401824'): 118, ('MTA_401824', 'MTA_401826'): 116, ('MTA_401826', 'MTA_401827'): 59, ('MTA_401827', 'MTA_405336'): 132, ('MTA_405336', 'MTA_401832'): 121, ('MTA_401832', 'MTA_401833'): 62, ('MTA_401833', 'MTA_402052'): 76, ('MTA_402052', 'MTA_903027'): 89, ('MTA_504965', 'MTA_504119'): 75, ('MTA_504119', 'MTA_304196'): 80, ('MTA_304196', 'MTA_304197'): 68, ('MTA_304197', 'MTA_304198'): 60, ('MTA_304198', 'MTA_304199'): 64, ('MTA_304199', 'MTA_304200'): 75, ('MTA_304200', 'MTA_304201'): 52, ('MTA_304201', 'MTA_308046'): 66, ('MTA_308046', 'MTA_304203'): 47, ('MTA_304203', 'MTA_304204'): 89, ('MTA_304204', 'MTA_304205'): 81, ('MTA_304205', 'MTA_304206'): 80, ('MTA_304206', 'MTA_304207'): 65, ('MTA_304207', 'MTA_306530'): 95, ('MTA_

fetching routes:   9%|▊         | 23/267 [00:22<04:01,  1.01it/s]

{('MTA_405286', 'MTA_405587'): 58, ('MTA_405587', 'MTA_403192'): 103, ('MTA_403192', 'MTA_401818'): 97, ('MTA_401818', 'MTA_401819'): 84, ('MTA_401819', 'MTA_404280'): 78, ('MTA_404280', 'MTA_401821'): 121, ('MTA_401821', 'MTA_401822'): 126, ('MTA_401822', 'MTA_401824'): 118, ('MTA_401824', 'MTA_401826'): 116, ('MTA_401826', 'MTA_401827'): 59, ('MTA_401827', 'MTA_405336'): 132, ('MTA_405336', 'MTA_401832'): 121, ('MTA_401832', 'MTA_401833'): 62, ('MTA_401833', 'MTA_402052'): 76, ('MTA_402052', 'MTA_903027'): 89, ('MTA_504965', 'MTA_504119'): 75, ('MTA_504119', 'MTA_304196'): 80, ('MTA_304196', 'MTA_304197'): 68, ('MTA_304197', 'MTA_304198'): 60, ('MTA_304198', 'MTA_304199'): 64, ('MTA_304199', 'MTA_304200'): 75, ('MTA_304200', 'MTA_304201'): 52, ('MTA_304201', 'MTA_308046'): 66, ('MTA_308046', 'MTA_304203'): 47, ('MTA_304203', 'MTA_304204'): 89, ('MTA_304204', 'MTA_304205'): 81, ('MTA_304205', 'MTA_304206'): 80, ('MTA_304206', 'MTA_304207'): 65, ('MTA_304207', 'MTA_306530'): 95, ('MTA_

fetching routes:   9%|▉         | 24/267 [00:23<03:45,  1.08it/s]

{('MTA_405286', 'MTA_405587'): 58, ('MTA_405587', 'MTA_403192'): 103, ('MTA_403192', 'MTA_401818'): 97, ('MTA_401818', 'MTA_401819'): 84, ('MTA_401819', 'MTA_404280'): 78, ('MTA_404280', 'MTA_401821'): 121, ('MTA_401821', 'MTA_401822'): 126, ('MTA_401822', 'MTA_401824'): 118, ('MTA_401824', 'MTA_401826'): 116, ('MTA_401826', 'MTA_401827'): 59, ('MTA_401827', 'MTA_405336'): 132, ('MTA_405336', 'MTA_401832'): 121, ('MTA_401832', 'MTA_401833'): 62, ('MTA_401833', 'MTA_402052'): 76, ('MTA_402052', 'MTA_903027'): 89, ('MTA_504965', 'MTA_504119'): 75, ('MTA_504119', 'MTA_304196'): 80, ('MTA_304196', 'MTA_304197'): 68, ('MTA_304197', 'MTA_304198'): 60, ('MTA_304198', 'MTA_304199'): 64, ('MTA_304199', 'MTA_304200'): 75, ('MTA_304200', 'MTA_304201'): 52, ('MTA_304201', 'MTA_308046'): 66, ('MTA_308046', 'MTA_304203'): 47, ('MTA_304203', 'MTA_304204'): 89, ('MTA_304204', 'MTA_304205'): 81, ('MTA_304205', 'MTA_304206'): 80, ('MTA_304206', 'MTA_304207'): 65, ('MTA_304207', 'MTA_306530'): 95, ('MTA_

In [None]:
# Summary of the fetch loop: number of stop groups collected, the stop groups
# with no matching schedule (per route id), and the number of timed edges.
print(len(all_route_lists))
print(unknown_routes)
print(len(est_stop_times))


446
{'S66': ['PORT RICHMOND via GRYMES HILL', 'ST GEORGE FERRY via GRYMES HILL'], 'Q26': ['FLUSHING MAIN ST STATION', 'FRESH MEADOWS HOLLIS COURT BL via 46 AV'], 'S61': ['ST GEORGE FERRY'], 'SIM15': ['DOWNTOWN LOOP via CHURCH ST via WATER ST', 'ELTINGVILLE TRANSIT CENTER via RICHMOND'], 'SIM10': ['ELTINGVILLE TRANS CTR via HYLAN via RICH', 'MIDTOWN via 23 ST via 6 AV'], 'SIM11': ['MIDTOWN via 23 ST via MADISON AV', 'NEW DORP via HYLAN BL'], 'SIM9': ['ELTINGVILLE via F CAP via HYLAN BL', 'GREENWICH VILL via WEST ST via 6 AV'], 'SIM8': ['ARDEN HTS via RICHMOND AV via WOODROW RD', 'MIDTOWN via 42 ST via MADISON AV'], 'SIM7': ['ELTVLLE TRANS CTR via HYLAN via RICHMOND', 'GREENWICH VILLAGE via WEST ST via 6 AV'], 'SIM6': ['ELTVLLE TRANS CTR via F CAP BL via GFRDS', 'MIDTOWN via 23 ST via MADISON AV'], 'SIM5': ['DOWNTOWN FRANKFORT ST via WATER ST', 'ELTVLLE TRANS CTR via F CAP BL via GFRDS'], 'SIM4': ['ANNADALE via RICHMOND AV', 'DOWNTOWN WORTH ST via CHURCH ST'], 'SIM3': ['MIDTOWN via 23 ST

## I ended up with a lot of missing route schedules (see `unknown_routes` above), so I'll need to do some error correcting

In [None]:
# Persist the collected stop groups (destination name -> stop ids) as JSON.
filename = "./graph_data/all_route_lists.json"
with open(filename, 'w') as file:
    json.dump(all_route_lists, file, indent=4)

In [None]:
# Reload the stop groups from disk (same file the previous cell writes),
# presumably so the slow fetch loop does not have to be rerun in a new session.
json_path='./graph_data/all_route_lists.json'
with open(json_path) as json_file:
    all_route_lists = json.load(json_file)

## Need to get all missing stops into stations.graphml

In [None]:
async def get_stop_info(stop_id):
    """Fetch the MTA Bus Time record for a single stop.

    Parameters
    ----------
    stop_id : str
        Stop identifier interpolated into the request path, e.g. "MTA_308248".

    Returns
    -------
    dict
        The decoded JSON response body. Callers read the stop's attributes
        from its ``'data'`` entry.

    Raises
    ------
    Exception
        If the body reports a ``code`` other than 200. This mirrors the other
        fetch helpers in this notebook, so an error payload fails here with a
        readable message instead of later as a KeyError/TypeError.
    """
    url = f"https://bustime.mta.info/api/where/stop/{stop_id}.json?key={MTA_API_KEY}"
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as response:
            payload = await response.json()
    if payload.get('code') == 200:
        return payload
    raise Exception(f"Error: stop {stop_id} returned response code {payload.get('code')}")

In [None]:
def findNearestNode2Placemark(G, placemark_info):
    '''
    Return the ID of the graph node nearest to a placemark.

    G is handed straight to ox.distance.nearest_nodes; placemark_info must
    provide 'longitude' and 'latitude' entries.
    '''
    # The lookup is asked for a (node, distance) pair; only the node is returned.
    node_id, _distance = ox.distance.nearest_nodes(
        G,
        X=placemark_info['longitude'],
        Y=placemark_info['latitude'],
        return_dist=True,
    )
    return node_id

In [None]:
def _decode_bus_stops(raw):
    """Normalise a node's stored 'bus_stops' value into a mutable list."""
    import ast  # local import: only needed to decode stringified attributes

    if raw is None:
        return []
    if isinstance(raw, str):
        # A graph reloaded from GraphML hands custom attributes back as text
        # (e.g. "['MTA_1']") unless a converter was supplied at load time.
        try:
            raw = ast.literal_eval(raw)
        except (ValueError, SyntaxError):
            return [raw]  # not a literal: treat the text as a single stop id
    if isinstance(raw, list):
        return raw
    if isinstance(raw, (tuple, set)):
        return list(raw)
    return [raw]


async def add_info(stop_id, bus_info_dict, G, verbose=True):
    """Fetch one stop's details and register it in the dict and on the graph.

    Parameters
    ----------
    stop_id : str
        Stop identifier passed to get_stop_info.
    bus_info_dict : dict
        Mapping of stop id -> info dict; updated in place with the new entry.
    G : graph
        Graph whose nearest node (per findNearestNode2Placemark) receives the
        stop id in its 'bus_stops' attribute.
    verbose : bool, default True
        When true, print the new info entry and the node's attributes (the
        original behaviour); pass False to keep bulk runs quiet.

    Returns
    -------
    dict
        The same ``bus_info_dict`` object that was passed in.

    Raises
    ------
    Exception
        If the stop lookup returns no 'data' payload.
    """
    response = await get_stop_info(stop_id)
    stop_data = response.get('data')
    if not stop_data:
        raise Exception(
            f"Error: stop {stop_id} returned no data (response code {response.get('code')})"
        )

    # Key order matches the entries already stored in the bus info dictionary.
    info = {
        'name': stop_data['name'],
        'direction': stop_data['direction'],
        'id': stop_id,
        'longitude': stop_data['lon'],
        'latitude': stop_data['lat'],
    }
    info['osmid'] = findNearestNode2Placemark(G, info)

    node_attrs = G.nodes[info['osmid']]
    stops_here = _decode_bus_stops(node_attrs.get('bus_stops'))
    if stop_id not in stops_here:  # keep re-runs from duplicating the stop
        stops_here.append(stop_id)
    node_attrs['bus_stops'] = stops_here

    bus_info_dict[stop_id] = info
    if verbose:
        print(bus_info_dict[stop_id])
        print(G.nodes[info['osmid']])

    return bus_info_dict
    
    
    




In [None]:
# example info entry
# "MTA_308248": {
#         "name": "4 AV/SENATOR ST",
#         "routes": [
#             "B9"
#         ],
#         "direction": "S",
#         "link": "https://bustime.mta.info/m/index?q=308248",
#         "id": "MTA_308248",
#         "latitude": 40.636048,
#         "longitude": -74.022991,
#         "osmid": 42521235
# }

In [None]:
# Load the stations graph that the following cells extend with bus stops.
G = ox.io.load_graphml(filepath='./graph_data/stations.graphml')

In [None]:

for route_list in all_route_lists.values():
    for stop in route_list:
        if stop not in bus_info:
            bus_info = await add_info(stop, bus_info, G)

filename = "./graph_data/bus_info_dict.json"
with open(filename, 'w') as file:
    json.dump(bus_info, file, indent=4)
ox.io.save_graphml(G, filepath='./graph_data/stations.graphml')

In [None]:
# getting the stops around a coordinate
async def get_closest_stops(lon, lat, radius):
    """Query the MTA Bus Time stops-for-location endpoint.

    lon, lat and radius are interpolated into the query string as given.
    Returns the decoded JSON body when its 'code' is 200; otherwise raises
    Exception carrying the reported code.
    """
    url = f"https://bustime.mta.info/api/where/stops-for-location.json?key={MTA_API_KEY}&lat={lat}&lon={lon}&radius={radius}"
    async with aiohttp.ClientSession() as session, session.get(url) as response:
        payload = await response.json()
    if payload['code'] != 200:
        raise Exception(f"Error: {payload['code']}")
    return payload

In [None]:
async def hi():
    """Smoke test: print the stops-for-location response for one fixed point."""
    print(await get_closest_stops(-73.946764, 40.6319761, 200))

# Run the smoke test. Awaiting at the top level of a cell relies on the
# notebook kernel's support for top-level await.
await hi()

{'code': 200, 'currentTime': 1747012678146, 'data': {'limitExceeded': False, 'stops': [{'code': '303232', 'direction': 'NW', 'id': 'MTA_303232', 'lat': 40.632805, 'locationType': 0, 'lon': -73.947493, 'name': 'FLATBUSH AV/NOSTRAND AV', 'routes': [{'agency': {'disclaimer': '', 'email': '', 'fareUrl': '', 'id': 'MTA NYCT', 'lang': 'en', 'name': 'MTA New York City Transit', 'phone': '718-330-1234', 'privateService': False, 'timezone': 'America/New_York', 'url': 'http://www.mta.info'}, 'color': '006CB7', 'description': 'via Flatbush Av / Livingston St', 'id': 'MTA NYCT_B41', 'longName': 'Kings Plaza - Downtown Brooklyn', 'shortName': 'B41', 'textColor': 'FFFFFF', 'type': 3, 'url': ''}], 'wheelchairBoarding': 'UNKNOWN'}, {'code': '303317', 'direction': 'SE', 'id': 'MTA_303317', 'lat': 40.631995, 'locationType': 0, 'lon': -73.946898, 'name': 'FLATBUSH AV/NOSTRAND AV', 'routes': [{'agency': {'disclaimer': '', 'email': '', 'fareUrl': '', 'id': 'MTA NYCT', 'lang': 'en', 'name': 'MTA New York Ci

In [None]:
# get schedule info and convert it to edge travel time
# https://api.pugetsound.onebusaway.org/api/where/schedule-for-route/1_100223.json?key=TEST