In [73]:
import requests
import json
import os
from dotenv import load_dotenv
import osmnx as ox

In [74]:
import asyncio
import aiohttp
    

In [75]:
load_dotenv()  # load environment variables via python-dotenv (presumably from a local .env file — confirm)

True

In [76]:
# API key for the MTA Bus Time endpoints; None when the variable is not set.
MTA_API_KEY = os.environ.get("MTA_API_KEY")

In [None]:
# fetch the stops-for-route payload for a route id
async def get_bus_route(route_id):
    """Request the stops-for-route JSON for one MTA NYCT route.

    Args:
        route_id: Short route id (e.g. "B52"); it is inserted after the
            "MTA%20NYCT_" prefix in the request URL.

    Returns:
        The decoded JSON body when its 'code' field equals 200.

    Raises:
        Exception: If the body's 'code' field is anything other than 200.
    """
    endpoint = f"https://bustime.mta.info/api/where/stops-for-route/MTA%20NYCT_{route_id}.json?key={MTA_API_KEY}&includePolylines=false&version=2"
    # A fresh client session is opened (and closed) for every call.
    async with aiohttp.ClientSession() as http, http.get(endpoint) as reply:
        payload = await reply.json()
    if payload['code'] != 200:
        raise Exception(f"Error: bus route {route_id} returned response code {payload['code']}")
    return payload

In [142]:
route_id = "B52" # Example route
# Top-level `await` relies on the IPython kernel's support for awaiting in cells.
route_data = await get_bus_route(route_id)
# Pretty-print the whole decoded response for inspection.
print(json.dumps(route_data, indent=4))



{
    "code": 200,
    "currentTime": 1746806242524,
    "data": {
        "entry": {
            "polylines": [
                {
                    "length": 128,
                    "levels": "",
                    "points": "ylkwFhkrbMu@LJkA?UD{BDwB?_@@_A`CDdA@TD`AB`HH`A@v@AHARGLIVBZHL[x@qCt@iCLe@fAoDLe@t@qCJ_@Rq@v@cCZgAj@mBf@{AHW`AiDb@yAv@oCv@kCj@oB`@oAPk@v@uCbAaDV_A^qAHUDOV}@~@yCvAsEb@wAl@wBPs@M_@CQGcAC[UaESaEWaEUgEK}AMkBOqCCk@KgBKeBIoACc@K}AU_EW_EI{Au@{MIsAs@oMz@IvAQpC[lC[IuAg@yIGsAcAkRCYkAoSGqAw@cNIcBu@uMIwAw@_NGoA}@wNC_@Cm@UoDQcDM{BCi@Ei@Ci@c@{HOiCCk@Eq@Ce@}@sNGkA_AcPC[g@_JC[{GaHm@k@gEyDo@o@kHgHo@o@wFoFm@m@qFwFs@q@kGeGvAuCs@q@kBiBoBcB"
                },
                {
                    "length": 26,
                    "levels": "",
                    "points": "yhlwFxpbbMYY{AvC`B`BhBlBPPdBbBjEdEr@r@jGbGt@v@hFlFh@h@|FtFn@n@jHfHp@l@bEvDr@t@fFjFLxB\\`GHrA`AvPHpAv@fM"
                },
                {
                    "length": 4,
                    "levels": "",


In [134]:
# getting trip/schedule data given a route id
async def get_bus_schedule(route_id):
    """Fetch the trips-for-route payload (with schedules) for one MTA NYCT route.

    Args:
        route_id: Short route id (e.g. "B52"); it is inserted after the
            "MTA%20NYCT_" prefix in the request URL.

    Returns:
        The decoded JSON body when its 'code' field equals 200.

    Raises:
        Exception: If the body's 'code' field is anything other than 200.
    """
    url = f"https://bustime.mta.info/api/where/trips-for-route/MTA%20NYCT_{route_id}.json?key={MTA_API_KEY}&includeSchedule=true"
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as response:
            payload = await response.json()
    # Check the status before handing the payload back, so an error response
    # is raised rather than returned as if it were schedule data.
    if payload['code'] != 200:
        raise Exception(f"Error: bus schedule for route {route_id} returned response code {payload['code']}")
    return payload

In [None]:
# Fetch the trips-for-route response for the route_id set in an earlier cell.
# NOTE(review): the name says "stop", but this holds the route's trip/schedule payload.
stop_data = await get_bus_schedule(route_id)
# Pretty-print the whole decoded response for inspection.
print(json.dumps(stop_data, indent=4))

{
    "code": 200,
    "currentTime": 1746805407450,
    "data": {
        "limitExceeded": false,
        "list": [
            {
                "frequency": null,
                "schedule": {
                    "frequency": null,
                    "nextTripId": "MTA NYCT_FP_B5-Weekday-SDon-072500_B26_215",
                    "previousTripId": "MTA NYCT_FP_B5-Weekday-SDon-059300_B26_202",
                    "stopTimes": [
                        {
                            "arrivalTime": 39660,
                            "departureTime": 39660,
                            "distanceAlongTrip": 0.08656897032969031,
                            "stopHeadsign": "",
                            "stopId": "MTA_307175"
                        },
                        {
                            "arrivalTime": 39780,
                            "departureTime": 39780,
                            "distanceAlongTrip": 285.61860445152655,
                            "stopHeadsign": "

In [79]:
def get_stop_groups(route_data):
    """Map each stop-group name of a route to its list of stop ids.

    Only the first entry of ``data.entry.stopGroupings`` is used.

    Args:
        route_data: Decoded stops-for-route response (a dict with a
            'data' -> 'entry' -> 'stopGroupings' structure).

    Returns:
        dict of ``{group name: stopIds}``, or an empty dict (after printing
        a notice) when the first grouping is not flagged as ordered.
    """
    entry = route_data['data']['entry']
    grouping = entry['stopGroupings'][0]

    if not grouping['ordered']:
        # The response has no guaranteed top-level 'routeId' (its top-level
        # keys are code/currentTime/data/...), so look on the entry as well
        # and fall back to a placeholder instead of raising KeyError here.
        route_label = route_data.get('routeId') or entry.get('routeId') or 'unknown route'
        print(f"{route_label} not ordered")
        return {}

    return {group['name']['name']: group['stopIds'] for group in grouping['stopGroups']}

    

In [80]:
# Stop groups (group name -> stop ids) for whatever response route_data currently holds.
test_route_lists = get_stop_groups(route_data)

In [93]:
# Read the saved stop-info lookup (keyed by stop id) from disk.
json_path = './graph_data/bus_info_dict.json'
with open(json_path, mode='r') as json_file:
    bus_info = json.load(json_file)

In [82]:
# Spot-check a single stop entry by its stop id.
print(bus_info['MTA_901601'])

{'name': 'Brooklyn Bridge Park /PIER 6', 'routes': ['B63'], 'direction': 'W', 'link': 'https://bustime.mta.info/m/index?q=901601', 'id': 'MTA_901601', 'latitude': 40.693078, 'longitude': -74.000877, 'osmid': 1567286111}


In [83]:
# Read the list of route records from disk; the file must already exist.
json_path = './graph_data/all_routes.json'
with open(json_path, mode='r') as json_file:
    all_routes = json.load(json_file)

FileNotFoundError: [Errno 2] No such file or directory: './graph_data/all_routes.json'

In [None]:
# Number of route records currently held in all_routes.
print(len(all_routes))

267


In [None]:
def shorten_id(route):
    """Return the second '_'-separated piece of ``route['id']``.

    For an id shaped like '<agency>_M34+' this yields 'M34+'. Raises
    IndexError when the id contains no underscore.
    """
    id_parts = route['id'].split('_')
    return id_parts[1]

In [None]:
# Sanity check on the first route record; the bare expression is shown as the cell output.
shorten_id(all_routes[0])

'M34+'

In [None]:
route_data = get_bus_route('B52')
print(route_data)

<coroutine object get_bus_route at 0x123cf4d40>


In [None]:
from tqdm import tqdm

In [None]:
#getting a dict of all routes 
all_route_lists = {}
fails = []
for i in tqdm(range(len(all_routes)), desc="fetching routes"):
    route_id = shorten_id(all_routes[i])

    try:
        route_data = await get_bus_route(route_id)
    except Exception as e:
        fails.append(route_id)
        continue

    stop_groups = get_stop_groups(route_data)
    all_route_lists.update(stop_groups)
    
print(f"failed to fetch {len(fails)} routes: {fails}")
    

  route_data = await get_bus_route(route_id)
fetching routes: 100%|██████████| 267/267 [01:53<00:00,  2.35it/s]

failed to fetch 12 routes: ['D90', 'B101', 'L90', 'B90', 'B94', 'B96', 'B98', 'Q90', 'BX92', 'Q107', 'Q108', 'M90']





In [None]:
# Number of distinct stop-group keys currently held in all_route_lists.
print(len(all_route_lists))


446


In [94]:
# Write the collected stop lists to disk as indented JSON.
filename = "./graph_data/all_route_lists.json"
with open(filename, mode='w') as file:
    json.dump(all_route_lists, file, indent=4)

In [95]:
# Read the stop lists back from disk, replacing the in-memory all_route_lists.
json_path = './graph_data/all_route_lists.json'
with open(json_path, mode='r') as json_file:
    all_route_lists = json.load(json_file)

## Need to get all missing stops into stations.graphml

In [96]:
async def get_stop_info(stop_id):
    """Request the stop JSON for one stop id and return the decoded body.

    The body is returned as-is; no status check is performed here.
    """
    endpoint = f"https://bustime.mta.info/api/where/stop/{stop_id}.json?key={MTA_API_KEY}"
    # A fresh client session is opened (and closed) for every call.
    async with aiohttp.ClientSession() as http, http.get(endpoint) as reply:
        stop_payload = await reply.json()
    return stop_payload

In [97]:
def findNearestNode2Placemark(G, placemark_info):
    '''
    Return the id of the node in G nearest to a placemark.

    placemark_info must provide 'longitude' and 'latitude' keys; they are
    passed to osmnx as the X and Y coordinates respectively.
    '''
    lon = placemark_info['longitude']
    lat = placemark_info['latitude']

    # return_dist=True yields a (node, distance) pair; only the node is used.
    node_id, _distance = ox.distance.nearest_nodes(G, lon, lat, return_dist=True)
    return node_id

In [98]:
async def add_info(stop_id, bus_info_dict, G):
    """Fetch one stop, attach it to its nearest graph node, and record it.

    Args:
        stop_id: Stop id to look up via get_stop_info.
        bus_info_dict: Mapping of stop id -> info dict; updated in place.
        G: Graph whose nearest node gets stop_id appended to its
            'bus_stops' list (the list is created if absent).

    Returns:
        The same bus_info_dict object, now containing an entry for stop_id.

    NOTE(review): the entry built here has no 'routes' or 'link' keys,
    unlike the existing bus_info entries shown in this notebook — confirm
    whether downstream code needs them.
    """
    # Stop attributes as reported by the stop endpoint.
    stop = (await get_stop_info(stop_id))['data']
    info = {
        'name': stop['name'],
        'direction': stop['direction'],
        'id': stop_id,
        'longitude': stop['lon'],
        'latitude': stop['lat'],
    }
    osmid = findNearestNode2Placemark(G, info)
    info['osmid'] = osmid

    # Register the stop on the matched node.
    node_attrs = G.nodes[osmid]
    node_attrs.setdefault('bus_stops', []).append(stop_id)

    bus_info_dict[stop_id] = info
    print(bus_info_dict[stop_id])
    print(G.nodes[osmid])

    return bus_info_dict
    
    
    




In [99]:
# example info entry
# "MTA_308248": {
#         "name": "4 AV/SENATOR ST",
#         "routes": [
#             "B9"
#         ],
#         "direction": "S",
#         "link": "https://bustime.mta.info/m/index?q=308248",
#         "id": "MTA_308248",
#         "latitude": 40.636048,
#         "longitude": -74.022991,
#         "osmid": 42521235
# }

In [100]:
# Load the previously saved graph from its GraphML file.
G = ox.io.load_graphml('./graph_data/stations.graphml')

In [101]:

for route_list in all_route_lists.values():
    for stop in route_list:
        if stop not in bus_info:
            bus_info = await add_info(stop, bus_info, G)

filename = "./graph_data/bus_info_dict.json"
with open(filename, 'w') as file:
    json.dump(bus_info, file, indent=4)
ox.io.save_graphml(G, filepath='./graph_data/stations.graphml')

{'name': 'FDR DR/E 35 ST', 'direction': 'NE', 'id': 'MTA_903355', 'longitude': -73.971636, 'latitude': 40.743473, 'osmid': 406048768}
{'y': 40.7438669, 'x': -73.9716376, 'highway': 'traffic_signals', 'street_count': 3, 'bus_stops': ['MTA_903355']}
{'name': 'W 34 ST/DYER AV', 'direction': 'W', 'id': 'MTA_403530', 'longitude': -73.998474, 'latitude': 40.754416, 'osmid': 427841052}
{'y': 40.7543851, 'x': -73.9982439, 'street_count': 3, 'bus_stops': ['MTA_403530']}
{'name': 'W 34 ST/HUDSON BLVD E', 'direction': 'NW', 'id': 'MTA_405485', 'longitude': -74.000813, 'latitude': 40.755349, 'osmid': 3270031002}
{'y': 40.755166, 'x': -74.0005225, 'highway': 'traffic_signals', 'street_count': 4, 'bus_stops': ['MTA_803079', 'MTA_903292', 'MTA_903293', 'MTA_405485']}
{'name': 'SEAVIEW AV/NUGENT AV', 'direction': 'SE', 'id': 'MTA_905248', 'longitude': -74.086714, 'latitude': 40.583714, 'osmid': 42957538}
{'y': 40.5836798, 'x': -74.0865823, 'highway': 'traffic_signals', 'street_count': 4, 'bus_stops': 

In [112]:
# fetch the stops near a coordinate
async def get_closest_stops(lon, lat, radius):
    """Request the stops-for-location JSON around a point.

    Args:
        lon: Longitude sent as the `lon` query parameter.
        lat: Latitude sent as the `lat` query parameter.
        radius: Search radius sent as the `radius` query parameter
            (units are defined by the API — presumably meters; confirm).

    Returns:
        The decoded JSON body when its 'code' field equals 200.

    Raises:
        Exception: If the body's 'code' field is anything other than 200.
    """
    endpoint = f"https://bustime.mta.info/api/where/stops-for-location.json?key={MTA_API_KEY}&lat={lat}&lon={lon}&radius={radius}"
    async with aiohttp.ClientSession() as http, http.get(endpoint) as reply:
        body = await reply.json()
    status = body['code']
    if status != 200:
        raise Exception(f"Error: {status}")
    return body

In [None]:
async def hi():
    closest_stops = await get_closest_stops(-73.946764,40.6319761, 200)
    print(closest_stops)

await hi()

TypeError: dump() missing 1 required positional argument: 'fp'

In [None]:
# get schedule info and convert it to edge travel time
# https://api.pugetsound.onebusaway.org/api/where/schedule-for-route/1_100223.json?key=TEST