In [5]:
import requests
import json
import os
from dotenv import load_dotenv
import osmnx as ox

In [6]:
import asyncio
import aiohttp
    

In [7]:
load_dotenv()  # load variables from a local .env file into the environment (MTA_API_KEY is read from it below)

True

In [8]:
# API key for the MTA Bus Time endpoints; this is None when the variable is not set.
MTA_API_KEY = os.environ.get("MTA_API_KEY")

In [9]:
# Fetch the stops-for-route payload for a single route id.
async def get_bus_route(route_id):
    """
    Request the stops-for-route document for `route_id` and return the decoded JSON.

    The route id is appended to the hard-coded "MTA NYCT_" agency prefix.
    Raises Exception when the payload's 'code' field is anything other than 200.
    """
    endpoint = f"https://bustime.mta.info/api/where/stops-for-route/MTA%20NYCT_{route_id}.json?key={MTA_API_KEY}&includePolylines=false&version=2"
    async with aiohttp.ClientSession() as http:
        async with http.get(endpoint) as reply:
            payload = await reply.json()

    # The status is read from the JSON body, not from the HTTP status line.
    status = payload['code']
    if status != 200:
        raise Exception(f"Error: bus route {route_id} returned response code {status}")
    return payload

In [10]:
# Smoke test: fetch one route and pretty-print the raw payload.
# `route_id` and `route_data` are reused by later cells.
route_id = "B52" # Example route
route_data = await get_bus_route(route_id)
print(json.dumps(route_data, indent=4))



{
    "code": 200,
    "currentTime": 1747011208413,
    "data": {
        "entry": {
            "polylines": [],
            "routeId": "MTA NYCT_B52",
            "stopGroupings": [
                {
                    "ordered": true,
                    "stopGroups": [
                        {
                            "id": "1",
                            "name": {
                                "name": "DOWNTOWN BKLYN TILLARY ST via GATES",
                                "names": [
                                    "DOWNTOWN BKLYN TILLARY ST via GATES"
                                ],
                                "type": "destination"
                            },
                            "polylines": [],
                            "stopIds": [
                                "MTA_504965",
                                "MTA_504119",
                                "MTA_304196",
                                "MTA_304197",
                                "MT

In [11]:
# Fetch the trips (with their schedules) currently reported for a route id.
async def get_route_schedule(route_id):
    """
    Request trips-for-route (with includeSchedule=true) for `route_id`.

    Returns the list stored under response['data']['list'].
    Raises Exception when the payload's 'code' field is anything other than 200.
    """
    url = f"https://bustime.mta.info/api/where/trips-for-route/MTA%20NYCT_{route_id}.json?key={MTA_API_KEY}&includeSchedule=true"
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as response:
            payload = await response.json()

    if payload['code'] != 200:
        # The argument is a route id, so the message says "route" rather than "stop".
        raise Exception(f"Error: schedule for bus route {route_id} returned response code {payload['code']}")
    return payload['data']['list']

In [12]:
# Fetch the schedule list for the example route and pretty-print it.
# Despite its name, `stop_data` holds the list returned by get_route_schedule, not a single stop.
stop_data = await get_route_schedule(route_id)
print(json.dumps(stop_data, indent=4))

[
    {
        "frequency": null,
        "schedule": {
            "frequency": null,
            "nextTripId": "MTA NYCT_FP_B5-Sunday-127500_B26_219",
            "previousTripId": "MTA NYCT_FP_B5-Sunday-116200_B26_219",
            "stopTimes": [
                {
                    "arrivalTime": 73020,
                    "departureTime": 73020,
                    "distanceAlongTrip": 0.08656897032969031,
                    "stopHeadsign": "",
                    "stopId": "MTA_307175"
                },
                {
                    "arrivalTime": 73126,
                    "departureTime": 73126,
                    "distanceAlongTrip": 285.61860445152655,
                    "stopHeadsign": "",
                    "stopId": "MTA_307699"
                },
                {
                    "arrivalTime": 73173,
                    "departureTime": 73173,
                    "distanceAlongTrip": 409.6188950017041,
                    "stopHeadsign": "",
          

In [13]:
def get_stop_groups(route_data):
    """
    Map each stop-group name of a route to its list of stop ids.

    Only the first entry of `stopGroupings` is consulted. Returns an empty dict
    when there is no grouping at all, or when that grouping is not flagged as
    ordered (a notice is printed in the latter case).

    route_data: decoded stops-for-route payload, as returned by get_bus_route.
    """
    entry = route_data['data']['entry']
    groupings = entry['stopGroupings']
    if not groupings:
        return {}

    grouping = groupings[0]
    if not grouping['ordered']:
        # routeId lives under data.entry, not at the top level of the payload
        print(f"{entry['routeId']} not ordered")
        return {}

    return {group['name']['name']: group['stopIds'] for group in grouping['stopGroups']}

    

In [14]:
# Stop-group name -> stop ids for the example route currently held in `route_data`.
test_route_lists = get_stop_groups(route_data)

In [15]:
# Load the saved stop lookup: a dict keyed by stop id (e.g. 'MTA_901601').
json_path='./graph_data/bus_info_dict.json'
with open(json_path) as json_file:
    bus_info = json.load(json_file)

In [16]:
# Inspect one stop entry to see which fields are stored per stop.
print(bus_info['MTA_901601'])

{'name': 'Brooklyn Bridge Park /PIER 6', 'routes': ['B63'], 'direction': 'W', 'link': 'https://bustime.mta.info/m/index?q=901601', 'id': 'MTA_901601', 'latitude': 40.693078, 'longitude': -74.000877, 'osmid': 1567286111, 'diroutes': ['BAY RIDGE SHORE RD via 5 AV']}


In [43]:
# Load the archived list of route records; each record carries an 'id' (see shorten_id).
# Note: this rebinds the module-level `json_path` used by an earlier cell.
json_path='./graph_data_archive/all_routes.json'
with open(json_path) as json_file:
    all_routes = json.load(json_file)

In [44]:
# Number of route records loaded from the archive.
print(len(all_routes))

267


In [45]:
def shorten_id(route):
    """
    Return the second underscore-separated piece of route['id'].

    For an id such as 'MTA NYCT_M34+' this yields 'M34+'. Anything after a
    second underscore is dropped, and an id without any underscore raises
    IndexError.
    """
    pieces = route['id'].split('_')
    return pieces[1]

In [46]:
# Quick check of shorten_id on the first route record.
shorten_id(all_routes[0])

'M34+'

In [68]:
route_data = get_bus_route('B52')
print(route_data)

<coroutine object get_bus_route at 0x3000e1240>


In [91]:
from tqdm import tqdm
from collections import defaultdict
#loading bar module

In [96]:
# route id -> names of the stop groups for which no matching schedule was found
unknown_routes = defaultdict(set)


def get_edge_times(first_stop, route_schedule, route_id, route_name):
    '''
    Given the first stop of a stop group and the schedules available for its route,
    compute the travel time between each pair of consecutive stops.

    first_stop:     stop id that the matching schedule must start with
    route_schedule: list of trips, each shaped like {'schedule': {'stopTimes': [...]}}
    route_id:       route the stop group belongs to (used only for bookkeeping)
    route_name:     name of the stop group (used only for bookkeeping)

    Returns {(prev_stop_id, cur_stop_id): cur arrivalTime - prev departureTime}
    taken from the first schedule whose first stop equals `first_stop`.
    If no schedule matches, records route_name under unknown_routes[route_id]
    and returns an empty dict.
    '''
    matched_stop_times = None
    for trip in route_schedule:
        stop_times = trip['schedule']['stopTimes']
        # A trip without stop times can never match; skipping it also avoids
        # an IndexError on stop_times[0].
        if stop_times and stop_times[0]['stopId'] == first_stop:
            matched_stop_times = stop_times
            break

    if matched_stop_times is None:
        # Many routes have no schedule starting at the stop group's first stop,
        # so they are collected here to be estimated separately later.
        unknown_routes[route_id].add(route_name)
        return {}

    # Walk consecutive (previous, current) stop pairs; a repeated pair keeps the
    # value from its last occurrence.
    edge_times = {}
    for prev_stop, cur_stop in zip(matched_stop_times, matched_stop_times[1:]):
        edge = (prev_stop['stopId'], cur_stop['stopId'])
        edge_times[edge] = cur_stop['arrivalTime'] - prev_stop['departureTime']

    return edge_times
    

In [97]:
async def get_stop_group_times(stop_groups, route_id):
    '''
    Fetch the schedule for a route and derive edge travel times for each of its stop groups.

    stop_groups: {stop group name: [stop ids in order]}
    route_id:    short route id passed to get_route_schedule

    Returns edge_data in the format
        {(stopId, stopId2): est travel time from stopId to stopId2 in seconds}
    An empty dict is returned when the schedule request fails (the error is
    printed), so callers can always pass the result to dict.update().
    '''
    try:
        route_schedule = await get_route_schedule(route_id)
    except Exception as e:
        print(f"Error: {e}")
        return {}

    edge_data = {}
    for route_name, group in stop_groups.items():
        if not group:
            # a stop group with no stops has no first stop to match against
            continue
        # travel times come from the schedule whose first stop matches group[0]
        edge_data.update(get_edge_times(group[0], route_schedule, route_id, route_name))

    return edge_data


In [98]:
#getting a dict of all routes 
all_route_lists = {}
est_stop_times = {}
fails = []
for i in tqdm(range(len(all_routes)), desc="fetching routes"):
    route_id = shorten_id(all_routes[i])

    try:
        route_data = await get_bus_route(route_id)
        
    except Exception as e:
        fails.append(route_id)
        continue
    
    #extracting routes (stop groups) from data
    stop_groups = get_stop_groups(route_data)
    #adding route to dictionary
    all_route_lists.update(stop_groups)

    #getting a list of route times for each edge in the routes
    route_times = await get_stop_group_times(stop_groups, route_id)
    #print(route_times)
    est_stop_times.update(route_times)

    
print(f"failed to fetch {len(fails)} routes: {fails}")
    

fetching routes: 100%|██████████| 267/267 [05:32<00:00,  1.24s/it]

failed to fetch 12 routes: ['D90', 'B101', 'L90', 'B90', 'B94', 'B96', 'B98', 'Q90', 'BX92', 'Q107', 'Q108', 'M90']





In [99]:
# Summary of the fetch: number of stop groups collected, the stop groups with
# no matching schedule (grouped by route id), and the number of timed edges.
print(len(all_route_lists))
print(unknown_routes)
print(len(est_stop_times))


446
defaultdict(<class 'set'>, {'S66': {'ST GEORGE FERRY via GRYMES HILL', 'PORT RICHMOND via GRYMES HILL'}, 'Q26': {'FRESH MEADOWS HOLLIS COURT BL via 46 AV', 'FLUSHING MAIN ST STATION'}, 'S61': {'ST GEORGE FERRY'}, 'SIM15': {'ELTINGVILLE TRANSIT CENTER via RICHMOND', 'DOWNTOWN LOOP via CHURCH ST via WATER ST'}, 'SIM10': {'ELTINGVILLE TRANS CTR via HYLAN via RICH', 'MIDTOWN via 23 ST via 6 AV'}, 'SIM11': {'MIDTOWN via 23 ST via MADISON AV', 'NEW DORP via HYLAN BL'}, 'SIM9': {'GREENWICH VILL via WEST ST via 6 AV', 'ELTINGVILLE via F CAP via HYLAN BL'}, 'SIM8': {'MIDTOWN via 42 ST via MADISON AV', 'ARDEN HTS via RICHMOND AV via WOODROW RD'}, 'SIM7': {'GREENWICH VILLAGE via WEST ST via 6 AV', 'ELTVLLE TRANS CTR via HYLAN via RICHMOND'}, 'SIM6': {'MIDTOWN via 23 ST via MADISON AV', 'ELTVLLE TRANS CTR via F CAP BL via GFRDS'}, 'SIM5': {'DOWNTOWN FRANKFORT ST via WATER ST', 'ELTVLLE TRANS CTR via F CAP BL via GFRDS'}, 'SIM4': {'ANNADALE via RICHMOND AV', 'DOWNTOWN WORTH ST via CHURCH ST'}, 

## I ended up with a lot of missing route schedules, so I'll need to do some error correcting

In [115]:
# Reload the stop lookup (stop id -> metadata dict). This uses the same
# repo-relative location as the other cells instead of an absolute path into
# one user's home directory, so the notebook runs on other machines.
file_path = './graph_data/bus_info_dict.json'
with open(file_path) as json_file:
    bus_info_dict = json.load(json_file)

In [None]:
def estimate_route_time(diroute, all_route_lists, bus_info_dict):
    """
    Print the 'osmid' of every stop on the directed route `diroute`.

    diroute:         key into all_route_lists (a stop group name)
    all_route_lists: {stop group name: [stop ids]}
    bus_info_dict:   {stop id: {'osmid': ..., ...}}

    Nothing is returned yet; the function only prints the node list.
    """
    nodes = [bus_info_dict[stop_id]['osmid'] for stop_id in all_route_lists[diroute]]
    print(nodes)

def estimate_missing_edge_times(unknown_routes, est_stop_times, all_route_lists, bus_info_dict):
    '''
    Visit every directed route that has no schedule and hand it to estimate_route_time.

    Intended approach (not implemented yet): for each pair of consecutive nodes on
    a missing route, find a path between them, sum its length and divide by the
    average travel speed of an MTA bus; fall back to euclidean distance when no
    path can be found.

    unknown_routes:  {route id: collection of directed route names}
    est_stop_times:  currently unused
    all_route_lists: {directed route name: [stop ids]}
    bus_info_dict:   {stop id: stop metadata}
    '''
    # flatten {route id: names} into a single stream of directed route names
    directed_routes = (
        diroute
        for names in unknown_routes.values()
        for diroute in names
    )
    for diroute in directed_routes:
        estimate_route_time(diroute, all_route_lists, bus_info_dict)


In [116]:
# Run the (work-in-progress) estimator over every route that had no matching schedule.
estimate_missing_edge_times(unknown_routes, est_stop_times, all_route_lists, bus_info_dict)

[{'name': 'RICHMOND TERR/PORT RICHMOND AV', 'routes': ['S57', 'S66'], 'direction': 'SE', 'link': 'https://bustime.mta.info/m/index?q=202741', 'id': 'MTA_202741', 'latitude': 40.640214, 'longitude': -74.130886, 'osmid': 43022503, 'diroutes': ['ST GEORGE FERRY via GRYMES HILL']}, {'name': 'PORT RICHMOND AV/ANN ST', 'routes': ['S53', 'S57', 'S59', 'S66'], 'direction': 'SW', 'link': 'https://bustime.mta.info/m/index?q=200426', 'id': 'MTA_200426', 'latitude': 40.638743, 'longitude': -74.132956, 'osmid': 42997547, 'diroutes': ['NEW DORP MILL RD', 'ST GEORGE FERRY via GRYMES HILL', 'HYLAN BL', 'BAY RIDGE 86 ST STATION']}, {'name': 'PORT RICHMOND AV/HARRISON AV', 'routes': ['S53', 'S57', 'S59', 'S66'], 'direction': 'SW', 'link': 'https://bustime.mta.info/m/index?q=200427', 'id': 'MTA_200427', 'latitude': 40.63646, 'longitude': -74.134663, 'osmid': 42966434, 'diroutes': ['NEW DORP MILL RD', 'ST GEORGE FERRY via GRYMES HILL', 'HYLAN BL', 'BAY RIDGE 86 ST STATION']}, {'name': 'CASTLETON AV/HEBERT

In [102]:
# Persist the stop group -> stop ids mapping so it can be reloaded without re-fetching.
filename = "./graph_data/all_route_lists.json"
with open(filename, 'w') as file:
    json.dump(all_route_lists, file, indent=4)

In [103]:
# Reload the saved mapping (this rebinds all_route_lists to the on-disk copy).
json_path='./graph_data/all_route_lists.json'
with open(json_path) as json_file:
    all_route_lists = json.load(json_file)

## Need to get all missing stops into stations.graphml

In [104]:
async def get_stop_info(stop_id):
    """
    Request the stop document for `stop_id` and return the decoded JSON.

    Unlike the other fetch helpers in this notebook, the payload's 'code'
    field is not checked here; the response is returned as-is.
    """
    endpoint = f"https://bustime.mta.info/api/where/stop/{stop_id}.json?key={MTA_API_KEY}"
    async with aiohttp.ClientSession() as http:
        async with http.get(endpoint) as reply:
            payload = await reply.json()
            return payload

In [105]:
def findNearestNode2Placemark(G, placemark_info):
    '''
    Return the id of the graph node nearest to a placemark.

    G:              graph handed to ox.distance.nearest_nodes
    placemark_info: dict with 'longitude' and 'latitude' entries
    '''
    lon = placemark_info['longitude']
    lat = placemark_info['latitude']

    # return_dist=True makes the call return the node together with its distance;
    # only the first element (the node) is handed back.
    node_and_dist = ox.distance.nearest_nodes(G, lon, lat, return_dist=True)
    return node_and_dist[0]

In [106]:
async def add_info(stop_id, bus_info_dict, G):
    """
    Fetch one stop, attach it to its nearest graph node and record it in bus_info_dict.

    The new entry holds name, direction, id, longitude, latitude and osmid (the
    nearest node in G). The stop id is also appended to that node's 'bus_stops'
    list, which is created if missing. Both bus_info_dict and G are mutated in
    place; bus_info_dict is returned for convenience.
    """
    # stop details as reported by the API
    stop = (await get_stop_info(stop_id))['data']
    info = {
        'name': stop['name'],
        'direction': stop['direction'],
        'id': stop_id,
        'longitude': stop['lon'],
        'latitude': stop['lat'],
    }
    info['osmid'] = findNearestNode2Placemark(G, info)

    # register the stop on its graph node
    node_attrs = G.nodes[info['osmid']]
    node_attrs.setdefault('bus_stops', []).append(stop_id)

    bus_info_dict[stop_id] = info
    print(bus_info_dict[stop_id])
    print(G.nodes[info['osmid']])

    return bus_info_dict
    
    
    




In [107]:
# Example bus_info entry (one stop id mapped to its metadata):
# "MTA_308248": {
#         "name": "4 AV/SENATOR ST",
#         "routes": [
#             "B9"
#         ],
#         "direction": "S",
#         "link": "https://bustime.mta.info/m/index?q=308248",
#         "id": "MTA_308248",
#         "latitude": 40.636048,
#         "longitude": -74.022991,
#         "osmid": 42521235
# }

In [108]:
# Load the stations graph that add_info annotates with bus stop ids.
G = ox.io.load_graphml('./graph_data/stations.graphml')

In [109]:

# Backfill: every stop that appears in a route list but is missing from bus_info
# is fetched and added. The membership test runs against the dict as it grows,
# so a stop shared by several routes is only fetched once.
for route_list in all_route_lists.values():
    for stop in route_list:
        if stop not in bus_info:
            # add_info mutates bus_info (and G) in place and returns the same dict
            bus_info = await add_info(stop, bus_info, G)

# Persist both the enlarged stop lookup and the annotated graph, overwriting the originals.
# NOTE(review): the 'bus_stops' node attribute is a Python list; confirm it
# round-trips as a list (not a string) through save_graphml/load_graphml.
filename = "./graph_data/bus_info_dict.json"
with open(filename, 'w') as file:
    json.dump(bus_info, file, indent=4)
ox.io.save_graphml(G, filepath='./graph_data/stations.graphml')

In [110]:
# Look up the stops around a coordinate.
async def get_closest_stops(lon, lat, radius):
    """
    Request stops-for-location for the given longitude, latitude and radius.

    Returns the decoded JSON payload.
    Raises Exception when the payload's 'code' field is anything other than 200.
    """
    endpoint = f"https://bustime.mta.info/api/where/stops-for-location.json?key={MTA_API_KEY}&lat={lat}&lon={lon}&radius={radius}"
    async with aiohttp.ClientSession() as http:
        async with http.get(endpoint) as reply:
            payload = await reply.json()

    status = payload['code']
    if status != 200:
        raise Exception(f"Error: {status}")
    return payload

In [111]:
async def hi():
    closest_stops = await get_closest_stops(-73.946764,40.6319761, 200)
    print(closest_stops)

await hi()

CancelledError: 

In [None]:
# get schedule info and convert it to edge travel time
# https://api.pugetsound.onebusaway.org/api/where/schedule-for-route/1_100223.json?key=TEST