In [None]:
import calendar
import datetime as dt
import itertools
import os

import geopandas as gpd
import pandas as pd


def _is_valid_date(year, month, day):
    '''Return True when year/month/day names a real Gregorian calendar date.'''
    try:
        year, month, day = int(year), int(month), int(day)
    except (TypeError, ValueError):
        # Non-numeric components cannot be judged here; let the API decide.
        return True
    if not 1 <= month <= 12:
        return False
    return 1 <= day <= calendar.monthrange(year, month)[1]


def retrieve_shipments(routes, years=None, months=None, days=None, hours=None):
    '''
    Download hourly bus GeoJSON from the BusWatcher API and combine the results.

    One request is made for every combination of route, year, month, day and
    hour. Combinations that are not real calendar dates (e.g. 2/30 or 4/31)
    are skipped without a request. Progress is printed as the download runs:
    the route, each date as month/day/year, and each hour (followed by
    "missing" when that hour could not be read).

    Args:
      routes (iterable): route names substituted into the request URL
      years (iterable): years to query
      months (iterable): months to query
      days (iterable): days of the month to query
      hours (iterable): hours to query

    Returns:
      The concatenation of every frame that was read successfully, or None
      when nothing could be retrieved.

    Raises:
      TypeError: if years, months, days or hours is left as None.
    '''
    API_BASE_URL = 'http://api.buswatcher.org'
    url_template = (
        API_BASE_URL
        + '/api/v2/nyc/{year}/{month}/{day}/{hour}/{route}/buses/geojson'
    )

    requested = (('years', years), ('months', months), ('days', days), ('hours', hours))
    missing = [name for name, value in requested if value is None]
    if missing:
        raise TypeError(
            'retrieve_shipments() needs an iterable for: ' + ', '.join(missing)
        )

    # Materialise once so one-shot iterators (generators) survive the
    # repeated passes made by the nested iteration below.
    years, months, days, hours = list(years), list(months), list(days), list(hours)

    frames = []
    for route in routes:
        print(route)
        for year, month, day in itertools.product(years, months, days):
            if not _is_valid_date(year, month, day):
                continue
            print(f'{month}/{day}/{year}')
            for hour in hours:
                url = url_template.format(
                    year=year, month=month, day=day, hour=hour, route=route
                )
                # A failed read is reported as a missing hour. Catch Exception
                # rather than everything so that Ctrl-C still stops the loop.
                try:
                    frame = gpd.read_file(url)
                except Exception:
                    print(f'\t{hour} missing')
                    continue
                print(f'\t{hour}')
                frames.append(frame)

    if not frames:
        return None
    # A single concat replaces the removed GeoDataFrame.append and avoids
    # re-copying the accumulated data on every iteration.
    return pd.concat(frames)


def save_data(dir_path, routes, file_name=None, **kwargs):
    '''
    Retrieve shipment data for the given routes and write it to a GeoJSON file.

    Today's date is inserted between the file name's stem and its extension,
    so 'data.geojson' is written as 'data_<today>.geojson' inside dir_path.
    The saved path is printed once the file has been written.

    Args:
      dir_path (string): directory the file is written to
      routes (iterable): routes passed through to retrieve_shipments
      file_name (string): base file name; must end in .geojson (any letter
        case). Defaults to 'nyc_buswatcher_api.geojson'.
      **kwargs: forwarded unchanged to retrieve_shipments

    Raises:
      ValueError: if file_name does not have a .geojson extension.
      RuntimeError: if retrieve_shipments returned no data to save.
    '''
    if file_name is None:
        file_name = 'nyc_buswatcher_api.geojson'

    root, ext = os.path.splitext(file_name)
    # Validate before downloading so a bad name fails immediately.
    if ext.lower() != '.geojson':
        raise ValueError('File name extension must be .geojson')

    today = dt.date.today()
    file_path = os.path.join(dir_path, f'{root}_{today}{ext}')

    gdf = retrieve_shipments(routes, **kwargs)
    if gdf is None:
        # retrieve_shipments returns None when no request succeeded.
        raise RuntimeError('No shipment data was retrieved; nothing to save')
    gdf.to_file(file_path, driver='GeoJSON')

    print(f'Data saved to {file_path}')


# helper function
def extract_route_names(path, print_lines=True):
    '''
    Generates list of all bus lines by parsing text file containing copy/pasted source code for ::

      https://bustime.mta.info/m/routes/

    For each line of the file, the text between the first '">' and the next
    '<' is taken as a route name, and anything from the first '-' onward
    (e.g. the '-SBS' distinction) is dropped. Lines with no such text are
    ignored. Because dropping the suffix can make two routes collapse to the
    same name (e.g. 'M15' and 'M15-SBS'), each name is returned only once, in
    order of first appearance, so no route is queried twice.

    NOTE that while each route in the routes list generated (except for those stripped of '-SBS'
    distinction) works as a query to the MTA's BusTime API, (i.e. https://bustime.mta.info/m/?q={route}),
    some do not seem to work with the BusWatcher API (e.g. M14A-SBS). Unclear as to why...

    Args:
      path (string): path to text file described above
      print_lines (bool): print(i, line) for i, line in enumerate(bus_lines)

    Returns:
      bus_lines (list): list of bus lines that can be used for BusWatcher API queries
    '''
    bus_lines = []
    seen = set()
    with open(path) as f:
        for line in f:
            found = line.partition('">')[2].partition('<')[0]
            # drop the '-SBS' distinction (not accepted by BusWatcher API)
            route = found.partition('-')[0]
            if not route or route in seen:
                continue
            seen.add(route)
            bus_lines.append(route)

    if print_lines:
        for i, line in enumerate(bus_lines):
            print(i, line)
    return bus_lines