In [40]:
import requests
import csv
import os
import urllib
import sys
import importlib
import pandas as pd

sys.path.insert(1, '../utils')

import utils

In [47]:
importlib.reload(utils)

<module 'utils' from '../utils/utils.py'>

In [73]:
region = 'Philadelphia'
extent = 'core'
county_ids = utils.county_ids.get_county_ids(region, extent)
input_date = '2020-05-01'
xmin, xmax, ymin, ymax = utils.geometry.osm_bounds(region, county_ids, extent, file = False, raw = True)

In [74]:
input_date_int = 10000 * int(input_date[0:4]) + 100 * int(input_date[5:7]) + int(input_date[8:10])

# query to find all locations within the bounding box of our region

response = requests.get(
    "https://transit.land/api/v1/operators",
    params = {
        "bbox": str(xmin) + "," + str(ymin) + "," + str(xmax) + "," + str(ymax) # from the previous section
    }
)
all_operators_json = response.json()
all_operators_json = all_operators_json["operators"]

# loop over operators, adding unique feed info (based on onestop_id) to a list

feed_base_info = []
for operator in all_operators_json:
    for onestop_id in operator["represented_in_feed_onestop_ids"]:
        feed_base_info.append([onestop_id, operator["name"],operator["website"],operator["state"],operator["metro"],operator["timezone"]])


In [75]:
feed_info = feed_base_info[2]

In [76]:
# loop over feed info, getting info for each feed, and saving to an output array

output_feed_info = [["operator_name", "operator_website", "operator_state", "operator_metro", "operator_timezone", "transitland_feed_id", "date_fetched", "earliest_calendar_date", "latest_calendar_date", "transitland_historical_url"]]

    # base info
operator_name = feed_info[1]
operator_website = feed_info[2]
operator_state = feed_info[3]
operator_metro = feed_info[4]
operator_timezone = feed_info[5]

# get feed versions
response = requests.get(
"https://transit.land/api/v1/feed_versions",
params = {
    "feed_onestop_id": feed_info[0],
    "per_page": 999
    }
)        
feeds = response.json()
#sleep(1) # to avoid API timeout

# if there are feeds, find the feed that is the most recent to the input date


In [77]:
feeds

{'feed_versions': [{'sha1': 'e64945dd7e50262b5cf40e3be84a62d6cba7f25a',
   'earliest_calendar_date': '2016-02-18',
   'latest_calendar_date': '2016-06-18',
   'md5': '20fb24c2477ce31ca773b8c360ad2a5b',
   'tags': None,
   'fetched_at': '2016-05-09T19:43:01.916Z',
   'imported_at': '2016-05-09T21:38:42.553Z',
   'import_level': 4,
   'created_at': '2016-05-09T19:43:08.752Z',
   'updated_at': '2017-04-29T09:37:01.952Z',
   'feed_version_imports': [481],
   'feed_version_imports_url': 'https://api.transit.land/api/v1/feed_version_imports?feed_onestop_id=f-dr4-septa%7Ebus&feed_version_sha1=e64945dd7e50262b5cf40e3be84a62d6cba7f25a',
   'feed_version_infos': [1704, 6944],
   'import_status': 'most_recent_succeeded',
   'is_active_feed_version': False,
   'changesets_imported_from_this_feed_version': [1336],
   'feed': 'f-dr4-septa~bus',
   'url': 'https://github.com/septadev/GTFS/releases/download/v20160425.2/gtfs_public.zip#google_bus.zip',
   'download_url': 'https://transitland-gtfs.s3.am

In [78]:
try:
    nfeeds = (len(feeds["feed_versions"]))
    if nfeeds > 0:

        # looping over feed versions
        i = nfeeds - 1
        while i >= 0: 

            # grabbing date info
            date_fetched_iso8601 = feeds["feed_versions"][i]["fetched_at"]
            date_fetched = str(date_fetched_iso8601)[0:10]
            date_fetched_int = 10000 * int(date_fetched[0:4]) + 100 * int(date_fetched[5:7]) + int(date_fetched[8:10])

            # checking if before the input date
            if date_fetched_int < input_date_int:

                # output info
                date_fetched = date_fetched
                earliest_calendar_date = feeds["feed_versions"][i]["earliest_calendar_date"]
                latest_calendar_date = feeds["feed_versions"][i]["latest_calendar_date"]
                transitland_historical_url = feeds["feed_versions"][i]["download_url"]
                feed_id = feeds["feed_versions"][i]["feed"]                    
                output_feed_info.append([operator_name, operator_website, operator_state, operator_metro, operator_timezone, feed_id, date_fetched, earliest_calendar_date, latest_calendar_date, transitland_historical_url])

                break # break since this should be the most recent

            else:
                None

            i = i - 1

except:
    None

# make sub directory for the GTFS
os.makedirs("../gtfs/feeds_" + input_date, exist_ok=True)

# write this info to a csv file, downloading the GTFS at the same time
gtfs_zips_to_dl = []
with open("../gtfs/feeds_" + input_date + "/" + region + "_feed_info_" + input_date + ".csv", "w") as csvfile:
    writer = csv.writer(csvfile)
    for row in output_feed_info:
        writer.writerow(row)
        gtfs_zip = [row[5],row[9]]
        if gtfs_zip not in gtfs_zips_to_dl:
            gtfs_zips_to_dl.append(gtfs_zip)

for gtfs_zip in gtfs_zips_to_dl:
    print(gtfs_zip)
    try:
        urllib.request.urlretrieve(gtfs_zip[1], "../gtfs/feeds_" + input_date + "/" + input_date + "_" + gtfs_zip[0] + ".zip")
        sleep(1)
    except:
        None

['transitland_feed_id', 'transitland_historical_url']
['f-dr4-septa~bus', 'https://transitland-gtfs.s3.amazonaws.com/datastore-uploads/feed_version/15b4aed52638c8f8434f01c102f8dff24918576e.zip']
