# GTFS Realtime Analysis Notebook 
This is a quick, DIY notebook that runs the [GTFS Realtime Validator](https://github.com/CUTR-at-USF/gtfs-realtime-validator/blob/master/gtfs-realtime-validator-lib/README.md#batch-processing) in batch mode on a batch of agencies.

In [1]:
import requests 
import pandas as pd
import yaml
import os
from pathlib import Path
import datetime
import zipfile
import time
import tqdm

1. Download the agencies list 

In [2]:
# Raw GitHub URL of the agencies.yml file in the cal-itp/data-infra repository.
raw_url = "https://raw.githubusercontent.com/cal-itp/data-infra/main/airflow/data/agencies.yml"
# Bound the wait so a stalled connection cannot hang the notebook, and fail
# here on an HTTP error instead of handing an error page to the YAML parser.
r = requests.get(raw_url, timeout=30)
r.raise_for_status()

In [3]:
# safe_load only builds plain Python objects (dicts, lists, strings, numbers).
# Calling yaml.load without an explicit Loader is deprecated since PyYAML 5.1
# and raises a TypeError in PyYAML 6.
agencies = yaml.safe_load(r.text)

  """Entry point for launching an IPython kernel.


In [4]:
# Keep only the entries of `agencies` that declare a "gtfs_rt_urls" key.
rt_agencies = {
    agency_key: agency_info
    for agency_key, agency_info in agencies.items()
    if "gtfs_rt_urls" in agency_info.keys()
}

2. Load the GTFS and GTFS RT files 

In [5]:
# Root directory under which each sampling run is stored; created if missing.
Path('../data/gtfs-rt').mkdir(exist_ok=True, parents=True)
# Number of realtime snapshots taken per agency, and the pause (in seconds)
# between two consecutive snapshots.
num_iterations, sleep_interval = 10, 20
def download_sample(agency, timeout=30):
    """Download the GTFS schedule zip and a series of GTFS Realtime snapshots for one agency.

    Files are written under ``{path}/{itp_id}``: the schedule feed as
    ``gtfs.zip`` and each realtime snapshot as ``rt/<feed><offset>.pb``, where
    ``offset`` is the iteration index multiplied by ``sleep_interval``.

    Relies on the module-level names ``rt_agencies``, ``path``,
    ``num_iterations`` and ``sleep_interval``; ``path`` must be assigned
    before this function is called.

    Args:
        agency: Key into ``rt_agencies`` identifying the agency to sample.
        timeout: Value passed as ``timeout`` to every ``requests.get`` call so
            a stalled server cannot block the worker indefinitely.

    Raises:
        FileExistsError: If an output file already exists (files are opened
            in exclusive-create mode, so nothing is ever overwritten).

    Exceptions raised by ``requests.get`` propagate to the caller.
    """
    # Use a separate name for the record instead of rebinding the key parameter.
    agency_info = rt_agencies[agency]
    dest_dir = f"{path}/{agency_info['itp_id']}"
    # parents=True also creates dest_dir itself.
    Path(f"{dest_dir}/rt").mkdir(parents=True, exist_ok=True)

    # Download the GTFS schedule feed and save it to the agency directory.
    # NOTE(review): verify=False disables TLS certificate verification; kept
    # from the original, presumably because some feeds serve invalid
    # certificates -- confirm before trusting the downloaded data.
    r = requests.get(agency_info['gtfs_schedule_url'][0], verify=False, timeout=timeout)
    print(f"loading {agency_info['itp_id']} gtfs")
    print(f'{dest_dir}/gtfs.zip')
    with open(f'{dest_dir}/gtfs.zip', 'xb') as f:
        f.write(r.content)
        print(f"wrote GTFS for to {agency_info['agency_name']}")

    # (key in gtfs_rt_urls, output file prefix) for each realtime feed type.
    feeds = (
        ('vehicle_positions', 'vehicle'),
        ('service_alerts', 'alerts'),
        ('trip_updates', 'trips'),
    )
    rt_urls = agency_info['gtfs_rt_urls']
    for i in range(num_iterations):
        # Label each snapshot with its nominal offset in seconds from the start.
        offset = i * sleep_interval
        for feed_key, prefix in feeds:
            urls = rt_urls.get(feed_key)
            if not urls:
                # Agencies are only required to declare gtfs_rt_urls, not
                # every feed type; skip the ones this agency does not list.
                continue
            r = requests.get(urls[0], verify=False, timeout=timeout)
            with open(f'{dest_dir}/rt/{prefix}{offset}.pb', 'xb') as f:
                f.write(r.content)
        # No need to wait after the final snapshot.
        if i < num_iterations - 1:
            time.sleep(sleep_interval)
    print(f"finished loading {agency_info['agency_name']}")

In [7]:
from multiprocessing import Pool
from itertools import product
# Each run writes into its own timestamped directory; download_sample reads
# the module-level `path` assigned here, so this cell must run before the pool
# is started.
run_time = datetime.datetime.now()
path = f'../data/gtfs-rt/{run_time.strftime("%Y-%m-%d:%H:%M:%S")}'
Path(path).mkdir(parents=True, exist_ok=True)

# Materialise the keys so the work list has a stable length and order.
rt_agencies_list = list(rt_agencies)
if __name__ == '__main__':
    if rt_agencies_list:
        # One worker process per agency so all agencies are sampled concurrently.
        with Pool(len(rt_agencies_list)) as p:
            print(p.map(download_sample, rt_agencies_list))
    else:
        # Pool(0) raises ValueError, so skip the pool when there is no work.
        print("no agencies with gtfs_rt_urls found; nothing to download")




loading 301.0 gtfs
../data/gtfs-rt/2021-05-07:13:03:51/301.0/gtfs.zip
wrote GTFS for to Santa Rosa CityBus




loading 281.0 gtfs
../data/gtfs-rt/2021-05-07:13:03:51/281.0/gtfs.zip
wrote GTFS for to San Francisco International Airport




loading 8 gtfs
../data/gtfs-rt/2021-05-07:13:03:51/8/gtfs.zip




wrote GTFS for to Monterey-Salinas Transit




loading 6 gtfs
../data/gtfs-rt/2021-05-07:13:03:51/6/gtfs.zip
wrote GTFS for to Emery Go-Round
loading 243 gtfsloading 2 gtfs

../data/gtfs-rt/2021-05-07:13:03:51/243/gtfs.zip../data/gtfs-rt/2021-05-07:13:03:51/2/gtfs.zip

wrote GTFS for to Pasadena Transit
wrote GTFS for to San Francisco Bay Ferry




loading 310 gtfs
../data/gtfs-rt/2021-05-07:13:03:51/310/gtfs.zip
wrote GTFS for to SolTrans
loading 0 gtfs
../data/gtfs-rt/2021-05-07:13:03:51/0/gtfs.zip
wrote GTFS for to Big Blue Bus




loading 1 gtfs
../data/gtfs-rt/2021-05-07:13:03:51/1/gtfs.zip
wrote GTFS for to Fairfield and Suisun Transit
loading 350 gtfs
../data/gtfs-rt/2021-05-07:13:03:51/350/gtfs.zip
wrote GTFS for to Union City Transit
loading 235 gtfs
../data/gtfs-rt/2021-05-07:13:03:51/235/gtfs.zip
wrote GTFS for to Orange County Transportation Authority




loading 167 gtfs
../data/gtfs-rt/2021-05-07:13:03:51/167/gtfs.zip
wrote GTFS for to Tri-Valley Wheels
loading 203 gtfs
../data/gtfs-rt/2021-05-07:13:03:51/203/gtfs.zip
wrote GTFS for to Modesto Area Express
loading 269 gtfs
../data/gtfs-rt/2021-05-07:13:03:51/269/gtfs.zip
wrote GTFS for to Riverside Transit Agency
loading 98 gtfs
../data/gtfs-rt/2021-05-07:13:03:51/98/gtfs.zip
wrote GTFS for to Dumbarton Express
loading 1 gtfs
../data/gtfs-rt/2021-05-07:13:03:51/1/gtfs.zip
loading 4 gtfs
../data/gtfs-rt/2021-05-07:13:03:51/4/gtfs.zip
wrote GTFS for to AC Transit




loading 290 gtfs
../data/gtfs-rt/2021-05-07:13:03:51/290/gtfs.zip
wrote GTFS for to SamTrans
loading 279 gtfs
../data/gtfs-rt/2021-05-07:13:03:51/279/gtfs.zip
wrote GTFS for to Bay Area Rapid Transit




finished loading Monterey-Salinas Transit
finished loading San Francisco International Airport
finished loading AC Transit
finished loading Fairfield and Suisun Transit
finished loading Union City Transit
finished loading SolTrans
finished loading Emery Go-Round
finished loading Big Blue Bus
finished loading Dumbarton Express
finished loading Tri-Valley Wheelsfinished loading San Francisco Bay Ferry

finished loading SamTrans
finished loading Riverside Transit Agency
finished loading Modesto Area Express


LocationParseError: Failed to parse: Failed to parse: '.232.147.132', label empty or too long

3. Run the Validator in Batch Mode

In [24]:
import os
import subprocess

# Output directory of the download run to validate; it holds one
# sub-directory per itp_id, each with gtfs.zip and an rt/ folder.
res_path = '../data/gtfs-rt/2021-05-07:13:03:51'
validator_jar = 'gtfs-realtime-validator-lib-1.0.0-SNAPSHOT.jar'
for itp_id in sorted(os.listdir(res_path)):
    agency_dir = os.path.join(res_path, itp_id)
    if not os.path.isdir(agency_dir):
        # Ignore stray files (e.g. .DS_Store) that are not agency folders.
        continue
    # An argument list avoids shell parsing, so unusual characters in the
    # paths cannot be misread as shell syntax.
    cmd = [
        'java', '-jar', validator_jar,
        '-gtfs', f'{agency_dir}/gtfs.zip',
        '-gtfsRealtimePath', f'{agency_dir}/rt',
    ]
    result = subprocess.run(cmd)
    if result.returncode != 0:
        # Report failures instead of silently discarding the exit status.
        print(f"validator exited with code {result.returncode} for {itp_id}")