In [None]:
# Run this cell first, then restart the kernel before running anything else.
# The notebook needs a calitp version whose GTFSDownloadConfig class has an optional config.extracted_at.
%pip install calitp=="2022.9.13a0"
%pip install pendulum

In [None]:
from calitp.storage import get_fs
# Filesystem handle from calitp's get_fs(); handle_type_day and get_metadata use it
# to expand gs:// paths, stat objects and read their extended attributes.
fs = get_fs()

In [None]:
# configuration
# Bucket name interpolated into the gs:// paths that handle_type_day expands.
RT_BUCKET = "test-calitp-gtfs-rt-raw"

In [None]:
# Get the list of RT files to update
#from tqdm.notebook import tqdm
from tqdm.auto import tqdm

def handle_type_day(typ, dt):
    """List every non-directory path stored for one feed type on one day.

    Args:
        typ: Top-level prefix inside the bucket (e.g. "vehicle_positions").
        dt: Value of the ``dt=`` partition to expand (e.g. "2022-08-12").

    Returns:
        The paths found recursively under ``gs://{RT_BUCKET}/{typ}/dt={dt}``
        whose ``fs.stat`` type is not "directory".
    """
    day_prefix = f'gs://{RT_BUCKET}/{typ}/dt={dt}'
    candidates = fs.expand_path(day_prefix, recursive=True)

    kept = []
    for path in tqdm(candidates, desc='Filtering out directories'):
        # expand_path also yields the intermediate directories; drop those.
        if fs.stat(path)["type"] == "directory":
            continue
        kept.append(path)
    return kept

In [None]:
import json

def get_metadata(files):
    """Read the JSON metadata stored as an extended attribute on each file.

    Args:
        files: Iterable of file paths to inspect.

    Returns:
        A ``(metadatas, missing_metadata)`` tuple. ``metadatas`` is a list of
        ``(file, parsed_metadata)`` pairs; ``missing_metadata`` lists the files
        for which the attribute lookup raised ``KeyError``.
    """
    # Local import: the notebook's top-level import of this constant sits in a
    # later cell than the first call to this function, so relying on it raises
    # NameError when the notebook is run top to bottom on a fresh kernel.
    from calitp.storage import PARTITIONED_ARTIFACT_METADATA_KEY

    metadatas = []
    missing_metadata = []
    for file in tqdm(files, desc='Getting metadata'):
        # Only the attribute lookup is guarded, so a KeyError can only mean
        # "this file has no metadata attribute".
        try:
            raw = fs.getxattr(file, PARTITIONED_ARTIFACT_METADATA_KEY)
        except KeyError:
            missing_metadata.append(file)
            continue
        metadatas.append((file, json.loads(raw)))
    return metadatas, missing_metadata

In [None]:
# Collect the vehicle_positions files for 2022-08-12 and read each file's stored metadata.
files = handle_type_day(typ="vehicle_positions", dt="2022-08-12")
metadatas, missing = get_metadata(files)
# Displayed counts: all files, files whose metadata was read, files without the metadata attribute.
len(files), len(metadatas), len(missing)

In [None]:
# Entries whose metadata has no "config" key, displayed next to the files that had no metadata at all.
files_without_config = [(path, meta) for (path, meta) in metadatas if "config" not in meta]
files_without_config, missing

In [None]:
(
    # Entry with the greatest "ts" value. max() returns the first maximal item,
    # matching what a stable reverse sort followed by [0] would pick, without
    # sorting the whole list; default=None avoids an error on an empty list.
    max(metadatas, key=lambda m: m[1]["ts"], default=None),
    # Distinct auth_query_param values across all entries, compared by their JSON encoding.
    {json.dumps(metadata["config"]["auth_query_param"]) for (file, metadata) in metadatas},
    #set(json.dumps(metadata["config"]["auth_headers"]) for (file, metadata) in metadatas),
)

In [None]:
import pendulum
from typing import Dict
from calitp.storage import AirtableGTFSDataRecord, AirtableGTFSDataExtract, get_latest_file

import os
# NOTE(review): the Airtable bucket is set to an empty string, and only after
# calitp.storage has already been imported earlier in this cell — confirm the
# intended bucket name and that calitp reads this variable late enough for the
# assignment to take effect.
os.environ["CALITP_BUCKET__AIRTABLE"] = ""

def get_airtable_gtfs_records_for_day(
    dt: pendulum.Date,
) -> Dict[str, AirtableGTFSDataRecord]:
    """Load the latest Airtable GTFS data extract for a day, keyed by record id.

    Args:
        dt: Day passed as the ``dt`` prefix partition when looking up the
            latest extract file.

    Returns:
        Mapping of ``record.id`` to the parsed ``AirtableGTFSDataRecord``, one
        record per line of the decompressed extract.
    """
    # Local import: gzip is not imported anywhere in the visible notebook, so
    # the decompress call below would otherwise raise NameError.
    import gzip

    file = get_latest_file(
        AirtableGTFSDataExtract.bucket,
        AirtableGTFSDataExtract.table,
        prefix_partitions={
            "dt": dt,
        },
        partition_types={
            "ts": pendulum.DateTime,
        },
    )

    # The extract is read as gzip-compressed text with one JSON object per line.
    with get_fs().open(file.name, "rb") as f:
        content = gzip.decompress(f.read())
    records = [
        AirtableGTFSDataRecord(**json.loads(row))
        for row in content.decode().splitlines()
    ]

    return {record.id: record for record in records}
# Smoke check: number of Airtable GTFS records loaded for 2022-08-15.
len(get_airtable_gtfs_records_for_day(pendulum.Date(2022, 8, 15)))

In [None]:
import pendulum
from calitp.storage import GTFSDownloadConfig, GTFSRTFeedExtract, PARTITIONED_ARTIFACT_METADATA_KEY

def update_metadata(filepath, meta, write=False):
    """Rebuild one file's stored metadata as a GTFSRTFeedExtract.

    Args:
        filepath: Path of the file the metadata belongs to (not used at present).
        meta: Metadata dict with "config" and "ts" entries; every other entry is
            forwarded to GTFSRTFeedExtract as a keyword argument. Work is done on
            a shallow copy, so the caller's dict keeps its keys.
        write: Currently has no effect; the write-back call is commented out.

    Returns:
        None. The extract is constructed (so any construction error surfaces)
        and then discarded.
    """
    remaining = meta.copy()
    old_config = remaining.pop("config")
    ts = pendulum.parse(remaining.pop("ts"), exact=True)
    assert old_config
    uri = old_config["uri"]

    # First URI substring that matches decides the auth headers; no match means none.
    # NOTE(review): the header values look like secret names rather than literal keys — confirm.
    header_rules = (
        ("goswift.ly", {"authorization": "SWIFTLY_AUTHORIZATION_KEY_CALITP"}),
        ("west-hollywood", {"x-umo-iq-api-key": "WEHO_RT_KEY"}),
    )
    auth_headers = next(
        (candidate for marker, candidate in header_rules if marker in uri),
        {},
    )

    download_config = GTFSDownloadConfig(
        name=old_config.get("name"),
        url=uri,
        feed_type=old_config["data"],
        schedule_url_for_validation="https://google.com",
        auth_query_params=old_config["auth_query_param"],
        auth_headers=auth_headers,
    )
    extract = GTFSRTFeedExtract(ts=ts, config=download_config, **remaining)

    if write:
        # Write-back is disabled; the intended call is kept below for reference.
        #fs.setxattr(**{PARTITIONED_ARTIFACT_METADATA_KEY: extract.json()})
        pass

# Dry run over every (file, metadata) pair: with write=False nothing is written back,
# so this only checks that each metadata dict can be converted.
for filepath, meta in tqdm(metadatas, desc="Updating metadatas"):
    update_metadata(filepath, meta, write=False)