In [1]:
import json
import os
from datetime import datetime, timedelta
from functools import cache
from pathlib import Path
from typing import Optional

import numpy as np
import pandas as pd
import typer
from calitp_data_analysis.sql import get_engine, query_sql  # type: ignore
from siuba import _, arrange, collect  # type: ignore
from siuba import filter as filtr  # type: ignore
from siuba import mutate, pipe, rename, select, spread  # type: ignore
from siuba.sql import LazyTbl  # type: ignore
from tqdm import tqdm

# Environment values must be strings, so the 800_000_000_000 value is stored
# in its decimal text form.
# NOTE(review): presumably read by calitp_data_analysis as a BigQuery byte cap,
# which would explain why it is set before the engine is created — confirm.
os.environ["CALITP_BQ_MAX_BYTES"] = f"{800 * 10**9}"

engine = get_engine()

# Where the cached array of all report paths is stored.
index_report_file_path = "outputs/index_report.json"

In [2]:
@cache
def _guideline_check():
    """Load the organization guideline checks table from the warehouse.

    Builds a lazy table over
    ``mart_gtfs_quality.fct_monthly_reports_site_organization_guideline_checks``
    on the module-level ``engine``, selects eight columns, and collects the
    result locally.

    The result is memoized with ``functools.cache``: the query runs on the
    first call only, and later calls (for example one per
    ``generate_guideline_check`` invocation) reuse the collected table instead
    of downloading it again. Every caller receives the same object, so treat
    it as read-only and derive new frames rather than mutating it in place.
    Call ``_guideline_check.cache_clear()`` to force a fresh query.

    Returns:
        The collected table with columns ``organization_itp_id``,
        ``publish_date``, ``date_checked``, ``feature``, ``check``,
        ``reports_status``, ``is_manual`` and ``reports_order``.
    """
    return (
        LazyTbl(
            engine,
            "mart_gtfs_quality.fct_monthly_reports_site_organization_guideline_checks",
        )
        >> select(
            _.organization_itp_id,
            _.publish_date,
            _.date_checked,
            _.feature,
            _.check,
            _.reports_status,
            _.is_manual,
            _.reports_order,
        )
        >> collect()
    )

In [3]:
def generate_guideline_check(itp_id: int, publish_date, feature):
    """Build the guideline-check table for one organization, month and feature.

    Rows from ``_guideline_check()`` are narrowed to the given organization,
    publish date and feature. Manual checks get a trailing ``*`` on their
    ``check`` label, ``date_checked`` is converted to text, and the frame is
    spread so that each ``date_checked`` value becomes a column holding the
    ``reports_status`` for that date. Rows are ordered by ``reports_order``
    and remaining missing cells are replaced with ``""``.

    Args:
        itp_id: Value matched against ``organization_itp_id``.
        publish_date: Value matched against ``publish_date``.
        feature: Value matched against ``feature``.

    Returns:
        The wide table with ``check``, ``is_manual``, ``reports_order`` and one
        column per checked date.
    """
    checks = (
        _guideline_check()
        >> filtr(_.organization_itp_id == itp_id)
        >> filtr(_.publish_date == publish_date)
        >> filtr(_.feature == feature)
        >> select(
            _.date_checked, _.check, _.reports_status, _.is_manual, _.reports_order
        )
        >> mutate(
            date_checked=_.date_checked.astype(str),
            check=np.where(_.is_manual, _.check + "*", _.check),
        )
    )

    # A plain ``astype(int)`` raises as soon as one reports_order value is
    # missing. Cast to int only when every value is present (the original
    # behavior); otherwise keep a float column so the missing entries stay NaN
    # through the sort and are blanked by the final fillna("").
    reports_order = checks["reports_order"]
    if reports_order.isna().any():
        reports_order = pd.to_numeric(reports_order).astype("float64")
    else:
        reports_order = reports_order.astype(int)

    guideline_check = (
        checks.assign(reports_order=reports_order)
        >> spread(_.date_checked, _.reports_status)
        >> arrange(_.reports_order)
        >> pipe(_.fillna(""))
    )

    return guideline_check

In [4]:
# Preview the collected guideline-check table. In the recorded output below,
# reports_order is blank for every displayed row.
_guideline_check()

Unnamed: 0,organization_itp_id,publish_date,date_checked,feature,check,reports_status,is_manual,reports_order
0,4,2025-02-01,2025-01-18,Fixed-Route Completeness,100% of scheduled trips on a given day are rep...,FAIL,False,
1,4,2024-03-01,2024-02-18,Fixed-Route Completeness,100% of scheduled trips on a given day are rep...,FAIL,False,
2,4,2024-01-01,2023-12-04,Fixed-Route Completeness,100% of scheduled trips on a given day are rep...,FAIL,False,
3,4,2024-11-01,2024-10-18,Fixed-Route Completeness,100% of scheduled trips on a given day are rep...,FAIL,False,
4,4,2024-08-01,2024-07-18,Fixed-Route Completeness,100% of scheduled trips on a given day are rep...,FAIL,False,
...,...,...,...,...,...,...,...,...
920851,377,2025-04-01,2025-03-18,Availability on Website,Vehicle positions link is posted on website,NOT APPLICABLE/DATA UNAVAILABLE,True,
920852,377,2025-01-01,2024-12-18,Availability on Website,Vehicle positions link is posted on website,NOT APPLICABLE/DATA UNAVAILABLE,True,
920853,377,2025-02-01,2025-01-18,Availability on Website,Vehicle positions link is posted on website,NOT APPLICABLE/DATA UNAVAILABLE,True,
920854,377,2025-04-01,2025-03-04,Availability on Website,Vehicle positions link is posted on website,NOT APPLICABLE/DATA UNAVAILABLE,True,
