In [1]:
import sys

sys.path.append("../")  # up one level

import os
import shutil

import gcsfs
import geopandas as gpd
import pandas as pd
from calitp_data_analysis.sql import to_snakecase
from calitp_data_analysis.tables import tbls
from segment_speed_utils.project_vars import PUBLIC_GCS
from siuba import _, collect, count, filter, show_query, select
from update_vars import GCS_FILE_PATH, NTD_MODES, NTD_TOS

# Base GCS folder that the NTD inputs in this notebook are read from.
# NOTE(review): this rebinds the GCS_FILE_PATH imported from update_vars above —
# confirm the override is intentional.
GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/ntd/"

# Lift pandas' display truncation for both rows and columns.
for display_option in ("display.max_rows", "display.max_columns"):
    pd.set_option(display_option, None)

In [2]:
# TODO: find a more reliable way to restrict NTD service data to California agencies.
# For now, keep only rows whose primary_uza_name contains ", CA".
# NOTE(review): UZA names written differently (e.g. multi-state areas) may be
# missed by this substring match — confirm against the source table.
ntd_service_columns = [
    "report_year",
    "ntd_id",
    "agency",
    "reporter_type",
    "organization_type",
    "city",
    "primary_uza_name",
    "actual_vehicles_passenger_car_revenue_hours",
    "actual_vehicles_passenger_car_revenue_miles",
    "unlinked_passenger_trips_upt",
]

# Lazy query: California filter first, then trim to the columns of interest.
ca_service_query = (
    tbls.mart_ntd.dim_annual_service_agencies()
    >> filter(_.primary_uza_name.str.contains(", CA"))
    >> select(*ntd_service_columns)
)

# Execute the query and pull the result into a pandas DataFrame.
ntd_service = ca_service_query >> collect()
ntd_service.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 315 entries, 0 to 314
Data columns (total 10 columns):
 #   Column                                       Non-Null Count  Dtype  
---  ------                                       --------------  -----  
 0   report_year                                  315 non-null    int64  
 1   ntd_id                                       315 non-null    object 
 2   agency                                       315 non-null    object 
 3   reporter_type                                315 non-null    object 
 4   organization_type                            315 non-null    object 
 5   city                                         312 non-null    object 
 6   primary_uza_name                             315 non-null    object 
 7   actual_vehicles_passenger_car_revenue_hours  315 non-null    float64
 8   actual_vehicles_passenger_car_revenue_miles  315 non-null    float64
 9   unlinked_passenger_trips_upt                 315 non-null    float64
dtypes:

In [3]:
# Read the NTD ID -> RTPA crosswalk from GCS.
#
# `dtype` keys are matched against the CSV's own header, and the rename to
# "ntd_id" only happens after parsing. The raw "NTD ID" header therefore has to
# be listed for the string dtype to apply; otherwise an all-numeric ID column
# would be parsed as integers and would not line up with the string ntd_id in
# ntd_service. The "ntd_id" key is kept so the read still forces strings if the
# file's header is ever already snake-cased.
crosswalk = pd.read_csv(
    f"{GCS_FILE_PATH}ntd_id_rtpa_crosswalk.csv",
    dtype={"NTD ID": "str", "ntd_id": "str"},
).rename(columns={"NTD ID": "ntd_id"})

crosswalk.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 122 entries, 0 to 121
Data columns (total 6 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   ntd_id          122 non-null    object
 1   Legacy NTD ID   111 non-null    object
 2   Agency          122 non-null    object
 3   UZA Name        121 non-null    object
 4   RTPA_open_data  122 non-null    object
 5   RTPA            122 non-null    object
dtypes: object(6)
memory usage: 5.8+ KB


In [4]:
# Is every agency in ntd_service present in the crosswalk?
# Left-join on ntd_id and keep the merge indicator so unmatched rows can be spotted.
check = pd.merge(ntd_service, crosswalk, how="left", on="ntd_id", indicator=True)

# Rows flagged "left_only" found no crosswalk entry for their ntd_id.
# In the recorded output the visible unmatched rows are all Reduced Reporter agencies.
no_crosswalk_match = check["_merge"] == "left_only"
check.loc[no_crosswalk_match, ["agency", "reporter_type"]].value_counts()

agency                                                                          reporter_type   
Los Angeles County                                                              Reduced Reporter    10
City of Agoura Hills                                                            Reduced Reporter     2
City of Pico Rivera                                                             Reduced Reporter     2
City of Downey                                                                  Reduced Reporter     2
City of Glendora                                                                Reduced Reporter     2
City of Huntington Park                                                         Reduced Reporter     2
City of Lakewood , dba: DASH Transit                                            Reduced Reporter     2
City of Malibu                                                                  Reduced Reporter     2
City of Manteca, dba: Manteca Transit                                          

In [5]:
# Optional check, left disabled: list each crosswalk UZA name next to its RTPA.
# crosswalk[["UZA Name", "RTPA"]].sort_values(by="UZA Name")

Some UZA names map to more than one RTPA:
- some agencies in the Sacramento UZA go to the Sac RTPA and others to the Placer RTPA
- some agencies in the LA UZA go to Metro and others to OCTA


## Maybe it's easier to find the RTPA of the rural/reduced reporter agencies
- How many rural/reduced reporters are there?


In [6]:
# Tally how many ntd_service rows fall under each reporter type.
reporter_type_counts = ntd_service["reporter_type"].value_counts()
reporter_type_counts

Full Reporter       165
Reduced Reporter    148
Rural Reporter        2
Name: reporter_type, dtype: int64

In [8]:
# Show every row that is not a Full Reporter, ordered by primary UZA name.
is_not_full_reporter = ntd_service["reporter_type"].ne("Full Reporter")
ntd_service.loc[is_not_full_reporter].sort_values(by="primary_uza_name")

Unnamed: 0,report_year,ntd_id,agency,reporter_type,organization_type,city,primary_uza_name,actual_vehicles_passenger_car_revenue_hours,actual_vehicles_passenger_car_revenue_miles,unlinked_passenger_trips_upt
313,2023,91059,Kern Regional Transit,Reduced Reporter,Independent Public Agency or Authority of Tran...,Bakersfield,"Bakersfield, CA",79427.0,1934230.0,215876.0
183,2022,90163,"City of Camarillo, dba: Camarillo Area Transit",Reduced Reporter,"City, County or Local Government Unit or Depar...",Camarillo,"Camarillo, CA",19700.0,369502.0,69923.0
184,2023,90163,"City of Camarillo, dba: Camarillo Area Transit",Reduced Reporter,"City, County or Local Government Unit or Depar...",Camarillo,"Camarillo, CA",20667.0,374301.0,67360.0
187,2023,90167,"City of Davis, dba: Davis Community Transit",Reduced Reporter,"City, County or Local Government Unit or Depar...",Davis,"Davis, CA",4558.0,47130.0,12956.0
188,2022,90167,"City of Davis, dba: Davis Community Transit",Reduced Reporter,"City, County or Local Government Unit or Depar...",Davis,"Davis, CA",4147.0,43506.0,11036.0
194,2022,90194,"City of Atascadero, dba: Atascadero Dial A Ride",Reduced Reporter,"City, County or Local Government Unit or Depar...",Atascadero,"El Paso de Robles (Paso Robles)--Atascadero, CA",4110.0,50314.0,10352.0
193,2023,90194,City of Atascadero,Reduced Reporter,"City, County or Local Government Unit or Depar...",Atascadero,"El Paso de Robles (Paso Robles)--Atascadero, CA",2937.0,34112.0,7403.0
192,2023,90175,"City of Lodi, dba: GrapeLine",Reduced Reporter,"City, County or Local Government Unit or Depar...",Lodi,"Lodi, CA",27723.0,295692.0,184061.0
191,2022,90175,"City of Lodi, dba: GrapeLine",Reduced Reporter,"City, County or Local Government Unit or Depar...",Lodi,"Lodi, CA",27961.0,275220.0,159529.0
177,2023,90149,"City of Lompoc, dba: City of Lompoc Transit",Reduced Reporter,"City, County or Local Government Unit or Depar...",Lompoc,"Lompoc, CA",23114.0,238146.0,63727.0
