# Caltrans District + Legislative District
* How to add Caltrans and Legislative District to `fct_monthly_operator_summary` and `fct_monthly_routes`

In [1]:
from datetime import datetime
from functools import cache
from pathlib import Path
from typing import Literal

import _sql_query
import geopandas as gpd
import google.auth
import pandas as pd
import pandas_gbq
import publish_public_data
from calitp_data_analysis import geography_utils
from calitp_data_analysis.gcs_geopandas import GCSGeoPandas
from shared_utils import geo_utils, gtfs_utils_v2, portfolio_utils, publish_utils
from update_vars import GTFS_DATA_DICT

In [2]:
# Load the default Google credentials. `project` is reassigned to a fixed
# project ID in a later cell before any query is issued.
credentials, project = google.auth.default()

In [3]:
# Notebook display settings: up to 100 columns, every row, untruncated cell
# text, and floats rendered with two decimals.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", None)
pd.set_option("display.float_format", "{:.2f}".format)

In [4]:
@cache
def gcs_geopandas():
    """Return a GCSGeoPandas client.

    The call takes no arguments and is wrapped in ``functools.cache``, so the
    client is constructed on the first call and the same instance is returned
    on every later call.
    """
    client = GCSGeoPandas()
    return client

## Caltrans District
* Testing `cal-itp-data-infra-staging.tiffany_mart_transit_database.bridge_gtfs_analysis_name_x_ntd` per [Issue 1791](https://github.com/cal-itp/data-analyses/issues/1791)

In [5]:
# Select every column of the staging bridge table keyed by analysis_name.
# Plain string literal: the query has no placeholders, so no f-prefix is needed.
query_sql = """
        SELECT 
            *
        FROM `cal-itp-data-infra-staging`.`tiffany_mart_transit_database`.`bridge_gtfs_analysis_name_x_ntd`
    """

In [6]:
# Echo the query text to confirm exactly what will be passed to read_gbq.
query_sql

'\n        SELECT \n            *\n        FROM `cal-itp-data-infra-staging`.`tiffany_mart_transit_database`.`bridge_gtfs_analysis_name_x_ntd`\n    '

In [7]:
# Override the project returned by google.auth.default(); this value is what
# gets passed as `project_id` to read_gbq below.
project = "cal-itp-data-infra"

In [56]:
# Run the bridge-table query and load the result into a DataFrame.
df = pandas_gbq.read_gbq(
    query_sql,
    project_id=project,
    credentials=credentials,
    dialect="standard",
)

Downloading: 100%|[32m██████████[0m|


In [53]:
# Keep only the geography / NTD attributes needed for the join, then drop
# repeated (analysis_name, county_name, caltrans_district) combinations.
# NOTE(review): de-duplicating on three columns does not by itself guarantee
# one row per analysis_name — confirm before merging on analysis_name alone.
df2 = df.loc[
    :,
    [
        "analysis_name",
        "county_name",
        "caltrans_district",
        "caltrans_district_name",
        "ntd_id",
        "ntd_id_2022",
        "rtpa_name",
        "mpo_name",
    ],
].drop_duplicates(subset=["analysis_name", "county_name", "caltrans_district"])

In [72]:
# Display the bridge table ordered by operator, then county, then district.
df.sort_values(by=["analysis_name", "county_name", "caltrans_district"])

Unnamed: 0,organization_name,organization_source_record_id,schedule_source_record_id,schedule_gtfs_dataset_name,analysis_name,regional_feed_type,county_name,caltrans_district,caltrans_district_name,ntd_id,ntd_id_2022,rtpa_name,mpo_name
6,Dumbarton Bridge Regional Operations Consortium,recn8zTmGbYZv1qxV,reciVy6v0lmnBeTVn,Bay Area 511 Dumbarton Express Schedule,Alameda-Contra Costa Transit District,Regional Subfeed,Alameda,4,Bay Area / Oakland,,,Metropolitan Transportation Commission,Metropolitan Transportation Commission
153,Alameda-Contra Costa Transit District,recOZgevYf7Jimm9L,recJjD8JT53sK302o,Bay Area 511 AC Transit Schedule,Alameda-Contra Costa Transit District,Regional Subfeed,Alameda,4,Bay Area / Oakland,90014,90014.0,Metropolitan Transportation Commission,Metropolitan Transportation Commission
168,Amador Regional Transit System,recSBFiK95hJnJuYx,recYb4Yoqr6zzqMd5,Amador Schedule,Amador Regional Transit System,,Amador,10,Stockton,9R02-91000,91000.0,Amador County Transportation Commission,
96,Amtrak,recKsb5FnJy70up78,recIHiLOHYXfVknaq,Amtrak Schedule,Amtrak,,Sacramento,3,Marysville / Sacramento,,,Sacramento Area Council of Governments,Sacramento Area Council of Governments
128,Anaheim Transportation Network,recsrIZdx5Wt6n3ol,recrxmzfLImgBGwUH,Anaheim Resort Schedule,Anaheim Transportation Network,,Orange,12,Orange County,90211,90211.0,Southern California Association of Governments,Southern California Association of Governments
40,Angel Island-Tiburon Ferry Company,rec0HI0gloltUftYg,recfvudBVGhR996NA,Bay Area 511 Angel Island-Tiburon Ferry Schedule,Angel Island-Tiburon Ferry Company,Regional Subfeed,San Francisco,4,Bay Area / Oakland,,,Metropolitan Transportation Commission,
190,Antelope Valley Transit Authority,recxsWR0KRrQTdjmg,recOJo4hgdBYwyMSG,Antelope Valley Transit Authority Schedule,Antelope Valley Transit Authority,,Los Angeles,7,Los Angeles / Ventura,90121,90121.0,Southern California Association of Governments,Southern California Association of Governments
68,Basin Transit,recniaSbsXcRSLsWe,recPDUpk92qJBocsl,Morongo Basin Schedule,Basin Transit,,San Bernardino,8,San Bernardino / Riverside,9R02-91090,91090.0,Southern California Association of Governments,Southern California Association of Governments
56,Butte County Association of Governments,recf7l9tozKXOmqqZ,recm6LcXTquVutuAW,B-Line Schedule,Butte County Association of Governments,,Butte,3,Marysville / Sacramento,90208,90208.0,Butte County Association of Governments,Butte County Association of Governments
46,Calaveras Transit Agency,recFqVabH8109u70q,recekJNW9RMN9P7j9,Calaveras Schedule,Calaveras Transit Agency,,Calaveras,10,Stockton,9R02-99442,99442.0,Calaveras Council of Governments,


### fct_monthly_operator_summary

In [21]:
# GCS path of the fct_monthly_operator_summary parquet used below
# (the file name is pinned to 2025_11).
monthly_operator_summary_url = "gs://calitp-analytics-data/data-analyses/gtfs_digest/raw/fct_monthly_operator_summary_2025_11.parquet"

In [25]:
# Load the operator summary and discard the three base64 URL columns,
# which are not used anywhere in this comparison.
monthly_operator_summary_df = pd.read_parquet(monthly_operator_summary_url).drop(
    columns=["vp_base64_url", "tu_base64_url", "schedule_base64_url"]
)

In [26]:
# List the remaining columns; `analysis_name` is the join key used below.
monthly_operator_summary_df.columns

Index(['month', 'year', 'month_first_day', 'schedule_name', 'vp_name',
       'tu_name', 'day_type', 'analysis_name', 'n_trips', 'daily_trips',
       'ttl_service_hours', 'n_routes', 'n_shapes', 'n_stops', 'n_days',
       'vp_messages_per_minute', 'n_vp_trips', 'daily_vp_trips',
       'pct_vp_trips', 'n_vp_routes', 'pct_vp_service_hours',
       'tu_messages_per_minute', 'n_tu_trips', 'daily_tu_trips',
       'pct_tu_trips', 'n_tu_routes', 'pct_tu_service_hours'],
      dtype='object')

In [27]:
# Peek at one row, transposed so every column is readable.
# NOTE(review): no random_state is passed, so a different row is shown on each run.
monthly_operator_summary_df.sample().T

Unnamed: 0,209
month,11
year,2025
month_first_day,2025-11-01 00:00:00
schedule_name,Visalia Schedule
vp_name,Visalia VehiclePositions
tu_name,Visalia TripUpdates
day_type,Weekday
analysis_name,
n_trips,5994
daily_trips,666.00


In [54]:
# Outer-join the operator summary to the district attributes on
# analysis_name; the `_merge` indicator column records which side(s) each
# row came from.
monthly_operator_summary_df2 = monthly_operator_summary_df.merge(
    df2,
    how="outer",
    on=["analysis_name"],
    indicator=True,
)

In [29]:
# Count matched rows ("both") versus rows present on only one side of the join.
monthly_operator_summary_df2._merge.value_counts()

both          235
right_only     95
left_only      81
Name: _merge, dtype: int64

In [30]:
# Distinct analysis_name values that exist only in the district table,
# i.e. have no row in the operator summary, in alphabetical order.
monthly_operator_summary_df2.loc[
    monthly_operator_summary_df2["_merge"] == "right_only", ["analysis_name"]
].drop_duplicates().sort_values(by=["analysis_name"])

Unnamed: 0,analysis_name
395,Amador Regional Transit System
361,Amtrak
340,Angel Island-Tiburon Ferry Company
347,Basin Transit
342,Calaveras Transit Agency
378,Chemehuevi Indian Tribe
358,City of Alhambra
391,City of Artesia
403,City of Arvin
399,City of Baldwin Park


#### Mountain View is missing an analysis_name value.

In [61]:
# Operator-summary rows whose schedule_name contains "Mountain".
mountainview_df = monthly_operator_summary_df.loc[
    monthly_operator_summary_df["schedule_name"].str.contains("Mountain")
]

In [68]:
# Distinct analysis_name values among the "Mountain" rows.
mountainview_df.analysis_name.unique()

array([None], dtype=object)

In [66]:
# Bridge-table rows whose analysis_name contains "Mountain", transposed for readability.
df.loc[df["analysis_name"].str.contains("Mountain")].T

Unnamed: 0,1,19,117
organization_name,City of Mountain View,Mountain View Transportation Management Association,Mountain Area Regional Transit Authority
organization_source_record_id,rec4pDiUorjWbUfvU,recIeP8mUucOsbvbz,recHbquam1bWEwC3P
schedule_source_record_id,rec1aVUoncbe5ieev,recudnSn7WzZIFqC5,recCvLW5YvXVhOzG5
schedule_gtfs_dataset_name,Bay Area 511 Mountain View Community Shuttle Schedule,Bay Area 511 MVGO Schedule,Mountain Transit Schedule
analysis_name,City of Mountain View,Mountain View Transportation Management Association,Mountain Area Regional Transit Authority
regional_feed_type,Regional Subfeed,Regional Subfeed,
county_name,Santa Clara,Santa Clara,San Bernardino
caltrans_district,4,4,8
caltrans_district_name,Bay Area / Oakland,Bay Area / Oakland,San Bernardino / Riverside
ntd_id,,,9R02-91012


In [65]:
# Tally the feed names on the "Mountain" rows, one printout per column.
# NOTE(review): schedule_name is listed twice, reproducing the original cell's
# four printouts — likely a copy-paste leftover; confirm and drop the repeat.
for name_col in ("schedule_name", "tu_name", "vp_name", "schedule_name"):
    print(mountainview_df[name_col].value_counts())

Mountain View Community Shuttle Schedule    3
Mountain Transit GMV Schedule               3
Name: schedule_name, dtype: int64
Mountain View Community Shuttle TripUpdates    3
Mountain Transit GMV TripUpdate                3
Name: tu_name, dtype: int64
Mountain View Community Shuttle VehiclePositions    3
Mountain Transit GMV VehiclePositions               3
Name: vp_name, dtype: int64
Mountain View Community Shuttle Schedule    3
Mountain Transit GMV Schedule               3
Name: schedule_name, dtype: int64


#### Roseville is also missing an `analysis_name` value

In [48]:
# Operator-summary rows whose schedule_name contains "Roseville".
roseville_df = monthly_operator_summary_df.loc[
    monthly_operator_summary_df["schedule_name"].str.contains("Roseville")
]

In [59]:
# Peek at one Roseville row, transposed.
# NOTE(review): no random_state is passed, so a different row is shown on each run.
roseville_df.sample().T

Unnamed: 0,50
month,11
year,2025
month_first_day,2025-11-01 00:00:00
schedule_name,Roseville Transit TripShot Schedule
vp_name,Roseville Transit TripShot VehiclePositions
tu_name,Roseville Transit TripShot TripUpdates
day_type,Saturday
analysis_name,
n_trips,144
daily_trips,48.00


In [64]:
# Tally the feed names on the Roseville rows, one printout per column.
# NOTE(review): schedule_name is listed twice, reproducing the original cell's
# four printouts — likely a copy-paste leftover; confirm and drop the repeat.
for name_col in ("schedule_name", "tu_name", "vp_name", "schedule_name"):
    print(roseville_df[name_col].value_counts())

Roseville Transit TripShot Schedule    2
Name: schedule_name, dtype: int64
Roseville Transit TripShot TripUpdates    2
Name: tu_name, dtype: int64
Roseville Transit TripShot VehiclePositions    2
Name: vp_name, dtype: int64
Roseville Transit TripShot Schedule    2
Name: schedule_name, dtype: int64


In [58]:
# Bridge-table rows whose analysis_name contains "Roseville", transposed for readability.
df.loc[df["analysis_name"].str.contains("Roseville")].T

Unnamed: 0,74
organization_name,City of Roseville
organization_source_record_id,recUdTq5QiUjJRiAe
schedule_source_record_id,rec90jC43naXJz9lr
schedule_gtfs_dataset_name,Roseville Schedule
analysis_name,City of Roseville
regional_feed_type,
county_name,Placer
caltrans_district,3
caltrans_district_name,Marysville / Sacramento
ntd_id,90168


#### YoloBus has no rows in `fct_monthly_operator_summary`: no `schedule_name`, `tu_name`, or `vp_name` contains "Yolo"

In [69]:
# Re-list the columns to pick which name fields to search for "Yolo"
# (same output as the earlier columns cell).
monthly_operator_summary_df.columns

Index(['month', 'year', 'month_first_day', 'schedule_name', 'vp_name',
       'tu_name', 'day_type', 'analysis_name', 'n_trips', 'daily_trips',
       'ttl_service_hours', 'n_routes', 'n_shapes', 'n_stops', 'n_days',
       'vp_messages_per_minute', 'n_vp_trips', 'daily_vp_trips',
       'pct_vp_trips', 'n_vp_routes', 'pct_vp_service_hours',
       'tu_messages_per_minute', 'n_tu_trips', 'daily_tu_trips',
       'pct_tu_trips', 'n_tu_routes', 'pct_tu_service_hours'],
      dtype='object')

In [35]:
# Operator-summary rows whose schedule_name contains "Yolo".
monthly_operator_summary_df.loc[
    monthly_operator_summary_df["schedule_name"].str.contains("Yolo")
]

Unnamed: 0,month,year,month_first_day,schedule_name,vp_name,tu_name,day_type,analysis_name,n_trips,daily_trips,ttl_service_hours,n_routes,n_shapes,n_stops,n_days,vp_messages_per_minute,n_vp_trips,daily_vp_trips,pct_vp_trips,n_vp_routes,pct_vp_service_hours,tu_messages_per_minute,n_tu_trips,daily_tu_trips,pct_tu_trips,n_tu_routes,pct_tu_service_hours


In [70]:
# Operator-summary rows whose tu_name contains "Yolo".
# na=False makes a null tu_name count as "no match": without it str.contains
# returns NaN for nulls, and .loc rejects a mask containing NaN.
# NOTE(review): assumes tu_name can be null for some feeds — confirm.
monthly_operator_summary_df.loc[
    monthly_operator_summary_df["tu_name"].str.contains("Yolo", na=False)
]

Unnamed: 0,month,year,month_first_day,schedule_name,vp_name,tu_name,day_type,analysis_name,n_trips,daily_trips,ttl_service_hours,n_routes,n_shapes,n_stops,n_days,vp_messages_per_minute,n_vp_trips,daily_vp_trips,pct_vp_trips,n_vp_routes,pct_vp_service_hours,tu_messages_per_minute,n_tu_trips,daily_tu_trips,pct_tu_trips,n_tu_routes,pct_tu_service_hours


In [71]:
# Operator-summary rows whose vp_name contains "Yolo".
# na=False makes a null vp_name count as "no match": without it str.contains
# returns NaN for nulls, and .loc rejects a mask containing NaN.
# NOTE(review): assumes vp_name can be null for some feeds — confirm.
monthly_operator_summary_df.loc[
    monthly_operator_summary_df["vp_name"].str.contains("Yolo", na=False)
]

Unnamed: 0,month,year,month_first_day,schedule_name,vp_name,tu_name,day_type,analysis_name,n_trips,daily_trips,ttl_service_hours,n_routes,n_shapes,n_stops,n_days,vp_messages_per_minute,n_vp_trips,daily_vp_trips,pct_vp_trips,n_vp_routes,pct_vp_service_hours,tu_messages_per_minute,n_tu_trips,daily_tu_trips,pct_tu_trips,n_tu_routes,pct_tu_service_hours


In [36]:
# Bridge-table rows whose analysis_name contains "Yolo".
df.loc[df["analysis_name"].str.contains("Yolo")]

Unnamed: 0,analysis_name,county_name,caltrans_district,caltrans_district_name,ntd_id,ntd_id_2022,rtpa_name,mpo_name
28,Yolo County Transportation District,Yolo,3,Marysville / Sacramento,90090,90090,Sacramento Area Council of Governments,Sacramento Area Council of Governments


#### Alhambra and Needles are missing entirely: no `schedule_name` in `fct_monthly_operator_summary` matches either

In [37]:
# Operator-summary rows whose schedule_name contains "Alhambra".
monthly_operator_summary_df.loc[
    monthly_operator_summary_df["schedule_name"].str.contains("Alhambra")
]

Unnamed: 0,month,year,month_first_day,schedule_name,vp_name,tu_name,day_type,analysis_name,n_trips,daily_trips,ttl_service_hours,n_routes,n_shapes,n_stops,n_days,vp_messages_per_minute,n_vp_trips,daily_vp_trips,pct_vp_trips,n_vp_routes,pct_vp_service_hours,tu_messages_per_minute,n_tu_trips,daily_tu_trips,pct_tu_trips,n_tu_routes,pct_tu_service_hours


In [38]:
# Operator-summary rows whose schedule_name contains "Needles".
monthly_operator_summary_df.loc[
    monthly_operator_summary_df["schedule_name"].str.contains("Needles")
]

Unnamed: 0,month,year,month_first_day,schedule_name,vp_name,tu_name,day_type,analysis_name,n_trips,daily_trips,ttl_service_hours,n_routes,n_shapes,n_stops,n_days,vp_messages_per_minute,n_vp_trips,daily_vp_trips,pct_vp_trips,n_vp_routes,pct_vp_service_hours,tu_messages_per_minute,n_tu_trips,daily_tu_trips,pct_tu_trips,n_tu_routes,pct_tu_service_hours


In [45]:
# Bridge-table rows whose analysis_name contains "Alhambra".
df.loc[df["analysis_name"].str.contains("Alhambra")]

Unnamed: 0,analysis_name,county_name,caltrans_district,caltrans_district_name,ntd_id,ntd_id_2022,rtpa_name,mpo_name
90,City of Alhambra,Los Angeles,7,Los Angeles / Ventura,90247,90247,Southern California Association of Governments,Southern California Association of Governments


### fct_monthly_routes  

In [40]:
# GCS path of the fct_monthly_routes parquet used below
# (the file name is pinned to 2025_12).
monthly_routes_url = "gs://calitp-analytics-data/data-analyses/gtfs_digest/processed/fct_monthly_routes_2025_12.parquet"

In [41]:
# Read the monthly routes parquet from GCS as a GeoDataFrame, passing the
# token currently held by `credentials` as the storage token.
# NOTE(review): presumably `credentials.token` is only set once the
# credentials have been used or refreshed — confirm an earlier cell does so.
monthly_routes_gdf = gpd.read_parquet(
    monthly_routes_url,
    storage_options={"token": credentials.token},
)

In [42]:
# (rows, columns) before the join, for comparison with the merge counts below.
monthly_routes_gdf.shape

(76342, 11)

In [73]:
# Outer-join the monthly routes to the district attributes on analysis_name;
# the `_merge` indicator column records which side(s) each row came from.
monthly_routes_gdf2 = monthly_routes_gdf.merge(
    df2,
    how="outer",
    on=["analysis_name"],
    indicator=True,
)

In [74]:
# Count matched rows ("both") versus rows present on only one side of the join.
monthly_routes_gdf2._merge.value_counts()

both          76227
left_only       115
right_only        6
Name: _merge, dtype: int64

In [76]:
# Rows that exist only in the district table, i.e. analysis_name values with
# no route in fct_monthly_routes.
monthly_routes_gdf2.loc[monthly_routes_gdf2["_merge"] == "right_only"]

Unnamed: 0,name,year,month,route_name,route_type_x,analysis_name,source_record_id,route_type_y,route_color,route_typology,geometry,county_name,caltrans_district,caltrans_district_name,ntd_id,ntd_id_2022,rtpa_name,mpo_name,_merge
76342,,,,,,"University of California, Irvine",,,,,,Orange,12,Orange County,,,,,right_only
76343,,,,,,Trinity County,,,,,,Trinity,2,Redding,9R02-91035,91035.0,Trinity County Transportation Commission,,right_only
76344,,,,,,City of Thousand Oaks,,,,,,Ventura,7,Los Angeles / Ventura,,,Southern California Association of Governments,Southern California Association of Governments,right_only
76345,,,,,,City of El Segundo,,,,,,Los Angeles,7,Los Angeles / Ventura,A0003-99449,99449.0,Southern California Association of Governments,Southern California Association of Governments,right_only
76346,,,,,,City of La Puente,,,,,,Los Angeles,7,Los Angeles / Ventura,,,Southern California Association of Governments,Southern California Association of Governments,right_only
76347,,,,,,Town of Truckee,,,,,,Nevada,3,Marysville / Sacramento,9R02-91101,91101.0,Nevada County Transportation Commission,,right_only
