# Lost operators
* Per `gtfs_digest/diagnostics.yml`, the number of unique operators dropped to 174, down from around 190 over the past few months.

In [1]:
import numpy as np
import pandas as pd
import pyaml
from update_vars import GTFS_DATA_DICT, RT_SCHED_GCS

# Relative path to a diagnostics YAML file; presumably the
# `gtfs_digest/diagnostics.yml` cited in the intro -- TODO confirm.
DIAGNOSTIC_YAML_PATH = "./diagnostics.yml"

In [3]:
# Catalog entry for the monthly route-level schedule + vehicle-position digest
# table; it is used as the parquet file stem when the table is loaded.
DIGEST_RT_SCHED = GTFS_DATA_DICT.digest_tables.monthly_route_schedule_vp

In [9]:
# Load the monthly route digest table, drop exact duplicate rows, and cast every
# column to str so that service_date can be compared against date strings.
digest_path = f"{RT_SCHED_GCS}{DIGEST_RT_SCHED}.parquet"
route_df = (
    pd.read_parquet(digest_path).drop_duplicates().reset_index(drop=True).astype(str)
)

In [10]:
# List the columns available in the digest table.
route_df.columns

Index(['schedule_gtfs_dataset_key', 'route_id', 'direction_id', 'time_period',
       'avg_scheduled_service_minutes', 'avg_stop_miles', 'n_scheduled_trips',
       'frequency', 'service_date', 'is_express', 'is_ferry', 'is_rail',
       'is_coverage', 'is_local', 'is_downtown_local', 'is_rapid', 'typology',
       'name', 'combined_name', 'recent_combined_name', 'recent_route_id',
       'route_primary_direction', 'minutes_atleast1_vp', 'minutes_atleast2_vp',
       'total_rt_service_minutes', 'total_scheduled_service_minutes',
       'total_vp', 'vp_in_shape', 'is_early', 'is_ontime', 'is_late',
       'n_vp_trips', 'vp_per_minute', 'pct_in_shape',
       'pct_rt_journey_atleast1_vp', 'pct_rt_journey_atleast2_vp',
       'pct_sched_journey_atleast1_vp', 'pct_sched_journey_atleast2_vp',
       'rt_sched_journey_ratio', 'avg_rt_service_minutes', 'sched_rt_category',
       'speed_mph', 'schedule_source_record_id', 'base64_url',
       'caltrans_district', 'portfolio_organization_name']

In [15]:
# Which service dates does the table cover?
route_df["service_date"].unique()

array(['2023-05-17', '2023-06-14', '2023-07-12', '2023-08-15',
       '2023-09-13', '2023-10-11', '2023-11-15', '2023-12-13',
       '2024-01-17', '2024-02-14', '2024-03-13', '2024-04-17',
       '2024-05-22', '2024-06-12', '2024-07-17', '2024-08-14',
       '2024-09-18', '2024-10-16', '2024-11-13', '2024-12-11',
       '2025-01-15', '2025-02-12', '2025-03-12', '2025-04-16',
       '2025-05-14', '2025-06-11', '2025-07-16', '2023-04-12',
       '2023-03-15'], dtype=object)

In [16]:
# Rows for the July 2025 service date (dates are strings in route_df).
is_july = route_df["service_date"] == "2025-07-16"
july = route_df.loc[is_july]

In [17]:
# Rows for the June 2025 service date (dates are strings in route_df).
is_june = route_df["service_date"] == "2025-06-11"
june = route_df.loc[is_june]

In [22]:
# Operators (portfolio organizations) present in the June 2025 slice but absent
# from the July 2025 slice.
june_ops = set(june["portfolio_organization_name"])
july_ops = set(july["portfolio_organization_name"])

# Later cells read this set as `missing_july_ops`; assigning that name here
# keeps the notebook runnable from a fresh kernel (Restart & Run All).
missing_july_ops = june_ops - july_ops

# Keep the name this cell originally assigned so any other reference still resolves.
missing_july_orgs = missing_july_ops

In [23]:
# `name` values present in the June 2025 slice but absent from the July 2025 slice.
june_names = set(june["name"])
july_names = set(july["name"])
missing_july_names = june_names - july_names

In [24]:
# Display the `name` values seen in June but not in July.
missing_july_names

{'Bay Area 511 SolTrans Schedule',
 'Bay Area 511 Sonoma-Marin Area Rail Transit Schedule',
 'Bay Area 511 Vacaville City Coach Schedule',
 'Bay Area 511 Vine Transit Schedule',
 'Beach Cities GMV Schedule',
 'Culver City Schedule',
 'Desert Roadrunner GMV Schedule',
 'DowneyLink Avail Schedule',
 'Eastern Sierra Schedule',
 'Get Around Town Express Schedule',
 'Glendora Schedule',
 'Havasu Landing Ferry Schedule',
 'LA DOT Schedule',
 'Madera Metro Schedule',
 'Mountain Transit Schedule',
 'North County Schedule',
 'Redding Schedule',
 'Rosemead Passio Schedule',
 'SLO Peak Transit Schedule',
 'Santa Maria Schedule'}

In [19]:
# Display the set of operators missing from July.
missing_july_ops

{'Chemehuevi Indian Tribe',
 'City of Culver City',
 'City of Downey',
 'City of Glendora',
 'City of Los Angeles',
 'City of Madera',
 'City of Redondo Beach',
 'City of Rosemead',
 'City of Santa Maria',
 'City of South Gate',
 'City of Vacaville',
 'Eastern Sierra Transit Authority',
 'Mountain Area Regional Transit Authority',
 'Napa Valley Transportation Authority',
 'North County Transit District',
 'Redding Area Bus Authority',
 'Solano Transportation Authority',
 'Sonoma-Marin Area Rail Transit District'}

In [29]:
# Sanity check: look up the June-only `name` values in the July slice.
july_has_missing_name = july["name"].isin(missing_july_names)
july.loc[july_has_missing_name, ["portfolio_organization_name", "sched_rt_category"]]

Unnamed: 0,portfolio_organization_name,sched_rt_category


In [None]:
# Sanity check: look up the missing operators in the July slice.
july_has_missing_op = july["portfolio_organization_name"].isin(missing_july_ops)
july.loc[july_has_missing_op, ["portfolio_organization_name", "sched_rt_category"]]

In [31]:
# What did the missing operators look like in June? One row per distinct
# district / operator / name / category combination, sorted by operator.
june_has_missing_op = june["portfolio_organization_name"].isin(missing_july_ops)
june_missing_ops = june.loc[
    june_has_missing_op,
    ["caltrans_district", "portfolio_organization_name", "name", "sched_rt_category"],
]
june_missing_ops.drop_duplicates().sort_values(by=["portfolio_organization_name"])

Unnamed: 0,caltrans_district,portfolio_organization_name,name,sched_rt_category
113409,08 - San Bernardino / Riverside,Chemehuevi Indian Tribe,Havasu Landing Ferry Schedule,schedule_only
84468,07 - Los Angeles / Ventura,City of Culver City,Culver City Schedule,schedule_and_vp
84622,07 - Los Angeles / Ventura,City of Culver City,Culver City Schedule,schedule_only
215176,07 - Los Angeles / Ventura,City of Downey,DowneyLink Avail Schedule,schedule_and_vp
235595,07 - Los Angeles / Ventura,City of Glendora,Glendora Schedule,schedule_only
241041,07 - Los Angeles / Ventura,City of Los Angeles,LA DOT Schedule,schedule_and_vp
82513,06 - Fresno / Bakersfield,City of Madera,Madera Metro Schedule,schedule_only
27204,07 - Los Angeles / Ventura,City of Redondo Beach,Beach Cities GMV Schedule,schedule_and_vp
113309,07 - Los Angeles / Ventura,City of Rosemead,Rosemead Passio Schedule,schedule_and_vp
128894,05 - San Luis Obispo / Santa Barbara,City of Santa Maria,Santa Maria Schedule,schedule_and_vp
