## Something is wrong with GTFS Digest
* The Makefile in `gtfs_digest` won't run because a function it depends on has changed.
    * Temporary workaround: go to `rt_segment_speeds` -> `segment_speed_utils` -> `time_series_utils` and change it back to the old function.


In [35]:
import _section2_utils as section2
import geopandas as gpd
import merge_operator_data
import merge_data
import numpy as np
import pandas as pd
from segment_speed_utils import gtfs_schedule_wrangling, helpers, time_series_utils
from shared_utils import catalog_utils, rt_dates, rt_utils
from update_vars import GTFS_DATA_DICT, RT_SCHED_GCS, SCHED_GCS

In [2]:
# Notebook-wide display settings: up to 100 columns, no row limit, untruncated
# cell contents, and floats formatted with two decimal places.
pd.set_option("display.max_columns", 100)
pd.set_option("display.float_format", "{:.2f}".format)
pd.options.display.max_rows = None
pd.options.display.max_colwidth = None

In [3]:
# Join the 2024 and 2023 date collections from `rt_dates`; the loader calls
# further down all take this combined collection as their argument.
analysis_date_list = rt_dates.y2024_dates + rt_dates.y2023_dates

### Metrics for All Routes
* March 2023 has two values for some operators.
* Some operators have many repeated rows, which pushes their charts above 100.

#### Look at the metrics dataframes first.
* I think `op_rt_sched_metrics` is the reason why there are duplicate values.
* Temporary fix: in `_section2_utils.load_operator_metrics()`, drop duplicates based on `service_date`.

In [36]:
# Schedule-only operator metrics for every date in `analysis_date_list`.
op_sched_metrics = merge_operator_data.concatenate_schedule_operator_metrics(
    analysis_date_list
)

In [54]:
# Subset the schedule operator metrics to the 2024-12-11 service date.
sched_is_dec_2024 = (
    op_sched_metrics.service_date == '2024-12-11T00:00:00.000000000'
)
op_sched_metrics_dec = op_sched_metrics.loc[sched_is_dec_2024]

In [57]:
op_sched_metrics_dec.schedule_gtfs_dataset_key.value_counts().head(10)

0139b1253130b33adcd4b3a4490530d2    1
9b4c9cf681dfcc10dffe9523b43598bf    1
c00acf594cd3612865b836c9cef64e2e    1
baeeb157e85a901e47b828ef9fe75091    1
b9f9ee9267bd3564d5d2cfbe2389f3fa    1
b82a23bef8a501e980c086ef269ffec7    1
aea4108997c66a74fbdae27b34b69fde    1
ac2951bfaa7ecf6b80ba9e50aef1ae86    1
a8d5f90bfd689badb7e1deb041408e96    1
a7f5522d7690161fc2be75857d7e2f79    1
Name: schedule_gtfs_dataset_key, dtype: int64

In [38]:
# RT-vs-schedule operator metrics for every date in `analysis_date_list`.
op_rt_sched_metrics = (
    merge_operator_data.concatenate_rt_vs_schedule_operator_metrics(
        analysis_date_list
    )
)

In [48]:
# Subset the RT-vs-schedule operator metrics to the 2024-12-11 service date.
rt_sched_is_dec_2024 = (
    op_rt_sched_metrics.service_date == '2024-12-11T00:00:00.000000000'
)
op_rt_sched_metrics_dec = op_rt_sched_metrics.loc[rt_sched_is_dec_2024]

In [49]:
op_rt_sched_metrics_dec.organization_name.value_counts().head(15)

City of Santa Monica                                        2
City of Roseville                                           2
Basin Transit                                               2
City of San Luis Obispo                                     2
Victor Valley Transit Authority                             2
Mountain Area Regional Transit Authority                    2
Transit Joint Powers Authority for Merced County            2
Tahoe Transportation District                               2
Palo Verde Valley Transit Agency                            2
City of Lawndale                                            2
Los Angeles County Metropolitan Transportation Authority    2
City of Tracy                                               1
City of Arcata                                              1
Greyhound                                                   1
FlixBus                                                     1
Name: organization_name, dtype: int64

* Some organizations have two rows because they publish separate schedules (for example, one rail schedule and one bus schedule).

In [58]:
# December rows for LA Metro, transposed so each row is displayed as a column.
la_metro_rt_sched_mask = op_rt_sched_metrics_dec["organization_name"].eq(
    "Los Angeles County Metropolitan Transportation Authority"
)
op_rt_sched_metrics_dec.loc[la_metro_rt_sched_mask].T

Unnamed: 0,44,682
schedule_gtfs_dataset_key,0666caf3ec1ecc96b74f4477ee4bc939,2a0571758141f412b6a546fd70a65bf3
operator_n_routes,111,6
operator_n_trips,13760,1512
operator_n_shapes,638,12
operator_n_stops,12010,106
operator_n_arrivals,863656,26257
operator_route_length_miles,1955.29,121.40
operator_arrivals_per_stop,71.91,247.71
n_downtown_local_routes,216,12
n_local_routes,7,0


#### How do you know which one is correct?

In [52]:
# December rows for Merced County's transit JPA, transposed for side-by-side reading.
merced_rt_sched_mask = op_rt_sched_metrics_dec["organization_name"].eq(
    "Transit Joint Powers Authority for Merced County"
)
op_rt_sched_metrics_dec.loc[merced_rt_sched_mask].T

Unnamed: 0,1499,2608
schedule_gtfs_dataset_key,6fa88692b54f9ce98d2b95ae6804fa43,bff13f8993ff18e43577db1f5596e014
operator_n_routes,15,15
operator_n_trips,333,288
operator_n_shapes,50,15
operator_n_stops,358,101
operator_n_arrivals,9141,2429
operator_route_length_miles,323.48,386.90
operator_arrivals_per_stop,25.53,24.05
n_downtown_local_routes,1,0
n_local_routes,2,3


In [53]:
# December rows for City of Santa Monica, transposed for side-by-side reading.
santa_monica_rt_sched_mask = op_rt_sched_metrics_dec["organization_name"].eq(
    "City of Santa Monica"
)
op_rt_sched_metrics_dec.loc[santa_monica_rt_sched_mask].T

Unnamed: 0,2623,3190
schedule_gtfs_dataset_key,c2630c44a1f2703ec651596e6358d4f4,efbbd5293be71f7a5de0cf82b59febe1
operator_n_routes,19,19
operator_n_trips,1478,1478
operator_n_shapes,67,67
operator_n_stops,929,929
operator_n_arrivals,51399,51399
operator_route_length_miles,185.03,185.03
operator_arrivals_per_stop,55.33,55.33
n_downtown_local_routes,37,37
n_local_routes,2,2


In [59]:
# December rows for Tahoe Transportation District, transposed for side-by-side reading.
tahoe_rt_sched_mask = op_rt_sched_metrics_dec["organization_name"].eq(
    "Tahoe Transportation District"
)
op_rt_sched_metrics_dec.loc[tahoe_rt_sched_mask].T

Unnamed: 0,65,2654
schedule_gtfs_dataset_key,07d3b79f14cec8099119e1eb649f065b,c3499b856c717e5706299664fb1c5261
operator_n_routes,5,4
operator_n_trips,132,123
operator_n_shapes,13,9
operator_n_stops,123,117
operator_n_arrivals,2409,2589
operator_route_length_miles,90.82,64.68
operator_arrivals_per_stop,19.59,22.13
n_downtown_local_routes,0,0
n_local_routes,1,1


In [61]:
# December rows for City of Lawndale, transposed for side-by-side reading.
lawndale_rt_sched_mask = op_rt_sched_metrics_dec["organization_name"].eq(
    "City of Lawndale"
)
op_rt_sched_metrics_dec.loc[lawndale_rt_sched_mask].T

Unnamed: 0,84,2729
schedule_gtfs_dataset_key,09a703757d1ed14ca9580b1385e39315,c91b3a82737733ba48cf1b70b44274ba
operator_n_routes,2,2
operator_n_trips,30,60
operator_n_shapes,2,4
operator_n_stops,43,44
operator_n_arrivals,698,774
operator_route_length_miles,17.92,8.72
operator_arrivals_per_stop,16.23,17.59
n_downtown_local_routes,2,4
n_local_routes,0,0


#### Dataframe from `merge_operator_data.concatenate_rt_vs_schedule_operator_metrics` is created [here at `gtfs_funnel/operator_scheduled_stats.py`](https://github.com/cal-itp/data-analyses/blob/1ba0f544a01f99966a6e210dd11666b4fe4a146e/gtfs_funnel/operator_scheduled_stats.py#L147)
* The data is grouped by `schedule_gtfs_dataset_key`, and a single `organization_name` can map to more than one key, which is why some organizations have multiple entries.

#### Other attempts to look at Operator Profiles

In [4]:
# Read the published operator-profiles parquet straight from GCS.
# NOTE(review): this path is hard-coded rather than built from GTFS_DATA_DICT —
# confirm it is the same file the digest itself reads.
url = "gs://calitp-analytics-data/data-analyses/rt_vs_schedule/digest/operator_profiles.parquet"
operator_profile_df = pd.read_parquet(url)

In [5]:
operator_profile_df.service_date.unique()

array(['2023-05-17T00:00:00.000000000', '2023-06-14T00:00:00.000000000',
       '2023-07-12T00:00:00.000000000', '2023-08-15T00:00:00.000000000',
       '2023-09-13T00:00:00.000000000', '2023-10-11T00:00:00.000000000',
       '2023-11-15T00:00:00.000000000', '2023-12-13T00:00:00.000000000',
       '2024-01-17T00:00:00.000000000', '2024-02-14T00:00:00.000000000',
       '2024-03-13T00:00:00.000000000', '2024-04-17T00:00:00.000000000',
       '2024-05-22T00:00:00.000000000', '2024-06-12T00:00:00.000000000',
       '2024-07-17T00:00:00.000000000', '2024-08-14T00:00:00.000000000',
       '2024-09-18T00:00:00.000000000', '2024-10-16T00:00:00.000000000',
       '2024-11-13T00:00:00.000000000', '2024-12-11T00:00:00.000000000',
       '2023-04-12T00:00:00.000000000', '2023-03-15T00:00:00.000000000'],
      dtype='datetime64[ns]')

In [6]:
# Operator profile rows for the 2023-03-15 service date.
profile_is_march_2023 = (
    operator_profile_df["service_date"] == "2023-03-15T00:00:00.000000000"
)
march_2023 = operator_profile_df.loc[profile_is_march_2023]

In [7]:
# Operator profile rows for the 2024-12-11 service date.
profile_is_dec_2024 = (
    operator_profile_df["service_date"] == "2024-12-11T00:00:00.000000000"
)
dec_2024 = operator_profile_df.loc[profile_is_dec_2024]

In [27]:
march_2023.organization_name.value_counts().head(12)

Victor Valley Transit Authority                             2
Transit Joint Powers Authority for Merced County            2
Alameda-Contra Costa Transit District                       2
Los Angeles County Metropolitan Transportation Authority    2
Palo Verde Valley Transit Agency                            1
North Lake Tahoe Express                                    1
Imperial County Transportation Commission                   1
San Benito County Local Transportation Authority            1
City of South San Francisco                                 1
San Luis Obispo Regional Transit Authority                  1
Tahoe Transportation District                               1
Kern County                                                 1
Name: organization_name, dtype: int64

In [26]:
dec_2024.organization_name.value_counts().head(12)

City of Roseville                                           2
City of Lawndale                                            2
Transit Joint Powers Authority for Merced County            2
Los Angeles County Metropolitan Transportation Authority    2
Mountain Area Regional Transit Authority                    2
Palo Verde Valley Transit Agency                            2
Basin Transit                                               2
Tahoe Transportation District                               2
City of San Luis Obispo                                     2
Victor Valley Transit Authority                             2
Amador Regional Transit System                              1
City of Calabasas                                           1
Name: organization_name, dtype: int64

#### How does Los Angeles County Metropolitan Transportation Authority have two different values?

In [60]:
# December operator-profile rows for Basin Transit, transposed for side-by-side reading.
basin_profile_mask = dec_2024["organization_name"].eq("Basin Transit")
dec_2024.loc[basin_profile_mask].T

Unnamed: 0,1473,3254
schedule_gtfs_dataset_key,a7f5522d7690161fc2be75857d7e2f79,b0760015c9fcd0500c4fddd5b9bb115b
vp_per_min_agency,2.92,
spatial_accuracy_agency,95.49,
service_date,2024-12-11 00:00:00,2024-12-11 00:00:00
operator_n_routes,7.00,7.00
operator_n_trips,87.00,89.00
operator_n_shapes,16.00,24.00
operator_n_stops,170.00,168.00
operator_n_arrivals,2473.00,2446.00
operator_route_length_miles,184.36,181.54


In [32]:
# December operator-profile rows for LA Metro.
la_metro_profile_mask = dec_2024["organization_name"].eq(
    "Los Angeles County Metropolitan Transportation Authority"
)
dec_2024.loc[la_metro_profile_mask]

Unnamed: 0,schedule_gtfs_dataset_key,vp_per_min_agency,spatial_accuracy_agency,service_date,operator_n_routes,operator_n_trips,operator_n_shapes,operator_n_stops,operator_n_arrivals,operator_route_length_miles,operator_arrivals_per_stop,n_downtown_local_routes,n_local_routes,n_coverage_routes,n_rapid_routes,n_express_routes,n_rail_routes,name,organization_source_record_id,organization_name,caltrans_district,counties_served,service_area_sq_miles,hq_city,service_area_pop,organization_type,primary_uza_name,reporter_type
48,0666caf3ec1ecc96b74f4477ee4bc939,2.05,92.38,2024-12-11,111.0,13760.0,638.0,12010.0,863656.0,1955.29,71.91,216.0,7.0,18.0,25.0,12.0,0.0,LA Metro Bus Schedule,recPnGkwdpnr8jmHB,Los Angeles County Metropolitan Transportation Authority,07 - Los Angeles,Los Angeles,4099,Los Angeles,10394849,Independent Public Agency or Authority of Transit Service,"Los Angeles--Long Beach--Anaheim, CA",Full Reporter
580,2a0571758141f412b6a546fd70a65bf3,1.1,96.81,2024-12-11,6.0,1512.0,12.0,106.0,26257.0,121.4,247.71,12.0,0.0,0.0,0.0,0.0,12.0,LA Metro Rail Schedule,recPnGkwdpnr8jmHB,Los Angeles County Metropolitan Transportation Authority,07 - Los Angeles,Los Angeles,4099,Los Angeles,10394849,Independent Public Agency or Authority of Transit Service,"Los Angeles--Long Beach--Anaheim, CA",Full Reporter


In [31]:
# December operator-profile rows for Merced County's transit JPA.
merced_profile_mask = dec_2024["organization_name"].eq(
    "Transit Joint Powers Authority for Merced County"
)
dec_2024.loc[merced_profile_mask]

Unnamed: 0,schedule_gtfs_dataset_key,vp_per_min_agency,spatial_accuracy_agency,service_date,operator_n_routes,operator_n_trips,operator_n_shapes,operator_n_stops,operator_n_arrivals,operator_route_length_miles,operator_arrivals_per_stop,n_downtown_local_routes,n_local_routes,n_coverage_routes,n_rapid_routes,n_express_routes,n_rail_routes,name,organization_source_record_id,organization_name,caltrans_district,counties_served,service_area_sq_miles,hq_city,service_area_pop,organization_type,primary_uza_name,reporter_type
2723,6fa88692b54f9ce98d2b95ae6804fa43,,,2024-12-11,15.0,333.0,50.0,358.0,9141.0,323.48,25.53,1.0,2.0,15.0,12.0,0.0,0.0,Merced Schedule,recVSX7dwjxAb557T,Transit Joint Powers Authority for Merced County,10 - Stockton,Merced,310,Merced,136957,Independent Public Agency or Authority of Transit Service,"Merced, CA",Full Reporter
3373,bff13f8993ff18e43577db1f5596e014,,,2024-12-11,15.0,288.0,15.0,101.0,2429.0,386.9,24.05,0.0,3.0,12.0,10.0,0.0,0.0,Merced GMV Schedule,recVSX7dwjxAb557T,Transit Joint Powers Authority for Merced County,10 - Stockton,Merced,310,Merced,136957,Independent Public Agency or Authority of Transit Service,"Merced, CA",Full Reporter


In [29]:
# December operator-profile rows for City of Lawndale.
lawndale_profile_mask = dec_2024["organization_name"].eq("City of Lawndale")
dec_2024.loc[lawndale_profile_mask]

Unnamed: 0,schedule_gtfs_dataset_key,vp_per_min_agency,spatial_accuracy_agency,service_date,operator_n_routes,operator_n_trips,operator_n_shapes,operator_n_stops,operator_n_arrivals,operator_route_length_miles,operator_arrivals_per_stop,n_downtown_local_routes,n_local_routes,n_coverage_routes,n_rapid_routes,n_express_routes,n_rail_routes,name,organization_source_record_id,organization_name,caltrans_district,counties_served,service_area_sq_miles,hq_city,service_area_pop,organization_type,primary_uza_name,reporter_type
97,09a703757d1ed14ca9580b1385e39315,3.01,95.77,2024-12-11,2.0,30.0,2.0,43.0,698.0,17.92,16.23,2.0,0.0,1.0,0.0,1.0,0.0,Lawndale Beat GMV Schedule,recj8LXdeSurpSRNU,City of Lawndale,07 - Los Angeles,,,,,,,
3447,c91b3a82737733ba48cf1b70b44274ba,,,2024-12-11,2.0,60.0,4.0,44.0,774.0,8.72,17.59,4.0,0.0,1.0,1.0,2.0,0.0,Lawndale Schedule,recj8LXdeSurpSRNU,City of Lawndale,07 - Los Angeles,,,,,,,


In [28]:
# December operator-profile rows for Palo Verde Valley Transit Agency.
palo_verde_profile_mask = dec_2024["organization_name"].eq(
    "Palo Verde Valley Transit Agency"
)
dec_2024.loc[palo_verde_profile_mask]

Unnamed: 0,schedule_gtfs_dataset_key,vp_per_min_agency,spatial_accuracy_agency,service_date,operator_n_routes,operator_n_trips,operator_n_shapes,operator_n_stops,operator_n_arrivals,operator_route_length_miles,operator_arrivals_per_stop,n_downtown_local_routes,n_local_routes,n_coverage_routes,n_rapid_routes,n_express_routes,n_rail_routes,name,organization_source_record_id,organization_name,caltrans_district,counties_served,service_area_sq_miles,hq_city,service_area_pop,organization_type,primary_uza_name,reporter_type
718,4383eb1cca04093020f1583f57f32d9b,2.8,89.73,2024-12-11,5.0,54.0,9.0,39.0,472.0,220.1,12.1,1.0,2.0,8.0,1.0,1.0,0.0,Desert Roadrunner GMV Schedule,recGcv4NidDjwVSiN,Palo Verde Valley Transit Agency,08 - San Bernardino,,,Blythe,,Independent Public Agency or Authority of Transit Service,,Rural Reporter
3206,a4fbbb85891191f714ec113dfe1a936b,,,2024-12-11,5.0,45.0,14.0,161.0,1753.0,228.79,10.89,1.0,1.0,7.0,1.0,1.0,0.0,Desert Roadrunner Schedule,recGcv4NidDjwVSiN,Palo Verde Valley Transit Agency,08 - San Bernardino,,,Blythe,,Independent Public Agency or Authority of Transit Service,,Rural Reporter


In [33]:
# December operator-profile rows for City of San Luis Obispo.
slo_profile_mask = dec_2024["organization_name"].eq("City of San Luis Obispo")
dec_2024.loc[slo_profile_mask]

Unnamed: 0,schedule_gtfs_dataset_key,vp_per_min_agency,spatial_accuracy_agency,service_date,operator_n_routes,operator_n_trips,operator_n_shapes,operator_n_stops,operator_n_arrivals,operator_route_length_miles,operator_arrivals_per_stop,n_downtown_local_routes,n_local_routes,n_coverage_routes,n_rapid_routes,n_express_routes,n_rail_routes,name,organization_source_record_id,organization_name,caltrans_district,counties_served,service_area_sq_miles,hq_city,service_area_pop,organization_type,primary_uza_name,reporter_type
2077,f4c3ea214214ee0d96f7646b3e9d69dc,1.96,99.53,2024-12-11,9.0,147.0,10.0,157.0,3921.0,84.95,24.97,1.0,2.0,6.0,9.0,0.0,0.0,SLO Peak Transit Schedule,recMM99msxjmc6PPv,City of San Luis Obispo,05 - San Luis Obispo,San Luis Obispo,22,San Luis Obispo,46997,"City, County or Local Government Unit or Department of Transportation","San Luis Obispo, CA",Full Reporter
3279,b475cf3b7ceaae9fc30b22cb8d3a1e33,,,2024-12-11,9.0,135.0,10.0,157.0,3570.0,85.38,22.74,2.0,0.0,8.0,10.0,0.0,0.0,SLO Schedule,recMM99msxjmc6PPv,City of San Luis Obispo,05 - San Luis Obispo,San Luis Obispo,22,San Luis Obispo,46997,"City, County or Local Government Unit or Department of Transportation","San Luis Obispo, CA",Full Reporter


In [14]:
# Crosswalk rows for every date in `analysis_date_list`.
crosswalk_df = merge_operator_data.concatenate_crosswalks(
    analysis_date_list
)

In [15]:
crosswalk_df.head(1)

Unnamed: 0,schedule_gtfs_dataset_key,caltrans_district,counties_served,service_area_sq_miles,hq_city,service_area_pop,organization_type,primary_uza_name,reporter_type,service_date
0,f8c7d523e77341ee3c140b24a316f0fa,06 - Fresno,,,,,,,,2023-03-15


In [None]:
# Subset the concatenated crosswalk to the March 2023 service date (the month
# where some operators show two values). The original cell had an empty
# `.loc[]`, which is a SyntaxError.
# NOTE(review): assumes `service_date` compares equal to the string
# "2023-03-15" (datetime64 or ISO-string column) — confirm the dtype.
march_crosswalk_df = crosswalk_df.loc[crosswalk_df.service_date == "2023-03-15"]

### Op Profiles
* The code in `gtfs_digest/merge_operator_data.py` stopped working because one of the column names changed. I went into `crosswalk_gtfs_dataset_key_to_organization` to fix that.
* <s>Operator Profiles: the data is from September 2024 when it should be from December 2024.</s>
    * Fixed: the code was still referencing one of my old testing profiles.

In [16]:
SCHED_GCS

'gs://calitp-analytics-data/data-analyses/gtfs_schedule/'

In [17]:
f"{GTFS_DATA_DICT.schedule_tables.gtfs_key_crosswalk}"

'crosswalk/gtfs_key_organization'

In [18]:
# Build the December 2024 crosswalk path from the shared config values rather
# than hard-coding the bucket; with the values displayed in the cells above
# (`SCHED_GCS` and `schedule_tables.gtfs_key_crosswalk`) this resolves to the
# same string as before.
dec_crosswalk_url = (
    f"{SCHED_GCS}{GTFS_DATA_DICT.schedule_tables.gtfs_key_crosswalk}"
    "_2024-12-11.parquet"
)

In [19]:
# Build the November 2024 crosswalk path from the shared config values rather
# than hard-coding the bucket; with the values displayed in the cells above
# (`SCHED_GCS` and `schedule_tables.gtfs_key_crosswalk`) this resolves to the
# same string as before.
nov_crosswalk_url = (
    f"{SCHED_GCS}{GTFS_DATA_DICT.schedule_tables.gtfs_key_crosswalk}"
    "_2024-11-13.parquet"
)

In [20]:
dec_crosswalk_df = pd.read_parquet(dec_crosswalk_url)

In [21]:
dec_crosswalk_df.organization_name.value_counts().head(25)

Los Angeles World Airports                                  3
Tahoe Transportation District                               2
Basin Transit                                               2
Santa Clara Valley Transportation Authority                 2
Mountain Area Regional Transit Authority                    2
City and County of San Francisco                            2
Anaheim Transportation Network                              2
City of South San Francisco                                 2
City of San Luis Obispo                                     2
North County Transit District                               2
City of Cerritos                                            2
City of Santa Monica                                        2
City of Roseville                                           2
Golden Gate Bridge, Highway and Transportation District     2
City of Lawndale                                            2
Amtrak                                                      2
Transit 

In [22]:
# December crosswalk rows for City of South San Francisco.
south_sf_crosswalk_mask = dec_crosswalk_df["organization_name"].eq(
    "City of South San Francisco"
)
dec_crosswalk_df.loc[south_sf_crosswalk_mask]

Unnamed: 0,schedule_gtfs_dataset_key,name,schedule_source_record_id,base64_url,organization_source_record_id,organization_name,caltrans_district,counties_served,hq_city,hq_county,is_public_entity,is_publicly_operating,funding_sources,on_demand_vehicles_at_max_service,vehicles_at_max_service,number_of_state_counties,primary_uza_name,density,number_of_counties_with_service,state_admin_funds_expended,service_area_sq_miles,population,service_area_pop,subrecipient_type,primary_uza_code,reporter_type,organization_type,voms_pt,voms_do,year
33,b75dfb03ca9b316fdb18f98a6b97e419,South San Francisco Schedule,reckDBU6lw1zHr4ya,aHR0cHM6Ly93d3cuaXBzLXN5c3RlbXMuY29tL0dURlMvU2NoZWR1bGUvMTk=,recPtsCi89lKcXaTW,City of South San Francisco,04 - Oakland,,,,,,,,,,,,,,,,,,,,,,,
97,62b3c362d08fa3a771a7b7b2002c8a16,Bay Area 511 South San Francisco Shuttle Schedule,recQgQrtVM1XQrGQ2,aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1TUw==,recPtsCi89lKcXaTW,City of South San Francisco,04 - Oakland,,,,,,,,,,,,,,,,,,,,,,,


In [23]:
# December crosswalk rows for City and County of San Francisco.
sf_crosswalk_mask = dec_crosswalk_df["organization_name"].eq(
    "City and County of San Francisco"
)
dec_crosswalk_df.loc[sf_crosswalk_mask]

Unnamed: 0,schedule_gtfs_dataset_key,name,schedule_source_record_id,base64_url,organization_source_record_id,organization_name,caltrans_district,counties_served,hq_city,hq_county,is_public_entity,is_publicly_operating,funding_sources,on_demand_vehicles_at_max_service,vehicles_at_max_service,number_of_state_counties,primary_uza_name,density,number_of_counties_with_service,state_admin_funds_expended,service_area_sq_miles,population,service_area_pop,subrecipient_type,primary_uza_code,reporter_type,organization_type,voms_pt,voms_do,year
90,7cc0cb1871dfd558f11a2885c145d144,Bay Area 511 Muni Schedule,recHD22phgJs34JHP,aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1TRg==,rechaapWbeffO33OX,City and County of San Francisco,04 - Oakland,San Francisco,San Francisco,San Francisco,True,True,5307;5309;5310;5337;5339,663,774,,"San Francisco--Oakland, CA",6943.0,,,49,3515933.0,842754,,,Full Reporter,"City, County or Local Government Unit or Department of Transportation",94,658,2022
91,7cc0cb1871dfd558f11a2885c145d144,Bay Area 511 Muni Schedule,recHD22phgJs34JHP,aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1TRg==,rechaapWbeffO33OX,City and County of San Francisco,04 - Oakland,San Francisco,San Francisco,San Francisco,True,True,5307;5309;5310;5337;5339,663,774,,"San Francisco--Oakland, CA",6943.0,,,49,3515933.0,842754,,,Full Reporter,"City, County or Local Government Unit or Department of Transportation",94,658,2022


In [None]:
nov_crosswalk_df = pd.read_parquet(nov_crosswalk_url)

In [None]:
# Read the September 2024 crosswalk. The path is built from the shared config
# values instead of a hard-coded bucket string; with the `SCHED_GCS` and
# `schedule_tables.gtfs_key_crosswalk` values displayed earlier in this
# notebook it resolves to the same location as before.
sept_crosswalk_url = (
    f"{SCHED_GCS}{GTFS_DATA_DICT.schedule_tables.gtfs_key_crosswalk}"
    "_2024-09-18.parquet"
)
sept_crosswalk_df = pd.read_parquet(sept_crosswalk_url)

In [None]:
# Column names of each monthly crosswalk as sets, so the cells below can use
# set difference to see which columns were added or dropped between months.
sept_cols = set(sept_crosswalk_df.columns)
dec_cols = set(dec_crosswalk_df.columns)
nov_cols = set(nov_crosswalk_df.columns)

In [None]:
nov_cols - sept_cols

In [None]:
sept_cols - dec_cols

In [None]:
dec_cols - sept_cols

In [None]:
# December crosswalk rows for Ventura County Transportation Commission.
ventura_dec_mask = dec_crosswalk_df["organization_name"].eq(
    "Ventura County Transportation Commission"
)
ventura_dec = dec_crosswalk_df.loc[ventura_dec_mask]

In [None]:
ventura_dec[["primary_uza_code", "primary_uza_name"]].drop_duplicates()

In [None]:
# September crosswalk rows for Ventura County Transportation Commission.
ventura_sept_mask = sept_crosswalk_df["organization_name"].eq(
    "Ventura County Transportation Commission"
)
ventura_sept = sept_crosswalk_df.loc[ventura_sept_mask]

In [None]:
ventura_sept[["primary_uza_code", "primary_uza_name"]].drop_duplicates()

In [None]:
crosswalk_df = merge_operator_data.concatenate_crosswalks(analysis_date_list)

In [None]:
crosswalk_df.service_date.unique()

In [None]:
import _section1_utils

In [None]:
organization_name = "Monterey-Salinas Transit"

In [None]:
ntd_profile = _section1_utils.load_operator_ntd_profile(organization_name)

In [None]:
ntd_profile

### Timeliness for Directions 0 and 1 has been missing since October.

In [None]:
schd_vp_url = f"{GTFS_DATA_DICT.digest_tables.dir}{GTFS_DATA_DICT.digest_tables.route_schedule_vp}.parquet"

In [None]:
# Keep only rows that are found in both schedule and real time data
# Read only the selected operator's rows whose `sched_rt_category` is
# "schedule_and_vp", i.e. rows found in both schedule and real-time data.
schd_vp_filters = [
    [
        ("organization_name", "==", organization_name),
        ("sched_rt_category", "==", "schedule_and_vp"),
    ]
]
schd_vp_df = pd.read_parquet(schd_vp_url, filters=schd_vp_filters)

In [None]:
# Distinct (schedule key, service date) pairs present in the schedule/VP table.
schd_vp_key_cols = ["schedule_gtfs_dataset_key", "service_date"]
schd_vp_df_gtfskeys = schd_vp_df[schd_vp_key_cols].drop_duplicates()

In [None]:
schd_vp_df.head(2)

In [None]:
# Schedule-by-route-direction rows for every date in `analysis_date_list`.
schedule_by_route = merge_data.concatenate_schedule_by_route_direction(analysis_date_list)

In [None]:
# Distinct (schedule key, service date) pairs present in the schedule-by-route table.
schedule_by_route_key_cols = ["schedule_gtfs_dataset_key", "service_date"]
schedule_by_route_gtfskeys = schedule_by_route[
    schedule_by_route_key_cols
].drop_duplicates()

In [None]:
# Compare which (schedule key, service date) pairs appear in the schedule/VP
# table versus the schedule-by-route table; `_merge` counts show how many
# pairs are in both, left only, or right only.
#
# The original cell used `df_avg_speeds_gtfskeys` as the left frame, but that
# name is only created further down in the average-speed section, so the cell
# raised NameError on a fresh kernel. The left frame is now the key set built
# just above in this section.
# NOTE(review): confirm that schedule/VP keys (not average-speed keys) are the
# intended comparison for the timeliness question.
schd_vp_vs_schedule_keys = pd.merge(
    schd_vp_df_gtfskeys,
    schedule_by_route_gtfskeys,
    on=["schedule_gtfs_dataset_key", "service_date"],
    how="outer",
    indicator=True,
)
schd_vp_vs_schedule_keys[["_merge"]].value_counts()

In [None]:
import merge_data

In [None]:
from shared_utils import gtfs_utils_v2, publish_utils

### Average Speed Missing for Offpeak and Peak since October
* All-day average speeds are still available.
* Are GTFS keys missing?

In [None]:
# Speeds-by-route-direction rows for every date in `analysis_date_list`.
df_avg_speeds = merge_data.concatenate_speeds_by_route_direction(
    analysis_date_list
)

In [None]:
df_avg_speeds.service_date.unique()

In [None]:
df_avg_speeds.head()

In [None]:
# Distinct (schedule key, service date) pairs present in the average-speed table.
avg_speed_key_cols = ["schedule_gtfs_dataset_key", "service_date"]
df_avg_speeds_gtfskeys = df_avg_speeds[avg_speed_key_cols].drop_duplicates()

In [None]:
# Outer-join the average-speed keys against the schedule/VP keys and count how
# many (schedule key, service date) pairs fall in each `_merge` category.
avg_speed_vs_schd_vp_keys = pd.merge(
    df_avg_speeds_gtfskeys,
    schd_vp_df_gtfskeys,
    how="outer",
    on=["schedule_gtfs_dataset_key", "service_date"],
    indicator=True,
)
avg_speed_vs_schd_vp_keys[["_merge"]].value_counts()