## Adjusting `service_hours` and `operator_profiles` (with NTD data) for publication on the public GCS page

In [1]:
import geopandas as gpd
import pandas as pd

from pathlib import Path
from typing import Literal

from calitp_data_analysis import utils
from shared_utils import publish_utils
from update_vars import GTFS_DATA_DICT 
  
# Read the PUBLIC_GCS entry from the `gcs_paths` section of the shared config
# (presumably the public bucket location named in the title — TODO confirm).
PUBLIC_GCS = GTFS_DATA_DICT.gcs_paths.PUBLIC_GCS

In [2]:
# Notebook-wide pandas display settings, all applied through the same API.
# Show up to 100 columns and render floats with two decimals.
pd.set_option("display.max_columns", 100)
pd.set_option("display.float_format", "{:.2f}".format)
# None removes the row limit and the column-width truncation.
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", None)

In [3]:
import _gtfs_digest_dataset

In [4]:
# Load the service-hours table returned by the project helper module.
service_hours_df = _gtfs_digest_dataset.total_service_hours_all_months()

In [5]:
# (rows, columns) of the service-hours table.
service_hours_df.shape

(18022, 6)

In [6]:
# service_hours_df.head(2)

In [7]:
# service_hours_df.name.nunique()

In [8]:
# service_hours_df.month.unique()

In [9]:
# Load the operator-profiles table returned by the project helper module.
operator_profiles = _gtfs_digest_dataset.load_operator_profiles()

In [10]:
# (rows, columns) of the operator-profiles table; compare with the unique-key count below.
operator_profiles.shape

(260, 26)

In [11]:
# Peek at the first two rows to see which columns are available.
operator_profiles.head(2)

Unnamed: 0,schedule_gtfs_dataset_key,operator_n_routes,operator_n_trips,operator_n_shapes,operator_n_stops,operator_n_arrivals,operator_route_length_miles,operator_arrivals_per_stop,n_downtown_local_routes,n_local_routes,n_coverage_routes,n_rapid_routes,n_express_routes,n_rail_routes,name,organization_source_record_id,organization_name,service_date,counties_served,service_area_sq_miles,hq_city,uza_name,service_area_pop,organization_type,primary_uza,reporter_type
0,ff72e90ec439c37fe3ce0d3273a0073e,9,139,10,159,3771,85.34,23.72,2,0,8,10,0,0,SLO Schedule,recMM99msxjmc6PPv,City of San Luis Obispo,2024-04-17,San Luis Obispo,22.0,San Luis Obispo,"San Luis Obispo, CA",46997.0,County or Local Government Unit or Department of Transportation,,Full Reporter
1,c388c692579412414b6b4bdb91c6561c,3,150,10,63,3066,20.48,48.67,3,0,1,2,0,0,Laguna Beach Schedule,rec6Z3DnERm3OwFzw,City of Laguna Beach,2024-04-17,Orange,9.0,Laguna Beach,"Mission Viejo--Lake Forest--Laguna Niguel, CA",23190.0,County or Local Government Unit or Department of Transportation,,Reduced Reporter


In [12]:
# Count distinct feed keys; if this equals the row count, every row has its own key.
operator_profiles["schedule_gtfs_dataset_key"].nunique()

260

In [13]:
# Count distinct operator names; fewer names than rows means some names repeat.
# Bracket access avoids confusion with the `.name` attribute of Series/Index objects.
operator_profiles["name"].nunique()

164

In [14]:
# The ten operator names that appear on the most rows.
rows_per_name = operator_profiles["name"].value_counts()
rows_per_name.head(10)

Auburn Schedule               4
Long Beach Schedule           4
Fresno County Schedule        4
Monterey Salinas Schedule     3
Clean Air Express Schedule    3
Maywood Schedule              3
SLORTA Schedule               3
Unitrans Schedule             3
Humboldt Schedule             3
OCTA Schedule                 3
Name: name, dtype: int64

In [18]:
# Summary statistics of how many rows each operator name has.
operator_profiles["name"].value_counts().describe()

count   164.00
mean      1.59
std       0.75
min       1.00
25%       1.00
50%       1.00
75%       2.00
max       4.00
Name: name, dtype: float64

In [15]:
# Show every profile row recorded under the name "Auburn Schedule".
operator_profiles[operator_profiles["name"].eq("Auburn Schedule")]

Unnamed: 0,schedule_gtfs_dataset_key,operator_n_routes,operator_n_trips,operator_n_shapes,operator_n_stops,operator_n_arrivals,operator_route_length_miles,operator_arrivals_per_stop,n_downtown_local_routes,n_local_routes,n_coverage_routes,n_rapid_routes,n_express_routes,n_rail_routes,name,organization_source_record_id,organization_name,service_date,counties_served,service_area_sq_miles,hq_city,uza_name,service_area_pop,organization_type,primary_uza,reporter_type
109,020467a276c12a9fe4b0a2332e393f2c,1,8,1,32,256,12.74,8.0,0,0,1,0,0,0,Auburn Schedule,recbW86Xrtuw8PhiU,City of Auburn,2024-04-17,,,Auburn,,,County or Local Government Unit or Department of Transportation,,Rural Reporter
146,83a293ce449a611b01f08929a7fcaab0,1,8,1,32,256,12.74,8.0,0,0,1,0,0,0,Auburn Schedule,recbW86Xrtuw8PhiU,City of Auburn,2024-03-13,,,,,,,,
158,2e70745409b9369778a606bc37c79abc,1,8,1,32,256,12.74,8.0,0,0,1,0,0,0,Auburn Schedule,recbW86Xrtuw8PhiU,City of Auburn,2024-02-14,,,,,,,,
167,685b58f9c35b765f19b26ff46afd7705,1,8,1,32,256,12.74,8.0,0,0,1,0,0,0,Auburn Schedule,recbW86Xrtuw8PhiU,City of Auburn,2023-12-13,,,,,,,,


In [17]:
# Show every profile row recorded under the name "Long Beach Schedule".
operator_profiles[operator_profiles["name"].eq("Long Beach Schedule")]

Unnamed: 0,schedule_gtfs_dataset_key,operator_n_routes,operator_n_trips,operator_n_shapes,operator_n_stops,operator_n_arrivals,operator_route_length_miles,operator_arrivals_per_stop,n_downtown_local_routes,n_local_routes,n_coverage_routes,n_rapid_routes,n_express_routes,n_rail_routes,name,organization_source_record_id,organization_name,service_date,counties_served,service_area_sq_miles,hq_city,uza_name,service_area_pop,organization_type,primary_uza,reporter_type
136,f1b35a50955aeb498533c1c6fdafbe44,36,1846,110,1900,90799,414.65,47.79,68,3,4,11,0,0,Long Beach Schedule,rec00qSzZL8KqiXAo,Long Beach Transit,2024-04-17,Los Angeles,110.0,Long Beach,"Los Angeles--Long Beach--Anaheim, CA",906752.0,Publicly-Owned or Privately Chartered Corporation,,Full Reporter
189,a2c62c774c7e97b6a510ca5c20c5e3c6,37,1860,100,1905,90277,419.46,47.39,69,3,4,11,0,0,Long Beach Schedule,rec00qSzZL8KqiXAo,Long Beach Transit,2023-08-15,,,,,,,,
217,4846188981dd245c3dd4d1b4ae572ad7,37,1854,104,1897,89794,416.3,47.33,71,3,4,11,0,0,Long Beach Schedule,rec00qSzZL8KqiXAo,Long Beach Transit,2023-06-14,,,,,,,,
241,32230adf5db3a8df91f05947bb1ad658,37,1854,104,1897,89794,416.3,47.33,73,3,4,11,0,0,Long Beach Schedule,rec00qSzZL8KqiXAo,Long Beach Transit,2023-03-15,,,,,,,,


In [16]:
# List the distinct service dates present in the operator profiles.
operator_profiles["service_date"].unique()

array(['2024-04-17T00:00:00.000000000', '2024-03-13T00:00:00.000000000',
       '2024-02-14T00:00:00.000000000', '2024-01-17T00:00:00.000000000',
       '2023-12-13T00:00:00.000000000', '2023-11-15T00:00:00.000000000',
       '2023-10-11T00:00:00.000000000', '2023-09-13T00:00:00.000000000',
       '2023-08-15T00:00:00.000000000', '2023-07-12T00:00:00.000000000',
       '2023-06-14T00:00:00.000000000', '2023-05-17T00:00:00.000000000',
       '2023-04-12T00:00:00.000000000', '2023-03-15T00:00:00.000000000'],
      dtype='datetime64[ns]')

In [None]:
# Config keys of the digest tables of interest
# (presumably entries under GTFS_DATA_DICT["digest_tables"] — TODO confirm).
digest_df_keys = [
    "route_schedule_vp",
    "operator_profile_portfolio_view",
    "operator_sched_rt",
    "scheduled_service_hours",
]

In [None]:
# Display the PUBLIC_GCS value read from the config.
PUBLIC_GCS

In [None]:
# NOTE(review): despite its name, `table_section` holds the `.dir` entry of the
# "digest_tables" config section, not the section key itself.
table_section = GTFS_DATA_DICT["digest_tables"].dir

In [None]:
# Display the `.dir` value read from the "digest_tables" config section.
table_section

In [None]:
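# NOTE(review): `table_section` is assigned from GTFS_DATA_DICT["digest_tables"].dir earlier
# in this notebook, so the lookup below would index GTFS_DATA_DICT with that `.dir` value
# rather than with "digest_tables"; presumably GTFS_DATA_DICT["digest_tables"][f] is what
# was intended — TODO confirm before re-enabling.
# [GTFS_DATA_DICT[table_section][f] for f in digest_df_keys]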