# Non-daily routes (#1589)
* To get data for another date besides 4/16/2025 (which is a Wednesday): 
    * Update the dates in `gtfs_funnel/update_vars`
    * `python download_trips.py`
    * `python schedule_stats_by_route_direction.py`

In [1]:
import re

import geopandas as gpd
import google.auth
import merge_data
import merge_operator_service
import pandas as pd
from update_vars import GTFS_DATA_DICT, RT_SCHED_GCS, SCHED_GCS, SEGMENT_GCS

In [2]:
# Resolve the default Google credentials; `credentials.token` is passed as the
# storage token to the GCS parquet reads further down.
# NOTE(review): google-auth credentials may have `token` unset until they are
# refreshed — confirm the reads authenticate the way you expect.
credentials, project = google.auth.default()

In [3]:
# Notebook display settings: up to 100 columns, floats formatted to two
# decimals, and no limit on the number of rows or on column width.
pd.set_option("display.max_columns", 100)
pd.set_option("display.float_format", "{:.2f}".format)
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", None)

In [4]:
# File stem of the `monthly_route_schedule_vp_report` digest table, taken from
# the GTFS data dictionary.
ROUTE_DIR_MONTH_FILE = GTFS_DATA_DICT.digest_tables.monthly_route_schedule_vp_report

In [5]:
# Read the monthly report parquet from the RT_SCHED_GCS location.
# NOTE(review): `route_dir_month_df` is not referenced again in the visible
# part of this notebook — confirm it is still needed.
route_dir_month_df = pd.read_parquet(f"{RT_SCHED_GCS}{ROUTE_DIR_MONTH_FILE}.parquet")

## Load data

In [6]:
# Show the configured path stem of the schedule route-direction metrics files.
GTFS_DATA_DICT.rt_vs_schedule_tables.sched_route_direction_metrics

'schedule_route_dir/schedule_route_direction_metrics'

In [7]:
# Schedule route-direction metrics file for Wednesday, 2025-04-16.
wed_url = "gs://calitp-analytics-data/data-analyses/rt_vs_schedule/schedule_route_dir/schedule_route_direction_metrics_2025-04-16.parquet"

In [8]:
# Wednesday metrics: read the parquet file, then discard the geometry column.
wed_df = gpd.read_parquet(wed_url, storage_options={"token": credentials.token})
wed_df = wed_df.drop(columns=["geometry"])

In [10]:
# Schedule route-direction metrics file for Monday, 2025-04-14.
mon_url = "gs://calitp-analytics-data/data-analyses/rt_vs_schedule/schedule_route_dir/schedule_route_direction_metrics_2025-04-14.parquet"

In [11]:
# Monday metrics: read the parquet file, then discard the geometry column.
mon_df = gpd.read_parquet(mon_url, storage_options={"token": credentials.token})
mon_df = mon_df.drop(columns=["geometry"])

In [83]:
# Count the Monday rows per `direction_id` value.
mon_df.direction_id.value_counts()

0.00    6259
1.00    5222
Name: direction_id, dtype: int64

In [13]:
def load_dataset(url: str, date: list) -> pd.DataFrame:
    """Load one day's route-direction metrics and label each row with its operator.

    Reads the parquet file at ``url``, attaches the district and analysis name
    from the organization crosswalk for ``date``, and keeps one row per
    ``analysis_name`` + ``route_id`` + ``common_shape_id`` combination.

    Args:
        url: Location of the metrics parquet file; it is read with the token
            of the module-level ``credentials`` object.
        date: Date values forwarded unchanged to
            ``merge_data.concatenate_crosswalk_organization``.

    Returns:
        Table with the columns ``caltrans_district``, ``analysis_name``,
        ``id_col``, ``route_id``, ``common_shape_id``, ``n_trips``,
        ``frequency`` and ``geometry``, de-duplicated on ``id_col``.
    """
    metric_cols = [
        "schedule_gtfs_dataset_key",
        "route_id",
        "common_shape_id",
        "n_trips",
        "frequency",
        "geometry",
    ]
    crosswalk_cols = ["caltrans_district", "schedule_gtfs_dataset_key", "analysis_name"]
    output_cols = [
        "caltrans_district",
        "analysis_name",
        "id_col",
        "route_id",
        "common_shape_id",
        "n_trips",
        "frequency",
        "geometry",
    ]

    metrics = gpd.read_parquet(url, storage_options={"token": credentials.token})
    metrics = metrics[metric_cols]

    crosswalk = merge_data.concatenate_crosswalk_organization(date)
    crosswalk = crosswalk[crosswalk_cols]

    # Left join: every metrics row is kept even without a crosswalk match.
    # The dataset key is only needed for the join, so it is dropped right after.
    merged = pd.merge(metrics, crosswalk, on="schedule_gtfs_dataset_key", how="left")
    merged = merged.drop(columns=["schedule_gtfs_dataset_key"])

    # Key that identifies one operator / route / shape combination.
    # NOTE(review): rows without an `analysis_name` get a missing `id_col` and
    # would collapse into a single row in the de-duplication below — confirm
    # the crosswalk covers every dataset key.
    merged["id_col"] = (
        merged["analysis_name"] + "_" + merged["route_id"] + "_" + merged["common_shape_id"]
    )

    merged = merged.drop_duplicates(subset=["id_col"]).reset_index(drop=True)
    return merged[output_cols]

In [14]:
# Monday table with operator names and the `id_col` key, one row per id.
mon_df2 = load_dataset(mon_url, ["2025-04-14"])

In [15]:
# Wednesday table with operator names and the `id_col` key, one row per id.
wed_df2 = load_dataset(wed_url, ["2025-04-16"])

In [16]:
# Outer-join the two days on `id_col`; the `_merge` indicator records whether
# an id was found on Monday only (left), Wednesday only (right) or both.
# The suffixes are appended to overlapping column names, so the result has
# columns such as `route_idmonday_` / `route_idwednesday_`; later cells select
# columns by exactly these names.
m1 = pd.merge(
    mon_df2,
    wed_df2,
    on=["id_col"],
    suffixes=["monday_", "wednesday_"],
    how="outer",
    indicator=True,
)

In [17]:
# Relabel the merge indicator with weekday names instead of left/right.
m1["_merge"] = (
    m1["_merge"]
    .str.replace("left_only", "_monday_only")
    .str.replace("right_only", "_wednesday_only")
)

In [44]:
# Monday geometries keyed by `id_col`.
mon_gdf = mon_df2[["id_col", "geometry"]]

In [45]:
# Wednesday geometries keyed by `id_col`.
wed_gdf = wed_df2[["id_col", "geometry"]]

In [47]:
# Stack the geometries of both days (Wednesday rows first); an id present on
# both days therefore appears twice in this table.
geometries_df = pd.concat([wed_gdf, mon_gdf], axis=0)

## How many routes do not run daily?

In [18]:
# Number of ids found on both days versus on only one of them.
m1._merge.value_counts()

both               3964
_monday_only         83
_wednesday_only      77
Name: _merge, dtype: int64

In [19]:
# Number of distinct ids that appear on only one of the two days.
m1.loc[m1._merge != "both"].id_col.nunique()

160

## Which agencies have this practice?

In [23]:
def compare_dates(df: pd.DataFrame, day: str, subset_cols: list) -> pd.DataFrame:
    """Return the distinct rows of one merge category with weekday-free column names.

    Args:
        df: Merged table that has a ``_merge`` column.
        day: ``_merge`` value to keep, e.g. ``"_monday_only"``; it is also
            written to a new ``day`` column on the result.
        subset_cols: Columns to keep before de-duplicating.

    Returns:
        De-duplicated rows restricted to ``subset_cols`` plus ``day``, with
        every ``monday_`` / ``wednesday_`` fragment removed from the column
        names (``route_idmonday_`` becomes ``route_id``). The original row
        index is preserved.
    """
    # Select rows and columns in a single .loc call instead of chained indexing.
    selected = df.loc[df["_merge"] == day, subset_cols].drop_duplicates()

    selected["day"] = day

    # The loop variable is named `weekday` so that the `day` argument is never
    # rebound inside the function.
    for weekday in ("monday", "wednesday"):
        selected.columns = selected.columns.str.replace(f"{weekday}_", "", regex=False)

    return selected

In [24]:
# Distinct district / agency pairs that have at least one Monday-only id.
mon_agencies = compare_dates(
    m1, "_monday_only", ["caltrans_districtmonday_", "analysis_namemonday_"]
)

In [26]:
# Distinct district / agency pairs that have at least one Wednesday-only id.
wed_agencies = compare_dates(
    m1,
    "_wednesday_only",
    [
        "caltrans_districtwednesday_",
        "analysis_namewednesday_",
    ],
)

In [27]:
# Stack both agency lists, order them by district and agency, and keep a single
# row per agency. An agency that is in both lists keeps only the `day` value of
# whichever of its rows comes first after sorting.
agencies_df = (
    pd.concat([mon_agencies, wed_agencies], axis=0)
    .sort_values(by=["caltrans_district", "analysis_name"])
    .drop_duplicates(subset=["caltrans_district", "analysis_name"])
)

In [28]:
# Display the agencies that have at least one single-day id.
agencies_df

Unnamed: 0,caltrans_district,analysis_name,day
4065,02 - Redding,Tehama County,_wednesday_only
4047,02 - Redding,Trinity County,_wednesday_only
73,03 - Marysville / Sacramento,Sacramento County,_monday_only
4081,03 - Marysville / Sacramento,Yuba-Sutter Transit Authority,_wednesday_only
1831,04 - Bay Area / Oakland,Alameda-Contra Costa Transit District,_monday_only
4048,04 - Bay Area / Oakland,City of Fairfield,_wednesday_only
937,04 - Bay Area / Oakland,City of Petaluma,_monday_only
4085,04 - Bay Area / Oakland,San Francisco Bay Ferry and Oakland Alameda Water Shuttle Schedule,_wednesday_only
2092,04 - Bay Area / Oakland,San Mateo County Transit District,_monday_only
4049,05 - San Luis Obispo / Santa Barbara,San Luis Obispo Regional Transit Authority,_wednesday_only


## Which routes are they?

In [69]:
# Route-level detail (agency, route, shape, trip count, key) for Monday-only ids.
mon_routes = compare_dates(
    m1,
    "_monday_only",
    [
        "caltrans_districtmonday_",
        "analysis_namemonday_",
        "route_idmonday_",
        "common_shape_idmonday_",
        "n_tripsmonday_",
        "id_col",
    ],
)

In [70]:
# Route-level detail (agency, route, shape, trip count, key) for Wednesday-only ids.
wed_routes = compare_dates(
    m1,
    "_wednesday_only",
    [
        "caltrans_districtwednesday_",
        "analysis_namewednesday_",
        "route_idwednesday_",
        "common_shape_idwednesday_",
        "n_tripswednesday_",
        "id_col",
    ],
)

In [71]:
# Combine the Monday-only and Wednesday-only route rows and drop exact duplicates.
routes_df = pd.concat([mon_routes, wed_routes], axis=0).drop_duplicates()

In [72]:
# Attach a geometry to every single-day route via `id_col` (inner join), then
# order the rows by district, agency and route.
routes_gdf = pd.merge(geometries_df, routes_df, on="id_col", how="inner").sort_values(
    by=["caltrans_district", "analysis_name", "route_id"]
)

In [80]:
# Per agency: number of distinct single-day ids and the sum of their trips.
routes_gdf.groupby(["caltrans_district", "analysis_name"]).agg(
    {"id_col": "nunique", "n_trips": "sum"}
).rename(columns={"id_col": "unique_routes"})

Unnamed: 0_level_0,Unnamed: 1_level_0,unique_routes,n_trips
caltrans_district,analysis_name,Unnamed: 2_level_1,Unnamed: 3_level_1
02 - Redding,Tehama County,1,2.0
02 - Redding,Trinity County,1,1.0
03 - Marysville / Sacramento,Sacramento County,3,24.0
03 - Marysville / Sacramento,Yuba-Sutter Transit Authority,2,4.0
04 - Bay Area / Oakland,Alameda-Contra Costa Transit District,2,2.0
04 - Bay Area / Oakland,City of Fairfield,2,8.0
04 - Bay Area / Oakland,City of Petaluma,1,1.0
04 - Bay Area / Oakland,San Francisco Bay Ferry and Oakland Alameda Water Shuttle Schedule,2,46.0
04 - Bay Area / Oakland,San Mateo County Transit District,2,3.0
05 - San Luis Obispo / Santa Barbara,San Luis Obispo Regional Transit Authority,10,139.0


In [75]:
# Tabular view of the single-day routes without the geometry and key columns.
routes_gdf.drop(columns=["geometry", "id_col"])

Unnamed: 0,caltrans_district,analysis_name,route_id,common_shape_id,n_trips,day
18,02 - Redding,Tehama County,1292,p_2597,2.0,_wednesday_only
0,02 - Redding,Trinity County,180,p_177185,1.0,_wednesday_only
79,03 - Marysville / Sacramento,Sacramento County,Delta,45882,4.0,_monday_only
102,03 - Marysville / Sacramento,Sacramento County,GaltSac,45880,4.0,_monday_only
84,03 - Marysville / Sacramento,Sacramento County,Hwy 99,45876,16.0,_monday_only
34,03 - Marysville / Sacramento,Yuba-Sutter Transit Authority,6465,p_110991,2.0,_wednesday_only
35,03 - Marysville / Sacramento,Yuba-Sutter Transit Authority,6465,p_110993,2.0,_wednesday_only
33,04 - Bay Area / Oakland,Alameda-Contra Costa Transit District,672,shp-672-51,1.0,_wednesday_only
142,04 - Bay Area / Oakland,Alameda-Contra Costa Transit District,672,shp-672-52,1.0,_monday_only
1,04 - Bay Area / Oakland,City of Fairfield,8T,p_1434332,4.0,_wednesday_only


## Look at Specific Routes

In [85]:
# Monday rows for the "all_day" time period.
# NOTE(review): `all_day` is not used again in the visible cells — confirm it is needed.
all_day = mon_df.loc[(mon_df.time_period == "all_day")]

### Culver City 
* Only one direction on Monday and Wednesday.

In [95]:
def specific_route(route: str):
    """Show the Monday and Wednesday rows for one route, then map its shapes.

    Prints a weekday label followed by the matching rows of ``mon_df`` and
    ``wed_df``, and finally displays an interactive map of the matching rows
    of ``routes_gdf`` coloured by the ``day`` column.

    Args:
        route: ``route_id`` value to look up; rows are matched on ``route_id``
            only, without filtering by operator.
    """
    # Same filter applied to each day's table, Monday first.
    for label, day_df in (("Monday", mon_df), ("Wednesday", wed_df)):
        print(label)
        display(day_df.loc[day_df.route_id == route])

    route_shapes = routes_gdf.loc[routes_gdf.route_id == route]
    route_map = route_shapes.explore(
        "day", width=600, height=400, tiles="CartoDB positron"
    )
    display(route_map)

In [92]:
# Route 5C2: compare the Monday and Wednesday rows and map its single-day shapes.
specific_route("5C2")

Unnamed: 0,schedule_gtfs_dataset_key,route_id,direction_id,common_shape_id,avg_scheduled_service_minutes,route_primary_direction,avg_stop_miles,n_trips,time_period,frequency
1574,4b317fc27dde351e12253d46cedd8df0,5C2,1.0,shp-5C2-06,37.5,Northbound,3.26,2,all_day,0.08
1575,4b317fc27dde351e12253d46cedd8df0,5C2,1.0,shp-5C2-06,37.5,Northbound,3.26,1,offpeak,0.06
1576,4b317fc27dde351e12253d46cedd8df0,5C2,1.0,shp-5C2-06,37.5,Northbound,3.26,1,peak,0.12


Unnamed: 0,schedule_gtfs_dataset_key,route_id,direction_id,common_shape_id,avg_scheduled_service_minutes,route_primary_direction,avg_stop_miles,n_trips,time_period,frequency
9,4b317fc27dde351e12253d46cedd8df0,5C2,1.0,shp-5C2-56,36.67,Southbound,3.04,3,all_day,0.12
10,4b317fc27dde351e12253d46cedd8df0,5C2,1.0,shp-5C2-56,36.67,Southbound,3.04,1,offpeak,0.06
11,4b317fc27dde351e12253d46cedd8df0,5C2,1.0,shp-5C2-56,36.67,Southbound,3.04,2,peak,0.25


### City of Glendora: MiddayShuttle:Orange
* Missing direction 1 for both Monday and Wednesday
* Different `common_shape_id` values.

In [96]:
# Route MiddayShuttle:Orange: compare the Monday and Wednesday rows and map its single-day shapes.
specific_route("MiddayShuttle:Orange")

Monday


Unnamed: 0,schedule_gtfs_dataset_key,route_id,direction_id,common_shape_id,avg_scheduled_service_minutes,route_primary_direction,avg_stop_miles,n_trips,time_period,frequency
1817,a532ea8697dcb081f15f8e712f6cf8bd,MiddayShuttle:Orange,0.0,p_901434,72.5,Southbound,1.78,2,all_day,0.08
1818,a532ea8697dcb081f15f8e712f6cf8bd,MiddayShuttle:Orange,0.0,p_901434,72.5,Southbound,1.78,2,offpeak,0.12


Wednesday


Unnamed: 0,schedule_gtfs_dataset_key,route_id,direction_id,common_shape_id,avg_scheduled_service_minutes,route_primary_direction,avg_stop_miles,n_trips,time_period,frequency
589,a532ea8697dcb081f15f8e712f6cf8bd,MiddayShuttle:Orange,0.0,p_901421,25.0,Southbound,2.45,2,all_day,0.08
590,a532ea8697dcb081f15f8e712f6cf8bd,MiddayShuttle:Orange,0.0,p_901421,25.0,Southbound,2.45,1,offpeak,0.06
591,a532ea8697dcb081f15f8e712f6cf8bd,MiddayShuttle:Orange,0.0,p_901421,25.0,Southbound,2.45,1,peak,0.12


### City of Fairfield
* Different shape id for each direction

In [99]:
# Route 8T: compare the Monday and Wednesday rows and map its single-day shapes.
specific_route("8T")

Monday


Unnamed: 0,schedule_gtfs_dataset_key,route_id,direction_id,common_shape_id,avg_scheduled_service_minutes,route_primary_direction,avg_stop_miles,n_trips,time_period,frequency


Wednesday


Unnamed: 0,schedule_gtfs_dataset_key,route_id,direction_id,common_shape_id,avg_scheduled_service_minutes,route_primary_direction,avg_stop_miles,n_trips,time_period,frequency
101,0f5e1b251db53223200c5bfc365d33f2,8T,1.0,p_1434332,28.0,Southbound,6.17,4,all_day,0.17
102,0f5e1b251db53223200c5bfc365d33f2,8T,1.0,p_1434332,28.0,Southbound,6.17,4,peak,0.5
943,0f5e1b251db53223200c5bfc365d33f2,8T,0.0,p_1435106,37.0,Northbound,2.73,4,all_day,0.17
944,0f5e1b251db53223200c5bfc365d33f2,8T,0.0,p_1435106,37.0,Northbound,2.73,4,peak,0.5


### AC Transit
* Same route, different `common_shape_id` value

In [100]:
# Route 672: compare the Monday and Wednesday rows and map its single-day shapes.
specific_route("672")

Monday


Unnamed: 0,schedule_gtfs_dataset_key,route_id,direction_id,common_shape_id,avg_scheduled_service_minutes,route_primary_direction,avg_stop_miles,n_trips,time_period,frequency
5509,c499f905e33929a641f083dad55c521e,672,0.0,shp-672-52,34.0,Westbound,3.7,1,all_day,0.04
5510,c499f905e33929a641f083dad55c521e,672,0.0,shp-672-52,34.0,Westbound,3.7,1,offpeak,0.06
5598,c499f905e33929a641f083dad55c521e,672,1.0,shp-672-01,35.0,Eastbound,3.91,1,all_day,0.04
5599,c499f905e33929a641f083dad55c521e,672,1.0,shp-672-01,35.0,Eastbound,3.91,1,peak,0.12


Wednesday


Unnamed: 0,schedule_gtfs_dataset_key,route_id,direction_id,common_shape_id,avg_scheduled_service_minutes,route_primary_direction,avg_stop_miles,n_trips,time_period,frequency
5366,c499f905e33929a641f083dad55c521e,672,1.0,shp-672-01,35.0,Eastbound,3.91,1,all_day,0.04
5367,c499f905e33929a641f083dad55c521e,672,1.0,shp-672-01,35.0,Eastbound,3.91,1,peak,0.12
5458,c499f905e33929a641f083dad55c521e,672,0.0,shp-672-51,34.0,Westbound,3.7,1,all_day,0.04
5459,c499f905e33929a641f083dad55c521e,672,0.0,shp-672-51,34.0,Westbound,3.7,1,offpeak,0.06
