# All Day frequency doesn't correspond with Peak/Offpeak for San Diego MTS's 1 Fashion Valley

In [49]:
import _report_utils
import _section1_utils as section1
import _section2_utils as section2
import geopandas as gpd
import merge_data
import merge_operator_data
import numpy as np
import pandas as pd
from segment_speed_utils import gtfs_schedule_wrangling, helpers
from segment_speed_utils.project_vars import COMPILED_CACHED_VIEWS, PROJECT_CRS
from shared_utils import catalog_utils, portfolio_utils, rt_dates,portfolio_utils, rt_utils, time_helpers
from update_vars import GTFS_DATA_DICT, RT_SCHED_GCS, SCHED_GCS, SEGMENT_GCS

In [2]:
# Notebook display settings: never truncate rows or cell text, show up to
# 100 columns, and print floats with two decimals.
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_colwidth", None)
pd.set_option("display.float_format", "{:.2f}".format)

In [3]:
# Single-element list holding the "feb2025" entry of rt_dates.DATES.
analysis_date_list = [rt_dates.DATES["feb2025"]]

In [4]:
# The "feb2025" entry of rt_dates.DATES; passed to assemble_scheduled_trip_metrics later on.
analysis_date = rt_dates.DATES["feb2025"]

In [5]:
# Parquet path built from the digest_tables entries of GTFS_DATA_DICT
# (directory + route_schedule_vp file name).
schd_vp_url = f"{GTFS_DATA_DICT.digest_tables.dir}{GTFS_DATA_DICT.digest_tables.route_schedule_vp}.parquet"

## Look at San Diego first

In [6]:
sdi = "San Diego Metropolitan Transit System"

In [7]:
# Read only the rows for the operator named in `sdi` whose sched_rt_category
# is "schedule_and_vp"; the filters are pushed down to the parquet reader.
sdi_filters = [
    [
        ("organization_name", "==", sdi),
        ("sched_rt_category", "in", ["schedule_and_vp"]),
    ]
]
df = pd.read_parquet(schd_vp_url, filters=sdi_filters)

In [8]:
# 60 divided by frequency; presumably frequency is trips per hour, giving
# minutes between trips — confirm against the digest table definition.
df["headway_in_minutes"] = 60 / df["frequency"]

In [9]:
df = _report_utils.replace_column_names(df)

In [10]:
df.columns

Index(['schedule_gtfs_dataset_key', 'dir_0_1', 'Period',
       'Average Scheduled Service (trip minutes)',
       'Average Stop Distance (miles)', '# scheduled trips', 'Trips per Hour',
       'is_express', 'is_rapid', 'is_rail', 'is_coverage', 'is_downtown_local',
       'is_local', 'Date', 'Route typology', '# Minutes with 1+ VP per Minute',
       '# Minutes with 2+ VP per Minute', 'Aggregate Actual Service Minutes',
       'Aggregate Scheduled Service Minutes (all trips)', '# VP',
       '# VP within Scheduled Shape', '# Early Arrival Trips',
       '# On-Time Trips', '# Late Trips', '# Trips with VP',
       'Average VP per Minute', '% VP within Scheduled Shape',
       'pct_rt_journey_atleast1_vp', 'pct_rt_journey_atleast2_vp',
       '% Scheduled Trip w/ 1+ VP/Minute', '% Scheduled Trip w/ 2+ VP/Minute',
       'Realtime versus Scheduled Service Ratio',
       'Average Actual Service (Trip Minutes)', 'GTFS Availability',
       'Speed (MPH)', 'route_long_name', 'route_short_nam

In [11]:
df.Date.unique()

array(['2023-04-12T00:00:00.000000000', '2023-05-17T00:00:00.000000000',
       '2023-06-14T00:00:00.000000000', '2023-07-12T00:00:00.000000000',
       '2023-08-15T00:00:00.000000000', '2023-03-15T00:00:00.000000000',
       '2023-09-13T00:00:00.000000000', '2023-10-11T00:00:00.000000000',
       '2023-11-15T00:00:00.000000000', '2023-12-13T00:00:00.000000000',
       '2024-01-17T00:00:00.000000000', '2024-02-14T00:00:00.000000000',
       '2024-03-13T00:00:00.000000000', '2024-04-17T00:00:00.000000000',
       '2024-05-22T00:00:00.000000000', '2024-06-12T00:00:00.000000000',
       '2024-07-17T00:00:00.000000000', '2024-08-14T00:00:00.000000000',
       '2024-09-18T00:00:00.000000000', '2024-10-16T00:00:00.000000000',
       '2024-11-13T00:00:00.000000000', '2024-12-11T00:00:00.000000000',
       '2025-01-15T00:00:00.000000000', '2025-02-12T00:00:00.000000000'],
      dtype='datetime64[ns]')

### In the original dataset, there are more trips per hour for all_day than in peak and offpeak

In [12]:
# Narrow to one route on one service date so the all_day / peak / offpeak
# rows can be compared side by side.
is_fashion_valley = df["Route"] == "1 Fashion Valley - La Mesa"
is_feb_service_date = df["Date"] == "2025-02-12T00:00:00.000000000"
fashion_valley = df[is_fashion_valley & is_feb_service_date].reset_index(drop=True)

In [13]:
fashion_valley[["Date", "headway_in_minutes", "Trips per Hour", "Period"]]

Unnamed: 0,Date,headway_in_minutes,Trips per Hour,Period
0,2025-02-12,23.26,2.58,all_day
1,2025-02-12,43.48,1.38,offpeak
2,2025-02-12,49.59,1.21,peak
3,2025-02-12,22.47,2.67,all_day
4,2025-02-12,41.1,1.46,offpeak
5,2025-02-12,49.59,1.21,peak


## Check other routes & operators

In [14]:
# Same table as above, but for every operator: keep only rows whose
# sched_rt_category is "schedule_and_vp".
all_ops_filters = [
    [
        ("sched_rt_category", "in", ["schedule_and_vp"]),
    ]
]
all_ops = pd.read_parquet(schd_vp_url, filters=all_ops_filters)

In [15]:
# 60 divided by frequency; presumably frequency is trips per hour, giving
# minutes between trips — confirm against the digest table definition.
all_ops["headway_in_minutes"] = 60 / all_ops["frequency"]

In [16]:
all_ops = _report_utils.replace_column_names(all_ops)

In [17]:
# Keep a single service date and renumber the rows from zero.
on_feb_service_date = all_ops["Date"] == "2025-02-12T00:00:00.000000000"
all_ops = all_ops[on_feb_service_date].reset_index(drop=True)

In [41]:
# Columns needed to compare frequency across periods; repeated rows are
# dropped after the projection.
frequency_comparison_cols = [
    "Organization",
    "Direction",
    "Period",
    "headway_in_minutes",
    "Trips per Hour",
    "Route",
    "Route ID",
]
all_ops2 = all_ops[frequency_comparison_cols].drop_duplicates()

## `all_day` trips per hour equals the sum of the peak and offpeak values (e.g. 1.50 + 1.12 = 2.62 for route 10 Eastbound below), which makes it look like more trips are running per hour than in reality.

In [42]:
all_ops2.loc[all_ops2.Route == "10 E. 14th St. - Mission"].drop_duplicates()

Unnamed: 0,Organization,Direction,Period,headway_in_minutes,Trips per Hour,Route,Route ID
9156,Alameda-Contra Costa Transit District,Eastbound,all_day,22.9,2.62,10 E. 14th St. - Mission,10
9157,Alameda-Contra Costa Transit District,Eastbound,offpeak,40.0,1.5,10 E. 14th St. - Mission,10
9158,Alameda-Contra Costa Transit District,Eastbound,peak,53.57,1.12,10 E. 14th St. - Mission,10
9159,Alameda-Contra Costa Transit District,Westbound,all_day,21.82,2.75,10 E. 14th St. - Mission,10
9160,Alameda-Contra Costa Transit District,Westbound,offpeak,37.04,1.62,10 E. 14th St. - Mission,10
9161,Alameda-Contra Costa Transit District,Westbound,peak,53.57,1.12,10 E. 14th St. - Mission,10


In [24]:
all_ops2.loc[all_ops2.Route == " Los Olivos Loop"].drop_duplicates()

Unnamed: 0,Organization,Direction,Period,headway_in_minutes,Trips per Hour,Route
12352,City of Solvang,Eastbound,all_day,181.82,0.33,Los Olivos Loop
12353,City of Solvang,Eastbound,offpeak,500.0,0.12,Los Olivos Loop
12354,City of Solvang,Eastbound,peak,285.71,0.21,Los Olivos Loop


## Recalculate this metric?? `gtfs_funnel/schedule_stats_by_route_direction`

In [30]:
import sys

# Put ../gtfs_funnel/ on the module search path so the import below can resolve.
# The path is relative, so this depends on the notebook's working directory.
sys.path.append("../gtfs_funnel/")
import schedule_stats_by_route_direction

In [28]:
feb_sched = pd.read_parquet("gs://calitp-analytics-data/data-analyses/rt_vs_schedule/schedule_route_dir/schedule_route_direction_metrics_2025-02-12.parquet")

In [29]:
feb_sched.head(1)

Unnamed: 0,geometry,schedule_gtfs_dataset_key,route_id,direction_id,common_shape_id,route_name,avg_scheduled_service_minutes,avg_stop_miles,n_trips,time_period,peak_offpeak,frequency,is_coverage,is_downtown_local,is_local,is_rapid,is_express,is_rail,route_primary_direction
0,"b'\x01\x02\x00\x00\x00i\x00\x00\x00\xe8\xd4\x8dE\xdb\xd3\xf5@\x80\x98\xaf;\xd1\xac\xe3\xc0\xdd\x93\xe8\x99\x1b\xd4\xf5@\x80\xa71\x0fw\xad\xe3\xc0E\x16\r\x8aP\xd4\xf5@@\x8a\xc5\xc8\x07\xae\xe3\xc0\x0f\xe5B\x1c\xb6\xd4\xf5@\x80\xc9\x97\x18\x1b\xaf\xe3\xc0\'*f#\x16\xd5\xf5@\x00\xefaP@\xb0\xe3\xc0S\xb7\xd4*\x82\xd5\xf5@\x00\xc7\xe1u\xec\xb1\xe3\xc0rL?\xd6\x05\xd6\xf5@\x00F\x08\xe1b\xb4\xe3\xc0\xac9\x99@E\xd6\xf5@@X\xdeyl\xb6\xe3\xc0\xc4\x9c\xbf\x94y\xd6\xf5@\x00\'\xb8\xe2\x99\xb8\xe3\xc0\x9e\x97\x06\xbd\xaf\xd6\xf5@\x80~\x15\xc7\x1c\xbc\xe3\xc0mh*\xef\xe3\xd6\xf5@\xc0\xae\x91I1\xbe\xe3\xc0\xc3&F\xbd\x12\xd7\xf5@@\xc0h\xe7l\xbf\xe3\xc0\xd2\xa5\xdc%\xb0\xd7\xf5@\x80M^\x1b\x1f\xc1\xe3\xc0\xa9\xd0\xd3\x01c\xd8\xf5@\x00_.\xc5\t\xc3\xe3\xc0\x95\xbd\xe4\x02\xd4\xd8\xf5@@v\xb1K9\xc4\xe3\xc0\xb6\x14=V\xe2\xd8\xf5@\x80\xed\xcf\xadc\xc4\xe3\xc0.2m\xdbe\xd9\xf5@\xc0\x9e\xb8\xde\xa7\xc3\xe3\xc0\xa9?\xc6z\xd4\xd9\xf5@\xc0 \xa0\xc1\x1a\xc3\xe3\xc0\x89\x96\xf3[\xe7\xdb\xf5@\x80Q\xc3#\x96\xc0\xe3\xc0\x8e\x07E8\x06\xdc\xf5@\x80j\x9a\x04y\xc0\xe3\xc0\xbe\x86h\xb0\x1e\xdd\xf5@\x00\xfbl\x13a\xbf\xe3\xc0B\xc2d]\xc8\xdd\xf5@\xc0\x86\xf0;\xb6\xbe\xe3\xc0\x1d\xb8\x04\xe5\xab\xde\xf5@@C\xdd\xd2\x14\xbe\xe3\xc0d]\xed\xf6x\xdf\xf5@\x00\xf6\x8c\xae\x85\xbd\xe3\xc0d\xd0\xb0\xe7o\xe0\xf5@@K*`\xa7\xbc\xe3\xc0\xc1\xac\xcf\xc0\xd3\xe1\xf5@\x00U\x05S\n\xbc\xe3\xc0\x15\xeay]\x05\xe5\xf5@\x804T\x84\xf1\xba\xe3\xc0Qkp\x85~\xe5\xf5@@\xacj\xaf\xe0\xba\xe3\xc0\x88y0F@\xe8\xf5@\x80\xfa\xd2\x07u\xba\xe3\xc0\xc5\x9a\xe5^\xaa\xe9\xf5@\xc0\x14\xc3\xacI\xba\xe3\xc0\xf6\x031\x06:\xea\xf5@@\xa4W\xaf-\xba\xe3\xc0\x86S]\xbf\x98\xeb\xf5@\x80`\x8b\xc8\xf0\xb9\xe3\xc0\x87<\xa23\x95\xed\xf5@@\xf9)\x07\x94\xb9\xe3\xc0\xbax\x9dl\xa6\xf2\xf5@\x80\xcf\xfcV\xad\xb8\xe3\xc0\xc3er!8\xf5\xf5@@\xdf\xccV-\xb8\xe3\xc0\x1a\xc9\x03\x00\n\xf6\xf5@@\xe87\xcd\x08\xb8\xe3\xc0\xdbL\x8b\x92\x97\xf8\xf5@\x80+E\xab\x9a\xb7\xe3\xc0\x1d\x87-\xe8\xc6\xfa\xf5@@\x04\xb5\xb1U\xb7\xe3\xc0\x99?\xd9,?\xfc\xf5@\x00\xe4\xbb<8\xb7\xe3\xc0\xb3U\xbf\xa2\x
ce\xfe\xf5@@\\\x81\xfc""\xb7\xe3\xc0\x9b\xe6Q\xb6\xd7\xff\xf5@@\x06\xbc\xf0\'\xb7\xe3\xc0\xf1\xf8l\xfa\xdf\x01\xf6@\xc0\x9dI\x80.\xb7\xe3\xc0\xf24fS\x13\x03\xf6@\xc0\xd8c\xfb.\xb7\xe3\xc0\x89\x1cM\xc0K\x03\xf6@\x80.\x01\xe14\xb7\xe3\xc0`>\xf2A[\x03\xf6@\x80PN\x8b4\xb7\xe3\xc0o)\x1af\xf6\x05\xf6@\x80\xa0\'\xbbi\xb7\xe3\xc07%\xc3\xce8\x08\xf6@\x00U\t\x0f\xaf\xb7\xe3\xc0\x12\x02p\x1a\xe2\x08\xf6@\x80\xbe\x9bM\xc4\xb7\xe3\xc0\x8c\xe6\xa6\x82\xa6\x0b\xf6@\xc0\x94\xf5\xbb?\xb8\xe3\xc0\xb8\xbdr7r\x0e\xf6@\x00ug?\xe9\xb8\xe3\xc0\xd3\xd6\x1b-\xca\x10\xf6@\x00\xdeT\xccx\xb9\xe3\xc0\xed^\x19p\xe4\x12\xf6@@\x91&(F\xba\xe3\xc0\x81\x96\xe63\x1b\x14\xf6@\x80\x89\xae\xd3\xb4\xba\xe3\xc0\xc5c\x14\x99\xce\x14\xf6@\xc0\xac\x00\x89\xf4\xba\xe3\xc0\xc7\x08\xcdW\xe9\x15\xf6@\xc0\xe6\xd9\xd3\x83\xbb\xe3\xc0\xef#\xb7\xe1\xe8\x19\xf6@\xc0\xaf\xc6d\x8a\xbd\xe3\xc0\x08\xb9\xaaC\xb4\x1c\xf6@\xc0w\xaf\x18\xfb\xbe\xe3\xc0\xd2\x04}\xf1\x8e\x1c\xf6@\xc0\x15Ox\x1e\xc0\xe3\xc0\x12q*\xda\x0e)\xf6@\xc0\xbcz\x12\xa0\xc5\xe3\xc0m\xcf\xbcA\x9c(\xf6@\xc0\xb8\n\\<\xc9\xe3\xc01d\x0fF\xc3\'\xf6@\x007xM\xd0\xcb\xe3\xc0K1\x93\xba2&\xf6@\x80E l\xb0\xcd\xe3\xc0 
\xf4su\xa3&\xf6@\x005\xeb[(\xdf\xe3\xc09qo\x89\xb0&\xf6@\x00\xbd0\x13(\xdf\xe3\xc0\xb8\xe7_lI2\xf6@\x80\xc3\x17\xcd\xde\xe7\xe3\xc0c\x1a2%\x1b7\xf6@@\xf4\xbd""\xcf\xea\xe3\xc0j~@\x03\n;\xf6@\x00`\xcf\xff\xe3\xec\xe3\xc0W\xe7E\xc4\xb1?\xf6@\x80:\x11y\xc1\xec\xe3\xc0\xd5\x94Oi\xaa?\xf6@@\xeb\x08T\xb4\xec\xe3\xc0{\xee\xce\xc7\xcc@\xf6@\x80\xe0\x8c\xe0\xa6\xec\xe3\xc0\xe0@\x9e\x91.B\xf6@@\xfe\xa2{\x94\xec\xe3\xc0\xa5)X\xf0\x9dB\xf6@\x00\x88F\r\x92\xec\xe3\xc0\xbc\xfe^\xda\x82D\xf6@\xc0+uY\x80\xec\xe3\xc0\x88T+gkE\xf6@\xc0\x9b&\x99p\xec\xe3\xc0-\x1c_@\xc7H\xf6@\xc0\x84\x86\x06L\xec\xe3\xc0\xa2\x11t\xba\x82I\xf6@@\x18\xe5^D\xec\xe3\xc0\xd7t@b\x82I\xf6@\x80\xd3\xc1\x90\x13\xf3\xe3\xc0]\x1eT\xa7\x83I\xf6@\xc0\x84\x89(\x06\xf9\xe3\xc0\x15\xf50J~I\xf6@\xc0-\xe0\xc9\xf0\xfe\xe3\xc0N\xc5>C\x82I\xf6@@\xc1\x00x\xe0\xfe\xe3\xc0\xe5\xf8\x99\x9b\x82I\xf6@\xc07v#Y\x01\xe4\xc0Tw\x8b+\x83I\xf6@\x00t\xb1""\xc4\x03\xe4\xc0\xae\x83\xf4\x19\x83I\xf6@\x8061\x9b\xb9\x04\xe4\xc0\xe6n!\xbb\x83I\xf6@\x80\xde=\xf92\x08\xe4\xc0@\xd3kC\x84I\xf6@\x80\x173\x8e\x9a\x0b\xe4\xc0cH\xa0\x85\x83I\xf6@\x80-]\x11\x1b\x0f\xe4\xc0\x82\xaa\xd2\x12\x83I\xf6@\x80\x83y\x18\xcd\x10\xe4\xc0\x8a[\x96\x82\x83I\xf6@@>\x06\r\x1f\x12\xe4\xc0(\xdc$8\x83I\xf6@\xc0\xd6\xbe\xda\x83\x15\xe4\xc0{\xdc\xb91\x81I\xf6@@P\xb2V}\x15\xe4\xc0\x8a\xcd\x0eH\x84I\xf6@\x00b\xbaq\xd7\'\xe4\xc0D\xd6\xee\xf9\x83I\xf6@@\xd0e\xe9\xa9,\xe4\xc052\xb3d\x84I\xf6@\xc0]QO\xf8-\xe4\xc0\xc8P\rr\x18J\xf6@\x00\xce\xab\xa1\xf8-\xe4\xc0^\xe5\xa2\x86\xabM\xf6@\x80\xffni\xf6-\xe4\xc0\xf5\xd5\x1d#\xf5W\xf6@\x00}1\xe0\xc3-\xe4\xc0\xb7\xe5Yc\x92^\xf6@@\x1d\xad\xde\xa5-\xe4\xc0\rx\xef9Ic\xf6@\x80\xe2\x9a\xf1\x8e-\xe4\xc0\xb5\xa3FHdc\xf6@\x80+\x08L\xc0.\xe4\xc02\x1b+\xa6dc\xf6@\xc0\xf0\x16\x06\x040\xe4\xc0\xc9\x9d\x1a\xc0\x85c\xf6@\xc0\xa1\x1b\x18\x802\xe4\xc0\xc3\x8f\xc0\xaa9d\xf6@\x00\x89}\xa8\x1c6\xe4\xc0\xe2\x02\xcc\x8d\xfae\xf6@@\x84\xecdz9\xe4\xc0w\x84C)pg\xf6@@\xd0ITr:\xe4\xc0\xc5\x0f\xa0\t\xfag\xf6@\x80>\x14%\xc0:\xe4\xc0'",cb8a465cffec67c8fd90f31b389ed
4c3,76755,0.0,p_1435956,Purple Line,10.0,2.3,21,all_day,,0.88,1.0,1.0,0.0,0.0,0.0,0.0,Eastbound


In [37]:
# Keys that identify one route-direction within one schedule feed; used as
# the grouping columns for the trip counts in this notebook.
group_merge_cols = ["schedule_gtfs_dataset_key", "route_id", "direction_id"]

In [36]:
# Trip-level scheduled metrics for the analysis date.
trip_metrics = schedule_stats_by_route_direction.assemble_scheduled_trip_metrics(
    analysis_date,
    GTFS_DATA_DICT,
)

# Replace missing direction_id values with 0.
trip_metrics["direction_id"] = trip_metrics["direction_id"].fillna(0)

## Go to `rt_segment_speeds/gtfs_schedule_wrangling`

In [39]:
# Re-run the peak/offpeak aggregation on the trip-level table, asking for the
# long layout, so its frequency output can be compared with the digest values.
service_freq_df = gtfs_schedule_wrangling.aggregate_time_of_day_to_peak_offpeak(
    trip_metrics,
    group_merge_cols,
    long_or_wide="long",
)

In [44]:
all_ops2.loc[all_ops2["Route ID"] == "0177a66b-9f33-407d-a72e-776429fb73d4"].drop_duplicates()

Unnamed: 0,Organization,Direction,Period,headway_in_minutes,Trips per Hour,Route,Route ID
0,Tulare County Regional Transit Agency,Eastbound,all_day,750.0,0.08,C70 LOOP 70,0177a66b-9f33-407d-a72e-776429fb73d4
1,Tulare County Regional Transit Agency,Eastbound,offpeak,1500.0,0.04,C70 LOOP 70,0177a66b-9f33-407d-a72e-776429fb73d4
2,Tulare County Regional Transit Agency,Eastbound,peak,1500.0,0.04,C70 LOOP 70,0177a66b-9f33-407d-a72e-776429fb73d4


In [47]:
trip_metrics.loc[trip_metrics["route_id"] == "0177a66b-9f33-407d-a72e-776429fb73d4"].drop_duplicates()

Unnamed: 0,schedule_gtfs_dataset_key,trip_instance_key,median_stop_meters,time_of_day,scheduled_service_minutes,route_id,direction_id
140,0139b1253130b33adcd4b3a4490530d2,76e57196b9f67ff4f84a74b4e0d2b150,9625.7,Midday,62.0,0177a66b-9f33-407d-a72e-776429fb73d4,0.0
213,0139b1253130b33adcd4b3a4490530d2,b1ed510f7a2c53b2c59e34a22634e8fa,9625.7,AM Peak,65.0,0177a66b-9f33-407d-a72e-776429fb73d4,0.0


In [46]:
service_freq_df.loc[service_freq_df["route_id"] == "0177a66b-9f33-407d-a72e-776429fb73d4"].drop_duplicates()

Unnamed: 0,schedule_gtfs_dataset_key,route_id,direction_id,n_trips,time_period,peak_offpeak,frequency
0,0139b1253130b33adcd4b3a4490530d2,0177a66b-9f33-407d-a72e-776429fb73d4,0.0,2,all_day,,0.08
4614,0139b1253130b33adcd4b3a4490530d2,0177a66b-9f33-407d-a72e-776429fb73d4,0.0,1,,offpeak,0.04
4615,0139b1253130b33adcd4b3a4490530d2,0177a66b-9f33-407d-a72e-776429fb73d4,0.0,1,,peak,0.04


In [50]:
# Sum the hours of the time-of-day buckets listed in PEAK_PERIODS, and
# separately the hours of every other bucket.
# NOTE(review): the outputs in this notebook show 8 peak and 16 offpeak hours,
# yet the 1-trip peak and offpeak rows of service_freq_df both show a frequency
# of 0.04 (about 1/24, not 1/8 or 1/16) — check which denominator
# aggregate_time_of_day_to_peak_offpeak actually uses.
hours_by_period = time_helpers.HOURS_BY_TIME_OF_DAY

peak_hours = sum(
    hours
    for period, hours in hours_by_period.items()
    if period in time_helpers.PEAK_PERIODS
)

offpeak_hours = sum(
    hours
    for period, hours in hours_by_period.items()
    if period not in time_helpers.PEAK_PERIODS
)

In [52]:
offpeak_hours

16

In [51]:
peak_hours

8

In [53]:
trip_metrics.head(1)

Unnamed: 0,schedule_gtfs_dataset_key,trip_instance_key,median_stop_meters,time_of_day,scheduled_service_minutes,route_id,direction_id
0,0139b1253130b33adcd4b3a4490530d2,0040e3cfa3a954c543b05c41235ca529,5425.96,Midday,40.0,T5,0.0


In [55]:
trip_metrics = gtfs_schedule_wrangling.add_peak_offpeak_column(trip_metrics)

In [56]:
trip_metrics.head(1)

Unnamed: 0,schedule_gtfs_dataset_key,trip_instance_key,median_stop_meters,time_of_day,scheduled_service_minutes,route_id,direction_id,peak_offpeak
0,0139b1253130b33adcd4b3a4490530d2,0040e3cfa3a954c543b05c41235ca529,5425.96,Midday,40.0,T5,0.0,offpeak


In [58]:
trip_metrics.time_of_day.value_counts()

Midday      33577
PM Peak     33501
AM Peak     21618
Early AM    11458
Evening     10417
Owl          3854
Name: time_of_day, dtype: int64

In [59]:
trip_metrics.peak_offpeak.value_counts()

offpeak    59306
peak       55119
Name: peak_offpeak, dtype: int64

### `count_trips_by_group`

In [60]:
# Count trips per route-direction across the whole day; dropna=False keeps
# groups whose key columns contain missing values.
all_day = (
    trip_metrics.groupby(group_merge_cols, dropna=False)
    .agg(n_trips=("trip_instance_key", "count"))
    .reset_index()
)

In [62]:
all_day.loc[all_day["route_id"] == "0177a66b-9f33-407d-a72e-776429fb73d4"].drop_duplicates()

Unnamed: 0,schedule_gtfs_dataset_key,route_id,direction_id,n_trips
0,0139b1253130b33adcd4b3a4490530d2,0177a66b-9f33-407d-a72e-776429fb73d4,0.0,2


In [63]:
# Count trips per route-direction separately for each peak_offpeak value;
# dropna=False keeps groups whose key columns contain missing values.
peak_offpeak_group_cols = group_merge_cols + ["peak_offpeak"]
peak_offpeak = (
    trip_metrics.groupby(peak_offpeak_group_cols, dropna=False)
    .agg(n_trips=("trip_instance_key", "count"))
    .reset_index()
)

In [64]:
peak_offpeak.loc[peak_offpeak["route_id"] == "0177a66b-9f33-407d-a72e-776429fb73d4"].drop_duplicates()

Unnamed: 0,schedule_gtfs_dataset_key,route_id,direction_id,peak_offpeak,n_trips
0,0139b1253130b33adcd4b3a4490530d2,0177a66b-9f33-407d-a72e-776429fb73d4,0.0,offpeak,1
1,0139b1253130b33adcd4b3a4490530d2,0177a66b-9f33-407d-a72e-776429fb73d4,0.0,peak,1
