In [1]:
import os
# Set in the first cell, ahead of the calitp / shared_utils imports in the next cell.
# 10**12 bytes == 1 TB (decimal).
# NOTE(review): presumably the BigQuery bytes-scanned cap read by the calitp tooling -- confirm.
os.environ["CALITP_BQ_MAX_BYTES"] = str(10**12)

In [2]:
import geopandas as gpd
import pandas as pd
import numpy as np

from segment_speed_utils import helpers, gtfs_schedule_wrangling
from shared_utils import rt_dates, gtfs_utils_v2, catalog_utils, portfolio_utils
import folium
import itertools
from calitp_data_analysis.gcs_geopandas import GCSGeoPandas
# Shared GCSGeoPandas instance.
# NOTE(review): not referenced by any cell visible in this section -- drop if unused elsewhere.
gcsgp = GCSGeoPandas()

In [3]:
from update_vars import (analysis_date, AM_PEAK, PM_PEAK, EXPORT_PATH, GCS_FILE_PATH, PROJECT_CRS,
SEGMENT_BUFFER_METERS, AM_PEAK, PM_PEAK, HQ_TRANSIT_THRESHOLD, MS_TRANSIT_THRESHOLD, SHARED_STOP_THRESHOLD)

In [4]:
import create_aggregate_stop_frequencies as casf

In [5]:
# Load the previously exported single-route max-arrivals table.
# NOTE(review): this same path is overwritten by the to_parquet call in the aggregation
# cell further down, so on a fresh run this reads whatever an earlier run wrote.
df = pd.read_parquet(f"{GCS_FILE_PATH}max_arrivals_by_stop_single_route.parquet")

In [6]:
# Display the table loaded from parquet (pandas truncates the middle rows).
df

Unnamed: 0,schedule_gtfs_dataset_key,stop_id,am_max_trips,route_dir,pm_max_trips,am_max_trips_hr,pm_max_trips_hr,n_trips,route_dir_count
0,0089bd1b0a2b78a8590d8749737d7146,40090,6,[17_1],6.0,2.00,1.50,12.0,1
1,0089bd1b0a2b78a8590d8749737d7146,40090,1,[61_0],2.0,0.33,0.50,3.0,1
2,0089bd1b0a2b78a8590d8749737d7146,40091,4,[17_0],6.0,1.33,1.50,10.0,1
3,0089bd1b0a2b78a8590d8749737d7146,40091,1,[61_1],2.0,0.33,0.50,3.0,1
4,0089bd1b0a2b78a8590d8749737d7146,40092,4,[17_0],6.0,1.33,1.50,10.0,1
...,...,...,...,...,...,...,...,...,...
101987,f8e4fa18131802bf978177326377241d,883129,1,[605_0],,0.33,0.00,1.0,1
101988,f8e4fa18131802bf978177326377241d,883130,4,[14_1],6.0,1.33,1.50,10.0,1
101989,f8e4fa18131802bf978177326377241d,883131,4,[4_0],6.0,1.33,1.50,10.0,1
101990,f8e4fa18131802bf978177326377241d,883131,1,[502_0],,0.33,0.00,1.0,1


In [7]:
# Stop times and trips for analysis_date; per the original note this includes lookback feeds
# (the date -> feed-name dict printed below this cell).
st, trips = casf.get_st_trips(analysis_date)
# add_route_dir's result is fed straight into prep_stop_times.
# NOTE(review): running this cell emits a SettingWithCopyWarning for
# `stop_times['peak'] = ...`; that line is not in this notebook -- presumably inside casf.
st_prepped = casf.prep_stop_times(
    casf.add_route_dir(trips=trips, stop_times=st, analysis_date=analysis_date)
)

{'2025-08-20': ['eTrans Schedule', 'Roseville Transit GMV Schedule'], '2025-09-24': ['San Juan Capistrano Trolley Schedule', 'Culver City Schedule'], '2025-10-15': ['Yolobus Schedule', 'Go West Schedule', 'Bay Area 511 Angel Island-Tiburon Ferry Schedule', 'El Monte Schedule', 'Nevada County Schedule']}


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  stop_times['peak'] = stop_times['arrival_hour'].map(peaks_dict)


In [8]:
# Aggregate prepped stop times twice: once with single_route_dir=True, once with False.
max_arrivals_by_stop_single = casf.stop_times_aggregation_max_by_stop(
    st_prepped, analysis_date, single_route_dir=True
)
# Persisted for branching_derived_intersections.py
max_arrivals_by_stop_single.to_parquet(f"{GCS_FILE_PATH}max_arrivals_by_stop_single_route.parquet")
max_arrivals_by_stop_multi = casf.stop_times_aggregation_max_by_stop(
    st_prepped, analysis_date, single_route_dir=False
)

multi_only_explode = casf.get_explode_multiroute_only(
    max_arrivals_by_stop_single,
    max_arrivals_by_stop_multi,
    (HQ_TRANSIT_THRESHOLD, MS_TRANSIT_THRESHOLD),
)
# accumulate_share_count is applied per (schedule_gtfs_dataset_key, stop_id) group.
# share_counts is passed in empty and read back afterwards, so it is presumably
# filled in place by accumulate_share_count -- confirm against casf.
share_counts = {}
multi_only_explode.groupby(['schedule_gtfs_dataset_key', 'stop_id']).apply(
    casf.accumulate_share_count, share_counts=share_counts
)
# Keep only the entries whose count reaches SHARED_STOP_THRESHOLD.
qualify_dict = {
    key: count for key, count in share_counts.items() if count >= SHARED_STOP_THRESHOLD
}

In [9]:
# Rows whose route_dir contains "42" anywhere in the string.
# NOTE(review): this is a substring match, so it also returns route_dirs such as
# 14216_1 and 14213_0 (see the output) -- anchor the pattern if only route 42 is wanted.
multi_only_explode.query('route_dir.str.contains("42")')

Unnamed: 0,schedule_gtfs_dataset_key,stop_id,route_dir
228,076e30b080fdc5501151bd3fb0a37b9e,0013,042_1
248,076e30b080fdc5501151bd3fb0a37b9e,1997,042_0
265,076e30b080fdc5501151bd3fb0a37b9e,3125,042_1
267,076e30b080fdc5501151bd3fb0a37b9e,3183,042_1
269,076e30b080fdc5501151bd3fb0a37b9e,3185,042_0
...,...,...,...
22955,eca94265831d5c499800dd921ecf4011,2558700,14216_1
22956,eca94265831d5c499800dd921ecf4011,2558757,14213_0
22957,eca94265831d5c499800dd921ecf4011,2558757,14216_0
22958,eca94265831d5c499800dd921ecf4011,2558851,14216_0


In [10]:
# Scheduled stops for analysis_date (carries a point geometry column, per the output further down).
stops = helpers.import_scheduled_stops(analysis_date)

In [11]:
# Attach schedule_gtfs_dataset_key to each stop via the distinct
# (feed_key, schedule_gtfs_dataset_key) pairs found in trips.
# Guarded so that re-running this cell does not merge a second time: a repeat merge
# would produce schedule_gtfs_dataset_key_x / _y columns and break the
# `stops.query('schedule_gtfs_dataset_key == ...')` cell below.
# Default inner join: stops whose feed_key has no row in trips are dropped.
if "schedule_gtfs_dataset_key" not in stops.columns:
    stops = stops.merge(
        trips[["feed_key", "schedule_gtfs_dataset_key"]].drop_duplicates(),
        on="feed_key",
    )

In [12]:
# Trips whose feed name contains "Yolo" (Yolobus Schedule is one of the 2025-10-15
# lookback feeds printed above).
trips.query('name.str.contains("Yolo")')

Unnamed: 0,name,feed_key,schedule_gtfs_dataset_key,trip_id,route_id,direction_id,route_type,analysis_date,analysis_name
0,Yolobus Schedule,6769ba94f648570ace873d1a4d17ba82,75ae96721f84cf99b890bdb8fd4099f5,d0058276-0198-4eb8-bd1c-5d5e555ee0c6,42B,0.0,3,2025-10-15,Yolo County Transportation District
1,Yolobus Schedule,6769ba94f648570ace873d1a4d17ba82,75ae96721f84cf99b890bdb8fd4099f5,6dd4a070-0949-4311-964a-e066f35e4b0c,42B,0.0,3,2025-10-15,Yolo County Transportation District
2,Yolobus Schedule,6769ba94f648570ace873d1a4d17ba82,75ae96721f84cf99b890bdb8fd4099f5,1b8af72b-f054-4b08-82ec-a8b60fd10b6f,42B,0.0,3,2025-10-15,Yolo County Transportation District
3,Yolobus Schedule,6769ba94f648570ace873d1a4d17ba82,75ae96721f84cf99b890bdb8fd4099f5,b4ea714b-ce54-478c-9d16-dd682b23a63d,42B,0.0,3,2025-10-15,Yolo County Transportation District
4,Yolobus Schedule,6769ba94f648570ace873d1a4d17ba82,75ae96721f84cf99b890bdb8fd4099f5,3c1ac604-b7b9-43a9-a5c4-4ee7bdf1ef24,42B,0.0,3,2025-10-15,Yolo County Transportation District
...,...,...,...,...,...,...,...,...,...
196,Yolobus Schedule,6769ba94f648570ace873d1a4d17ba82,75ae96721f84cf99b890bdb8fd4099f5,48e28263-9084-4fd9-880a-b9c94ac21d30,43,1.0,3,2025-10-15,Yolo County Transportation District
197,Yolobus Schedule,6769ba94f648570ace873d1a4d17ba82,75ae96721f84cf99b890bdb8fd4099f5,5e3a74d5-5c56-4685-b278-cb89eea5bfe8,43,1.0,3,2025-10-15,Yolo County Transportation District
198,Yolobus Schedule,6769ba94f648570ace873d1a4d17ba82,75ae96721f84cf99b890bdb8fd4099f5,1ba19833-00df-415f-ac62-bfaa2cfc97b5,138,0.0,3,2025-10-15,Yolo County Transportation District
199,Yolobus Schedule,6769ba94f648570ace873d1a4d17ba82,75ae96721f84cf99b890bdb8fd4099f5,cf01b2f1-cb52-40d3-88dc-deecc9f583cd,138,0.0,3,2025-10-15,Yolo County Transportation District


In [13]:
# Stops for the feed key that heads the route_dir "42" query output above.
stops.query('schedule_gtfs_dataset_key == "076e30b080fdc5501151bd3fb0a37b9e"')

Unnamed: 0,feed_key,service_date,feed_timezone,first_stop_arrival_datetime_pacific,last_stop_departure_datetime_pacific,stop_id,stop_key,stop_name,stop_event_count,route_type_0,...,route_type_3,route_type_4,route_type_5,route_type_6,route_type_7,route_type_11,route_type_12,missing_route_type,geometry,schedule_gtfs_dataset_key
35705,cbae8712910fbb4c24027c69d481cc9e,2025-11-05,America/Los_Angeles,2025-11-05 08:19:00,2025-11-05 18:19:00,0002,580ec94e2dde98250912fdee6565c926,Del Monte Center / Gate 1,19,,...,19.0,,,,,,,,POINT (-169535.057 -157452.978),076e30b080fdc5501151bd3fb0a37b9e
35706,cbae8712910fbb4c24027c69d481cc9e,2025-11-05,America/Los_Angeles,2025-11-05 08:03:00,2025-11-05 19:03:00,0003,404d56bbba8b8bedeb4fad2c082e01c6,Del Monte Center / Gate 2,21,,...,21.0,,,,,,,,POINT (-169522.345 -157443.006),076e30b080fdc5501151bd3fb0a37b9e
35707,cbae8712910fbb4c24027c69d481cc9e,2025-11-05,America/Los_Angeles,2025-11-05 08:02:00,2025-11-05 19:02:00,0004,711f0012d88f85ce3e8bf26aff00d4b7,Del Monte Center / Gate 3,40,,...,40.0,,,,,,,,POINT (-169674.843 -157470.966),076e30b080fdc5501151bd3fb0a37b9e
35708,cbae8712910fbb4c24027c69d481cc9e,2025-11-05,America/Los_Angeles,2025-11-05 05:54:00,2025-11-05 21:54:00,0006,1ef80078e2f610f1218d86a1c43afd90,6th Ave / Mission St,73,,...,73.0,,,,,,,,POINT (-171687.260 -160634.964),076e30b080fdc5501151bd3fb0a37b9e
35709,cbae8712910fbb4c24027c69d481cc9e,2025-11-05,America/Los_Angeles,2025-11-05 05:27:00,2025-11-05 21:51:00,0011,ef0a41233926aca8cfeb763c469b2187,Northridge Mall,163,,...,163.0,,,,,,,,POINT (-147884.242 -143324.784),076e30b080fdc5501151bd3fb0a37b9e
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
36664,cbae8712910fbb4c24027c69d481cc9e,2025-11-05,America/Los_Angeles,2025-11-05 06:05:00,2025-11-05 20:10:00,9209,7c4caedabc4c2459289915de7c13e219,Salinas Transit Center / Gate 9,36,,...,36.0,,,,,,,,POINT (-147847.789 -147671.571),076e30b080fdc5501151bd3fb0a37b9e
36665,cbae8712910fbb4c24027c69d481cc9e,2025-11-05,America/Los_Angeles,2025-11-05 07:27:00,2025-11-05 17:55:00,9301,a6965f63fe1714dfb711b343ed32d7d9,Marina Transit Exchange Gate 1,12,,...,12.0,,,,,,,,POINT (-160095.188 -146568.934),076e30b080fdc5501151bd3fb0a37b9e
36666,cbae8712910fbb4c24027c69d481cc9e,2025-11-05,America/Los_Angeles,2025-11-05 05:12:00,2025-11-05 21:45:00,9302,8bada6f9a293546dbb4c50a04a892ab4,Marina Transit Exchange Gate 2,31,,...,31.0,,,,,,,,POINT (-160106.261 -146588.067),076e30b080fdc5501151bd3fb0a37b9e
36667,cbae8712910fbb4c24027c69d481cc9e,2025-11-05,America/Los_Angeles,2025-11-05 06:05:00,2025-11-05 21:34:00,9303,36b231b35b2cafae056e43cc9635da07,Marina Transit Exchange Gate 3,30,,...,30.0,,,,,,,,POINT (-160117.309 -146605.866),076e30b080fdc5501151bd3fb0a37b9e


In [14]:
# List the columns available on trips.
trips.columns

Index(['name', 'feed_key', 'schedule_gtfs_dataset_key', 'trip_id', 'route_id',
       'direction_id', 'route_type', 'analysis_date', 'analysis_name'],
      dtype='object')

In [15]:
# Display trips; the analysis_date column in the output shows both the current date
# (2025-11-05) and a lookback date (2025-10-15).
trips

Unnamed: 0,name,feed_key,schedule_gtfs_dataset_key,trip_id,route_id,direction_id,route_type,analysis_date,analysis_name
0,Santa Ynez Mecatran Schedule,bc76f45fb4d8a3c1be8349ad3d085c3c,372a06b593e1716d1c911b1d1d35bedd,ROUTEA|26003877:T17|17:35:00,ROUTEA,0.0,3,2025-11-05,City of Solvang
1,Santa Ynez Mecatran Schedule,bc76f45fb4d8a3c1be8349ad3d085c3c,372a06b593e1716d1c911b1d1d35bedd,ROUTEA|26003877:T13|14:40:00,ROUTEA,0.0,3,2025-11-05,City of Solvang
2,Santa Ynez Mecatran Schedule,bc76f45fb4d8a3c1be8349ad3d085c3c,372a06b593e1716d1c911b1d1d35bedd,ROUTEA|26003877:T7|11:10:00,ROUTEA,0.0,3,2025-11-05,City of Solvang
3,Santa Ynez Mecatran Schedule,bc76f45fb4d8a3c1be8349ad3d085c3c,372a06b593e1716d1c911b1d1d35bedd,ROUTEA|26003877:T11|13:30:00,ROUTEA,0.0,3,2025-11-05,City of Solvang
4,Santa Ynez Mecatran Schedule,bc76f45fb4d8a3c1be8349ad3d085c3c,372a06b593e1716d1c911b1d1d35bedd,ROUTEA|26003877:T5|9:25:00,ROUTEA,0.0,3,2025-11-05,City of Solvang
...,...,...,...,...,...,...,...,...,...
522,Nevada County Schedule,9597abcbfc03d792a0ab5e0b043e42b2,eca94265831d5c499800dd921ecf4011,t_5965196_b_83697_tn_0,16672,1.0,3,2025-10-15,Nevada County
523,Nevada County Schedule,9597abcbfc03d792a0ab5e0b043e42b2,eca94265831d5c499800dd921ecf4011,t_5965195_b_83697_tn_0,16672,1.0,3,2025-10-15,Nevada County
524,Nevada County Schedule,9597abcbfc03d792a0ab5e0b043e42b2,eca94265831d5c499800dd921ecf4011,t_5965194_b_83697_tn_0,16672,0.0,3,2025-10-15,Nevada County
525,Nevada County Schedule,9597abcbfc03d792a0ab5e0b043e42b2,eca94265831d5c499800dd921ecf4011,t_5965193_b_83697_tn_0,16672,0.0,3,2025-10-15,Nevada County


## debug lookback

In [12]:
import lookback_wrappers
import _utils

In [15]:
# Date -> list of feed names, as shown by the dict displayed in the next cell.
ops = lookback_wrappers.read_published_operators(analysis_date)

In [16]:
# Show the date -> feed-name mapping.
ops

{'2025-08-20': ['eTrans Schedule', 'Roseville Transit GMV Schedule'],
 '2025-09-24': ['San Juan Capistrano Trolley Schedule',
  'Culver City Schedule'],
 '2025-10-15': ['Yolobus Schedule',
  'Go West Schedule',
  'Bay Area 511 Angel Island-Tiburon Ferry Schedule',
  'El Monte Schedule',
  'Nevada County Schedule']}

Use the lookback trips index (`ix`) to fetch the crosswalk on each lookback date so that the join succeeds; concatenate only after the crosswalk join.

In [17]:
# Columns requested from the scheduled-trips table.
trips_cols = [
    "name",
    "feed_key",
    "gtfs_dataset_key",
    "trip_id",
    "route_id",
    "direction_id",
    "route_type",
]
# Current-date trips only, tagged with analysis_date.
# NOTE(review): this rebinds `trips`, replacing the frame returned by casf.get_st_trips above.
trips = helpers.import_scheduled_trips(
    analysis_date, columns=trips_cols, get_pandas=True
).assign(analysis_date=analysis_date)

In [21]:
# Fetch trips for the dates/feeds in ops, passing the same column list used for the
# current-date trips.
lookback_trips = lookback_wrappers.get_lookback_trips(ops, trips_cols)

In [20]:
# Result of lookback_wrappers.lookback_trips_ix on the lookback trips.
# NOTE(review): this cell's execution count (20) precedes the cell that defines
# lookback_trips (21), and the result is not used by any cell visible here.
lookback_trips_ix = lookback_wrappers.lookback_trips_ix(lookback_trips)

In [22]:
# Peek at the first three lookback trips.
lookback_trips.head(3)

Unnamed: 0,name,feed_key,schedule_gtfs_dataset_key,trip_id,route_id,direction_id,route_type,analysis_date
0,eTrans Schedule,55c36aaf0f4ea1d12704cd1ec110be56,ea65e81b31025ca3e74e8ffb27e1a223,t_42723_b_390_tn_0,277,1.0,3,2025-08-20
1,eTrans Schedule,55c36aaf0f4ea1d12704cd1ec110be56,ea65e81b31025ca3e74e8ffb27e1a223,t_840_b_390_tn_0,277,1.0,3,2025-08-20
2,eTrans Schedule,55c36aaf0f4ea1d12704cd1ec110be56,ea65e81b31025ca3e74e8ffb27e1a223,t_41473_b_390_tn_0,277,1.0,3,2025-08-20


In [27]:
# Stack current-date and lookback trips.
# NOTE(review): this rebinds `df`, which earlier held the max-arrivals parquet table.
# Each piece keeps its own index, so index labels may repeat in the result.
df = pd.concat([trips, lookback_trips])

In [30]:
# Run standardize_operator_info_for_exports separately per analysis_date, passing that
# date through, and collect the per-date frames in order of first appearance.
lookback_analysis_name = []
for date in df.analysis_date.unique():
    subset = df.loc[df["analysis_date"] == date]
    subset = portfolio_utils.standardize_operator_info_for_exports(subset, date=date)
    lookback_analysis_name.append(subset)

In [32]:
# Combine the per-date frames and display the result (not assigned to a name).
pd.concat(lookback_analysis_name)

Unnamed: 0,name_original,feed_key,schedule_gtfs_dataset_key,trip_id,route_id,direction_id,route_type,analysis_date,name,base64_url,caltrans_district,analysis_name,source_record_id
0,Santa Ynez Mecatran Schedule,bc76f45fb4d8a3c1be8349ad3d085c3c,372a06b593e1716d1c911b1d1d35bedd,ROUTEA|26003877:T17|17:35:00,ROUTEA,0.0,3,2025-11-05,Santa Ynez Mecatran Schedule,aHR0cDovL2FwcC5tZWNhdHJhbi5jb20vdXJiL3dzL2ZlZW...,05 - San Luis Obispo / Santa Barbara,City of Solvang,recuWhPXfxMatv6rL
1,Santa Ynez Mecatran Schedule,bc76f45fb4d8a3c1be8349ad3d085c3c,372a06b593e1716d1c911b1d1d35bedd,ROUTEA|26003877:T13|14:40:00,ROUTEA,0.0,3,2025-11-05,Santa Ynez Mecatran Schedule,aHR0cDovL2FwcC5tZWNhdHJhbi5jb20vdXJiL3dzL2ZlZW...,05 - San Luis Obispo / Santa Barbara,City of Solvang,recuWhPXfxMatv6rL
2,Santa Ynez Mecatran Schedule,bc76f45fb4d8a3c1be8349ad3d085c3c,372a06b593e1716d1c911b1d1d35bedd,ROUTEA|26003877:T7|11:10:00,ROUTEA,0.0,3,2025-11-05,Santa Ynez Mecatran Schedule,aHR0cDovL2FwcC5tZWNhdHJhbi5jb20vdXJiL3dzL2ZlZW...,05 - San Luis Obispo / Santa Barbara,City of Solvang,recuWhPXfxMatv6rL
3,Santa Ynez Mecatran Schedule,bc76f45fb4d8a3c1be8349ad3d085c3c,372a06b593e1716d1c911b1d1d35bedd,ROUTEA|26003877:T11|13:30:00,ROUTEA,0.0,3,2025-11-05,Santa Ynez Mecatran Schedule,aHR0cDovL2FwcC5tZWNhdHJhbi5jb20vdXJiL3dzL2ZlZW...,05 - San Luis Obispo / Santa Barbara,City of Solvang,recuWhPXfxMatv6rL
4,Santa Ynez Mecatran Schedule,bc76f45fb4d8a3c1be8349ad3d085c3c,372a06b593e1716d1c911b1d1d35bedd,ROUTEA|26003877:T5|9:25:00,ROUTEA,0.0,3,2025-11-05,Santa Ynez Mecatran Schedule,aHR0cDovL2FwcC5tZWNhdHJhbi5jb20vdXJiL3dzL2ZlZW...,05 - San Luis Obispo / Santa Barbara,City of Solvang,recuWhPXfxMatv6rL
...,...,...,...,...,...,...,...,...,...,...,...,...,...
522,Nevada County Schedule,9597abcbfc03d792a0ab5e0b043e42b2,eca94265831d5c499800dd921ecf4011,t_5965196_b_83697_tn_0,16672,1.0,3,2025-10-15,Nevada County Schedule,aHR0cHM6Ly9kYXRhLnRyaWxsaXVtdHJhbnNpdC5jb20vZ3...,03 - Marysville / Sacramento,Nevada County,rec9i3cd99vZ1qdYt
523,Nevada County Schedule,9597abcbfc03d792a0ab5e0b043e42b2,eca94265831d5c499800dd921ecf4011,t_5965195_b_83697_tn_0,16672,1.0,3,2025-10-15,Nevada County Schedule,aHR0cHM6Ly9kYXRhLnRyaWxsaXVtdHJhbnNpdC5jb20vZ3...,03 - Marysville / Sacramento,Nevada County,rec9i3cd99vZ1qdYt
524,Nevada County Schedule,9597abcbfc03d792a0ab5e0b043e42b2,eca94265831d5c499800dd921ecf4011,t_5965194_b_83697_tn_0,16672,0.0,3,2025-10-15,Nevada County Schedule,aHR0cHM6Ly9kYXRhLnRyaWxsaXVtdHJhbnNpdC5jb20vZ3...,03 - Marysville / Sacramento,Nevada County,rec9i3cd99vZ1qdYt
525,Nevada County Schedule,9597abcbfc03d792a0ab5e0b043e42b2,eca94265831d5c499800dd921ecf4011,t_5965193_b_83697_tn_0,16672,0.0,3,2025-10-15,Nevada County Schedule,aHR0cHM6Ly9kYXRhLnRyaWxsaXVtdHJhbnNpdC5jb20vZ3...,03 - Marysville / Sacramento,Nevada County,rec9i3cd99vZ1qdYt
