In [1]:
import pandas as pd
import geopandas as gpd
from siuba import *
import shared_utils

from calitp_data_analysis import get_fs, geography_utils
from segment_speed_utils import helpers, time_series_utils, gtfs_schedule_wrangling
from segment_speed_utils.project_vars import SCHED_GCS, SEGMENT_GCS, GTFS_DATA_DICT, analysis_date

In [2]:
import dask.dataframe as dd
import dask_geopandas as dg
from dask import delayed, compute

# Further investigation after 45_diff_tables

* https://github.com/cal-itp/data-analyses/pull/1356
* average_segment_speeds looks OK
* The issue is upstream: the segment definitions (`stop_pair`) supplied via https://github.com/cal-itp/data-analyses/blob/5e83acbafe8312415f1752f25083688561dd42a6/gtfs_funnel/stop_times_with_direction.py differ from those from `gtfs_segments`.

In [3]:
# Load the 'gtfs_analytics_data' catalog; a later cell indexes it with
# "stop_segments" to read the speed file name and column groupings.
catalog = shared_utils.catalog_utils.get_catalog('gtfs_analytics_data')

In [4]:
import sys
sys.path.append('./scripts')

In [5]:
import average_segment_speeds as aseg

In [6]:
from segment_speed_utils.project_vars import analysis_date_list, oct2024_week

In [7]:
from shared_utils import time_helpers, dask_utils

In [8]:
import nearest_vp_to_stop as nvp
import pipeline_segment_speeds as ps

## explode steps in: [nearest_vp_to_stop](https://github.com/cal-itp/data-analyses/blob/5e83acbafe8312415f1752f25083688561dd42a6/rt_segment_speeds/scripts/nearest_vp_to_stop.py#L20)

Also compare with December.

In [9]:
# Segment type under investigation; used as the key into GTFS_DATA_DICT in the
# next cell. The commented-out line is the alternative value.
segment_type = "stop_segments"
# segment_type = "speedmap_segments"

In [10]:
# File-name stem of the segments table for this segment type; the per-date
# parquet path is built below as f"{SEGMENT_GCS}{SEGMENT_FILE}_{date}.parquet".
SEGMENT_FILE = GTFS_DATA_DICT[segment_type]['segments_file']

In [11]:
# Comparison date, looked up by its 'dec2024' key in rt_dates.DATES.
analysis_date_dec = shared_utils.rt_dates.DATES['dec2024']

In [13]:
# Trip keys returned by helpers.import_unique_vp_trips for each date; used as
# the trip_instance_key filter when reading the segment tables below.
rt_trips, rt_trips2 = (
    helpers.import_unique_vp_trips(service_date)
    for service_date in (analysis_date, analysis_date_dec)
)

In [14]:
# Segment table for analysis_date, restricted to the trips in rt_trips.
# stop_id1 is renamed to stop_id so it can be joined against stop_times.
shape_stop_combinations = pd.read_parquet(
    f"{SEGMENT_GCS}{SEGMENT_FILE}_{analysis_date}.parquet",
    columns=[
        "trip_instance_key",
        "stop_id1",
        "stop_pair",
        "st_trip_instance_key",
    ],
    filters=[[("trip_instance_key", "in", rt_trips)]],
).rename(columns={"stop_id1": "stop_id"})

In [15]:
# December counterpart of shape_stop_combinations. The file date reuses
# analysis_date_dec (the same value rt_trips2 was loaded with) instead of
# looking up DATES['dec2024'] a second time, so the trip filter and the file
# being read cannot drift apart.
shape_stop_combinations2 = pd.read_parquet(
    f"{SEGMENT_GCS}{SEGMENT_FILE}_{analysis_date_dec}.parquet",
    columns=[
        "trip_instance_key",
        "stop_id1",
        "stop_pair",
        "st_trip_instance_key",
    ],
    filters=[[("trip_instance_key", "in", rt_trips2)]],
).rename(columns={"stop_id1": "stop_id"})

In [16]:
# Row count and dtypes of the analysis_date segment table.
shape_stop_combinations.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3151412 entries, 0 to 3151411
Data columns (total 4 columns):
 #   Column                Dtype 
---  ------                ----- 
 0   trip_instance_key     object
 1   stop_id               object
 2   stop_pair             object
 3   st_trip_instance_key  object
dtypes: object(4)
memory usage: 120.2+ MB


In [17]:
# Row count and dtypes of the December segment table, for comparison.
shape_stop_combinations2.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3150548 entries, 0 to 3150652
Data columns (total 4 columns):
 #   Column                Dtype 
---  ------                ----- 
 0   trip_instance_key     object
 1   stop_id               object
 2   stop_pair             object
 3   st_trip_instance_key  object
dtypes: object(4)
memory usage: 120.2+ MB


In [18]:
# Distinct st_trip_instance_key values in the December segment table; used
# further down to filter the December stop_times.
subset_trips2 = shape_stop_combinations2["st_trip_instance_key"].unique()

In [19]:
# Distinct st_trip_instance_key values in the analysis_date segment table;
# used in the next cell to filter stop_times.
subset_trips = shape_stop_combinations["st_trip_instance_key"].unique()

In [20]:
# Stop times with direction for the trips found in the segment table.
# Source: GTFS_DATA_DICT.rt_vs_schedule_tables.stop_times_direction,
# written in gtfs_funnel/stop_times_with_direction.py.
# trip_instance_key is renamed to st_trip_instance_key so it lines up with the
# column of that name in shape_stop_combinations for the merge below.
stops_to_use = helpers.import_scheduled_stop_times(
    analysis_date,
    columns=[
        "trip_instance_key",
        "shape_array_key",
        "stop_sequence",
        "stop_id",
        "stop_pair",
        "stop_primary_direction",
        "geometry",
    ],
    filters=[[("trip_instance_key", "in", subset_trips)]],
    get_pandas=True,
    with_direction=True,
).rename(columns={"trip_instance_key": "st_trip_instance_key"})

In [21]:
# Peek at the stop_times-side stop_pair values for the first trip.
stops_to_use.head(3)

Unnamed: 0,st_trip_instance_key,shape_array_key,stop_sequence,stop_id,stop_pair,stop_primary_direction,geometry
0,00001f273308970887743ce0c3acf068,5abad00eed316250607a32ab7aa38fa7,1,88878,88878__60308,Unknown,POINT (275996.236 -600448.777)
1,00001f273308970887743ce0c3acf068,5abad00eed316250607a32ab7aa38fa7,2,60308,60308__60310,Eastbound,POINT (276129.555 -600498.971)
2,00001f273308970887743ce0c3acf068,5abad00eed316250607a32ab7aa38fa7,3,60309,60309__60249,Northbound,POINT (276315.087 -600745.139)


In [22]:
# Same rows from the segment table, to compare stop_pair values side by side.
shape_stop_combinations.head(3)

Unnamed: 0,trip_instance_key,stop_id,stop_pair,st_trip_instance_key
0,00001f273308970887743ce0c3acf068,88878,88878__60308,00001f273308970887743ce0c3acf068
1,00001f273308970887743ce0c3acf068,60308,60308__60309,00001f273308970887743ce0c3acf068
2,00001f273308970887743ce0c3acf068,60309,60309__60310,00001f273308970887743ce0c3acf068


In [25]:
# `trips` was only assigned in a cell further down the notebook, so a fresh
# top-to-bottom run raised NameError here. Load it in this cell so the lookup
# does not depend on out-of-order execution.
trips = helpers.import_scheduled_trips(
    analysis_date,
    columns=["trip_instance_key", "route_short_name", "name", "feed_key", "trip_id"],
)

# Route / feed lookup for the example trip examined in the following cells.
trips.query('trip_instance_key == "ffd7d651605dbca64e7b3f46945f5817"')

Unnamed: 0,trip_instance_key,route_short_name,name,feed_key,trip_id
33114,ffd7d651605dbca64e7b3f46945f5817,194,Foothill Schedule,e920f063b73c35605bb16e98f63d2e6f,t90B-b2812-sl5


In [26]:
# Left-join stop_times onto the segment table on trip, stop_id AND stop_pair.
# With indicator=True, rows flagged "left_only" are stop_times rows that have
# no segment row with the same trip, stop_id and stop_pair.
left_merge = pd.merge(
    stops_to_use,
    shape_stop_combinations,
    on = ["st_trip_instance_key", "stop_id", "stop_pair"],
    how = "left",
    indicator=True
)

In [29]:
# Count matched ("both") vs. unmatched ("left_only") stop_times rows.
left_merge._merge.value_counts()

both          235026
left_only     155801
right_only         0
Name: _merge, dtype: int64

In [37]:
# Pull out one unmatched (left_only) row by index label as a worked example.
row = left_merge.loc[390823]
row

st_trip_instance_key                    ffd7d651605dbca64e7b3f46945f5817
shape_array_key                         a094fd659ed5dffd9c4526cbd6df96c2
stop_sequence                                                       2609
stop_id                                                             1648
stop_pair                                                     1648__2877
stop_primary_direction                                        Northbound
geometry                  POINT (199462.02638845713 -439913.87381394114)
trip_instance_key                                                    NaN
_merge                                                         left_only
Name: 390823, dtype: object

In [38]:
# Find the segment-table row for the same trip and stop, to compare its
# stop_pair with the one on `row`.
shape_stop_combinations.query('st_trip_instance_key == @row.st_trip_instance_key & stop_id == @row.stop_id')

Unnamed: 0,trip_instance_key,stop_id,stop_pair,st_trip_instance_key
3151409,ffd7d651605dbca64e7b3f46945f5817,1648,1648__2544,ffd7d651605dbca64e7b3f46945f5817


In [39]:
# Stop-time row (with geometry) for the example trip + stop.
gdf1 = stops_to_use.query('st_trip_instance_key == @row.st_trip_instance_key & stop_id == @row.stop_id')

# Second stop of the pair taken from shape_stop_combinations (1648__2544);
# stops_to_use is only used here to get that stop's geometry.
gdf2 = stops_to_use.query('st_trip_instance_key == @row.st_trip_instance_key & stop_id == "2544"')

correct, consecutive stops

In [40]:
# Map the two stops selected in the previous cell.
pd.concat([gdf1, gdf2]).explore()

In [41]:
# Stop-time row (with geometry) for the example trip + stop.
gdf1 = stops_to_use.query('st_trip_instance_key == @row.st_trip_instance_key & stop_id == @row.stop_id')
# Second stop of the pair as recorded on `row` (1648__2877), i.e. the stop_pair
# from stops_to_use via helpers.import_scheduled_stop_times:
#  GTFS_DATA_DICT.rt_vs_schedule_tables.stop_times_direction
#  written in gtfs_funnel/stop_times_with_direction.py
gdf2 = stops_to_use.query('st_trip_instance_key == @row.st_trip_instance_key & stop_id == "2877"')

very far apart, skipping 20+ stops

In [42]:
# Map the two stops selected in the previous cell.
pd.concat([gdf1, gdf2]).explore()

In [43]:
# Scheduled trips with the identifiers needed to look an example trip up by
# trip_instance_key and find its feed_key / trip_id.
trips = helpers.import_scheduled_trips(
    analysis_date,
    columns=[
        'trip_instance_key',
        'route_short_name',
        'name',
        'feed_key',
        'trip_id',
    ],
)

### Another comparison

Referring back to plain stop_times, the stop_pairs from shape_stop_combinations are in sequence, while those from stops_to_use again skip many stops. It seems the first part of each stop_pair there is correct, but the second part is not actually the subsequent stop.

In [44]:
# Route / feed lookup for the second example trip; its feed_key and trip_id
# are used below to query plain stop_times.
trips.query('trip_instance_key == "0c9b170807546736de0820cf8860bc00"')

Unnamed: 0,trip_instance_key,route_short_name,name,feed_key,trip_id
36933,0c9b170807546736de0820cf8860bc00,169,LA Metro Bus Schedule,7a5a0fa93eb3c7a402c0887a4f11f066,10169001531434-DEC24


In [54]:
# First three stop_pairs for this trip according to the segment table.
shape_stop_combinations.query('trip_instance_key == "0c9b170807546736de0820cf8860bc00"').head(3)

Unnamed: 0,trip_instance_key,stop_id,stop_pair,st_trip_instance_key
2432353,0c9b170807546736de0820cf8860bc00,30021,30021__3625,0c9b170807546736de0820cf8860bc00
2432354,0c9b170807546736de0820cf8860bc00,3625,3625__12116,0c9b170807546736de0820cf8860bc00
2432355,0c9b170807546736de0820cf8860bc00,12116,12116__2499,0c9b170807546736de0820cf8860bc00


In [53]:
# First three stop_pairs for the same trip according to stops_to_use.
stops_to_use.query('st_trip_instance_key == "0c9b170807546736de0820cf8860bc00"').head(3)

Unnamed: 0,st_trip_instance_key,shape_array_key,stop_sequence,stop_id,stop_pair,stop_primary_direction,geometry
69818,0c9b170807546736de0820cf8860bc00,59459dbb9f7ce418d6f88f65e905d583,1,30021,30021__3325,Eastbound,POINT (151920.171 -423388.371)
69819,0c9b170807546736de0820cf8860bc00,59459dbb9f7ce418d6f88f65e905d583,2,3625,3625__3252,Eastbound,POINT (152164.852 -423047.563)
69820,0c9b170807546736de0820cf8860bc00,59459dbb9f7ce418d6f88f65e905d583,3,12116,12116__14463,Eastbound,POINT (152163.927 -422698.157)


In [47]:
# Plain stop_times (with_direction=False) as an independent reference for the
# true stop order within a trip.
st_no_dir = helpers.import_scheduled_stop_times(
    analysis_date,
    with_direction=False,
    get_pandas=True,
    columns=['feed_key', 'trip_id', 'stop_sequence', 'stop_id'],
)

In [48]:
# Stop order for the example trip (feed_key / trip_id taken from the trips
# lookup above), sorted by stop_sequence.
st_no_dir.query('feed_key == "7a5a0fa93eb3c7a402c0887a4f11f066" & trip_id == "10169001531434-DEC24"').sort_values('stop_sequence')

Unnamed: 0,feed_key,trip_id,stop_sequence,stop_id
2448922,7a5a0fa93eb3c7a402c0887a4f11f066,10169001531434-DEC24,1,30021
1766211,7a5a0fa93eb3c7a402c0887a4f11f066,10169001531434-DEC24,2,3625
3255026,7a5a0fa93eb3c7a402c0887a4f11f066,10169001531434-DEC24,3,12116
1279772,7a5a0fa93eb3c7a402c0887a4f11f066,10169001531434-DEC24,4,2499
779733,7a5a0fa93eb3c7a402c0887a4f11f066,10169001531434-DEC24,5,3567
...,...,...,...,...
1278824,7a5a0fa93eb3c7a402c0887a4f11f066,10169001531434-DEC24,130,6575
2925060,7a5a0fa93eb3c7a402c0887a4f11f066,10169001531434-DEC24,131,6597
2753655,7a5a0fa93eb3c7a402c0887a4f11f066,10169001531434-DEC24,132,7148
1436805,7a5a0fa93eb3c7a402c0887a4f11f066,10169001531434-DEC24,133,5507


In [52]:
# Where does stop 3325 (the second half of stops_to_use's first stop_pair,
# 30021__3325) actually fall in this trip's plain stop_times?
(
    st_no_dir
    .query('feed_key == "7a5a0fa93eb3c7a402c0887a4f11f066" & trip_id == "10169001531434-DEC24"')
    .sort_values('stop_sequence')
    .query('stop_id == "3325"')
)
# stop_sequence is 118, but it would be 2 if 3325 really followed 30021.

Unnamed: 0,feed_key,trip_id,stop_sequence,stop_id
1608478,7a5a0fa93eb3c7a402c0887a4f11f066,10169001531434-DEC24,118,3325


### December data

In [49]:
# December stop times with direction, restricted to the trips present in the
# December segment table and renamed the same way as stops_to_use.
stops_to_use2 = helpers.import_scheduled_stop_times(
    analysis_date_dec,
    columns=[
        "trip_instance_key",
        "shape_array_key",
        "stop_sequence",
        "stop_id",
        "stop_pair",
        "stop_primary_direction",
        "geometry",
    ],
    filters=[[("trip_instance_key", "in", subset_trips2)]],
    get_pandas=True,
    with_direction=True,
).rename(columns={"trip_instance_key": "st_trip_instance_key"})

In [50]:
# Same left merge as for the analysis_date tables, now on the December data:
# "left_only" rows are stop_times rows with no segment row sharing the same
# trip, stop_id and stop_pair.
left_merge2 = pd.merge(
    stops_to_use2,
    shape_stop_combinations2,
    on = ["st_trip_instance_key", "stop_id", "stop_pair"],
    how = "left",
    indicator=True
)

In [51]:
# Matched vs. unmatched counts for December, to compare with the earlier merge.
left_merge2._merge.value_counts()

both          3134260
left_only        5232
right_only          0
Name: _merge, dtype: int64

# Averaging seems to work OK actually, problem in previous step

In [38]:
# Column groupings for stop segments, read from the catalog entry.
dict_inputs = catalog["stop_segments"]

# A later cell (the time_series_utils.concatenate_datasets_across_dates call)
# reads SHAPE_STOP_COLS, which was never defined under that name and raised
# NameError. Define it here in the same UPPER_SNAKE style as its siblings.
# NOTE(review): the key "shape_stop_combinations_cols" looks like it may be a
# search-and-replace artifact of "shape_stop_cols" — confirm against the catalog.
SHAPE_STOP_COLS = [*dict_inputs["shape_stop_combinations_cols"]]
# Alias kept because the `cols` cell below still uses the mixed-case name.
shape_stop_combinations_COLS = SHAPE_STOP_COLS

ROUTE_DIR_COLS = [*dict_inputs["route_dir_cols"]]
STOP_PAIR_COLS = [*dict_inputs["stop_pair_cols"]]

In [39]:
# File-name stem of the speeds table (the "stage4" catalog entry).
SPEED_FILE = dict_inputs["stage4"]
# Upper bound applied to speed_mph when reading speeds in later cells.
MAX_SPEED = dict_inputs["max_speed"]

In [45]:
# Dates that the averaging inputs are read for.
analysis_date_list

['2025-01-15']

In [71]:
# Confirm which speeds file stem the catalog resolved to.
SPEED_FILE

'speeds_stop_segments'

In [46]:
# Full path to one date's speedmap speeds parquet; read directly with pandas
# in the next cell.
path = 'gs://calitp-analytics-data/data-analyses/rt_segment_speeds/speedmap/speeds_2025-01-15.parquet'

In [48]:
# Read the whole file with no column selection or filters, dropping exact
# duplicate rows.
df = pd.read_parquet(path).drop_duplicates()

In [53]:
# Same location without the date suffix / extension; passed to
# dask_utils.get_ddf together with analysis_date_list below.
# NOTE: this rebinds `path`, which previously held the full per-date file path.
path = 'gs://calitp-analytics-data/data-analyses/rt_segment_speeds/speedmap/speeds'

In [55]:
# Re-check the date list that will be combined with `path`.
analysis_date_list

['2025-01-15']

In [68]:
# Build the column list in this cell: `cols` was previously only assigned in a
# cell further down, so a top-to-bottom run failed here with NameError.
cols = helpers.unique_list(
    aseg.OPERATOR_COLS + shape_stop_combinations_COLS +
    ROUTE_DIR_COLS + STOP_PAIR_COLS + [
        "trip_instance_key", "speed_mph",
        "meters_elapsed", "sec_elapsed",
        "time_of_day", "arrival_time",
    ]
)

# Reading without `columns` / `filters` was already noted to be fine:
# ddf=dask_utils.get_ddf([path], analysis_date_list, 'df', get_pandas = False, add_date = True)
ddf = dask_utils.get_ddf(
    [path],
    analysis_date_list,
    'df',
    get_pandas = False,
    add_date = True,
    columns = cols,
    # keep only rows whose speed_mph is at or below MAX_SPEED
    filters = [[("speed_mph", "<=", MAX_SPEED)]],
)

In [69]:
# Materialize the dask dataframe to inspect the selected / filtered rows.
ddf.compute()

Unnamed: 0,schedule_gtfs_dataset_key,shape_array_key,shape_id,stop_sequence,route_id,direction_id,stop_pair,stop_pair_name,trip_instance_key,speed_mph,meters_elapsed,sec_elapsed,time_of_day,arrival_time,service_date
0,baeeb157e85a901e47b828ef9fe75091,5abad00eed316250607a32ab7aa38fa7,906_8_38,2,906,0.0,60308__60310,Beyer Bl & Iris Av__Beyer Bl & Precision Park Ln,00001f273308970887743ce0c3acf068,3.443793,307.893917,200.0,PM Peak,2025-01-15 18:09:27,2025-01-15
1,baeeb157e85a901e47b828ef9fe75091,5abad00eed316250607a32ab7aa38fa7,906_8_38,3,906,0.0,60309__60249,Beyer Bl & Dairy Mart Rd__San Ysidro Bl & Virg...,00001f273308970887743ce0c3acf068,10.917496,229.379660,47.0,PM Peak,2025-01-15 18:12:47,2025-01-15
2,baeeb157e85a901e47b828ef9fe75091,5abad00eed316250607a32ab7aa38fa7,906_8_38,4,906,0.0,60310__60568,Beyer Bl & Precision Park Ln__San Ysidro Bl & ...,00001f273308970887743ce0c3acf068,10.915386,1039.328261,213.0,PM Peak,2025-01-15 18:13:34,2025-01-15
3,baeeb157e85a901e47b828ef9fe75091,5abad00eed316250607a32ab7aa38fa7,906_8_38,6,906,0.0,60245__60567,Beyer Bl & Poplar St__San Ysidro Bl & Dairy Ma...,00001f273308970887743ce0c3acf068,10.445412,364.211940,78.0,PM Peak,2025-01-15 18:17:07,2025-01-15
4,baeeb157e85a901e47b828ef9fe75091,5abad00eed316250607a32ab7aa38fa7,906_8_38,7,906,0.0,60052__60574,South Vista Av & Sunset Ln__San Ysidro Bl & Cy...,00001f273308970887743ce0c3acf068,14.550872,136.597364,21.0,PM Peak,2025-01-15 18:18:25,2025-01-15
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3074227,f74424acf8c41e4c1e9fd42838c4875c,a094fd659ed5dffd9c4526cbd6df96c2,18468_shp,2160,194,0.0,3356__3324,Valley Blvd and Lemon Ave__Valley Blvd and Max...,ffd7d651605dbca64e7b3f46945f5817,36.749597,1002.112387,61.0,Evening,2025-01-15 23:54:48,2025-01-15
3074228,f74424acf8c41e4c1e9fd42838c4875c,a094fd659ed5dffd9c4526cbd6df96c2,18468_shp,2280,194,0.0,3324__3337,Valley Blvd and Pierre Rd E__Old Valley Blvd a...,ffd7d651605dbca64e7b3f46945f5817,39.970207,375.223222,21.0,Evening,2025-01-15 23:54:54,2025-01-15
3074229,f74424acf8c41e4c1e9fd42838c4875c,a094fd659ed5dffd9c4526cbd6df96c2,18468_shp,2322,194,0.0,3329__3342,Valley Blvd and Suzanne Rd E__Azusa Way and Va...,ffd7d651605dbca64e7b3f46945f5817,23.061093,639.154132,62.0,Evening,2025-01-15 23:55:15,2025-01-15
3074230,f74424acf8c41e4c1e9fd42838c4875c,a094fd659ed5dffd9c4526cbd6df96c2,18468_shp,2393,194,0.0,3319__3340,Valley Blvd and Brea Canyon Rd E__Mayor Dave W...,ffd7d651605dbca64e7b3f46945f5817,25.971102,940.393061,81.0,Evening,2025-01-15 23:56:17,2025-01-15


In [64]:
# Columns requested from the speeds file: operator, shape/stop, route-direction
# and stop-pair groupings plus the per-trip speed fields, passed through
# helpers.unique_list.
cols = helpers.unique_list(
    aseg.OPERATOR_COLS
    + shape_stop_combinations_COLS
    + ROUTE_DIR_COLS
    + STOP_PAIR_COLS
    + [
        "trip_instance_key",
        "speed_mph",
        "meters_elapsed",
        "sec_elapsed",
        "time_of_day",
        "arrival_time",
    ]
)

In [67]:
# Show the resulting column list.
cols

['schedule_gtfs_dataset_key',
 'shape_array_key',
 'shape_id',
 'stop_sequence',
 'route_id',
 'direction_id',
 'stop_pair',
 'stop_pair_name',
 'trip_instance_key',
 'speed_mph',
 'meters_elapsed',
 'sec_elapsed',
 'time_of_day',
 'arrival_time']

In [63]:
# Columns present in the unfiltered pandas read, to check that every requested
# column exists in the file.
df.columns

Index(['trip_instance_key', 'stop_sequence', 'stop_sequence1',
       'shape_array_key', 'stop_meters', 'arrival_time', 'arrival_time_sec',
       'subseq_arrival_time_sec', 'subseq_stop_meters', 'meters_elapsed',
       'sec_elapsed', 'speed_mph', 'schedule_gtfs_dataset_key', 'route_id',
       'direction_id', 'route_short_name', 'scheduled_service_minutes',
       'sched_rt_category', 'time_of_day', 'shape_id', 'segment_id',
       'stop_pair_name', 'stop_pair'],
      dtype='object')

In [70]:
# Base GCS path exposed by average_segment_speeds; passed to
# concatenate_datasets_across_dates in the next cell.
aseg.SEGMENT_GCS

'gs://calitp-analytics-data/data-analyses/rt_segment_speeds/'

In [41]:
# Read the SPEED_FILE table for every date in analysis_date_list via
# time_series_utils, keeping only the needed columns and rows whose speed_mph
# is at or below MAX_SPEED. get_pandas=False: the result is computed later.
df = time_series_utils.concatenate_datasets_across_dates(
    aseg.SEGMENT_GCS,
    SPEED_FILE,
    analysis_date_list,
    data_type="df",
    columns=helpers.unique_list(
        aseg.OPERATOR_COLS
        + SHAPE_STOP_COLS
        + ROUTE_DIR_COLS
        + STOP_PAIR_COLS
        + [
            "trip_instance_key",
            "speed_mph",
            "meters_elapsed",
            "sec_elapsed",
            "time_of_day",
            "arrival_time",
        ]
    ),
    filters=[[("speed_mph", "<=", MAX_SPEED)]],
    get_pandas=False,
)

In [43]:
# Materialize the concatenated speeds to inspect them.
df.compute()

Unnamed: 0,schedule_gtfs_dataset_key,shape_array_key,shape_id,stop_sequence,route_id,direction_id,stop_pair,stop_pair_name,trip_instance_key,speed_mph,meters_elapsed,sec_elapsed,time_of_day,arrival_time,service_date
0,baeeb157e85a901e47b828ef9fe75091,5abad00eed316250607a32ab7aa38fa7,906_8_38,12,906,0.0,60314__60315,San Ysidro Bl & I-805 (Ramp)__San Ysidro Bl & ...,00001f273308970887743ce0c3acf068,5.870617,3584.829289,1366.0,PM Peak,2025-01-15 18:22:03,2025-01-15
1,baeeb157e85a901e47b828ef9fe75091,5abad00eed316250607a32ab7aa38fa7,906_8_38,12,906,0.0,60314__60315,San Ysidro Bl & I-805 (Ramp)__San Ysidro Bl & ...,093275bdc15202d91c8bea881866e7b2,6.446353,3584.829289,1244.0,Midday,2025-01-15 13:49:05,2025-01-15
2,baeeb157e85a901e47b828ef9fe75091,5abad00eed316250607a32ab7aa38fa7,906_8_38,12,906,0.0,60314__60315,San Ysidro Bl & I-805 (Ramp)__San Ysidro Bl & ...,0b309afa2d5bef3c2028961646c94ba3,7.077902,3584.829289,1133.0,PM Peak,2025-01-15 17:09:01,2025-01-15
3,baeeb157e85a901e47b828ef9fe75091,5abad00eed316250607a32ab7aa38fa7,906_8_38,12,906,0.0,60314__60315,San Ysidro Bl & I-805 (Ramp)__San Ysidro Bl & ...,0f5769de811a55b20dc44ffa816de412,5.289751,3584.829289,1516.0,PM Peak,2025-01-15 16:54:38,2025-01-15
4,baeeb157e85a901e47b828ef9fe75091,5abad00eed316250607a32ab7aa38fa7,906_8_38,12,906,0.0,60314__60315,San Ysidro Bl & I-805 (Ramp)__San Ysidro Bl & ...,1288107588bb83361fda270fee48bdba,6.836541,3584.829289,1173.0,PM Peak,2025-01-15 19:34:48,2025-01-15
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
191821,c499f905e33929a641f083dad55c521e,0227946648df4c7ba490c34bdc6c6ee3,shp-696-02,26,696,0.0,50127__57896,Florence Av & Hermosa Av__Florence Av & Broadw...,fddaf72e40d72fbfea96f069a272451b,11.322115,313.800230,62.0,AM Peak,2025-01-15 08:33:13,2025-01-15
191822,c499f905e33929a641f083dad55c521e,0227946648df4c7ba490c34bdc6c6ee3,shp-696-02,27,696,0.0,57896__59519,Florence Av & Broadway Ter__Broadway Ter & Her...,fddaf72e40d72fbfea96f069a272451b,14.082307,2379.576195,378.0,AM Peak,2025-01-15 08:34:15,2025-01-15
191826,364d59b3aea55aec2962a0b3244a40e0,24d7ff7f53f1dd1ac665462891afdde6,162,13,1,0.0,208__210,Vanderbilt Wy & Westinghouse Pl__Ave Stanford ...,fe6f2489a8dc41b382f4491539db22c7,16.131704,2105.703045,292.0,AM Peak,2025-01-15 09:23:13,2025-01-15
191828,0666caf3ec1ecc96b74f4477ee4bc939,f82809b9f53bd208c67890c43aa4dcec,2420002_DEC24,12,242-13186,0.0,8492__8483,Winnetka / Vanowen__Winnetka / Hart,fe970eb78c0742910069087708c74666,10.843088,9888.197968,2040.0,PM Peak,2025-01-15 16:09:53,2025-01-15


In [None]:
# Use the averaging script's own loader for stop segments, left uncomputed
# (get_pandas=False).
df = aseg.concatenate_trip_segment_speeds(
    analysis_date_list,
    "stop_segments",
    get_pandas=False,
)

In [None]:
# Display the result before it is computed.
df

In [None]:
# Materialize the speeds under a separate name; `df` itself is left unchanged
# and is what gets passed to the averaging step below.
trip_speeds = df.compute()

In [None]:
# Inspect the computed speeds.
trip_speeds

In [None]:
# Grouping columns handed to the averaging function: operator, route-direction
# and stop-pair column groups combined.
group_cols = aseg.OPERATOR_COLS + ROUTE_DIR_COLS + STOP_PAIR_COLS

In [None]:
# Show the combined grouping columns.
group_cols

In [None]:
# Wrap the averaging step in dask.delayed; nothing runs until .compute() is
# called on the returned object.
avg_speeds = delayed(aseg.metrics.concatenate_peak_offpeak_allday_averages)(
    df, group_cols, metric_type="segment_speeds"
)

In [None]:
# Run the delayed averaging. Note the two similar names: avg_speeds is the
# delayed object, avg_spds is its computed result.
avg_spds = avg_speeds.compute()

In [None]:
# Inspect the computed averages.
avg_spds

In [None]:
# Attach operator identifiers (aseg.CROSSWALK_COLS) to the averaged speeds.
# NOTE: this rebinds avg_spds to its own output, so re-running the cell applies
# the merge to an already-merged frame.
avg_spds = gtfs_schedule_wrangling.merge_operator_identifiers(
    avg_spds,
    analysis_date_list,
    columns=aseg.CROSSWALK_COLS,
)