In [1]:
import os
os.environ["CALITP_BQ_MAX_BYTES"] = str(1_000_000_000_000) ## 1TB?

In [2]:
import geopandas as gpd
import pandas as pd
from siuba import *
import numpy as np

from segment_speed_utils import helpers, gtfs_schedule_wrangling
from shared_utils import rt_dates, gtfs_utils_v2
import folium

In [3]:
from update_vars import (analysis_date, AM_PEAK, PM_PEAK, EXPORT_PATH, GCS_FILE_PATH, PROJECT_CRS,
SEGMENT_BUFFER_METERS, AM_PEAK, PM_PEAK, HQ_TRANSIT_THRESHOLD, MS_TRANSIT_THRESHOLD)

In [4]:
import sjoin_stops_to_segments

In [5]:
analysis_date

'2024-10-21'

In [6]:
imported_st = helpers.import_scheduled_stop_times(
    analysis_date,
    get_pandas = True,
)

In [7]:
import importlib

importlib.reload(sjoin_stops_to_segments)

<module 'sjoin_stops_to_segments' from '/home/jovyan/data-analyses/high_quality_transit_areas/sjoin_stops_to_segments.py'>

In [8]:
# # (1) Aggregate stop times - by stop_id, find max trips in AM/PM peak
# # takes 1 min
# max_arrivals_by_stop = imported_st.pipe(sjoin_stops_to_segments.prep_stop_times).pipe(sjoin_stops_to_segments.stop_times_aggregation_max_by_stop, analysis_date)

In [9]:
# # (1) Aggregate stop times - by stop_id, find max trips in AM/PM peak
# # takes 1 min
# max_arrivals_by_stop_single = imported_st.pipe(
#     sjoin_stops_to_segments.stop_times_aggregation_max_by_stop, analysis_date, single_route_dir=True)

## multi logic

In [271]:
trips = helpers.import_scheduled_trips(
    analysis_date,
    columns = ["feed_key", "gtfs_dataset_key", "trip_id",
               "route_id", "direction_id"],
    get_pandas = True
)

trips = imported_st.merge(
    trips,
    on = ["feed_key", "trip_id"]
)

In [272]:
trips.direction_id = trips.direction_id.fillna(0).astype(int).astype(str)
trips['route_dir'] = trips[['route_id', 'direction_id']].agg('_'.join, axis=1)

In [273]:
st_prepped = trips.pipe(sjoin_stops_to_segments.prep_stop_times)

In [274]:
cols = ["schedule_gtfs_dataset_key", "stop_id", "peak"]
# cols = ["schedule_gtfs_dataset_key", "stop_id", "peak",
#        "route_id", "direction_id"]

In [275]:
trips_per_peak_multi = gtfs_schedule_wrangling.stop_arrivals_per_stop(
    st_prepped,
    group_cols = cols,
    count_col = "trip_id",
    route_dir_array = True
).rename(columns = {"n_arrivals": "n_trips"})

In [276]:
stop_cols = ["schedule_gtfs_dataset_key", "stop_id"]
trips_per_hour_cols = ["peak"]

In [277]:
def last_bit(trips_per_peak_period):

    am_trips = (trips_per_peak_period[trips_per_peak_period.peak == 'am_peak']
                .rename(columns = {"n_trips": "am_max_trips"})
                .drop(columns="peak")
               )
    pm_trips = (trips_per_peak_period[trips_per_peak_period.peak == 'pm_peak']
                .rename(columns = {"n_trips": "pm_max_trips"})
                .drop(columns=["peak", "route_dir"])
               )

    max_trips_by_stop = pd.merge(
        am_trips, 
        pm_trips,
        on = stop_cols,
        how = "left"
    )
    #  divide by length of peak to get trips/hr, keep n_trips a raw sum
    max_trips_by_stop = max_trips_by_stop.assign(
        am_max_trips_hr = (max_trips_by_stop.am_max_trips.fillna(0) / len(am_peak_hrs)).astype(int),
        pm_max_trips_hr = (max_trips_by_stop.pm_max_trips.fillna(0) / len(pm_peak_hrs)).astype(int),
        n_trips = (max_trips_by_stop.am_max_trips.fillna(0) + 
                   max_trips_by_stop.pm_max_trips.fillna(0)),
        route_dir_count = max_trips_by_stop.route_dir.map(lambda x: x.size)
    )
    
    return max_trips_by_stop

In [278]:
am_peak_hrs = list(range(AM_PEAK[0].hour, AM_PEAK[1].hour))
pm_peak_hrs = list(range(PM_PEAK[0].hour, PM_PEAK[1].hour))

In [279]:
multi_qual = last_bit(trips_per_peak_multi)

In [280]:
min_freq = min([HQ_TRANSIT_THRESHOLD, MS_TRANSIT_THRESHOLD])

In [281]:
multi_qual

Unnamed: 0,schedule_gtfs_dataset_key,stop_id,am_max_trips,route_dir,pm_max_trips,am_max_trips_hr,pm_max_trips_hr,n_trips,route_dir_count
0,0139b1253130b33adcd4b3a4490530d2,00eb15cb-1430-4964-b8ae-ca6183e1d0ef,2,[D1_0],4.0,0,1,6.0,1
1,0139b1253130b33adcd4b3a4490530d2,02a30e39-496f-45d4-ba1c-ac8f3c66b621,8,"[0ad6c6aa-1939-45a0-a3a8-02ebe8e19092_0, D2_0,...",13.0,2,3,21.0,3
2,0139b1253130b33adcd4b3a4490530d2,04a2c417-05bf-4f95-bfb6-dd9cec701f11,5,"[D3_0, c6726149-9979-4ebb-85f6-0be90402266c_0]",8.0,1,2,13.0,2
3,0139b1253130b33adcd4b3a4490530d2,05d0285f-813a-4ea9-82e0-3b8d1127e8e0,3,[T2_0],5.0,1,1,8.0,1
4,0139b1253130b33adcd4b3a4490530d2,07fe70a4-21dd-4bcf-9adf-ed96f0daebbc,2,[D1_0],4.0,0,1,6.0,1
...,...,...,...,...,...,...,...,...,...
79257,ff1bc5dde661d62c877165421e9ca257,exp_20,5,[ROUTEA_1],8.0,1,2,13.0,1
79258,ff1bc5dde661d62c877165421e9ca257,exp_21,5,[ROUTEA_1],8.0,1,2,13.0,1
79259,ff1bc5dde661d62c877165421e9ca257,exp_22,5,[ROUTEA_1],8.0,1,2,13.0,1
79260,ff1bc5dde661d62c877165421e9ca257,exp_23,5,[ROUTEA_1],8.0,1,2,13.0,1


In [282]:
multi_qual = multi_qual >> filter(_.am_max_trips_hr > min_freq, _.pm_max_trips_hr > min_freq, _.route_dir_count > 1)

In [283]:
multi_qual

Unnamed: 0,schedule_gtfs_dataset_key,stop_id,am_max_trips,route_dir,pm_max_trips,am_max_trips_hr,pm_max_trips_hr,n_trips,route_dir_count
96,0139b1253130b33adcd4b3a4490530d2,52c2636c-34a3-434c-99ae-cdf3dc36d15c,39,"[0e85fd4c-5258-4256-9852-4a96554aadb7_0, T11x_...",58.0,13,14,97.0,9
179,0139b1253130b33adcd4b3a4490530d2,98d2a60c-86b1-45d6-b5d6-39b273c9eb46,16,"[0ad6c6aa-1939-45a0-a3a8-02ebe8e19092_0, 79b4a...",24.0,5,6,40.0,5
180,0139b1253130b33adcd4b3a4490530d2,999ff07b-4a27-4c80-9a1c-e868038ce097,24,"[0ad6c6aa-1939-45a0-a3a8-02ebe8e19092_0, D1_0,...",44.0,8,11,68.0,7
296,015d67d5b75b5cf2b710bbadadfb75f5,40103,12,"[17_0, 17_1, 61_0, 61_1]",17.0,4,4,29.0,4
297,015d67d5b75b5cf2b710bbadadfb75f5,40113,30,"[17_0, 22_0, 22_1, 36_0, 36_1, 61_1, 71_0, 71_1]",48.0,10,12,78.0,8
...,...,...,...,...,...,...,...,...,...
79047,fb746afc72ff40405cfefa6d23ab58a0,53129,28,"[A_0, B_0, E_0, M_0]",42.0,9,10,70.0,4
79049,fb746afc72ff40405cfefa6d23ab58a0,53163,20,"[5_AM_0, A_0, B_0, C_0, E_0, F_0]",27.0,6,6,47.0,6
79093,fc6cd27871cce0092a08ccf68fb240a2,132744,28,"[4556_0, 4557_0, 4558_0, 4559_0]",44.0,9,11,72.0,4
79116,fe4aab1717eca5a2935c32c85a35a5bf,115,13,"[11_0, 12_0, 14_0, 1_0, 2_0, 3_0, 4_1]",22.0,4,5,35.0,7


## single logic

In [284]:
# cols = ["schedule_gtfs_dataset_key", "stop_id", "peak"]
cols = ["schedule_gtfs_dataset_key", "stop_id", "peak",
       "route_id", "direction_id"]

In [285]:
trips_per_peak_single = gtfs_schedule_wrangling.stop_arrivals_per_stop(
    st_prepped,
    group_cols = cols,
    count_col = "trip_id",
    route_dir_array = True
).rename(columns = {"n_arrivals": "n_trips"})

In [286]:
stop_cols = ["schedule_gtfs_dataset_key", "stop_id"]
trips_per_hour_cols = ["peak"]

In [287]:
def last_bit(trips_per_peak_period):

    am_trips = (trips_per_peak_period[trips_per_peak_period.peak == 'am_peak']
                .rename(columns = {"n_trips": "am_max_trips"})
                .drop(columns="peak")
               )
    pm_trips = (trips_per_peak_period[trips_per_peak_period.peak == 'pm_peak']
                .rename(columns = {"n_trips": "pm_max_trips"})
                .drop(columns=["peak", "route_dir"])
               )

    max_trips_by_stop = pd.merge(
        am_trips, 
        pm_trips,
        on = stop_cols,
        how = "left"
    )
    #  divide by length of peak to get trips/hr, keep n_trips a raw sum
    max_trips_by_stop = max_trips_by_stop.assign(
        am_max_trips_hr = (max_trips_by_stop.am_max_trips.fillna(0) / len(am_peak_hrs)).astype(int),
        pm_max_trips_hr = (max_trips_by_stop.pm_max_trips.fillna(0) / len(pm_peak_hrs)).astype(int),
        n_trips = (max_trips_by_stop.am_max_trips.fillna(0) + 
                   max_trips_by_stop.pm_max_trips.fillna(0)),
        route_dir_count = max_trips_by_stop.route_dir.map(lambda x: x.size)
    )
    
    return max_trips_by_stop

In [288]:
df_single = last_bit(trips_per_peak_single)

In [289]:
df_single

Unnamed: 0,schedule_gtfs_dataset_key,stop_id,route_id_x,direction_id_x,am_max_trips,route_dir,route_id_y,direction_id_y,pm_max_trips,am_max_trips_hr,pm_max_trips_hr,n_trips,route_dir_count
0,0139b1253130b33adcd4b3a4490530d2,00eb15cb-1430-4964-b8ae-ca6183e1d0ef,D1,0,2,[D1_0],D1,0,4.0,0,1,6.0,1
1,0139b1253130b33adcd4b3a4490530d2,02a30e39-496f-45d4-ba1c-ac8f3c66b621,0ad6c6aa-1939-45a0-a3a8-02ebe8e19092,0,2,[0ad6c6aa-1939-45a0-a3a8-02ebe8e19092_0],0ad6c6aa-1939-45a0-a3a8-02ebe8e19092,0,6.0,0,1,8.0,1
2,0139b1253130b33adcd4b3a4490530d2,02a30e39-496f-45d4-ba1c-ac8f3c66b621,0ad6c6aa-1939-45a0-a3a8-02ebe8e19092,0,2,[0ad6c6aa-1939-45a0-a3a8-02ebe8e19092_0],D2,0,3.0,0,0,5.0,1
3,0139b1253130b33adcd4b3a4490530d2,02a30e39-496f-45d4-ba1c-ac8f3c66b621,0ad6c6aa-1939-45a0-a3a8-02ebe8e19092,0,2,[0ad6c6aa-1939-45a0-a3a8-02ebe8e19092_0],c6726149-9979-4ebb-85f6-0be90402266c,0,4.0,0,1,6.0,1
4,0139b1253130b33adcd4b3a4490530d2,02a30e39-496f-45d4-ba1c-ac8f3c66b621,D2,0,2,[D2_0],0ad6c6aa-1939-45a0-a3a8-02ebe8e19092,0,6.0,0,1,8.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
200142,ff1bc5dde661d62c877165421e9ca257,exp_20,ROUTEA,1,5,[ROUTEA_1],ROUTEA,1,8.0,1,2,13.0,1
200143,ff1bc5dde661d62c877165421e9ca257,exp_21,ROUTEA,1,5,[ROUTEA_1],ROUTEA,1,8.0,1,2,13.0,1
200144,ff1bc5dde661d62c877165421e9ca257,exp_22,ROUTEA,1,5,[ROUTEA_1],ROUTEA,1,8.0,1,2,13.0,1
200145,ff1bc5dde661d62c877165421e9ca257,exp_23,ROUTEA,1,5,[ROUTEA_1],ROUTEA,1,8.0,1,2,13.0,1


In [290]:
single_qual = df_single >> filter(_.am_max_trips_hr > min_freq, _.pm_max_trips_hr > min_freq)

In [291]:
multi_only = multi_qual >> anti_join(_, single_qual, on=['schedule_gtfs_dataset_key', 'stop_id'])

In [292]:
# multi_only

In [293]:
test = (multi_only[['schedule_gtfs_dataset_key', 'stop_id', 'route_dir']]
.explode('route_dir')
.sort_values(['schedule_gtfs_dataset_key','stop_id', 'route_dir']))

In [294]:
# test = test.head(5000)

In [295]:
test

Unnamed: 0,schedule_gtfs_dataset_key,stop_id,route_dir
96,0139b1253130b33adcd4b3a4490530d2,52c2636c-34a3-434c-99ae-cdf3dc36d15c,0e85fd4c-5258-4256-9852-4a96554aadb7_0
96,0139b1253130b33adcd4b3a4490530d2,52c2636c-34a3-434c-99ae-cdf3dc36d15c,T11x_0
96,0139b1253130b33adcd4b3a4490530d2,52c2636c-34a3-434c-99ae-cdf3dc36d15c,T1_0
96,0139b1253130b33adcd4b3a4490530d2,52c2636c-34a3-434c-99ae-cdf3dc36d15c,T2_0
96,0139b1253130b33adcd4b3a4490530d2,52c2636c-34a3-434c-99ae-cdf3dc36d15c,T3_0
...,...,...,...
79116,fe4aab1717eca5a2935c32c85a35a5bf,115,3_0
79116,fe4aab1717eca5a2935c32c85a35a5bf,115,4_1
79246,ff1bc5dde661d62c877165421e9ca257,LO_19,ROUTEA_0
79246,ff1bc5dde661d62c877165421e9ca257,LO_19,ROUTEA_1


In [296]:
xy = test.loc[96,:].route_dir.to_numpy()

In [297]:
xy

array(['0e85fd4c-5258-4256-9852-4a96554aadb7_0', 'T11x_0', 'T1_0', 'T2_0',
       'T3_0', 'T4_0', 'T5_0', 'T6_0',
       'e430d571-76bd-45d4-8b01-76e3ef7c3ae1_0'], dtype=object)

In [298]:
def test_share_count(df):
    global share_counts
    xy = df.route_dir.to_numpy()
    schedule_gtfs_dataset_key = df.schedule_gtfs_dataset_key.iloc[0]
    for route_dir in xy:
        other_dirs = [x for x in xy if x != route_dir]
        for other_dir in other_dirs:
            key = schedule_gtfs_dataset_key+'__'+route_dir+'__'+other_dir
            if key in share_counts.keys():
                share_counts[key] += 1
            else:
                share_counts[key] = 1

In [299]:
share_counts = {}

In [300]:
test.groupby(['schedule_gtfs_dataset_key', 'stop_id']).apply(test_share_count)

In [301]:
# share_counts

In [302]:
qualify = {key: share_counts[key] for key in share_counts.keys() if share_counts[key] >= 5}

In [322]:
# qualify

## explore potential stops (not yet filtered)

In [304]:
stops = helpers.import_scheduled_stops(
    analysis_date,
    get_pandas = True,
    crs = PROJECT_CRS
)

In [305]:
gdf = stops >> inner_join(_, multi_only, on = ['stop_id']) >> select(_.stop_id, _.geometry)

In [306]:
gdf2 = stops >> inner_join(_, single_qual, on = ['stop_id']) >> select(_.stop_id, _.geometry)

In [307]:
gdf2.geometry = gdf2.buffer(400)

In [308]:
gdf = gdf.overlay(gdf2, how='difference')

In [309]:
# gdf.explore()

## lookup function/filtering steps

In [323]:
feeds_to_filter = np.unique([key.split('__')[0] for key in qualify.keys()])

In [326]:
feeds_no_qualify = np.unique([key.split('__')[0] for key in share_counts.keys() if key.split('__')[0] not in feeds_to_filter])

In [311]:
from calitp_data_analysis.tables import tbls

In [329]:
feeds_no_qualify = tbls.mart_transit_database.dim_gtfs_service_data() >> filter(_.gtfs_dataset_key.isin(feeds_no_qualify)) >> distinct(_.name, _.gtfs_dataset_key) >> collect()

In [335]:
df2 = tbls.mart_transit_database.dim_gtfs_service_data() >> filter(_.gtfs_dataset_key.isin(feeds_to_filter)) >> distinct(_.name, _.gtfs_dataset_key) >> collect()

In [336]:
df2 >> filter(_.name.str.contains('Long'))

Unnamed: 0,name,gtfs_dataset_key
37,Long Beach Transit – Long Beach Schedule,f1b35a50955aeb498533c1c6fdafbe44


In [337]:
# dataset_key = '015d67d5b75b5cf2b710bbadadfb75f5' #  Marin
# dataset_key = '3c62ad6ee589d56eca915ce291a5df0a' #  Yolobus 42A and 42B share 5+ stops so they match, which isn't desirable.
# dataset_key = '70c8a8b71c815224299523bf2115924a' #  SacRT
# dataset_key = '63029a23cb0e73f2a5d98a345c5e2e40' #  Elk Grove
dataset_key = 'f1b35a50955aeb498533c1c6fdafbe44' #  LBT

In [339]:
this_feed_qual = {key.split(dataset_key)[1][2:]:qualify[key] for key in qualify.keys() if key.split('__')[0] == dataset_key}

In [341]:
this_feed_qual

{'171_1__172_1': 13,
 '171_1__173_1': 13,
 '171_1__175_1': 15,
 '172_1__171_1': 13,
 '172_1__173_1': 24,
 '172_1__175_1': 13,
 '173_1__171_1': 13,
 '173_1__172_1': 24,
 '173_1__175_1': 13,
 '175_1__171_1': 15,
 '175_1__172_1': 13,
 '175_1__173_1': 13,
 '181_1__191_1': 5,
 '181_1__192_1': 5,
 '191_1__181_1': 5,
 '191_1__192_1': 36,
 '192_1__181_1': 5,
 '192_1__191_1': 36,
 '191_1__41_0': 6,
 '192_1__41_0': 6,
 '41_0__191_1': 6,
 '41_0__192_1': 6,
 '101_0__102_0': 11,
 '101_0__103_0': 12,
 '101_0__104_0': 11,
 '101_1__102_1': 8,
 '101_1__103_1': 10,
 '101_1__104_1': 8,
 '102_0__101_0': 11,
 '102_0__103_0': 10,
 '102_0__104_0': 11,
 '102_1__101_1': 8,
 '102_1__103_1': 7,
 '102_1__104_1': 8,
 '103_0__101_0': 12,
 '103_0__102_0': 10,
 '103_0__104_0': 10,
 '103_1__101_1': 10,
 '103_1__102_1': 7,
 '103_1__104_1': 7,
 '104_0__101_0': 11,
 '104_0__102_0': 11,
 '104_0__103_0': 10,
 '104_1__101_1': 8,
 '104_1__102_1': 8,
 '104_1__103_1': 7,
 '191_1__4_1': 5,
 '192_1__4_1': 5,
 '4_1__191_1': 5,
 '

In [342]:
list_pairs = [key.split('__') for key in this_feed_qual.keys()]

In [343]:
# list_pairs

In [344]:
arr = np.array(list_pairs[0])
for pair in list_pairs[1:]: arr = np.append(arr, np.array(pair))

In [345]:
any_appearance = np.unique(arr)

In [356]:
any_appearance

array(['101_0', '101_1', '102_0', '102_1', '103_0', '103_1', '104_0',
       '104_1', '111_1', '112_1', '121_0', '121_1', '131_0', '131_1',
       '141_1', '171_0', '171_1', '172_0', '172_1', '173_0', '173_1',
       '175_0', '175_1', '181_0', '181_1', '182_0', '182_1', '191_0',
       '191_1', '192_0', '192_1', '21_0', '21_1', '22_0', '22_1', '23_0',
       '23_1', '41_0', '41_1', '46_0', '46_1', '4_0', '4_1', '61_0',
       '91_0', '91_1', '92_0', '92_1', '93_0', '93_1'], dtype='<U5')

In [357]:
#  only need to check stops that qualify as multi-route only
stops_to_eval = multi_only >> filter(_.schedule_gtfs_dataset_key == dataset_key) >> distinct(_.stop_id)
st_to_eval = st_prepped >> filter(_.schedule_gtfs_dataset_key == dataset_key,
                                  _.stop_id.isin(stops_to_eval.stop_id),
                                  _.route_dir.isin(any_appearance)
                                 )

In [358]:
am_peak_hrs

[6, 7, 8]

In [359]:
pm_peak_hrs

[15, 16, 17, 18]

In [360]:
#  cut down problem space by checking if stops still could qual after filtering for any appearance
min_rows = min_freq * (len(am_peak_hrs) + len(pm_peak_hrs))

In [361]:
st_could_qual = (st_to_eval >> group_by(_.stop_id)
 >> mutate(could_qualify = _.shape[0] >= min_rows)
 >> ungroup()
 >> filter(_.could_qualify)
)

In [362]:
# one_stop = st_could_qual >> filter(_.stop_id == '23585') #  Yolobus 23017 knocked out in last step
one_stop = st_could_qual >> filter(_.stop_id == '1677') #  PCH/Redondo EB

In [363]:
one_stop

Unnamed: 0,feed_key,feed_timezone,base64_url,trip_id,stop_id,stop_sequence,timepoint,arrival_sec,departure_sec,arrival_hour,departure_hour,schedule_gtfs_dataset_key,route_id,direction_id,route_dir,peak,could_qualify
31825,18dc7c803a632739be475747f212df94,America/Los_Angeles,aHR0cHM6Ly9kcml2ZS5nb29nbGUuY29tL3VjP2V4cG9ydD...,10143116,1677,22,0.0,55462.0,55462.0,15.0,15.0,f1b35a50955aeb498533c1c6fdafbe44,172,1,172_1,pm_peak,True
31910,18dc7c803a632739be475747f212df94,America/Los_Angeles,aHR0cHM6Ly9kcml2ZS5nb29nbGUuY29tL3VjP2V4cG9ydD...,10143336,1677,22,0.0,25822.0,25822.0,7.0,7.0,f1b35a50955aeb498533c1c6fdafbe44,172,1,172_1,am_peak,True
31969,18dc7c803a632739be475747f212df94,America/Los_Angeles,aHR0cHM6Ly9kcml2ZS5nb29nbGUuY29tL3VjP2V4cG9ydD...,10143337,1677,22,0.0,29726.0,29726.0,8.0,8.0,f1b35a50955aeb498533c1c6fdafbe44,172,1,172_1,am_peak,True
32144,18dc7c803a632739be475747f212df94,America/Los_Angeles,aHR0cHM6Ly9kcml2ZS5nb29nbGUuY29tL3VjP2V4cG9ydD...,10143087,1677,22,0.0,27686.0,27686.0,7.0,7.0,f1b35a50955aeb498533c1c6fdafbe44,172,1,172_1,am_peak,True
32234,18dc7c803a632739be475747f212df94,America/Los_Angeles,aHR0cHM6Ly9kcml2ZS5nb29nbGUuY29tL3VjP2V4cG9ydD...,10143124,1677,22,0.0,62242.0,62242.0,17.0,17.0,f1b35a50955aeb498533c1c6fdafbe44,172,1,172_1,pm_peak,True
32274,18dc7c803a632739be475747f212df94,America/Los_Angeles,aHR0cHM6Ly9kcml2ZS5nb29nbGUuY29tL3VjP2V4cG9ydD...,10143126,1677,22,0.0,63982.0,63982.0,17.0,17.0,f1b35a50955aeb498533c1c6fdafbe44,172,1,172_1,pm_peak,True
32458,18dc7c803a632739be475747f212df94,America/Los_Angeles,aHR0cHM6Ly9kcml2ZS5nb29nbGUuY29tL3VjP2V4cG9ydD...,10143118,1677,22,0.0,56962.0,56962.0,15.0,15.0,f1b35a50955aeb498533c1c6fdafbe44,172,1,172_1,pm_peak,True
33314,18dc7c803a632739be475747f212df94,America/Los_Angeles,aHR0cHM6Ly9kcml2ZS5nb29nbGUuY29tL3VjP2V4cG9ydD...,10143130,1677,22,0.0,68362.0,68362.0,18.0,18.0,f1b35a50955aeb498533c1c6fdafbe44,172,1,172_1,pm_peak,True
33396,18dc7c803a632739be475747f212df94,America/Los_Angeles,aHR0cHM6Ly9kcml2ZS5nb29nbGUuY29tL3VjP2V4cG9ydD...,10143120,1677,22,0.0,58642.0,58642.0,16.0,16.0,f1b35a50955aeb498533c1c6fdafbe44,172,1,172_1,pm_peak,True
33459,18dc7c803a632739be475747f212df94,America/Los_Angeles,aHR0cHM6Ly9kcml2ZS5nb29nbGUuY29tL3VjP2V4cG9ydD...,10143122,1677,22,0.0,60442.0,60442.0,16.0,16.0,f1b35a50955aeb498533c1c6fdafbe44,172,1,172_1,pm_peak,True


In [364]:
this_stop_route_dirs = one_stop.route_dir.unique()
this_stop_route_dirs

array(['172_1', '173_1', '171_1', '175_1'], dtype=object)

In [365]:
list(this_stop_route_dirs)

['172_1', '173_1', '171_1', '175_1']

In [366]:
if this_stop_route_dirs.shape[0] == 2:
    #  quickly evaluate when only 2 route_dir
    stop_qualifies = list(this_stop_route_dirs) in list_pairs

In [None]:
stop_qualifies

In [347]:
df = multi_only >> filter(_.schedule_gtfs_dataset_key == dataset_key)

In [352]:
gdf = stops >> inner_join(_, df, on='stop_id') >> select(_.stop_id, _.geometry)

In [367]:
# gdf.explore()