In [1]:
import sys

In [2]:
import intake
import pandas as pd
import geopandas as gpd
import numpy as np
from calitp_data_analysis import geography_utils
from siuba import *
import gtfs_segments
import shapely

In [3]:
from shared_utils import catalog_utils, rt_dates, rt_utils
GTFS_DATA_DICT = catalog_utils.get_catalog("gtfs_analytics_data")

In [4]:
from segment_speed_utils import helpers

In [5]:
from tqdm import tqdm
tqdm.pandas(desc="Progress")

In [6]:
analysis_date = rt_dates.DATES['feb2025']

# Aggregations

Combine trip-level info with border zone info and stops in tract/border info, then aggregate.

## Methodology

* analysis segment in tract & shape has 1+ stops in tract -> allocate vrm, vrh to that tract
* analysis segment in border zone & shape has 1+ stops in zone -> allocate vrm, vrh to that zone
    * sub-allocate border zone vrm, vrh 50/50 to bordering tracts
* analysis segment in tract or border zone but shape has 0 stops in tract/zone
    * allocate 50/50 to adjacent tracts or zones, repeat above 

In [7]:
trip_tsi_alameda = pd.concat([pd.read_parquet('./trips_set1_tsi_segs_alameda_2025-02-12.parquet'),
                             pd.read_parquet('./trips_set2_tsi_segs_alameda_2025-02-12.parquet')])

In [9]:
trip_tsi_alameda.head(3)

Unnamed: 0,shape_array_key,tsi_segment_id,start_meters,tsi_segment_meters,trip_instance_key,arrival_sec,arrival_sec_next,segment_seconds
71580,4dff2f7bd084547a24529a02806234d0,f4d8a196-1a7a-4a34-8ab3-1310c39bb429,10873.972623,266.319586,7936e1ebd5e663bc3c0e621579b40329,87052.024038,87073.502298,21.47826
71596,4dff2f7bd084547a24529a02806234d0,f4d8a196-1a7a-4a34-8ab3-1310c39bb429,10873.972623,266.319586,3b584e25a7ce90b5c7814e8ace9598ea,26030.851102,26053.502298,22.651195
71604,4dff2f7bd084547a24529a02806234d0,f4d8a196-1a7a-4a34-8ab3-1310c39bb429,10873.972623,266.319586,8b67bae021d2b2a32d8d99ab369f0762,34488.270644,34518.003063,29.73242


In [None]:
trip_tsi_alameda.drop_duplicates(subset=['shape_array_key']).explore()

In [15]:
def read_shapes_stopping_in_seg(analysis_date):
    cols = ['shape_array_key', 'tsi_segment_id']
    sstb = pd.read_parquet(f'./shape_stops_tracts_borders_{analysis_date}.parquet')[cols]
    sstb['has_stop'] = True
    return sstb

In [16]:
sstb = read_shapes_stopping_in_seg(analysis_date)

In [17]:
sstb.head(3)

Unnamed: 0,shape_array_key,tsi_segment_id,has_stop
0,3c4985abe54a0185f7b7e9dc726d5e11,06001400100,True
1,3c4985abe54a0185f7b7e9dc726d5e11,41846e2d-ef57-474b-acc2-6d4aaed778d7,True
2,3c4985abe54a0185f7b7e9dc726d5e11,bba65910-4679-477c-944c-c662c6341ee6,True


In [18]:
def attach_stopping_info(trip_segment_df, shape_stopping_df):
    '''
    '''
    df = trip_segment_df.merge(shape_stopping_df, how='left', on=['shape_array_key', 'tsi_segment_id'])
    df.has_stop = df.has_stop.fillna(False)
    return df

In [23]:
joined = attach_stopping_info(trip_tsi_alameda, sstb)

In [22]:
bart_shape_array = 'db1920458bee7ea9de34b68eb9f4d8a5'

## test aggregation

In [24]:
bart = joined.query('shape_array_key == @bart_shape_array')

In [80]:
# bart.sort_values(by=['trip_instance_key', 'start_meters'])

In [45]:
# gpd.read_parquet(f'./shape_stops_tracts_borders_{analysis_date}.parquet').query('shape_array_key == @bart_shape_array').explore()

### get "brackets"

In [77]:
def locate_stopping_segments(row, df):
    if row.has_stop:
        return row
    else:
        id_before = None
        id_after = None
        # print(row.name)
        stop_before = df.loc[:(row.name - 1)].query('has_stop')
        if not stop_before.empty:
            id_before = stop_before.query('start_meters == start_meters.max()').tsi_segment_id.iloc[0]
        stop_after = df.loc[(row.name + 1):].query('has_stop')
        if not stop_after.empty:
            id_after = stop_after.query('start_meters == start_meters.min()').tsi_segment_id.iloc[0]
        row['stopping_segments'] = (id_before, id_after)
        # return (id_before, id_after)
        return row

In [95]:
def assign_stopping_sequences(joined_df):
    '''
    with a joined trip tsi segment df and shape
    stopping df, create a new df by shape showing 
    which tsi segments (tracts or border zones) vrh & vrm
    should be allocated to when there are no stops for that
    shape in that segment
    '''
    cols = ['shape_array_key', 'start_meters', 'tsi_segment_id', 'has_stop']
    simple_sequence_df = (joined_df[cols]
                          .drop_duplicates()
                          .sort_values(['shape_array_key', 'start_meters'])
                          .reset_index(drop=True)
                         )
    fn = lambda df: df.apply(locate_stopping_segments, df=df, axis=1)
    return simple_sequence_df.groupby('shape_array_key', group_keys=False).progress_apply(fn)

In [96]:
stopping_sequences_df = assign_stopping_sequences(joined)

Progress: 100%|██████████| 501/501 [00:44<00:00, 11.30it/s]


In [97]:
stopping_sequences_df

Unnamed: 0,has_stop,shape_array_key,start_meters,stopping_segments,tsi_segment_id
0,False,0126d45f3bae05fa2246e3370c3d4a18,69.573988,"(None, 06001451601)",49538b67-d6a5-498d-b402-975afcaeaf3a
1,False,0126d45f3bae05fa2246e3370c3d4a18,281.510017,"(None, 06001451601)",3b82dab3-89ee-4c8d-af68-5b1613922c2d
2,True,0126d45f3bae05fa2246e3370c3d4a18,471.172398,,06001451601
3,False,0126d45f3bae05fa2246e3370c3d4a18,798.187628,"(06001451601, 06001451501)",06001451503
4,False,0126d45f3bae05fa2246e3370c3d4a18,1063.458980,"(06001451601, 06001451501)",e8dc2f6a-8a2c-43cf-8596-0e5f0a26b81f
...,...,...,...,...,...
9850,False,ffe1d6cb204f362582b15e892ac4f6af,48889.515488,"(06001422900, 06001422200)",06001422300
9851,True,ffe1d6cb204f362582b15e892ac4f6af,49715.033241,,06001422200
9852,False,ffe1d6cb204f362582b15e892ac4f6af,50242.876198,"(06001422200, None)",06001421900
9853,False,ffe1d6cb204f362582b15e892ac4f6af,51067.482867,"(06001422200, None)",95338956-e219-4293-b169-60e125f76f58


In [102]:
joined = joined.merge(stopping_sequences_df, on=['has_stop', 'shape_array_key', 'start_meters', 'tsi_segment_id'])

In [None]:
def divide_to_segments

In [106]:
joined

Unnamed: 0,shape_array_key,tsi_segment_id,start_meters,tsi_segment_meters,trip_instance_key,arrival_sec,arrival_sec_next,segment_seconds,has_stop,stopping_segments
0,4dff2f7bd084547a24529a02806234d0,f4d8a196-1a7a-4a34-8ab3-1310c39bb429,10873.972623,266.319586,7936e1ebd5e663bc3c0e621579b40329,87052.024038,87073.502298,21.478260,False,"(06001404900, 06001407800)"
1,4dff2f7bd084547a24529a02806234d0,f4d8a196-1a7a-4a34-8ab3-1310c39bb429,10873.972623,266.319586,3b584e25a7ce90b5c7814e8ace9598ea,26030.851102,26053.502298,22.651195,False,"(06001404900, 06001407800)"
2,4dff2f7bd084547a24529a02806234d0,f4d8a196-1a7a-4a34-8ab3-1310c39bb429,10873.972623,266.319586,8b67bae021d2b2a32d8d99ab369f0762,34488.270644,34518.003063,29.732420,False,"(06001404900, 06001407800)"
3,4dff2f7bd084547a24529a02806234d0,f4d8a196-1a7a-4a34-8ab3-1310c39bb429,10873.972623,266.319586,eadbdf537773eeeb23426575e3c5ea98,76609.443579,,,False,"(06001404900, 06001407800)"
4,4dff2f7bd084547a24529a02806234d0,f4d8a196-1a7a-4a34-8ab3-1310c39bb429,10873.972623,266.319586,fcb94a2efd9291a48c52b10afca3badf,70728.270644,,,False,"(06001404900, 06001407800)"
...,...,...,...,...,...,...,...,...,...,...
303511,a580a611c37012412caf9571ce9658cf,06001422900,13369.503270,619.164990,df567f62abb7cce8e97256c661403bd1,59925.474795,60093.846033,168.371238,True,
303512,a580a611c37012412caf9571ce9658cf,06001422900,13369.503270,619.164990,df567f62abb7cce8e97256c661403bd1,59925.474795,60093.846033,168.371238,True,
303513,a580a611c37012412caf9571ce9658cf,be695707-d9e6-4e01-ac5c-297e141a73a9,13981.205857,546.212833,df567f62abb7cce8e97256c661403bd1,60093.846033,60188.771318,94.925285,False,"(06001422900, None)"
303514,a580a611c37012412caf9571ce9658cf,617fe0ff-8999-4e5e-8d06-3a2d65eeb616,14460.984713,817.126163,df567f62abb7cce8e97256c661403bd1,60188.771318,60353.057328,164.286010,False,"(06001422900, None)"


In [105]:
joined.explode(column='stopping_segments')

Unnamed: 0,shape_array_key,tsi_segment_id,start_meters,tsi_segment_meters,trip_instance_key,arrival_sec,arrival_sec_next,segment_seconds,has_stop,stopping_segments
0,4dff2f7bd084547a24529a02806234d0,f4d8a196-1a7a-4a34-8ab3-1310c39bb429,10873.972623,266.319586,7936e1ebd5e663bc3c0e621579b40329,87052.024038,87073.502298,21.478260,False,06001404900
0,4dff2f7bd084547a24529a02806234d0,f4d8a196-1a7a-4a34-8ab3-1310c39bb429,10873.972623,266.319586,7936e1ebd5e663bc3c0e621579b40329,87052.024038,87073.502298,21.478260,False,06001407800
1,4dff2f7bd084547a24529a02806234d0,f4d8a196-1a7a-4a34-8ab3-1310c39bb429,10873.972623,266.319586,3b584e25a7ce90b5c7814e8ace9598ea,26030.851102,26053.502298,22.651195,False,06001404900
1,4dff2f7bd084547a24529a02806234d0,f4d8a196-1a7a-4a34-8ab3-1310c39bb429,10873.972623,266.319586,3b584e25a7ce90b5c7814e8ace9598ea,26030.851102,26053.502298,22.651195,False,06001407800
2,4dff2f7bd084547a24529a02806234d0,f4d8a196-1a7a-4a34-8ab3-1310c39bb429,10873.972623,266.319586,8b67bae021d2b2a32d8d99ab369f0762,34488.270644,34518.003063,29.732420,False,06001404900
...,...,...,...,...,...,...,...,...,...,...
303513,a580a611c37012412caf9571ce9658cf,be695707-d9e6-4e01-ac5c-297e141a73a9,13981.205857,546.212833,df567f62abb7cce8e97256c661403bd1,60093.846033,60188.771318,94.925285,False,
303514,a580a611c37012412caf9571ce9658cf,617fe0ff-8999-4e5e-8d06-3a2d65eeb616,14460.984713,817.126163,df567f62abb7cce8e97256c661403bd1,60188.771318,60353.057328,164.286010,False,06001422900
303514,a580a611c37012412caf9571ce9658cf,617fe0ff-8999-4e5e-8d06-3a2d65eeb616,14460.984713,817.126163,df567f62abb7cce8e97256c661403bd1,60188.771318,60353.057328,164.286010,False,
303515,a580a611c37012412caf9571ce9658cf,9350e796-e37b-4997-b05f-db9c931ef770,15263.214466,538.937161,df567f62abb7cce8e97256c661403bd1,60353.057328,60480.000000,126.942672,False,06001422900
