In [1]:
import sys

In [20]:
import intake
import pandas as pd
import geopandas as gpd
from calitp_data_analysis import geography_utils
from siuba import *
import gtfs_segments

In [3]:
from shared_utils import catalog_utils, rt_dates
# Catalog object for "gtfs_analytics_data"; read below to build table paths.
GTFS_DATA_DICT = catalog_utils.get_catalog("gtfs_analytics_data")

In [4]:
from segment_speed_utils import helpers

In [5]:
# Single analysis date used throughout, looked up under the 'feb2025' key.
analysis_date = rt_dates.DATES['feb2025']

# Exploratory Analysis

Start working out how to identify and count "border zones": places where transit runs along Census Tract boundaries, so that vehicle revenue hours/miles (VRH/VRM) should be split between the bordering tracts (avoiding double-counting or arbitrary allocation to only one border tract).

## trip - linestring interpolation

could use this again? https://github.com/cal-itp/data-analyses/blob/530f2d5cf4419a2403d6485845d91ac4bc65e672/_shared_utils/shared_utils/rt_utils.py#L574-L579

https://github.com/cal-itp/data-analyses/blob/530f2d5cf4419a2403d6485845d91ac4bc65e672/rt_delay/rt_analysis/rt_parser.py#L90

* project all stop x shape combos either natively or using `gtfs_segments`
    * https://github.com/UTEL-UIUC/gtfs_segments/blob/871447705f7058da3f05f86aa9da42b75996808c/gtfs_segments/geom_utils.py#L437
    * `nearest_points` should be usable...
* `fillna` in both directions: fill a missing arrival time from the departure time, and a missing departure time from the arrival time
* then dropna

## Projected stop times via `stop_times_direction`

In [6]:
def attach_projected_stop_times(analysis_date: str) -> pd.DataFrame:
    '''
    Attach `stop_meters` from the `stop_times_direction` table to the
    scheduled stop_times for one analysis date.

    Steps:
    1. Scheduled stop_times are merged with scheduled trips on
       `feed_key` + `trip_id` to pick up `trip_instance_key`.
    2. The result is merged with `stop_times_direction` on
       `trip_instance_key` + `stop_sequence` to pick up `stop_meters`
       (presumably the stop's projected distance along its shape, in
       meters -- confirm against the upstream table definition).

    Both merges use the pandas default (inner join), so stop_times without a
    matching trip or without a matching `stop_times_direction` row are dropped.

    Parameters
    ----------
    analysis_date : str
        Date string used as the suffix of the `stop_times_direction` parquet
        file name and passed to the `helpers` import functions.

    Returns
    -------
    pd.DataFrame
        Scheduled stop_times columns plus `trip_instance_key` and
        `stop_meters`. Arrival/departure values are passed through as-is,
        including any nulls.
    '''
    rt_vs_schedule = GTFS_DATA_DICT.rt_vs_schedule_tables
    path = f'{rt_vs_schedule.dir}{rt_vs_schedule.stop_times_direction}_{analysis_date}.parquet'
    ST_DIR_COLS = ['trip_instance_key', 'stop_sequence', 'stop_meters']
    # Only non-geometry columns are needed, so read just those with pandas
    # rather than loading the full GeoDataFrame and discarding the geometry.
    st_dir = pd.read_parquet(path, columns=ST_DIR_COLS)

    st = helpers.import_scheduled_stop_times(analysis_date, get_pandas=True)
    trips = helpers.import_scheduled_trips(
        analysis_date,
        columns=['trip_id', 'trip_instance_key', 'feed_key'],
    )
    # trip_instance_key is the join key shared with stop_times_direction.
    st_with_keys = st.merge(trips, on=['feed_key', 'trip_id'])
    return st_with_keys.merge(st_dir, on=['trip_instance_key', 'stop_sequence'])

In [7]:
# Build the merged stop_times + stop_meters table for the analysis date.
st_proj = attach_projected_stop_times(analysis_date)

In [10]:
# Count rows with (True) and without (False) a missing arrival_sec.
st_proj.arrival_sec.isna().value_counts()

False    4236225
True       90804
Name: arrival_sec, dtype: int64

In [11]:
# Same check for departure_sec.
st_proj.departure_sec.isna().value_counts()

False    4236225
True       90804
Name: departure_sec, dtype: int64

In [12]:
# Show the rows whose departure_sec is missing.
st_proj.query('departure_sec.isna()')

Unnamed: 0,feed_key,feed_timezone,base64_url,trip_id,stop_id,stop_sequence,timepoint,arrival_sec,departure_sec,arrival_hour,departure_hour,trip_instance_key,stop_meters
3103,c8c2eeefda895cf2e9971029b998af1a,US/Pacific,aHR0cHM6Ly9kYXRhLnRyaWxsaXVtdHJhbnNpdC5jb20vZ3...,t_5674198_b_33098_tn_0,7015,17,0.0,,,,,c6a594134f701ab6f78ef49fd413a124,6081.455776
3104,c8c2eeefda895cf2e9971029b998af1a,US/Pacific,aHR0cHM6Ly9kYXRhLnRyaWxsaXVtdHJhbnNpdC5jb20vZ3...,t_5674198_b_33098_tn_0,801,21,0.0,,,,,c6a594134f701ab6f78ef49fd413a124,5060.554260
3108,c8c2eeefda895cf2e9971029b998af1a,US/Pacific,aHR0cHM6Ly9kYXRhLnRyaWxsaXVtdHJhbnNpdC5jb20vZ3...,t_5674198_b_33098_tn_0,7002,2,0.0,,,,,c6a594134f701ab6f78ef49fd413a124,2517.475724
3109,c8c2eeefda895cf2e9971029b998af1a,US/Pacific,aHR0cHM6Ly9kYXRhLnRyaWxsaXVtdHJhbnNpdC5jb20vZ3...,t_5674198_b_33098_tn_0,803,23,0.0,,,,,c6a594134f701ab6f78ef49fd413a124,5060.554260
3110,c8c2eeefda895cf2e9971029b998af1a,US/Pacific,aHR0cHM6Ly9kYXRhLnRyaWxsaXVtdHJhbnNpdC5jb20vZ3...,t_5674198_b_33098_tn_0,805,25,0.0,,,,,c6a594134f701ab6f78ef49fd413a124,5341.559600
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4325324,0319b0617cc88876f918f8c63abc1ac0,America/Los_Angeles,aHR0cHM6Ly91bml0cmFucy51Y2RhdmlzLmVkdS9tZWRpYS...,VL_13_outbound_0755,22286,3,0.0,,,,,977e6569b6ca81091c34d146f897c585,0.000000
4325325,0319b0617cc88876f918f8c63abc1ac0,America/Los_Angeles,aHR0cHM6Ly91bml0cmFucy51Y2RhdmlzLmVkdS9tZWRpYS...,VL_13_outbound_0755,22306,4,0.0,,,,,977e6569b6ca81091c34d146f897c585,435.273048
4325329,0319b0617cc88876f918f8c63abc1ac0,America/Los_Angeles,aHR0cHM6Ly91bml0cmFucy51Y2RhdmlzLmVkdS9tZWRpYS...,D_13_outbound_0755,22225,3,0.0,,,,,3b1e14196ab4aab37d25b36bca550f95,1535.322402
4326230,32db8314a8fb7d99b61296c37545c460,America/Los_Angeles,aHR0cHM6Ly93d3cuY2l0eW9mZ2xlbmRvcmEuZ292L2ZpbG...,Gold-Line-Commuter-Shuttle-North_Eastbound-wkd...,2619576,3,0.0,,,,,48d7051b118a9cab8357908c201deae1,4518.785645


In [16]:
# Inspect one trip that has missing times, ordered by stop_sequence.
st_proj.query('trip_id == "VL_13_outbound_0755"').sort_values('stop_sequence')

Unnamed: 0,feed_key,feed_timezone,base64_url,trip_id,stop_id,stop_sequence,timepoint,arrival_sec,departure_sec,arrival_hour,departure_hour,trip_instance_key,stop_meters
4325323,0319b0617cc88876f918f8c63abc1ac0,America/Los_Angeles,aHR0cHM6Ly91bml0cmFucy51Y2RhdmlzLmVkdS9tZWRpYS...,VL_13_outbound_0755,22256,1,1.0,28500.0,28500.0,7.0,7.0,977e6569b6ca81091c34d146f897c585,4654.515682
4325326,0319b0617cc88876f918f8c63abc1ac0,America/Los_Angeles,aHR0cHM6Ly91bml0cmFucy51Y2RhdmlzLmVkdS9tZWRpYS...,VL_13_outbound_0755,22361,2,1.0,28680.0,28680.0,7.0,7.0,977e6569b6ca81091c34d146f897c585,322.88297
4325324,0319b0617cc88876f918f8c63abc1ac0,America/Los_Angeles,aHR0cHM6Ly91bml0cmFucy51Y2RhdmlzLmVkdS9tZWRpYS...,VL_13_outbound_0755,22286,3,0.0,,,,,977e6569b6ca81091c34d146f897c585,0.0
4325325,0319b0617cc88876f918f8c63abc1ac0,America/Los_Angeles,aHR0cHM6Ly91bml0cmFucy51Y2RhdmlzLmVkdS9tZWRpYS...,VL_13_outbound_0755,22306,4,0.0,,,,,977e6569b6ca81091c34d146f897c585,435.273048
4325327,0319b0617cc88876f918f8c63abc1ac0,America/Los_Angeles,aHR0cHM6Ly91bml0cmFucy51Y2RhdmlzLmVkdS9tZWRpYS...,VL_13_outbound_0755,22304,5,1.0,28920.0,28920.0,8.0,8.0,977e6569b6ca81091c34d146f897c585,2899.976133


In [18]:
# Save the merged table to a parquet file (relative path) named by analysis date.
st_proj.to_parquet(f'st_proj_{analysis_date}.parquet')

## projecting TSI interpolation points