In [1]:
import pandas as pd
import geopandas as gpd

from shared_utils import rt_utils, catalog_utils

from calitp_data_analysis import get_fs, geography_utils
from segment_speed_utils import helpers, time_series_utils, gtfs_schedule_wrangling, corridor_analysis
from segment_speed_utils.project_vars import SCHED_GCS, SEGMENT_GCS, GTFS_DATA_DICT, analysis_date

# Develop and test basic tools for corridor analysis

In [2]:
# Load the 'gtfs_analytics_data' catalog through shared_utils.catalog_utils.
catalog = catalog_utils.get_catalog('gtfs_analytics_data')

In [3]:
# Inspect the speedmap_segments catalog entry: its GCS directory, stage tables,
# rollup file names, and column lists.
catalog.speedmap_segments

{'dir': '${gcs_paths.SEGMENT_GCS}', 'stage1': '${speeds_tables.vp_dwell}', 'proxy_stop_times': 'stop_time_expansion/speedmap_stop_times', 'stage2': 'nearest/nearest_vp_speedmap_proxy', 'stage3': 'speedmap/stop_arrivals_proxy', 'stage3b': 'speedmap/stop_arrivals', 'stage4': 'speedmap/speeds', 'trip_stop_cols': ['trip_instance_key', 'stop_sequence', 'stop_sequence1'], 'shape_stop_cols': ['shape_array_key', 'shape_id'], 'stop_pair_cols': ['stop_pair', 'stop_pair_name', 'segment_id'], 'route_dir_cols': ['route_id', 'direction_id'], 'segments_file': 'segment_options/speedmap_segments', 'shape_stop_single_segment': 'rollup_singleday/speeds_shape_speedmap_segments', 'shape_stop_single_segment_detail': 'rollup_singleday/speeds_shape_speedmap_segments_detail', 'route_dir_single_segment': 'rollup_singleday/speeds_route_dir_speedmap_segments', 'route_dir_multi_segment': 'rollup_multiday/speeds_route_dir_speedmap_segments', 'min_trip_minutes': '${speed_vars.time_min_cutoff}', 'max_trip_minutes': 1

In [4]:
# Show the analysis date imported from segment_speed_utils.project_vars.
analysis_date

'2024-12-11'

In [5]:
# Alternative input kept for reference: the stage4 table ('speedmap/speeds')
# rather than the single-day detail rollup used in the next cell.
# path = f'{catalog.speedmap_segments.dir}{catalog.speedmap_segments.stage4}_{analysis_date}.parquet'

In [6]:
# Build the path to the `shape_stop_single_segment_detail` rollup parquet for analysis_date,
# using the directory and file stem from the speedmap_segments catalog entry.
path = f'{catalog.speedmap_segments.dir}{catalog.speedmap_segments.shape_stop_single_segment_detail}_{analysis_date}.parquet'

In [7]:
# Confirm the resolved path before reading.
path

'gs://calitp-analytics-data/data-analyses/rt_segment_speeds/rollup_singleday/speeds_shape_speedmap_segments_detail_2024-12-11.parquet'

In [8]:
# Read the aggregated segment speeds with geopandas; the preview below shows a geometry column.
detail = gpd.read_parquet(path)

In [9]:
# Preview the first three rows to check the available columns.
detail.head(3)

Unnamed: 0,schedule_gtfs_dataset_key,shape_array_key,shape_id,route_id,direction_id,stop_pair,stop_pair_name,segment_id,time_of_day,p50_mph,...,p80_mph,name,caltrans_district,organization_source_record_id,organization_name,base64_url,geometry,n_trips_sch,trips_hr_sch,route_short_name
0,015d67d5b75b5cf2b710bbadadfb75f5,43f0d67e5131502a51f9330e04bacc55,20,228,0.0,40512__40514,Sir Francis Drake Blvd & Olema Rd__Sir Francis...,40512-40514-1,AM Peak,11.83,...,11.83,Bay Area 511 Marin Schedule,04 - Oakland,recNOb7pqBRlQVG5e,Marin County Transit District,aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZW...,"LINESTRING (-122.60413 38.00022, -122.60416 38...",3,1.0,228
1,015d67d5b75b5cf2b710bbadadfb75f5,43f0d67e5131502a51f9330e04bacc55,20,228,0.0,40514__40516,Sir Francis Drake Blvd & Alhambra Circle__Sir ...,40514-40516-1,AM Peak,15.52,...,19.79,Bay Area 511 Marin Schedule,04 - Oakland,recNOb7pqBRlQVG5e,Marin County Transit District,aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZW...,"LINESTRING (-122.60045 37.99908, -122.60037 37...",3,1.0,228
2,015d67d5b75b5cf2b710bbadadfb75f5,43f0d67e5131502a51f9330e04bacc55,20,228,0.0,40516__40518,Sir Francis Drake Blvd At Drake Manor Apts__Si...,40516-40518-1,AM Peak,15.69,...,20.79,Bay Area 511 Marin Schedule,04 - Oakland,recNOb7pqBRlQVG5e,Marin County Transit District,aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZW...,"LINESTRING (-122.59907 37.99704, -122.59894 37...",3,1.0,228


## Need the trip-level (pre-aggregation) GeoDataFrame to calculate metrics properly

In [15]:
# Import trip-level (pre-aggregation) speeds for analysis_date through the project helper.
# The call prints the share of segments that have no speed (see the output below).
st4 = corridor_analysis.import_trip_speeds(analysis_date)

2.2 percent of segments have no speed


## Corridor specification

In [17]:
# One row per organization (record id + name), used to look up an operator's id.
org_id_cols = ['organization_source_record_id', 'organization_name']
orgs = detail[org_id_cols].drop_duplicates()

In [18]:
# Look up San Francisco operators by name to find their organization_source_record_id.
# regex=False: 'San Fr' is a literal substring, not a pattern.
# na=False: rows with a missing organization_name are treated as non-matches; without it
# the mask would contain NaN and boolean indexing would raise.
orgs[orgs.organization_name.str.contains('San Fr', regex=False, na=False)]

Unnamed: 0,organization_source_record_id,organization_name
365619,rechaapWbeffO33OX,City and County of San Francisco
403280,recmatCuQAUrNcs8j,San Francisco Bay Area Water Emergency Transit...


## Define corridor

In [19]:
# Earlier test corridor, kept for reference (not used below):
# shape_id = '4953'
# start_seg_id = '18088-18089-1'
# end_seg_id = '16800-16806-1'

# organization_source_record_id for "City and County of San Francisco" (from the lookup above)
sf = 'rechaapWbeffO33OX'
# The corridor is bounded by a start and end segment_id on a single shape.
shape_id = '800'
start_seg_id = '14970-17900-2'
end_seg_id = '16357-16358-1'

In [20]:
# Derive the corridor from the aggregated segments for this operator and shape,
# between the chosen start and end segment ids.
gdf = corridor_analysis.corridor_from_segments(
    speed_segments_gdf=detail,
    organization_source_record_id=sf,
    shape_id=shape_id,
    start_seg_id=start_seg_id,
    end_seg_id=end_seg_id,
)

POINT (-122.40550254785889 37.76900326502991) POINT (-122.40217200000001 37.724137999999975)


In [23]:
# Uncomment to inspect the corridor on an interactive map (presumably gdf is a GeoDataFrame).
# gdf.explore()

## Corridor Measurements

Previous logic:

For each trip, measure from the last stop before entering the corridor to the first stop after exiting the corridor. This was done using `stop_sequence`.

Now:

* First, sjoin (spatial join) with the aggregated data, which has geometry. Sjoining on segments is equivalent to the previous methodology, since it yields the span from the last stop before entry to the first stop after exit.
* Skip the scheduled delay metric for now.

In [24]:
# Collect the trip-level records for the corridor. Inputs: aggregated segments (detail),
# the corridor (gdf), and trip-level speeds (st4).
# NOTE(review): helper internals are not visible here; the notes above say it sjoins on segments.
corridor_trips = corridor_analysis.find_corridor_data(detail, gdf, st4)

In [25]:
# Compute per-trip corridor metrics (meters, seconds, speed in m/s and mph).
# The helper prints the share of zero-second records and of speeds > 80 mph (see output below).
corridor_results = corridor_analysis.analyze_corridor_trips(corridor_trips)

1.7 percent of zero seconds
7.4 percent of speeds > 80mph


In [26]:
# Display the per-trip results (pandas truncates the middle rows in the output).
corridor_results

Unnamed: 0,trip_instance_key,corridor_meters,corridor_seconds,corridor_speed_mps,corridor_speed_mph,route_short_name,route_id,shape_array_key,shape_id,schedule_gtfs_dataset_key
0,0009706c373521826e85fcabfc475641,627.417614,194.0,3.234111,7.234707,33,33,11a8f7eaad706c9bc8b883bf18bde052,3301,7cc0cb1871dfd558f11a2885c145d144
1,003dc9be48a08ed62e596246f083f2dd,462.390685,67.0,6.901354,15.438328,24,24,30d30239a2e9dff3b79624af67bbb05e,2453,7cc0cb1871dfd558f11a2885c145d144
2,0044ad7f19493edca62dc9c6def42817,309.921442,29.0,10.686946,23.906699,44,44,f33b999b25b084d0f7198541278cb793,4400,7cc0cb1871dfd558f11a2885c145d144
3,00a3c1e963bc7fa17caf120661ce5ac5,438.652854,140.0,3.133235,7.009046,29,29,0c3581fa197c6182af85722892967ac1,2950,7cc0cb1871dfd558f11a2885c145d144
4,00bf2a85d41cf00a203fb8d217e64a13,760.144494,258.0,2.946296,6.590865,24,24,9490825d5c4c929e05f10608787314f7,2401,7cc0cb1871dfd558f11a2885c145d144
...,...,...,...,...,...,...,...,...,...,...
1853,ffc1c876a1ffe104a861e6e93e95e265,5210.443147,1303.0,3.998805,8.945327,9,9,1243bb0c08589bd69384a2822e4a77d4,900,7cc0cb1871dfd558f11a2885c145d144
1854,ffd560d60d50484e631ec562f753c399,319.620675,103.0,3.103113,6.941665,48,48,8c07add2085354815e15e2f2f6824cbf,4850,7cc0cb1871dfd558f11a2885c145d144
1855,ffeeaf46feefa862acd818185380c4e7,760.895305,241.0,3.157242,7.062750,44,44,f33b999b25b084d0f7198541278cb793,4400,7cc0cb1871dfd558f11a2885c145d144
1856,fff23c3c81c743754b97e079f9e68047,452.572053,89.0,5.085079,11.375322,44,44,0503007182a0031c7bbd1d5287a2f8de,4456,7cc0cb1871dfd558f11a2885c145d144


In [27]:
# Summary statistics of per-trip corridor speed (mph), one row per route.
(
    corridor_results
    .groupby('route_short_name')
    ['corridor_speed_mph']
    .describe()
)

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
route_short_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
19,55.0,11.73602,7.126207,1.574957,8.773816,10.599169,12.036919,57.145756
22,212.0,10.090163,6.754561,1.858691,6.692021,8.426232,10.804697,67.709459
23,54.0,10.506523,5.384685,1.996256,7.090824,9.635138,13.431518,26.105401
24,173.0,11.792317,9.174229,3.802823,7.524085,9.394714,11.727195,64.647998
29,160.0,10.60098,7.250062,0.749631,6.698677,8.890529,11.89509,51.645602
292,94.0,13.326078,2.519433,8.683131,11.539397,12.945584,14.711138,22.11809
33,79.0,11.484993,10.836814,3.130412,7.13878,8.717598,11.048662,72.131022
397,5.0,22.579561,6.759814,17.507468,18.24301,20.585908,22.395658,34.165762
44,187.0,11.884644,8.598061,1.476256,7.552099,9.925526,13.043278,72.314549
48,140.0,9.701642,7.829498,0.163732,6.145391,8.077077,9.96343,51.4301


In [28]:
# Keep only trips on the Route 8 family (local plus the AX/BX variants).
routes = ['8AX', '8BX', '8']
is_route_8_family = corridor_results['route_short_name'].isin(routes)
rt8 = corridor_results[is_route_8_family]

In [29]:
# Model a flat 30-second saving per trip for the Route 8 family; the result adds the
# improved_corridor_seconds / improved_corridor_speed_* columns shown below.
# NOTE(review): some output rows have round corridor_meters values (2000.0, 5000.0) together
# with unusually high speeds — confirm these are real distances and not fallback values.
corridor_analysis.analyze_corridor_improvements(rt8, trip_seconds_saved=30)

Unnamed: 0,trip_instance_key,corridor_meters,corridor_seconds,corridor_speed_mps,corridor_speed_mph,route_short_name,route_id,shape_array_key,shape_id,schedule_gtfs_dataset_key,improved_corridor_seconds,improved_corridor_speed_mps,improved_corridor_speed_mph
5,00eec7a988d3afb24ea22340eb4929c9,7535.184523,1562.0,4.824062,10.791426,8AX,8AX,5183639c45e26411e1562c1534f7af26,36,7cc0cb1871dfd558f11a2885c145d144,1532.0,4.918528,11.002747
13,0296fcb09fc0ffbcbefe5b16a5f86fac,6576.864680,2225.0,2.955894,6.612335,8AX,8AX,ff6de96c284c1e74197ac728c66d0ef1,37,7cc0cb1871dfd558f11a2885c145d144,2195.0,2.996294,6.702709
43,05f2e1412144e6c0e9583503efbd331a,5574.615307,693.0,8.044178,17.994826,8,8,88039de0a52cdf6be3d52ebe6315a48f,800,7cc0cb1871dfd558f11a2885c145d144,663.0,8.408168,18.809072
44,0603537639160755ea929b70d665f39a,6685.387624,609.0,10.977648,24.556999,8,8,8ed76369a70c40b1f67d69dd9fc31fab,850,7cc0cb1871dfd558f11a2885c145d144,579.0,11.546438,25.829382
45,063f5e3cff4726d5b2c6100ac6bbd9e8,2000.000000,111.0,18.018018,40.306306,8BX,8BX,5a72bf15b9501c6ae24cef7204b7b160,38,7cc0cb1871dfd558f11a2885c145d144,81.0,24.691358,55.234568
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1805,f84fa24228d7aedbe8f8410fbb3364c3,6968.093716,858.0,8.121321,18.167396,8,8,8ed76369a70c40b1f67d69dd9fc31fab,850,7cc0cb1871dfd558f11a2885c145d144,828.0,8.415572,18.825635
1815,f954935dfc66acad10bf12d12117ab04,6685.387624,1622.0,4.121694,9.220229,8,8,8ed76369a70c40b1f67d69dd9fc31fab,850,7cc0cb1871dfd558f11a2885c145d144,1592.0,4.199364,9.393977
1819,f9d76ed2612e5a49765cde543919c61b,5000.000000,247.0,20.242915,45.283401,8,8,88039de0a52cdf6be3d52ebe6315a48f,800,7cc0cb1871dfd558f11a2885c145d144,217.0,23.041475,51.543779
1822,fa20f4399aafc333aad75221b7c1f70e,6685.387624,1798.0,3.718236,8.317693,8,8,8ed76369a70c40b1f67d69dd9fc31fab,850,7cc0cb1871dfd558f11a2885c145d144,1768.0,3.781328,8.458830
