In [2]:
import pandas as pd
import geopandas as gpd

from shared_utils import rt_utils, catalog_utils

from calitp_data_analysis import get_fs, geography_utils
from segment_speed_utils import helpers, time_series_utils, gtfs_schedule_wrangling, corridor_analysis
from segment_speed_utils.project_vars import SCHED_GCS, SEGMENT_GCS, GTFS_DATA_DICT, analysis_date

# develop and test some basic tools for corridor analysis

In [3]:
catalog = catalog_utils.get_catalog('gtfs_analytics_data')

In [4]:
catalog.speedmap_segments

{'dir': '${gcs_paths.SEGMENT_GCS}', 'stage1': '${speeds_tables.vp_dwell}', 'proxy_stop_times': 'stop_time_expansion/speedmap_stop_times', 'stage2': 'nearest/nearest_vp_speedmap_proxy', 'stage3': 'speedmap/stop_arrivals_proxy', 'stage3b': 'speedmap/stop_arrivals', 'stage4': 'speedmap/speeds', 'trip_stop_cols': ['trip_instance_key', 'stop_sequence', 'stop_sequence1'], 'shape_stop_cols': ['shape_array_key', 'shape_id'], 'stop_pair_cols': ['stop_pair', 'stop_pair_name', 'segment_id'], 'route_dir_cols': ['route_id', 'direction_id'], 'segments_file': 'segment_options/speedmap_segments', 'shape_stop_single_segment': 'rollup_singleday/speeds_shape_speedmap_segments', 'shape_stop_single_segment_detail': 'rollup_singleday/speeds_shape_speedmap_segments_detail', 'route_dir_single_segment': 'rollup_singleday/speeds_route_dir_speedmap_segments', 'route_dir_multi_segment': 'rollup_multiday/speeds_route_dir_speedmap_segments', 'min_trip_minutes': '${speed_vars.time_min_cutoff}', 'max_trip_minutes': 1

In [5]:
analysis_date

'2024-12-11'

In [6]:
# path = f'{catalog.speedmap_segments.dir}{catalog.speedmap_segments.stage4}_{analysis_date}.parquet'

In [7]:
path = f'{catalog.speedmap_segments.dir}{catalog.speedmap_segments.shape_stop_single_segment_detail}_{analysis_date}.parquet'

In [8]:
path

'gs://calitp-analytics-data/data-analyses/rt_segment_speeds/rollup_singleday/speeds_shape_speedmap_segments_detail_2024-12-11.parquet'

In [9]:
detail = gpd.read_parquet(path).round(2)

In [43]:
# Peak scheduled frequency per route and feed: the largest trips_hr_sch found on any
# of that route's rows in `detail`, ordered from most to least frequent.
route_keys = ['route_id', 'schedule_gtfs_dataset_key']
frequencies = (
    detail[route_keys + ['trips_hr_sch']]
    .groupby(route_keys)['trips_hr_sch']
    .max()
    .reset_index()
    .sort_values('trips_hr_sch', ascending=False)
)

In [44]:
# frequencies = (detail[['shape_array_key', 'route_id', 'schedule_gtfs_dataset_key']].drop_duplicates(
#                     ).merge(frequencies, on=['route_id', 'schedule_gtfs_dataset_key'])
#               )

In [45]:
frequencies

Unnamed: 0,route_id,schedule_gtfs_dataset_key,trips_hr_sch
1511,807,2a0571758141f412b6a546fd70a65bf3,12.00
849,38R,7cc0cb1871dfd558f11a2885c145d144,10.67
1031,49,7cc0cb1871dfd558f11a2885c145d144,10.33
978,4444,cc53a0dbf5df90e3009b9cb5d89d80ba,10.00
495,22,7cc0cb1871dfd558f11a2885c145d144,10.00
...,...,...,...
1251,613,587e730fac4db21d54037e0f12b0dd5d,0.20
1299,635,364d59b3aea55aec2962a0b3244a40e0,0.20
1430,72,5456c80d420043e15c8eb7368a8a4d89,0.20
1327,654,015d67d5b75b5cf2b710bbadadfb75f5,0.20


In [10]:
detail.head(3)

Unnamed: 0,schedule_gtfs_dataset_key,shape_array_key,shape_id,route_id,direction_id,stop_pair,stop_pair_name,segment_id,time_of_day,p50_mph,...,p80_mph,name,caltrans_district,organization_source_record_id,organization_name,base64_url,geometry,n_trips_sch,trips_hr_sch,route_short_name
0,015d67d5b75b5cf2b710bbadadfb75f5,43f0d67e5131502a51f9330e04bacc55,20,228,0.0,40512__40514,Sir Francis Drake Blvd & Olema Rd__Sir Francis...,40512-40514-1,AM Peak,11.83,...,11.83,Bay Area 511 Marin Schedule,04 - Oakland,recNOb7pqBRlQVG5e,Marin County Transit District,aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZW...,"LINESTRING (-122.60413 38.00022, -122.60416 38...",3,1.0,228
1,015d67d5b75b5cf2b710bbadadfb75f5,43f0d67e5131502a51f9330e04bacc55,20,228,0.0,40514__40516,Sir Francis Drake Blvd & Alhambra Circle__Sir ...,40514-40516-1,AM Peak,15.52,...,19.79,Bay Area 511 Marin Schedule,04 - Oakland,recNOb7pqBRlQVG5e,Marin County Transit District,aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZW...,"LINESTRING (-122.60045 37.99908, -122.60037 37...",3,1.0,228
2,015d67d5b75b5cf2b710bbadadfb75f5,43f0d67e5131502a51f9330e04bacc55,20,228,0.0,40516__40518,Sir Francis Drake Blvd At Drake Manor Apts__Si...,40516-40518-1,AM Peak,15.69,...,20.79,Bay Area 511 Marin Schedule,04 - Oakland,recNOb7pqBRlQVG5e,Marin County Transit District,aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZW...,"LINESTRING (-122.59907 37.99704, -122.59894 37...",3,1.0,228


## need trip-level (pre-aggregation) gdf to properly calculate metrics

In [11]:
st4 = corridor_analysis.import_trip_speeds(analysis_date)

2.2 percent of segments have no speed


## corridor specification

In [12]:
# rt_utils.show_full_df(pd.read_parquet('../ca_transit_speed_maps/_rt_progress_2024-12-11.parquet').sort_values(['caltrans_district', 'organization_name']))

## define corridor

In [13]:
# Alternative corridor definition, kept for reference (inactive).
# shape_id = '4953'
# start_seg_id = '18088-18089-1'
# end_seg_id = '16800-16806-1'

# Active corridor definition; these four values are passed to
# corridor_analysis.corridor_from_segments in the next cell.
sf = 'rechaapWbeffO33OX'  # passed as organization_source_record_id
shape_id = '800'  # shape whose segments delimit the corridor
start_seg_id = '14970-17900-2'  # segment_id passed as the corridor start
end_seg_id = '16357-16358-1'  # segment_id passed as the corridor end

# Another alternative corridor definition (inactive).
# hum = 'recynxkqEoo9dJEvw'
# shape_id = 'p_1435936'
# start_seg_id = '1252-4209812-1'
# end_seg_id = '1276-1277-1'

In [14]:
gdf = corridor_analysis.corridor_from_segments(speed_segments_gdf=detail, organization_source_record_id=sf, shape_id=shape_id,
                      start_seg_id=start_seg_id, end_seg_id=end_seg_id)

POINT (-122.40550254785889 37.76900326502991) POINT (-122.40217200000001 37.724137999999975)


## Corridor Measurements

Previous logic:

For each trip, measure from the last stop before entering the corridor to the first stop after exiting it. This was done using stop_sequence.

Now,

* First, spatially join (sjoin) the corridor with the aggregated segment data (which has geometry). Sjoining on segments is equivalent to the previous methodology, since it yields everything from the last stop before entry to the first stop after exit.
* Avoid computing the scheduled delay metric for now.
* 

In [15]:
corridor_trips = corridor_analysis.find_corridor_data(detail, gdf, st4)
corridor_results = corridor_analysis.analyze_corridor_trips(corridor_trips)

0.8 percent of trips with zero seconds
0.8 percent of trips with speeds > 80mph


In [20]:
# m = gdf.explore(color='gray')

# corridor_trips[['route_short_name', 'shape_length', 'geometry']].explore(m=m, column='shape_length')

In [21]:
# corridor_results.head(3)

In [22]:
# routes = ['8AX', '8BX', '8']
# rt8 = corridor_results.query('route_short_name in @routes')
# df = corridor_analysis.analyze_corridor_improvements(rt8, trip_seconds_saved=30)

df = corridor_analysis.analyze_corridor_improvements(corridor_results, trip_seconds_saved=30)

In [23]:
df.head(3)

Unnamed: 0,trip_instance_key,corridor_meters,corridor_seconds,corridor_speed_mps,corridor_speed_mph,route_short_name,route_id,shape_array_key,shape_id,schedule_gtfs_dataset_key,time_of_day,corridor_id,improved_corridor_seconds,improved_corridor_speed_mps,improved_corridor_speed_mph
0,00eec7a988d3afb24ea22340eb4929c9,7535.184523,1562.0,4.824062,10.791426,8AX,8AX,5183639c45e26411e1562c1534f7af26,36,7cc0cb1871dfd558f11a2885c145d144,AM Peak,2850169509686928307,1532.0,4.918528,11.002747
1,020b55f09bec8a995b77fd470167f2e5,4903.795632,1021.0,4.802934,10.744163,9,9,bc2f9da35043207727c2fd2bc9a5b2ad,915,7cc0cb1871dfd558f11a2885c145d144,PM Peak,2850169509686928307,991.0,4.948331,11.069416
2,0274459938044131634a5acaef1ffab5,5645.089521,1236.0,4.567225,10.216881,9R,9R,516956ffa95db3a152bf0ca679c8d5f5,210,7cc0cb1871dfd558f11a2885c145d144,PM Peak,2850169509686928307,1206.0,4.680837,10.471033


In [65]:
def summarize_corridor_improvements(df: pd.DataFrame, extra_group_cols: list = None,
                                    route_frequencies: pd.DataFrame = None):
    '''
    Total observed vs. improved corridor travel time per corridor and feed.

    Delay is computed per input row as
    corridor_seconds - improved_corridor_seconds, then summed within each
    group together with the unique routes present in that group.

    Parameters
    ----------
    df : pd.DataFrame
        Needs corridor_id, schedule_gtfs_dataset_key, corridor_seconds,
        improved_corridor_seconds, route_short_name and route_id columns
        (plus any extra_group_cols), e.g. the output of
        corridor_analysis.analyze_corridor_improvements.
    extra_group_cols : list, optional
        Additional columns to group by, on top of corridor_id and
        schedule_gtfs_dataset_key.
    route_frequencies : pd.DataFrame, optional
        Table keyed by route_id and schedule_gtfs_dataset_key whose remaining
        columns (trips_hr_sch in this notebook) are attached per route.
        Defaults to the notebook-level `frequencies` table.

    Returns
    -------
    (route_freq, summary) : tuple of pd.DataFrame
        summary has one row per group (indexed by the group columns) with
        summed seconds/delay columns and arrays of unique route_short_name /
        route_id. route_freq has one row per group and route: the summary
        exploded by route and inner-merged with route_frequencies.
    '''
    if route_frequencies is None:
        # Fall back to the table built earlier in the notebook.
        route_frequencies = frequencies

    # Build a fresh list on every call instead of sharing a mutable default.
    group_cols = ['corridor_id', 'schedule_gtfs_dataset_key'] + list(extra_group_cols or [])
    sum_cols = ['corridor_seconds', 'improved_corridor_seconds', 'delay_seconds',
                'delay_minutes']
    array_cols = ['route_short_name', 'route_id']

    delay_seconds = df.corridor_seconds - df.improved_corridor_seconds
    with_delay = df.assign(delay_seconds=delay_seconds,
                           delay_minutes=delay_seconds / 60)

    summary = with_delay.groupby(group_cols)[sum_cols + array_cols].agg(
        {**{col: 'sum' for col in sum_cols},
         **{col: 'unique' for col in array_cols}}
    )

    # NOTE(review): exploding both route columns together requires the two
    # unique arrays to have the same length in every group, i.e. a one-to-one
    # route_short_name <-> route_id mapping within each group.
    route_freq = (summary.explode(array_cols)
                  .reset_index()
                  .merge(route_frequencies, on=['route_id', 'schedule_gtfs_dataset_key'])
                  )
    return route_freq, summary

In [66]:
# Unpacks (per-route frequency table, per-feed corridor summary).
# NOTE(review): this rebinds `df` from the trip-level frame to the summary, so
# re-running this cell feeds the summary (whose route columns hold arrays) back
# into the function; re-run the analyze_corridor_improvements cell first.
test, df = summarize_corridor_improvements(df)

In [67]:
test

Unnamed: 0,corridor_id,schedule_gtfs_dataset_key,corridor_seconds,improved_corridor_seconds,delay_seconds,delay_minutes,route_short_name,route_id,trips_hr_sch
0,2850169509686928307,5456c80d420043e15c8eb7368a8a4d89,47953.0,46003.0,1950.0,32.5,292,292,3.0
1,2850169509686928307,5456c80d420043e15c8eb7368a8a4d89,47953.0,46003.0,1950.0,32.5,EPX,EPX,1.4
2,2850169509686928307,5456c80d420043e15c8eb7368a8a4d89,47953.0,46003.0,1950.0,32.5,397,397,0.75
3,2850169509686928307,5456c80d420043e15c8eb7368a8a4d89,47953.0,46003.0,1950.0,32.5,FCX,FCX,1.2
4,2850169509686928307,7cc0cb1871dfd558f11a2885c145d144,755851.0,739351.0,16500.0,275.0,8AX,8AX,6.33
5,2850169509686928307,7cc0cb1871dfd558f11a2885c145d144,755851.0,739351.0,16500.0,275.0,9,9,5.0
6,2850169509686928307,7cc0cb1871dfd558f11a2885c145d144,755851.0,739351.0,16500.0,275.0,9R,9R,5.0
7,2850169509686928307,7cc0cb1871dfd558f11a2885c145d144,755851.0,739351.0,16500.0,275.0,8,8,7.6
8,2850169509686928307,7cc0cb1871dfd558f11a2885c145d144,755851.0,739351.0,16500.0,275.0,90,90,2.0
9,2850169509686928307,7cc0cb1871dfd558f11a2885c145d144,755851.0,739351.0,16500.0,275.0,8BX,8BX,5.33


In [69]:
df.reset_index()

Unnamed: 0,corridor_id,schedule_gtfs_dataset_key,corridor_seconds,improved_corridor_seconds,delay_seconds,delay_minutes,route_short_name,route_id
0,2850169509686928307,5456c80d420043e15c8eb7368a8a4d89,47953.0,46003.0,1950.0,32.5,"[292, EPX, 397, FCX]","[292, EPX, 397, FCX]"
1,2850169509686928307,7cc0cb1871dfd558f11a2885c145d144,755851.0,739351.0,16500.0,275.0,"[8AX, 9, 9R, 8, 90, 8BX]","[8AX, 9, 9R, 8, 90, 8BX]"


In [72]:
test.groupby('schedule_gtfs_dataset_key').agg({'trips_hr_sch': 'unique'})

Unnamed: 0_level_0,trips_hr_sch
schedule_gtfs_dataset_key,Unnamed: 1_level_1
5456c80d420043e15c8eb7368a8a4d89,"[3.0, 1.4, 0.75, 1.2]"
7cc0cb1871dfd558f11a2885c145d144,"[6.33, 5.0, 7.6, 2.0, 5.33]"


In [80]:
df.reset_index().merge(test.groupby('schedule_gtfs_dataset_key').agg({'trips_hr_sch': lambda x: list(x)}), on='schedule_gtfs_dataset_key')

Unnamed: 0,corridor_id,schedule_gtfs_dataset_key,corridor_seconds,improved_corridor_seconds,delay_seconds,delay_minutes,route_short_name,route_id,trips_hr_sch
0,2850169509686928307,5456c80d420043e15c8eb7368a8a4d89,47953.0,46003.0,1950.0,32.5,"[292, EPX, 397, FCX]","[292, EPX, 397, FCX]","[3.0, 1.4, 0.75, 1.2]"
1,2850169509686928307,7cc0cb1871dfd558f11a2885c145d144,755851.0,739351.0,16500.0,275.0,"[8AX, 9, 9R, 8, 90, 8BX]","[8AX, 9, 9R, 8, 90, 8BX]","[6.33, 5.0, 5.0, 7.6, 2.0, 5.33]"


In [57]:
test.reset_index()

Unnamed: 0,index,corridor_id,schedule_gtfs_dataset_key,corridor_seconds,improved_corridor_seconds,delay_seconds,delay_minutes,route_short_name,route_id,trips_hr_sch
0,0,2850169509686928307,5456c80d420043e15c8eb7368a8a4d89,47953.0,46003.0,1950.0,32.5,292,292,3.0
1,1,2850169509686928307,5456c80d420043e15c8eb7368a8a4d89,47953.0,46003.0,1950.0,32.5,EPX,EPX,1.4
2,2,2850169509686928307,5456c80d420043e15c8eb7368a8a4d89,47953.0,46003.0,1950.0,32.5,397,397,0.75
3,3,2850169509686928307,5456c80d420043e15c8eb7368a8a4d89,47953.0,46003.0,1950.0,32.5,FCX,FCX,1.2
4,4,2850169509686928307,7cc0cb1871dfd558f11a2885c145d144,755851.0,739351.0,16500.0,275.0,8AX,8AX,6.33
5,5,2850169509686928307,7cc0cb1871dfd558f11a2885c145d144,755851.0,739351.0,16500.0,275.0,9,9,5.0
6,6,2850169509686928307,7cc0cb1871dfd558f11a2885c145d144,755851.0,739351.0,16500.0,275.0,9R,9R,5.0
7,7,2850169509686928307,7cc0cb1871dfd558f11a2885c145d144,755851.0,739351.0,16500.0,275.0,8,8,7.6
8,8,2850169509686928307,7cc0cb1871dfd558f11a2885c145d144,755851.0,739351.0,16500.0,275.0,90,90,2.0
9,9,2850169509686928307,7cc0cb1871dfd558f11a2885c145d144,755851.0,739351.0,16500.0,275.0,8BX,8BX,5.33


In [46]:
df.merge(frequencies, on=['route_id', 'schedule_gtfs_dataset_key'])

Unnamed: 0,trip_instance_key,corridor_meters,corridor_seconds,corridor_speed_mps,corridor_speed_mph,route_short_name,route_id,shape_array_key,shape_id,schedule_gtfs_dataset_key,time_of_day,corridor_id,improved_corridor_seconds,improved_corridor_speed_mps,improved_corridor_speed_mph,trips_hr_sch
0,00eec7a988d3afb24ea22340eb4929c9,7535.184523,1562.0,4.824062,10.791426,8AX,8AX,5183639c45e26411e1562c1534f7af26,36,7cc0cb1871dfd558f11a2885c145d144,AM Peak,2850169509686928307,1532.0,4.918528,11.002747,6.33
1,0296fcb09fc0ffbcbefe5b16a5f86fac,6576.864680,2225.0,2.955894,6.612335,8AX,8AX,ff6de96c284c1e74197ac728c66d0ef1,37,7cc0cb1871dfd558f11a2885c145d144,PM Peak,2850169509686928307,2195.0,2.996294,6.702709,6.33
2,065498b1ad0aaba819becfb13a5659f3,6685.387624,3079.0,2.171285,4.857165,8AX,8AX,5183639c45e26411e1562c1534f7af26,36,7cc0cb1871dfd558f11a2885c145d144,Early AM,2850169509686928307,3049.0,2.192649,4.904956,6.33
3,075e5bc8fee8474f6883de98b21d2392,8699.292204,3380.0,2.573755,5.757490,8AX,8AX,5183639c45e26411e1562c1534f7af26,36,7cc0cb1871dfd558f11a2885c145d144,AM Peak,2850169509686928307,3350.0,2.596804,5.809050,6.33
4,0fddccde2439c2b5c54a8544bd3a72c1,6685.387624,850.0,7.865162,17.594367,8AX,8AX,5183639c45e26411e1562c1534f7af26,36,7cc0cb1871dfd558f11a2885c145d144,Early AM,2850169509686928307,820.0,8.152912,18.238064,6.33
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
610,73db11a00352504011e37e6363741174,7000.000000,465.0,15.053763,33.675269,FCX,FCX,927b89cceacf516d79033da65361eefd,FCX0076,5456c80d420043e15c8eb7368a8a4d89,PM Peak,2850169509686928307,435.0,16.091954,35.997701,1.20
611,9fd4e5bb849fb00a3d40df182faae750,7000.000000,404.0,17.326733,38.759901,FCX,FCX,927b89cceacf516d79033da65361eefd,FCX0076,5456c80d420043e15c8eb7368a8a4d89,PM Peak,2850169509686928307,374.0,18.716578,41.868984,1.20
612,b9155a2b0e5b01e8e232693051cfcf2d,5000.000000,501.0,9.980040,22.325349,FCX,FCX,d74b06ef07336ce2cd6674180b287d3c,FCX0075,5456c80d420043e15c8eb7368a8a4d89,AM Peak,2850169509686928307,471.0,10.615711,23.747346,1.20
613,d59439dd782a6391bd32ec99734cfc32,7000.000000,427.0,16.393443,36.672131,FCX,FCX,927b89cceacf516d79033da65361eefd,FCX0076,5456c80d420043e15c8eb7368a8a4d89,PM Peak,2850169509686928307,397.0,17.632242,39.443325,1.20


In [48]:
test = summarize_corridor_improvements(df, group_cols=['corridor_id'])

TypeError: unhashable type: 'numpy.ndarray'

In [37]:
test

Unnamed: 0_level_0,corridor_seconds,improved_corridor_seconds,delay_seconds,delay_minutes,route_short_name,route_id,schedule_gtfs_dataset_key
corridor_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2850169509686928307,803804.0,785354.0,18450.0,307.5,"[8AX, 9, 9R, 292, 8, 90, EPX, 8BX, 397, FCX]","[8AX, 9, 9R, 292, 8, 90, EPX, 8BX, 397, FCX]","[7cc0cb1871dfd558f11a2885c145d144, 5456c80d420..."


In [30]:
frequencies

Unnamed: 0,route_id,schedule_gtfs_dataset_key,trips_hr_sch
1511,807,2a0571758141f412b6a546fd70a65bf3,12.00
849,38R,7cc0cb1871dfd558f11a2885c145d144,10.67
1031,49,7cc0cb1871dfd558f11a2885c145d144,10.33
978,4444,cc53a0dbf5df90e3009b9cb5d89d80ba,10.00
495,22,7cc0cb1871dfd558f11a2885c145d144,10.00
...,...,...,...
1251,613,587e730fac4db21d54037e0f12b0dd5d,0.20
1299,635,364d59b3aea55aec2962a0b3244a40e0,0.20
1430,72,5456c80d420043e15c8eb7368a8a4d89,0.20
1327,654,015d67d5b75b5cf2b710bbadadfb75f5,0.20


In [29]:
df.merge(frequencies, on=['route_id', 'schedule_gtfs_dataset_key'])

Unnamed: 0,trip_instance_key,corridor_meters,corridor_seconds,corridor_speed_mps,corridor_speed_mph,route_short_name,route_id,shape_array_key,shape_id,schedule_gtfs_dataset_key,time_of_day,corridor_id,improved_corridor_seconds,improved_corridor_speed_mps,improved_corridor_speed_mph,trips_hr_sch
0,00eec7a988d3afb24ea22340eb4929c9,7535.184523,1562.0,4.824062,10.791426,8AX,8AX,5183639c45e26411e1562c1534f7af26,36,7cc0cb1871dfd558f11a2885c145d144,AM Peak,2850169509686928307,1532.0,4.918528,11.002747,6.33
1,0296fcb09fc0ffbcbefe5b16a5f86fac,6576.864680,2225.0,2.955894,6.612335,8AX,8AX,ff6de96c284c1e74197ac728c66d0ef1,37,7cc0cb1871dfd558f11a2885c145d144,PM Peak,2850169509686928307,2195.0,2.996294,6.702709,6.33
2,065498b1ad0aaba819becfb13a5659f3,6685.387624,3079.0,2.171285,4.857165,8AX,8AX,5183639c45e26411e1562c1534f7af26,36,7cc0cb1871dfd558f11a2885c145d144,Early AM,2850169509686928307,3049.0,2.192649,4.904956,6.33
3,075e5bc8fee8474f6883de98b21d2392,8699.292204,3380.0,2.573755,5.757490,8AX,8AX,5183639c45e26411e1562c1534f7af26,36,7cc0cb1871dfd558f11a2885c145d144,AM Peak,2850169509686928307,3350.0,2.596804,5.809050,6.33
4,0fddccde2439c2b5c54a8544bd3a72c1,6685.387624,850.0,7.865162,17.594367,8AX,8AX,5183639c45e26411e1562c1534f7af26,36,7cc0cb1871dfd558f11a2885c145d144,Early AM,2850169509686928307,820.0,8.152912,18.238064,6.33
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
610,73db11a00352504011e37e6363741174,7000.000000,465.0,15.053763,33.675269,FCX,FCX,927b89cceacf516d79033da65361eefd,FCX0076,5456c80d420043e15c8eb7368a8a4d89,PM Peak,2850169509686928307,435.0,16.091954,35.997701,1.20
611,9fd4e5bb849fb00a3d40df182faae750,7000.000000,404.0,17.326733,38.759901,FCX,FCX,927b89cceacf516d79033da65361eefd,FCX0076,5456c80d420043e15c8eb7368a8a4d89,PM Peak,2850169509686928307,374.0,18.716578,41.868984,1.20
612,b9155a2b0e5b01e8e232693051cfcf2d,5000.000000,501.0,9.980040,22.325349,FCX,FCX,d74b06ef07336ce2cd6674180b287d3c,FCX0075,5456c80d420043e15c8eb7368a8a4d89,AM Peak,2850169509686928307,471.0,10.615711,23.747346,1.20
613,d59439dd782a6391bd32ec99734cfc32,7000.000000,427.0,16.393443,36.672131,FCX,FCX,927b89cceacf516d79033da65361eefd,FCX0076,5456c80d420043e15c8eb7368a8a4d89,PM Peak,2850169509686928307,397.0,17.632242,39.443325,1.20


In [28]:
df

Unnamed: 0,trip_instance_key,corridor_meters,corridor_seconds,corridor_speed_mps,corridor_speed_mph,route_short_name,route_id,shape_array_key,shape_id,schedule_gtfs_dataset_key,time_of_day,corridor_id,improved_corridor_seconds,improved_corridor_speed_mps,improved_corridor_speed_mph
0,00eec7a988d3afb24ea22340eb4929c9,7535.184523,1562.0,4.824062,10.791426,8AX,8AX,5183639c45e26411e1562c1534f7af26,36,7cc0cb1871dfd558f11a2885c145d144,AM Peak,2850169509686928307,1532.0,4.918528,11.002747
1,020b55f09bec8a995b77fd470167f2e5,4903.795632,1021.0,4.802934,10.744163,9,9,bc2f9da35043207727c2fd2bc9a5b2ad,915,7cc0cb1871dfd558f11a2885c145d144,PM Peak,2850169509686928307,991.0,4.948331,11.069416
2,0274459938044131634a5acaef1ffab5,5645.089521,1236.0,4.567225,10.216881,9R,9R,516956ffa95db3a152bf0ca679c8d5f5,210,7cc0cb1871dfd558f11a2885c145d144,PM Peak,2850169509686928307,1206.0,4.680837,10.471033
3,0296fcb09fc0ffbcbefe5b16a5f86fac,6576.864680,2225.0,2.955894,6.612335,8AX,8AX,ff6de96c284c1e74197ac728c66d0ef1,37,7cc0cb1871dfd558f11a2885c145d144,PM Peak,2850169509686928307,2195.0,2.996294,6.702709
4,031ac5dcd0b8e4af9e8400f3ef4ea4f4,5735.577590,1297.0,4.422188,9.892434,9R,9R,7eb3ea290f654bf36a306df56dfcf6d2,211,7cc0cb1871dfd558f11a2885c145d144,Midday,2850169509686928307,1267.0,4.526896,10.126667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
610,fd24021b9596ad543169bb3d05db1b59,4993.501678,1248.0,4.001203,8.950692,9,9,ff10c7aea2fcfb08e350e27742f6e918,969,7cc0cb1871dfd558f11a2885c145d144,PM Peak,2850169509686928307,1218.0,4.099755,9.171152
611,feb49dd7190da29eadf0212984a32536,6461.153360,1084.0,5.960474,13.333579,9R,9R,7eb3ea290f654bf36a306df56dfcf6d2,211,7cc0cb1871dfd558f11a2885c145d144,Early AM,2850169509686928307,1054.0,6.130127,13.713093
612,ff2327da3c41c372358725d15f7c2b86,4993.501678,1570.0,3.180574,7.114945,9,9,4eb030066acc04e9455dd941ab86a1e7,950,7cc0cb1871dfd558f11a2885c145d144,PM Peak,2850169509686928307,1540.0,3.242534,7.253548
613,ff9e0dd09110bdd1cdaa557f6ef22391,5299.667297,1341.0,3.952026,8.840683,9,9,4eb030066acc04e9455dd941ab86a1e7,950,7cc0cb1871dfd558f11a2885c145d144,Midday,2850169509686928307,1311.0,4.042462,9.042987


In [27]:
test

Unnamed: 0_level_0,corridor_seconds,improved_corridor_seconds,delay_seconds,delay_minutes,route_short_name,route_id,schedule_gtfs_dataset_key,trips_hr_sch,total_trips_hr
corridor_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2850169509686928307,803804.0,785354.0,18450.0,307.5,"[8AX, 9, 9R, 292, 8, 90, EPX, 8BX, 397, FCX]","[8AX, 9, 9R, 292, 8, 90, EPX, 8BX, 397, FCX]",5456c80d420043e15c8eb7368a8a4d89,"[3.0, 1.4, 0.75, 1.2]",6.4


In [None]:
corr = pd.read_excel('./_temp/corr_hs.xlsx', sheet_name='Corridors')

In [None]:
hs = pd.read_excel('./_temp/corr_hs.xlsx', sheet_name='Hotspots')

In [None]:
# Segment and shape identifiers are cast to str in both sheets.
id_cols_as_str = {col: str for col in ('start_segment_id', 'end_segment_id', 'shape_id')}
corr = corr.astype(id_cols_as_str)
hs = hs.astype(id_cols_as_str)

In [None]:
row = corr.iloc[0,:]

In [None]:
row

In [None]:
gdf = corridor_analysis.corridor_from_segments(speed_segments_gdf=detail, organization_source_record_id=row.organization_source_record_id, shape_id=row.shape_id,
                      start_seg_id=row.start_segment_id, end_seg_id=row.end_segment_id)

In [None]:
def corridor_from_row(df, intervention_dict):
    '''
    Build and evaluate one corridor per row of df, returning them stacked.

    For every row the corridor is created with
    corridor_analysis.corridor_from_segments, trips through it are analyzed,
    the intervention is applied, and the summed delay columns are attached to
    the corridor together with corridor_miles and minutes_per_mile.
    Uses the notebook-level `detail` and `st4` frames.

    Parameters
    ----------
    df : pd.DataFrame
        One corridor per row, with organization_source_record_id, shape_id,
        start_segment_id, end_segment_id and 'SHS Segment' columns.
    intervention_dict : dict
        Keyword arguments forwarded to
        corridor_analysis.analyze_corridor_improvements,
        e.g. {'trip_mph_target': 16}.

    Returns
    -------
    The concatenation of all corridors that were processed successfully.
    Rows that fail are reported with print() and skipped.

    Raises
    ------
    ValueError
        If no row could be processed.
    '''
    delay_cols = ['corridor_seconds', 'improved_corridor_seconds',
                  'delay_seconds', 'delay_minutes']
    all_corridors = []
    for _, row in df.iterrows():
        segment_name = row['SHS Segment']
        print(segment_name)
        try:
            corridor = corridor_analysis.corridor_from_segments(
                speed_segments_gdf=detail,
                organization_source_record_id=row.organization_source_record_id,
                shape_id=row.shape_id,
                start_seg_id=row.start_segment_id,
                end_seg_id=row.end_segment_id,
                name=segment_name,
            )
            corridor_trips = corridor_analysis.find_corridor_data(detail, corridor, st4)
            corridor_results = corridor_analysis.analyze_corridor_trips(corridor_trips)
            improved = corridor_analysis.analyze_corridor_improvements(corridor_results, **intervention_dict)
            # summarize_corridor_improvements returns (route frequencies, summary),
            # with the summary indexed by corridor_id and schedule_gtfs_dataset_key;
            # collapse the feeds so there is a single totals row per corridor.
            _, by_feed = summarize_corridor_improvements(improved)
            totals = by_feed.groupby(level='corridor_id')[delay_cols].sum().reset_index(drop=True)
            # NOTE(review): axis=1 concat aligns on the index; assumes `corridor`
            # is a single row with index 0 -- confirm against corridor_from_segments.
            corridor = pd.concat([corridor, totals], axis=1)
            # Miles come from the corridor definition, not from trip distance.
            corridor = corridor.assign(corridor_miles = corridor.corridor_distance_meters / rt_utils.METERS_PER_MILE)
            corridor = corridor.assign(minutes_per_mile = corridor.delay_minutes / corridor.corridor_miles)
        except Exception as e:
            # Best effort: report the failing corridor and keep going.
            print(f'failed for {segment_name}: {e!r}')
            continue
        all_corridors.append(corridor)
    if not all_corridors:
        raise ValueError('no corridor could be processed; see the failures printed above')
    return pd.concat(all_corridors)

In [None]:
corr.loc[15]

In [None]:
corr_gdf = corridor_from_row(corr, intervention_dict={'trip_mph_target': 16})
# corr.iloc[:3,:].apply(corridor_from_row, axis=1)

In [None]:
corr_gdf

In [None]:
# corridor_from_row already returns the concatenated corridors (all_corridors is
# local to that function), so only the column selection is needed here.
corr_gdf = corr_gdf[['corridor_name', 'name', 'corridor_miles', 'delay_minutes',
        'minutes_per_mile', 'geometry']]

In [None]:
corr_gdf.explore(column='minutes_per_mile')

In [None]:
# corridor_from_row iterates over a whole table and returns the stacked result, so it
# is called on `hs` directly; DataFrame.apply(axis=1) would hand it one row Series at
# a time. Kept as a one-element list so the next cell's pd.concat(all_corridors) works.
all_corridors = [corridor_from_row(hs, intervention_dict={'trip_mph_target': 16})]

In [None]:
hs_gdf = pd.concat(all_corridors)[['corridor_name', 'name', 'corridor_miles', 'delay_minutes',
        'minutes_per_mile', 'geometry']]

In [None]:
hs_gdf.explore(column='delay_minutes')

In [None]:
corr_gdf.sort_values('minutes_per_mile', ascending=False)

In [None]:
hs_gdf.sort_values('delay_minutes', ascending=False)

## Discussion

* Which other metrics?

after screening:

ridership/person-hours of delay
accessibility, equity

* Is our list complete?

Include Tempo, Van Ness, can we compare?

* Exclude routes where necessary (current corridor join is just spatial...)

* Add location-specific interventions, and [guidance](https://caltrans.sharepoint.com/:w:/s/DOTPMPHQ-DataandDigitalServices/EdG0YNQcQMBJmKncAuNva9wBjpxVq2sD8p3C5azumXFNRA?e=TO7CbB)

* How much should we focus on freeways?

Yes, include (Bay Bridge, others, SDMTS freeway service, launch service with new HOT...)

District transit plans, express/rapid on freeways...?