In [None]:
import pandas as pd
import geopandas as gpd

from shared_utils import rt_utils, catalog_utils

from calitp_data_analysis import get_fs, geography_utils
from segment_speed_utils import helpers, time_series_utils, gtfs_schedule_wrangling, corridor_analysis
from segment_speed_utils.project_vars import SCHED_GCS, SEGMENT_GCS, GTFS_DATA_DICT, analysis_date
import numpy as np

# Transit Priority on SHS Feb 2025

## Methodology and Assumptions

* manually create lists of corridors and hotspots based on speedmaps, judgement
* find relevant trip data (segments) for each corridor, from the first segment that intersects the defined corridor to the last.
* only keep routes that travel along the corridor for at least half of the corridor length (exclude cross-street routes)
* exclude trips with apparent zero seconds in corridor, speeds above 80mph, speeds below the 5th or above the 95th percentile.
* calculate speed and travel times for trips through corridor
* for each trip, roughly estimate effects of transit priority
    * corridors: if the median speed under current conditions is less than 13.6mph, all trip speeds increase to 16mph; otherwise all trip speeds increase by 15%
    * hotspots: all trips save 30 seconds of travel time through hotspot
* track total daily trips and peak frequency across all routes
* estimate average trip delay by dividing total minutes of delay by daily trip count
* estimate rider delay for certain examples by multiplying average trip delay by ridership
* "minutes_per_mile" is a metric created by dividing total daily minutes of delay by the length of the corridor. It is intended to provide a rough measure of delay intensity or cost-effectiveness.

In [None]:
import importlib
# Re-import corridor_analysis so edits to the module source take effect
# without restarting the kernel.
importlib.reload(corridor_analysis)

In [None]:
# Show the analysis date imported from segment_speed_utils.project_vars.
analysis_date

## need trip-level (pre-aggregation) gdf to properly calculate metrics

In [None]:
# Speedmap segment speeds for analysis_date (loader lives in corridor_analysis).
segment_speeds = corridor_analysis.import_speedmap_segment_speeds(analysis_date)

In [None]:
# Frequencies derived from segment_speeds; later passed to the summary and sheet-level functions.
frequencies = corridor_analysis.get_max_frequencies(segment_speeds)

In [None]:
# Trip-level speeds for the same analysis_date; used with segment_speeds to find corridor trips.
trip_speeds = corridor_analysis.import_trip_speeds(analysis_date)

## corridor specification

In [None]:
# rt_utils.show_full_df(pd.read_parquet('../ca_transit_speed_maps/_rt_progress_2024-12-11.parquet').sort_values(['caltrans_district', 'organization_name']))

## Corridor Measurements

Previous logic:

For each trip, get from the last stop before entering corridor to the first stop after exiting corridor. This was done on stop_sequence

Now,

* first sjoin with aggregated data (has geom). Sjoining on segments is equivalent to previous methodology, since it will yield the last stop before entry to the first stop after exiting...
* avoid doing scheduled delay metric for now...
* 

In [None]:
# Corridor and hotspot definitions live on two sheets of the same workbook.
# Passing a list of sheet names parses the file once and returns a dict of
# DataFrames keyed by sheet name.
corr_hs_sheets = pd.read_excel('./_temp/corr_hs.xlsx', sheet_name=['Corridors', 'Hotspots'])

# The same identifier columns are cast to str on both sheets
# (presumably so they compare equal to the ids in the speed data -- TODO confirm).
id_dtypes = {'start_segment_id': str, 'end_segment_id': str, 'shape_id': str}
corr = corr_hs_sheets['Corridors'].astype(id_dtypes)
hs = corr_hs_sheets['Hotspots'].astype(id_dtypes)

In [None]:
# Walk through one corridor step by step: the row with index label 3 of the Corridors sheet.
row = corr.loc[3]

In [None]:
# Define the corridor from the row's organization, shape, and start/end segment ids.
gdf = corridor_analysis.corridor_from_segments(speed_segments_gdf=segment_speeds,
                                               organization_source_record_id=row.organization_source_record_id,
                                               shape_id=row.shape_id,
                                               start_seg_id=row.start_segment_id,
                                               end_seg_id=row.end_segment_id)

In [None]:
# Find the trip data for this corridor, then compute per-trip corridor results.
corridor_trips = corridor_analysis.find_corridor_data(segment_speeds, gdf, trip_speeds)
corridor_results = corridor_analysis.analyze_corridor_trips(corridor_trips)

In [None]:
# Check which routes were picked up for the corridor
# (presumably to spot cross-street routes -- confirm in corridor_analysis).
corridor_analysis.validate_corridor_routes(gdf, corridor_trips)

In [None]:
# Corridor intervention from the methodology: 16 mph floor, otherwise a 15 percent speed-up.
corridor_improvements = corridor_analysis.analyze_corridor_improvements(
    corridor_results,
    trip_mph_floor=16,
    trip_percent_speedup=15,
)

In [None]:
# Peek at the first few improved-trip rows.
corridor_improvements.head(3)

In [None]:
# Order trips by corridor_seconds to inspect the extremes.
corridor_improvements.sort_values('corridor_seconds')

In [None]:
# Roll the per-trip improvements up into a corridor-level summary, using the frequencies computed earlier.
summarized = corridor_analysis.summarize_corridor_improvements(corridor_improvements, frequencies)

In [None]:
summarized

In [None]:
# Spreadsheet row for the same corridor, for side-by-side comparison with the summary.
corr.loc[3]

In [None]:
# Earlier call signature, kept for reference:
# corr_gdf = corridor_from_sheet(corr, intervention_dict={'trip_mph_floor': 16, 'trip_percent_speedup': 15}, fwy_xpwy_floor = 35)
# Run the corridor analysis from the Corridors sheet with the 16 mph floor / 15 percent speed-up intervention.
# NOTE(review): fwy_xpwy_floor=35 is presumably a separate mph floor for freeway/expressway corridors -- confirm in corridor_analysis.
corr_gdf = corridor_analysis.corridor_from_sheet(corr,
                                                 segment_speeds,
                                                 trip_speeds,
                                                 frequencies,
                                                 intervention_dict={'trip_mph_floor': 16, 'trip_percent_speedup': 15},
                                                 fwy_xpwy_floor = 35)

In [None]:
# Round to one decimal place for display only; corr_gdf itself is unchanged.
corr_gdf.round(1)

In [None]:
# corr_gdf.iloc[1:,:].explore(column='minutes_per_mile')

In [None]:
hs_gdf = corridor_analysis.corridor_from_sheet(hs,
                             segment_speeds,
                             trip_speeds,
                             frequencies,
                             intervention_dict={'trip_seconds_saved': 30})

In [None]:
hs_gdf

## Discussion

* Which other metrics?

after screening:

ridership/person-hours of delay
accessibility, equity

* Is our list complete?

Include Tempo, Van Ness, can we compare?

* Exclude routes where necessary (current corridor join is just spatial...)

* Add location-specific interventions, and [guidance](https://caltrans.sharepoint.com/:w:/s/DOTPMPHQ-DataandDigitalServices/EdG0YNQcQMBJmKncAuNva9wBjpxVq2sD8p3C5azumXFNRA?e=TO7CbB)

* How much should we focus on freeways?

Yes, include (Bay Bridge, others, SDMTS freeway service, launch service with new HOT...)

District transit plans, express/rapid on freeways...?

In [None]:
# Stop-delay view read directly from GCS; the file name carries the same 282 / 2022-02-08
# identifiers that are later passed to from_gcs.
# NOTE(review): old_sfmta is not referenced again in the visible part of this notebook.
old_sfmta = gpd.read_parquet('gs://calitp-analytics-data/data-analyses/rt_delay/stop_delay_views/282_2022-02-08.parquet')

In [None]:
import rt_analysis

## bespoke van ness

* v2 warehouse doesn't include RT before Sep 2022, and Van Ness opened Apr 2022
* get old rt_analysis code working

In [None]:
# IPython help lookup (interactive aid only; can be dropped from a finished notebook).
rt_analysis.rt_filter_map_plot.from_gcs?

In [None]:
from rt_analysis import rt_filter_map_plot

In [None]:
# Pick up local edits to rt_filter_map_plot without restarting the kernel.
importlib.reload(rt_filter_map_plot)

In [None]:
# Load the RT analysis object for 282 on 2022-02-08, i.e. before Van Ness opened in Apr 2022.
sfmta = rt_analysis.rt_filter_map_plot.from_gcs(282, '2022-02-08')

In [None]:
# Restrict the analysis to route 49.
sfmta.set_filter(route_names=['49'])

In [None]:
# IPython help lookup (interactive aid only).
sfmta.segment_speed_map?

In [None]:
sfmta.segment_speed_map()

In [None]:
# IPython help lookup (interactive aid only).
sfmta.autocorridor?

In [None]:
# NOTE(review): the arguments are presumably a shape_id and a pair of stop sequences
# bounding the corridor -- confirm against autocorridor's signature.
sfmta.autocorridor('194252', [27, 37])

In [None]:
# Corridor metrics for the corridor set by autocorridor.
old_van_ness = sfmta.corridor_metrics()

In [None]:
# Select the Van Ness corridor rows for one specific feed.
# NOTE(review): the `_y` suffix looks like a merge artifact on the key column -- confirm where it is produced.
new_van_ness = corr_gdf.loc[
    lambda df: (df['corridor_name'] == "US101 Van Ness")
    & (df['schedule_gtfs_dataset_key_y'] == "7cc0cb1871dfd558f11a2885c145d144")
]

In [None]:
old_van_ness

In [None]:
# Keep only the rows of the stop-segment speed view flagged as being in the corridor.
old_segments = sfmta.stop_segment_speed_view[sfmta.stop_segment_speed_view.corridor]

In [None]:
new_van_ness

In [None]:
# Fourth row of the Corridors sheet by position
# (same row as corr.loc[3] when the index is the default RangeIndex).
row = corr.iloc[3,:]

In [None]:
row

In [None]:
gdf = corridor_analysis.corridor_from_segments(speed_segments_gdf=detail,
                                               organization_source_record_id=row.organization_source_record_id,
                                               shape_id=row.shape_id,
                                               start_seg_id=row.start_segment_id,
                                               end_seg_id=row.end_segment_id)

In [None]:
corridor_trips = corridor_analysis.find_corridor_data(detail, gdf, st4)
# corridor_results = corridor_analysis.analyze_corridor_trips(corridor_trips)

In [None]:
# Drop speeds above 30 mph before comparing distributions (the same cap is applied to old_segments).
corridor_trips = corridor_trips.query('speed_mph <= 30')

In [None]:
# Keep only route 49, matching the route filter set on the sfmta object.
corridor_trips = corridor_trips.query('route_short_name == "49"')

In [None]:
corridor_trips.columns

In [None]:
# Same 30 mph cap for the Feb 2022 segments.
old_segments = old_segments.query('speed_mph <= 30')

In [None]:
# Quick look at the Feb 2022 speed distribution.
# NOTE(review): Series.hist returns a matplotlib Axes, so `fig` is a misnomer here.
fig = old_segments.speed_mph.hist(bins=30)

In [None]:
# Quick look at the current corridor speed distribution.
corridor_trips.speed_mph.hist(bins=30)

In [None]:
import matplotlib.pyplot as plt

# Overlay the two speed distributions on a single axis.
# Both series are binned on the same edges: with an independent `bins=20` per series the
# bars would have different widths and positions, which makes the overlay hard to compare.
old_speeds = old_segments['speed_mph']
new_speeds = corridor_trips['speed_mph']
shared_bins = np.histogram_bin_edges(pd.concat([old_speeds, new_speeds]).dropna(), bins=20)

hist_fig, hist_ax = plt.subplots()
# alpha controls transparency so the overlapping bars stay visible
hist_ax.hist(old_speeds, alpha=0.5, label='feb 2022', bins=shared_bins)
# NOTE(review): the 'dec 2024' label is hard-coded -- confirm it matches analysis_date.
hist_ax.hist(new_speeds, alpha=0.5, label='dec 2024', bins=shared_bins)

hist_ax.set_xlabel('speed_mph')
hist_ax.set_ylabel('count')
hist_ax.legend(loc='upper right')
hist_ax.set_title('Van Ness BRT segment speeds')
plt.show()

In [None]:
len(corridor_trips.trip_instance_key.unique())

In [None]:
len(old_segments.trip_key.unique())

In [None]:
# Row/column counts of the two datasets being compared.
old_segments.shape

In [None]:
corridor_trips.shape

In [None]:
# Number of distinct segments in the current data ...
corridor_trips.segment_id.unique().shape

In [None]:
# ... versus the number of distinct stop sequences in the Feb 2022 data.
old_segments.stop_sequence.unique().shape