In [None]:
import pandas as pd
import geopandas as gpd

from shared_utils import rt_utils, catalog_utils, rt_dates

from calitp_data_analysis import get_fs, geography_utils
from segment_speed_utils import helpers, time_series_utils, gtfs_schedule_wrangling, corridor_analysis
from segment_speed_utils.project_vars import SCHED_GCS, SEGMENT_GCS, GTFS_DATA_DICT, analysis_date
import numpy as np

# Transit Priority on SHS Feb 2025

## Methodology and Assumptions

* manually create lists of corridors and hotspots based on speedmaps, judgement
* find relevant trip data (segments) for each corridor, from the first segment that intersects the defined corridor to the last.
* only keep routes that travel along the corridor for at least half of the corridor length (exclude cross-street routes)
* exclude trips with apparent zero seconds in corridor, speeds above 80mph, speeds below the 5th or above the 95th percentile.
* calculate speed and travel times for trips through corridor
* for each trip, roughly estimate effects of transit priority
    * corridors: if the current-conditions median speed is less than 13.6mph, all trip speeds increase to 16mph; otherwise all trip speeds increase by 15%
    * hotspots: all trips save 30 seconds of travel time through hotspot
* track total daily trips and peak frequency across all routes
* estimate average trip delay by dividing total minutes of delay by daily trip count
* estimate rider delay for certain examples by multiplying average trip delay by ridership
* "minutes_per_mile" is a metric created by dividing total daily minutes of delay by the length of the corridor. It is intended to provide a rough measure of delay intensity or cost-effectiveness.

In [None]:
# Re-import corridor_analysis so that edits made to the module while this
# notebook is open are picked up without restarting the kernel.
import importlib
importlib.reload(corridor_analysis)

## need trip-level (pre-aggregation) gdf to properly calculate metrics

In [None]:
# Segment speeds for analysis_date; reused below for frequencies and for every corridor lookup.
segment_speeds = corridor_analysis.import_speedmap_segment_speeds(analysis_date)

In [None]:
# Derived from segment_speeds; passed later to summarize_corridor_improvements and corridor_from_sheet.
frequencies = corridor_analysis.get_max_frequencies(segment_speeds)

In [None]:
# Trip-level speeds for the same analysis_date (the pre-aggregation data the heading above calls for).
trip_speeds = corridor_analysis.import_trip_speeds(analysis_date)

## corridor specification

In [None]:
# rt_utils.show_full_df(pd.read_parquet('../ca_transit_speed_maps/_rt_progress_2024-12-11.parquet').sort_values(['caltrans_district', 'organization_name']))

## Corridor Measurements

Previous logic:

For each trip, take the span from the last stop before entering the corridor to the first stop after exiting the corridor. This was done using stop_sequence.

Now,

* first sjoin with aggregated data (has geom). Sjoining on segments is equivalent to the previous methodology, since it will yield the span from the last stop before entry to the first stop after exiting.
* avoid doing the scheduled delay metric for now.

In [None]:
# Corridor and hotspot definitions live on two sheets of the same workbook.
# Identifier columns are cast to str once each sheet has been read.
workbook_path = './_temp/corr_hs.xlsx'
id_dtypes = {'start_segment_id': str, 'end_segment_id': str, 'shape_id': str}

corr = pd.read_excel(workbook_path, sheet_name='Corridor Input').astype(id_dtypes)
hs = pd.read_excel(workbook_path, sheet_name='Hotspot Input').astype(id_dtypes)

In [None]:
# Pick one corridor definition (index label 15) to walk through the pipeline step by step.
row = corr.loc[15]

In [None]:
# Build the corridor geometry for the selected sheet row from its
# operator id, shape id and bounding segment ids.
gdf = corridor_analysis.corridor_from_segments(
    speed_segments_gdf=segment_speeds,
    organization_source_record_id=row['organization_source_record_id'],
    shape_id=row['shape_id'],
    start_seg_id=row['start_segment_id'],
    end_seg_id=row['end_segment_id'],
)

In [None]:
# Gather the trip-level data for this corridor, then compute per-trip corridor results.
corridor_trips = corridor_analysis.find_corridor_data(segment_speeds, gdf, trip_speeds)
corridor_results = corridor_analysis.analyze_corridor_trips(corridor_trips)

In [None]:
# Check the routes picked up for this corridor; see validate_corridor_routes for the exact criteria.
corridor_analysis.validate_corridor_routes(gdf, corridor_trips)

In [None]:
# Corridor intervention assumptions from the methodology above: 16 mph floor, 15% speedup.
corridor_improvements = corridor_analysis.analyze_corridor_improvements(
    corridor_results,
    trip_mph_floor=16,
    trip_percent_speedup=15,
)

In [None]:
# Peek at the first rows to sanity-check the improvement columns.
corridor_improvements.head(3)

In [None]:
# Roll the improvement results up to a corridor-level summary, using the frequencies computed earlier.
summarized = corridor_analysis.summarize_corridor_improvements(corridor_improvements, frequencies)

In [None]:
# Display the single-corridor summary.
summarized

In [None]:
# Run the full pipeline for every corridor on the input sheet.
# Intervention: 16 mph floor or 15% speedup; fwy_xpwy_floor is set to 35.
corridor_intervention = {'trip_mph_floor': 16, 'trip_percent_speedup': 15}
corr_gdf = corridor_analysis.corridor_from_sheet(
    corr,
    segment_speeds,
    trip_speeds,
    frequencies,
    intervention_dict=corridor_intervention,
    fwy_xpwy_floor=35,
)

In [None]:
# Save the corridor results as a spreadsheet in the current working directory.
corr_gdf.to_excel('corridors.xlsx')

In [None]:
# Save the corridor results with geometry; the file name carries analysis_date.
corr_gdf.to_file(f'corridors_{analysis_date}.geojson')

In [None]:
# corr_gdf.iloc[1:,:].explore(column='minutes_per_mile')

In [None]:
# Run the same pipeline for every hotspot on the input sheet.
# Intervention: each trip saves 30 seconds through the hotspot.
hotspot_intervention = {'trip_seconds_saved': 30}
hs_gdf = corridor_analysis.corridor_from_sheet(
    hs,
    segment_speeds,
    trip_speeds,
    frequencies,
    intervention_dict=hotspot_intervention,
)

In [None]:
# Save the hotspot results as a spreadsheet in the current working directory.
hs_gdf.to_excel('hotspots.xlsx')

In [None]:
# Save the hotspot results with geometry; the file name carries analysis_date.
hs_gdf.to_file(f'hotspots_{analysis_date}.geojson')

## Discussion

* Which other metrics?

after screening:

ridership/person-hours of delay
accessibility, equity

* Is our list complete?

Include Tempo, Van Ness, can we compare?

* Exclude routes where necessary (current corridor join is just spatial...)

* Add location-specific interventions, and [guidance](https://caltrans.sharepoint.com/:w:/s/DOTPMPHQ-DataandDigitalServices/EdG0YNQcQMBJmKncAuNva9wBjpxVq2sD8p3C5azumXFNRA?e=TO7CbB)

* How much should we focus on freeways?

Yes, include (Bay Bridge, others, SDMTS freeway service, launch service with new HOT...)

District transit plans, express/rapid on freeways...?

## webmaps

In [None]:
# Interactive docstring lookup (IPython `?` syntax); an exploration aid that the results do not depend on.
rt_utils.set_state_export?

In [None]:
import branca

In [None]:
# Put the sibling speedmaps folder on the import path.
# NOTE(review): the path is relative, so this only works when the kernel's working
# directory is this notebook's folder; export_legends (imported next) presumably lives there.
import sys
sys.path.append('../ca_transit_speed_maps/')

In [None]:
import export_legends

In [None]:
# colors = branca.colormap.step.Reds_08.colors[3:]
# cmap = branca.colormap.LinearColormap(colors=colors).to_step(data=corr_gdf.iloc[1:,:].delay_minutes, method='quant', n=5)

In [None]:
# corr_gdf.iloc[1:,:].explore(cmap=cmap, column='delay_minutes')

In [None]:
labels = [int(x) for x in cmap.tick_labels[1:-1]]
export_legends.export_legend(cmap, filename='transit_delay_quantiles.svg', inner_labels=labels)

In [None]:
# colors = branca.colormap.step.Reds_08.colors[3:]
# cmap = branca.colormap.LinearColormap(colors=colors).to_step(data=hs_gdf.delay_minutes, method='quant', n=5)
# hs_gdf.explore(cmap=cmap, column='delay_minutes')

In [None]:
labels = [int(x) for x in cmap.tick_labels[1:-1]]
export_legends.export_legend(cmap, filename='transit_delay_quantiles_hs.svg', inner_labels=labels)

In [None]:
shn = gpd.read_parquet(rt_utils.SHN_PATH)

In [None]:
drop_van_ness = corr_gdf.iloc[1:,:]

In [None]:
drop_van_ness.columns

In [None]:
gdf = drop_van_ness.drop(columns=['corridor_id', 'intervention_assumption']).rename(columns={'trips_per_hr_peak_directional': 'trips/hr'})

In [None]:
export_result = rt_utils.set_state_export(shn, subfolder = 'transit_delay/', filename = 'shn',
                    map_type = 'state_highway_network')
shn_state = export_result['state_dict']

In [None]:
export_result = rt_utils.set_state_export(
    gdf, subfolder = 'transit_delay/', filename=f'corridors_{analysis_date}',
    color_col='delay_minutes', cmap=cmap, legend_url="https://storage.googleapis.com/calitp-map-tiles/transit_delay_quantiles.svg",
    map_title='Corridor Transit Delay Dec 2024 (total daily vehicle-minutes)',
    existing_state = shn_state)

In [None]:
export_result

In [None]:
gdf2 = hs_gdf.drop(columns=['corridor_id', 'intervention_assumption']).rename(columns={'trips_per_hr_peak_directional': 'trips/hr'})

In [None]:
export_result = rt_utils.set_state_export(shn, subfolder = 'transit_delay/', filename = 'shn',
                    map_type = 'state_highway_network')
shn_state = export_result['state_dict']

In [None]:
export_result = rt_utils.set_state_export(
    gdf2, subfolder = 'transit_delay/', filename=f'hotspots2_{analysis_date}',
    color_col='delay_minutes', cmap=cmap, legend_url="https://storage.googleapis.com/calitp-map-tiles/transit_delay_quantiles_hs.svg",
    map_title='Hotspot Transit Delay Dec 2024 (total daily vehicle-minutes)',
    existing_state = shn_state)

In [None]:
# Show the result returned by the hotspot export.
export_result

## bespoke van ness

* v2 warehouse doesn't include RT before Sep 2022, and Van Ness opened Apr 2022
* get old rt_analysis code working

In [None]:
# Legacy stop-delay view (file 282_2022-02-08) read directly from GCS.
# NOTE(review): old_sfmta is not referenced again in the cells below.
old_sfmta = gpd.read_parquet('gs://calitp-analytics-data/data-analyses/rt_delay/stop_delay_views/282_2022-02-08.parquet')

In [None]:
import rt_analysis

In [None]:
# Interactive docstring lookup (IPython `?` syntax); an exploration aid only.
rt_analysis.rt_filter_map_plot.from_gcs?

In [None]:
from rt_analysis import rt_filter_map_plot

In [None]:
# Pick up local edits to the legacy module without restarting the kernel.
importlib.reload(rt_filter_map_plot)

In [None]:
# Load the legacy rt_analysis object for id 282 on 2022-02-08 (before Van Ness opened, per the notes above).
sfmta = rt_analysis.rt_filter_map_plot.from_gcs(282, '2022-02-08')

In [None]:
# Restrict the legacy analysis to route 49.
sfmta.set_filter(route_names=['49'])

In [None]:
# Interactive docstring lookup (IPython `?` syntax); an exploration aid only.
sfmta.segment_speed_map?

In [None]:
# Build the segment speed map for the filtered route with default arguments.
sfmta.segment_speed_map()

In [None]:
# Interactive docstring lookup (IPython `?` syntax); an exploration aid only.
sfmta.autocorridor?

In [None]:
# Define the corridor on the legacy object.
# NOTE(review): the arguments look like a shape_id and a pair of stop sequences
# bounding the corridor -- confirm against autocorridor's docstring.
sfmta.autocorridor('194252', [27, 37])

In [None]:
# Corridor metrics from the legacy (2022-02-08) data.
old_van_ness = sfmta.corridor_metrics()

In [None]:
# Matching corridor row from the current analysis, selected by corridor name.
new_van_ness = corr_gdf.query('corridor_name == "US101 Van Ness"')

In [None]:
# Display the legacy corridor metrics.
old_van_ness

In [None]:
# Re-run the speed map in corridor mode without rendering.
# NOTE(review): presumably this populates stop_segment_speed_view.corridor, used in the next cell -- confirm.
sfmta.segment_speed_map(corridor=True, no_render=True)

In [None]:
# Keep only the legacy segment rows flagged as inside the corridor.
old_segments = sfmta.stop_segment_speed_view[sfmta.stop_segment_speed_view.corridor]

In [None]:
# Display the current-analysis Van Ness row for comparison with old_van_ness.
new_van_ness

In [None]:
# Select the fourth row of the corridor sheet by position.
# NOTE(review): presumably the Van Ness definition -- confirm, since this depends on sheet row order.
row = corr.iloc[3,:]

In [None]:
# Display the selected sheet row to confirm it is the intended corridor.
row

In [None]:
# Rebuild the corridor geometry for the row selected above. This overwrites
# `gdf`, which previously held another layer.
gdf = corridor_analysis.corridor_from_segments(
    speed_segments_gdf=segment_speeds,
    organization_source_record_id=row['organization_source_record_id'],
    shape_id=row['shape_id'],
    start_seg_id=row['start_segment_id'],
    end_seg_id=row['end_segment_id'],
)

In [None]:
# Gather trip-level data for this corridor from the current analysis date, then compute per-trip results.
corridor_trips = corridor_analysis.find_corridor_data(segment_speeds, gdf, trip_speeds)
corridor_results = corridor_analysis.analyze_corridor_trips(corridor_trips)

In [None]:
# Median corridor speed (mph) across trips in the current data.
corridor_results.corridor_speed_mph.median()

In [None]:
# Relative change between two speeds: (new - old) / old.
# NOTE(review): 8.4 and 6.8 are hard-coded, presumably median speeds copied from
# cell outputs above -- confirm the sources, or compute from the frames directly.
(8.4 - 6.8) / 6.8

In [None]:
corridor_trips = corridor_trips.query('speed_mph <= 30 & speed_mph > speed_mph.quantile(.05)')

In [None]:
corridor_trips = corridor_trips.query('route_short_name == "49"')

In [None]:
corridor_trips.columns

In [None]:
old_segments = old_segments.query('speed_mph <= 30')

In [None]:
import matplotlib.pyplot as plt

plt.hist(old_segments['speed_mph'],  
         alpha=0.5, # the transaparency parameter 
         label='Feb 2022',
         bins=20) 
  
plt.hist(corridor_trips['speed_mph'], 
         alpha=0.5, 
         label='Feb 2023',
         bins=20) 
  
plt.legend(loc='upper right') 
plt.title('Van Ness BRT segment speeds')
plt.xlabel('Speed (mph)')
plt.ylabel('Trips (count)')
plt.show()