In [1]:
%%capture
import warnings
warnings.filterwarnings('ignore')

import os
os.environ["CALITP_BQ_MAX_BYTES"] = str(800_000_000_000)
import shared_utils

from calitp_data_analysis.tables import tbls
import calitp_data_analysis.magics

from siuba import *
import pandas as pd
import geopandas as gpd

import datetime as dt

import importlib
from rt_analysis import rt_filter_map_plot
# import build_speedmaps_index

from IPython.display import display, Markdown, Latex, HTML
import json

In [2]:
analysis_date = dt.date(2023, 5, 17)

In [3]:
%%capture_parameters
human_date = analysis_date.strftime('%B %d %Y (%A)')
human_date

{"human_date": "May 17 2023 (Wednesday)"}


In [4]:
from tqdm.notebook import tqdm

# Transit on SHS by district (from `bus_service_increase`)

In [13]:
path = 'gs://calitp-analytics-data/data-analyses/bus_service_increase/parallel_or_intersecting_2023-04-12.parquet'

In [14]:
shs_pa = gpd.read_parquet(path)

In [None]:
## probably in feet? (yes)

In [16]:
shs_pa.crs

<Projected CRS: EPSG:2229>
Name: NAD83 / California zone 5 (ftUS)
Axis Info [cartesian]:
- X[east]: Easting (US survey foot)
- Y[north]: Northing (US survey foot)
Area of Use:
- name: United States (USA) - California - counties Kern; Los Angeles; San Bernardino; San Luis Obispo; Santa Barbara; Ventura.
- bounds: (-121.42, 32.76, -114.12, 35.81)
Coordinate Operation:
- name: SPCS83 California zone 5 (US Survey feet)
- method: Lambert Conic Conformal (2SP)
Datum: North American Datum 1983
- Ellipsoid: GRS 1980
- Prime Meridian: Greenwich

In [17]:
## highway lengths are presumably segments of some kind, not full rts

In [15]:
shs_pa >> head(3)

Unnamed: 0,feed_key,name,route_id,geometry,total_routes,route_length,Route,County,District,RouteType,NB,SB,EB,WB,highway_length,pct_route,pct_highway,parallel
0,026ed9b585ddcd860d59a367139d7429,Bay Area 511 AC Transit Schedule,10,"LINESTRING (5377530.006 3182846.533, 5377574.4...",113,40538.084415,112.0,ALA,4.0,State,0.0,0.0,1.0,1.0,9439.06421,0.13,0.557,0
1,026ed9b585ddcd860d59a367139d7429,Bay Area 511 AC Transit Schedule,10,"LINESTRING (5377530.006 3182846.533, 5377574.4...",113,40538.084415,185.0,ALA,4.0,State,1.0,1.0,0.0,0.0,38599.895707,0.595,0.625,1
2,026ed9b585ddcd860d59a367139d7429,Bay Area 511 AC Transit Schedule,10,"LINESTRING (5377530.006 3182846.533, 5377574.4...",113,40538.084415,238.0,ALA,4.0,State,1.0,1.0,0.0,0.0,54249.258347,0.289,0.216,1


In [18]:
shs_pa['mi_on_shs'] = shs_pa.route_length * shs_pa.pct_route / shared_utils.geography_utils.FEET_PER_MI

In [24]:
shs_mileage = (shs_pa
 >> select(_.feed_key, _.route_id, _.mi_on_shs)
 >> group_by(_.feed_key, _.route_id)
 >> summarize(mi_on_shs = _.mi_on_shs.sum())
)

In [6]:
path = 'gs://calitp-analytics-data/data-analyses/bus_service_increase/routes_categorized_2023-04-12.parquet'

In [7]:
shs_routes = gpd.read_parquet(path)

In [8]:
shs_routes.columns

Index(['feed_key', 'name', 'route_id', 'total_routes', 'geometry', 'district',
       'service_hours', 'category', 'unique_route', 'route_length_mi'],
      dtype='object')

In [9]:
shs_routes >> head(3)

Unnamed: 0,feed_key,name,route_id,total_routes,geometry,district,service_hours,category,unique_route,route_length_mi
0,026ed9b585ddcd860d59a367139d7429,Bay Area 511 AC Transit Schedule,10,113,"LINESTRING (-122.08709 37.67010, -122.08693 37...",4.0,70.9,on_shn,1,7.68
1,026ed9b585ddcd860d59a367139d7429,Bay Area 511 AC Transit Schedule,12,113,"LINESTRING (-122.30044 37.87949, -122.30023 37...",4.0,103.55,other,1,11.3
2,026ed9b585ddcd860d59a367139d7429,Bay Area 511 AC Transit Schedule,14,113,"LINESTRING (-122.22546 37.77552, -122.22563 37...",4.0,105.67,intersects_shn,1,10.89


In [29]:
df = (shs_routes
      >> filter(-_.district.isna())
      >> inner_join(_, shs_mileage, on = ['feed_key', 'route_id'])
     )

In [31]:
df >> head(3)

Unnamed: 0,feed_key,name,route_id,total_routes,geometry,district,service_hours,category,unique_route,route_length_mi,mi_on_shs
0,026ed9b585ddcd860d59a367139d7429,Bay Area 511 AC Transit Schedule,10,113,"LINESTRING (-122.08709 37.67010, -122.08693 37...",4.0,70.9,on_shn,1,7.68,11.217072
1,026ed9b585ddcd860d59a367139d7429,Bay Area 511 AC Transit Schedule,12,113,"LINESTRING (-122.30044 37.87949, -122.30023 37...",4.0,103.55,other,1,11.3,10.020702
2,026ed9b585ddcd860d59a367139d7429,Bay Area 511 AC Transit Schedule,14,113,"LINESTRING (-122.22546 37.77552, -122.22563 37...",4.0,105.67,intersects_shn,1,10.89,11.83259


In [38]:
(df >> group_by(_.district)
    >> summarize(n = _.shape[0], mi_on_shs = _.mi_on_shs.sum())
    # >> spread('category', 'n')
)

Unnamed: 0,district,n,mi_on_shs
0,1.0,40,1052.175156
1,2.0,35,1059.337037
2,3.0,209,3150.887764
3,4.0,622,7195.786269
4,5.0,144,2644.610772
5,6.0,109,1759.61548
6,7.0,494,5949.395936
7,8.0,176,2420.820507
8,9.0,16,838.577457
9,10.0,124,1592.297178


In [39]:
(df
    >> count(_.district, _.category)
    >> spread('category', 'n')
    >> mutate(total = _.intersects_shn + _.on_shn + _.other)
)

Unnamed: 0,district,intersects_shn,on_shn,other,total
0,1.0,18.0,21.0,1.0,40.0
1,2.0,12.0,22.0,1.0,35.0
2,3.0,98.0,77.0,34.0,209.0
3,4.0,408.0,135.0,79.0,622.0
4,5.0,76.0,54.0,14.0,144.0
5,6.0,52.0,33.0,24.0,109.0
6,7.0,284.0,99.0,111.0,494.0
7,8.0,76.0,43.0,57.0,176.0
8,9.0,5.0,11.0,,
9,10.0,69.0,37.0,18.0,124.0


In [40]:
# df.to_csv('df.csv')

# Frequent SHN Routes

* quickly look for frequent transit routes using the State Highway Network
* also evaluate applicable district-sourced ideas

### CT Links:

https://app.smartsheet.com/folders/whC3J79cMJCf3MqQ7MGv8pxHgQ3JQ68VwGQGwX21

https://caltrans-my.sharepoint.com/:p:/g/personal/audrey_ogden_dot_ca_gov/EZbU16vBqYZHvrORsT-LzwsBBxJCZqfUPaKHmm5uqWmn_Q?e=4%3A4Ijf9v&CID=80D7A50B-5D1F-497F-8C92-9435B5782132&wdLOR=c3E29B53E-4F87-436E-BEDA-3F4281A53835


## pbar

In [None]:
pbar = tqdm()

# SFMTA

In [None]:
## parameters cell
itp_id = 282

In [None]:
%%capture
rt_day = rt_filter_map_plot.from_gcs(itp_id, analysis_date, pbar)

In [None]:
%%capture_parameters
organization_name = rt_day.organization_name
organization_name

In [None]:
rt_day.set_filter(route_names = ['43', '28', '29'])

In [None]:
_m = rt_day.segment_speed_map()

## Muni Lombard

In [None]:
rt_day.autocorridor(shape_id = '2856', stop_seq_range = [27, 32])

In [None]:
rt_day.corridor_stop_delays >> distinct(_.route_short_name)

In [None]:
rt_day.corridor_metrics()

In [None]:
rt_day.quick_map_corridor()

In [None]:
# m = rt_day.segment_speed_map(how='low_speeds', no_title=False, shn=True, corridor=True)
# m

In [None]:
all_corridors = []

In [None]:
rt_day.corridor['corridor'] = 'us101_lombard'

In [None]:
all_corridors += [rt_day.corridor]

## Muni 19th A (add, check if B worth it)

In [None]:
rt_day.autocorridor(shape_id = '2808', stop_seq_range = [21, 32])

In [None]:
# rt_day.corridor_stop_delays >> distinct(_.route_short_name)

In [None]:
rt_day.corridor_metrics()

In [None]:
rt_day.quick_map_corridor()

In [None]:
# m = rt_day.segment_speed_map(how='low_speeds', no_title=False, shn=True, corridor=True)
## geoms error, no time to fix

In [None]:
rt_day.corridor['corridor'] = 'sr1_19th_a'

In [None]:
all_corridors += [rt_day.corridor]

## Muni 19th B

In [None]:
rt_day.autocorridor(shape_id = '2808', stop_seq_range = [32, 37])

In [None]:
rt_day.corridor_stop_delays >> distinct(_.route_short_name)

In [None]:
rt_day.corridor_metrics()

In [None]:
rt_day.quick_map_corridor()

In [None]:
rt_day.corridor['corridor'] = 'sr1_19th_b'

In [None]:
all_corridors += [rt_day.corridor]

In [None]:
# m = rt_day.segment_speed_map(how='low_speeds', no_title=False, shn=True, corridor=True)

## GGT (skip)

# AC Transit

In [None]:
## parameters cell
itp_id = 4

In [None]:
%%capture
rt_day = rt_filter_map_plot.from_gcs(itp_id, analysis_date, pbar)

In [None]:
rt_day.transit_priority_target_mph = 20

In [None]:
%%capture_parameters
organization_name = rt_day.organization_name
organization_name

In [None]:
rt_day.set_filter(route_names = ['72', '72M', '72R'])

In [None]:
_m = rt_day.segment_speed_map()

## AC Transit San Pablo

In [None]:
rt_day.autocorridor(shape_id = 'shp-72R-15', stop_seq_range = [9, 19])

In [None]:
# rt_day.corridor_stop_delays >> distinct(_.route_short_name)

In [None]:
rt_day.corridor_metrics()

In [None]:
rt_day.quick_map_corridor()

In [None]:
# m = rt_day.segment_speed_map(how='low_speeds', no_title=False, shn=True, corridor=True)

In [None]:
rt_day.corridor['corridor'] = 'sr123_san_pablo'

In [None]:
all_corridors += [rt_day.corridor]

# BBB (add)

In [None]:
## parameters cell
itp_id = 300

In [None]:
%%capture
rt_day = rt_filter_map_plot.from_gcs(itp_id, analysis_date, pbar)

In [None]:
%%capture_parameters
organization_name = rt_day.organization_name
organization_name

In [None]:
rt_day.set_filter(route_names = ['3', 'R3'])

In [None]:
_m = rt_day.segment_speed_map()

In [None]:
# _m

## BBB Lincoln

In [None]:
rt_day.autocorridor(shape_id = '26355', stop_seq_range = [16, 29])

In [None]:
# rt_day.corridor_stop_delays >> distinct(_.route_short_name)

In [None]:
rt_day.corridor_metrics()

In [None]:
rt_day.quick_map_corridor()

In [None]:
# m = rt_day.segment_speed_map(how='low_speeds', no_title=False, shn=True, corridor=True)

In [None]:
rt_day.corridor['corridor'] = 'sr1_lincoln'

In [None]:
all_corridors += [rt_day.corridor]

# LA Metro

In [None]:
## parameters cell
itp_id = 182

In [None]:
%%capture
rt_day = rt_filter_map_plot.from_gcs(itp_id, analysis_date, pbar)

In [None]:
%%capture_parameters
organization_name = rt_day.organization_name
organization_name

In [None]:
rt_day.set_filter(route_names = ['4'])

In [None]:
_m = rt_day.segment_speed_map()

## Metro Santa Monica Bl

In [None]:
rt_day.autocorridor(shape_id = '40336_DEC22', stop_seq_range = [39, 49])

In [None]:
rt_day.corridor_stop_delays >> distinct(_.route_short_name)

In [None]:
rt_day.corridor_metrics()

In [None]:
rt_day.quick_map_corridor()

In [None]:
# m = rt_day.segment_speed_map(how='low_speeds', no_title=False, shn=True, corridor=True)

In [None]:
rt_day.corridor['corridor'] = 'sr2_santa_monica_bl'

In [None]:
all_corridors += [rt_day.corridor]

# VTA (add, optional)

In [None]:
## parameters cell
itp_id = 294

In [None]:
%%capture
rt_day = rt_filter_map_plot.from_gcs(itp_id, analysis_date, pbar)

In [None]:
%%capture_parameters
organization_name = rt_day.organization_name
organization_name

In [None]:
rt_day.set_filter(route_names = ['22', 'Rapid 522'])

In [None]:
_m = rt_day.segment_speed_map()

In [None]:
# _m

## VTA ECR

In [None]:
rt_day.autocorridor(shape_id = '108676', stop_seq_range = [41, 98])

In [None]:
rt_day.corridor_stop_delays >> distinct(_.route_short_name)

In [None]:
rt_day.corridor_metrics()

In [None]:
rt_day.quick_map_corridor()

In [None]:
rt_day.corridor['corridor'] = 'sr82_vta'

In [None]:
all_corridors += [rt_day.corridor]

In [None]:
# m = rt_day.segment_speed_map(how='low_speeds', no_title=False, shn=True, corridor=True)

# Samtrans (add, optional)

In [None]:
## parameters cell
itp_id = 290

In [None]:
%%capture
rt_day = rt_filter_map_plot.from_gcs(itp_id, analysis_date, pbar)

In [None]:
%%capture_parameters
organization_name = rt_day.organization_name
organization_name

In [None]:
rt_day.set_filter(route_names = ['ECR'])

In [None]:
_m = rt_day.segment_speed_map()

## Samtrans ECR

merge shape, don't do 2 parts...

In [None]:
rt_day.autocorridor(shape_id = 'ECR1095', stop_seq_range = [3, 65])

In [None]:
corr_copy = rt_day.corridor.copy()

In [None]:
rt_day.autocorridor(shape_id = 'ECR1098', stop_seq_range = [4, 35])

In [None]:
corr2_copy = rt_day.corridor.copy()

In [None]:
gdf = pd.concat([corr_copy, corr2_copy])

In [None]:
gdf.distance_meters = gdf.distance_meters.sum()
gdf = gdf.dissolve()

In [None]:
# gdf.explore()

In [None]:
rt_day.add_corridor(gdf)

In [None]:
# rt_day.corridor_stop_delays >> distinct(_.route_short_name)

In [None]:
rt_day.corridor_metrics()

In [None]:
rt_day.quick_map_corridor()

In [None]:
# m = rt_day.segment_speed_map(how='low_speeds', no_title=False, shn=True, corridor=True)

# m

In [None]:
rt_day.corridor['corridor'] = 'sr82_samtrans'

In [None]:
all_corridors += [rt_day.corridor]

# Foothill (eval)

In [None]:
## parameters cell
itp_id = 112

In [None]:
%%capture
rt_day = rt_filter_map_plot.from_gcs(itp_id, analysis_date, pbar)

In [None]:
%%capture_parameters
organization_name = rt_day.organization_name
organization_name

In [None]:
rt_day.set_filter(route_names = ['188'])

In [None]:
_m = rt_day.segment_speed_map()

## Foothill Foothill Bl

In [None]:
rt_day.autocorridor(shape_id = '12286_shp', stop_seq_range = [1779, 2834])

In [None]:
# rt_day.corridor_stop_delays >> distinct(_.route_short_name)

In [None]:
rt_day.corridor_metrics()

In [None]:
rt_day.quick_map_corridor()

In [None]:
rt_day.corridor['corridor'] = 'sr66_foothill'

In [None]:
all_corridors += [rt_day.corridor]

# From Project List
https://app.smartsheet.com/reports/4gRrr282hJ3MPVG7hcqX4XQHh2FR4GgpfW6pq5Q1?view=grid

In [None]:
# shared_utils.rt_utils.show_full_df((pd.read_parquet('./_rt_progress_2023-05-17.parquet')
#                                    >> filter(_.status == 'map_confirmed')
#                                    )
#                                   )

In [None]:
# analysis_date = dt.date(2023, 5, 17)

# Eureka

In [None]:
## parameters cell
itp_id = 108

In [None]:
%%capture
rt_day = rt_filter_map_plot.from_gcs(itp_id, analysis_date, pbar)

In [None]:
rt_day.transit_priority_target_mph = 20

In [None]:
%%capture_parameters
organization_name = rt_day.organization_name
organization_name

In [None]:
# rt_day.set_filter(route_names = ['188'])

In [None]:
try:
    _m = rt_day.segment_speed_map()
except:
    pass

## Eureka US101 (Broadway Truesdale - 4th per [RTP](https://www.hcaog.net/sites/default/files/vroom_2022-2042_full_report.pdf))

* difficult to analyze -- need to exclude distant stops from corridor (add to algorithm?)
* standardize on most common trip pattern

In [None]:
rt_day.autocorridor(shape_id = 'p_178050', stop_seq_range = [13, 13.5],
manual_exclude={'p_179049': {'max': 4}, 'p_179044': {'max': 4}})
# intercity service, exclude stops distant from corridor

In [None]:
# rt_day.corridor_stop_delays >> distinct(_.route_short_name)

In [None]:
rt_day.corridor_metrics()

In [None]:
rt_day.quick_map_corridor()

In [None]:
rt_day.corridor['corridor'] = 'us101_broadway'

In [None]:
all_corridors += [rt_day.corridor]

# TART SR267/89 -- no data

# Tahoe Transportation District US50

* limited data geographic extent

In [None]:
## parameters cell
itp_id = 331

In [None]:
%%capture
rt_day = rt_filter_map_plot.from_gcs(itp_id, analysis_date, pbar)

In [None]:
rt_day.transit_priority_target_mph = 25

In [None]:
%%capture_parameters
organization_name = rt_day.organization_name
organization_name

In [None]:
_m = rt_day.segment_speed_map()

In [None]:
rt_day.autocorridor(shape_id = 'p_497358', stop_seq_range = [0, 20])

In [None]:
rt_day.corridor_stop_delays >> distinct(_.route_short_name)

In [None]:
rt_day.corridor_metrics()

In [None]:
rt_day.quick_map_corridor()

In [None]:
rt_day.corridor['corridor'] = 'us50_tahoe'

In [None]:
all_corridors += [rt_day.corridor]

# Fresno -- unable eval (new services)

# VCTC (SR126 Fillmore - Ventura)

* minimal thru running delay on available data...

In [None]:
## parameters cell
itp_id = 380

In [None]:
# analysis_date = dt.date(2023, 4, 12)

In [None]:
%%capture
rt_day = rt_filter_map_plot.from_gcs(itp_id, analysis_date, pbar)

In [None]:
%%capture_parameters
organization_name = rt_day.organization_name
organization_name

In [None]:
_m = rt_day.segment_speed_map()

In [None]:
_m

# LA Metro Projects

* NoHo-Pas BRT to only briefly use SR134, light delay
* As per sheet, NSFV improvements only intersect SHN
* Inglewood connector, Arbor Vitae not on SHN...
* Games Route Network, etc unable to analyze

# Victor Valley

* service runs, difficult to evaluate
* 0.5/hour, limited span

# Others

* OCTA speculative, new services unable to eval

# OCTA SR39

* multiple SHN sections, eval longest
* about 5 trips/hour combined!

In [None]:
## parameters cell
itp_id = 235

In [None]:
%%capture
rt_day = rt_filter_map_plot.from_gcs(itp_id, analysis_date, pbar)

In [None]:
rt_day.transit_priority_target_mph = 20

In [None]:
%%capture_parameters
organization_name = rt_day.organization_name
organization_name

In [None]:
rt_day.set_filter(route_names=['29', '529'])

In [None]:
_m = rt_day.segment_speed_map()

In [None]:
rt_day.autocorridor(shape_id = '2902', stop_seq_range = [5, 51])

In [None]:
# rt_day.corridor_stop_delays >> distinct(_.route_short_name)

In [None]:
rt_day.corridor_metrics()

In [None]:
rt_day.quick_map_corridor()

In [None]:
rt_day.corridor['corridor'] = 'sr39_beach'

In [None]:
all_corridors += [rt_day.corridor]

# combine and export

In [None]:
all_gdf = pd.concat(all_corridors)

In [None]:
all_gdf

In [None]:
(all_gdf >> select(-_.geometry)).to_csv('june16_corridor_metrics_speeds.csv')

In [None]:
all_gdf.to_file('june16_corridor_metrics_speeds.geojson')