In [1]:
%%capture
import warnings
warnings.filterwarnings('ignore')

import os
os.environ["CALITP_BQ_MAX_BYTES"] = str(800_000_000_000)
from shared_utils import rt_utils, rt_dates, geography_utils

from calitp_data_analysis.tables import tbls
import calitp_data_analysis.magics

from siuba import *
import pandas as pd
import geopandas as gpd

import datetime as dt

import importlib
from rt_analysis import rt_filter_map_plot
# import build_speedmaps_index

from IPython.display import display, Markdown, Latex, HTML
import json

In [2]:
analysis_date = dt.date(2023, 5, 17)

In [3]:
%%capture_parameters
human_date = analysis_date.strftime('%B %d %Y (%A)')
human_date

{"human_date": "May 17 2023 (Wednesday)"}


In [4]:
from tqdm.notebook import tqdm

# Transit on SHS by district (from `bus_service_increase`)

In [32]:
path = 'gs://calitp-analytics-data/data-analyses/bus_service_increase/parallel_or_intersecting_2023-04-12.parquet'

In [33]:
shs_pa = gpd.read_parquet(path)

In [34]:
## probably in feet? (yes)

In [35]:
shs_pa.crs

<Projected CRS: EPSG:2229>
Name: NAD83 / California zone 5 (ftUS)
Axis Info [cartesian]:
- X[east]: Easting (US survey foot)
- Y[north]: Northing (US survey foot)
Area of Use:
- name: United States (USA) - California - counties Kern; Los Angeles; San Bernardino; San Luis Obispo; Santa Barbara; Ventura.
- bounds: (-121.42, 32.76, -114.12, 35.81)
Coordinate Operation:
- name: SPCS83 California zone 5 (US Survey feet)
- method: Lambert Conic Conformal (2SP)
Datum: North American Datum 1983
- Ellipsoid: GRS 1980
- Prime Meridian: Greenwich

In [36]:
## highway lengths are presumably segments of some kind, not full rts

In [37]:
shs_pa >> head(3)

Unnamed: 0,feed_key,name,route_id,geometry,total_routes,route_length,Route,County,District,RouteType,NB,SB,EB,WB,highway_length,pct_route,pct_highway,parallel
0,026ed9b585ddcd860d59a367139d7429,Bay Area 511 AC Transit Schedule,10,"LINESTRING (5377530.006 3182846.533, 5377574.4...",113,40538.084415,112.0,ALA,4.0,State,0.0,0.0,1.0,1.0,9439.06421,0.13,0.557,0
1,026ed9b585ddcd860d59a367139d7429,Bay Area 511 AC Transit Schedule,10,"LINESTRING (5377530.006 3182846.533, 5377574.4...",113,40538.084415,185.0,ALA,4.0,State,1.0,1.0,0.0,0.0,38599.895707,0.595,0.625,1
2,026ed9b585ddcd860d59a367139d7429,Bay Area 511 AC Transit Schedule,10,"LINESTRING (5377530.006 3182846.533, 5377574.4...",113,40538.084415,238.0,ALA,4.0,State,1.0,1.0,0.0,0.0,54249.258347,0.289,0.216,1


In [38]:
shs_pa['mi_on_shs'] = shs_pa.route_length * shs_pa.pct_route / geography_utils.FEET_PER_MI

In [39]:
shs_mileage = (shs_pa
 >> select(_.feed_key, _.route_id, _.mi_on_shs)
 >> group_by(_.feed_key, _.route_id)
 >> summarize(mi_on_shs = _.mi_on_shs.sum())
)

In [40]:
path = 'gs://calitp-analytics-data/data-analyses/bus_service_increase/routes_categorized_2023-04-12.parquet'

In [41]:
shs_routes = gpd.read_parquet(path)

In [42]:
shs_routes.columns

Index(['feed_key', 'name', 'route_id', 'total_routes', 'geometry', 'district',
       'service_hours', 'category', 'unique_route', 'route_length_mi'],
      dtype='object')

In [43]:
shs_routes >> head(3)

Unnamed: 0,feed_key,name,route_id,total_routes,geometry,district,service_hours,category,unique_route,route_length_mi
0,026ed9b585ddcd860d59a367139d7429,Bay Area 511 AC Transit Schedule,10,113,"LINESTRING (-122.08709 37.67010, -122.08693 37...",4.0,70.9,on_shn,1,7.68
1,026ed9b585ddcd860d59a367139d7429,Bay Area 511 AC Transit Schedule,12,113,"LINESTRING (-122.30044 37.87949, -122.30023 37...",4.0,103.55,other,1,11.3
2,026ed9b585ddcd860d59a367139d7429,Bay Area 511 AC Transit Schedule,14,113,"LINESTRING (-122.22546 37.77552, -122.22563 37...",4.0,105.67,intersects_shn,1,10.89


In [44]:
df = (shs_routes
      >> filter(-_.district.isna())
      >> inner_join(_, shs_mileage, on = ['feed_key', 'route_id'])
     )

In [45]:
df >> head(3)

Unnamed: 0,feed_key,name,route_id,total_routes,geometry,district,service_hours,category,unique_route,route_length_mi,mi_on_shs
0,026ed9b585ddcd860d59a367139d7429,Bay Area 511 AC Transit Schedule,10,113,"LINESTRING (-122.08709 37.67010, -122.08693 37...",4.0,70.9,on_shn,1,7.68,11.217072
1,026ed9b585ddcd860d59a367139d7429,Bay Area 511 AC Transit Schedule,12,113,"LINESTRING (-122.30044 37.87949, -122.30023 37...",4.0,103.55,other,1,11.3,10.020702
2,026ed9b585ddcd860d59a367139d7429,Bay Area 511 AC Transit Schedule,14,113,"LINESTRING (-122.22546 37.77552, -122.22563 37...",4.0,105.67,intersects_shn,1,10.89,11.83259


In [48]:
(df >> group_by(_.district)
    >> summarize(n = _.shape[0], mi_on_shs = _.mi_on_shs.sum())
    # >> spread('category', 'n')
).mi_on_shs.sum()

29866.570474117514

In [49]:
(df
    >> count(_.district, _.category)
    >> spread('category', 'n')
    >> mutate(total = _.intersects_shn + _.on_shn + _.other)
).sum()

district            78.0
intersects_shn    1248.0
on_shn             566.0
other              387.0
total             2185.0
dtype: float64

In [40]:
# df.to_csv('df.csv')

# Frequent SHN Routes

* quickly look for frequent transit routes using the State Highway Network
* also evaluate applicable district-sourced ideas

### CT Links:

https://app.smartsheet.com/folders/whC3J79cMJCf3MqQ7MGv8pxHgQ3JQ68VwGQGwX21

https://caltrans-my.sharepoint.com/:p:/g/personal/audrey_ogden_dot_ca_gov/EZbU16vBqYZHvrORsT-LzwsBBxJCZqfUPaKHmm5uqWmn_Q?e=4%3A4Ijf9v&CID=80D7A50B-5D1F-497F-8C92-9435B5782132&wdLOR=c3E29B53E-4F87-436E-BEDA-3F4281A53835


## pbar

In [5]:
pbar = tqdm()

0it [00:00, ?it/s]

# SFMTA

In [6]:
## parameters cell
itp_id = 282

In [None]:
%%capture
rt_day = rt_filter_map_plot.from_gcs(itp_id, analysis_date, pbar)

In [None]:
%%capture_parameters
organization_name = rt_day.organization_name
organization_name

In [None]:
rt_day.set_filter(route_names = ['43', '28', '29'])

In [None]:
_m = rt_day.segment_speed_map()

## Muni Lombard

In [None]:
rt_day.autocorridor(shape_id = '2856', stop_seq_range = [27, 32])

In [None]:
rt_day.corridor_stop_delays >> distinct(_.route_short_name)

In [None]:
rt_day.corridor_metrics()

In [None]:
rt_day.quick_map_corridor()

In [None]:
# m = rt_day.segment_speed_map(how='low_speeds', no_title=False, shn=True, corridor=True)
# m

In [None]:
all_corridors = []

In [None]:
rt_day.corridor['corridor'] = 'us101_lombard'

In [None]:
all_corridors += [rt_day.corridor]

## Muni 19th A (add, check if B worth it)

In [None]:
rt_day.autocorridor(shape_id = '2808', stop_seq_range = [21, 32])

In [None]:
# rt_day.corridor_stop_delays >> distinct(_.route_short_name)

In [None]:
rt_day.corridor_metrics()

In [None]:
rt_day.quick_map_corridor()

In [None]:
# m = rt_day.segment_speed_map(how='low_speeds', no_title=False, shn=True, corridor=True)
## geoms error, no time to fix

In [None]:
rt_day.corridor['corridor'] = 'sr1_19th_a'

In [None]:
all_corridors += [rt_day.corridor]

## Muni 19th B

In [None]:
rt_day.autocorridor(shape_id = '2808', stop_seq_range = [32, 37])

In [None]:
rt_day.corridor_stop_delays >> distinct(_.route_short_name)

In [None]:
rt_day.corridor_metrics()

In [None]:
rt_day.quick_map_corridor()

In [None]:
rt_day.corridor['corridor'] = 'sr1_19th_b'

In [None]:
all_corridors += [rt_day.corridor]

In [None]:
# m = rt_day.segment_speed_map(how='low_speeds', no_title=False, shn=True, corridor=True)

## GGT (skip)

# AC Transit

In [6]:
## parameters cell
itp_id = 4

In [7]:
%%capture
rt_day = rt_filter_map_plot.from_gcs(itp_id, analysis_date, pbar)

In [8]:
rt_day.transit_priority_target_mph = 20

In [9]:
%%capture_parameters
organization_name = rt_day.organization_name
organization_name

{"organization_name": "Alameda-Contra Costa Transit District"}


In [10]:
rt_day.set_filter(route_names = ['72', '72M', '72R'])

In [11]:
_m = rt_day.segment_speed_map()

## AC Transit San Pablo

In [12]:
rt_day.autocorridor(shape_id = 'shp-72R-15', stop_seq_range = [9, 19])

In [13]:
# rt_day.corridor_stop_delays >> distinct(_.route_short_name)

In [14]:
rt_day.corridor_metrics()

metrics attached to self.corridor: 


Unnamed: 0,route_id,route_short_name,organization,p20_corr_mph,speed_delay_minutes,total_speed_delay,p50_runtime_minutes,n_trips,span_hours,daily_avg_trips_hr,trips_added,new_avg_trips_hr,length_miles,target_mph,geometry
0,72,72,Alameda-Contra Costa Transit District,10.5,1023.2,3623.2,83.1,62,14.8,2.1,12.3,2.5,6.1,20,"POLYGON ((-201973.660 -11619.671, -201974.344 ..."
1,72M,72M,Alameda-Contra Costa Transit District,11.1,1001.4,3623.2,79.4,62,14.6,2.1,12.6,2.6,6.1,20,"POLYGON ((-201973.660 -11619.671, -201974.344 ..."
2,72R,72R,Alameda-Contra Costa Transit District,12.0,1598.6,3623.2,62.4,126,13.2,4.8,25.6,5.7,6.1,20,"POLYGON ((-201973.660 -11619.671, -201974.344 ..."


In [15]:
rt_day.quick_map_corridor()

In [16]:
# m = rt_day.segment_speed_map(how='low_speeds', no_title=False, shn=True, corridor=True)

In [17]:
rt_day.corridor['corridor'] = 'sr123_san_pablo'

In [18]:
all_corridors += [rt_day.corridor]

NameError: name 'all_corridors' is not defined

## AC Transit San Pablo rider delay

In [19]:
avg_daily_rider = 14789

In [20]:
## 15 min per trip delay

In [22]:
14789 * 15 / 60

3697.25

In [None]:
rt_day.corridor

# BBB (add)

In [None]:
## parameters cell
itp_id = 300

In [None]:
%%capture
rt_day = rt_filter_map_plot.from_gcs(itp_id, analysis_date, pbar)

In [None]:
%%capture_parameters
organization_name = rt_day.organization_name
organization_name

In [None]:
rt_day.set_filter(route_names = ['3', 'R3'])

In [None]:
_m = rt_day.segment_speed_map()

In [None]:
# _m

## BBB Lincoln

In [None]:
rt_day.autocorridor(shape_id = '26355', stop_seq_range = [16, 29])

In [None]:
# rt_day.corridor_stop_delays >> distinct(_.route_short_name)

In [None]:
rt_day.corridor_metrics()

In [None]:
rt_day.quick_map_corridor()

In [None]:
# m = rt_day.segment_speed_map(how='low_speeds', no_title=False, shn=True, corridor=True)

In [None]:
rt_day.corridor['corridor'] = 'sr1_lincoln'

In [None]:
all_corridors += [rt_day.corridor]

# LA Metro

In [6]:
## parameters cell
itp_id = 182

In [7]:
%%capture
rt_day = rt_filter_map_plot.from_gcs(itp_id, analysis_date, pbar)

In [8]:
%%capture_parameters
organization_name = rt_day.organization_name
organization_name

{"organization_name": "Los Angeles County Metropolitan Transportation Authority"}


In [9]:
rt_day.set_filter(route_names = ['4'])

In [10]:
_m = rt_day.segment_speed_map()

## Metro Santa Monica Bl

In [11]:
rt_day.autocorridor(shape_id = '40336_DEC22', stop_seq_range = [39, 49])

In [12]:
rt_day.corridor_stop_delays >> distinct(_.route_short_name)

Unnamed: 0,route_short_name
0,224
1,210
2,207
3,4
4,212


In [13]:
rt_day.corridor_metrics()

metrics attached to self.corridor: 


Unnamed: 0,route_id,route_short_name,organization,p20_corr_mph,speed_delay_minutes,total_speed_delay,p50_runtime_minutes,n_trips,span_hours,daily_avg_trips_hr,trips_added,new_avg_trips_hr,length_miles,target_mph,geometry
0,4-13167,4,Los Angeles County Metropolitan Transportation...,7.8,1907.2,1907.2,117.0,243,14.7,8.2,16.3,8.8,2.0,16,"POLYGON ((152799.930 -434869.235, 152799.237 -..."


In [14]:
rt_day.quick_map_corridor()

In [None]:
# m = rt_day.segment_speed_map(how='low_speeds', no_title=False, shn=True, corridor=True)

In [None]:
rt_day.corridor['corridor'] = 'sr2_santa_monica_bl'

In [None]:
all_corridors += [rt_day.corridor]

# VTA (add, optional)

In [None]:
## parameters cell
itp_id = 294

In [None]:
%%capture
rt_day = rt_filter_map_plot.from_gcs(itp_id, analysis_date, pbar)

In [None]:
%%capture_parameters
organization_name = rt_day.organization_name
organization_name

In [None]:
rt_day.set_filter(route_names = ['22', 'Rapid 522'])

In [None]:
_m = rt_day.segment_speed_map()

In [None]:
# _m

## VTA ECR

In [None]:
rt_day.autocorridor(shape_id = '108676', stop_seq_range = [41, 98])

In [None]:
rt_day.corridor_stop_delays >> distinct(_.route_short_name)

In [None]:
rt_day.corridor_metrics()

In [None]:
rt_day.quick_map_corridor()

In [None]:
rt_day.corridor['corridor'] = 'sr82_vta'

In [None]:
all_corridors += [rt_day.corridor]

In [None]:
# m = rt_day.segment_speed_map(how='low_speeds', no_title=False, shn=True, corridor=True)

# Samtrans (add, optional)

In [None]:
## parameters cell
itp_id = 290

In [None]:
%%capture
rt_day = rt_filter_map_plot.from_gcs(itp_id, analysis_date, pbar)

In [None]:
%%capture_parameters
organization_name = rt_day.organization_name
organization_name

In [None]:
rt_day.set_filter(route_names = ['ECR'])

In [None]:
_m = rt_day.segment_speed_map()

## Samtrans ECR

merge shape, don't do 2 parts...

In [None]:
rt_day.autocorridor(shape_id = 'ECR1095', stop_seq_range = [3, 65])

In [None]:
corr_copy = rt_day.corridor.copy()

In [None]:
rt_day.autocorridor(shape_id = 'ECR1098', stop_seq_range = [4, 35])

In [None]:
corr2_copy = rt_day.corridor.copy()

In [None]:
gdf = pd.concat([corr_copy, corr2_copy])

In [None]:
gdf.distance_meters = gdf.distance_meters.sum()
gdf = gdf.dissolve()

In [None]:
# gdf.explore()

In [None]:
rt_day.add_corridor(gdf)

In [None]:
# rt_day.corridor_stop_delays >> distinct(_.route_short_name)

In [None]:
rt_day.corridor_metrics()

In [None]:
rt_day.quick_map_corridor()

In [None]:
# m = rt_day.segment_speed_map(how='low_speeds', no_title=False, shn=True, corridor=True)

# m

In [None]:
rt_day.corridor['corridor'] = 'sr82_samtrans'

In [None]:
all_corridors += [rt_day.corridor]

# Foothill (eval)

In [None]:
## parameters cell
itp_id = 112

In [None]:
%%capture
rt_day = rt_filter_map_plot.from_gcs(itp_id, analysis_date, pbar)

In [None]:
%%capture_parameters
organization_name = rt_day.organization_name
organization_name

In [None]:
rt_day.set_filter(route_names = ['188'])

In [None]:
_m = rt_day.segment_speed_map()

## Foothill Foothill Bl

In [None]:
rt_day.autocorridor(shape_id = '12286_shp', stop_seq_range = [1779, 2834])

In [None]:
# rt_day.corridor_stop_delays >> distinct(_.route_short_name)

In [None]:
rt_day.corridor_metrics()

In [None]:
rt_day.quick_map_corridor()

In [None]:
rt_day.corridor['corridor'] = 'sr66_foothill'

In [None]:
all_corridors += [rt_day.corridor]

# From Project List
https://app.smartsheet.com/reports/4gRrr282hJ3MPVG7hcqX4XQHh2FR4GgpfW6pq5Q1?view=grid

In [None]:
# rt_utils.show_full_df((pd.read_parquet('./_rt_progress_2023-05-17.parquet')
#                                    >> filter(_.status == 'map_confirmed')
#                                    )
#                                   )

In [None]:
# analysis_date = dt.date(2023, 5, 17)

# Eureka

In [None]:
## parameters cell
itp_id = 108

In [None]:
%%capture
rt_day = rt_filter_map_plot.from_gcs(itp_id, analysis_date, pbar)

In [None]:
rt_day.transit_priority_target_mph = 20

In [None]:
%%capture_parameters
organization_name = rt_day.organization_name
organization_name

In [None]:
# rt_day.set_filter(route_names = ['188'])

In [None]:
try:
    _m = rt_day.segment_speed_map()
except:
    pass

## Eureka US101 (Broadway Truesdale - 4th per [RTP](https://www.hcaog.net/sites/default/files/vroom_2022-2042_full_report.pdf))

* difficult to analyze -- need to exclude distant stops from corridor (add to algorithm?)
* standardize on most common trip pattern

In [None]:
rt_day.autocorridor(shape_id = 'p_178050', stop_seq_range = [13, 13.5],
manual_exclude={'p_179049': {'max': 4}, 'p_179044': {'max': 4}})
# intercity service, exclude stops distant from corridor

In [None]:
# rt_day.corridor_stop_delays >> distinct(_.route_short_name)

In [None]:
rt_day.corridor_metrics()

In [None]:
rt_day.quick_map_corridor()

In [None]:
rt_day.corridor['corridor'] = 'us101_broadway'

In [None]:
all_corridors += [rt_day.corridor]

# TART SR267/89 -- no data

# Tahoe Transportation District US50

* limited data geographic extent

In [None]:
## parameters cell
itp_id = 331

In [None]:
%%capture
rt_day = rt_filter_map_plot.from_gcs(itp_id, analysis_date, pbar)

In [None]:
rt_day.transit_priority_target_mph = 25

In [None]:
%%capture_parameters
organization_name = rt_day.organization_name
organization_name

In [None]:
_m = rt_day.segment_speed_map()

In [None]:
rt_day.autocorridor(shape_id = 'p_497358', stop_seq_range = [0, 20])

In [None]:
rt_day.corridor_stop_delays >> distinct(_.route_short_name)

In [None]:
rt_day.corridor_metrics()

In [None]:
rt_day.quick_map_corridor()

In [None]:
rt_day.corridor['corridor'] = 'us50_tahoe'

In [None]:
all_corridors += [rt_day.corridor]

# Fresno -- unable eval (new services)

# VCTC (SR126 Fillmore - Ventura)

* minimal thru running delay on available data...

In [None]:
## parameters cell
itp_id = 380

In [None]:
# analysis_date = dt.date(2023, 4, 12)

In [None]:
%%capture
rt_day = rt_filter_map_plot.from_gcs(itp_id, analysis_date, pbar)

In [None]:
%%capture_parameters
organization_name = rt_day.organization_name
organization_name

In [None]:
_m = rt_day.segment_speed_map()

In [None]:
_m

# LA Metro Projects

* NoHo-Pas BRT to only briefly use SR134, light delay
* As per sheet, NSFV improvements only intersect SHN
* Inglewood connector, Arbor Vitae not on SHN...
* Games Route Network, etc unable to analyze

# Victor Valley

* service runs, difficult to evaluate
* 0.5/hour, limited span

# Others

* OCTA speculative, new services unable to eval

# OCTA SR39

* multiple SHN sections, eval longest
* about 5 trips/hour combined!

In [None]:
## parameters cell
itp_id = 235

In [None]:
%%capture
rt_day = rt_filter_map_plot.from_gcs(itp_id, analysis_date, pbar)

In [None]:
rt_day.transit_priority_target_mph = 20

In [None]:
%%capture_parameters
organization_name = rt_day.organization_name
organization_name

In [None]:
rt_day.set_filter(route_names=['29', '529'])

In [None]:
_m = rt_day.segment_speed_map()

In [None]:
rt_day.autocorridor(shape_id = '2902', stop_seq_range = [5, 51])

In [None]:
# rt_day.corridor_stop_delays >> distinct(_.route_short_name)

In [None]:
rt_day.corridor_metrics()

In [None]:
rt_day.quick_map_corridor()

In [None]:
rt_day.corridor['corridor'] = 'sr39_beach'

In [None]:
all_corridors += [rt_day.corridor]

# combine and export

In [None]:
all_gdf = pd.concat(all_corridors)

In [None]:
all_gdf

In [None]:
(all_gdf >> select(-_.geometry)).to_csv('june16_corridor_metrics_speeds.csv')

In [None]:
all_gdf.to_file('june16_corridor_metrics_speeds.geojson')

In [50]:
all_gdf = gpd.read_file('./june16_corridor_metrics_speeds.geojson')

In [52]:
all_gdf >> filter(_.corridor == 'sr123_san_pablo')

Unnamed: 0,route_id,route_short_name,organization,p20_corr_mph,speed_delay_minutes,total_speed_delay,p50_runtime_minutes,n_trips,span_hours,daily_avg_trips_hr,trips_added,new_avg_trips_hr,length_miles,target_mph,corridor,geometry
6,72,72,Alameda-Contra Costa Transit District,10.5,1023.2,3623.2,83.1,62,14.8,2.1,12.3,2.5,6.1,20,sr123_san_pablo,"POLYGON ((-201973.660 -11619.671, -201974.344 ..."
7,72M,72M,Alameda-Contra Costa Transit District,11.1,1001.4,3623.2,79.4,62,14.6,2.1,12.6,2.6,6.1,20,sr123_san_pablo,"POLYGON ((-201973.660 -11619.671, -201974.344 ..."
8,72R,72R,Alameda-Contra Costa Transit District,12.0,1598.6,3623.2,62.4,126,13.2,4.8,25.6,5.7,6.1,20,sr123_san_pablo,"POLYGON ((-201973.660 -11619.671, -201974.344 ..."


In [16]:
all_gdf = (all_gdf >> group_by(_.corridor, _.organization)
    >> mutate(avg_trip_delay_all_rts = _.total_speed_delay / _.n_trips.sum())
    >> summarize(avg_delay_min = _.avg_trip_delay_all_rts.max())
)

In [18]:
all_gdf

Unnamed: 0,corridor,organization,avg_delay_min
0,sr123_san_pablo,Alameda-Contra Costa Transit District,14.4928
1,sr1_19th_a,City and County of San Francisco,3.976106
2,sr1_19th_b,City and County of San Francisco,2.821829
3,sr1_lincoln,City of Santa Monica,8.070435
4,sr2_santa_monica_bl,Los Angeles County Metropolitan Transportation...,7.84856
5,sr39_beach,Orange County Transportation Authority,16.881679
6,sr66_foothill,Foothill Transit,1.644681
7,sr82_samtrans,San Mateo County Transit District,23.331008
8,sr82_vta,Santa Clara Valley Transportation Authority,10.670833
9,us101_broadway,City of Eureka,2.504167


In [21]:
corrs = all_gdf.corridor.to_list()
corrs

['sr123_san_pablo',
 'sr1_19th_a',
 'sr1_19th_b',
 'sr1_lincoln',
 'sr2_santa_monica_bl',
 'sr39_beach',
 'sr66_foothill',
 'sr82_samtrans',
 'sr82_vta',
 'us101_broadway',
 'us101_lombard',
 'us50_tahoe']

In [23]:
import numpy as np

In [24]:
ridership = [14789, 12100, 12100, #19th b
            6200, 22669, 4912, #beach
            5681, 8754, 15693, #vta
            np.nan, 8200, np.nan]

In [27]:
rider = pd.DataFrame({'corridor': corrs, 'avg_daily_riders': ridership})

In [31]:
(all_gdf >> inner_join(_, rider, on = 'corridor')
 >> mutate(rider_minutes = _.avg_delay_min * _.avg_daily_riders,
                rider_hours = _.rider_minutes / 60)
 >> arrange(-_.rider_hours)
)


Unnamed: 0,corridor,organization,avg_delay_min,avg_daily_riders,rider_minutes,rider_hours
0,sr123_san_pablo,Alameda-Contra Costa Transit District,14.4928,14789.0,214334.0192,3572.233653
7,sr82_samtrans,San Mateo County Transit District,23.331008,8754.0,204239.64186,3403.994031
4,sr2_santa_monica_bl,Los Angeles County Metropolitan Transportation...,7.84856,22669.0,177918.999177,2965.316653
8,sr82_vta,Santa Clara Valley Transportation Authority,10.670833,15693.0,167457.3875,2790.956458
5,sr39_beach,Orange County Transportation Authority,16.881679,4912.0,82922.80916,1382.046819
3,sr1_lincoln,City of Santa Monica,8.070435,6200.0,50036.695652,833.944928
1,sr1_19th_a,City and County of San Francisco,3.976106,12100.0,48110.884956,801.848083
10,us101_lombard,City and County of San Francisco,4.712914,8200.0,38645.89404,644.098234
2,sr1_19th_b,City and County of San Francisco,2.821829,12100.0,34144.129794,569.06883
6,sr66_foothill,Foothill Transit,1.644681,5681.0,9343.431915,155.723865
