In [1]:
import os
os.environ["CALITP_BQ_MAX_BYTES"] = str(1_000_000_000_000) ## 1TB?

from siuba import *
import pandas as pd
import geopandas as gpd
import datetime as dt

from rt_analysis import rt_parser
from rt_analysis import rt_filter_map_plot

import shared_utils



In [2]:
from rt_analysis import sccp_tools
import numpy as np

In [3]:
from tqdm.notebook import tqdm

# About the Metrics

The schedule-based metric is a daily average of the sum of median trip stop delays along the corridor. To further explain, we take each corridor trip that we have data for and look at the delay in comparison to the schedule at each stop, after subtracting off any delay present as the trip entered the corridor. For each trip we then take the median delay of all stops along the corridor, and sum these medians to create the metric.

The speed-based metric is a daily average of the sum of delays for each trip traversing the corridor as compared to a reference speed of 16 miles per houar. To further explain, we take each corridor trip that we have data for and calculate the hypothetical time it would take for that trip to traverse the corridor at a speed of 16 mph. The difference between the actual time it took for the trip to traverse the corridor and that hypothetical time is the speed-based delay for that trip, and we sum those delays to create the metric. This metric is intended to provide a more consistent basis for comparison independent of scheduling practices.

In other words, if we expect a hypothetical bus lane/signal priority/payment system etc to increase corridor speeds to 16mph, this is how much time we could save per day.

With corridor attached, generate both metrics using `RtFilterMapper.corridor_metrics()`

## Common Date Range (2022-23) and convienience function

In [4]:
date_range = np.arange('2022-04-30', '2022-05-09', dtype='datetime64[D]')

In [5]:
type(date_range)

numpy.ndarray

In [6]:
def generate_export_data(itp_id, date_range, pbar):
    for date in date_range:
        analysis_date = date.astype(dt.date)
        check_ran = shared_utils.rt_utils.get_operators(analysis_date, [itp_id])
        if check_ran[itp_id] == 'already_ran':
            continue
        rt_day = rt_parser.OperatorDayAnalysis(itp_id, analysis_date, pbar)
        # don't worry about the CRS not set UserWarning
        rt_day.export_views_gcs()

In [7]:
pbar = tqdm()

0it [00:00, ?it/s]

In [8]:
%%capture --no-stdout
generate_export_data(4, date_range, pbar)

already ran: 4
already ran: 4
already ran: 4
already ran: 4
already ran: 4
already ran: 4
already ran: 4
already ran: 4
already ran: 4


# AC / Lake

In [7]:
# itp_id = 4 ## AC Transit
# itp_id = 159 ## Lake Transit

In [8]:
# corridor = gpd.read_file('./corridors_sccp_lpp/CMCP_StudyAreaBoundary.geojson') ## alameda

# Samtrans

In [9]:
# itp_id = 290 # Samtrans

In [10]:
# corridor = gpd.read_file('./corridors_sccp_lpp/peninsula.geojson') ## foster city / hayward park

# Metro (Inglewood)

In [11]:
# itp_id = 182
# corridor = gpd.read_file('./corridors_sccp_lpp/inglewood_map.geojson')

# SacRT/SJRTD, post-refactor

In [12]:
pbar = tqdm()

0it [00:00, ?it/s]

In [13]:
sac_rt_itp_id = 273
sjrtd_itp_id = 284

In [14]:
sac_date_range = np.arange('2022-02-05', '2022-02-14', dtype='datetime64[D]')

In [27]:
%%capture --no-stdout

generate_export_data(sac_rt_itp_id, sac_date_range, pbar)

already ran: 273
already ran: 273
already ran: 273
already ran: 273
not yet run: 273


AssertionError: no vehicle positions data found for 2022-02-09

In [16]:
%%capture --no-stdout

generate_export_data(sjrtd_itp_id, date_range, pbar)

already ran: 284
already ran: 284
already ran: 284
already ran: 284
already ran: 284
already ran: 284
already ran: 284
already ran: 284
already ran: 284


In [16]:
corridor = gpd.read_file('./corridors_sccp_lpp/sacrt_sjrtd_nov_2.geojson')

In [26]:
sac_corridor_metrics = sccp_tools.sccp_average_metrics(itp_id=273, date_range=sac_date_range, corridor=corridor)

found parquet


KeyboardInterrupt: 

In [20]:
sjrtd_corridor_metrics = sccp_tools.sccp_average_metrics(itp_id=284, date_range=date_range, corridor=corridor)

failed for date: 2022-04-30
calitp-analytics-data/data-analyses/rt_delay/stop_delay_views/284_2022-04-30.parquet
failed for date: 2022-05-01
calitp-analytics-data/data-analyses/rt_delay/stop_delay_views/284_2022-05-01.parquet
failed for date: 2022-05-02
calitp-analytics-data/data-analyses/rt_delay/stop_delay_views/284_2022-05-02.parquet
failed for date: 2022-05-03
calitp-analytics-data/data-analyses/rt_delay/stop_delay_views/284_2022-05-03.parquet
failed for date: 2022-05-04
calitp-analytics-data/data-analyses/rt_delay/stop_delay_views/284_2022-05-04.parquet
failed for date: 2022-05-05
calitp-analytics-data/data-analyses/rt_delay/stop_delay_views/284_2022-05-05.parquet
failed for date: 2022-05-06
calitp-analytics-data/data-analyses/rt_delay/stop_delay_views/284_2022-05-06.parquet
failed for date: 2022-05-07
calitp-analytics-data/data-analyses/rt_delay/stop_delay_views/284_2022-05-07.parquet
failed for date: 2022-05-08
calitp-analytics-data/data-analyses/rt_delay/stop_delay_views/284_20



In [18]:
date_range

array(['2022-04-30', '2022-05-01', '2022-05-02', '2022-05-03',
       '2022-05-04', '2022-05-05', '2022-05-06', '2022-05-07',
       '2022-05-08'], dtype='datetime64[D]')

In [19]:
rt_filter_map_plot.from_gcs?

[0;31mSignature:[0m [0mrt_filter_map_plot[0m[0;34m.[0m[0mfrom_gcs[0m[0;34m([0m[0mitp_id[0m[0;34m,[0m [0manalysis_date[0m[0;34m,[0m [0mpbar[0m[0;34m=[0m[0;32mNone[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Generates RtFilterMapper from cached artifacts in GCS. Generate using rt_analysis.OperatorDayAnalysis.export_views_gcs()
    
[0;31mFile:[0m      ~/data-analyses/rt_delay/rt_analysis/rt_filter_map_plot.py
[0;31mType:[0m      function


In [22]:
rt_day = rt_filter_map_plot.from_gcs(273, dt.date(2022, 2, 6))

found parquet


In [23]:
rt_day.add_corridor(corridor)

In [None]:
# rt_day.quick_map_corridor()

In [25]:
rt_day.corridor_metrics()

{'schedule_metric_minutes': 2.4166666666666665,
 'speed_metric_minutes': -3.5781744005258327}

In [29]:
rt_day.analysis_date.isoformat()

'2022-02-06'

# OCTA 12/1/2022

In [20]:
octa = gpd.read_file('./corridors_sccp_lpp/octa_dec1_rev.geojson')

In [41]:
# octa.explore()

In [26]:
rt_day = rt_filter_map_plot.from_gcs(235, dt.date(2022, 5, 5))

found parquet


In [27]:
rt_day.add_corridor(octa)

In [28]:
rt_day.corridor_metrics()

{'schedule_metric_minutes': 75.20833333333333,
 'speed_metric_minutes': 931.8252172851112}

In [32]:
octa_corridor_metrics = sccp_tools.sccp_average_metrics(itp_id=235, date_range=date_range, corridor=octa)

found parquet
complete for date: 2022-04-30
found parquet
complete for date: 2022-05-01
found parquet
complete for date: 2022-05-02
found parquet
complete for date: 2022-05-03
found parquet
complete for date: 2022-05-04
found parquet
complete for date: 2022-05-05
found parquet
complete for date: 2022-05-06
found parquet
complete for date: 2022-05-07
found parquet
complete for date: 2022-05-08


In [33]:
octa_corridor_metrics

{'avg_schedule_metric_minutes': 52.0,
 'avg_speed_metric_minutes': 893.0,
 'all_schedule': [53.25833333333333,
  20.433333333333334,
  78.05,
  49.175,
  64.16666666666667,
  75.20833333333333,
  55.4,
  60.84166666666667,
  14.475],
 'all_speed': [747.3978409473204,
  505.33545268309683,
  885.3617659165194,
  1686.831775345611,
  777.4150067746884,
  931.8252172851112,
  1379.7829452025758,
  609.0198823468858,
  514.3851696422547]}

In [38]:
# rt_day.quick_map_corridor()

In [39]:
# rt_day.segment_speed_map(corridor=True)

In [40]:
# rt_day.stop_delay_view >> filter(_.corridor) >> distinct(_.route_short_name)

# AC Dec 15

* applicant also requested BART, unable without vehicle positions and as a grade-seperated system possibly non-applicable
* three nearby polygons, will consolidate and notify applicant

In [11]:
new_ac = gpd.read_file('./corridors_sccp_lpp/ac_dec_15rev.geojson')

In [12]:
new_ac.explore()

In [13]:
rt_day = rt_filter_map_plot.from_gcs(4, dt.date(2022, 5, 5))

found parquet


In [14]:
rt_day.add_corridor(new_ac)

In [21]:
rt_day.corridor_stop_delays >> distinct(_.route_short_name)

Unnamed: 0,route_short_name
0,800
1,72
2,72M
3,76
4,376
5,675
6,L
7,667
8,7


In [15]:
rt_day.corridor_metrics()

{'schedule_metric_minutes': 395.0083333333333,
 'speed_metric_minutes': 949.5481268887544}

In [16]:
rt_day.quick_map_corridor()

In [17]:
rt_day.segment_speed_map(corridor=True)





In [18]:
ac_corridor_metrics = sccp_tools.sccp_average_metrics(itp_id=4, date_range=date_range, corridor=new_ac)

found parquet
complete for date: 2022-04-30
found parquet
complete for date: 2022-05-01
found parquet
complete for date: 2022-05-02
found parquet
complete for date: 2022-05-03
found parquet
complete for date: 2022-05-04
found parquet
complete for date: 2022-05-05
found parquet
complete for date: 2022-05-06
found parquet
complete for date: 2022-05-07
found parquet
complete for date: 2022-05-08


In [20]:
ac_corridor_metrics

{'avg_schedule_metric_minutes': 49.0,
 'avg_speed_metric_minutes': 252.0,
 'all_schedule': [2.4166666666666665,
  0.7333333333333333,
  -4.608333333333333,
  1.4166666666666667,
  9.125,
  395.0083333333333,
  33.766666666666666,
  10.35,
  -4.725],
 'all_speed': [157.54734499935864,
  171.83819257911264,
  178.64810780672,
  192.83988393923738,
  158.28028253422397,
  949.5481268887544,
  230.55046328090458,
  165.25451374992954,
  64.13876403365857]}