In [4]:
%%capture
import warnings
warnings.filterwarnings('ignore')

import os
os.environ["CALITP_BQ_MAX_BYTES"] = str(800_000_000_000)
import shared_utils

from calitp.tables import tbls
from calitp import query_sql
import calitp.magics

from siuba import *
import pandas as pd
import numpy as np
import geopandas as gpd

import datetime as dt

import importlib
from rt_analysis import rt_parser
from rt_analysis import rt_filter_map_plot

import gcsfs
fs = gcsfs.GCSFileSystem()

from IPython.display import display, Markdown, Latex, HTML
import json

from tqdm import tqdm_notebook
from tqdm.notebook import trange, tqdm

# Algorithm

* Clip stops to corridor
* Get first and last stops in corridor for each shape_id, then find 1 stop before and 1 stop after for each
* Filter stop_delay_view based on those stops

# Report:

* Sum of median trip delays
* Median segment speed
* Delay relative to a speed standard of 16 mph (approx. 75th percentile for medium-sized operators, per 2010 NTD)

# Common Ranges/Functions

In [6]:
# Daily analysis dates: 2022-04-30 through 2022-05-08.
# np.arange excludes the stop value, so 2022-05-09 itself is not part of the range.
date_range = np.arange('2022-04-30', '2022-05-09', dtype='datetime64[D]')

In [8]:
type(date_range)

numpy.ndarray

In [3]:
def generate_export_data(itp_id, date_range, pbar):
    """Build an ``OperatorDayAnalysis`` for each date and call its ``export_views_gcs()``.

    Parameters
    ----------
    itp_id
        Operator identifier, passed through to ``rt_parser.OperatorDayAnalysis``.
    date_range
        Iterable of ``numpy.datetime64`` days, e.g. the result of
        ``np.arange(start, stop, dtype='datetime64[D]')``.
    pbar
        Progress-bar object, passed through to ``rt_parser.OperatorDayAnalysis``.

    Returns
    -------
    None
        Progress is reported with ``print``. The loop is best-effort: an error
        on one date is printed and the remaining dates are still processed.
    """
    for date in date_range:
        # numpy datetime64[D] -> datetime.date
        date = date.astype(dt.date)
        try:
            # The notebook imports this module as `rt_parser`; the previous `rt.`
            # prefix was never bound, so the resulting NameError was swallowed by
            # the handler below and every date was reported as failed.
            day_analysis = rt_parser.OperatorDayAnalysis(itp_id, date, pbar)
            day_analysis.export_views_gcs()
            print(f'complete for date: {date}')
        except Exception as e:
            # Deliberately broad: one bad day must not abort the whole batch.
            print(e)
            print(f'failed for date: {date}')

# AC Transit / Lake Transit

In [5]:
itp_id = 4 ## AC Transit
# itp_id = 159 ## Lake Transit

# Generate analysis data for all dates

In [21]:
importlib.reload(rt_filter_map_plot)

<module 'rt_filter_map_plot' from '/home/jovyan/data-analyses/rt_delay/rt_filter_map_plot.py'>

In [22]:
rt_day = rt_filter_map_plot.from_gcs(itp_id, dt.date(2022, 5, 4))

found parquet


In [23]:
corridor = gpd.read_file('./corridors_sccp_lpp/CMCP_StudyAreaBoundary.geojson') ## alameda

In [24]:
rt_day.add_corridor(corridor)

In [25]:
# quick corridor map
rt_day.quick_map_corridor()

In [26]:
rt_day.corridor_metrics()

{'schedule_metric_minutes': 1373.5333333333333,
 'speed_metric_minutes': 11881.582847833084}

In [27]:
rt_day.corridor_speed_delays

Unnamed: 0,shape_meters,stop_id,stop_name,geometry,shape_id,trip_key,trip_id,stop_sequence,arrival_time,route_id,...,entry_time,exit_time,entry_loc,exit_loc,meters_from_entry,seconds_from_entry,speed_from_entry,corridor_speed_mph,target_seconds,target_delay_seconds
0,9346.694499,2987,I-80 Fwy & Toll Plaza (East Bound),POINT (-203511.249 -18986.019),shp-OX-03,8.537864e+18,7234020,2.0,2022-05-04 17:19:49,OX,...,2022-05-04 17:19:03.448952,2022-05-04 17:42:12.579279,9346.694499,21145.75249,11799.057991,1389,8.494642,19.002515,1649.655795,-260.655795
1,9346.694499,2987,I-80 Fwy & Toll Plaza (East Bound),POINT (-203511.249 -18986.019),shp-OX-03,-2.608650e+18,5365040,2.0,2022-05-04 17:34:49,OX,...,2022-05-04 17:32:44.119039,2022-05-04 17:54:51.021356,9346.694499,21145.75249,11799.057991,1326,8.898234,19.905349,1649.655795,-323.655795
2,9346.694499,2987,I-80 Fwy & Toll Plaza (East Bound),POINT (-203511.249 -18986.019),shp-OX-03,7.317727e+18,6805020,2.0,2022-05-04 17:48:13,OX,...,2022-05-04 17:49:28.461680,2022-05-04 18:09:51.639865,9346.694499,21145.75249,11799.057991,1223,9.647635,21.581760,1649.655795,-426.655795
3,9346.694499,2987,I-80 Fwy & Toll Plaza (East Bound),POINT (-203511.249 -18986.019),shp-OX-03,-8.055310e+18,5202020,2.0,2022-05-04 17:04:49,OX,...,2022-05-04 17:02:30.054783,2022-05-04 17:23:18.243876,9346.694499,21145.75249,11799.057991,1248,9.454373,21.149433,1649.655795,-401.655795
4,9346.694499,2987,I-80 Fwy & Toll Plaza (East Bound),POINT (-203511.249 -18986.019),shp-OX-03,6.342403e+18,7363040,2.0,2022-05-04 18:30:24,OX,...,2022-05-04 18:36:31.759053,2022-05-04 18:46:24.509875,9346.694499,21145.75249,11799.057991,592,19.930841,44.585292,1649.655795,-1057.655795
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2455,17364.865459,2739,Embarcadero W & Franklin St,POINT (-200078.126 -22240.450),shp-12-56,3.637157e+18,5980020,75.0,2022-05-04 21:50:36,12,...,2022-05-04 21:55:11.412822,2022-05-04 22:29:45.751140,17364.865459,17962.91962,598.054162,2074,0.288358,0.645056,83.615447,1990.384553
2456,17364.865459,2739,Embarcadero W & Franklin St,POINT (-200078.126 -22240.450),shp-12-56,-5.382276e+18,4015020,75.0,2022-05-04 10:00:36,12,...,2022-05-04 10:00:59.982643,2022-05-04 10:02:41.000000,17364.865459,17962.91962,598.054162,101,5.921328,13.246011,83.615447,17.384553
2457,17364.865459,2739,Embarcadero W & Franklin St,POINT (-200078.126 -22240.450),shp-12-56,2.843586e+18,4762020,75.0,2022-05-04 12:34:10,12,...,2022-05-04 12:38:01.450012,2022-05-04 12:39:54.746980,17364.865459,17962.91962,598.054162,113,5.292515,11.839355,83.615447,29.384553
2458,17364.865459,2739,Embarcadero W & Franklin St,POINT (-200078.126 -22240.450),shp-12-56,8.631766e+18,1578020,75.0,2022-05-04 16:37:10,12,...,2022-05-04 17:02:59.083351,2022-05-04 17:09:05.900770,17364.865459,17962.91962,598.054162,366,1.634028,3.655320,83.615447,282.384553


# Samtrans

In [None]:
itp_id = 290 # Samtrans

In [None]:
import numpy as np

In [None]:
date_range = np.arange('2022-04-30', '2022-05-09', dtype='datetime64[D]')

In [None]:
date_range

# Generate analysis data for all dates

In [None]:
pbar = tqdm()

In [None]:
rt_day = rt_filter_map_plot.from_gcs(itp_id, dt.date(2022, 5, 5))

In [None]:
corridor = gpd.read_file('./corridors_sccp_lpp/peninsula.geojson') ## foster city / hayward park

In [None]:
rt_day.add_corridor(corridor)

In [None]:
# quick corridor map
rt_day.quick_map_corridor()

In [None]:
pbar = tqdm()

In [None]:
# Load the 2022-05-05 day for the current itp_id again, this time passing the
# progress bar, then attach the corridor before building the map.
rt_day = rt_filter_map_plot.from_gcs(itp_id, dt.date(2022, 5, 5), pbar)
rt_day.add_corridor(corridor)
# NOTE(review): segment_speed_map is invoked through the class with rt_day passed
# explicitly as the instance — presumably the same as
# rt_day.segment_speed_map(corridor=True); confirm before simplifying.
_m = rt_filter_map_plot.RtFilterMapper.segment_speed_map(rt_day, corridor=True)

In [None]:
_m

In [None]:
_m.save('samtrans_map.html')

In [None]:
rt_day.stop_delay_view >> filter(_.corridor) >> distinct(_.route_short_name)

In [None]:
rt_day.corridor_stop_delays

# Metro (Inglewood)

In [9]:
date_range

array(['2022-04-30', '2022-05-01', '2022-05-02', '2022-05-03',
       '2022-05-04', '2022-05-05', '2022-05-06', '2022-05-07',
       '2022-05-08'], dtype='datetime64[D]')

In [10]:
itp_id = 182
corridor = gpd.read_file('./corridors_sccp_lpp/inglewood_map.geojson')

In [7]:
# corridor.explore()

In [11]:
pbar = tqdm()

0it [00:00, ?it/s]

In [12]:
# generate_export_data(itp_id, dates_to_run, pbar)

In [21]:
# NOTE(review): average_metrics is not defined or imported in any visible cell of
# this notebook — presumably it lived in a cell that was later removed. Confirm and
# restore/import it so the notebook survives Restart Kernel -> Run All.
metro_metrics = average_metrics(itp_id, date_range, pbar)

found parquet
complete for date: 2022-04-30
found parquet
complete for date: 2022-05-01
found parquet
complete for date: 2022-05-02
found parquet
complete for date: 2022-05-03
found parquet
complete for date: 2022-05-04
found parquet
complete for date: 2022-05-05
found parquet
complete for date: 2022-05-06
found parquet
complete for date: 2022-05-07
found parquet
complete for date: 2022-05-08


In [22]:
metro_metrics

{'avg_schedule_metric_minutes': 102.0, 'avg_speed_metric_minutes': 2018.0}

In [23]:
# Load the 2022-05-04 day for the current itp_id and attach the corridor read
# from inglewood_map.geojson before building the map.
rt_day = rt_filter_map_plot.from_gcs(itp_id, dt.date(2022, 5, 4), pbar)
rt_day.add_corridor(corridor)
# NOTE(review): segment_speed_map is invoked through the class with rt_day passed
# explicitly as the instance — presumably the same as
# rt_day.segment_speed_map(corridor=True); confirm before simplifying.
_m = rt_filter_map_plot.RtFilterMapper.segment_speed_map(rt_day, corridor=True)

found parquet


In [27]:
# _m

In [28]:
# rt_day.quick_map_corridor()

In [25]:
rt_day.stop_delay_view >> filter(_.corridor) >> distinct(_.route_short_name)

Unnamed: 0,route_short_name
0,117
1,111
2,115
3,211/215
4,40
5,212


# Draft utility: which operators have data for a given date

In [6]:
# List the rt_trips/ folder under the shared GCS path once; get_operators() reads
# this module-level listing instead of hitting GCS on every call.
fs_list = fs.ls(f'{shared_utils.rt_utils.GCS_FILE_PATH}rt_trips/')

In [7]:
def get_operators(analysis_date, paths=None):
    """Return the operator ids that have an rt_trips file for ``analysis_date``.

    File names (the part of each path after ``rt_trips/``) are parsed as
    ``<itp_id>_<MM>_<DD>...``. Only month and day are compared; the year of
    ``analysis_date`` is not used.

    Parameters
    ----------
    analysis_date
        Object exposing integer ``.month`` and ``.day`` (e.g. ``datetime.date``).
    paths
        Optional iterable of path strings to scan. Defaults to the module-level
        ``fs_list`` listing.

    Returns
    -------
    list of int
        Matching operator ids, in listing order. Entries that do not follow the
        naming pattern (the bare directory, stray files, non-numeric prefixes)
        are skipped instead of raising IndexError/ValueError.
    """
    if paths is None:
        paths = fs_list

    day = f'{analysis_date.day:02d}'
    month = f'{analysis_date.month:02d}'

    ran_operators = []
    for path in paths:
        # Split each path once; file_name is empty for the bare directory entry.
        _, marker, file_name = path.partition('rt_trips/')
        parts = file_name.split('_')
        if not marker or len(parts) < 3:
            continue
        operator, file_month, file_day = parts[0], parts[1], parts[2][:2]
        if file_month != month or file_day != day:
            continue
        if not operator.isdecimal():
            # Not an operator file even though month/day happen to match.
            continue
        ran_operators.append(int(operator))
    return ran_operators

In [8]:
# One output line per day in date_range: the date, then the operator ids that
# get_operators() finds for it.
for date in (np_day.astype(dt.date) for np_day in date_range):
    print(f'{date}: ', end='')
    print(get_operators(date))

2022-04-30: [182, 290]
2022-05-01: [182, 290]
2022-05-02: [159, 182, 290]
2022-05-03: [159, 182, 290]
2022-05-04: [110, 126, 127, 135, 148, 159, 167, 170, 182, 183, 188, 194, 218, 221, 226, 235, 243, 246, 247, 259, 260, 269, 278, 282, 284, 293, 294, 295, 300, 301, 30, 310, 315, 336, 349, 350, 360, 361, 372, 45, 484, 4, 75, 99]
2022-05-05: [159, 182, 290]
2022-05-06: [159, 182, 290]
2022-05-07: [182, 290]
2022-05-08: [182, 290]
