In [4]:
%%capture
import warnings
warnings.filterwarnings('ignore')

import os
os.environ["CALITP_BQ_MAX_BYTES"] = str(800_000_000_000)
import shared_utils

from calitp.tables import tbls
from calitp import query_sql
import calitp.magics

from siuba import *
import pandas as pd
import numpy as np
import geopandas as gpd

import datetime as dt

import importlib
from rt_analysis import rt_parser
from rt_analysis import rt_filter_map_plot

import gcsfs
fs = gcsfs.GCSFileSystem()

from IPython.display import display, Markdown, Latex, HTML
import json

from tqdm import tqdm_notebook
from tqdm.notebook import trange, tqdm

# Algorithm

* Clip stops to corridor
* Get first and last stops in corridor for each shape_id, then find 1 stop before and 1 stop after for each
* Filter stop_delay_view based on those stops

# Report:

* Sum of median trip delays
* Median segment speed
* Delay compared to a speed standard: 16 mph (approx. 75th percentile for medium-size operators per 2010 NTD data)

# Common Ranges/Functions

In [6]:
# Analysis window as daily numpy datetime64 values. np.arange excludes the stop value,
# so this covers 2022-04-30 through 2022-05-08.
date_range = np.arange('2022-04-30', '2022-05-09', dtype='datetime64[D]')

In [8]:
# Sanity check: date_range is a numpy array, so each element is converted with
# .astype(dt.date) before being used as a calendar date.
type(date_range)

numpy.ndarray

In [3]:
def generate_export_data(itp_id, date_range, pbar):
    """Run the operator-day RT analysis for each date and export its views to GCS.

    This is best-effort: if one date fails, the error is printed and the loop
    moves on to the next date instead of aborting the whole run.

    Parameters
    ----------
    itp_id
        Operator identifier, passed through to ``rt_parser.OperatorDayAnalysis``.
    date_range
        Iterable of ``numpy.datetime64`` values (for example the result of
        ``np.arange(start, stop, dtype='datetime64[D]')``).
    pbar
        Progress bar object, passed through to ``rt_parser.OperatorDayAnalysis``.

    Returns
    -------
    None
    """
    for date in date_range:
        # numpy datetime64[D] -> datetime.date
        date = date.astype(dt.date)
        try:
            # The notebook imports `rt_parser` (there is no `rt` name in scope), so the
            # analysis class has to be reached through that module.
            day_analysis = rt_parser.OperatorDayAnalysis(itp_id, date, pbar)
            day_analysis.export_views_gcs()
            print(f'complete for date: {date}')
        except Exception as e:
            # Deliberately broad: report the failure and keep going with the other dates.
            print(e)
            print(f'failed for date: {date}')

# AC / Lake

In [None]:
# Operator for this section. The AC Transit ID is kept commented out for quick switching.
# itp_id = 4 ## AC Transit
itp_id = 159 ## Lake Transit

# Generate analysis data for all dates

In [None]:
# Load the analysis object for this operator on 2022-05-04 via from_gcs
# (no progress bar is passed in this call).
rt_day = rt_filter_map_plot.from_gcs(itp_id, dt.date(2022, 5, 4))

In [None]:
# corridor = gpd.read_file('./CMCP_StudyAreaBoundary.geojson') ## alameda

In [None]:
# NOTE(review): `corridor` is not assigned in any cell above this one (the gpd.read_file
# call in the previous cell is commented out), so on a fresh kernel this raises NameError.
# Confirm which corridor file applies to this operator.
rt_day.add_corridor(corridor)

In [None]:
# quick corridor map
# rt_day.quick_map_corridor()

# Corridor-based metrics for SCCP

* The intent is to eventually parameterize this and output a formatted report; the methodology may also change.

In [None]:
# Progress bar passed to from_gcs in the next cell.
pbar = tqdm()

In [None]:
# Reload the 2022-05-04 analysis (this time passing the progress bar), attach the
# corridor, and build the segment speed map with corridor=True.
rt_day = rt_filter_map_plot.from_gcs(itp_id, dt.date(2022, 5, 4), pbar)
rt_day.add_corridor(corridor)
# Called through the class with rt_day as the instance argument.
_m = rt_filter_map_plot.RtFilterMapper.segment_speed_map(rt_day, corridor=True)

In [None]:
# Write the map to an HTML file in the current working directory.
_m.save('lake_map.html')

In [None]:
# Distinct route short names among stop_delay_view rows whose `corridor` column is truthy.
rt_day.stop_delay_view >> filter(_.corridor) >> distinct(_.route_short_name)

# Samtrans

In [None]:
itp_id = 290 # Samtrans

In [None]:
import numpy as np

In [None]:
# Same window as defined earlier in the notebook: 2022-04-30 through 2022-05-08
# (np.arange excludes the stop value).
date_range = np.arange('2022-04-30', '2022-05-09', dtype='datetime64[D]')

In [None]:
# Display the dates covered by the window.
date_range

# Generate analysis data for all dates

In [None]:
# Progress bar for this section. It is recreated a few cells below, before it is first
# passed to from_gcs.
pbar = tqdm()

In [None]:
# Load the analysis object for this operator on 2022-05-05 via from_gcs
# (no progress bar is passed in this call).
rt_day = rt_filter_map_plot.from_gcs(itp_id, dt.date(2022, 5, 5))

In [None]:
# Corridor geometry for this section, read from a local GeoJSON file into a GeoDataFrame.
corridor = gpd.read_file('./corridors_sccp_lpp/peninsula.geojson') ## foster city / hayward park

In [None]:
# Attach the corridor read in the previous cell to the loaded analysis object.
rt_day.add_corridor(corridor)

In [None]:
# quick corridor map
rt_day.quick_map_corridor()

In [None]:
# Fresh progress bar, passed to from_gcs in the next cell.
pbar = tqdm()

In [None]:
# Reload the 2022-05-05 analysis (this time passing the progress bar), attach the
# corridor, and build the segment speed map with corridor=True.
rt_day = rt_filter_map_plot.from_gcs(itp_id, dt.date(2022, 5, 5), pbar)
rt_day.add_corridor(corridor)
# Called through the class with rt_day as the instance argument.
_m = rt_filter_map_plot.RtFilterMapper.segment_speed_map(rt_day, corridor=True)

In [None]:
# Display the map object returned by segment_speed_map.
_m

In [None]:
# Write the map to an HTML file in the current working directory.
_m.save('samtrans_map.html')

In [None]:
# Distinct route short names among stop_delay_view rows whose `corridor` column is truthy.
rt_day.stop_delay_view >> filter(_.corridor) >> distinct(_.route_short_name)

In [None]:
# Display the corridor_stop_delays attribute.
# NOTE(review): presumably populated by add_corridor — confirm.
rt_day.corridor_stop_delays

# Metro (Inglewood)

In [9]:
date_range

array(['2022-04-30', '2022-05-01', '2022-05-02', '2022-05-03',
       '2022-05-04', '2022-05-05', '2022-05-06', '2022-05-07',
       '2022-05-08'], dtype='datetime64[D]')

In [10]:
# Operator ID and corridor geometry for this section; both replace the values set in
# the previous section.
itp_id = 182
corridor = gpd.read_file('./corridors_sccp_lpp/inglewood_map.geojson')

In [7]:
# corridor.explore()

In [11]:
# Progress bar passed to average_metrics and from_gcs below.
pbar = tqdm()

0it [00:00, ?it/s]

In [12]:
# generate_export_data(itp_id, dates_to_run, pbar)

In [21]:
# NOTE(review): average_metrics is not defined in any cell visible in this notebook; it
# presumably came from a removed cell or an import. Restore its definition so that
# Restart & Run All works. The result displayed below is a dict with the keys
# 'avg_schedule_metric_minutes' and 'avg_speed_metric_minutes'.
metro_metrics = average_metrics(itp_id, date_range, pbar)

found parquet
complete for date: 2022-04-30
found parquet
complete for date: 2022-05-01
found parquet
complete for date: 2022-05-02
found parquet
complete for date: 2022-05-03
found parquet
complete for date: 2022-05-04
found parquet
complete for date: 2022-05-05
found parquet
complete for date: 2022-05-06
found parquet
complete for date: 2022-05-07
found parquet
complete for date: 2022-05-08


In [22]:
# Display the averaged corridor metrics computed in the previous cell.
metro_metrics

{'avg_schedule_metric_minutes': 102.0, 'avg_speed_metric_minutes': 2018.0}

In [23]:
# Load the 2022-05-04 analysis for this operator, attach the Inglewood corridor, and
# build the segment speed map with corridor=True.
rt_day = rt_filter_map_plot.from_gcs(itp_id, dt.date(2022, 5, 4), pbar)
rt_day.add_corridor(corridor)
# Called through the class with rt_day as the instance argument.
_m = rt_filter_map_plot.RtFilterMapper.segment_speed_map(rt_day, corridor=True)

found parquet


In [27]:
# _m

In [28]:
# rt_day.quick_map_corridor()

In [25]:
# Distinct route short names among stop_delay_view rows whose `corridor` column is truthy.
rt_day.stop_delay_view >> filter(_.corridor) >> distinct(_.route_short_name)

Unnamed: 0,route_short_name
0,117
1,111
2,115
3,211/215
4,40
5,212


# Draft general utility: which operators have data present for each date

In [6]:
# List the rt_trips/ folder once; get_operators() parses these paths instead of
# querying the bucket on every call.
fs_list = fs.ls(f'{shared_utils.rt_utils.GCS_FILE_PATH}rt_trips/')

In [7]:
def get_operators(analysis_date):
    """Return the operator IDs that have an rt_trips file for ``analysis_date``.

    Each entry of the module-level ``fs_list`` is parsed as
    ``.../rt_trips/<itp_id>_<month>_<day>...``. A path matches when its zero-padded
    month and day fields equal those of ``analysis_date``; the year is not compared.
    IDs are returned as ints, in ``fs_list`` order.
    """
    month = f'{analysis_date.month:02d}'
    day = f'{analysis_date.day:02d}'

    ran_operators = []
    for path in fs_list:
        filename = path.split('rt_trips/')[1]
        if not filename:
            # Entry for the folder itself: nothing after 'rt_trips/'.
            continue
        fields = filename.split('_')
        # Only the first two characters of the third field are the day; the rest of
        # that field (e.g. a file extension) is ignored.
        if fields[1] == month and fields[2][:2] == day:
            ran_operators.append(int(fields[0]))
    return ran_operators

In [8]:
# For each day in the window, print the date followed by the operator IDs that
# get_operators() finds for it.
for date in date_range:
    # numpy datetime64 -> datetime.date, so .day and .month are available.
    date = date.astype(dt.date)
    print(date, end=': ')
    print(get_operators(date))

2022-04-30: [182, 290]
2022-05-01: [182, 290]
2022-05-02: [159, 182, 290]
2022-05-03: [159, 182, 290]
2022-05-04: [110, 126, 127, 135, 148, 159, 167, 170, 182, 183, 188, 194, 218, 221, 226, 235, 243, 246, 247, 259, 260, 269, 278, 282, 284, 293, 294, 295, 300, 301, 30, 310, 315, 336, 349, 350, 360, 361, 372, 45, 484, 4, 75, 99]
2022-05-05: [159, 182, 290]
2022-05-06: [159, 182, 290]
2022-05-07: [182, 290]
2022-05-08: [182, 290]
