In [1]:
import os
os.environ["CALITP_BQ_MAX_BYTES"] = str(100_000_000_000)

from calitp.tables import tbl
from calitp import query_sql
import calitp.magics
import branca

import shared_utils
import utils

from siuba import *
import numpy as np
import pandas as pd
import geopandas as gpd
import shapely

import datetime as dt
import time
from zoneinfo import ZoneInfo

import rt_analysis as rt
import importlib

import gcsfs
fs = gcsfs.GCSFileSystem()



In [2]:
analysis_date = dt.date(2022, 1, 12)

In [3]:
samtrans_itp_id = 290

In [4]:
importlib.reload(utils)
importlib.reload(rt)

<module 'rt_analysis' from '/home/jovyan/data-analyses/rt_delay/rt_analysis.py'>

In [5]:
# Stepped red-yellow-green colormap rescaled to the 0-30 range; the caption
# labels the values as speed in miles per hour.
colorscale = branca.colormap.step.RdYlGn_10.scale(vmin=0, 
 vmax=30)
colorscale.caption = "Speed (miles per hour)"

In [6]:
from tqdm import tqdm_notebook

In [7]:
from tqdm.notebook import trange, tqdm

In [8]:
pbar = tqdm()

0it [00:00, ?it/s]

In [9]:
## running about 2 minutes for interpolators, about 20 for delay(!)

In [10]:
%%capture
sam = rt.OperatorDayAnalysis(samtrans_itp_id, analysis_date, pbar)

In [11]:
# Mean and maximum delay per stop (grouped by stop_id and stop_name), sorted by
# mean delay in descending order; display the three highest.
(sam.stop_delay_view
 >> group_by(_.stop_id, _.stop_name)
 >> summarize(mean_delay = _.delay.mean(), max_delay = _.delay.max())
 >> arrange(-_.mean_delay)) >> head(3)

Unnamed: 0,stop_id,stop_name,mean_delay,max_delay
715,334620,South SF BART-Bay 5 Westside Busway,0 days 00:26:28.198510,0 days 00:26:28.198510
1192,343118,Alameda de las Pulgas & Alma St,0 days 00:18:37.985408200,0 days 00:22:35.850843
269,331100,SF Transit Center - BART shuttle connection,0 days 00:17:02.684474,0 days 00:17:02.684474


In [12]:
sam.set_filter(start_time='15:00', end_time='19:00', direction_id='1')

In [13]:
sam.filter

{'start_time': datetime.time(15, 0),
 'end_time': datetime.time(19, 0),
 'route_ids': None,
 'direction_id': '1',
 'direction': None}

In [37]:
# %%capture
# m = sam.segment_speed_map(colorscale=colorscale)

In [38]:
# m

### speedup sandbox

* selective use of numba @jit decorator in computation-heavy methods?
    * likely requires refactor to plain np, translation, etc...
    * done for time_at_position with wonderful results (60x speedup)
    * harder to do for shift_calculate, giving up for now to focus on more important things

In [48]:
from numba import jit

In [49]:
importlib.reload(rt)

<module 'rt_analysis' from '/home/jovyan/data-analyses/rt_delay/rt_analysis.py'>

In [50]:
one_interpolator = sam.position_interpolators['11030732-136-Blocks-Weekday-54']['rt']

In [51]:
pos_gdf = one_interpolator.position_gdf.copy()

In [52]:
# Index of the position table as a float64 NumPy array.
_ix_array = pos_gdf.index.to_numpy().astype('float64')

In [53]:
time_col = 'vehicle_timestamp'
# Cast the timestamps to second resolution and then to float64, which yields
# seconds since the Unix epoch as plain floats (values around 1.64e9 below).
_raw_dt_array = (pos_gdf[time_col].to_numpy()
                              .astype('datetime64[s]')
                              .astype('float64')
                             )
_raw_dt_array[:5]

array([1.64196636e+09, 1.64196639e+09, 1.64196640e+09, 1.64196642e+09,
       1.64196645e+09])

In [54]:
# The shape_meters column as a plain NumPy array.
# NOTE(review): presumably distance travelled along the route shape, in
# meters -- confirm against rt_analysis.
_raw_shape_array = pos_gdf.shape_meters.to_numpy()
_raw_shape_array[:5]

array([ 323.81885049,  323.81885049, 1088.32441641, 1088.32441641,
       1088.32441641])

In [55]:
@jit(nopython=True)
def apply_shift_calculate_numba(ix_array, dt_array, shape_array):
    """Compute the change between each row and the row before it.

    Parameters
    ----------
    ix_array, dt_array, shape_array : 1-D numeric arrays of equal length
        Row index, timestamp and position values. The notebook passes epoch
        seconds and ``shape_meters`` values for the latter two.

    Returns
    -------
    tuple
        ``(ix_array, dt_array, shape_array, meters, seconds, speed)``. The
        three inputs are passed through untouched. ``meters`` and ``seconds``
        are the consecutive differences of ``shape_array`` and ``dt_array``
        (one element shorter than the inputs), and ``speed`` is their
        element-wise quotient.

    Raises
    ------
    AssertionError
        If the three inputs do not all have the same length.
    """
    expected_len = len(dt_array)
    assert len(ix_array) == expected_len and len(shape_array) == expected_len

    # np.diff(a)[i] == a[i + 1] - a[i], so each delta belongs to row i + 1.
    delta_meters = np.diff(shape_array)
    delta_seconds = np.diff(dt_array)
    speed = np.divide(delta_meters, delta_seconds)

    return (ix_array, dt_array, shape_array,
            delta_meters, delta_seconds, speed)

In [56]:
def map_dt_or_nan(timestamp):
    """Convert epoch seconds to a naive UTC ``datetime``, passing NaN through.

    Parameters
    ----------
    timestamp : float
        Seconds since the Unix epoch, or NaN.

    Returns
    -------
    datetime.datetime or float
        A timezone-naive datetime expressed in UTC, or ``np.nan`` when
        ``timestamp`` is NaN.
    """
    if np.isnan(timestamp):
        return np.nan
    # datetime.utcfromtimestamp() is deprecated since Python 3.12. Build an
    # aware UTC datetime and strip tzinfo so callers still get a naive value.
    aware_utc = dt.datetime.fromtimestamp(timestamp, tz=dt.timezone.utc)
    return aware_utc.replace(tzinfo=None)

In [57]:
def wrap_shift_calculate_numba(ix_array, dt_array, shape_array):
    """Drop rows that do not advance along the shape and return per-row deltas.

    Calls ``apply_shift_calculate_numba`` and removes every row whose
    ``shape_array`` value is less than or equal to the previous row's. Removing
    rows changes which rows are neighbours, so the pass is repeated on the
    surviving rows until one removes nothing.

    This is written as a loop rather than recursion: a single pass may remove
    as little as one row, so the number of passes can approach the number of
    rows and would otherwise be bounded by Python's recursion limit.

    Parameters
    ----------
    ix_array, dt_array, shape_array : 1-D numeric arrays of equal length
        Row index, timestamps as float epoch seconds, and position values.

    Returns
    -------
    list of numpy.ndarray
        ``[ix, dt, shape, meters_from_last, seconds_from_last,
        speed_from_last]`` for the surviving rows. ``dt`` holds the result of
        ``map_dt_or_nan`` for each surviving timestamp. For non-empty input the
        arrays share one length, and the first element of the three delta
        arrays is NaN because the first row has no predecessor.
    """
    while True:
        n = len(dt_array)
        arrays = apply_shift_calculate_numba(ix_array, dt_array, shape_array)

        # The deltas are one element short; pad the front with NaN so that
        # they line up row-for-row with the inputs.
        bumped_meters = np.insert(arrays[3], 0, np.nan)
        bumped_sec = np.insert(arrays[4], 0, np.nan)
        bumped_spd = np.insert(arrays[5], 0, np.nan)

        # True where the position did not increase. The leading NaN compares
        # False against 0, so the first row is always kept.
        progression_mask = np.ma.masked_less_equal(bumped_meters, 0).mask
        compressed_arrays = [
            np.ma.array(array, mask=progression_mask).compressed()
            for array in (arrays[0], arrays[1], arrays[2],
                          bumped_meters, bumped_sec, bumped_spd)
        ]

        if len(compressed_arrays[1]) == n:
            # Nothing was dropped: the deltas are final. Convert the float
            # timestamps back to datetimes before returning.
            compressed_arrays[1] = np.array(
                [map_dt_or_nan(x) for x in compressed_arrays[1]]
            )
            return compressed_arrays

        # Rows were dropped, so the deltas must be recomputed between the new
        # neighbours; go around again with the surviving rows.
        ix_array, dt_array, shape_array = compressed_arrays[:3]