In [1]:
import os
os.environ["CALITP_BQ_MAX_BYTES"] = str(100_000_000_000)

from calitp.tables import tbl
from calitp import query_sql
import calitp.magics
import branca

import shared_utils
import utils

from siuba import *
import pandas as pd
import geopandas as gpd
import shapely

import datetime as dt
import time
from zoneinfo import ZoneInfo

import rt_analysis as rt
import importlib

import gcsfs
fs = gcsfs.GCSFileSystem()



In [2]:
analysis_date = dt.date(2022, 1, 12)

In [3]:
samtrans_itp_id = 290

In [4]:
importlib.reload(utils)
importlib.reload(rt)

<module 'rt_analysis' from '/home/jovyan/data-analyses/rt_delay/rt_analysis.py'>

In [5]:
colorscale = branca.colormap.step.RdYlGn_10.scale(vmin=0, 
 vmax=30)
colorscale.caption = "Speed (miles per hour)"

In [6]:
from tqdm import tqdm_notebook

In [7]:
from tqdm.notebook import trange, tqdm

In [8]:
pbar = tqdm()

0it [00:00, ?it/s]

In [9]:
## running about 2 minutes for interpolators, about 20 for delay(!)

In [10]:
%%capture
sam = rt.OperatorDayAnalysis(samtrans_itp_id, analysis_date, pbar)

In [11]:
# Per-stop mean and max delay, sorted by mean delay (largest first); show the top three.
(
    sam.stop_delay_view
    >> group_by(_.stop_id, _.stop_name)
    >> summarize(
        mean_delay=_.delay.mean(),
        max_delay=_.delay.max(),
    )
    >> arrange(-_.mean_delay)
    >> head(3)
)

Unnamed: 0,stop_id,stop_name,mean_delay,max_delay
715,334620,South SF BART-Bay 5 Westside Busway,0 days 00:26:28.198510,0 days 00:26:28.198510
1192,343118,Alameda de las Pulgas & Alma St,0 days 00:18:37.985408200,0 days 00:22:35.850843
269,331100,SF Transit Center - BART shuttle connection,0 days 00:17:02.684474,0 days 00:17:02.684474


In [12]:
sam.set_filter(start_time='15:00', end_time='19:00', direction_id='1')

In [13]:
sam.filter

{'start_time': datetime.time(15, 0),
 'end_time': datetime.time(19, 0),
 'route_ids': None,
 'direction_id': '1',
 'direction': None}

In [14]:
# %%capture
# m = sam.segment_speed_map(colorscale=colorscale)

In [15]:
# m

### speedup ideas

* selective use of the numba `@jit` decorator in computation-heavy methods?
    * likely requires refactoring to plain NumPy, translating existing code, etc.
    * already done for `time_at_position`, with wonderful results (~60x speedup)

In [16]:
# rt.TripPositionInterpolator.detailed_speed_map(ex1)

In [17]:
# @jit(nopython=True) ##numba gives huge speedup here (~60x)
# def time_at_position_numba(desired_position, shape_array, dt_float_array):
#     if desired_position < shape_array.max() and desired_position > shape_array.min():\
#         return np.interp(desired_position, shape_array, dt_float_array)
#     else:
#         return None

In [18]:
def _shift_calculate(self, vehicle_positions):

    # if hasattr(self, "progressing_positions"):
    #     print(self.progressing_positions.shape)
    #     self.debug_dict[self.progressing_positions.shape[0]] = self.progressing_positions.copy()

    vehicle_positions = vehicle_positions >> arrange(self.time_col) ## unnecessary?
    vehicle_positions['last_time'] = vehicle_positions[self.time_col].shift(1)
    vehicle_positions['last_loc'] = vehicle_positions.shape_meters.shift(1)
    vehicle_positions['secs_from_last'] = vehicle_positions[self.time_col] - vehicle_positions.last_time
    vehicle_positions.secs_from_last = (vehicle_positions.secs_from_last
                                    .apply(lambda x: x.seconds))
    vehicle_positions['meters_from_last'] = (vehicle_positions.shape_meters
                                                  - vehicle_positions.last_loc)
    vehicle_positions['progressed'] = vehicle_positions['meters_from_last'] > 0 ## has the bus moved ahead?
    vehicle_positions['speed_from_last'] = (vehicle_positions.meters_from_last
                                                 / vehicle_positions.secs_from_last) ## meters/second
    return vehicle_positions

In [19]:
import numpy as np
from numba import jit

In [20]:
one_interpolator = sam.position_interpolators['11030873-136-Blocks-Weekday-54']['rt']

In [21]:
one_interpolator.position_gdf >> head(3)

Unnamed: 0,vehicle_timestamp,vehicle_position_latitude,vehicle_position_longitude,header_timestamp,geometry,shape_meters,last_time,last_loc,secs_from_last,meters_from_last,progressed,speed_from_last
0,2022-01-12 20:19:05,37.598866,-122.38635,2022-01-12 20:19:30,POINT (-210378.305 -43764.225),137.234294,NaT,,,,False,
1,2022-01-12 20:19:35,37.598866,-122.38635,2022-01-12 20:19:44,POINT (-210378.305 -43764.225),137.234294,2022-01-12 20:19:05,137.234294,30.0,0.0,False,0.0
2,2022-01-12 20:20:08,37.59826,-122.38694,2022-01-12 20:20:44,POINT (-210432.003 -43830.261),682.613133,2022-01-12 20:19:35,137.234294,33.0,545.378838,True,16.526631


In [22]:
one_interpolator.cleaned_positions >> head(3)

Unnamed: 0,vehicle_timestamp,vehicle_position_latitude,vehicle_position_longitude,header_timestamp,geometry,shape_meters,last_time,last_loc,secs_from_last,meters_from_last,progressed,speed_from_last
48056,2022-01-12 20:20:08,37.59826,-122.38694,2022-01-12 20:20:44,POINT (-210432.003 -43830.261),682.613133,2022-01-12 20:19:35,137.234294,33.0,545.378838,True,16.526631
48058,2022-01-12 20:21:40,37.60131,-122.382324,2022-01-12 20:22:10,POINT (-210016.625 -43501.548),1258.192127,2022-01-12 20:21:08,682.613133,32.0,575.578994,True,17.986844
48059,2022-01-12 20:22:09,37.60332,-122.37881,2022-01-12 20:22:44,POINT (-209701.303 -43285.954),1636.23087,2022-01-12 20:21:40,1258.192127,29.0,378.038743,True,13.035819


In [23]:
%%timeit
one_interpolator._linear_reference()

12.6 ms ± 997 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [24]:
pos_gdf = one_interpolator.position_gdf.copy()

In [35]:
_ix_array = pos_gdf.index.to_numpy()

In [26]:
time_col = 'vehicle_timestamp'
# Timestamps truncated to whole seconds, then cast to float64 (seconds since the epoch).
_raw_dt_array = pos_gdf[time_col].to_numpy().astype('datetime64[s]').astype('float64')
_raw_dt_array[:5]

array([1.64201874e+09, 1.64201878e+09, 1.64201881e+09, 1.64201887e+09,
       1.64201890e+09])

In [27]:
_raw_shape_array = pos_gdf.shape_meters.to_numpy()
_raw_shape_array[:5]

array([ 137.23429403,  137.23429403,  682.61313251,  682.61313251,
       1258.19212654])

In [28]:
pos_gdf.head(5)

Unnamed: 0,vehicle_timestamp,vehicle_position_latitude,vehicle_position_longitude,header_timestamp,geometry,shape_meters,last_time,last_loc,secs_from_last,meters_from_last,progressed,speed_from_last
0,2022-01-12 20:19:05,37.598866,-122.38635,2022-01-12 20:19:30,POINT (-210378.305 -43764.225),137.234294,NaT,,,,False,
1,2022-01-12 20:19:35,37.598866,-122.38635,2022-01-12 20:19:44,POINT (-210378.305 -43764.225),137.234294,2022-01-12 20:19:05,137.234294,30.0,0.0,False,0.0
2,2022-01-12 20:20:08,37.59826,-122.38694,2022-01-12 20:20:44,POINT (-210432.003 -43830.261),682.613133,2022-01-12 20:19:35,137.234294,33.0,545.378838,True,16.526631
3,2022-01-12 20:21:08,37.59826,-122.38694,2022-01-12 20:21:24,POINT (-210432.003 -43830.261),682.613133,2022-01-12 20:20:08,682.613133,60.0,0.0,False,0.0
4,2022-01-12 20:21:40,37.60131,-122.382324,2022-01-12 20:22:10,POINT (-210016.625 -43501.548),1258.192127,2022-01-12 20:21:08,682.613133,32.0,575.578994,True,17.986844


In [31]:
_raw_shape_array[:5]

array([ 137.23429403,  137.23429403,  682.61313251,  682.61313251,
       1258.19212654])

In [46]:
np.roll(_raw_shape_array, -1)[:-1]

array([1.37234294e+02, 6.82613133e+02, 6.82613133e+02, 1.25819213e+03,
       1.63623087e+03, 2.32714948e+03, 2.32714948e+03, 2.32714948e+03,
       2.32714948e+03, 3.94883684e+03, 3.94883684e+03, 4.13043171e+03,
       4.13043171e+03, 4.12964695e+03, 4.12964695e+03, 4.13302947e+03,
       4.13302947e+03, 4.13302947e+03, 4.13302947e+03, 4.13556636e+03,
       4.13556636e+03, 4.13556636e+03, 4.13556636e+03, 4.70823876e+03,
       4.70823876e+03, 5.13120542e+03, 5.13120542e+03, 5.36652335e+03,
       5.63316505e+03, 5.78462973e+03, 5.78462973e+03, 6.70569403e+03,
       6.70569403e+03, 7.86851860e+03, 7.86851860e+03, 7.86851860e+03,
       7.86851860e+03, 7.86851860e+03, 8.25160058e+01, 8.99847607e+03,
       8.99847607e+03, 9.01912268e+03, 9.01912268e+03, 9.01912268e+03,
       9.01912268e+03, 9.01912268e+03, 9.01912268e+03, 9.01912268e+03,
       9.01912268e+03, 9.01912268e+03, 9.01912268e+03, 9.01912268e+03,
       9.01912268e+03, 9.01912268e+03, 9.01912268e+03, 9.01912268e+03,
      

In [32]:
## Difference between consecutive shape_meters values, i.e. arr[1:] - arr[:-1].
## Matches the pandas `meters_from_last` column for rows 1..n-1 (the leading NaN row is not included).
(np.roll(_raw_shape_array, -1) - _raw_shape_array)[:-1] ##checked

array([ 0.00000000e+00,  5.45378838e+02,  0.00000000e+00,  5.75578994e+02,
        3.78038743e+02,  6.90918611e+02,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  1.62168736e+03,  0.00000000e+00,  1.81594872e+02,
        0.00000000e+00, -7.84760283e-01,  0.00000000e+00,  3.38251873e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  2.53688933e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  5.72672406e+02,
        0.00000000e+00,  4.22966657e+02,  0.00000000e+00,  2.35317928e+02,
        2.66641702e+02,  1.51464685e+02,  0.00000000e+00,  9.21064300e+02,
        0.00000000e+00,  1.16282457e+03,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00, -7.78600260e+03,  8.91596007e+03,
        0.00000000e+00,  2.06466081e+01,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  

In [33]:
pos_gdf.tail(5)

Unnamed: 0,vehicle_timestamp,vehicle_position_latitude,vehicle_position_longitude,header_timestamp,geometry,shape_meters,last_time,last_loc,secs_from_last,meters_from_last,progressed,speed_from_last
115,2022-01-12 21:18:28,37.5995,-122.38646,2022-01-12 21:18:52,POINT (-210386.226 -43693.525),9019.12268,2022-01-12 21:18:06,9019.12268,22.0,0.0,False,0.0
116,2022-01-12 21:19:17,37.5995,-122.38646,2022-01-12 21:19:32,POINT (-210386.226 -43693.525),9019.12268,2022-01-12 21:18:28,9019.12268,49.0,0.0,False,0.0
117,2022-01-12 21:19:45,37.5995,-122.38646,2022-01-12 21:20:12,POINT (-210386.226 -43693.525),9019.12268,2022-01-12 21:19:17,9019.12268,28.0,0.0,False,0.0
118,2022-01-12 21:20:11,37.5995,-122.38646,2022-01-12 21:20:27,POINT (-210386.226 -43693.525),9019.12268,2022-01-12 21:19:45,9019.12268,26.0,0.0,False,0.0
119,2022-01-12 21:20:40,37.599552,-122.38653,2022-01-12 21:21:13,POINT (-210392.251 -43687.590),2.933806,2022-01-12 21:20:11,9019.12268,29.0,-9016.188875,False,-310.903065


In [41]:
np.roll(_raw_dt_array, -1)

array([1.64201878e+09, 1.64201881e+09, 1.64201887e+09, 1.64201890e+09,
       1.64201893e+09, 1.64201899e+09, 1.64201902e+09, 1.64201905e+09,
       1.64201908e+09, 1.64201911e+09, 1.64201914e+09, 1.64201916e+09,
       1.64201920e+09, 1.64201922e+09, 1.64201926e+09, 1.64201929e+09,
       1.64201932e+09, 1.64201934e+09, 1.64201937e+09, 1.64201939e+09,
       1.64201943e+09, 1.64201946e+09, 1.64201950e+09, 1.64201952e+09,
       1.64201954e+09, 1.64201958e+09, 1.64201961e+09, 1.64201964e+09,
       1.64201967e+09, 1.64201970e+09, 1.64201973e+09, 1.64201976e+09,
       1.64201980e+09, 1.64201982e+09, 1.64201985e+09, 1.64201989e+09,
       1.64201992e+09, 1.64201995e+09, 1.64201998e+09, 1.64202001e+09,
       1.64202003e+09, 1.64202006e+09, 1.64202008e+09, 1.64202012e+09,
       1.64202015e+09, 1.64202018e+09, 1.64202021e+09, 1.64202024e+09,
       1.64202027e+09, 1.64202030e+09, 1.64202034e+09, 1.64202036e+09,
       1.64202039e+09, 1.64202042e+09, 1.64202044e+09, 1.64202048e+09,
      

In [34]:
## Difference between consecutive timestamps in seconds, i.e. arr[1:] - arr[:-1].
## Matches the pandas `secs_from_last` column for rows 1..n-1 (the leading NaN row is not included).
(np.roll(_raw_dt_array, -1) - _raw_dt_array)[:-1] ##checked

array([30., 33., 60., 32., 29., 58., 30., 31., 31., 27., 30., 28., 32.,
       29., 30., 31., 33., 23., 28., 22., 38., 33., 33., 26., 22., 36.,
       31., 25., 38., 30., 22., 32., 37., 28., 30., 35., 30., 32., 34.,
       28., 22., 27., 21., 33., 36., 31., 32., 31., 27., 33., 30., 30.,
       27., 29., 24., 33., 38., 27., 53., 62., 29., 26., 29., 22., 45.,
       30., 28., 24., 35., 29., 32., 30., 26., 29., 35., 34., 55., 26.,
       35., 32., 28., 32., 30., 31., 29., 29., 23., 34., 33., 33., 31.,
       29., 30., 28., 25., 32., 26., 22., 43., 28., 28., 26., 40., 32.,
       28., 25., 40., 29., 22., 28., 31., 29., 21., 22., 22., 49., 28.,
       26., 29.])

In [37]:
## Index labels of the rows that have a predecessor (rows 1..n-1).
## NOTE: roll(-1)[:-1] is arr[1:] -- it drops the first element; it does not
## reproduce pd.Series.shift(1), which would yield the *previous* row's value.
np.roll(_ix_array, -1)[:-1]

array([  1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,
        14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,
        27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,
        40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,
        53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,  65,
        66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,
        79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,
        92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103, 104,
       105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117,
       118, 119])

In [29]:
@jit
def integrate_f_numba(a, b, N):
    """Left-endpoint Riemann sum of ``f_plain`` over ``[a, b]`` using ``N`` equal steps.

    NOTE(review): ``f_plain`` is not defined in the visible part of this
    notebook, so calling this function fails until it is supplied.
    """
    width = (b - a) / N
    total = 0
    for k in range(N):
        total += f_plain(a + k * width)
    return total * width

In [56]:
@jit(nopython=True)
def apply_shift_calculate_numba(ix_array, dt_array, shape_array):
    """Array-based version of the pandas shift-and-difference step.

    Parameters
    ----------
    ix_array : 1-D array
        Row index labels; only its length is checked here.
    dt_array : 1-D float array
        Timestamps as seconds since the epoch, in time order.
    shape_array : 1-D float array
        ``shape_meters`` values aligned with ``dt_array``.

    Returns
    -------
    tuple of five arrays, each of length ``n - 1``:
    ``(last_time, last_loc, meters_from_last, seconds_from_last, speed_from_last)``.
    Element ``i`` describes input row ``i + 1`` relative to row ``i``.
    Prepending one NaN to each array lines it up with the pandas ``shift(1)``
    based columns, whose first row has no predecessor.
    """
    n = len(dt_array)
    assert len(ix_array) == len(shape_array) == n

    # Previous-row values are simply everything but the last element.
    # (np.roll(x, -1)[:-1] is x[1:], i.e. the *current* row, which is not
    # what "last_time" / "last_loc" mean.) Copy so callers never receive a
    # view onto their own input arrays.
    last_time = dt_array[:-1].copy()
    last_loc = shape_array[:-1].copy()

    # Current row minus previous row.
    meters_from_last = shape_array[1:] - shape_array[:-1]
    seconds_from_last = dt_array[1:] - dt_array[:-1]

    # Elementwise division: meters/second.
    speed_from_last = np.divide(meters_from_last, seconds_from_last)

    return last_time, last_loc, meters_from_last, seconds_from_last, speed_from_last

In [39]:
list(range(20))

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]

In [None]:
# NOTE(review): this cell has no execution count, and `last_time` is not assigned at
# notebook scope in the cells visible here (it appears only as a local inside
# apply_shift_calculate_numba), so it would raise NameError on Restart & Run All.
last_time = last_time.map(lambda x: dt.datetime.utcfromtimestamp(x))

In [None]:
# NOTE(review): this cell has no execution count, and `vehicle_positions` is not defined at
# notebook scope in the cells visible here (it appears only as a parameter of
# _shift_calculate), so it would raise NameError on Restart & Run All.
vehicle_positions['progressed'] = vehicle_positions['meters_from_last'] > 0 ## has the bus moved ahead?
vehicle_positions['speed_from_last'] = (vehicle_positions.meters_from_last
                                             / vehicle_positions.secs_from_last) ## meters/second

In [65]:
test_tuple = apply_shift_calculate_numba(_ix_array, _raw_dt_array, _raw_shape_array)

In [72]:
# Prepend one NaN to each returned array so every array gains a leading placeholder
# entry, mirroring the NaN first row that pandas shift(1) produces.
with_na = [np.insert(array, 0, np.nan, axis=0) for array in test_tuple]

In [88]:
def map_dt_or_nan(timestamp):
    """Convert epoch seconds to a naive UTC ``datetime``, passing NaN through.

    Parameters
    ----------
    timestamp : float
        Seconds since the Unix epoch, or NaN for a missing value.

    Returns
    -------
    datetime.datetime or float
        A timezone-naive datetime expressed in UTC, or ``np.nan`` when the
        input is NaN.
    """
    if np.isnan(timestamp):
        return np.nan
    # datetime.utcfromtimestamp() is deprecated since Python 3.12. Building an
    # aware UTC datetime and dropping tzinfo yields the same naive value.
    return dt.datetime.fromtimestamp(timestamp, tz=dt.timezone.utc).replace(tzinfo=None)

In [91]:
# Convert the NaN-padded epoch-second values in with_na[0] back to datetimes (NaN entries stay NaN).
back_to_dt = np.array([map_dt_or_nan(x) for x in with_na[0]]) ##nice!
# Store the converted array back so with_na[0] holds datetimes rather than floats.
with_na[0] = back_to_dt ##with_na may be usable as the final result -- still needs checking
##TODO: add the `progressed` column and a wrapper function