In [1]:
import os
os.environ["CALITP_BQ_MAX_BYTES"] = str(100_000_000_000)

from calitp.tables import tbl
from calitp import query_sql
import calitp.magics
import branca

import shared_utils
import utils

from siuba import *
import pandas as pd
import geopandas as gpd
import shapely

import datetime as dt
import time
from zoneinfo import ZoneInfo

import rt_analysis as rt
import importlib

import gcsfs
fs = gcsfs.GCSFileSystem()



In [2]:
analysis_date = dt.date(2022, 1, 12)

In [3]:
samtrans_itp_id = 290

In [4]:
importlib.reload(utils)
importlib.reload(rt)

<module 'rt_analysis' from '/home/jovyan/data-analyses/rt_delay/rt_analysis.py'>

In [5]:
# RdYlGn step colormap rescaled to span 0-30; the caption labels the units as mph.
colorscale = branca.colormap.step.RdYlGn_10.scale(vmin=0, vmax=30)
colorscale.caption = "Speed (miles per hour)"

In [6]:
from tqdm import tqdm_notebook

In [7]:
from tqdm.notebook import trange, tqdm

In [8]:
pbar = tqdm()

0it [00:00, ?it/s]

In [9]:
## running about 2 minutes for interpolators, about 20 for delay(!)

In [10]:
%%capture
sam = rt.OperatorDayAnalysis(samtrans_itp_id, analysis_date, pbar)

In [11]:
# Three stops with the largest mean delay, alongside each stop's maximum delay.
(
    sam.stop_delay_view
    >> group_by(_.stop_id, _.stop_name)
    >> summarize(mean_delay=_.delay.mean(), max_delay=_.delay.max())
    >> arrange(-_.mean_delay)
    >> head(3)
)

Unnamed: 0,stop_id,stop_name,mean_delay,max_delay
715,334620,South SF BART-Bay 5 Westside Busway,0 days 00:26:28.198510,0 days 00:26:28.198510
1192,343118,Alameda de las Pulgas & Alma St,0 days 00:18:37.985408200,0 days 00:22:35.850843
269,331100,SF Transit Center - BART shuttle connection,0 days 00:17:02.684474,0 days 00:17:02.684474


In [12]:
sam.set_filter(start_time='15:00', end_time='19:00', direction_id='1')

In [13]:
sam.filter

{'start_time': datetime.time(15, 0),
 'end_time': datetime.time(19, 0),
 'route_ids': None,
 'direction_id': '1',
 'direction': None}

In [14]:
# %%capture
# m = sam.segment_speed_map(colorscale=colorscale)

In [18]:
# m

### speedup ideas

* selective use of numba's `@jit` decorator in computation-heavy methods?
    * likely requires refactoring to plain NumPy, translation, etc.
    * done for `time_at_position` with wonderful results (~60x speedup)

In [16]:
# rt.TripPositionInterpolator.detailed_speed_map(ex1)

In [15]:
# @jit(nopython=True) ##numba gives huge speedup here (~60x)
# def time_at_position_numba(desired_position, shape_array, dt_float_array):
#     if desired_position < shape_array.max() and desired_position > shape_array.min():\
#         return np.interp(desired_position, shape_array, dt_float_array)
#     else:
#         return None

In [17]:
def _shift_calculate(self, vehicle_positions):
    """Add previous-observation deltas (time, distance, speed) to each position row.

    Parameters
    ----------
    self : object
        Only ``self.time_col`` is read: the name of the timestamp column.
    vehicle_positions : pandas.DataFrame
        Must contain the ``self.time_col`` column (datetime-like) and a
        numeric ``shape_meters`` column.

    Returns
    -------
    pandas.DataFrame
        A time-sorted copy of ``vehicle_positions`` (the input frame is not
        modified, original index labels are kept) with these columns added,
        in this order: ``last_time``, ``last_loc``, ``secs_from_last``,
        ``meters_from_last``, ``progressed``, ``speed_from_last``.
        The first row has no predecessor, so its deltas are NaT/NaN and
        ``progressed`` is False.
    """
    # Stable sort so rows sharing a timestamp keep their incoming order.
    positions = vehicle_positions.sort_values(self.time_col, kind="mergesort")

    timestamps = positions[self.time_col]
    positions['last_time'] = timestamps.shift(1)
    positions['last_loc'] = positions['shape_meters'].shift(1)

    # total_seconds() is vectorised and keeps the whole duration; the
    # Timedelta ``.seconds`` attribute is only the seconds *component*, so it
    # silently drops whole days (and fractions of a second) from the gap.
    positions['secs_from_last'] = (timestamps - positions['last_time']).dt.total_seconds()
    positions['meters_from_last'] = positions['shape_meters'] - positions['last_loc']

    # has the bus moved ahead? (NaN > 0 evaluates to False for the first row)
    positions['progressed'] = positions['meters_from_last'] > 0

    # meters/second; rows with a zero-second gap produce inf or NaN here
    positions['speed_from_last'] = (positions['meters_from_last']
                                    / positions['secs_from_last'])
    return positions

In [18]:
import numpy as np
from numba import jit

In [19]:
one_interpolator = sam.position_interpolators['11030873-136-Blocks-Weekday-54']['rt']

In [23]:
one_interpolator.position_gdf >> head(3)

Unnamed: 0,vehicle_timestamp,vehicle_position_latitude,vehicle_position_longitude,header_timestamp,geometry,shape_meters,last_time,last_loc,secs_from_last,meters_from_last,progressed,speed_from_last
0,2022-01-12 20:19:05,37.598866,-122.38635,2022-01-12 20:19:30,POINT (-210378.305 -43764.225),137.234294,NaT,,,,False,
1,2022-01-12 20:19:35,37.598866,-122.38635,2022-01-12 20:19:44,POINT (-210378.305 -43764.225),137.234294,2022-01-12 20:19:05,137.234294,30.0,0.0,False,0.0
2,2022-01-12 20:20:08,37.59826,-122.38694,2022-01-12 20:20:44,POINT (-210432.003 -43830.261),682.613133,2022-01-12 20:19:35,137.234294,33.0,545.378838,True,16.526631


In [25]:
one_interpolator.cleaned_positions >> head(3)

Unnamed: 0,vehicle_timestamp,vehicle_position_latitude,vehicle_position_longitude,header_timestamp,geometry,shape_meters,last_time,last_loc,secs_from_last,meters_from_last,progressed,speed_from_last
2,2022-01-12 20:20:08,37.59826,-122.38694,2022-01-12 20:20:44,POINT (-210432.003 -43830.261),682.613133,2022-01-12 20:19:35,137.234294,33.0,545.378838,True,16.526631
4,2022-01-12 20:21:40,37.60131,-122.382324,2022-01-12 20:22:10,POINT (-210016.625 -43501.548),1258.192127,2022-01-12 20:21:08,682.613133,32.0,575.578994,True,17.986844
5,2022-01-12 20:22:09,37.60332,-122.37881,2022-01-12 20:22:44,POINT (-209701.303 -43285.954),1636.23087,2022-01-12 20:21:40,1258.192127,29.0,378.038743,True,13.035819


In [22]:
%%timeit
one_interpolator._linear_reference()

12.5 ms ± 902 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [27]:
pos_gdf = one_interpolator.position_gdf.copy()

In [35]:
pos_gdf.index.to_numpy()[:5]

array([0, 1, 2, 3, 4])

In [34]:
time_col = 'vehicle_timestamp'
# Timestamps -> second-resolution datetime64 -> float64, giving a plain
# float array (epoch seconds) alongside the shape_meters array.
_raw_dt_array = pos_gdf[time_col].to_numpy().astype('datetime64[s]').astype('float64')
_raw_dt_array[:5]

array([1.64201874e+09, 1.64201878e+09, 1.64201881e+09, 1.64201887e+09,
       1.64201890e+09])

In [33]:
_raw_shape_array = pos_gdf.shape_meters.to_numpy()
_raw_shape_array[:5]

array([ 137.23429403,  137.23429403,  682.61313251,  682.61313251,
       1258.19212654])

In [36]:
pos_gdf.head(5)

Unnamed: 0,vehicle_timestamp,vehicle_position_latitude,vehicle_position_longitude,header_timestamp,geometry,shape_meters,last_time,last_loc,secs_from_last,meters_from_last,progressed,speed_from_last
0,2022-01-12 20:19:05,37.598866,-122.38635,2022-01-12 20:19:30,POINT (-210378.305 -43764.225),137.234294,NaT,,,,False,
1,2022-01-12 20:19:35,37.598866,-122.38635,2022-01-12 20:19:44,POINT (-210378.305 -43764.225),137.234294,2022-01-12 20:19:05,137.234294,30.0,0.0,False,0.0
2,2022-01-12 20:20:08,37.59826,-122.38694,2022-01-12 20:20:44,POINT (-210432.003 -43830.261),682.613133,2022-01-12 20:19:35,137.234294,33.0,545.378838,True,16.526631
3,2022-01-12 20:21:08,37.59826,-122.38694,2022-01-12 20:21:24,POINT (-210432.003 -43830.261),682.613133,2022-01-12 20:20:08,682.613133,60.0,0.0,False,0.0
4,2022-01-12 20:21:40,37.60131,-122.382324,2022-01-12 20:22:10,POINT (-210016.625 -43501.548),1258.192127,2022-01-12 20:21:08,682.613133,32.0,575.578994,True,17.986844


In [None]:
@jit
def integrate_f_numba(a, b, N):
    """Left-endpoint Riemann sum of ``f_plain`` over [a, b] using N equal steps.

    NOTE(review): ``f_plain`` is not defined in the visible part of this
    notebook; it has to exist before this function can be called.
    """
    step = (b - a) / N
    total = 0
    for k in range(N):
        # sample the integrand at the left edge of the k-th sub-interval
        total += f_plain(a + k * step)
    return total * step

In [None]:
@jit
def apply_shift_calculate_numba(col_a, col_b, col_N):
    """Call ``integrate_f_numba`` element-wise over three equal-length columns.

    Returns a float64 array holding one result per position in the inputs.

    NOTE(review): the name and the original inline note ("generate arrays for
    last time, last loc, secs from last, meters from last...") describe shift
    calculations, but the body currently only applies ``integrate_f_numba``
    row by row - confirm the intended behaviour.
    """
    size = len(col_N)
    assert len(col_a) == len(col_b) == size
    out = np.empty(size, dtype="float64")
    for idx in range(size):
        out[idx] = integrate_f_numba(col_a[idx], col_b[idx], col_N[idx])
    return out

In [37]:
_raw_shape_array[:5]

array([ 137.23429403,  137.23429403,  682.61313251,  682.61313251,
       1258.19212654])

In [47]:
## consecutive differences: same values as the pandas shift(1) subtraction
## (meters_from_last), minus its leading NaN (checked)
np.diff(_raw_shape_array)

array([ 0.00000000e+00,  5.45378838e+02,  0.00000000e+00,  5.75578994e+02,
        3.78038743e+02,  6.90918611e+02,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  1.62168736e+03,  0.00000000e+00,  1.81594872e+02,
        0.00000000e+00, -7.84760283e-01,  0.00000000e+00,  3.38251873e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  2.53688933e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  5.72672406e+02,
        0.00000000e+00,  4.22966657e+02,  0.00000000e+00,  2.35317928e+02,
        2.66641702e+02,  1.51464685e+02,  0.00000000e+00,  9.21064300e+02,
        0.00000000e+00,  1.16282457e+03,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00, -7.78600260e+03,  8.91596007e+03,
        0.00000000e+00,  2.06466081e+01,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  

In [46]:
pos_gdf.tail(5)

Unnamed: 0,vehicle_timestamp,vehicle_position_latitude,vehicle_position_longitude,header_timestamp,geometry,shape_meters,last_time,last_loc,secs_from_last,meters_from_last,progressed,speed_from_last
115,2022-01-12 21:18:28,37.5995,-122.38646,2022-01-12 21:18:52,POINT (-210386.226 -43693.525),9019.12268,2022-01-12 21:18:06,9019.12268,22.0,0.0,False,0.0
116,2022-01-12 21:19:17,37.5995,-122.38646,2022-01-12 21:19:32,POINT (-210386.226 -43693.525),9019.12268,2022-01-12 21:18:28,9019.12268,49.0,0.0,False,0.0
117,2022-01-12 21:19:45,37.5995,-122.38646,2022-01-12 21:20:12,POINT (-210386.226 -43693.525),9019.12268,2022-01-12 21:19:17,9019.12268,28.0,0.0,False,0.0
118,2022-01-12 21:20:11,37.5995,-122.38646,2022-01-12 21:20:27,POINT (-210386.226 -43693.525),9019.12268,2022-01-12 21:19:45,9019.12268,26.0,0.0,False,0.0
119,2022-01-12 21:20:40,37.599552,-122.38653,2022-01-12 21:21:13,POINT (-210392.251 -43687.590),2.933806,2022-01-12 21:20:11,9019.12268,29.0,-9016.188875,False,-310.903065
