In [1]:
import os
os.environ["CALITP_BQ_MAX_BYTES"] = str(100_000_000_000)

from calitp.tables import tbl
from calitp import query_sql
import calitp.magics
import branca

import shared_utils
from utils import *

from siuba import *
import pandas as pd
import geopandas as gpd
import shapely

import datetime as dt
import time
from zoneinfo import ZoneInfo

import rt_analysis as rt
import importlib

import gcsfs
fs = gcsfs.GCSFileSystem()

from tqdm import tqdm_notebook
from tqdm.notebook import trange, tqdm



In [29]:
# Re-import rt_analysis so edits to the module on disk take effect without
# restarting the kernel. Objects created before the reload keep their old class.
importlib.reload(rt)

<module 'rt_analysis' from '/home/jovyan/data-analyses/rt_delay/rt_analysis.py'>

## SDMTS

In [3]:
# ITP ID of the operator analysed in the SDMTS section.
mts_itp_id = 278

In [4]:
# Service date analysed in the SDMTS section.
analysis_date = dt.date(2022, 2, 17)

In [5]:
def get_vehicle_positions(itp_id, analysis_date):
    '''
    Get vehicle positions for a single operator on a single date of interest.

    itp_id: an itp_id (string or integer); must be convertible to int for the query
    analysis_date: datetime.date

    Returns a pandas DataFrame. If check_cached finds a parquet for this
    operator/date it is read and returned as-is. Otherwise positions are queried
    from `cal-itp-data-infra.gtfs_rt.vehicle_positions`, de-duplicated on
    (trip_id, vehicle_timestamp), stripped of rows without a vehicle timestamp,
    timestamp-converted with convert_ts, written to the cached_views folder and
    returned.

    Interim function, to be replaced as RT views are implemented...

    The query window is open-ended on both sides and runs from 00:00 on
    analysis_date to 02:00 on the following day: a temporary fix to balance
    capturing trips crossing midnight with avoiding duplicates.

    Raises AssertionError if no rows with a vehicle timestamp are found.
    Raises ValueError/TypeError if itp_id cannot be converted to an integer
    (only on the non-cached path).
    '''
    date_str = analysis_date.strftime('%Y-%m-%d')

    # NOTE(review): `start` is a naive datetime, so .timestamp() interprets it in
    # the kernel's local time zone. Confirm that this matches the service-day
    # time zone intended for the operator.
    start = dt.datetime.combine(analysis_date, dt.time(0))
    start_ts = int(start.timestamp())
    end = start + dt.timedelta(days = 1, seconds = 2 * 60**2)
    end_ts = int(end.timestamp())

    filename = f'vp_{itp_id}_{date_str}.parquet'
    path = check_cached(filename)
    if path:
        print('found parquet')
        return pd.read_parquet(path)
    else:
        # itp_id is interpolated into the SQL text, so force it to an integer
        # rather than trusting an arbitrary string.
        df = query_sql(f"""
        SELECT calitp_itp_id, calitp_url_number,
        header.timestamp AS header_timestamp, vehicle.timestamp AS vehicle_timestamp,
        vehicle.vehicle.label AS entity_id, vehicle.vehicle.id AS vehicle_id,
        vehicle.trip.tripId AS trip_id, vehicle.position.longitude AS vehicle_longitude,
        vehicle.position.latitude AS vehicle_latitude
        FROM `cal-itp-data-infra.gtfs_rt.vehicle_positions`
        WHERE calitp_itp_id = {int(itp_id)} AND vehicle.timestamp > {start_ts} AND vehicle.timestamp < {end_ts}
        """)

        # The query aliases the trip id column as `trip_id`; de-duplicate on the
        # column that actually exists in the result.
        df = df >> distinct(_.trip_id, _.vehicle_timestamp, _keep_all=True)
        df = df.dropna(subset=['vehicle_timestamp'])
        assert not df.empty, f'no vehicle positions data found for {date_str}'
        df.vehicle_timestamp = df.vehicle_timestamp.apply(convert_ts)
        df.header_timestamp = df.header_timestamp.apply(convert_ts)

        # TODO: warn when the returned positions start after the beginning of
        # analysis_date or end before the end of it (coverage check not implemented).

        df.to_parquet(f'{GCS_FILE_PATH}cached_views/{filename}')
        return df

In [7]:
# Build the operator-day analysis object for SDMTS.
# The progress bar is created here because `pbar` is otherwise only defined in
# a later cell, which makes this cell raise NameError on Restart & Run All.
pbar = tqdm()
mts = rt.OperatorDayAnalysis(mts_itp_id, analysis_date, pbar)

found parquet
found parquet
found parquet
found parquet
found_parquet
3 scheduled trips out of 8104 have no shape, dropping
vehicle positions gdf must not be empty
less than 1km of data
less than 1km of data
less than 1km of data
less than 1km of data
less than 1km of data
less than 1km of data
less than 1km of data
less than 1km of data
less than 1km of data
less than 1km of data
less than 1km of data
less than 1km of data
less than 1km of data
less than 1km of data
less than 1km of data
less than 1km of data
less than 1km of data
less than 1km of data
less than 1km of data
less than 1km of data
less than 1km of data
less than 1km of data
less than 1km of data
less than 1km of data
less than 1km of data
less than 1km of data
less than 1km of data
less than 1km of data
less than 1km of data
less than 1km of data
less than 1km of data
less than 1km of data
less than 1km of data
less than 1km of data
less than 1km of data
less than 1km of data
less than 1km of data
less than 1km of data




In [85]:
# Route ids treated as trolley routes; used to separate them from bus routes.
trolley_routes = ['510', '520', '530']

In [86]:
# Every route id seen in the RT trips table, minus the trolley routes,
# in the order in which the ids first appear.
mts_bus = list(
    filter(
        lambda route_id: route_id not in trolley_routes,
        list(mts.rt_trips.route_id.unique()),
    )
)

In [104]:
# Show every route_id present in the RT trips table, to choose routes for filtering.
mts.rt_trips.route_id.unique()

array(['921', '864', '945', '963', '856', '979', '964', '851', '704',
       '705', '204', '874', '120', '707', '510', '852', '20', '961',
       '950', '968', '944', '4', '115', '9', '967', '848', '88', '855',
       '833', '237', '14', '875', '936', '923', '11', '832', '909', '917',
       '894', '31', '928', '973', '905', '25', '202', '712', '110', '41',
       '235', '965', '290', '27', '816', '972', '83', '7', '916', '84',
       '929', '834', '13', '854', '872', '105', '8', '30', '985', '18',
       '904', '978', '838', '60', '974', '12', '6', '280', '1', '892',
       '946', '530', '992', '955', '44', '934', '215', '2', '3', '43',
       '5', '10', '140', '28', '35', '201', '225', '701', '815', '906',
       '907', '932', '933', '962', '520', '709', '901'], dtype=object)

In [105]:
# Hand-picked route ids grouped as "rapid" routes for the filtered views.
mts_rapid = ['280', '290', '235', '225', '215', '237']

In [106]:
# Reset the view filter on the SDMTS analysis object before a new one is set.
mts.reset_filter()

In [113]:
# Restrict the view to 15:00-19:00 on the rapid routes plus route '7'.
# NOTE(review): the method is called through the class with `mts` passed
# explicitly instead of mts.set_filter(...). Presumably this applies the freshly
# reloaded class to an instance created before importlib.reload(rt); confirm
# before simplifying.
rt.OperatorDayAnalysis.set_filter(mts, start_time='15:00', end_time='19:00', route_ids=mts_rapid + ['7'])

{'start_time': datetime.time(15, 0), 'end_time': datetime.time(19, 0), 'route_ids': ['280', '290', '235', '225', '215', '237', '7'], 'direction_id': None, 'direction': None}


In [34]:
# Progress bar passed to rt.OperatorDayAnalysis.
# NOTE(review): `pbar` is also referenced by an earlier cell in this notebook;
# consider moving this definition up next to the imports.
pbar = tqdm()

0it [00:00, ?it/s]

In [114]:
# Build the segment speed map for the current view filter.
# size is passed as [1300, 700] (presumably width, height in pixels -- confirm).
m = mts.segment_speed_map(size = [1300, 700])

view filter: {'start_time': datetime.time(15, 0), 'end_time': datetime.time(19, 0), 'route_ids': ['280', '290', '235', '225', '215', '237', '7'], 'direction_id': None, 'direction': None}





In [115]:
# Display the map inline.
m

In [112]:
# Save the current map as HTML under ./tripmaps/.
# NOTE(review): the file name says "rapid", but the set_filter cell above also
# includes route '7'. Confirm which filter the saved map should reflect when the
# notebook is run top to bottom.
m.save(f'./tripmaps/{mts_itp_id}_rapid_02_17_pm_peak.html')

In [111]:
# Median of mean_speed_mph over the RT trips returned by the object's _filter method.
mts._filter(mts.rt_trips).mean_speed_mph.median()

view filter: {'start_time': datetime.time(15, 0), 'end_time': datetime.time(19, 0), 'route_ids': ['280', '290', '235', '225', '215', '237'], 'direction_id': None, 'direction': None}


19.334126021866084

In [99]:
# Detailed speed map for one entry of position_interpolators
# (key '16256662' is presumably a trip id -- confirm). This rebinds `m`.
m = mts.position_interpolators['16256662']['rt'].detailed_speed_map()




In [101]:
# m

In [102]:
# Save the single-trip detailed speed map as HTML under ./tripmaps/.
m.save(f'./tripmaps/{mts_itp_id}_rt_07_tr_16256662.html')

## NCTD

In [35]:
# Service date for the NCTD section. This rebinds the same `analysis_date` name
# used for SDMTS, so SDMTS cells re-run after this one will use 2022-02-08.
analysis_date = dt.date(2022, 2, 8) # Tuesday, new tables

In [36]:
# ITP ID of the operator analysed in the NCTD section.
nctd_itp_id = 226

In [37]:
# Build the operator-day analysis object for NCTD; requires `pbar` to be defined.
nctd = rt.OperatorDayAnalysis(nctd_itp_id, analysis_date, pbar)

found parquet
found parquet
found parquet
found parquet
found_parquet
vehicle positions gdf must not be empty
less than 1km of data
less than 1km of data
less than 1km of data
less than 1km of data
less than 1km of data
less than 1km of data
less than 1km of data
less than 1km of data
less than 1km of data
less than 1km of data
less than 1km of data
could not generate delays for trip 15765980-NC2110-NCTD-Weekday-20
time data '24:05:00' does not match format '%H:%M:%S'


In [68]:
# Restrict the NCTD view to 15:00-19:00 on the rail routes.
# The rail route ids are defined here because `nc_rail_all` is otherwise only
# assigned in a later cell, which makes this cell raise NameError when the
# notebook is run top to bottom. The values match that later definition.
nc_rail_all = ['398', '498', '399']
nctd.set_filter(start_time='15:00', end_time='19:00', route_ids=nc_rail_all)

{'start_time': datetime.time(15, 0), 'end_time': datetime.time(19, 0), 'route_ids': ['398', '498', '399'], 'direction_id': None, 'direction': None}


In [69]:
# Build the segment speed map for the current NCTD view filter. This rebinds `m`.
# NOTE(review): the recorded output for this cell shows a TopologyException
# (self-intersecting geometry) message -- check the resulting map.
m = nctd.segment_speed_map()

view filter: {'start_time': datetime.time(15, 0), 'end_time': datetime.time(19, 0), 'route_ids': ['398', '498', '399'], 'direction_id': None, 'direction': None}


Input geom 1TopologyException: Input geom 1 is invalid: Self-intersection at 245611.20865780726 -533434.45357998728
 is INVALID: Self-intersection at or near point 245611.20865780726 -533434.45357998728 (245611.20865780726308 -533434.45357998728286)
<A>
MULTIPOLYGON (((246950.7672806830669288 -532005.3026747559197247, 246897.2573147887014784 -532014.1165019599720836, 246928.4262990281276871 -531969.7375156998168677, 246950.7672806830669288 -532005.3026747559197247)), ((245615.1499025252996944 -533399.9907900973921642, 245601.4390231675060932 -533435.4231267983559519, 245608.8507208010123577 -533455.0717078715097159, 245615.1499025252996944 -533399.9907900973921642)), ((245644.2556810270762071 -533431.1739654304692522, 245601.4390231675060932 -533435.4231267983559519, 245608.5669568150769919 -533454.3194430114235729, 245644.2556810270762071 -533431.1739654304692522)))
</A>



In [57]:
# m

In [56]:
# Median of mean_speed_mph over the RT trips returned by the object's _filter method.
# NOTE(review): the recorded output was produced with a bus-route filter that is
# not set anywhere in the visible cells.
nctd._filter(nctd.rt_trips).mean_speed_mph.median()

view filter: {'start_time': datetime.time(15, 0), 'end_time': datetime.time(19, 0), 'route_ids': ['350', '351', '303', '304', '305', '352', '354', '301', '315', '302', '356', '332', '359', '355', '309', '353', '347', '318', '323', '306', '308', '444', '313', '445', '334', '388', '325', '358', '311', '357'], 'direction_id': None, 'direction': None}


13.768394241852054

In [52]:
# Route id groups for NCTD: two rail groups, their union, and every other
# route id seen in the RT trips table (kept in first-appearance order).
coaster = ['398', '498']
sprinter = ['399']
nc_rail_all = [*coaster, *sprinter]
nc_bus = list(
    filter(
        lambda route_id: route_id not in nc_rail_all,
        list(nctd.rt_trips.route_id.unique()),
    )
)

In [83]:
# m.save(f'./tripmaps/{nctd_itp_id}_rail_02_17_pm_peak.html')