In [48]:
from models import arrival_history, util, config, vehicle_positions, eclipses
import datetime
import nest_asyncio
import pandas as pd
import numpy as np
import time
nest_asyncio.apply()

In [3]:
agency_id = 'trimet'
route_id = '4'
# util.parse_date('2022-03-10')
d = util.parse_date('2022-03-10')
# d = datetime.datetime.strptime('2022-03-10','%Y-%m-%d').date()
agency_config = config.get_agency(agency_id)

tz = agency_config.tz

routes = agency_config.get_route_list()
route_ids = [route.id for route in routes]
#from compute_arrivals line 59
start_hour = agency_config.default_day_start_hour

In [4]:
route_ids[:5]

['100', '200', '290', '90', '190']

In [5]:
len(route_ids)

89

In [6]:
start_dt = tz.localize(datetime.datetime(d.year, d.month, d.day, hour=start_hour))
# end_dt = start_dt + timedelta(days=1)
end_dt = tz.localize(datetime.datetime(d.year, d.month, d.day, hour=start_hour+3))

start_time = int(start_dt.timestamp())
end_time = int(end_dt.timestamp())

In [8]:
state = vehicle_positions.get_state(agency_id=agency_id,d=d
                            ,start_time=start_time, end_time=end_time
                            , route_ids=route_ids[:2])

state/v1/trimet/2022/03/10/11/
http://opentransit-pdx.s3.amazonaws.com/state/v1/trimet/2022/03/10/11/00/trimet_v1_1646910024856.json.gz
http://opentransit-pdx.s3.amazonaws.com/state/v1/trimet/2022/03/10/11/01/trimet_v1_1646910069914.json.gz
http://opentransit-pdx.s3.amazonaws.com/state/v1/trimet/2022/03/10/11/01/trimet_v1_1646910084933.json.gz
http://opentransit-pdx.s3.amazonaws.com/state/v1/trimet/2022/03/10/11/00/trimet_v1_1646910054894.json.gz
http://opentransit-pdx.s3.amazonaws.com/state/v1/trimet/2022/03/10/11/00/trimet_v1_1646910039874.json.gz
http://opentransit-pdx.s3.amazonaws.com/state/v1/trimet/2022/03/10/11/01/trimet_v1_1646910078748.json.gz
http://opentransit-pdx.s3.amazonaws.com/state/v1/trimet/2022/03/10/11/01/trimet_v1_1646910093813.json.gz
http://opentransit-pdx.s3.amazonaws.com/state/v1/trimet/2022/03/10/11/00/trimet_v1_1646910009836.json.gz
http://opentransit-pdx.s3.amazonaws.com/state/v1/trimet/2022/03/10/11/02/trimet_v1_1646910145012.json.gz
http://opentransit-pdx.s

In [12]:
route_id = route_ids[:5][0]
route_id

'100'

In [39]:
t0 = time.time()
route_state = state.get_for_route(route_id)

loading state for route 100 from cache: /app/backend/data/state_v4_trimet/2022-03-10/state_trimet_100_1646910000_1646920800.csv


In [40]:
route_config = agency_config.get_route_config(route_id)

## Recreate `eclipse.find_arrivals` below

In [41]:
tz = agency_config.tz

route_id = route_config.id

buses_raw = pd.concat([
        eclipses.resample_bus(bus)
        for vid, bus in route_state.groupby(route_state['VID'])
    ], ignore_index=True)

In [42]:
buses_raw.head(3)

Unnamed: 0,VID,LAT,LON,TIME,OBS_GROUP
0,101,45.52205,-122.47922,1646920247,1
1,101,45.51648,-122.46114,1646920574,1
2,101,45.516602,-122.461428,1646920580,1


In [43]:
buses = buses_raw[buses_raw['TIME'] != 0].copy()

In [44]:
lat_values = buses['LAT'].values
lon_values = buses['LON'].values

for stop_id in route_config.get_stop_ids():
    stop_info = route_config.get_stop_info(stop_id)
    stop_direction_ids = route_config.get_directions_for_stop(stop_id)
    if len(stop_direction_ids) > 0:
        # calculate distances fast with haversine function
        buses[f'DIST_{stop_id}'] = util.haver_distance(stop_info.lat, stop_info.lon, lat_values, lon_values)

In [49]:
possible_arrivals_arr = []
for dir_info in route_config.get_direction_infos():

    direction_id = dir_info.id

    # exclude times of day when bus is not making stops in this direction
    # (e.g. commuter express routes that only serve one direction in the morning/afternoon)
    valid_buses = buses
    for start_time_str, end_time_str in eclipses.get_invalid_direction_times(agency_config, route_config, direction_id):
        if start_time_str is not None:
            invalid_start_timestamp = util.get_localized_datetime(d, start_time_str, tz).timestamp()
            print(f"excluding buses after {invalid_start_timestamp} ({start_time_str}) for direction {direction_id}")
            valid_buses = valid_buses[valid_buses['TIME'] < invalid_start_timestamp]
        if end_time_str is not None:
            invalid_end_timestamp = util.get_localized_datetime(d, end_time_str, tz).timestamp()
            print(f"excluding buses before {invalid_end_timestamp} ({end_time_str}) for direction {direction_id}")
            valid_buses = valid_buses[valid_buses['TIME'] >= invalid_end_timestamp]

    dir_stops = dir_info.get_stop_ids()
    num_dir_stops = len(dir_stops)
    is_loop = dir_info.is_loop()

    for stop_index, stop_id in enumerate(dir_stops):
        stop_info = route_config.get_stop_info(stop_id)

        is_terminal = False
        radius = 200
        adjacent_stop_ids = []

        is_terminal = (stop_index == 0) or (stop_index == num_dir_stops - 1)

        if is_loop:
            adjacent_stop_ids.append(dir_stops[(stop_index + num_dir_stops - 1) % num_dir_stops])
            adjacent_stop_ids.append(dir_stops[(stop_index + 1) % num_dir_stops])
        else:
            if stop_index > 0:
                adjacent_stop_ids.append(dir_stops[stop_index - 1])
            if stop_index < num_dir_stops - 1:
                adjacent_stop_ids.append(dir_stops[stop_index + 1])

        for adjacent_stop_id in adjacent_stop_ids:
            adjacent_stop_info = route_config.get_stop_info(adjacent_stop_id)

            # set radius to be no larger than the distance to the previous/next stop.
            # this helps avoid odd results near the terminals of certain routes
            distance_to_adjacent_stop = util.haver_distance(stop_info.lat, stop_info.lon, adjacent_stop_info.lat, adjacent_stop_info.lon)
            radius = min(radius, round(distance_to_adjacent_stop))

        #dirs_text = [f'{d}[{i}]' for d, i in zip(stop_direction_ids, stop_indexes)]
        #print(f"{route_id}: {round(time.time() - t0, 1)} computing arrivals at stop {stop_id} {','.join(dirs_text)}  radius {radius} m  {'(terminal)' if is_terminal else ''}")

        possible_arrivals = eclipses.get_possible_arrivals_for_stop(valid_buses, stop_id,
            direction_id=direction_id,
            stop_index=stop_index,
            adjacent_stop_ids=adjacent_stop_ids,
            radius=radius,
            is_terminal=is_terminal,
        )

        possible_arrivals_arr.append(possible_arrivals)

    possible_arrivals = pd.concat(possible_arrivals_arr, ignore_index=True)

    if possible_arrivals.empty:
        arrivals, num_trips = possible_arrivals, 0
    else:
        print(f'{route_id}: {round(time.time() - t0, 1)} cleaning arrivals')

        arrivals = eclipses.clean_arrivals(possible_arrivals, buses, route_config)

        num_trips = len(np.unique(arrivals['TRIP'].values))

    print(f"{route_id}: {round(time.time() - t0, 1)} found {len(arrivals['TIME'].values)} arrivals in {num_trips} trips")

100: 53.1 cleaning arrivals
100: 53.9 found 206 arrivals in 30 trips
100: 55.3 cleaning arrivals
100: 56.5 found 690 arrivals in 76 trips


In [50]:
arrivals

Unnamed: 0,VID,TIME,DEPARTURE_TIME,DIST,SID,DID,STOP_INDEX,OBS_GROUP,TRIP
927,246,1646910051,1646910059,15.985551,8363,1,5,1,34
915,214,1646910051,1646910059,15.985551,8363,1,5,1,14
964,246,1646910156,1646910172,4.073136,8364,1,6,1,34
952,214,1646910156,1646910172,4.073136,8364,1,6,1,14
1003,246,1646910259,1646910265,3.308717,8365,1,7,1,34
987,214,1646910259,1646910265,3.308717,8365,1,7,1,14
1039,246,1646910374,1646910420,5.176467,8366,1,8,1,34
1028,214,1646910374,1646910420,5.176467,8366,1,8,1,14
1068,246,1646910541,1646910546,2.588229,8367,1,9,1,34
1058,214,1646910541,1646910546,2.588229,8367,1,9,1,14
