In [1]:
from skilift import GTFS, TransitGraph

In [2]:
import pandas as pd
from zipfile import ZipFile
from collections import defaultdict
from typing import Dict, Set
import numpy as np
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 100)

In [3]:
fn = "/home/bmander/skilift_data/transit/google_transit.zip"

graph = TransitGraph.load(fn)

In [5]:
stop_times = graph.feed.stop_times
stop_times.head()

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled,timepoint,stop_pattern_id,service_id
0,347461003,20340,20340,67655,1,,0,0,0.0,1,6,57068
1,347461003,20389,20389,67612,11,,0,0,2045.4,0,6,57068
2,347461003,20418,20418,70608,16,,0,0,3266.3,0,6,57068
3,347461003,20434,20434,70619,20,,0,0,3933.1,0,6,57068
4,347461003,20450,20450,72932,23,,0,0,4585.6,0,6,57068


In [153]:
def make_timetables(group):
    """This converts a group of stop_times that share a stop_pattern_id and 
    service_id into a timetable. 
    
    The timetable is a dataframe with three
    columns: trip_id, arrival_time, and departure_time. The arrival_time and
    departure_time columns are lists of times, one for each stop in the
    stop_pattern. The timetable is sorted by the first arrival time in the
    list."""

    trips = group.sort_values("stop_sequence") \
              .groupby("trip_id") \
              .agg(list)[["arrival_time", "departure_time"]] \
              .sort_values("arrival_time", key=lambda x: x.map(lambda x:x[0]))

    return trips


In [155]:
gg = stop_times.groupby(["stop_pattern_id", "service_id"]).apply(make_timetables).reset_index()
gg

Unnamed: 0,stop_pattern_id,service_id,trip_id,arrival_time,departure_time
0,0,11692,55295528,"[21480, 21600, 21720, 21943, 22040, 22260, 224...","[21480, 21600, 21720, 21943, 22040, 22260, 224..."
1,0,11692,55295529,"[22380, 22500, 22620, 22843, 22940, 23160, 233...","[22380, 22500, 22620, 22843, 22940, 23160, 233..."
2,0,11692,55295530,"[23280, 23400, 23520, 23743, 23840, 24060, 242...","[23280, 23400, 23520, 23743, 23840, 24060, 242..."
3,0,11692,55295531,"[24180, 24300, 24420, 24643, 24740, 24960, 251...","[24180, 24300, 24420, 24643, 24740, 24960, 251..."
4,0,11692,55295532,"[25080, 25200, 25320, 25543, 25640, 25860, 260...","[25080, 25200, 25320, 25543, 25640, 25860, 260..."
...,...,...,...,...,...
27683,546,57068,617043123,"[57720, 57859, 57965, 58080, 58184, 58279, 584...","[57720, 57859, 57965, 58080, 58184, 58279, 584..."
27684,546,57068,617043293,"[58320, 58459, 58565, 58680, 58784, 58879, 590...","[58320, 58459, 58565, 58680, 58784, 58879, 590..."
27685,546,57068,617043693,"[58920, 59059, 59165, 59280, 59384, 59479, 596...","[58920, 59059, 59165, 59280, 59384, 59479, 596..."
27686,546,57068,617043283,"[59520, 59659, 59765, 59880, 59984, 60079, 602...","[59520, 59659, 59765, 59880, 59984, 60079, 602..."


In [161]:
for (stop_pattern_id, service_id), df in gg.groupby(["stop_pattern_id", "service_id"]):
    trip_ids = df["trip_id"].values

    arrival_times = np.array(df["arrival_time"].values.tolist())
    departure_times = np.array(df["departure_time"].values.tolist())
    break

[[21480 21600 21720 21943 22040 22260 22434 22637 22852 22980]
 [22380 22500 22620 22843 22940 23160 23334 23537 23752 23880]
 [23280 23400 23520 23743 23840 24060 24234 24437 24652 24780]
 [24180 24300 24420 24643 24740 24960 25134 25337 25552 25680]
 [25080 25200 25320 25543 25640 25860 26034 26237 26452 26580]
 [25980 26100 26220 26443 26540 26760 26934 27137 27352 27480]
 [26880 27000 27120 27343 27440 27660 27834 28037 28252 28380]
 [27780 27900 28020 28243 28340 28560 28734 28937 29152 29280]
 [28680 28800 28920 29143 29240 29460 29634 29837 30052 30180]
 [29580 29700 29820 30043 30140 30360 30534 30737 30952 31080]
 [30480 30600 30720 30943 31040 31260 31434 31637 31852 31980]
 [31380 31500 31620 31843 31940 32160 32334 32537 32752 32880]
 [32280 32400 32520 32743 32840 33060 33234 33437 33652 33780]
 [33180 33300 33420 33643 33740 33960 34134 34337 34552 34680]
 [34080 34200 34320 34543 34640 34860 35034 35237 35452 35580]
 [34980 35100 35220 35443 35540 35760 35934 36137 36352

In [4]:
# get epoch time on Aptil 19, 2023 at 1:00pm pacific time
t0 = pd.Timestamp("2023-04-19 13:00:00", tz="America/Los_Angeles")
t0

Timestamp('2023-04-19 13:00:00-0700', tz='America/Los_Angeles')

In [5]:
stop_id = graph.feed.stops_with_name("Pine").iloc[0].stop_id
stop_id

1085

In [6]:
node = ("at_stop", stop_id, t0)
graph.adjacent_forward(node)

KeyError: (99, 9158)

In [63]:
stop_pattern_ids = gtfs.stop_pattern_ids[stop_id]
stop_pattern_ids

{98, 99, 100, 106, 336, 337, 342, 343, 371, 372, 374, 510}

In [67]:
# for each stop pattern associated with this stop
for stop_pattern_id in stop_pattern_ids:
    pattern = gtfs.stop_patterns[stop_pattern_id]
    j = pattern.index(stop_id)

    # for each service_id associated with this day
    for service_id in service_ids:
        key = (stop_pattern_id, service_id)

        if key not in gtfs.schedules:
            continue

        # sched is an array of shape (n_trips, n_stops, 2)
        trip_ids, timetable = gtfs.schedules[key]

        timetable = timetable[:, j, 1] # array of departure times

        # find the first departure after time_secs
        i = np.searchsorted(timetable, time_secs, side="left")
        print(i)

        print(time_secs, timetable[i, j, 1])

        break
    break

24
46800 47100


In [7]:
# join trips and stop_times
#stop_time_trips = trips.merge(stop_times, on="trip_id")

In [9]:
# get trip_id -> stop pattern

trip_stop_patterns = stop_times.sort_values("stop_sequence").groupby(["trip_id"]) \
             .agg({"stop_id": tuple}).to_dict(orient="index")


In [10]:
# reverse to get stop pattern -> list of trip_ids

stop_pattern_trips = defaultdict(set)
for trip_id, stop_pattern in trip_stop_patterns.items():
    stop_pattern_trips[stop_pattern["stop_id"]].add(trip_id)
stop_pattern_trips = dict(stop_pattern_trips)

In [30]:
# create stop_pattern_id -> ordered list of stops

stop_patterns = list(zip(range(len(stop_pattern_trips)), stop_pattern_trips.keys()))

In [12]:
# create stop_pattern_id -> list of trip_ids

stop_pattern_id_trips = expand_pairs(zip(range(len(stop_pattern_trips)), stop_pattern_trips.values()))

In [13]:
stop_pattern_id_trips_df = pd.DataFrame(stop_pattern_id_trips, columns=["stop_pattern_id", "trip_id"])
stop_pattern_id_trips_df

Unnamed: 0,stop_pattern_id,trip_id
0,0,55295528
1,0,55295529
2,0,55295530
3,0,55295531
4,0,55295532
...,...,...
27683,546,617043053
27684,546,617043693
27685,546,617043823
27686,546,617044343


In [16]:
stop_times = stop_times.merge(stop_pattern_id_trips_df, on="trip_id").merge(trips, on="trip_id")

In [24]:
# pivot. columns: stop_id, index: trip_id, values: [arrival_time, depature_time]
tmp = stop_times[stop_times["stop_pattern_id"] == 0].sort_values("departure_time")
tmp = tmp.pivot(index="trip_id", columns="stop_id", values=["arrival_time", "departure_time"])
tmp

Unnamed: 0_level_0,arrival_time,arrival_time,arrival_time,arrival_time,arrival_time,arrival_time,arrival_time,arrival_time,arrival_time,arrival_time,departure_time,departure_time,departure_time,departure_time,departure_time,departure_time,departure_time,departure_time,departure_time,departure_time
stop_id,1551,1652,1662,1672,1682,11175,27420,41904,41908,41970,1551,1652,1662,1672,1682,11175,27420,41904,41908,41970
trip_id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2
55295528,21480,21600,21720,21943,22040,22980,22260,22434,22637,22852,21480,21600,21720,21943,22040,22980,22260,22434,22637,22852
55295529,22380,22500,22620,22843,22940,23880,23160,23334,23537,23752,22380,22500,22620,22843,22940,23880,23160,23334,23537,23752
55295530,23280,23400,23520,23743,23840,24780,24060,24234,24437,24652,23280,23400,23520,23743,23840,24780,24060,24234,24437,24652
55295531,24180,24300,24420,24643,24740,25680,24960,25134,25337,25552,24180,24300,24420,24643,24740,25680,24960,25134,25337,25552
55295532,25080,25200,25320,25543,25640,26580,25860,26034,26237,26452,25080,25200,25320,25543,25640,26580,25860,26034,26237,26452
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55296140,67080,67200,67320,67543,67640,68580,67860,68034,68237,68452,67080,67200,67320,67543,67640,68580,67860,68034,68237,68452
55296141,68280,68400,68520,68743,68840,69780,69060,69234,69437,69652,68280,68400,68520,68743,68840,69780,69060,69234,69437,69652
55296142,69480,69600,69720,69943,70040,70980,70260,70434,70637,70852,69480,69600,69720,69943,70040,70980,70260,70434,70637,70852
55296143,70680,70800,70920,71143,71240,72180,71460,71634,71837,72052,70680,70800,70920,71143,71240,72180,71460,71634,71837,72052


In [55]:
trip_stop_pattern_id = {}
for stop_pattern_id, trips in stop_pattern_id_trips:
    for trip_id in trips:
        trip_stop_pattern_id[trip_id] = stop_pattern_id

In [56]:
trip_stop_pattern_id

{}

In [41]:
stop_service_schedule = stop_time_trips.sort_values("arrival_time").groupby(["service_id", "stop_id"]).agg({
    "arrival_time": list,
    "departure_time": list,
    "trip_id": list,
    "stop_sequence": list,
}).to_dict(orient="index")

In [42]:
stops = pd.read_csv( zf.open("stops.txt") )

In [43]:
stop_id = 11080

In [44]:

service_ids = tuple(service_dates[pd.Timestamp("2023-04-02")])
service_ids

(12202, 12034)

In [45]:
stop_service_schedule[(service_ids[0], stop_id)]

{'arrival_time': [22320,
  22620,
  24120,
  24600,
  25920,
  26520,
  27720,
  28380,
  29520,
  30360,
  31140,
  31320,
  32040,
  32940,
  33060,
  33840,
  34740,
  34860,
  35640,
  36540,
  36660,
  37440,
  38340,
  38460,
  39240,
  40200,
  40260,
  41100,
  42000,
  42060,
  42900,
  43800,
  43860,
  44700,
  45600,
  45660,
  46500,
  47400,
  47460,
  48300,
  49200,
  49260,
  50100,
  51000,
  51060,
  51900,
  52800,
  52860,
  53700,
  54600,
  54660,
  55500,
  56400,
  56460,
  57300,
  58200,
  58260,
  59100,
  60000,
  60060,
  60900,
  61800,
  61920,
  62700,
  63600,
  63720,
  64500,
  65340,
  65520,
  66420,
  67320,
  67320,
  68220,
  69120,
  69120,
  70020,
  70920,
  70920,
  71820,
  72720,
  72720,
  73620,
  74520,
  74520,
  75300,
  76200,
  76320,
  77100,
  77940,
  78120,
  78840,
  79920,
  80580,
  81720,
  82320,
  83520,
  84120,
  85320,
  85920,
  87000,
  87720,
  88560,
  90120,
  90600,
  91920,
  94620,
  98280,
  102240,
  103380],
