/
frequencies_to_stop_times.R
336 lines (287 loc) · 11.2 KB
/
frequencies_to_stop_times.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
#' Convert frequencies to stop times
#'
#' Creates `stop_times` entries based on the frequencies specified in the
#' `frequencies` table.
#'
#' @template gtfs
#' @param trip_id A character vector including the `trip_id`s to have their
#' frequencies converted to `stop_times` entries. If `NULL` (the default), the
#' function converts all trips listed in the `frequencies` table.
#' @param force Whether to convert trips specified in the `frequencies` table
#' even if they are not described in `stop_times` (defaults to `FALSE`). When
#' set to `TRUE`, these mismatched trip are removed from the `frequencies` table
#' and their correspondent entries in `trips` are substituted by what would be
#' their converted counterpart.
#'
#' @return A GTFS object with updated `frequencies`, `stop_times` and `trips`
#' tables.
#'
#' @section Details:
#' A single trip described in a `frequencies` table may yield multiple trips
#' after converting the GTFS. Let's say, for example, that the `frequencies`
#' table describes a trip called `"example_trip"`, that starts at 08:00 and
#' stops at 09:00, with a 30 minutes headway.
#'
#' In practice, that means that one trip will depart at 08:00, another at 08:30
#' and yet another at 09:00. `frequencies_to_stop_times()` appends a `"_<n>"`
#' suffix to the newly created trips to differentiate each one of them (e.g. in
#' this case, the new trips, described in the `trips` and `stop_times` tables,
#' would be called `"example_trip_1"`, `"example_trip_2"` and
#' `"example_trip_3"`).
#'
#' @examples
#' data_path <- system.file("extdata/spo_gtfs.zip", package = "gtfstools")
#' gtfs <- read_gtfs(data_path)
#' trip <- "CPTM L07-0"
#'
#' # converts all trips listed in the frequencies table
#' converted_gtfs <- frequencies_to_stop_times(gtfs)
#'
#' # converts only the specified trip_id
#' converted_gtfs <- frequencies_to_stop_times(gtfs, trip)
#'
#' # how the specified trip_id was described in the frequencies table
#' head(gtfs$frequencies[trip_id == trip])
#'
#' # the first row of each equivalent stop_times entry in the converted gtfs
#' equivalent_stop_times <- converted_gtfs$stop_times[grepl(trip, trip_id)]
#' equivalent_stop_times[equivalent_stop_times[, .I[1], by = trip_id]$V1]
#'
#' @export
frequencies_to_stop_times <- function(gtfs, trip_id = NULL, force = FALSE) {
gtfs <- assert_and_assign_gtfs_object(gtfs)
checkmate::assert_character(trip_id, null.ok = TRUE, any.missing = FALSE)
checkmate::assert_logical(force, len = 1, any.missing = FALSE)
gtfsio::assert_field_class(
gtfs,
"frequencies",
c("trip_id", "start_time", "end_time", "headway_secs"),
c("character", "character", "character", "integer")
)
gtfsio::assert_field_class(
gtfs,
"stop_times",
c("trip_id", "arrival_time", "departure_time"),
c("character", "character", "character")
)
if (!is.null(trip_id)) {
relevant_trips <- trip_id
} else {
relevant_trips <- unique(gtfs$frequencies$trip_id)
}
# raise warning if a given trip_id doesn't exist in 'frequencies'
if (!is.null(trip_id)) {
invalid_trip_id <- trip_id[
! trip_id %chin% unique(gtfs$frequencies$trip_id)
]
if (!identical(invalid_trip_id, character(0))) {
warning(
"'frequencies' doesn't contain the following trip_id(s): ",
paste0("'", invalid_trip_id, "'", collapse = ", "),
call. = FALSE
)
relevant_trips <- setdiff(relevant_trips, invalid_trip_id)
}
}
# check if a trip exists in 'frequencies' but not in 'stop_times', and
# conditionally remove them from the pool of trips based on 'force'
stop_times_trips <- unique(gtfs$stop_times$trip_id)
missing_from_stop_times <- setdiff(relevant_trips, stop_times_trips)
if (!force) relevant_trips <- setdiff(relevant_trips, missing_from_stop_times)
if (!identical(missing_from_stop_times, character(0))) {
warning(
"The following trip_id(s) are listed in 'frequencies' ",
"but not in 'stop_times': ",
paste0("'", missing_from_stop_times, "'", collapse = ", "),
call. = FALSE
)
}
# if they do not exist already, create auxiliary columns that hold the start
# and end time (in the case of frequencies) and the arrival and departure time
# (in the case of stop_times) of each trip in seconds
if (!gtfsio::check_field_exists(gtfs, "frequencies", "start_time_secs")) {
gtfs$frequencies[
trip_id %chin% relevant_trips,
start_time_secs := string_to_seconds(start_time)
]
created_start_secs <- TRUE
}
if (!gtfsio::check_field_exists(gtfs, "frequencies", "end_time_secs")) {
gtfs$frequencies[
trip_id %chin% relevant_trips,
end_time_secs := string_to_seconds(end_time)
]
created_end_secs <- TRUE
}
if (!gtfsio::check_field_exists(gtfs, "stop_times", "departure_time_secs")) {
gtfs$stop_times[
trip_id %chin% relevant_trips,
departure_time_secs := string_to_seconds(departure_time)
]
created_departure_secs <- TRUE
}
if (!gtfsio::check_field_exists(gtfs, "stop_times", "arrival_time_secs")) {
gtfs$stop_times[
trip_id %chin% relevant_trips,
arrival_time_secs := string_to_seconds(arrival_time)
]
created_arrival_secs <- TRUE
}
# first step: figure out, based on the 'frequencies' table, what are the
# departure times of each trip to be added to the 'stop_times' table
departure_times <- lapply(
relevant_trips,
function(trip) {
trip_frequencies <- gtfs$frequencies[trip_id == trip]
trip_frequencies[
,
departure_times := mapply(
seq,
from = start_time_secs,
to = end_time_secs,
by = headway_secs,
SIMPLIFY = FALSE
)
]
departure_secs <- unlist(trip_frequencies$departure_times)
# there may be some duplicated departure times if the same value is listed
# in the upper and lower limit of two differente 'frequencies' entries
# (e.g. if the table specifies one frequency from 5am to 6am and another
# from 6am to 7am, the 6am departure may appear in the departures
# generated by both entries).
# so we take the unique 'departure_secs' values.
departure_secs <- unique(departure_secs)
# each new trip added to the 'stop_times' will be name after the original
# trip, with a _<n> suffix. so the trip "original_trip" will generate the
# trips "original_trip_1", "original_trip_2", ..., "original_trip_<n>"
new_trips_names <- sprintf(
"%s_%d",
trip,
seq_along(departure_secs)
)
departure_secs <- structure(departure_secs, names = new_trips_names)
}
)
# second step: identify the stop_times template of each relevant trip.
# since we have the departure time of each one of the trips to be added, we
# subtract the template's first departure time value from all template's
# departure and arrival times.
templates <- lapply(
relevant_trips,
function(trip) {
template <- gtfs$stop_times[trip_id == trip]
# if template has 0 rows (i.e. the specified trip doesn't exist in
# stop_times), the min() call bellow will raise a warning and return Inf.
# the value of first_departure won't change the result of the subtractions
# below, since departure/arrival_time_secs are integer(0), but I'll assign
# integer(0) to first_departure just for greater expressiveness
if (identical(template$departure_time_secs, integer(0))) {
first_departure <- integer(0)
} else {
first_departure <- min(template$departure_time_secs, na.rm = TRUE)
}
template[
,
`:=`(
departure_time_secs = departure_time_secs - first_departure,
arrival_time_secs = arrival_time_secs - first_departure
)
][]
}
)
# third step: build a "new" stop_times table by adding the departure time of
# each new trip to the departure and arrival time of its correspondent
# template.
# the 'if' by the end of the step is required when the specified trip_id is
# character(0), in which case the rbindlist call returns a data.table with no
# columns
stop_times_to_add <- mapply(
departure_times,
templates,
SIMPLIFY = FALSE,
FUN = function(departures, template) {
n_stops <- nrow(template)
n_departures <- length(departures)
seconds_to_add <- rep(departures, each = n_stops)
template_dep <- rep(template$departure_time_secs, times = n_departures)
template_arr <- rep(template$arrival_time_secs, times = n_departures)
new_departures <- template_dep + seconds_to_add
new_arrivals <- template_arr + seconds_to_add
adjusted_times <- data.table::data.table(
trip_id = rep(names(departures), each = n_stops),
departure_time_secs = new_departures,
arrival_time_secs = new_arrivals
)
adjusted_cols <- c(
"trip_id",
"departure_time",
"departure_time_secs",
"arrival_time",
"arrival_time_secs"
)
other_cols <- setdiff(names(template), adjusted_cols)
template_excess <- template[, ..other_cols]
adjusted_times <- cbind(
adjusted_times,
template_excess[rep(seq_len(n_stops), times = n_departures)]
)
}
)
stop_times_to_add <- data.table::rbindlist(stop_times_to_add)
if (ncol(stop_times_to_add) > 0) {
stop_times_to_add[
,
`:=`(
departure_time = seconds_to_string(departure_time_secs),
arrival_time = seconds_to_string(arrival_time_secs)
)
]
}
# fourth step: filter the original stop_times table and bind the new one to
# it. remove the auxiliary columns if they didn't exist before the function
# call
if (exists("created_departure_secs")) {
gtfs$stop_times[, departure_time_secs := NULL]
if (ncol(stop_times_to_add) > 0) {
stop_times_to_add[, departure_time_secs := NULL]
}
}
if (exists("created_arrival_secs")) {
gtfs$stop_times[, arrival_time_secs := NULL]
if (ncol(stop_times_to_add) > 0) {
stop_times_to_add[, arrival_time_secs := NULL]
}
}
filtered_stop_times <- gtfs$stop_times[! trip_id %chin% relevant_trips]
gtfs$stop_times <- rbind(filtered_stop_times, stop_times_to_add)
# fifth step: adjust the trips table to include the new trips
trips_to_add <- mapply(
relevant_trips,
departure_times,
SIMPLIFY = FALSE,
FUN = function(trip, departures) {
n_departures <- length(departures)
new_trips <- gtfs$trips[trip_id == trip]
new_trips <- new_trips[rep(1, n_departures)]
new_trips[, trip_id := names(departures)][]
}
)
trips_to_add <- data.table::rbindlist(trips_to_add)
filtered_trips <- gtfs$trips[! trip_id %chin% relevant_trips]
gtfs$trips <- rbind(filtered_trips, trips_to_add)
# sixth step: adjust the frequencies table. remove the auxiliary columns if
# they didn't exist before the function call
if (exists("created_start_secs")) {
gtfs$frequencies[, start_time_secs := NULL]
}
if (exists("created_end_secs")) {
gtfs$frequencies[, end_time_secs := NULL]
}
filtered_frequencies <- gtfs$frequencies[! trip_id %chin% relevant_trips]
if (nrow(filtered_frequencies) > 0) {
gtfs$frequencies <- filtered_frequencies
} else {
gtfs$frequencies <- NULL
}
return(gtfs)
}