/
oneway.R
183 lines (174 loc) · 6.35 KB
/
oneway.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
#' Aggregate OD pairs they become non-directional
#'
#' For example, sum total travel in both directions.
#' @param x A data frame or SpatialLinesDataFrame, representing an OD matrix
#' @param attrib A vector of column numbers or names, representing variables to be aggregated.
#' By default, all numeric variables are selected.
#' @param FUN The aggregating function such as `sum` (the default) and `mean`
#' @param ... Further arguments passed to or used by methods
#' @param id1 Optional (it is assumed to be the first column)
#' text string referring to the name of the variable containing
#' the unique id of the origin
#' @param id2 Optional (it is assumed to be the second column)
#' text string referring to the name of the variable containing
#' the unique id of the destination
#' @return `oneway` outputs a data frame (or `sf` data frame) with rows containing
#' results for the user-selected attribute values that have been aggregated.
#' @param oneway_key Optional key of unique OD pairs regardless of the order,
#' e.g., as generated by [od_id_max_min()] or [od_id_szudzik()]
#' @export
#' @details
#' Flow data often contains movement in two directions: from point A to point B
#' and then from B to A. This can be problematic for transport planning, because
#' the magnitude of flow along a route can be masked by flows the other direction.
#' If only the largest flow in either direction is captured in an analysis, for
#' example, the true extent of travel will by heavily under-estimated for
#' OD pairs which have similar amounts of travel in both directions.
#' Flows in both direction are often represented by overlapping lines with
#' identical geometries which can be confusing
#' for users and are difficult to plot.
#' @examples
#' (od_min = od_data_df[c(1, 2, 1), 1:4])
#' od_min[3, 1:2] = rev(od_min[3, 1:2])
#' od_min[3, 3:4] = od_min[3, 3:4] - 5
#' (od_oneway = od_oneway(od_min))
#' nrow(od_oneway) < nrow(od_min) # result has fewer rows
#' sum(od_min$all) == sum(od_oneway$all) # but the same total flow
#' (od_oneway = od_oneway(od_min, FUN = mean))
#' od_oneway(od_min, attrib = "all")
#' od_min$all[3] = NA
#' (od_oneway = od_oneway(od_min, FUN = mean, na.rm = TRUE))
od_oneway = function(x,
attrib = names(x[-c(1:2)])[vapply(x[-c(1:2)], is.numeric, TRUE)],
FUN = sum,
...,
id1 = names(x)[1],
id2 = names(x)[2],
oneway_key = NULL
) {
# is_sf = is(x, "sf") # only make it work with dfs for now
if (is.null(oneway_key)) {
id1_temp = x[[id1]]
x[[id1]] = pmin(x[[id1]], x[[id2]])
x[[id2]] = pmax(id1_temp, x[[id2]])
}
if (is.numeric(attrib)) {
attrib = attrib - 2 # account for 1st 2 columns being ids
}
x_oneway = stats::aggregate(x[attrib], list(o = x[[id1]], d = x[[id2]]), FUN, ...)
if (is.numeric(attrib)) {
attrib_names = names(x)[attrib]
} else {
attrib_names = attrib
attrib = which(names(x) %in% attrib)
}
return(x_oneway)
}
#' Generate ordered ids of OD pairs so lowest is always first
#' This function is slow on large datasets, see szudzik_pairing for faster alternative
#'
#' @param x A data frame representing OD pairs
#' @param id1 Optional (it is assumed to be the first column)
#' text string referring to the name of the variable containing
#' the unique id of the origin
#' @param id2 Optional (it is assumed to be the second column)
#' text string referring to the name of the variable containing
#' the unique id of the destination
#' @examples
#' x = data.frame(id1 = c(1, 1, 2, 2, 3), id2 = c(1, 2, 3, 1, 4))
#' od_id_order(x) # 4th line switches id1 and id2 so oneway_key is in order
#' @export
od_id_order = function(x, id1 = names(x)[1], id2 = names(x)[2]) {
data.frame(
stringsAsFactors = FALSE,
stplanr.id1 = x[[id1]],
stplanr.id1 = x[[id2]],
oneway_key = od_id_character(x[[id1]], x[[id2]])
)
}
#' Combine two ID values to create a single ID number
#'
#' @details
#' In OD data it is common to have many 'oneway' flows from "A to B" and "B to A".
#' It can be useful to group these an have a single ID that represents pairs of IDs
#' with or without directionality, so they contain 'twoway' or bi-directional values.
#'
#' `od_id*` functions take two vectors of equal length and return a vector of IDs,
#' which are unique for each combination but the same for twoway flows.
#'
#' - the Szudzik pairing function, on two vectors of equal
#' length. It returns a vector of ID numbers.
#'
#' This function superseeds od_id_order as it is faster on large datasets
#' @param x a vector of numeric, character, or factor values
#' @param y a vector of numeric, character, or factor values
#' @param ordermatters logical, does the order of values matter to pairing, default = FALSE
#' @family od
#' @seealso od_oneway
#' @name od_id
#' @examples
#' (d = od_data_df[2:9, 1:2])
#' (id = od_id_character(d[[1]], d[[2]]))
#' duplicated(id)
#' od_id_szudzik(d[[1]], d[[2]])
#' od_id_max_min(d[[1]], d[[2]])
NULL
#' @rdname od_id
#' @export
od_id_szudzik = function(x, y, ordermatters = FALSE) {
if (length(x) != length(y)) {
stop("x and y are not of equal length")
}
if (methods::is(x, "factor")) {
x = as.character(x)
}
if (methods::is(y, "factor")) {
y = as.character(y)
}
lvls = unique(c(x, y))
x = as.integer(factor(x, levels = lvls))
y = as.integer(factor(y, levels = lvls))
if (ordermatters) {
ismax = x > y
oneway_key = (ismax * 1) * (x^2 + x + y) + ((!ismax) * 1) * (y^2 + x)
} else {
a = ifelse(x > y, y, x)
b = ifelse(x > y, x, y)
oneway_key = b^2 + a
}
return(oneway_key)
}
#' @export
#' @rdname od_id
od_id_max_min = function(x, y) {
d = convert_to_numeric(x, y)
a = pmax(d$x, d$y)
b = pmin(d$x, d$y)
a * (a + 1) / 2 + b
}
#' @export
#' @rdname od_id
od_id_character = function(x, y) {
paste(
pmin(x, y),
pmax(x, y)
)
}
convert_to_numeric = function(x, y) {
if (length(x) != length(y)) stop("x and y are not of equal length")
if (methods::is(x, "factor")) x = as.character(x)
if (methods::is(y, "factor")) y = as.character(y)
lvls = unique(c(x, y))
x = as.integer(factor(x, levels = lvls))
y = as.integer(factor(y, levels = lvls))
list(x = x, y = y)
}
od_id_order_base = function(x, y) {
d = convert_to_numeric(x, y)
x = d$x
y = d$y
paste(pmin(x, y), pmax(x, y))
}
not_duplicated = function(x) {
!duplicated(x)
}