/
stat-alluvium.r
executable file
·462 lines (415 loc) · 16.9 KB
/
stat-alluvium.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
#' Alluvial positions
#'
#' Given a dataset with alluvial structure, `stat_alluvium` calculates the
#' centroids (`x` and `y`) and heights (`ymin` and `ymax`) of the lodes, the
#' intersections of the alluvia with the strata. It leverages the `group`
#' aesthetic for plotting purposes (for now).
#' @template stat-aesthetics
#' @template computed-variables
#' @template order-options
#' @template defunct-stat-params
#'
#' @import ggplot2
#' @importFrom rlang .data
#' @family alluvial stat layers
#' @seealso [ggplot2::layer()] for additional arguments and [geom_alluvium()],
#' [geom_lode()], and [geom_flow()] for the corresponding geoms.
#' @inheritParams stat_flow
#' @param cement.alluvia Logical value indicating whether to aggregate `y`
#' values over equivalent alluvia before computing lode and flow positions.
#' @param aggregate.y Deprecated alias for `cement.alluvia`.
#' @param lode.guidance The function to prioritize the axis variables for
#' ordering the lodes within each stratum, or else a character string
#' identifying the function. Character options are "zigzag", "frontback",
#' "backfront", "forward", and "backward" (see [`lode-guidance-functions`]).
#' @param lode.ordering **Deprecated in favor of the `order` aesthetic.** A list
#' (of length the number of axes) of integer vectors (each of length the
#' number of rows of `data`) or NULL entries (indicating no imposed ordering),
#' or else a numeric matrix of corresponding dimensions, giving the preferred
#' ordering of alluvia at each axis. This will be used to order the lodes
#' within each stratum by sorting the lodes first by stratum, then by the
#' provided vectors, and lastly by remaining factors (if the vectors contain
#' duplicate entries and therefore do not completely determine the lode
#' orderings).
#' @example inst/examples/ex-stat-alluvium.r
#' @export
stat_alluvium <- function(mapping = NULL,
data = NULL,
geom = "alluvium",
position = "identity",
decreasing = NULL,
reverse = NULL,
absolute = NULL,
discern = FALSE,
negate.strata = NULL,
aggregate.y = NULL,
cement.alluvia = NULL,
lode.guidance = NULL,
lode.ordering = NULL,
aes.bind = NULL,
infer.label = FALSE,
min.y = NULL, max.y = NULL,
na.rm = FALSE,
show.legend = NA,
inherit.aes = TRUE,
...) {
layer(
stat = StatAlluvium,
data = data,
mapping = mapping,
geom = geom,
position = position,
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(
decreasing = decreasing,
reverse = reverse,
absolute = absolute,
discern = discern,
negate.strata = negate.strata,
aggregate.y = aggregate.y,
cement.alluvia = cement.alluvia,
lode.guidance = lode.guidance,
lode.ordering = lode.ordering,
aes.bind = aes.bind,
infer.label = infer.label,
min.y = min.y, max.y = max.y,
na.rm = na.rm,
...
)
)
}
#' @rdname ggalluvial-ggproto
#' @usage NULL
#' @export
StatAlluvium <- ggproto(
"StatAlluvium", Stat,
required_aes = c("x"),
# `<new-aes> = NULL` prevents "unknown aesthetics" warnings
default_aes = aes(weight = 1, stratum = NULL, alluvium = NULL),
setup_params = function(data, params) {
if (! is.null(params$lode.ordering)) {
if (is.list(params$lode.ordering)) {
# replace any null entries with uniform `NA` vectors
wh_null <- which(sapply(params$lode.ordering, is.null))
len <- unique(sapply(params$lode.ordering[wh_null], length))
if (length(len) > 1) stop("Lode orderings have different lengths.")
for (w in wh_null) params$lode.ordering[[w]] <- rep(NA, len)
# convert list to array (requires equal-length numeric entries)
params$lode.ordering <- do.call(cbind, params$lode.ordering)
}
}
params
},
setup_data = function(data, params) {
# assign `alluvium` to `stratum` if `stratum` not provided
if (is.null(data$stratum) && ! is.null(data$alluvium)) {
data$stratum <- data$alluvium
}
# assign unit amounts if not provided
if (is.null(data$y)) {
data$y <- rep(1, nrow(data))
} else if (any(is.na(data$y))) {
stop("Data contains missing `y` values.")
}
type <- get_alluvial_type(data)
if (type == "none") {
stop("Data is not in a recognized alluvial form ",
"(see `help('alluvial-data')` for details).")
}
if (params$na.rm) {
data <- na.omit(object = data)
} else {
data <- na_keep(data = data, type = type)
}
# ensure that data is in lode form
if (type == "alluvia") {
axis_ind <- get_axes(names(data))
data <- to_lodes_form(data = data, axes = axis_ind,
discern = params$discern)
# positioning requires numeric `x`
data <- data[with(data, order(x, stratum, alluvium)), , drop = FALSE]
data$x <- contiguate(data$x)
} else {
if (! is.null(params$discern) && ! (params$discern == FALSE)) {
warning("Data is already in lodes format, ",
"so `discern` will be ignored.")
}
}
# negate strata
if (! is.null(params$negate.strata)) {
if (! all(params$negate.strata %in% unique(data$stratum))) {
warning("Some values of `negate.strata` are not among strata.")
}
wneg <- which(data$stratum %in% params$negate.strata)
if (length(wneg) > 0) data$y[wneg] <- -data$y[wneg]
}
data
},
compute_panel = function(data, scales,
decreasing = NULL,
reverse = NULL,
absolute = NULL,
discern = FALSE, distill = first,
negate.strata = NULL,
aggregate.y = NULL,
cement.alluvia = NULL,
lode.guidance = NULL,
lode.ordering = NULL,
aes.bind = NULL,
infer.label = FALSE,
min.y = NULL, max.y = NULL) {
# parameter defaults
if (is.null(decreasing)) decreasing <- ggalluvial_opt("decreasing")
if (is.null(reverse)) reverse <- ggalluvial_opt("reverse")
if (is.null(absolute)) absolute <- ggalluvial_opt("absolute")
if (is.null(cement.alluvia)) cement.alluvia <- ggalluvial_opt("cement.alluvia")
if (is.null(lode.guidance)) lode.guidance <- ggalluvial_opt("lode.guidance")
if (is.null(aes.bind)) aes.bind <- ggalluvial_opt("aes.bind")
# introduce label
if (infer.label) {
deprecate_parameter("infer.label",
msg = "Use `aes(label = after_stat(lode))`.")
if (is.null(data$label)) {
data$label <- data$alluvium
} else {
warning("Aesthetic `label` is specified, ",
"so parameter `infer.label` will be ignored.")
}
}
# ensure that `lode.ordering` is a matrix with column names
if (! is.null(lode.ordering)) {
deprecate_parameter("lode.ordering",
msg = "Use the `order` aesthetic instead.")
if (is.null(data$order)) {
# bind a vector to itself to create a matrix
if (is.vector(lode.ordering)) {
lode.ordering <- matrix(lode.ordering,
nrow = length(lode.ordering),
ncol = length(unique(data$x)))
}
# flatten `lode.ordering` into an 'order' column
data$order <- as.vector(lode.ordering)
} else {
warning("Aesthetic `order` is specified, ",
"so parameter `lode.ordering` will be ignored.")
}
}
# differentiation aesthetics (in prescribed order)
diff_aes <- intersect(c(.color_diff_aesthetics, .text_aesthetics),
names(data))
# match arguments for `aes.bind`
if (! is.null(aes.bind)) {
if (is.logical(aes.bind)) {
aes.bind.rep <- if (aes.bind) "flows" else "none"
warning("Logical values of `aes.bind` are deprecated; ",
"replacing ", aes.bind, " with '", aes.bind.rep, "'.")
aes.bind <- aes.bind.rep
}
aes.bind <- match.arg(aes.bind, c("none", "flows", "alluvia"))
}
# sign variable (sorts positives before negatives)
data$yneg <- data$y < 0
# lode variable (before co-opting 'alluvium')
data$lode <- data$alluvium
# specify distillation function from `distill`
distill <- distill_fun(distill)
# initiate variables for `after_stat()`
weight <- data$weight
data$weight <- NULL
if (is.null(weight)) weight <- 1
data$n <- weight
data$count <- data$y * weight
# transform 'order' according to `absolute` and `reverse` params
if (! is.null(data$order)) data$order <- xtfrm(data$order) *
(-1) ^ (data$yneg * absolute + reverse)
# cement (aggregate) `y` over otherwise equivalent alluvia
if (! is.null(aggregate.y)) {
deprecate_parameter("aggregate.y", "cement.alluvia")
cement.alluvia <- aggregate.y
}
if (cement.alluvia) {
# -+- need to stop depending on 'group' and 'PANEL' -+-
only_vars <- intersect(c(diff_aes, "group", "PANEL"), names(data))
bind_vars <- intersect(c("yneg", "stratum", only_vars), names(data))
sum_vars <- c("y", "n", "count")
# interaction of all variables to aggregate over (without dropping NAs)
# -+- need to stop depending on 'group' -+-
data$binding <- as.numeric(interaction(lapply(
data[, bind_vars, drop = FALSE],
addNA, ifany = FALSE
), drop = TRUE))
# convert to alluvia format with 'binding' entries
luv_dat <- alluviate(data, "x", "binding", "alluvium")
# sort by all axes (everything except 'alluvium')
luv_dat <- luv_dat[do.call(
order,
luv_dat[, setdiff(names(luv_dat), "alluvium"), drop = FALSE]
), , drop = FALSE]
# define map from original to aggregated 'alluvium' column
luv_orig <- luv_dat$alluvium
luv_agg <- cumsum(! duplicated(interaction(
luv_dat[, setdiff(names(luv_dat), "alluvium"), drop = FALSE],
drop = TRUE
)))
# transform 'alluvium' in `data` accordingly
data$alluvium <- luv_agg[match(data$alluvium, luv_orig)]
# aggregate variables over 'x', 'yneg', and 'stratum':
# sum of computed variables and unique-or-bust values of aesthetics
by_vars <- c("x", "yneg", "stratum", "alluvium", "binding")
agg_lode <- stats::aggregate(data[, "lode", drop = FALSE],
data[, by_vars],
distill)
if (length(only_vars) > 0) {
agg_only <- stats::aggregate(data[, only_vars, drop = FALSE],
data[, by_vars],
only)
}
agg_dat <- stats::aggregate(data[, sum_vars],
data[, by_vars],
sum)
agg_dat <- merge(agg_dat, agg_lode)
if (length(only_vars) > 0) {
agg_dat <- merge(agg_dat, agg_only)
}
# merge into `data`, ensuring that no `key`-`id` pairs are duplicated
data <- unique(merge(
agg_dat,
data[, setdiff(names(data), sum_vars)],
all.x = TRUE, all.y = FALSE
))
data$binding <- NULL
}
# define 'deposit' variable to rank strata vertically
data <- deposit_data(data, decreasing, reverse, absolute)
# ensure that `lode.guidance` is a function
if (is.character(lode.guidance)) {
lode.guidance <- get(paste0("lode_", lode.guidance))
}
stopifnot(is.function(lode.guidance))
# invoke surrounding axes in the order prescribed by `lode.guidance`
lode_ord <- guide_lodes(data, lode.guidance)
# convert `lode_ord` into a single sorting variable 'rem_deposit'
# that orders index lodes by remaining / remote deposits
names(lode_ord) <- sort(unique(data$x))
lode_ord$alluvium <- if (is.null(rownames(lode_ord))) {
if (is.factor(data$alluvium)) {
levels(data$alluvium)
} else if (is.numeric(data$alluvium)) {
sort(unique(data$alluvium))
} else {
unique(data$alluvium)
}
} else {
rownames(lode_ord)
}
# match `lode_ord$x` back to `data$x`
uniq_x <- sort(unique(data$x))
lode_ord <- tidyr::gather(lode_ord,
key = "x", value = "rem_deposit",
as.character(uniq_x))
match_x <- match(lode_ord$x, as.character(uniq_x))
lode_ord$x <- uniq_x[match_x]
# merge `lode_ord` back into `data`
data <- merge(data, lode_ord, by = c("x", "alluvium"),
all.x = TRUE, all.y = FALSE)
# identify fissures at aesthetics that vary within strata
n_lodes <- nrow(unique(data[, c("x", "stratum")]))
fissure_aes <- diff_aes[which(sapply(diff_aes, function(x) {
nrow(unique(data[, c("x", "stratum", x)]))
}) > n_lodes)]
data$fissure <- if (length(fissure_aes) == 0) {
1
} else {
# order by aesthetics in order
as.integer(interaction(data[, rev(fissure_aes)], drop = TRUE)) *
(-1) ^ (data$yneg * absolute + reverse)
}
# calculate variables for `after_stat()`
x_sums <- tapply(abs(data$count), data$x, sum, na.rm = TRUE)
data$prop <- data$count / x_sums[match(as.character(data$x), names(x_sums))]
# reverse alluvium order
data$fan <- xtfrm(data$alluvium) * (-1) ^ reverse
# sort data in preparation for `y` sums
sort_fields <- c(
"x",
"deposit",
if (! is.null(data$order)) "order",
if (aes.bind == "alluvia") "fissure",
"rem_deposit",
if (aes.bind == "flows") "fissure",
"fan"
)
data <- data[do.call(order, data[, sort_fields]), , drop = FALSE]
# calculate `y` sums
data$ycum <- NA
for (xx in unique(data$x)) {
for (yn in c(FALSE, TRUE)) {
ww <- which(data$x == xx & data$yneg == yn)
data$ycum[ww] <- cumulate(data$y[ww])
}
}
# calculate y bounds
data$rem_deposit <- NULL
data$order <- NULL
data$fissure <- NULL
data$fan <- NULL
data$ymin <- data$ycum - abs(data$y) / 2
data$ymax <- data$ycum + abs(data$y) / 2
data$y <- data$ycum
data$yneg <- NULL
data$ycum <- NULL
# within each alluvium, indices at which subsets are contiguous
data <- data[with(data, order(x, alluvium)), , drop = FALSE]
data$cont <- duplicated(data$alluvium) &
! duplicated(data[, c("x", "alluvium")])
data$axis <- contiguate(data$x)
# within each alluvium, group contiguous subsets
# (data is sorted by `x` and `alluvium`; group_by() does not reorder it)
data <- dplyr::ungroup(dplyr::mutate(dplyr::group_by(data, alluvium),
flow = axis - cumsum(cont)))
# add 'group' to group contiguous alluvial subsets
data <- transform(data, group = as.numeric(interaction(alluvium, flow)))
# remove unused fields
data$cont <- NULL
data$axis <- NULL
data$flow <- NULL
# impose height restrictions
if (! is.null(min.y)) data <- subset(data, ymax - ymin >= min.y)
if (! is.null(max.y)) data <- subset(data, ymax - ymin <= max.y)
# arrange data by aesthetics for consistent (reverse) z-ordering
data <- z_order_aes(data, diff_aes)
data
}
)
# apply lode guidance function to produce ordering matrix
guide_lodes <- function(data, guidance_fun) {
# summary data of alluvial deposits
alluv_dep <- alluviate(data, "x", "deposit", "alluvium")
# axis indices
alluv_x <- setdiff(names(alluv_dep), "alluvium")
# initialize ordering matrix
ord_mat <- matrix(NA_integer_,
nrow = nrow(alluv_dep), ncol = length(alluv_x))
dimnames(ord_mat) <- list(alluv_dep$alluvium, alluv_x)
# calculate orderings from `guidance_fun`
for (xx in alluv_x) {
ii <- match(xx, alluv_x)
ord_x <- guidance_fun(length(alluv_x), match(xx, alluv_x))
# order by prescribed ordering and by aesthetics in order
ord_mat[, xx] <- interaction(alluv_dep[, alluv_x[rev(ord_x)]],
drop = TRUE)
}
# check that array has correct dimensions
stopifnot(dim(ord_mat) ==
c(length(unique(data$alluvium)), length(unique(data$x))))
# return ordering matrix as a data aframe
as.data.frame(ord_mat)
}
# build alluvial dataset for reference during lode-ordering
alluviate <- function(data, key, value, id) {
to_alluvia_form(
data[, c(key, value, id)],
key = key, value = value, id = id
)
}