/
jitterPoints.R
145 lines (132 loc) · 5.8 KB
/
jitterPoints.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
#' Jitter points for categorical variables
#'
#' Add quasi-random jitter on the x-axis for violin plots when the x-axis variable is categorical.
#' Add random jitter within a rectangular area for square plots when both x- and y-axis variables are categorical.
#'
#' @param X A factor corresponding to a categorical variable.
#' @param Y A numeric vector of the same length as \code{X} for \code{jitterViolinPoints}, or a factor of the same length as \code{X} for \code{jitterSquarePoints}.
#' @param grouping A named list of factors of the same length as \code{X}, specifying how elements should be grouped.
#' @param ... Further arguments to be passed to \code{\link{offsetX}}.
#'
#' @details
#' The \code{jitterViolinPoints} function calls \code{\link{offsetX}} to obtain quasi-random jittered x-axis values.
#' This reflects the area occupied by a violin plot, though some tuning of arguments in \code{...} may be required to get an exact match.
#'
#' The \code{jitterSquarePoints} function will uniformly jitter points on both the x- and y-axes.
#' The jitter area is a square with area proportional to the frequency of the paired levels in \code{X} and \code{Y}.
#' If either factor only has one level, the jitter area becomes a rectangle that can be interpreted as a bar plot.
#'
#' If \code{grouping} is specified, the values corresponding to each point defines a single combination of levels.
#' Both functions will then perform jittering separately within each unique combination of levels.
#' This is useful for obtaining appropriate jittering when points are split by group, e.g., during faceting.
#'
#' If \code{grouping!=NULL} for \code{jitterSquarePoints} the statistics in the returned \code{summary} data.frame will be stratified by unique combinations of levels.
#' To avoid clashes with existing fields, the names in \code{grouping} should not be \code{"X"}, \code{"Y"}, \code{"Freq"}, \code{"XWidth"} or \code{"YWidth"}.
#'
#' @return
#' For \code{jitterViolinPoints}, a numeric vector is returned containing the jittered x-axis coordinates for all points.
#'
#' For \code{jitterSquarePoints}, a list is returned with numeric vectors \code{X} and \code{Y}, containing jittered coordinates on the x- and y-axes respectively for all points;
#' and \code{summary}, a data.frame of frequencies and side lengths for each unique pairing of X/Y levels.
#'
#' @author Aaron Lun
#' @importFrom stats runif
#'
#' @export
#' @rdname jitterPoints
#'
#' @examples
#' X <- factor(sample(LETTERS[1:4], 100, replace=TRUE))
#' Y <- rnorm(100)
#' (out1 <- jitterViolinPoints(X=X, Y=Y))
#'
#' Y2 <- factor(sample(letters[1:3], 100, replace=TRUE))
#' (out2 <- jitterSquarePoints(X=X, Y=Y2))
#'
#' grouped <- sample(5, 100, replace=TRUE)
#' (out3 <- jitterViolinPoints(X=X, Y=Y, grouping=list(FacetRow=grouped)))
#' (out4 <- jitterSquarePoints(X=X, Y=Y2, grouping=list(FacetRow=grouped)))
jitterSquarePoints <- function(X, Y, grouping=NULL) {
if (!is.factor(Y)) {
stop("'Y' should be a factor")
}
if (!is.factor(X)) {
stop("'X' should be a factor")
}
by_group <- .define_groups(X, Y, grouping)
jittered_X <- jittered_Y <- numeric(length(Y))
all_summary <- vector("list", length(by_group))
# X/Y-jitter for square plots.
for (g in seq_along(by_group)) {
grp <- by_group[[g]]
current <- data.frame(X=X[grp], Y=Y[grp])
summary_data <- as.data.frame(table(X=current$X, Y=current$Y))
norm_freq <- summary_data$Freq / max(summary_data$Freq)
if (all(is.na(norm_freq))) {
norm_freq <- numeric(nrow(summary_data))
}
# Collapsing to a bar plot if there is only one level on either axis.
if (nlevels(Y)==1L && nlevels(X)!=1L) {
summary_data$XWidth <- 0.4
summary_data$YWidth <- 0.49 * norm_freq
} else if (nlevels(Y)!=1L && nlevels(X)==1L) {
summary_data$XWidth <- 0.49 * norm_freq
summary_data$YWidth <- 0.4
} else {
summary_data$XWidth <- summary_data$YWidth <- 0.49 * sqrt(norm_freq)
}
current$Marker <- seq_len(nrow(current))
combined <- merge(current, summary_data, by=c('X', 'Y'), all.x=TRUE)
o <- order(combined$Marker)
width_x <- combined$XWidth[o]
width_y <- combined$YWidth[o]
jittered_X[grp] <- width_x*runif(nrow(current), -1, 1)
jittered_Y[grp] <- width_y*runif(nrow(current), -1, 1)
# Adding current bits and pieces regarding the grouping.
if (!is.null(grouping)) {
for (mode in names(grouping)) {
summary_data[[mode]] <- rep(grouping[[mode]][grp[1]], nrow(summary_data))
}
}
all_summary[[g]] <- summary_data
}
return(list(X=jittered_X + as.integer(X), Y=jittered_Y + as.integer(Y),
summary=do.call(rbind, all_summary)))
}
#' @export
#' @rdname jitterPoints
#' @importFrom vipor offsetX
jitterViolinPoints <- function(X, Y, grouping=NULL, ...) {
if (!is.numeric(Y)) {
stop("'Y' should be numeric")
}
if (!is.factor(X)) {
stop("'X' should be a factor")
}
jittered_X <- numeric(length(Y))
by_group <- .define_groups(X, Y, grouping)
for (g in by_group) {
jittered_X[g] <- vipor::offsetX(Y[g], x=X[g], ...)
}
return(jittered_X + as.integer(X))
}
.define_groups <- function(X, Y, grouping) {
stopifnot(length(X)==length(Y))
if (is.null(grouping)) {
return(list(seq_along(Y)))
}
stopifnot(all(lengths(grouping)==length(X)))
stopifnot(!is.null(names(grouping)))
o <- do.call(order, grouping)
nvals <- length(o)
is_first <- logical(nvals)
if (nvals) {
for (grp in grouping) {
grp <- grp[o]
is_first <- is_first | c(TRUE, grp[-1]!=grp[-nvals])
}
}
overall_group <- cumsum(is_first)
by.group <- split(o, overall_group)
unname(by.group)
}