-
Notifications
You must be signed in to change notification settings - Fork 0
/
ANOPA-plot.R
342 lines (312 loc) · 13.5 KB
/
ANOPA-plot.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
######################################################################################
#' @title anopaPlot: Easy plotting of proportions.
#'
#' @aliases anopaPlot
#'
#' @md
#'
#' @description The function 'anopaPlot()' performs a plot of proportions for designs
#' with up to 4 factors according to the
#' 'ANOPA' framework. See \insertCite{lc23;textual}{ANOPA} for more. The plot is
#' realized using the 'suberb' library; see \insertCite{cgh21;textual}{ANOPA}.
#' It uses the arc-sine transformation 'A()'.
#'
#'
#' @usage anopaPlot(w, formula, confidenceLevel = .95, allowImputing = FALSE,
#' showPlotOnly = TRUE, plotStyle = "line",
#' errorbarParams = list( width =0.5, linewidth=0.75 ), ...)
#'
#'
#' @param w An ANOPA object obtained with `anopa()`;
#'
#'
#' @param formula (optional) Use formula to plot just specific terms of the omnibus test.
#' For example, if your analysis stored in `w` has factors A, B and C, then
#' `anopaPlot(w, ~ A * B)` will only plot the factors A and B.
#' @param confidenceLevel Provide the confidence level for the confidence intervals
#' (default is 0.95, i.e., 95%).
#' @param allowImputing (default FALSE) if there are cells with no observations, can they be
#' imputed? If imputed, the option "ANOPA.zeros" will be used to determine
#' how many additional observations to add, and with how many successes.
#' If for example, the option is (by default) `c(0.05, 1)`, then
#' 20 cases will be added, only one being a success (respecting the .05 target).
#' _Keep in mind that imputations has never been studies with regards to proportions
#' so be mindfull that the default optin has never been tested nor validated._
#' @param showPlotOnly (optional, default True) shows only the plot or else
#' shows the numbers needed to make the plot yourself.
#' @param plotStyle (optional; default "line") How to plot the proportions;
#' see superb for other layouts (e.g., "line").
#' @param errorbarParams (optional; default list( width =0.5, linewidth=0.75 ) ) is
#' a list of attributes used to plot the error bars. See superb for more.
#' @param ... Other directives sent to superb(), typically 'plotStyle',
#' 'errorbarParams', etc.
#'
#'
#' @return a ggplot2 object of the given proportions.
#'
#'
#' @details The plot shows the proportions on the vertical axis as a
#' function of the factors (the first on the horizontal axis, the second
#' if any in a legend; and if a third or even a fourth factors are present,
#' as distinct rows and columns). It also shows 95% confidence intervals of
#' the proportions, adjusted for between-cells comparisons.
#' The confidence intervals are based on a z distribution, which is adequate
#' for large samples \insertCite{c90,ll90}{ANOPA}. This "stand-alone" confidence
#' interval is then adjusted for between-cell comparisons using the _superb_
#' framework \insertCite{cgh21}{ANOPA}.
#'
#' See the vignette [`DataFormatsForProportions`](../articles/B-DataFormatsForProportions.html)
#' for more on data formats and how to write their formula.
#' See the vignette [`ConfidenceIntervals`](../articles/C-ConfidenceIntervals.html) for
#' details on the adjustment and its purpose.
#'
#' @references
#' \insertAllCited{}
#'
#' @examples
#' #
#' # The Arrington Et Al., 2002, data on fishes' stomach
#' ArringtonEtAl2002
#'
#' # This examine the omnibus analysis, that is, a 3 x 2 x 4 ANOPA:
#' w <- anopa( {s;n} ~ Location * Trophism * Diel, ArringtonEtAl2002)
#'
#' # Once processed into w, we can ask for a standard plot
#' anopaPlot(w)
#'
#' # As you may notice, there are points missing because the data have
#' # three missing cells. The litterature is not clear what should be
#' # done with missing cells. In this package, we propose to impute
#' # the missing cells based on the option `getOption("ANOPA.zeros")`.
#' # Consider this option with care.
#' anopaPlot(w, allowImputing = TRUE)
#'
#' # We can place the factor `Diel` on the x-axis (first):
#' \donttest{anopaPlot(w, ~ Diel * Trophism * Location )}
#'
#' # Change the style for a plot with bars instead of lines
#' \donttest{anopaPlot(w, plotStyle = "bar")}
#'
#' # Changing the error bar style
#' \donttest{anopaPlot(w, plotStyle = "bar", errorbarParams = list( width =0.1, linewidth=0.1 ) )}
#'
#' # Illustrating the main effect of Location (not interacting with other factors)
#' # and the interaction Diel * Trophism separately
#' \donttest{anopaPlot(w, ~ Location ) }
#' \donttest{anopaPlot(w, ~ Diel * Trophism ) }
#'
#' # All these plots are ggplot2 so they can be followed with additional directives, e.g.
#' library(ggplot2)
#' anopaPlot(w, ~ Location) + ylim(0.0, 1.0) + theme_classic()
#' anopaPlot(w, ~ Diel * Trophism) + ylim(0.0, 1.0) + theme_classic()
#'
#' # etc. Any ggplot2 directive can be added to customize the plot to your liking.
#' # See the vignette `ArringtonExample`.
#'
######################################################################################
#'
#' @importFrom Rdpack reprompt
#' @importFrom stats aggregate
#' @export anopaPlot
#
######################################################################################
################################################################
# create a custom scale based on the arcsin transform #
################################################################
# This scale is adapted to handle cases where numbers outside of
# the legitimate scale would be submitted. This happens with
# difference-adjusted CI whose length are increased by sqrt(2).
# This is tricky because ggplots send extreme cases to the
# function to know how to draw it, including NAs.
anopa_asn_trans1 <- function(x) {
w <- suppressWarnings(asin(sqrt(x)))
if (!is.na(x)&&(x<0)) w = anopa_asn_trans1(0) ## boundary truncation
if (!is.na(x)&&(x>1)) w = anopa_asn_trans1(1) ## boundary truncation
return(w)
}
anopa_asn_trans2 <- function(x) {
w <- suppressWarnings(sin(x)^2)
return(w)
}
anopa_asn_trans <- function () {
scales::trans_new("asn2",
function(x) {
sapply(x, anopa_asn_trans1)
},
function(x) {
sapply(x, anopa_asn_trans2)
},
domain = c(0, 1)
)
}
################################################################
# This is it! let's make the plot #
################################################################
# make the plot: just a proxy for suberbPlot
anopaPlot <- function(w,
formula = NULL,
confidenceLevel = 0.95,
allowImputing = FALSE,
showPlotOnly = TRUE,
plotStyle = "line", # lines by default
errorbarParams = list( width =0.5, linewidth=0.75 ), # thicker error bars
... # will be transmitted to superb as is
){
##############################################################################
## STEP 1: validating the formula if one is given
##############################################################################
if (!is.null(formula)) {
# 1.1. is it a legitimate formula?
if (!(is.formula(formula)))
stop("ANOPA::error(211): The formula argument is not a legitimate formula. Exiting...")
# 1.2. only lhs part
if ( !(is.one.sided(formula)))
stop("ANOPA::error(212): Only one-sided, rhs, formulas can be given. Exiting...")
# 1.3. No cbind, no nested
if ((has.nested.terms(formula))||has.cbind.terms(formula))
stop("ANOPA::error(213): No cbind nor nested variables allowed. Exiting...")
# 1.4. Are they existing factors?
if (!all(all.vars(formula) %in% c(w$BSfactColumns,w$WSfactColumns)))
stop("ANOPA::error(214): The named factor(s) given in the formula are not in the model. Exiting...")
# 1.6. if everything ok, let's extract the variables...
facts <- all.vars(formula)[ !(all.vars(formula) %in% w$DVvariables)]
bsfact <- all.vars(formula)[ all.vars(formula) %in% w$BSfactColumns]
wsfact <- all.vars(formula)[ all.vars(formula) %in% w$WSfactColumns]
# 1.7. ... and aggregate the data accordingly
ldata <- wtol( w$wideData, w$DVvariables )
Ainv <- function(a, n) (n+3/4)*(sin(a))^2 - 3/8
Amean <- function(v) Ainv( Atrans(v), 1)
if(length(wsfact)>0){
toto <- "Variable"
tempvars <- as.character(w$DVvariables)
} else {
toto <- "dummy"
ldata$dummy <- "s"
tempvars <- "s"
}
# merge the within-subject variables to the long data frame
if ( length(wsfact) != 0 ) {
ldata <- merge(ldata, w$WSfactDesign, by = "Variable", all.x = TRUE)
}
adata <- stats::aggregate(as.formula(
paste("Value", paste(
c("Id", toto, facts),
collapse="+"), sep="~")
),
data = ldata, Amean)
#remove the within-subject variables
adata[,wsfact] <- NULL
wdata <- ltow( adata, "Id", toto, "Value" )
} else {
facts <- c(w$BSfactColumns, w$WSfactColumns)
bsfact <- w$BSfactColumns
wsfact <- w$WSfactColumns
wdata <- w$wideData
tempvars <- w$DVvariables
}
# 1.8. If incorrect confidence level...
if ((confidenceLevel >= 1)||(confidenceLevel <=0.0))
stop("ANOPA::error(214): The confidence level is not within 0 and 1 (i.e., within 0% and 100%). Exiting...")
# 1.9. Empty is not NULL
if (length(bsfact)==0) bsfact = NULL
if (length(wsfact)==0) {
wsfact = NULL
} else {
wsfact = paste(w$WSfactColumn[w$WSfactColumn %in% wsfact], "(",
w$WSfactNlevels[w$WSfactColumn %in% wsfact], ")", sep="")
}
##############################################################################
## Step 2: Verify missing data?
##############################################################################
# partial observations are not preserved above if the zeros are smaller than 1.
if (allowImputing) {
missing <- addmissingcellsBSFactors(wdata, bsfact, tempvars)
wdata <- rbind(wdata, missing)
}
##############################################################################
## All done! ask for the plot or the data
##############################################################################
# quiet superb's information
opt <- getOption("superb.feedback")
on.exit(options(opt))
options(superb.feedback = 'none')
if (showPlotOnly) { # generate the plot
res <- superb::superbPlot( wdata,
BSFactors = bsfact,
WSFactors = wsfact,
factorOrder = facts,
variables = tempvars,
statistic = "prop", # the summary statistics defined above
errorbar = "CI", # its precision define above
gamma = confidenceLevel,
adjustment = list(
# purpose = "difference", # difference-adjusted CI done in CI.prop
decorrelation = if(length(tempvars)>1) "UA" else "none"
),
# the following is for the look of the plot
plotStyle = plotStyle,
errorbarParams = errorbarParams,
... ## passed as is to superb
) + ggplot2::ylab("Proportion") +
# use if you want the stretching visible
ggplot2::scale_y_continuous(trans=anopa_asn_trans(), breaks = scales::breaks_pretty(n = 10))
} else { # only compute the summary statistics
res <- superb::superbData( wdata,
BSFactors = bsfact,
WSFactors = wsfact,
variables = tempvars,
statistic = "prop", # the summary statistics defined above
errorbar = "CI", # its precision define above
gamma = confidenceLevel,
adjustment = list(
# purpose = "difference", # difference-adjusted CI done in CI.prop
decorrelation = if(length(tempvars)>1) "UA" else "none"
)
)#$summaryStatistics
}
# restore superb's information: done with on.exit()
# options(superb.feedback = opt)
return(res)
}
addmissingcellsBSFactors <- function( wData, BSFactors, ss) {
# List the possible cells to see if some are missing...
# The missing cells are added explicitely to the data
# There ought to be better code... Sorry about this.
toAdd <- wData[1,][-1,] # an empty line
repl <- unlist(options("ANOPA.zeros"))
repl <- repl/repl[1]
if (length(BSFactors) == 2) {
for (i in unique(wData[[BSFactors[[1]]]]) )
for (j in unique(wData[[BSFactors[[2]]]]) )
if (!any((wData[[BSFactors[[1]]]] == i)&(wData[[BSFactors[[2]]]] == j))){
ANOPAwarning(paste("ANOPA::warning(201): Cells", i, j, "missing in data. Imputing..."))
vide <- wData[1,][-1,]
line <- wData[1,] # an empty line
for (l in 1:repl[2]) {
line[[BSFactors[[1]]]] <- i
line[[BSFactors[2]]] <- j
for ( m in ss ) {line[[m]]<- if(l <= repl[1]) 1 else 0}
vide <- rbind( vide, line )
}
toAdd <- rbind(toAdd, vide)
}
} else if (length(BSFactors) == 3) {
for (i in unique(wData[[BSFactors[[1]]]]) )
for (j in unique(wData[[BSFactors[[2]]]]) )
for (k in unique(wData[[BSFactors[[3]]]]))
if (!any((wData[[BSFactors[[1]]]] == i)&(wData[[BSFactors[[2]]]] == j)&(wData[[BSFactors[[3]]]] == k))){
ANOPAwarning(paste("ANOPA::warning(201): Cell",i, j, k, "missing in data. Imputing..."))
vide <- wData[1,][-1,]
line <- wData[1,] # an empty line
for (l in 1:repl[2]) {
line[[BSFactors[[1]]]] <- i
line[[BSFactors[2]]] <- j
line[[BSFactors[3]]] <- k
for ( m in ss ) {line[[m]] <- if(l <= repl[1]) 1 else 0}
vide <- rbind( vide, line )
}
toAdd <- rbind(toAdd, vide)
}
}
return( toAdd )
}