-
Notifications
You must be signed in to change notification settings - Fork 0
/
ANOFA-emFrequencies.R
338 lines (292 loc) · 13.3 KB
/
ANOFA-emFrequencies.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
######################################################################################
#' @title emFrequencies: simple effect analysis of frequency data.
#'
#' @md
#'
#' @description The function `emFrequencies()` performs a simple effect analyses
#' of frequencies after an omnibus analysis has been obtained with `anofa()`
#' according to the ANOFA framework. See \insertCite{lc23b;textual}{ANOFA} for more.
#'
#' @usage emFrequencies(w, formula)
#'
#' @param w An ANOFA object obtained from `anofa()`;
#'
#' @param formula A formula which indicates what simple effect to analyze.
#' only one simple effect formula at a time can be analyzed. The formula
#' is given using a vertical bar, e.g., " ~ factorA | factorB " to obtain
#' the effect of Factor A within every level of the Factor B.
#'
#' @return a model fit of the simple effect.
#'
#' @details emFrequencies computes expected marginal frequencies and
#' analyze the hypothesis of equal frequencies.
#' The sum of the Gs of the simple effects are equal to the
#' interaction and main effect Gs, as this is an additive decomposition
#' of the effects.
#'
#' @references
#' \insertAllCited{}
#'
#' @examples
#' # Basic example using a two-factors design with the data in compiled format.
#' # Ficticious data present frequency of observation classified according
#' # to Intensity (three levels) and Pitch (two levels) for 6 possible cells.
#' minimalExample
#'
#' # performs the omnibus analysis first (mandatory):
#' w <- anofa(Frequency ~ Intensity * Pitch, minimalExample)
#' summary(w)
#'
#' # execute the simple effect of Pitch for every levels of Intensity
#' e <- emFrequencies(w, ~ Pitch | Intensity)
#' summary(e)
#'
#' # As a check, you can verify that the Gs are decomposed additively
#' sum(e$results[,1])
#' w$results[3,1]+w$results[4,1]
#'
#' # Real-data example using a two-factor design with the data in compiled format:
#' LandisBarrettGalvin2013
#'
#' w <- anofa( obsfreq ~ provider * program, LandisBarrettGalvin2013)
#' anofaPlot(w)
#' summary(w)
#'
#' # there is an interaction, so look for simple effects
#' e <- emFrequencies(w, ~ program | provider )
#' summary(e)
#'
#' # Example from Gillet1993 : 3 factors for appletrees
#' Gillet1993
#'
#' w <- anofa( Freq ~ species * location * florished, Gillet1993)
#' e <- emFrequencies(w, ~ florished | location )
#'
#' # Again, as a check, you can verify that the Gs are decomposed additively
#' w$results[4,1]+w$results[7,1] # B + B:C
#' sum(e$results[,1])
#'
#' # You can ask easier outputs with
#' summarize(w) # or summary(w) for the ANOFA table only
#' explain(w) # human-readable ouptut ((pending))
#'
######################################################################################
#'
#' @export emFrequencies
#' @importFrom utils capture.output
#
######################################################################################
emFrequencies <- function(
w = NULL,
formula = NULL
){
##############################################################################
## STEP 1: VALIDATION OF INPUT
##############################################################################
# 1.1 Is w of the right type and having more than 1 factor
if (!("ANOFAobject" %in% class(w)))
stop("ANOFA::error(21): The argument w is not an ANOFA object. Exiting...")
if (length(w$factColumns)==1)
stop("ANOFA::error(22): There must be at least two factors in the design. Exiting...")
# 1.2 If formula indeed a formula
if (!(is.formula(formula)))
stop("ANOFA::error(23): The formula argument is not a legitimate formula. Exiting...")
# 1.3 Is the rhs having a | sign, and only one
if (!(has.nested.terms(formula)))
stop("ANOFA::error(24): The rhs of formula must have a variable nested within another variable. Exiting... ")
if (length(tmp <- sub.formulas(formula, "|"))!=1)
stop("ANOFA::error(25): The rhs of formula must contain a single nested equation. Exiting...")
if (tmp[[1]][[2]]==tmp[[1]][[3]])
stop("ANOFA::error(26): The two variables on either side of | must differ. Exiting...")
# 1.4 If the dependent variable is named (optional), is it the correct variable
if (length(formula)==3) {
if (formula[[2]] != w$freqColumn)
stop("ANOFA::error(27): The lhs of formula is not the correct frequency variable. Exiting...")
}
# 1.5 Are the factors named in formula present in w
vars <- all.vars(formula) # extract variables in cbind and with | alike
if (!(all(vars %in% names(w$compiledData))))
stop("ANOFA::error(28): variables in `formula` are not all in the data. Exiting...")
##############################################################################
## STEP 2: Run the analysis based on the number of factors
##############################################################################
# 2.1: identify the factors
subfactors <- all.vars(tmp[[1]][[2]])
nestfactor <- all.vars(tmp[[1]][[3]])
# 2.2: perform the analysis based on the number of factors
analysis <- switch( length(subfactors),
emf1way(w, subfactors, nestfactor),
emf2way(w, subfactors, nestfactor),
emf3way(w, subfactors, nestfactor),
)
##############################################################################
# STEP 3: Return the object
##############################################################################
# 3.1: preserve everything in an object of class ANOFAobject
res <- list(
type = "ANOFAsimpleeffects",
formula = as.formula(w$formula),
compiledData = w$compiledData,
freqColumn = w$freqColumn,
factColumns = w$factColumns,
nlevels = w$nlevels,
clevels = w$clevels,
# new information added
formulasimple = as.formula(formula),
nestedFactors = nestfactor,
subFactors = subfactors,
nestedLevels = dim(analysis)[1],
results = analysis
)
class(res) <- c("ANOFAobject", class(res) )
return( res )
}
emf1way <- function(w, subfact, nestfact){
## Herein, the sole factor analyzed is called factor A, which is nested within factor B,
## that is, the effect of A within b1, the effect of A within b2, ... A within bj.
## There can only be a single nesting variable (e.g., B)
## We can get here if (a) there are 2 factors, (b) there are 3 factors, (c) there are 4 factors
# The factors position in the factor list and their number of levels
posA <- (1:length(w$factColumns))[w$factColumns==subfact]
A <- w$nlevels[posA]
# posB <- (1:length(w$factColumns))[w$factColumns==nestfact]
# B <- w$nlevels[posB]
if (length(nestfact)==1) {
posB <- (1:length(w$nlevels))[w$factColumns==nestfact]
B <- w$nlevels[posB]
lvls <- w$clevels[[posB]]
} else {
B <-1
lvls <- c()
for (i in nestfact) {
pos <- (1:length(w$nlevels))[w$factColumns==i]
B <- B * w$nlevels[pos]
lvls <- unlist(lapply(w$clevels[[pos]], function(x) paste(lvls,x, sep=":")))
}
nestfact <- paste(nestfact, collapse = "+")
}
# the total n and the vector marginals
ndd <- sum(w$compiledData[[w$freqColumn]])
ndj <- aggregate(as.formula(paste(w$freqColumn, nestfact, sep="~")),
data = w$compiledData, sum)[[w$freqColumn]]
by2 <- paste(c(nestfact, subfact), collapse=" + ")
nij <- matrix(aggregate(as.formula(paste(w$freqColumn, by2, sep="~")),
data = w$compiledData, sum)[[w$freqColumn]], B)
# First, we compute the expected frequencies e separately for each levels of factor B
eigivenj <- ndj / A
# Second, we get the G statistics for each level
Gs <- c()
for (j in 1:B) {
Gs[j] <- 2 * sum(nij[j,] * (log(nij[j,])- log(eigivenj[j])))
}
# Third, the correction factor (Williams, 1976) for each
cf <- 1+ (A^2-1) / ( 6 * (A-1) * ndd)
# Finally, getting the p-values for each corrected effect
ps = 1- pchisq( Gs / cf, df = B-1)
# This is it! let's put the results in a table
resultsSimpleEffects <- data.frame(
G = Gs,
df = rep(A-1, B),
Gcorrected = Gs / cf,
pvalue = ps,
etasq = apply( nij/ndj, 1, anofaES)
)
rownames(resultsSimpleEffects) <- paste(subfact, lvls, sep=" | ")
resultsSimpleEffects
}
emf2way <- function(w, subfacts, nestfact){
## Herein, the factors decomposed are called factors A*B,
## that is, the effects of (A, B, A*B) within c1, the effects of (A, B, A*B) within c2, etc.
## We can get here if (a) there are 3 factors, (b) there are 4 factors.
# The factor positions in the factor list and their number of levels
posA <- (1:length(w$nlevels))[w$factColumns==subfacts[1]]
A <- w$nlevels[posA]
posB <- (1:length(w$nlevels))[w$factColumns==subfacts[2]]
B <- w$nlevels[posB]
# posC <- (1:length(w$factColumns))[w$factColumns==nestfact]
# C <- w$nlevels[posC]
if (length(nestfact)==1) {
posC <- (1:length(w$nlevels))[w$factColumns==nestfact]
C <- w$nlevels[posC]
lvls <- w$clevels[[posC]]
} else {
C <-1
lvls <- c()
for (i in nestfact) {
pos <- (1:length(w$nlevels))[w$factColumns==i]
C <- C * w$nlevels[pos]
lvls <- unlist(lapply(w$clevels[[pos]], function(x) paste(x,lvls, sep=":")))
}
nestfact <- paste(nestfact, collapse = "+")
}
# the total n and the matrix marginals
nddd <- sum(w$compiledData[[w$freqColumn]])
nddk <- aggregate(as.formula(paste(w$freqColumn, nestfact, sep="~")),
data = w$compiledData, sum)[[w$freqColumn]]
by2 <- paste(subfacts, nestfact, sep="+")
nidk <- matrix(aggregate(as.formula(paste(w$freqColumn, by2[[1]],sep="~")),
data = w$compiledData, sum)[[w$freqColumn]], A,C )
ndjk <- matrix(aggregate(as.formula(paste(w$freqColumn, by2[[2]],sep="~")),
data = w$compiledData, sum)[[w$freqColumn]], B,C )
# # here is ok for a three factor design only... use tt uu with %in%
# nijd <- array(aggregate(as.formula(paste(w$freqColumn, paste(subfacts, collapse=" + "), sep="~")),
# data = w$compiledData, sum)[[w$freqColumn]],c(A,B,C))
by3 <- paste(paste(subfacts, collapse=" + "), nestfact, sep = " + ")
nijk <- array(aggregate(as.formula(paste(w$freqColumn, by3, sep="~")),
data = w$compiledData, sum)[[w$freqColumn]],c(A,B,C))
# First, we compute the expected frequencies e separately for each levels of factor C
eidgivenk <- nddk / A
edjgivenk <- nddk / B
eijgivenk <- nddk / (A * B)
# Second, we get the G statistics for each level
Gs <- c()
for (k in 1:C) {
Gs[(k-1)*3+1] <- 2 * sum(nijk[,,k] * (log(nijk[,,k])- log(eijgivenk[k])))
Gs[(k-1)*3+2] <- 2 * sum(nidk[,k] * (log(nidk[,k])- log(eidgivenk[k])))
Gs[(k-1)*3+3] <- 2 * sum(ndjk[,k] * (log(ndjk[,k])- log(edjgivenk[k])))
Gs[(k-1)*3+1] <- Gs[(k-1)*3+1] - Gs[(k-1)*3+2] - Gs[(k-1)*3+3] # remove simple effects
}
# Third, the correction factor (Williams, 1976) for each
cfA <- 1+ (A^2-1) / ( 6 * (A-1) * nddd)
cfB <- 1+ (B^2-1) / ( 6 * (B-1) * nddd)
cfAB <- 1+ ((A*B)^2-1) / ( 6 * (A-1) * (B-1) * nddd)
alletasq <- c()
for (k in 1:C) {
alletasq[(k-1)*3+1] <- anofaES(as.vector(nijk[,,k])/nddk[k])
alletasq[(k-1)*3+2] <- anofaES(nidk[,k]/nddk[k])
alletasq[(k-1)*3+3] <- anofaES(ndjk[,k]/nddk[k])
}
# Finally, getting the p-values for each corrected effect
ps <- 1- pchisq( Gs / c(cfAB, cfA, cfB), df = c((A-1)*(B-1), A-1, B-1) )
# This is it! let's put the results in a table
resultsSimpleEffects <- data.frame(
G = Gs,
df = rep(c((A-1)*(B-1), A-1, B-1), C),
Gcorrected = Gs / c(cfAB, cfA, cfB),
pvalue = ps,
etasq = alletasq
)
lbl <- c(paste(subfacts, collapse=":"), subfacts )
rownames(resultsSimpleEffects) <- unlist(lapply(lvls, \(x) paste(lbl, x, sep=" | ")))
resultsSimpleEffects
}
##########################################
# #
# ██╗ ██╗███████╗██████╗ ███████╗ #
# ██║ ██║██╔════╝██╔══██╗██╔════╝ #
# ███████║█████╗ ██████╔╝█████╗ #
# ██╔══██║██╔══╝ ██╔══██╗██╔══╝ #
# ██║ ██║███████╗██║ ██║███████╗ #
# ╚═╝ ╚═╝╚══════╝╚═╝ ╚═╝╚══════╝ #
# #
##########################################
# Coding third-order simple effects #
# is postponed until interest is shown. #
##########################################
emf3way <- function(w, subfacts, nestfact){
## Herein, the factor decomposed is called factor D,
## that is, the effect of (A, B, C, AB, AC, BC, A*B*C) within d1,
## the effect of (A, B, C, AB, AC, BC, A*B*C) within d2, etc.
## There is only one way to get here: a 4 factor design is decomposed.
"If you need this functionality, please contact the author...\n"
}