/
rma.mv.r
2593 lines (1851 loc) · 97.4 KB
/
rma.mv.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# fixed/random/mixed-effects multivariate/multilevel model with:
# - possibly one or multiple random intercepts (sigma2) with potentially known correlation matrices
# - possibly correlated random effects for arms/groups/levels within studies (tau2 and rho for 1st term, gamma2 and phi for 2nd term)
# model also allows for correlated sampling errors via non-diagonal V matrix
# V = variance-covariance matrix of the sampling errors
# sigma2 = (preset) value(s) for the variance of the random intercept(s)
# tau2 = (preset) value(s) for the variance of the random effects
# rho = (preset) value(s) for the correlation(s) between random effects
# gamma2 = (preset) value(s) for the variance of the random effects
# phi = (preset) value(s) for the correlation(s) between random effects
# structures when there is an '~ inner | outer' term in the random argument:
# - CS (compound symmetry)
# - HCS (heteroscedastic compound symmetry)
# - UN (general positive-definite matrix with no structure)
# - UNR (general positive-definite correlation matrix with a single tau2/gamma2 value)
# - AR (AR1 structure with a single tau2/gamma2 value and autocorrelation rho/phi)
# - HAR (heteroscedastic AR1 structure with multiple tau2/gamma2 values and autocorrelation rho/phi)
# - CAR (continuous time AR1 structure)
# - ID (same as CS but with rho/phi=0)
# - DIAG (same as HCS but with rho/phi=0)
# - SPEXP/SPGAU/SPLIN/SPRAT/SPSPH (spatial structures: exponential, gaussian, linear, rational quadratic, spherical)
# - GEN (general positive-definite matrix for an arbitrary number of predictors)
# - PHYBM/PHYPL/PHYPD (phylogenetic structures: Brownian motion, Pagel's lambda, Pagel's delta)
rma.mv <- function(yi, V, W, mods, random, struct="CS", intercept=TRUE,
data, slab, subset, method="REML",
test="z", dfs="residual", level=95, btt,
R, Rscale="cor", sigma2, tau2, rho, gamma2, phi,
cvvc=FALSE, sparse=FALSE, verbose=FALSE, digits, control, ...) {
# add ni as argument in the future
#########################################################################
###### setup
### check argument specifications
mstyle <- .get.mstyle()
if (!is.element(method, c("FE","EE","CE","ML","REML")))
stop(mstyle$stop("Unknown 'method' specified."))
if (any(!is.element(struct, c("CS","HCS","UN","AR","HAR","CAR","ID","DIAG","SPEXP","SPGAU","SPLIN","SPRAT","SPSPH","GEN","GDIAG")))) # "UNR", "PHYBM","PHYPL","PHYPD"))))
stop(mstyle$stop("Unknown 'struct' specified."))
if (length(struct) == 1L)
struct <- c(struct, struct)
na.act <- getOption("na.action")
on.exit(options(na.action=na.act), add=TRUE)
if (!is.element(na.act, c("na.omit", "na.exclude", "na.fail", "na.pass")))
stop(mstyle$stop("Unknown 'na.action' specified under options()."))
if (missing(random))
random <- NULL
if (missing(R))
R <- NULL
if (missing(sigma2))
sigma2 <- NULL
if (missing(tau2))
tau2 <- NULL
if (missing(rho))
rho <- NULL
if (missing(gamma2))
gamma2 <- NULL
if (missing(phi))
phi <- NULL
if (missing(control))
control <- list()
### set defaults for digits
if (missing(digits)) {
digits <- .set.digits(dmiss=TRUE)
} else {
digits <- .set.digits(digits, dmiss=FALSE)
}
time.start <- proc.time()
### get ... argument and check for extra/superfluous arguments
ddd <- list(...)
.chkdots(ddd, c("tdist", "outlist", "time", "dist", "abbrev", "restart", "beta", "vccon", "retopt"))
### handle 'tdist' argument from ... (note: overrides test argument)
if (.isFALSE(ddd$tdist))
test <- "z"
if (.isTRUE(ddd$tdist))
test <- "t"
test <- tolower(test)
if (!is.element(test, c("z", "t", "knha", "hksj", "adhoc")))
stop(mstyle$stop("Invalid option selected for 'test' argument."))
if (test == "hksj")
test <- "knha"
if (is.character(dfs))
dfs <- match.arg(dfs, c("residual", "contain"))
if (is.numeric(dfs) || (dfs == "contain" && test == "z"))
test <- "t"
### handle Rscale argument (either character, logical, or integer)
if (is.character(Rscale))
Rscale <- match.arg(Rscale, c("none", "cor", "cor0", "cov0"))
if (is.logical(Rscale))
Rscale <- ifelse(Rscale, "cor", "none")
if (is.numeric(Rscale)) {
Rscale <- round(Rscale)
if (Rscale > 3 | Rscale < 0)
stop(mstyle$stop("Unknown 'Rscale' value specified."))
Rscale <- switch(as.character(Rscale), "0"="none", "1"="cor", "2"="cor0", "3"="cov0")
}
### handle 'dist' argument from ...
if (is.null(ddd$dist)) {
ddd$dist <- list("euclidean", "euclidean")
} else {
if (is.data.frame(ddd$dist) || .is.matrix(ddd$dist))
ddd$dist <- list(ddd$dist)
if (!inherits(ddd$dist, "list"))
ddd$dist <- as.list(ddd$dist)
if (length(ddd$dist) == 1L)
ddd$dist <- c(ddd$dist, ddd$dist)
dist.methods <- c("euclidean", "maximum", "manhattan", "gcd")
for (j in 1:2) {
if (is.data.frame(ddd$dist[[j]]))
ddd$dist[[j]] <- as.matrix(ddd$dist[[j]])
if (!is.function(ddd$dist[[j]]) && !.is.matrix(ddd$dist[[j]])) {
ddd$dist[[j]] <- charmatch(ddd$dist[[j]], dist.methods, nomatch = 0)
if (ddd$dist[[j]] == 0) {
stop(mstyle$stop("Argument 'dist' must be one of 'euclidean', 'maximum', 'manhattan', or 'gcd'."))
} else {
ddd$dist[[j]] <- dist.methods[ddd$dist[[j]]]
}
}
}
if (any(ddd$dist == "gcd")) {
if (!requireNamespace("sp", quietly=TRUE))
stop(mstyle$stop("Please install the 'sp' package to compute great-circle distances."))
}
}
if (is.null(ddd$vccon)) {
vccon <- NULL
} else {
vccon <- ddd$vccon
sigma2 <- .chkvccon(vccon$sigma2, sigma2)
tau2 <- .chkvccon(vccon$tau2, tau2)
rho <- .chkvccon(vccon$rho, rho)
gamma2 <- .chkvccon(vccon$gamma2, gamma2)
phi <- .chkvccon(vccon$phi, phi)
}
### set defaults for formulas
formula.yi <- NULL
formula.mods <- NULL
### in case user specifies v (instead of V), verbose is set to v, which is non-sensical
### - if v is set to the name of a variable in 'data', it won't be found; can check for
### this with try() and inherits(verbose, "try-error")
### - if v is set to vi or var (or anything else that might be interpreted as a function),
### then can catch this by checking if verbose is a function
verbose <- try(verbose, silent=TRUE)
if (inherits(verbose, "try-error") || is.function(verbose) || length(verbose) != 1L || !(is.logical(verbose) || is.numeric(verbose)))
stop(mstyle$stop("Argument 'verbose' must be a scalar (logical or numeric/integer)."))
### set options(warn=1) if verbose > 2
if (verbose > 2) {
opwarn <- options(warn=1)
on.exit(options(warn=opwarn$warn), add=TRUE)
}
#########################################################################
if (verbose > 1) .space()
if (verbose > 1)
message(mstyle$message("Extracting yi/V values ..."))
### check if data argument has been specified
if (missing(data))
data <- NULL
if (is.null(data)) {
data <- sys.frame(sys.parent())
} else {
if (!is.data.frame(data))
data <- data.frame(data)
}
mf <- match.call()
### extract yi, V, W, ni, slab, subset, and mods values, possibly from the data frame specified via data (arguments not specified are NULL)
yi <- .getx("yi", mf=mf, data=data)
V <- .getx("V", mf=mf, data=data)
W <- .getx("W", mf=mf, data=data)
ni <- .getx("ni", mf=mf, data=data) # not yet possible to specify this
slab <- .getx("slab", mf=mf, data=data)
subset <- .getx("subset", mf=mf, data=data)
mods <- .getx("mods", mf=mf, data=data)
### if yi is a formula, extract yi and X (this overrides anything specified via the mods argument further below)
if (inherits(yi, "formula")) {
formula.yi <- yi
formula.mods <- formula.yi[-2]
options(na.action = "na.pass") # set na.action to na.pass, so that NAs are not filtered out (we'll do that later)
mods <- model.matrix(yi, data=data) # extract model matrix (now mods is no longer a formula, so [a] further below is skipped)
attr(mods, "assign") <- NULL # strip assign attribute (not needed at the moment)
attr(mods, "contrasts") <- NULL # strip contrasts attribute (not needed at the moment)
yi <- model.response(model.frame(yi, data=data)) # extract yi values from model frame
options(na.action = na.act) # set na.action back to na.act
names(yi) <- NULL # strip names (1:k) from yi (so res$yi is the same whether yi is a formula or not)
intercept <- FALSE # set to FALSE since formula now controls whether the intercept is included or not
} # note: code further below ([b]) actually checks whether intercept is included or not
### in case user passed a data frame to yi, convert it to a vector (if possible)
if (is.data.frame(yi)) {
if (ncol(yi) == 1L) {
yi <- yi[[1]]
} else {
stop(mstyle$stop("The object/variable specified for the 'yi' argument is a data frame with multiple columns."))
}
}
### in case user passed a matrix to yi, convert it to a vector (if possible)
if (.is.matrix(yi)) {
if (nrow(yi) == 1L || ncol(yi) == 1L) {
yi <- as.vector(yi)
} else {
stop(mstyle$stop("The object/variable specified for the 'yi' argument is a matrix with multiple rows/columns."))
}
}
### check if yi is numeric
if (!is.numeric(yi))
stop(mstyle$stop("The object/variable specified for the 'yi' argument is not numeric."))
### number of outcomes before subsetting
k <- length(yi)
k.all <- k
### set default measure argument
measure <- "GEN"
if (!is.null(attr(yi, "measure"))) # take 'measure' from yi (if it is there)
measure <- attr(yi, "measure")
### add measure attribute (back) to the yi vector
attr(yi, "measure") <- measure
### some checks on V (and turn V into a diagonal matrix if it is a column/row vector)
if (is.null(V))
stop(mstyle$stop("Must specify 'V' argument."))
### catch cases where 'V' is the utils::vi() function
if (identical(V, utils::vi))
stop(mstyle$stop("Variable specified for 'V' argument cannot be found."))
if (is.list(V) && !is.data.frame(V)) {
### list elements may be data frames (or scalars), so coerce to matrices
V <- lapply(V, as.matrix)
### check that all elements are square
if (any(!sapply(V, .is.square)))
stop(mstyle$stop("All list elements in 'V' must be square matrices."))
### turn list into block-diagonal (sparse) matrix
if (sparse) {
V <- bdiag(V)
} else {
V <- bldiag(V)
}
}
### check if user constrained V to 0 (can skip a lot of the steps below then)
if ((.is.vector(V) && length(V) == 1L && V == 0) || (.is.vector(V) && length(V) == k && !anyNA(V) && all(V == 0))) {
V0 <- TRUE
} else {
V0 <- FALSE
}
### turn V into a diagonal matrix if it is a column/row vector
### note: if V is a scalar (e.g., V=0), then this will turn V into a kxk
### matrix with the value of V along the diagonal
if (V0 || .is.vector(V) || nrow(V) == 1L || ncol(V) == 1L) {
if (sparse) {
V <- Diagonal(k, as.vector(V))
} else {
V <- diag(as.vector(V), nrow=k, ncol=k)
}
}
### turn V into a matrix if it is a data frame
if (is.data.frame(V))
V <- as.matrix(V)
### remove row and column names (important for isSymmetric() function)
### (but only do this if V has row/column names to avoid making an unnecessary copy)
if (!is.null(dimnames(V)))
V <- unname(V)
### check whether V is square and symmetric (can skip when V0)
if (!V0 && !.is.square(V))
stop(mstyle$stop("'V' must be a square matrix."))
if (!V0 && !isSymmetric(V)) # note: copy of V is made when doing this
stop(mstyle$stop("'V' must be a symmetric matrix."))
### check length of yi and V
if (nrow(V) != k)
stop(mstyle$stop(paste0("Length of 'yi' (", k, ") and length/dimensions of 'V' (", nrow(V), ") is not the same.")))
### force V to be sparse when sparse=TRUE (and V is not yet sparse)
if (sparse && inherits(V, "matrix"))
V <- Matrix(V, sparse=TRUE)
### check if V is numeric (but only for 'regular' matrices, since this is always FALSE for sparse matrices)
if (inherits(V, "matrix") && !is.numeric(V))
stop(mstyle$stop("The object/variable specified for the 'V' argument is not numeric."))
### process W if it was specified
if (!is.null(W)) {
### turn W into a diagonal matrix if it is a column/row vector
### in general, turn W into A (arbitrary weight matrix)
if (.is.vector(W) || nrow(W) == 1L || ncol(W) == 1L) {
W <- as.vector(W)
### allow easy setting of W to a single value
if (length(W) == 1L)
W <- rep(W, k)
A <- diag(W, nrow=length(W), ncol=length(W))
} else {
A <- W
}
if (is.data.frame(A))
A <- as.matrix(A)
### remove row and column names (important for isSymmetric() function)
### (but only do this if A has row/column names to avoid making an unnecessary copy)
if (!is.null(dimnames(A)))
A <- unname(A)
### check whether A is square and symmetric
if (!.is.square(A))
stop(mstyle$stop("'W' must be a square matrix."))
if (!isSymmetric(A))
stop(mstyle$stop("'W' must be a symmetric matrix."))
### check length of yi and A
if (nrow(A) != k)
stop(mstyle$stop(paste0("Length of 'yi' (", k, ") and length/dimensions of 'W' (", nrow(A), ") is not the same.")))
### force A to be sparse when sparse=TRUE (and A is not yet sparse)
if (sparse && inherits(A, "matrix"))
A <- Matrix(A, sparse=TRUE)
if (inherits(A, "matrix") && !is.numeric(A))
stop(mstyle$stop("The object/variable specified for the 'W' argument is not numeric."))
} else {
A <- NULL
}
### if ni has not been specified (and hence is NULL), try to get it from the attributes of yi
### note: currently ni argument removed, so this is the only way to pass ni to the function
if (is.null(ni))
ni <- attr(yi, "ni")
### check length of yi and ni
### if there is a mismatch, then ni cannot be trusted, so set it to NULL
if (!is.null(ni) && length(ni) != k)
ni <- NULL
### if ni is now available, add it (back) as an attribute to yi
### this is currently pointless, but may be useful if function has an ni argument
#if (!is.null(ni))
# attr(yi, "ni") <- ni
#########################################################################
if (verbose > 1)
message(mstyle$message("Creating model matrix ..."))
### convert mods formula to X matrix and set intercept equal to FALSE
### skipped if formula has already been specified via yi argument, since mods is then no longer a formula (see [a])
if (inherits(mods, "formula")) {
formula.mods <- mods
if (isTRUE(all.equal(formula.mods, ~ 1))) { # needed so 'mods = ~ 1' without 'data' specified works
mods <- matrix(1, nrow=k, ncol=1)
intercept <- FALSE
} else {
options(na.action = "na.pass") # set na.action to na.pass, so that NAs are not filtered out (we'll do that later)
mods <- model.matrix(mods, data=data) # extract model matrix
attr(mods, "assign") <- NULL # strip assign attribute (not needed at the moment)
attr(mods, "contrasts") <- NULL # strip contrasts attribute (not needed at the moment)
options(na.action = na.act) # set na.action back to na.act
intercept <- FALSE # set to FALSE since formula now controls whether the intercept is included or not
} # note: code further below ([b]) actually checks whether intercept is included or not
}
### turn a vector for mods into a column vector
if (.is.vector(mods))
mods <- cbind(mods)
### turn a mods data frame into a matrix
if (is.data.frame(mods))
mods <- as.matrix(mods)
### check if model matrix contains character variables
if (is.character(mods))
stop(mstyle$stop("Model matrix contains character variables."))
### check if mods matrix has the right number of rows
if (!is.null(mods) && nrow(mods) != k)
stop(mstyle$stop(paste0("Number of rows in the model matrix (", nrow(mods), ") does not match length of the outcome vector (", k, ").")))
#########################################################################
#########################################################################
#########################################################################
### process random argument
if (!is.element(method, c("FE","EE","CE")) && !is.null(random)) {
if (verbose > 1)
message(mstyle$message("Processing 'random' argument ..."))
### make sure random argument is always a list (so lapply() below works)
if (!is.list(random))
random <- list(random)
### check that all elements are formulas
if (any(sapply(random, function(x) !inherits(x, "formula"))))
stop(mstyle$stop("All elements of 'random' must be formulas."))
### check that all formulas have a vertical bar
has.vbar <- sapply(random, function(f) grepl("|", paste0(f, collapse=""), fixed=TRUE))
if (any(!has.vbar))
stop(mstyle$stop("All formulas in 'random' must contain a grouping variable after the | symbol."))
### check if any formula have a $
has.dollar <- sapply(random, function(f) grepl("$", paste0(f, collapse=""), fixed=TRUE))
if (any(has.dollar))
stop(mstyle$stop("Cannot use '$' notation in formulas in the 'random' argument (use the 'data' argument instead)."))
### check if any formula have a :
has.colon <- sapply(random, function(f) grepl(":", paste0(f, collapse=""), fixed=TRUE))
if (any(has.colon))
stop(mstyle$stop("Cannot use ':' notation in formulas in the 'random' argument (use 'interaction()' instead)."))
### check if any formula have a %in%
has.in <- sapply(random, function(f) grepl("%in%", paste0(f, collapse=""), fixed=TRUE))
if (any(has.in))
stop(mstyle$stop("Cannot use '%in%' notation in formulas in the 'random' argument (use 'interaction()' instead)."))
### check which formulas have a ||
has.dblvbar <- sapply(random, function(f) grepl("||", paste0(f, collapse=""), fixed=TRUE))
### replace || with |
random <- lapply(random, function(f) {
if (grepl("||", paste0(f, collapse=""), fixed=TRUE)) {
f <- paste0(f, collapse="")
f <- gsub("||", "|", f, fixed=TRUE)
f <- as.formula(f)
}
return(f)
})
### check which formulas in random are '~ inner | outer' formulas
formulas <- list(NULL, NULL)
split.formulas <- sapply(random, function(f) strsplit(paste0(f, collapse=""), " | ", fixed=TRUE))
is.inner.outer <- sapply(split.formulas, function(f) f[1] != "~1")
### make sure that there are only up to two '~ inner | outer' formulas
if (sum(is.inner.outer) > 2)
stop(mstyle$stop("Only up to two '~ inner | outer' formulas allowed in the 'random' argument."))
### get '~ inner | outer' formulas
if (any(is.inner.outer))
formulas[[1]] <- random[is.inner.outer][1][[1]]
if (sum(is.inner.outer) == 2)
formulas[[2]] <- random[is.inner.outer][2][[1]]
### figure out if a formulas has a slash (as in '~ 1 | study/id')
has.slash <- sapply(random, function(f) grepl("/", paste0(f, collapse=""), fixed=TRUE))
### check if slash is used in combination with an '~ inner | outer' term
if (any(is.inner.outer & has.slash))
stop(mstyle$stop("Cannot use '~ inner | outer1/outer2' type terms in the 'random' argument."))
### substitute + for | in all formulas (so that model.frame() below works)
random.plus <- lapply(random, function(f) formula(sub("\\|", "+", paste0(f, collapse=""))))
### get all model frames corresponding to the formulas in the random argument
### mf.r <- lapply(random, get_all_vars, data=data)
### note: get_all_vars() does not carry out any functions calls within the formula
### so use model.frame(), which allows for things like 'random = ~ factor(arm) | study'
### need to use na.pass so that NAs are passed through (checks for NAs are done later)
#mf.r <- lapply(random.plus, model.frame, data=data, na.action=na.pass)
mf.r <- list()
io <- 0
for (j in seq_along(is.inner.outer)) {
if (is.inner.outer[j]) {
io <- io + 1
### for an '~ inner | outer' term with struct="GEN", expand the inner formula to the
### model matrix and re-combine this with the outer variable
if (is.element(struct[io], c("GEN","GDIAG"))) {
f.inner <- as.formula(strsplit(paste(random[[j]], collapse=""), " | ", fixed=TRUE)[[1]][1])
f.outer <- as.formula(paste("~", strsplit(paste(random[[j]], collapse=""), " | ", fixed=TRUE)[[1]][2]))
options(na.action = "na.pass")
X.inner <- model.matrix(f.inner, data=data)
options(na.action = na.act)
is.int <- apply(X.inner, 2, .is.intercept)
colnames(X.inner)[is.int] <- "intrcpt"
mf.r[[j]] <- cbind(X.inner, model.frame(f.outer, data=data, na.action=na.pass))
if (has.dblvbar[j]) # change "GEN" to "GDIAG" if the formula had a ||
struct[io] <- "GDIAG"
} else {
mf.r[[j]] <- model.frame(random.plus[[j]], data=data, na.action=na.pass)
}
} else {
mf.r[[j]] <- model.frame(random.plus[[j]], data=data, na.action=na.pass)
}
}
### count number of columns in each model frame
mf.r.ncols <- sapply(mf.r, ncol)
### for formulas with slashes, create interaction terms
for (j in seq_along(has.slash)) {
if (!has.slash[j])
next
### need to go backwards; otherwise, with 3 or more terms (e.g., ~ 1 | var1/var2/var3), the third term would be an
### interaction between var1, var1:var2, and var3; by going backwards, we get var1, var1:var2, and var1:var2:var3
for (p in mf.r.ncols[j]:1) {
mf.r[[j]][,p] <- interaction(mf.r[[j]][1:p], drop=TRUE, lex.order=TRUE, sep = "/")
colnames(mf.r[[j]])[p] <- paste(colnames(mf.r[[j]])[1:p], collapse="/")
}
}
### create list where model frames with multiple columns based on slashes are flattened out
if (any(has.slash)) {
if (length(mf.r) == 1L) {
### if formula only has one element of the form ~ 1 | var1/var2/..., create a list of the data frames (each with one column)
mf.r <- lapply(seq(ncol(mf.r[[1]])), function(x) mf.r[[1]][x])
} else {
### if there are non-slash elements, then this flattens things out (obviously ...)
mf.r <- unlist(mapply(function(mf, sl) if (sl) lapply(seq(mf), function(x) mf[x]) else list(mf), mf.r, has.slash, SIMPLIFY=FALSE), recursive=FALSE, use.names=FALSE)
}
### recount number of columns in each model frame
mf.r.ncols <- sapply(mf.r, ncol)
}
#return(mf.r)
### separate mf.r into mf.s (~ 1 | id), mf.g (~ inner | outer), and mf.h (~ inner | outer) parts
mf.s <- mf.r[which(mf.r.ncols == 1)] # if there is no '~ 1 | factor' term, this is list() ([] so that we get a list of data frames)
mf.g <- mf.r[[which(mf.r.ncols >= 2)[1]]] # if there is no 1st '~ inner | outer' terms, this is NULL ([[]] so that we get a data frame, not a list)
mf.h <- mf.r[[which(mf.r.ncols >= 2)[2]]] # if there is no 2nd '~ inner | outer' terms, this is NULL ([[]] so that we get a data frame, not a list)
### if there is no (~ 1 | factor) term, then mf.s is list(), so turn that into NULL
if (length(mf.s) == 0L)
mf.s <- NULL
### does the random argument include at least one (~ 1 | id) term?
withS <- !is.null(mf.s)
### does the random argument include '~ inner | outer' terms?
withG <- !is.null(mf.g)
withH <- !is.null(mf.h)
### count number of rows in each model frame
mf.r.nrows <- sapply(mf.r, nrow)
### make sure that rows in each model frame match the length of the data
if (any(mf.r.nrows != k))
stop(mstyle$stop("Length of variables specified via the 'random' argument does not match length of the data."))
### need this for profile(); with things like 'random = ~ factor(arm) | study', 'mf.r' contains variables 'factor(arm)' and 'study'
### but the former won't work when using the same formula for the refitting (same when using interaction() in the random formula)
### note: with ~ 1 | interaction(var1, var2), mf.r will have 2 columns, but is actually a 'one variable' term
### and with ~ interaction(var1, var2) | var3, mf.r will have 3 columns, but is actually a 'two variable' term
### mf.r.ncols above is correct even in these cases (since it is based on the model.frame() results), but need
### to be careful that this doesn't screw up anything in other functions (for now, mf.r.ncols is not used in any other function)
mf.r <- lapply(random.plus, get_all_vars, data=data)
} else {
### set defaults for some elements when method="FE/EE/CE"
formulas <- list(NULL, NULL)
mf.r <- NULL
mf.s <- NULL
mf.g <- NULL
mf.h <- NULL
withS <- FALSE
withG <- FALSE
withH <- FALSE
}
### warn that 'struct' argument is disregarded if it has been specified, but model contains no '~ inner | outer' terms
if (!withG && "struct" %in% names(mf))
warning(mstyle$warning("Model does not contain an '~ inner | outer' term, so 'struct' argument is disregaded."), call.=FALSE)
### warn that 'random' argument is disregarded if it has been specified, but method="FE/EE/CE"
if (is.element(method, c("FE","EE","CE")) && "random" %in% names(mf))
warning(mstyle$warning(paste0("The 'random' argument is disregaded when method=\"", method, "\".")), call.=FALSE)
#return(list(mf.r=mf.r, mf.s=mf.s, mf.g=mf.g, mf.h=mf.h))
### note: checks on NAs in mf.s, mf.g, and mf.h after subsetting (since NAs may be removed by subsetting)
#########################################################################
#########################################################################
#########################################################################
### generate study labels if none are specified (or none can be found in yi argument)
if (verbose > 1)
message(mstyle$message("Generating/extracting study labels ..."))
### study ids (1:k sequence before subsetting)
ids <- seq_len(k)
### if slab has not been specified but is an attribute of yi, get it
if (is.null(slab)) {
slab <- attr(yi, "slab") # will be NULL if there is no slab attribute
### check length of yi and slab (only if slab is now not NULL)
### if there is a mismatch, then slab cannot be trusted, so set it to NULL
if (!is.null(slab) && length(slab) != k)
slab <- NULL
}
if (is.null(slab)) {
slab.null <- TRUE
slab <- ids
} else {
if (anyNA(slab))
stop(mstyle$stop("NAs in study labels."))
if (length(slab) != k)
stop(mstyle$stop("Study labels not of same length as data."))
if (is.factor(slab))
slab <- as.character(slab)
slab.null <- FALSE
}
### if a subset of studies is specified
if (!is.null(subset)) {
if (verbose > 1)
message(mstyle$message("Subsetting ..."))
subset <- .chksubset(subset, k)
yi <- .getsubset(yi, subset)
V <- .getsubset(V, subset, col=TRUE)
A <- .getsubset(A, subset, col=TRUE)
ni <- .getsubset(ni, subset)
mods <- .getsubset(mods, subset)
slab <- .getsubset(slab, subset)
mf.r <- lapply(mf.r, .getsubset, subset)
mf.s <- lapply(mf.s, .getsubset, subset)
mf.g <- .getsubset(mf.g, subset)
mf.h <- .getsubset(mf.h, subset)
ids <- .getsubset(ids, subset)
k <- length(yi)
attr(yi, "measure") <- measure # add measure attribute back
attr(yi, "ni") <- ni # add ni attribute back
}
### check if study labels are unique; if not, make them unique
if (anyDuplicated(slab))
slab <- .make.unique(slab)
### add slab attribute back
attr(yi, "slab") <- slab
### get the sampling variances from the diagonal of V
vi <- diag(V)
### save full data (including potential NAs in yi/vi/V/W/ni/mods)
yi.f <- yi
vi.f <- vi
V.f <- V
W.f <- A
ni.f <- ni
mods.f <- mods
#mf.g.f <- mf.g # copied further below
#mf.h.f <- mf.h # copied further below
#mf.s.f <- mf.s # copied further below
k.f <- k # total number of observed outcomes including all NAs
#########################################################################
#########################################################################
#########################################################################
### stuff that need to be done after subsetting
if (withS) {
if (verbose > 1)
message(mstyle$message(paste0("Processing '", paste0("~ 1 | ", sapply(mf.s, names), collapse=", "), "' term(s) ...")))
### get variables names in mf.s
s.names <- sapply(mf.s, names) # one name per term
### turn each variable in mf.s into a factor (and turn each column vector into just a vector)
### if a variable was a factor to begin with, this drops any unused levels, but order of existing levels is preserved
mf.s <- lapply(mf.s, function(x) factor(x[[1]]))
### check if there are any NAs anywhere in mf.s
if (any(sapply(mf.s, anyNA)))
stop(mstyle$stop("No NAs allowed in variables specified in the 'random' argument."))
### how many (~ 1 | id) terms does the random argument include? (0 if none, but if withS is TRUE, must be at least 1)
sigma2s <- length(mf.s)
### set default value(s) for sigma2 argument if it is unspecified
if (is.null(sigma2))
sigma2 <- rep(NA_real_, sigma2s)
### allow quickly setting all sigma2 values to a fixed value
if (length(sigma2) == 1L)
sigma2 <- rep(sigma2, sigma2s)
### check if sigma2 is of the correct length
if (length(sigma2) != sigma2s)
stop(mstyle$stop(paste0("Length of 'sigma2' argument (", length(sigma2), ") does not match actual number of variance components (", sigma2s, ").")))
### checks on any fixed values of sigma2 argument
if (any(sigma2 < 0, na.rm=TRUE))
stop(mstyle$stop("Specified value(s) of 'sigma2' must be non-negative."))
### get number of levels of each variable in mf.s (vector with one value per term)
s.nlevels <- sapply(mf.s, nlevels)
### get levels of each variable in mf.s (list with levels for each variable)
s.levels <- lapply(mf.s, levels)
### checks on R (note: do this after subsetting, so user can filter out ids with no info in R)
if (is.null(R)) {
withR <- FALSE
Rfix <- rep(FALSE, sigma2s)
} else {
if (verbose > 1)
message(mstyle$message("Processing 'R' argument ..."))
withR <- TRUE
### make sure R is always a list (so lapply() below works)
if (is.data.frame(R) || !is.list(R))
R <- list(R)
### check if R list has no names at all or some names are missing
### (if only some elements of R have names, then names(R) is "" for the unnamed elements, so use nchar()==0 to check for that)
if (is.null(names(R)) || any(nchar(names(R)) == 0L))
stop(mstyle$stop("Argument 'R' must be a *named* list."))
### remove elements in R that are NULL (not sure why this is needed; why would anybody ever do this?)
### maybe this had something to do with functions that repeatedly call rma.mv(); so leave this be for now
R <- R[!sapply(R, is.null)]
### turn all elements in R into matrices (this would fail with a NULL element)
R <- lapply(R, as.matrix)
### match up R matrices based on the s.names (and correct names of R)
### so if a particular ~ 1 | id term has a matching id=R element, the corresponding R element is that R matrix
### if a particular ~ 1 | id term does not have a matching id=R element, the corresponding R element is NULL
R <- R[s.names]
### NULL elements in R would have no name, so this makes sure that all R elements have the correct s.names
names(R) <- s.names
### check for which components an R matrix has been specified
Rfix <- !sapply(R, is.null)
### Rfix could be all FALSE (if user has used id names in R that are not actually in 'random')
### so only do the rest below if that is *not* the case
if (any(Rfix)) {
### check if given R matrices are square and symmetric
if (any(!sapply(R[Rfix], .is.square)))
stop(mstyle$stop("Elements of 'R' must be square matrices."))
if (any(!sapply(R[Rfix], function(x) isSymmetric(unname(x)))))
stop(mstyle$stop("Elements of 'R' must be symmetric matrices."))
for (j in seq_along(R)) {
if (!Rfix[j])
next
### even if isSymmetric() is TRUE, there may still be minor numerical differences between the lower and upper triangular
### parts that could lead to isSymmetric() being FALSE once we do any potentially rescaling of the R matrices further
### below; this ensures strict symmetry to avoid this issue
#R[[j]][lower.tri(R[[j]])] <- t(R[[j]])[lower.tri(R[[j]])]
R[[j]] <- symmpart(R[[j]])
### if rownames are missing, copy colnames to rownames and vice-versa
if (is.null(rownames(R[[j]])))
rownames(R[[j]]) <- colnames(R[[j]])
if (is.null(colnames(R[[j]])))
colnames(R[[j]]) <- rownames(R[[j]])
### if colnames are still missing at this point, R element did not have dimension names to begin with
if (is.null(colnames(R[[j]])))