version 0.1.4

cran · Jan 27, 2024 · 3aec0b9 · 3aec0b9
1 parent 75bd203
commit 3aec0b9
Show file tree

Hide file tree

Showing 36 changed files with 580 additions and 588 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,20 +1,19 @@
 Package: metaforest
 Type: Package
-Date: 2020-03-01
 Title: Exploring Heterogeneity in Meta-Analysis using Random Forests
-Version: 0.1.3
+Version: 0.1.4
 Author: Caspar J. van Lissa
 Maintainer: Caspar J. van Lissa <c.j.vanlissa@gmail.com>
-Description: Conduct random forests-based meta-analysis, obtain partial dependence plots for metaforest and classic meta-analyses, and cross-validate and tune metaforest- and classic meta-analyses in conjunction with the caret package. A requirement of classic meta-analysis is that the studies being aggregated are conceptually similar, and ideally, close replications. However, in many fields, there is substantial heterogeneity between studies on the same topic. Classic meta-analysis lacks the power to assess more than a handful of univariate moderators. MetaForest, by contrast, has substantial power to explore heterogeneity in meta-analysis. It can identify important moderators from a larger set of potential candidates, even with as little as 20 studies (Van Lissa, in preparation). This is an appealing quality, because many meta-analyses have small sample sizes. Moreover, MetaForest yields a measure of variable importance which can be used to identify important moderators, and offers partial prediction plots to explore the shape of the marginal relationship between moderators and effect size.
+Description: Conduct random forests-based meta-analysis, obtain partial dependence plots for metaforest and classic meta-analyses, and cross-validate and tune metaforest- and classic meta-analyses in conjunction with the caret package. A requirement of classic meta-analysis is that the studies being aggregated are conceptually similar, and ideally, close replications. However, in many fields, there is substantial heterogeneity between studies on the same topic. Classic meta-analysis lacks the power to assess more than a handful of univariate moderators. MetaForest, by contrast, has substantial power to explore heterogeneity in meta-analysis. It can identify important moderators from a larger set of potential candidates (Van Lissa, 2020). This is an appealing quality, because many meta-analyses have small sample sizes. Moreover, MetaForest yields a measure of variable importance which can be used to identify important moderators, and offers partial prediction plots to explore the shape of the marginal relationship between moderators and effect size.
 Depends: R (>= 3.5.0), ggplot2, metafor, ranger, data.table, methods
 Imports: gtable, grid
 Suggests: testthat, caret, knitr, rmarkdown, covr
 License: GPL-3
 Encoding: UTF-8
 LazyData: true
-RoxygenNote: 6.1.1
+RoxygenNote: 7.2.3
 VignetteBuilder: knitr
 NeedsCompilation: no
-Packaged: 2020-01-07 13:31:24 UTC; Lissa102
+Packaged: 2024-01-26 07:55:42 UTC; vanlissa
 Repository: CRAN
-Date/Publication: 2020-01-08 04:50:02 UTC
+Date/Publication: 2024-01-26 09:40:05 UTC
diff --git a/MD5 b/MD5
@@ -1,51 +1,51 @@
-b3c6e75036c0b1bbae721f93168fa60a *DESCRIPTION
-8c1c14d0b47981263cc6444b3a72f897 *NAMESPACE
-02711f100a7abd4374dae7347070f286 *NEWS.md
+4c1d94c09a0f788002bcdbc03e4f7294 *DESCRIPTION
+5677383f3d6367f7500b36e42f0fe086 *NAMESPACE
+b6caa38155ac22e51128500eafaaa001 *NEWS.md
 409e8e06ae2e11fcbb13a81dfc4c7f81 *R/MF.R
-c219f24fb9d32bd2465c88bdec87502d *R/MF_cluster.R
-3c0613d0e4c3b3343413611ae670ae7c *R/MetaForest.R
-b6355e26020055df3889907d199934f7 *R/ModelInfo_mf.R
+d21eb20758743d61dd29bfa5493fef5f *R/MF_cluster.R
+3bfe2401221268ad4a030b9a1f8ce4e0 *R/MetaForest.R
+85780a285ac1f3970ad7d73e3dc189ce *R/ModelInfo_mf.R
 66533fca6c675f33afea961fc69588b1 *R/ModelInfo_rma.R
-6bf1b32406a4a6340cc180e371fbe9be *R/PartialDependence.R
-5bee56163ee1ed1ca34c3084ae29976e *R/SimulateSMD.R
+cd60dff4705a3b128e419a2745e985c8 *R/PartialDependence.R
+287f234d7debe4fd4f24157943411da4 *R/SimulateSMD.R
 e397b8e51fd13c1e7a1a3f76914a8fdd *R/VarImpPlot.R
-7dc6b16572b5d954dc8198d6bfef4117 *R/WeightedScatter.R
+6f316a3f6f1813476cea9e1e8a95af43 *R/WeightedScatter.R
 0d2f06c2b3bb9ac7ec556aa3dce737a3 *R/coef_test.R
-946c50248d6ed432e57a54a093a89b83 *R/curry.R
-d09bdd7b302e8953b5ffa9e2ec1f18bf *R/deprecated_functions.R
+8212b9265e008ff1775a83fde41eb5b1 *R/curry.R
+d41d8cd98f00b204e9800998ecf8427e *R/deprecated_functions.R
 b705eb0b3619b86be53f698cfe84fb2b *R/extract_proximity.MetaForest.R
-74232bd46fd9a3ff291b6724a0835fe5 *R/fukkink_lont.R
+009260f3b131b0c89914bc3f00565bd4 *R/fukkink_lont.R
 b3f6b2a586762766c2784e68fedccd0c *R/helpers.R
-89f2449a2d4e0a001d3462e1953f777f *R/marginal_predictions.R
-32285741dea0d32ff58c4cdf279712aa *R/metaforest-deprecated.r
-763d5e514e47360adebcf24797b268db *R/plot.MetaForest.R
+68e7e9f8b0ceaaa39cefd4071e7b9147 *R/marginal_predictions.R
+2c078b2d523d417d3beffdc9dad33911 *R/merge_forests.R
+7528225606f7da123ca90598402ab3ec *R/plot.MetaForest.R
 a79e0de646f739fd1b7682cd920dcce1 *R/predict.MetaForest.R
 abbf351cb879ddaa0080fed56efe0090 *R/preselect.R
 18ab42f4bc720a04cbb5584614976c15 *R/print.MetaForest.R
 8debd94bf6d50872a0958520095f96ea *R/print.summary.MetaForest.R
 4bd7a30acbc88c2383175353c8aec34d *R/summary.MetaForest.R
-1b9852c57eee5eafa518c70af03b74c8 *README.md
-eafaec85b832d041f9b1adb37a6c8728 *build/vignette.rds
+b05b7c974900ff20efb7422bf6bacc78 *README.md
+b943a1546a1e17be4eea235ec7b4616b *build/partial.rdb
+36fbd2ae4180441c075e587cc9b5db99 *build/vignette.rds
 289b6b0183d882e1df481e4c452eb69c *data/curry.csv
 dc1e97e1e613ff72c4bfc96c288f0ed4 *data/fukkink_lont.csv
-ad136ffcd7a8a3133de768ae87b37982 *inst/doc/Introduction_to_metaforest.R
-a7cd2280f981053d7b72322ce0bd764f *inst/doc/Introduction_to_metaforest.Rmd
-ceb0fdc44312cc11af9ed41af32daf90 *inst/doc/Introduction_to_metaforest.html
-3e5a340127b2b559f2efbb46cb08ff6e *man/ClusterMF-deprecated.Rd
-b2ce464db687a1a75969aa31e5f502c8 *man/MetaForest.Rd
-13f1dea9b8bad12306667bc2ca1ece4e *man/ModelInfo_mf.Rd
+25eb1ba3d7a6ff6e0b6880e2c9187fc5 *inst/CITATION
+0f99fb340dbbbfffcac445a81b75a853 *inst/doc/Introduction_to_metaforest.R
+9dfd785646d1e41135a6a9bb26bbfe71 *inst/doc/Introduction_to_metaforest.Rmd
+858f5e34c61078ea6c197321ec37e3af *inst/doc/Introduction_to_metaforest.html
+ac4a55db2acf53af501f0ca8018e6da6 *man/MetaForest.Rd
+bd36420fac0a645381060bb37eec732d *man/ModelInfo_mf.Rd
 1f5aaf3113ff9333addda2f507408b1f *man/ModelInfo_rma.Rd
-2288fde62aa54898f110a377d3812c0b *man/PartialDependence.Rd
-9416057960800caae33541931c570aab *man/SimulateSMD.Rd
+a68c0f03381da7c603a07b9a68f3a3a4 *man/PartialDependence.Rd
+5541437a0573d919d4e9e81f820b832c *man/SimulateSMD.Rd
 e878b5cb026252dad7a3709553050197 *man/VarImpPlot.Rd
-4671d451dfe4afde1d6fa3039620b99e *man/WeightedScatter.Rd
+572204047e4587528a9b821049434fba *man/WeightedScatter.Rd
 00a6f08db4fe25a9b0588fe54ac305eb *man/coef_test.Rd
-a77839d8b2064517231b0de254cfdaf0 *man/curry.Rd
+68bf241aa40851a8601c7e4b4df32570 *man/curry.Rd
 544cd29d80b072cb33938c11f74722c0 *man/extract_proximity.Rd
-1e6476f7146902b8620ed9c9565f4e64 *man/fukkink_lont.Rd
-30619d14393326e116d3c099ba9ecc23 *man/metaforest-deprecated.Rd
+4747ae582aee455cca024a2d2c1a5728 *man/fukkink_lont.Rd
 999dfbbaa2f02ca0baf94d3f67c60ead *man/plot.MetaForest.Rd
-d0729a89e979374167fa3c668a779821 *man/predict.MetaForest.Rd
+10e36f7940d7d3109a1a2a615e5305aa *man/predict.MetaForest.Rd
 b5f02e9b9cebc6c7c207573a5ff52a6d *man/preselect.Rd
 f0dc022b81b24284fe4b10a425b95033 *man/preselect_vars.Rd
 453ada4b241e83b52f432a7b71fde3de *man/print.summary.MetaForest.Rd
@@ -55,4 +55,5 @@ f0dc022b81b24284fe4b10a425b95033 *man/preselect_vars.Rd
 14275530d269f2ad35162d0e365ff431 *tests/testthat/test-rma_nonconverge.R
 18f6524126de70d301970ab6794e3ef3 *tests/testthat/test-s4.R
 5df7d4e4da960accaed4f79d892e4565 *tests/testthat/test-summary.R
-a7cd2280f981053d7b72322ce0bd764f *vignettes/Introduction_to_metaforest.Rmd
+b0781039d8af4002bcc52f624e028080 *tests/testthat/test-vi_works.R
+9dfd785646d1e41135a6a9bb26bbfe71 *vignettes/Introduction_to_metaforest.Rmd
diff --git a/NAMESPACE b/NAMESPACE
@@ -14,7 +14,6 @@ S3method(print,coef_test_results)
 S3method(print,mf_preselect)
 S3method(print,summary.MetaForest)
 S3method(summary,MetaForest)
-export(ClusterMF)
 export(MetaForest)
 export(ModelInfo_mf)
 export(ModelInfo_rma)

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,17 @@
+# Version 0.1.5
+
+* Fixed NOTES in CRAN check
+* Fixed plot.ranger()
+* Fixed seq_unif.integer() so it will no longer duplicate unique values when
+  length.out exceeds the number of unique values
+
+# Version 0.1.4
+
+* ClusterMF is hard deprecated. Replace any legacy call to ClusterMF with a call
+  to MetaForest with the same arguments.
+* Fixed PartialDependence for ranger objects
+* Fixed bug where the argument "vi" was passed on to ranger()
+
 # Version 0.1.3
 
 * ClusterMF is soft deprecated; it has the same functionality as 

diff --git a/R/MF_cluster.R b/R/MF_cluster.R
@@ -1,6 +1,7 @@
 MF_cluster <- function(formula, whichweights = "random", num.trees = 500,
                        mtry = NULL, method = "REML", tau2 = NULL, ...,
                        v, df, id) {
+  #browser()
     args <- match.call()[-1]
     if(!(num.trees%%2 == 0)){
       message("Conducting a clustered MetaForest analysis with an odd value of num.trees; num.trees has been rounded up to the nearest even number.")
@@ -56,43 +57,17 @@ MF_cluster <- function(formula, whichweights = "random", num.trees = 500,
         holdout = TRUE, ...)
       })
     names(res) <- c("rf1", "rf2")
+    forest <- do.call(merge_mf_cluster, c(res, list(y = y)))
 
     ## Compute importance
-    predicted <- res$rf1$predictions
-    predicted[is.na(predicted)] <- res$rf2$predictions[!is.na(res$rf2$predictions)]
+    predicted <- forest$predictions
     residuals <- y - predicted
 
     rma_after <- tryCatch({rma(yi = residuals, vi = v, method = method)}, error = function(e){
       warning("Error when attempting to estimate residual heterogeneity using metafor::rma using method ='", method, "'. Used method = 'DL' instead. See 'help(rma)' for possible remedies.", call. = FALSE)
       return(rma(yi = residuals, vi = v, method = "DL"))
     })
 
-    forest <- list(predictions = predicted,
-                   num.trees = num.trees,
-                   num.independent.variables = res$rf1$num.independent.variables,
-                   mtry = res$rf1$mtry,
-                   min.node.size = res$rf1$min.node.size,
-                   variable.importance = (res$rf1$variable.importance + res$rf2$variable.importance)/2,
-                   prediction.error = mean(c(res$rf1$prediction.error, res$rf2$prediction.error)),
-                   forest = list(dependent.varID = res$rf1$forest$dependent.varID,
-                                 num.trees = num.trees,
-                                 child.nodeIDs = c(rbind(res$rf1$forest$child.nodeIDs, res$rf2$forest$child.nodeIDs)),
-                                 split.varIDs = c(rbind(res$rf1$forest$split.varIDs, res$rf2$forest$split.varIDs)),
-                                 split.values = c(rbind(res$rf1$forest$split.values, res$rf2$forest$split.values)),
-                                 is.ordered = res$rf1$forest$is.ordered,
-                                 independent.variable.names = res$rf1$forest$independent.variable.names,
-                                 treetype = res$rf1$forest$treetype),
-                   #rf2 = res$rf2,
-                   splitrule = res$rf1$splitrule,
-                   treetype = res$rf1$treetype,
-                   r.squared = 1 - mean(c(res$rf1$prediction.error, res$rf2$prediction.error)) / var(y),
-                   call = formula,
-                   importance.mode = "permutation",
-                   num.samples = res$rf1$num.samples,
-                   cluster_forests = res
-                   )
-    class(forest) <- "ranger"
-    class(forest$forest) <- "ranger.forest"
     output <- list(forest = forest, rma_before = rma_before, rma_after = rma_after, data = df, vi = v, study = id, weights = metaweights)
     class(output) <- c("cluster_mf", "MetaForest")
     output

diff --git a/R/MetaForest.R b/R/MetaForest.R
@@ -63,8 +63,7 @@
 #' are thus included in the same cross-validation sample. Then, two random
 #' forests are grown on these cross-validation samples, and for each random
 #' forest, the other sample is used to calculate prediction error and variable
-#' importance (see \href{http://doi.org/10.1007/s11634-016-0276-4}{Janitza,
-#' Celik, & Boulesteix, 2016}).
+#' importance, see \doi{10.1007/s11634-016-0276-4}.
 #' @import stats
 #' @import ranger
 #' @import metafor
@@ -112,6 +111,7 @@ MetaForest <- function(formula, data, vi = "vi", study = NULL,
     if(grepl("(\\*|:|-)", formula[3])){
       stop("MetaForest only accepts additive model formulae. The underlying regression trees algorithm inherently captures interactions and non-linear effects as a sequence of consecutive splits on the interacting variables, so no interaction terms need to be specified.")
     }
+ # browser()
     cl <- match.call()
     args <- as.list(cl)[-1]
 
@@ -125,7 +125,7 @@ MetaForest <- function(formula, data, vi = "vi", study = NULL,
 
     if(vi %in% names(df)) df <- df[-match(vi, names(df))]
     args[["v"]] <- data[[vi]]
-
+    args[["vi"]] <- NULL
     args[["formula"]] <- paste(formula[2], formula[3],sep = " ~ ")
     args[["data"]] <- NULL
     if(is.null(study)){

diff --git a/R/ModelInfo_mf.R b/R/ModelInfo_mf.R
@@ -9,8 +9,8 @@
 #' training data x. The name of this column is specified using the argument
 #' 'vi'.
 #'
-#' To train a clustered MetaForest (\code{clusterMF}), simply provide the
-#' optional argument 'study' to the train function, to specify the study ID.
+#' To train a clustered MetaForest, for nested data structures, simply provide
+#' the optional argument 'study' to the train function, to specify the study ID.
 #' This should again refer to a column of x.
 #'
 #' When training a clustered MetaForest, make sure to use

diff --git a/R/PartialDependence.R b/R/PartialDependence.R
@@ -39,12 +39,13 @@
 #' @param output Character. What type of output should be returned? Defaults to
 #' \code{"plot"}, which returns and plots a gtable object. To obtain a list of
 #' \code{ggplot} objects instead, provide the argument \code{"list"}.
-#' @param ... Additional arguments to be passed to \code{marginalPrediction}.
+#' @param ... Additional arguments to be passed to and from functions.
 #' @return A gtable object.
 #' @import ggplot2
 #' @importFrom methods hasArg
 #' @examples
-#' # Partial dependence plot for MetaForest() model:
+#' \dontrun{
+#' # Partial dependence plot for MetaForest() model:
 #' set.seed(42)
 #' data <- SimulateSMD(k_train = 200, model = es * x[, 1] + es * x[, 2] + es *
 #'                                            x[, 1] * x[, 2])$training
@@ -54,7 +55,6 @@
 #'                         tau2 = 0.2450)
 #' # Examine univariate partial dependence plot for all variables in the model:
 #' PartialDependence(mf.random, pi = .8)
-#' \dontrun{
 #' # Examine the bivariate partial dependence plot of the interaction between X1 and X2:
 #' pd.plot <- PartialDependence(mf.random, vars = c("X1", "X2"), plot_int = TRUE)
 #' # Save to pdf file
@@ -139,10 +139,8 @@ PartialDependence.MetaForest <-
            mod_levels = NULL,
            output = "plot",
            ...) {
+    all_args <- as.list(match.call()[-1])
     # Check input arguments ---------------------------------------------------
-    if(hasArg("interaction")){
-      stop("The argument 'interaction' has been deprecated, and is replaced by the argument 'moderator'. See ?PartialDependence for help on how to use the 'moderator' argument." )
-    }
     if(hasArg("label_elements")){
       label_elements <- eval(match.call()[["label_elements"]])
     } else {
@@ -151,7 +149,7 @@ PartialDependence.MetaForest <-
     if (is.null(vars)) {
       select_vars <- names(x$forest$variable.importance)
     } else {
-      if (!class(vars) == "character") {
+      if (!inherits(vars, what = "character")) {
         stop("Argument 'vars' must be a character string.", call. = FALSE)
       }
       select_vars <-
@@ -170,7 +168,7 @@ PartialDependence.MetaForest <-
     }
     if (!is.null(moderator)) {
       select_vars <- select_vars[!select_vars == moderator]
-      if (!class(moderator) == "character") {
+      if (!inherits(moderator, what = "character")) {
         stop(
           "Moderator must be a character string, corresponding to the name of a variable in the MetaForest analysis.",
           call. = FALSE
@@ -211,7 +209,7 @@ PartialDependence.MetaForest <-
     cases <- nrow(x$data)
 
     numeric_vars <-
-      which(sapply(x$data[select_vars], class) %in% c("numeric", "integer"))
+      which(sapply(x$data[select_vars], inherits, c("numeric", "integer")))
 
     if (is.null(resolution)) {
       resolution <-
@@ -233,6 +231,9 @@ PartialDependence.MetaForest <-
 
     cont_mod <- FALSE
     raw.data <- data.table(x$data, wi = x$weights)
+    setcolorder(raw.data, c(names(raw.data)[!names(raw.data) %in% x$forest$forest$independent.variable.names],
+                            x$forest$forest$independent.variable.names
+                            ))
     if (!is.null(moderator)) {
       if (inherits(x$data[[moderator]], c("numeric", "integer"))) {
         if (is.null(mod_levels)) {
@@ -318,8 +319,11 @@ create_plotlist <-
       .plot <- pd[[.thisgrob]]
       if(!rename_labels){
         .plot[, ("Variable") := names(pd[[.thisgrob]])[1]]
+        y_label <- names(raw.data)[1]
+
       } else {
         .plot[, ("Variable") := rename_fun(names(pd[[.thisgrob]])[1], names(label_elements), label_elements)]
+        y_label <- rename_fun(names(raw.data)[1], names(label_elements), label_elements)
       }
       if (plot_int) {
         if (cont_mod) {
@@ -360,7 +364,7 @@ create_plotlist <-
               )
             ) +
               scale_y_continuous(limits = y_limits) +
-              ylab(names(raw.data)[1])
+              ylab(y_label)
           } else {
             p <- ggplot(
               .plot,
@@ -373,7 +377,7 @@ create_plotlist <-
               )
             ) +
               scale_y_continuous(limits = y_limits) +
-              ylab(names(raw.data)[1])
+              ylab(y_label)
           }
         }
 
@@ -383,7 +387,7 @@ create_plotlist <-
           p <- ggplot(.plot, aes_string(x = names(.plot)[1],
                                         y = names(.plot)[2])) +
             scale_y_continuous(limits = y_limits) +
-            ylab(names(raw.data)[1])
+            ylab(y_label)
         } else {
           p <- ggplot(.plot,
                       aes_string(
@@ -396,7 +400,7 @@ create_plotlist <-
             scale_color_continuous(guide = "none") +
             scale_fill_continuous(guide = "none") +
             scale_y_continuous(limits = y_limits) +
-            ylab(names(raw.data)[1])
+            ylab(y_label)
         }
 
       }