Permalink
Browse files

Add .paropts parameter to llply. #84

  • Loading branch information...
1 parent 05cc488 commit eb432b6aa2d49aa7cf70f5b253f3b797c3b1469b @hadley committed Oct 11, 2012
Showing with 36 additions and 3 deletions.
  1. +7 −0 R/parallel.r
  2. +11 −2 R/ply-list.r
  3. +9 −0 inst/tests/test-parallel.r
  4. +9 −1 man/llply.Rd
View
@@ -7,3 +7,10 @@ setup_parallel <- function() {
warning("No parallel backend registered", call. = TRUE)
}
}
+
+parallel_fe <- function(n, options) {  # Build and evaluate a foreach() call that iterates i over 1..n, with `options` as extra arguments.
+ i <- seq_len(n)  # iteration indices; seq_len() gives an empty vector when n is 0
+ fe_call <- as.call(c(list(as.name("foreach"), i = i), options))  # foreach(i = i, <options...>); a NULL `options` adds nothing
+
+ eval(fe_call)  # evaluate the constructed call; its value is what this function returns
+}
View
@@ -10,6 +10,12 @@
#' @param .inform produce informative error messages? This is turned off by
#' default because it substantially slows processing speed, but is very
#' useful for debugging
+#' @param .paropts a list of additional options passed into
+#' the \code{\link[foreach]{foreach}} function when parallel computation
+#' is enabled. This is important if (for example) your code relies on
+#' external data or packages: use the \code{.export} and \code{.packages}
+#' arguments to supply them so that all cluster nodes have the correct
+#' environment set up for computing.
#' @export
#' @examples
#' llply(llply(mtcars, round), table)
@@ -19,7 +25,8 @@
#'
#' llply(x, mean)
#' llply(x, quantile, probs = 1:3/4)
-llply <- function(.data, .fun = NULL, ..., .progress = "none", .inform = FALSE, .parallel = FALSE) {
+llply <- function(.data, .fun = NULL, ..., .progress = "none", .inform = FALSE,
+ .parallel = FALSE, .paropts = NULL) {
if (is.null(.fun)) return(as.list(.data))
if (is.character(.fun) || is.list(.fun)) .fun <- each(.fun)
if (!is.function(.fun)) stop(".fun is not a function.")
@@ -68,7 +75,9 @@ llply <- function(.data, .fun = NULL, ..., .progress = "none", .inform = FALSE,
}
if (.parallel) {
setup_parallel()
- result <- foreach(i = seq_len(n)) %dopar% do.ply(i)
+ fe <- parallel_fe(n, .paropts)
+
+ result <- fe %dopar% do.ply(i)
} else {
result <- loop_apply(n, do.ply)
}
@@ -17,3 +17,12 @@ test_that("l_ply + .parallel complains about invalid arguments", {
l_ply(1:10, force, .parallel = TRUE, .progress = "text"),
"Progress disabled")
})
+
+test_that(".paropts passes options to foreach", {
+ combine <- function(a, b) NULL  # combiner that ignores both inputs and always returns NULL
+ x <- llply(1:10, identity, .parallel = TRUE,
+ .paropts = list(.combine = combine))  # hand the custom combiner to llply through .paropts
+ expect_equal(x, NULL)  # presumably NULL only when foreach actually received .combine -- confirm against foreach docs
+})
+
+registerDoMC(1)
View
@@ -3,7 +3,7 @@
\title{Split list, apply function, and return results in a list.}
\usage{
llply(.data, .fun = NULL, ..., .progress = "none",
- .inform = FALSE, .parallel = FALSE)
+ .inform = FALSE, .parallel = FALSE, .paropts = NULL)
}
\arguments{
\item{.fun}{function to apply to each piece}
@@ -21,6 +21,14 @@
\item{.inform}{produce informative error messages? This
is turned off by default because it substantially
slows processing speed, but is very useful for debugging}
+
+ \item{.paropts}{a list of additional options passed into
+ the \code{\link[foreach]{foreach}} function when parallel
+ computation is enabled. This is important if (for
+ example) your code relies on external data or packages:
+ use the \code{.export} and \code{.packages} arguments to
+ supply them so that all cluster nodes have the correct
+ environment set up for computing.}
}
\value{
list of results

0 comments on commit eb432b6

Please sign in to comment.