diff --git a/DESCRIPTION b/DESCRIPTION index ceaa19d1..4222f225 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -26,4 +26,4 @@ Collate: 'dimensions.r' 'helper-arrange.r' 'helper-col-wise.r' 'progress.r' 'quote.r' 'rbind-matrix.r' 'rbind.r' 'simplify-array.r' 'simplify-data-frame.r' 'simplify-vector.r' 'split-array.r' 'split-data-frame.r' 'split-indices.r' 'split.r' - 'utils.r' + 'utils.r' 'loop-apply.r' diff --git a/NEWS b/NEWS index 3c1424b5..1c9cf8a2 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,7 @@ plyr 1.2 (2010-XX-XX) --------------------------------------------------- * llply gains .parallel argument that when set to true will apply parallel functions in parallel using foreach package +* llply: in serial case, for loop replaced with custom C function that takes about 40% less time (or about 20% less time than lapply) plyr 1.1 (2010-07-19) --------------------------------------------------- diff --git a/R/loop-apply.r b/R/loop-apply.r new file mode 100644 index 00000000..f42b6103 --- /dev/null +++ b/R/loop-apply.r @@ -0,0 +1,12 @@ +#' Split indices. +#' An optimised version of split for the special case of splitting row +#' indices into groups, as used by \code{\link{splitter_d}} +#' +#' @param index integer indices +#' @param group integer groups +#' @param n largest integer (may not appear in index) +#' @useDynLib plyr +#' @keywords internal manip +loop_apply <- function(n, f, env = parent.frame()) { + .Call("loop_apply", as.integer(n), f, env) +} \ No newline at end of file diff --git a/R/ply-list.r b/R/ply-list.r index d2326f9d..df20fb0a 100644 --- a/R/ply-list.r +++ b/R/ply-list.r @@ -75,7 +75,7 @@ llply <- function(.data, .fun = NULL, ..., .progress = "none", .inform = FALSE, } result <- foreach(i = seq_len(n)) %dopar% do.ply(i) } else { - result <- lapply(seq_len(n), do.ply) + result <- loop_apply(n, do.ply) } attributes(result)[c("split_type", "split_labels")] <- diff --git a/R/split-indices.r b/R/split-indices.r index 368afd2c..12d4557e 100644 --- a/R/split-indices.r +++ b/R/split-indices.r @@ -10,5 +10,4 @@ split_indices <- function(index, group, n = max(group)) { if (length(index) == 0) return(integer()) .Call("split_indices", index, group, as.integer(n)) -} - \ No newline at end of file +} \ No newline at end of file diff --git a/benchmark/timings.csv b/benchmark/timings.csv index 04701d22..f03147e1 100644 --- a/benchmark/timings.csv +++ b/benchmark/timings.csv @@ -45,3 +45,17 @@ "bench-dlply.r","6i-a",2010-07-20 13:46:52,"67da54f",0.248,0.033,0.281 "bench-dlply.r","6i-b",2010-07-20 13:46:52,"67da54f",0.631,0.034,0.666 "bench-dlply.r","6i-c",2010-07-20 13:46:57,"67da54f",4.123,0.072,4.229 +"bench-llply.r","1e5",2010-07-20 15:17:10,"6562dda",0.461,0.035,0.497 +"bench-llply.r","1e6",2010-07-20 15:17:16,"6562dda",5.668,0.047,5.718 +"bench-llply.r","1e5",2010-07-20 15:17:18,"6562dda",0.461,0.042,0.503 +"bench-llply.r","1e6",2010-07-20 15:17:23,"6562dda",5.591,0.025,5.621 +"bench-dlply.r","6-a",2010-07-20 15:17:29,"6562dda",1.505,0.415,1.921 +"bench-dlply.r","6-b",2010-07-20 15:17:35,"6562dda",3.503,2.072,5.58 +"bench-dlply.r","6i-a",2010-07-20 15:17:35,"6562dda",0.251,0.032,0.283 +"bench-dlply.r","6i-b",2010-07-20 15:17:36,"6562dda",0.65,0.048,0.697 +"bench-dlply.r","6i-c",2010-07-20 15:17:40,"6562dda",4.182,0.07,4.255 +"bench-dlply.r","6-a",2010-07-20 15:17:45,"6562dda",1.169,0.375,1.545 +"bench-dlply.r","6-b",2010-07-20 15:17:51,"6562dda",3.524,1.959,5.484 +"bench-dlply.r","6i-a",2010-07-20 15:17:51,"6562dda",0.247,0.029,0.276 +"bench-dlply.r","6i-b",2010-07-20 15:17:52,"6562dda",0.638,0.031,0.67 +"bench-dlply.r","6i-c",2010-07-20 15:17:56,"6562dda",4.132,0.062,4.196 diff --git a/src/loop-apply.c b/src/loop-apply.c new file mode 100644 index 00000000..59588b0e --- /dev/null +++ b/src/loop-apply.c @@ -0,0 +1,25 @@ +#include +#include + +SEXP loop_apply(SEXP n, SEXP f, SEXP rho) { + if(!isFunction(f)) error("'f' must be a function"); + if(!isEnvironment(rho)) error("'rho' should be an environment"); + + int n1 = INTEGER(n)[0]; + + SEXP results, R_fcall; + PROTECT(results = allocVector(VECSXP, n1)); + PROTECT(R_fcall = lang2(f, R_NilValue)); + + SEXP ii; + for(int i = 0; i < n1; i++) { + ii = PROTECT(ScalarInteger(i + 1)); + SETCADR(R_fcall, ii); + SET_VECTOR_ELT(results, i, eval(R_fcall, rho)); + + UNPROTECT(1); + } + + UNPROTECT(2); + return results; +}