Permalink
Browse files

Massive speed ups to a*ply

  • Loading branch information...
hadley committed Apr 11, 2009
1 parent 9294897 commit bd286ed03e80686ef059d538f838f4b454ad2713
Showing with 38 additions and 14 deletions.
  1. +1 −0 NEWS
  2. +33 −0 R/indexed-array.r
  3. +3 −13 R/split-array.r
  4. +1 −1 R/split-data-frame.r
View
1 NEWS
@@ -8,6 +8,7 @@ plyr 0.1.6 (2008-XX-XX) ---------------------------------------------------
* fix bug in daply which prevented it from working correctly when data frame was split by multiple variables
* all ply functions deal more elegantly when given function names: can supply a vector of function names, and name is used as label in output
* ddply: first variable now varies slowest as you'd expect
+* massive speed ups for splitting large arrays, in a similar way to data frames
plyr 0.1.5 (2008-02-23) ---------------------------------------------------
View
@@ -0,0 +1,33 @@
+# An indexed array
+# Create an indexed array: a space efficient handle that pairs an
+# environment holding the data with a table of indices, so that pieces are
+# only extracted when they are asked for.
+#
+# @arguments environment whose \code{data} binding holds the object to index
+# @arguments table of indices, one row per piece
+# @keywords internal
+# @alias [[.indexed_array
+# @alias length.indexed_array
+indexed_array <- function(env, index) {
+  # Plain lists (i.e. lists that are not data frames) are subset with [[ ]];
+  # everything else, data frames included, is subset with [ ].
+  plain_list <- is.list(env$data) && !is.data.frame(env$data)
+  subs <- if (plain_list) c("[[", "]]") else c("[", "]")
+
+  # Store the opening and closing subsetting operators next to the
+  # environment and the index so the extraction method can use them.
+  pieces <- list(env = env, index = index, subs = subs)
+  structure(pieces, class = c("indexed_array", "indexed"))
+}
+
+# Number of pieces in an indexed array: one per row of its index table.
+length.indexed_array <- function(x) {
+  idx <- x$index
+  nrow(idx)
+}
+
+# Extract the i-th piece of an indexed array.
+#
+# Row i of the index table supplies one subscript per dimension of the data.
+# An empty string subscript means "take everything along this dimension" and
+# is turned into a missing argument. The result of the subsetting call (made
+# with drop = TRUE) is returned.
+"[[.indexed_array" <- function(x, i) {
+  indices <- unname(as.list(x$index[i, , drop = TRUE]))
+  indices <- lapply(indices, function(ind) {
+    if (identical(ind, "")) quote(expr = ) else ind
+  })
+
+  ## do.call(x$subs[1], c(list(x$env$data), ...)) is very slow because the
+  ## data has to be copied into the call. Instead the call refers to the data
+  ## through the unevaluated expression x$env$data, which is only looked up
+  ## when the call is evaluated in this function's frame. Building the call
+  ## directly (rather than pasting text and parsing it) passes the subscripts
+  ## as values, so character subscripts are used as names instead of being
+  ## parsed as code.
+  call <- as.call(c(
+    list(as.name(x$subs[1]), quote(x$env$data)),
+    indices,
+    list(drop = TRUE)
+  ))
+  eval(call)
+}
View
@@ -1,4 +1,3 @@
-
# Split an array by .margins
# Split a 2d or higher data structure into lower-d pieces based on .margins
#
@@ -26,16 +25,8 @@ splitter_a <- function(data, .margins = 1) {
indices <- expand.grid(dimensions, KEEP.OUT.ATTRS = FALSE)
names(indices) <- paste("X", 1:ncol(indices), sep="")
- # && !is.array(data)
- subs <- if (is.list(data) && !is.data.frame(data)) "[[" else "["
- browser()
- pieces <- lapply(1:nrow(indices),
- function(i) do.call(subs,
- c(list(data), unname(indices[i, ,drop=TRUE]), drop=TRUE)
- )
- )
- dim(pieces) <- dim(data)[.margins]
+ il <- indexed_array(environment(), indices)
if (is.data.frame(data) & identical(.margins, 1)) {
split_labels <- data
@@ -46,10 +37,9 @@ splitter_a <- function(data, .margins = 1) {
if (!is.null(colnames)) names(split_labels) <- colnames
}
-
structure(
- pieces,
- class = c("split", "list"),
+ il,
+ class = c(class(il), "split", "list"),
split_type = "array",
split_labels = split_labels
)
View
@@ -42,7 +42,7 @@ splitter_d <- function(data, .variables = NULL, drop = TRUE) {
structure(
il,
- class = c("indexed", "indexed_df", "split", "list"),
+ class = c(class(il), "split", "list"),
split_type = "data.frame",
split_labels = split_labels
)

0 comments on commit bd286ed

Please sign in to comment.