Permalink
Browse files

Faster array indexing.

Closes #27
  • Loading branch information...
1 parent ea17d3b commit b34e78b24ce73701a21224989581c22a9e06fcd9 @hadley committed Oct 6, 2012
Showing with 76 additions and 12 deletions.
  1. +2 −0 NEWS
  2. +12 −12 R/indexed-array.r
  3. +62 −0 benchmark/extract.r
View
2 NEWS
@@ -1,6 +1,8 @@
Version 1.7.1.99
------------------------------------------------------------------------------
+* `a*ply`: 25x speedup when indexing array objects, 3x speedup when indexing data frames. This should substantially reduce the overhead of using `a*ply`
+
* `quickdf` is around 20% faster
* `round_any` now works with `POSIXct` objects, thanks to Jean-Olivier Irisson (#76)
View
@@ -23,19 +23,19 @@ indexed_array <- function(env, index) {
if (is.list(env$data)) {
if (is.data.frame(env$data) || (is.array(env$data) && !exact)) {
- subs <- c("[", "]")
+ subs <- "["
} else {
- subs <- c("[[", "]]")
+ subs <- "[["
}
} else {
- subs <- c("[", "]")
+ subs <- "["
}
# Don't drop if data is a data frame
drop <- !is.data.frame(env$data)
structure(
- list(env = env, index = index, drop = drop, subs = subs),
+ list(env = env, index = index, drop = drop, subs = as.name(subs)),
class = c("indexed_array", "indexed")
)
}
@@ -45,14 +45,14 @@ length.indexed_array <- function(x) nrow(x$index)
#' @S3method [[ indexed_array
"[[.indexed_array" <- function(x, i) {
- indices <- paste(x$index[i, ,drop=TRUE], collapse = ", ")
-
- ## This is very slow because we have to create a copy to use do.call
- # do.call(x$subs, c(list(x$env$data), indices, drop=TRUE))
-
- call <- paste("x$env$data",
- x$subs[1], indices, ", drop = ", x$drop, x$subs[2], sep = "")
- eval(parse(text = call))
+ indices <- unname(x$index[i, , drop = TRUE])
+ indices <- lapply(indices, function(x) if (x == "") bquote() else x)
+
+ call <- as.call(c(
+ list(x$subs, quote(x$env$data)),
+ indices,
+ list(drop = x$drop)))
+ eval(call)
}
#' @S3method names indexed
View
@@ -0,0 +1,62 @@
+library(microbenchmark)
+
+df <- data.frame(x = sample(100), y = sample(100))
+ar <- array(sample(200), c(100, 2))
+
+# # Motivating problem:
+# system.time(alply(ar, 1))
+# # user system elapsed
+# # 0.024 0.007 0.031
+# system.time(alply(df, 1))
+# # user system elapsed
+# # 0.030 0.007 0.037
+# system.time(split(df, 1:nrow(df)))
+# # user system elapsed
+# # 0.004 0.001 0.004
+
+# Construct call by parsing string
+extract1 <- function(x, i) {
+ i <- paste(unlist(lapply(i, as.character)), collapse = ", ")
+ call <- paste("x", "[", i, ", drop = ", FALSE, "]", sep = "")
+ eval(parse(text = call))
+}
+
+# Construct call by hand
+extract2 <- function(x, i) {
+ call <- as.call(c(
+ list(as.name("["), quote(x)), i, list(drop = FALSE)))
+ eval(call)
+}
+
+# Use .subset directly
+extract3 <- function(x, i) {
+
+ # ???
+}
+
+if (FALSE) {
+ # Which is faster? Parsing or constructing call?
+ microbenchmark(
+ extract1(df, list(20, T)),
+ extract2(df, list(20, T)),
+ extract1(ar, list(20, T)),
+ extract2(ar, list(20, T))
+ )
+
+ # Compare to doing it directly?
+ microbenchmark(
+ extract2(df, list(20, T)),
+ df[20, , drop = FALSE],
+ extract2(ar, list(20, T)),
+ ar[20, , drop = FALSE]
+ )
+
+ # Which is faster? TRUE or missing argument
+ microbenchmark(
+ extract2(df, list(20, T)),
+ extract2(df, list(20, bquote())),
+ extract2(ar, list(20, T)),
+ extract2(ar, list(20, bquote()))
+ )
+
+}

0 comments on commit b34e78b

Please sign in to comment.