Permalink
Browse files

Faster version of rbind.fill

  • Loading branch information...
1 parent efabba6 commit 489edc0b51c7326f1f6ed4fa98d5d55d60258635 @hadley committed Apr 11, 2009
Showing with 29 additions and 11 deletions.
  1. +1 −0 NEWS
  2. +27 −10 R/rbind.r
  3. +1 −1 R/simplify-data-frame.r
View
1 NEWS
@@ -11,6 +11,7 @@ Speed-ups
* massive speed ups for splitting large arrays
* fixed typo that was causing a 50% speed penalty for d*ply
+* rewritten rbind.fill is considerably (> 4x) faster for many data frames
Bug fixes:
View
@@ -7,24 +7,41 @@
# @arguments data frames to row bind together
# @keyword manip
#X rbind.fill(mtcars[c("mpg", "wt")], mtcars[c("wt", "cyl")])
+#X
+#X bplayer <- split(baseball, baseball$id)
+#X system.time(b1 <- do.call("rbind", bplayer))
+#X rownames(b1) <- NULL
+#X system.time(b2 <- rbind.fill(bplayer))
+#X stopifnot(all.equal(b1, b2))
rbind.fill <- function(...) {
  # Row-bind data frames, filling columns that are absent from a frame
  # with NA.
  #
  # Accepts either several data frames as separate arguments, or a single
  # plain list of data frames.  NULL entries and frames without rows or
  # columns contribute nothing to the result.  Output columns appear in
  # order of first appearance across the inputs.
  #
  # Returns list() when called with no arguments, an empty data frame when
  # no input has any rows, and otherwise a data frame with one row per
  # input row.
  dfs <- list(...)
  if (length(dfs) == 0) return(list())

  # A single plain list (not itself a data frame) is a list of data frames
  if (is.list(dfs[[1]]) && !is.data.frame(dfs[[1]])) {
    dfs <- dfs[[1]]
  }

  # Reject anything that is neither NULL nor a data frame up front, rather
  # than letting it silently shift the row positions computed below
  is_null <- vapply(dfs, is.null, logical(1), USE.NAMES = FALSE)
  is_df <- vapply(dfs, is.data.frame, logical(1), USE.NAMES = FALSE)
  if (any(!is_null & !is_df)) {
    stop("All inputs to rbind.fill must be data frames or NULL",
      call. = FALSE)
  }
  dfs <- dfs[is_df]

  # Drop frames with no columns or no rows so every remaining frame owns a
  # non-empty, ascending row range
  has_data <- vapply(
    dfs,
    function(df) length(df) > 0 && nrow(df) > 0,
    logical(1),
    USE.NAMES = FALSE
  )
  dfs <- dfs[has_data]
  if (length(dfs) == 0) return(data.frame())

  rows <- vapply(dfs, nrow, integer(1), USE.NAMES = FALSE)
  n <- sum(rows)
  ends <- cumsum(rows)
  starts <- ends - rows + 1L

  all_names <- unique(unlist(lapply(dfs, names), use.names = FALSE))
  output <- vector("list", length(all_names))
  names(output) <- all_names

  for (var in all_names) {
    column <- NULL
    all_factors <- TRUE
    lvls <- character(0)

    for (i in seq_along(dfs)) {
      piece <- dfs[[i]][[var]]
      if (is.null(piece)) next

      # Factors travel as character so that codes from different level
      # sets are never mixed; the factor is rebuilt once at the end
      if (is.factor(piece)) {
        lvls <- union(lvls, levels(piece))
        piece <- as.character(piece)
      } else {
        all_factors <- FALSE
      }

      # Preallocate the full-length column as NA of the piece's own type,
      # so rows from frames lacking this variable stay NA
      if (is.null(column)) {
        column <- rep(piece[NA_integer_], n)
      }
      column[seq.int(starts[i], ends[i])] <- piece
    }

    # Only a column that was a factor everywhere it occurred is returned
    # as a factor; mixed factor/non-factor input stays character
    if (all_factors) {
      column <- factor(column, levels = lvls)
    }
    output[[var]] <- column
  }

  as.data.frame(output, stringsAsFactors = FALSE)
}
-
# Compact list
# Remove all NULL entries from a list
#
# The input is passed through Filter() with the negation of is.null() as
# the predicate, so only the non-NULL elements are returned.
#
# @arguments list from which NULL entries are removed
# @keyword manip
-compact <- function(l) Filter(Negate(is.null), l)
+compact <- function(l) Filter(Negate(is.null), l)
View
@@ -21,7 +21,7 @@ list_to_dataframe <- function(res, labels = NULL) {
} else {
l_ply(res, function(x) if(!is.null(x) & !is.data.frame(x)) stop("Not a data.frame!"))
- resdf <- do.call("rbind.fill", res)
+ resdf <- rbind.fill(res)
rows <- unlist(llply(res, function(x) if(is.null(x)) 0 else nrow(x)))
}

0 comments on commit 489edc0

Please sign in to comment.