Permalink
Browse files

rbind.fill linear tests; linear factors

  • Loading branch information...
1 parent 2d79ebd commit 0d9b82797aab4cc979755e8336ff07423a741313 @crowding crowding committed Sep 1, 2013
Showing with 67 additions and 15 deletions.
  1. +24 −15 R/rbind-fill.r
  2. +43 −0 inst/tests/test-rbind.r
View
@@ -143,32 +143,35 @@ allocate_column <- function(example, nrows, dfs, var) {
if (is.factor(example)) {
df_has <- vapply(dfs, function(df) var %in% names(df), FALSE)
isfactor <- vapply(dfs[df_has], function(df) is.factor(df[[var]]), FALSE)
- if (!all(isfactor)) {
+ if (all(isfactor)) {
+ #will be referenced by the mutator
+ levels <- unique(unlist(lapply(dfs[df_has],
+ function(df) levels(df[[var]]))))
+ class <- a$class
+ a$levels <- levels
+ a$class <- NULL #postpone setting class
+ handler <- "factor"
+ } else {
#fall back on character
type <- "character"
handler <- "character"
a$class <- NULL
a$levels <- NULL
- } else {
- a$levels <- unique(unlist(lapply(dfs[df_has],
- function(df) levels(df[[var]]))))
- handler <- "factor"
}
}
column <- vector(type, length)
- #tracemem(column)
if (!isList) {
column[] <- NA
}
attributes(column) <- a
}
- # Check that all inputs are factors, and combine levels (or convert to char)
-
- #Return a mutator.
- #It is especially important never to inspect the column in the mutator.
- # To avoid inspecting the column, return a specialized mutator.
+ #It is especially important never to inspect the column when in the main
+ #rbind.fill loop. To avoid inspecting the column, we've done
+ #specialization (figuring out the array assignment form and data
+ #type) up front, and instead of returning the column, we return a
+ #mutator function that closes over the column.
switch(
handler,
character = function(rows, what) {
@@ -177,12 +180,18 @@ allocate_column <- function(example, nrows, dfs, var) {
eval(assignment)
},
factor = function(rows, what) {
- if(nargs() == 0) return(column)
- eval(assignment)
+ if(nargs() == 0) {
+ class(column) <<- class
+ column
+ } else {
+ #duplicate what `[<-.factor` does
+ what <- match(what, levels)
+ #no need to check since we already computed levels
+ eval(assignment)
+ }
},
function(rows, what) {
if(nargs() == 0) return(column)
eval(assignment)
- }
- )
+ })
}
View
@@ -209,3 +209,46 @@ test_that("zero col data frames ok", {
expect_equal(nrow(zb), 1)
expect_equal(nrow(zc), 1)
})
+
+rbind_time <- function(size,
+ classes=c("numeric", "character",
+ "array", "factor", "time")) {
+ unit <- quickdf(list(numeric = 1:3,
+ character = c("a", "b", "c"),
+ array = array(1:6, c(3,2)),
+ factor = factor(c("a", "b", "c")),
+ time = as.POSIXct(Sys.time()) + 1:3))
+ args <- rep(list(unit[classes]), size)
+ system.time(do.call(rbind.fill, args))
+}
+
+get_rbind_times <- function(...) {
+ rbind_time(10) #warm up/JIT
+ mdply(.fun=rbind_time, ...)
+}
+
+expect_linear_enough <- function(timings, size=2^10, threshold=0.03) {
+ #expect that no more than `threshold` of a `size` input's runtime is
+ #accounted for by quadratic behavior
+ model <- lm(user.self ~ size + I(size^2), timings)
+ p <- predict(model, newdata=data.frame(size=2^10), type="terms")
+ expect_that(p[2] / p[1] < 0.03, is_true(), NULL, NULL)
+}
+
+test_that("rbind.fill performance linear", {
+ rbind.times <- get_rbind_times(data.frame(size = 2^(1:10)),
+ classes=c("numeric", "character", "array"))
+ expect_linear_enough(rbind.times)
+})
+
+test_that("rbind.fill performance linear with factors", {
+ rbind.times <- get_rbind_times(data.frame(size = 2^(1:10)),
+ classes=c("factor"))
+ expect_linear_enough(rbind.times)
+})
+
+test_that("rbind.fill performance linear with times", {
+ rbind.times <- get_rbind_times(data.frame(size = 2^(1:10)),
+ classes=c("time"))
+ expect_linear_enough(rbind.times)
+})

0 comments on commit 0d9b827

Please sign in to comment.