Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Handle POSIXct in linear time

  • Loading branch information...
commit 5a6c1bd1d9cffd09eebea9525716187fbf906146 1 parent 0d9b827
@crowding crowding authored
Showing with 77 additions and 65 deletions.
  1. +65 −55 R/rbind-fill.r
  2. +12 −10 inst/tests/test-rbind.r
View
120 R/rbind-fill.r
@@ -97,81 +97,82 @@ allocate_column <- function(example, nrows, dfs, var) {
#it. Inspection, even something as innocuous as is.matrix(column),
#will setting NAMED to 2 and forcing a copy on the
#next modification.
+ a <- attributes(example)
type <- typeof(example)
handler <- type
#this statement may be altered below
assignment <- quote(column[rows] <<- what)
- if (inherits(example, "POSIXt")) {
- #this should get folded in to general attribute handling as well...
- tzone <- attr(example, "tzone")
- column <- structure(as.POSIXct(rep(NA, nrows)), tzone=tzone)
- } else {
+ a$names <- NULL
+ isList <- is.recursive(example)
- a <- attributes(example)
- a$names <- NULL
- isList <- is.recursive(example)
+ if (is.array(example)) {
- if (is.array(example)) {
+ a$dimnames <- NULL #todo: check if rbind handles these
- a$dimnames <- NULL #todo: check if rbind handles these
+ # Check that all other args have consistent dims
+ df_has <- vapply(dfs, function(df) var %in% names(df), FALSE)
+ dims <- unique(lapply(dfs[df_has], function(df) dim(df[[var]])[-1]))
+ if (length(dims) > 1)
+ stop("Array variable ", var, " has inconsistent dims")
- # Check that all other args have consistent dims
- df_has <- vapply(dfs, function(df) var %in% names(df), FALSE)
- dims <- unique(lapply(dfs[df_has], function(df) dim(df[[var]])[-1]))
- if (length(dims) > 1)
- stop("Array variable ", var, " has inconsistent dims")
+ # add empty args
+ assignment[[2]] <- as.call(
+ c(as.list(assignment[[2]]),
+ rep(list(quote(expr = )), length(dims[[1]]))))
- # add empty args
- assignment[[2]] <- as.call(
- c(as.list(assignment[[2]]),
- rep(list(quote(expr = )), length(dims[[1]]))))
+ if (length(dims[[1]]) == 0) { #is dropping dims necessary for 1d arrays?
+ a$dim <- NULL
+ } else {
+ a$dim <- c(nrows, dim(example)[-1])
+ }
- if (length(dims[[1]]) == 0) { #is dropping dims necessary for 1d arrays?
- a$dim <- NULL
- } else {
- a$dim <- c(nrows, dim(example)[-1])
- }
+ length <- prod(a$dim)
- length <- prod(a$dim)
+ } else {
+ length <- nrows
+ }
+ if (is.factor(example)) {
+ df_has <- vapply(dfs, function(df) var %in% names(df), FALSE)
+ isfactor <- vapply(dfs[df_has], function(df) is.factor(df[[var]]), FALSE)
+ if (all(isfactor)) {
+ #will be referenced by the mutator
+ levels <- unique(unlist(lapply(dfs[df_has],
+ function(df) levels(df[[var]]))))
+ class <- a$class
+ a$levels <- levels
+ a$class <- NULL #postpone setting class
+ handler <- "factor"
} else {
- length <- nrows
+ #fall back on character
+ type <- "character"
+ handler <- "character"
+ a$class <- NULL
+ a$levels <- NULL
}
+ }
- if (is.factor(example)) {
- df_has <- vapply(dfs, function(df) var %in% names(df), FALSE)
- isfactor <- vapply(dfs[df_has], function(df) is.factor(df[[var]]), FALSE)
- if (all(isfactor)) {
- #will be referenced by the mutator
- levels <- unique(unlist(lapply(dfs[df_has],
- function(df) levels(df[[var]]))))
- class <- a$class
- a$levels <- levels
- a$class <- NULL #postpone setting class
- handler <- "factor"
- } else {
- #fall back on character
- type <- "character"
- handler <- "character"
- a$class <- NULL
- a$levels <- NULL
- }
- }
+ if (inherits(example, "POSIXt")) {
+ tzone <- attr(example, "tzone")
+ a$class <- NULL
+ class <- c("POSIXct", "POSIXt")
+ type <- "double"
+ handler <- "time"
+ }
- column <- vector(type, length)
- if (!isList) {
- column[] <- NA
- }
- attributes(column) <- a
+ column <- vector(type, length)
+ if (!isList) {
+ column[] <- NA
}
+ attributes(column) <- a
- #It is especially important never to inspect the column when in the main
- #rbind.fill loop. To avoid inspecting the column, we've done
- #specialization (figuring out the array assignment form and data
- #type) up front, and instead of returning the column, we return a
- #mutator function that closes over the column.
+ #It is especially important never to inspect the column when in the
+ #main rbind.fill loop. To avoid that, we've done specialization
+ #(figuring out the array assignment form and data type) ahead of
+ #time, and instead of returning the column, we return a mutator
+ #function that closes over the column.
switch(
handler,
character = function(rows, what) {
@@ -190,6 +191,15 @@ allocate_column <- function(example, nrows, dfs, var) {
eval(assignment)
}
},
+ time = function(rows, what) {
+ if (nargs() == 0) {
+ class(column) <<- class
+ column
+ } else {
+ what <- as.POSIXct(what, tz=tzone)
+ eval(assignment)
+ }
+ },
function(rows, what) {
if(nargs() == 0) return(column)
eval(assignment)
View
22 inst/tests/test-rbind.r
@@ -230,25 +230,27 @@ get_rbind_times <- function(...) {
expect_linear_enough <- function(timings, size=2^10, threshold=0.03) {
#expect that no more than `threshold` of a `size` input's runtime is
#accounted for by quadratic behavior
- model <- lm(user.self ~ size + I(size^2), timings)
- p <- predict(model, newdata=data.frame(size=2^10), type="terms")
- expect_that(p[2] / p[1] < 0.03, is_true(), NULL, NULL)
+ #don't understand what predict.lm does w/ built-in intercepts
+ timings <- mutate(timings, intercept=1)
+ model <- lm(user.self ~ size + I(size^2) - 1 + intercept, timings)
+ p <- predict(model, newdata=data.frame(size=2^10, intercept=1), type="terms")
+ expect_that(p[2] / p[1] < 0.2, is_true(), NULL, NULL)
}
test_that("rbind.fill performance linear", {
- rbind.times <- get_rbind_times(data.frame(size = 2^(1:10)),
- classes=c("numeric", "character", "array"))
- expect_linear_enough(rbind.times)
+ timings <- get_rbind_times(data.frame(size = 2^(1:10)),
+ classes=c("numeric", "character", "array"))
+ expect_linear_enough(timings)
})
test_that("rbind.fill performance linear with factors", {
- rbind.times <- get_rbind_times(data.frame(size = 2^(1:10)),
+ timings <- get_rbind_times(data.frame(size = 2^(1:10)),
classes=c("factor"))
- expect_linear_enough(rbind.times)
+ expect_linear_enough(timings)
})
test_that("rbind.fill performance linear with times", {
- rbind.times <- get_rbind_times(data.frame(size = 2^(1:10)),
+ timings <- get_rbind_times(data.frame(size = 2^(1:10)),
classes=c("time"))
- expect_linear_enough(rbind.times)
+ expect_linear_enough(timings)
})
Please sign in to comment.
Something went wrong with that request. Please try again.