Browse files

Make indexed structure more generic. Reorganise many files

  • Loading branch information...
1 parent 937eb43 commit 929489745c012f68934e6676443f60aa92b5fb02 @hadley committed Apr 11, 2009
Showing with 197 additions and 199 deletions.
  1. +20 −0 R/indexed-data-frame.r
  2. +0 −40 R/indexed-list.r
  3. +15 −0 R/indexed.r
  4. +0 −40 R/{simplify.r → simplify-array.r}
  5. +40 −0 R/simplify-data-frame.r
  6. +56 −0 R/split-array.r
  7. +66 −0 R/split-data-frame.r
  8. +0 −119 R/split.r
View
20 R/indexed-data-frame.r
@@ -0,0 +1,20 @@
+# An indexed list
+# Create a indexed list, a space efficient way of indexing into a large data frame
+#
+# @arguments environment containing data frame
+# @argument list of indices
+# @keywords internal
+# @alias length.indexed
+# @alias names.indexed
+# @alias as.list.indexed
+# @alias [[.indexed_df
+indexed_df <- function(env, index) {
+ structure(
+ list(env = env, index = index),
+ class = c("indexed", "indexed_df")
+ )
+}
+
+"[[.indexed_df" <- function(x, i) {
+ x$env$data[x$index[[i]], , drop = FALSE]
+}
View
40 R/indexed-list.r
@@ -1,40 +0,0 @@
-# An indexed list
-# Create a indexed list, a space efficient way of indexing into a large data frame
-#
-# @arguments environment containing data frame
-# @argument list of indices
-# @keywords internal
-# @alias length.indexed_list
-# @alias names.indexed_list
-# @alias as.list.indexed_list
-# @alias [[.indexed_list
-indexed_list <- function(env, index) {
- structure(
- list(env = env, index = index),
- class = "indexed_list"
- )
-}
-
-length.indexed_list <- function(x) length(x$index)
-
-"[[.indexed_list" <- function(x, i) {
- x$env$data[x$index[[i]], , drop = FALSE]
-}
-
-names.indexed_list <- function(x) names(x$index)
-
-as.list.indexed_list <- function(x, ...) {
- n <- length(x)
- out <- vector("list", n)
- for(i in seq_len(n)) {
- out[[i]] <- x[[i]]
- }
-
- mostattributes(out) <- attributes(x)
- class(out) <- c("split", "list")
- out
-}
-
-
-# index <- tapply(1:nrow(mtcars), mtcars$cyl, c, simplify = FALSE)
-# il <- indexed_list(mtcars, index)
View
15 R/indexed.r
@@ -0,0 +1,15 @@
+length.indexed <- function(x) length(x$index)
+
+names.indexed <- function(x) names(x$index)
+
+as.list.indexed <- function(x, ...) {
+ n <- length(x)
+ out <- vector("list", n)
+ for(i in seq_len(n)) {
+ out[[i]] <- x[[i]]
+ }
+
+ mostattributes(out) <- attributes(x)
+ class(out) <- c("split", "list")
+ out
+}
View
40 R/simplify.r → R/simplify-array.r
@@ -1,43 +1,3 @@
-# List to data frame
-# Reduce/simplify a list of homogenous objects to a data frame
-#
-# @arguments list of input data
-# @arguments a data frame of labels, one row for each element of res
-# @keywords internal
-list_to_dataframe <- function(res, labels = NULL) {
- if (length(res) == 0) return(data.frame())
-
- atomic <- unlist(llply(res, is.atomic))
- if (all(atomic)) {
- ulength <- unique(unlist(llply(res, length)))
- if (length(ulength) != 1) stop("Results are not equal lengths")
-
- if (length(res) > 1) {
- resdf <- as.data.frame(do.call("rbind", res))
- } else {
- resdf <- data.frame(res[[1]])
- }
- rows <- rep(1, length(res))
- } else {
- l_ply(res, function(x) if(!is.null(x) & !is.data.frame(x)) stop("Not a data.frame!"))
-
- resdf <- do.call("rbind.fill", res)
- rows <- unlist(llply(res, function(x) if(is.null(x)) 0 else nrow(x)))
- }
-
- # If no labels supplied, use list names
- if (is.null(labels) && !is.null(names(res))) {
- labels <- data.frame(.id = names(res))
- }
-
- if (!is.null(labels) && nrow(labels) == length(res)) {
- cols <- setdiff(names(labels), names(resdf))
- resdf <- cbind(labels[rep(1:nrow(labels), rows), cols, drop=FALSE], resdf)
- }
-
- unrowname(resdf)
-}
-
# List to array
# Reduce/simplify a list of homogenous objects to an array
View
40 R/simplify-data-frame.r
@@ -0,0 +1,40 @@
+# List to data frame
+# Reduce/simplify a list of homogenous objects to a data frame
+#
+# @arguments list of input data
+# @arguments a data frame of labels, one row for each element of res
+# @keywords internal
+list_to_dataframe <- function(res, labels = NULL) {
+ if (length(res) == 0) return(data.frame())
+
+ atomic <- unlist(llply(res, is.atomic))
+ if (all(atomic)) {
+ ulength <- unique(unlist(llply(res, length)))
+ if (length(ulength) != 1) stop("Results are not equal lengths")
+
+ if (length(res) > 1) {
+ resdf <- as.data.frame(do.call("rbind", res))
+ } else {
+ resdf <- data.frame(res[[1]])
+ }
+ rows <- rep(1, length(res))
+ } else {
+ l_ply(res, function(x) if(!is.null(x) & !is.data.frame(x)) stop("Not a data.frame!"))
+
+ resdf <- do.call("rbind.fill", res)
+ rows <- unlist(llply(res, function(x) if(is.null(x)) 0 else nrow(x)))
+ }
+
+ # If no labels supplied, use list names
+ if (is.null(labels) && !is.null(names(res))) {
+ labels <- data.frame(.id = names(res))
+ }
+
+ if (!is.null(labels) && nrow(labels) == length(res)) {
+ cols <- setdiff(names(labels), names(resdf))
+ resdf <- cbind(labels[rep(1:nrow(labels), rows), cols, drop=FALSE], resdf)
+ }
+
+ unrowname(resdf)
+}
+
View
56 R/split-array.r
@@ -0,0 +1,56 @@
+
+# Split an array by .margins
+# Split a 2d or higher data structure into lower-d pieces based
+#
+# This is the workhorse of the \code{a*ply} functions. Given a >1 d
+# data structure (matrix, array, data.frame), it splits it into pieces
+# based on the subscripts that you supply. Each piece is a lower dimensional
+# slice.
+#
+# The margins are specified in the same way as \code{\link{apply}}, but
+# \code{splitter_a} just splits up the data, while \code{apply} also
+# applies a function and combines the pieces back together. This function
+# also includes enough information to recreate the split from attributes on
+# the list of pieces.
+#
+# @params >1d data structure (matrix, data.frame or array)
+# @params a vector giving the subscripts to split up \code{data} by. 1 splits up by rows, 2 by columns and c(1,2) by rows and columns
+# @value a list of lower-d slices, with attributes that record split details
+#X splitter_a(mtcars, 1)
+#X splitter_a(mtcars, 2)
+splitter_a <- function(data, .margins = 1) {
+ if (!all(.margins %in% seq_len(dims(data)))) stop("Invalid margin")
+
+ dimensions <- lapply(amv_dim(data), seq, from=1)
+ dimensions[-.margins] <- list(TRUE)
+ indices <- expand.grid(dimensions, KEEP.OUT.ATTRS = FALSE)
+ names(indices) <- paste("X", 1:ncol(indices), sep="")
+
+ # && !is.array(data)
+ subs <- if (is.list(data) && !is.data.frame(data)) "[[" else "["
+
+ browser()
+ pieces <- lapply(1:nrow(indices),
+ function(i) do.call(subs,
+ c(list(data), unname(indices[i, ,drop=TRUE]), drop=TRUE)
+ )
+ )
+ dim(pieces) <- dim(data)[.margins]
+
+ if (is.data.frame(data) & identical(.margins, 1)) {
+ split_labels <- data
+ } else {
+ dnames <- amv_dimnames(data)
+ split_labels <- expand.grid(dnames[.margins], KEEP.OUT.ATTRS = FALSE)
+ colnames <- names(dnames)[.margins]
+ if (!is.null(colnames)) names(split_labels) <- colnames
+ }
+
+
+ structure(
+ pieces,
+ class = c("split", "list"),
+ split_type = "array",
+ split_labels = split_labels
+ )
+}
View
66 R/split-data-frame.r
@@ -0,0 +1,66 @@
+# Split a data frame by variables
+# Split a data frame into pieces based on variable contained in that data frame
+#
+# This is the workhorse of the \code{d*ply} functions. Based on the variables
+# you supply, it breaks up a single data frame into a list of data frames,
+# each containing a single combination from the levels of the specified
+# variables.
+#
+# This is basically a thin wrapper around \code{\link{split}} which
+# evaluates the variables in the context of the data, and includes enough
+# information to reconstruct the labelling of the data frame after
+# other operations.
+#
+# @seealso \code{\link{.}} for quoting variables, \code{\link{split}}
+# @parameters data frame
+# @parameters a \link{quoted} list of variables, a formula, or character vector
+# @value a list of data.frames, with attributes that record split details
+#X splitter_d(mtcars, .(cyl))
+#X splitter_d(mtcars, .(vs, am))
+#X splitter_d(mtcars, .(am, vs))
+#X
+#X mtcars$cyl2 <- factor(mtcars$cyl, levels = c(2, 4, 6, 8, 10))
+#X splitter_d(mtcars, .(cyl2), drop = TRUE)
+#X splitter_d(mtcars, .(cyl2), drop = FALSE)
+#X
+#X mtcars$cyl3 <- ifelse(mtcars$vs == 1, NA, mtcars$cyl)
+#X splitter_d(mtcars, .(cyl3))
+#X splitter_d(mtcars, .(cyl3, vs))
+#X splitter_d(mtcars, .(cyl3, vs), drop = FALSE)
+splitter_d <- function(data, .variables = NULL, drop = TRUE) {
+ splits <- eval.quoted(.variables, data, parent.frame())
+ factors <- llply(splits, addNA, ifany = TRUE)
+ splitv <- addNA(interaction(factors, drop = drop, lex.order = TRUE),
+ ifany = TRUE)
+ split_labels <- split_labels(splits, drop = drop)
+
+ index <- tapply(1:nrow(data), splitv, list)
+ # Remove missing values. These when occur drop = FALSE and
+ # factor levels do not occur in the data
+ index <- lapply(index, Filter, f = Negate(is.na))
+ il <- indexed_df(environment(), index)
+
+ structure(
+ il,
+ class = c("indexed", "indexed_df", "split", "list"),
+ split_type = "data.frame",
+ split_labels = split_labels
+ )
+}
+
+split_labels <- function(splits, drop) {
+ factors <- llply(splits, addNA, ifany = TRUE)
+ splitv <- addNA(interaction(factors, drop = drop, lex.order = TRUE),
+ ifany = TRUE)
+
+ if (drop) {
+ # Need levels which occur in data
+ representative <- which(!duplicated(splitv))[order(unique(splitv))]
+ data.frame(lapply(splits, function(x) x[representative]))
+ } else {
+ # Need all combinations of levels
+ factor_levels <- lapply(factors, levels)
+ names(factor_levels) <- names(splits)
+ expand.grid(factor_levels)
+ }
+}
View
119 R/split.r
@@ -1,122 +1,3 @@
-# Split a data frame by variables
-# Split a data frame into pieces based on variable contained in that data frame
-#
-# This is the workhorse of the \code{d*ply} functions. Based on the variables
-# you supply, it breaks up a single data frame into a list of data frames,
-# each containing a single combination from the levels of the specified
-# variables.
-#
-# This is basically a thin wrapper around \code{\link{split}} which
-# evaluates the variables in the context of the data, and includes enough
-# information to reconstruct the labelling of the data frame after
-# other operations.
-#
-# @seealso \code{\link{.}} for quoting variables, \code{\link{split}}
-# @parameters data frame
-# @parameters a \link{quoted} list of variables, a formula, or character vector
-# @value a list of data.frames, with attributes that record split details
-#X splitter_d(mtcars, .(cyl))
-#X splitter_d(mtcars, .(vs, am))
-#X splitter_d(mtcars, .(am, vs))
-#X
-#X mtcars$cyl2 <- factor(mtcars$cyl, levels = c(2, 4, 6, 8, 10))
-#X splitter_d(mtcars, .(cyl2), drop = TRUE)
-#X splitter_d(mtcars, .(cyl2), drop = FALSE)
-#X
-#X mtcars$cyl3 <- ifelse(mtcars$vs == 1, NA, mtcars$cyl)
-#X splitter_d(mtcars, .(cyl3))
-#X splitter_d(mtcars, .(cyl3, vs))
-#X splitter_d(mtcars, .(cyl3, vs), drop = FALSE)
-splitter_d <- function(data, .variables = NULL, drop = TRUE) {
- splits <- eval.quoted(.variables, data, parent.frame())
- factors <- llply(splits, addNA, ifany = TRUE)
- splitv <- addNA(interaction(factors, drop = drop, lex.order = TRUE),
- ifany = TRUE)
- split_labels <- split_labels(splits, drop = drop)
-
- index <- tapply(1:nrow(data), splitv, list)
- # Remove missing values. These when occur drop = FALSE and
- # factor levels do not occur in the data
- index <- lapply(index, Filter, f = Negate(is.na))
- il <- indexed_list(environment(), index)
-
- structure(
- il,
- class = c("indexed_list", "split", "list"),
- split_type = "data.frame",
- split_labels = split_labels
- )
-}
-
-split_labels <- function(splits, drop) {
- factors <- llply(splits, addNA, ifany = TRUE)
- splitv <- addNA(interaction(factors, drop = drop, lex.order = TRUE),
- ifany = TRUE)
-
- if (drop) {
- # Need levels which occur in data
- representative <- which(!duplicated(splitv))[order(unique(splitv))]
- data.frame(lapply(splits, function(x) x[representative]))
- } else {
- # Need all combinations of levels
- factor_levels <- lapply(factors, levels)
- names(factor_levels) <- names(splits)
- expand.grid(factor_levels)
- }
-}
-
-# Split an array by .margins
-# Split a 2d or higher data structure into lower-d pieces based
-#
-# This is the workhorse of the \code{a*ply} functions. Given a >1 d
-# data structure (matrix, array, data.frame), it splits it into pieces
-# based on the subscripts that you supply. Each piece is a lower dimensional
-# slice.
-#
-# The margins are specified in the same way as \code{\link{apply}}, but
-# \code{splitter_a} just splits up the data, while \code{apply} also
-# applies a function and combines the pieces back together. This function
-# also includes enough information to recreate the split from attributes on
-# the list of pieces.
-#
-# @params >1d data structure (matrix, data.frame or array)
-# @params a vector giving the subscripts to split up \code{data} by. 1 splits up by rows, 2 by columns and c(1,2) by rows and columns
-# @value a list of lower-d slices, with attributes that record split details
-#X splitter_a(mtcars, 1)
-#X splitter_a(mtcars, 2)
-splitter_a <- function(data, .margins = 1) {
- if (!all(.margins %in% seq_len(dims(data)))) stop("Invalid margin")
-
- dimensions <- lapply(amv_dim(data), seq, from=1)
- dimensions[-.margins] <- list(TRUE)
- indices <- expand.grid(dimensions, KEEP.OUT.ATTRS = FALSE)
- names(indices) <- paste("X", 1:ncol(indices), sep="")
-
- subs <- if (is.list(data) && !is.array(data) && !is.data.frame(data)) "[[" else "["
-
- pieces <- lapply(1:nrow(indices),
- function(i) do.call(subs,
- c(list(data), unname(indices[i, ,drop=TRUE]), drop=TRUE)
- )
- )
- dim(pieces) <- dim(data)[.margins]
-
- if (is.data.frame(data) & identical(.margins, 1)) {
- split_labels <- data
- } else {
- dnames <- amv_dimnames(data)
- split_labels <- expand.grid(dnames[.margins], KEEP.OUT.ATTRS = FALSE)
- colnames <- names(dnames)[.margins]
- if (!is.null(colnames)) names(split_labels) <- colnames
- }
-
- structure(
- pieces,
- class = c("split", "list"),
- split_type = "array",
- split_labels = split_labels
- )
-}
# Subset splits
# Subset splits, ensuring that labels keep matching

0 comments on commit 9294897

Please sign in to comment.