Skip to content
Browse files

Improvements to splitter_d

  * Add some more examples
  * Split labels into own function
  * Better handling of missing levels
  • Loading branch information...
1 parent df62838 commit 60f501b6fedd4c75d5f4959b74df1f866b42a22a @hadley committed Mar 11, 2009
Showing with 31 additions and 12 deletions.
  1. +31 −12 R/split.r
View
43 R/split.r
@@ -18,23 +18,27 @@
#X splitter_d(mtcars, .(cyl))
#X splitter_d(mtcars, .(vs, am))
#X splitter_d(mtcars, .(am, vs))
+#X
+#X mtcars$cyl2 <- factor(mtcars$cyl, levels = c(2, 4, 6, 8, 10))
+#X splitter_d(mtcars, .(cyl2), drop = TRUE)
+#X splitter_d(mtcars, .(cyl2), drop = FALSE)
+#X
+#X mtcars$cyl3 <- ifelse(mtcars$vs == 1, NA, mtcars$cyl)
+#X splitter_d(mtcars, .(cyl3))
+#X splitter_d(mtcars, .(cyl3, vs))
+#X splitter_d(mtcars, .(cyl3, vs), drop = FALSE)
splitter_d <- function(data, .variables = NULL, drop = TRUE) {
splits <- eval.quoted(.variables, data, parent.frame())
factors <- llply(splits, addNA, ifany = TRUE)
splitv <- addNA(interaction(factors, drop = drop), ifany = TRUE)
-
- if (drop) {
- # Need levels which occur in data
- representative <- which(!duplicated(splitv))[order(unique(splitv))]
- split_labels <- data.frame(lapply(splits, function(x) x[representative]))
- } else {
- # Need all combinations of levels
- factor_levels <- lapply(factors, levels)
- split_labels <- expand.grid(factor_levels)
- }
+ split_labels <- split_labels(splits, drop = drop)
- index <- tapply(1:nrow(data), splitv, c)
+ index <- tapply(1:nrow(data), splitv, list)
+ # Remove missing values. These when occur drop = FALSE and
+ # factor levels do not occur in the data
+ index <- lapply(index, Filter, f = Negate(is.na))
il <- indexed_list(environment(), index)
+
structure(
il,
class = c("indexed_list", "split", "list"),
@@ -43,6 +47,22 @@ splitter_d <- function(data, .variables = NULL, drop = TRUE) {
)
}
+split_labels <- function(splits, drop) {
+ factors <- llply(splits, addNA, ifany = TRUE)
+ splitv <- addNA(interaction(factors, drop = drop), ifany = TRUE)
+
+ if (drop) {
+ # Need levels which occur in data
+ representative <- which(!duplicated(splitv))[order(unique(splitv))]
+ data.frame(lapply(splits, function(x) x[representative]))
+ } else {
+ # Need all combinations of levels
+ factor_levels <- lapply(factors, levels)
+ names(factor_levels) <- names(splits)
+ expand.grid(factor_levels)
+ }
+}
+
# Split an array by .margins
# Split a 2d or higher data structure into lower-d pieces based
#
@@ -96,7 +116,6 @@ splitter_a <- function(data, .margins = 1) {
)
}
-
# Subset splits
# Subset splits, ensuring that labels keep matching
#

0 comments on commit 60f501b

Please sign in to comment.
Something went wrong with that request. Please try again.