Skip to content

Commit

Permalink
Improvements to splitter_d
Browse files Browse the repository at this point in the history
  * Add some more examples
  * Split labels into own function
  * Better handling of missing levels
  • Loading branch information
hadley committed Mar 11, 2009
1 parent df62838 commit 60f501b
Showing 1 changed file with 31 additions and 12 deletions.
43 changes: 31 additions & 12 deletions R/split.r
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -18,23 +18,27 @@
#X splitter_d(mtcars, .(cyl)) #X splitter_d(mtcars, .(cyl))
#X splitter_d(mtcars, .(vs, am)) #X splitter_d(mtcars, .(vs, am))
#X splitter_d(mtcars, .(am, vs)) #X splitter_d(mtcars, .(am, vs))
#X
#X mtcars$cyl2 <- factor(mtcars$cyl, levels = c(2, 4, 6, 8, 10))
#X splitter_d(mtcars, .(cyl2), drop = TRUE)
#X splitter_d(mtcars, .(cyl2), drop = FALSE)
#X
#X mtcars$cyl3 <- ifelse(mtcars$vs == 1, NA, mtcars$cyl)
#X splitter_d(mtcars, .(cyl3))
#X splitter_d(mtcars, .(cyl3, vs))
#X splitter_d(mtcars, .(cyl3, vs), drop = FALSE)
splitter_d <- function(data, .variables = NULL, drop = TRUE) { splitter_d <- function(data, .variables = NULL, drop = TRUE) {
splits <- eval.quoted(.variables, data, parent.frame()) splits <- eval.quoted(.variables, data, parent.frame())
factors <- llply(splits, addNA, ifany = TRUE) factors <- llply(splits, addNA, ifany = TRUE)
splitv <- addNA(interaction(factors, drop = drop), ifany = TRUE) splitv <- addNA(interaction(factors, drop = drop), ifany = TRUE)

split_labels <- split_labels(splits, drop = drop)
if (drop) {
# Need levels which occur in data
representative <- which(!duplicated(splitv))[order(unique(splitv))]
split_labels <- data.frame(lapply(splits, function(x) x[representative]))
} else {
# Need all combinations of levels
factor_levels <- lapply(factors, levels)
split_labels <- expand.grid(factor_levels)
}


index <- tapply(1:nrow(data), splitv, c) index <- tapply(1:nrow(data), splitv, list)
# Remove missing values. These when occur drop = FALSE and
# factor levels do not occur in the data
index <- lapply(index, Filter, f = Negate(is.na))
il <- indexed_list(environment(), index) il <- indexed_list(environment(), index)

structure( structure(
il, il,
class = c("indexed_list", "split", "list"), class = c("indexed_list", "split", "list"),
Expand All @@ -43,6 +47,22 @@ splitter_d <- function(data, .variables = NULL, drop = TRUE) {
) )
} }


split_labels <- function(splits, drop) {
factors <- llply(splits, addNA, ifany = TRUE)
splitv <- addNA(interaction(factors, drop = drop), ifany = TRUE)

if (drop) {
# Need levels which occur in data
representative <- which(!duplicated(splitv))[order(unique(splitv))]
data.frame(lapply(splits, function(x) x[representative]))
} else {
# Need all combinations of levels
factor_levels <- lapply(factors, levels)
names(factor_levels) <- names(splits)
expand.grid(factor_levels)
}
}

# Split an array by .margins # Split an array by .margins
# Split a 2d or higher data structure into lower-d pieces based # Split a 2d or higher data structure into lower-d pieces based
# #
Expand Down Expand Up @@ -96,7 +116,6 @@ splitter_a <- function(data, .margins = 1) {
) )
} }



# Subset splits # Subset splits
# Subset splits, ensuring that labels keep matching # Subset splits, ensuring that labels keep matching
# #
Expand Down

0 comments on commit 60f501b

Please sign in to comment.