Skip to content

Commit

Permalink
New strip_splits function
Browse files Browse the repository at this point in the history
  • Loading branch information
hadley committed Mar 2, 2011
1 parent d741061 commit 1607d23
Show file tree
Hide file tree
Showing 9 changed files with 58 additions and 11 deletions.
8 changes: 4 additions & 4 deletions DESCRIPTION
Expand Up @@ -25,10 +25,10 @@ Collate: 'dimensions.r' 'helper-arrange.r'
'helper-each.r' 'helper-match-df.r' 'helper-mutate.r'
'helper-quick-df.r' 'helper-rename.r'
'helper-round-any.r' 'helper-splat.r'
'helper-summarise.r' 'helper-try.r'
'helper-vaggregate.r' 'id.r' 'immutable.r'
'indexed-array.r' 'indexed-data-frame.r' 'indexed.r'
'join.r' 'loop-apply.r' 'ply-array.r'
'helper-strip-splits.r' 'helper-summarise.r'
'helper-try.r' 'helper-vaggregate.r' 'id.r'
'immutable.r' 'indexed-array.r' 'indexed-data-frame.r'
'indexed.r' 'join.r' 'loop-apply.r' 'ply-array.r'
'ply-data-frame.r' 'ply-iterator.r' 'ply-list.r'
'ply-mapply.r' 'ply-null.r' 'ply-replicate.r'
'progress.r' 'quote.r' 'rbind-matrix.r' 'rbind.r'
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Expand Up @@ -12,6 +12,7 @@ export(quickdf)
export(rename)
export(round_any)
export(splat)
export(strip_splits)
export(summarise, summarize)
export(failwith)
export(try_default)
Expand Down
3 changes: 3 additions & 0 deletions NEWS
@@ -1,6 +1,9 @@
Version 1.5 (2011-XX-XX)
------------------------------------------------------------------------------

* new `strip_splits` function removes splitting variables from the data frames
returned by `ddply`.

* `join` now implements joins in a more SQL like way, returning all possible
matches, not just the first one. It is still (a little) faster than merge.
The previous behaviour is accessible with `match = "first"`.
Expand Down
15 changes: 15 additions & 0 deletions R/helper-strip-splits.r
@@ -0,0 +1,15 @@
#' Remove splitting variables from a data frame.
#'
#' This is useful when you want to perform some operation on every column
#' in the data frame, except the variables that you have used to split it.
#' These variables will be automatically added back on to the result when
#' combining all results together.
#'
#' @param df data frame produced by \code{d*ply}.
#' @return \code{df} with the columns named in its \code{"vars"} attribute
#'   removed.
#' @export
#' @examples
#' dlply(mtcars, c("vs", "am"))
#' dlply(mtcars, c("vs", "am"), strip_splits)
strip_splits <- function(df) {
  # The names of the splitting variables are read from the "vars" attribute.
  # When the attribute is missing, attr() returns NULL and setdiff() removes
  # nothing.
  df[setdiff(names(df), attr(df, "vars"))]
}
7 changes: 4 additions & 3 deletions R/indexed-data-frame.r
Expand Up @@ -3,19 +3,20 @@
#'
#' @param env environment containing data frame
#' @param index list of indices
#' @param vars a character vector giving the variables used for subsetting
#' @keywords internal
#' @aliases indexed_df length.indexed names.indexed as.list.indexed
#' [[.indexed_df [.indexed print.indexed
indexed_df <- function(data, index) {
indexed_df <- function(data, index, vars) {

structure(
list(data = data, index = index),
list(data = data, index = index, vars = vars),
class = c("indexed", "indexed_df")
)
}

"[[.indexed_df" <- function(x, i) {
x$data[x$index[[i]], , drop = FALSE]
structure(x$data[x$index[[i]], , drop = FALSE], vars = x$vars)
# x$env$data[x$index[[i]], , drop = FALSE]
# slice(x, attr(x, "index")[[i]])
# subset_rows(x$env$data, x$index[[i]])
Expand Down
9 changes: 7 additions & 2 deletions R/split-data-frame.r
Expand Up @@ -13,7 +13,7 @@
#'
#' @seealso \code{\link{.}} for quoting variables, \code{\link{split}}
#' @param data data frame
#' @param .variables a \link{quoted} list of variables, a formula, or character vector. \code{NULL} will not split the data
#' @param .variables a \link{quoted} list of variables
#' @param drop drop unused factor levels?
#' @return a list of data.frames, with attributes that record split details
#' @keywords internal
Expand All @@ -31,21 +31,26 @@
#' plyr:::splitter_d(mtcars, .(cyl3, vs))
#' plyr:::splitter_d(mtcars, .(cyl3, vs), drop = FALSE)
splitter_d <- function(data, .variables = NULL, drop = TRUE) {
stopifnot(is.quoted(.variables))


if (length(.variables) == 0) {
splitv <- rep(1, nrow(data))
split_labels <- NULL
attr(splitv, "n") <- max(splitv)
vars <- character(0)
} else {
splits <- eval.quoted(.variables, data)

splitv <- id(splits, drop = drop)
split_labels <- split_labels(splits, drop = drop, id = splitv)
vars <- unlist(lapply(.variables, all.vars))
}

index <- split_indices(seq_along(splitv), as.integer(splitv),
attr(splitv, "n"))

il <- indexed_df(data, index)
il <- indexed_df(data, index, vars)

structure(
il,
Expand Down
3 changes: 2 additions & 1 deletion man/indexed_df.Rd
@@ -1,6 +1,6 @@
\name{indexed_df}
\title{An indexed data frame.}
\usage{indexed_df(data, index)}
\usage{indexed_df(data, index, vars)}

\description{
An indexed data frame. Create an indexed list, a space
Expand All @@ -17,4 +17,5 @@
\arguments{
\item{env}{environment containing data frame}
\item{index}{list of indices}
\item{vars}{a character vector giving the variables used for subsetting}
}
2 changes: 1 addition & 1 deletion man/splitter_d.Rd
Expand Up @@ -26,7 +26,7 @@
\keyword{internal}
\arguments{
\item{data}{data frame}
\item{.variables}{a \link{quoted} list of variables, a formula, or character vector. \code{NULL} will not split the data}
\item{.variables}{a \link{quoted} list of variables}
\item{drop}{drop unused factor levels?}
}
\examples{plyr:::splitter_d(mtcars, .(cyl))
Expand Down
21 changes: 21 additions & 0 deletions man/strip_splits.Rd
@@ -0,0 +1,21 @@
\name{strip_splits}
\alias{strip_splits}
\title{Remove splitting variables from a data frame.}
\usage{strip_splits(df)}

\description{
Remove splitting variables from a data frame.
}

\details{
This is useful when you want to perform some operation on
every column in the data frame, except the variables that
you have used to split it. These variables will be
automatically added back on to the result when combining
all results together.
}
\arguments{
\item{df}{data frame produced by \code{d*ply}.}
}
\examples{dlply(mtcars, c("vs", "am"))
dlply(mtcars, c("vs", "am"), strip_splits)}

0 comments on commit 1607d23

Please sign in to comment.