Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Merge pull request #188 from krlmlr/fix/140-142-id-137-column-as-factor

Enhancements for *dply
  • Loading branch information...
commit 67a818d27d64568716294e73ae0fabe682c4a62d 2 parents ca41d10 + e310244
@hadley authored
View
6 NEWS
@@ -1,6 +1,12 @@
Version 1.8.0.99
------------------------------------------------------------------------------
+* New parameter `.id` to `ldply()` that specifies the name of the index column. (Thanks to Kirill Müller, #107, #140)
+
+* New parameter `.id` to `rdply()` that specifies the name of the index column. (Thanks to Kirill Müller, #142)
+
+* The `.id` column in `ldply()` is generated as a factor to preserve the sort order. (Thanks to Kirill Müller, #137)
+
* `rbind.fill` now silently drops NULL inputs (#138)
* `rbind.fill` avoids array copying which had produced quadratic time
View
15 R/ldply.r
@@ -1,18 +1,21 @@
#' Split list, apply function, and return results in a data frame.
-#'
-#' For each element of a list, apply function then combine results into a data
-#' frame.
-#'
+#'
+#' For each element of a list, apply function then combine results into
+#' a data frame.
+#'
#' @template ply
#' @template l-
#' @template -d
+#' @param .id name of the index column (used if \code{.data} is a
+#' named list), defaults to \code{".id"}. Pass \code{NULL} to avoid
+#' creation of the index column
#' @export
ldply <- function(.data, .fun = NULL, ..., .progress = "none", .inform = FALSE,
- .parallel = FALSE, .paropts = NULL) {
+ .parallel = FALSE, .paropts = NULL, .id = ".id") {
if (!inherits(.data, "split")) .data <- as.list(.data)
res <- llply(.data = .data, .fun = .fun, ...,
.progress = .progress, .inform = .inform,
.parallel = .parallel, .paropts = .paropts)
- list_to_dataframe(res, attr(.data, "split_labels"))
+ list_to_dataframe(res, attr(.data, "split_labels"), .id)
}
View
15 R/list-to-dataframe.r
@@ -1,14 +1,16 @@
#' List to data frame.
#'
-#' Reduce/simplify a list of homogenous objects to a data frame.
-#' All \code{NULL} entries are removed. Remaining entries must be all atomic
+#' Reduce/simplify a list of homogenous objects to a data frame. All
+#' \code{NULL} entries are removed. Remaining entries must be all atomic
#' or all data frames.
#'
#' @family list simplification functions
#' @param res list of input data
#' @param labels a data frame of labels, one row for each element of res
+#' @param idname the name of the index column, \code{NULL} for no index
+#' column
#' @keywords internal
-list_to_dataframe <- function(res, labels = NULL) {
+list_to_dataframe <- function(res, labels = NULL, idname = NULL) {
null <- vapply(res, is.null, logical(1))
res <- res[!null]
if (length(res) == 0) return(data.frame())
@@ -17,8 +19,11 @@ list_to_dataframe <- function(res, labels = NULL) {
stopifnot(nrow(labels) == length(null))
labels <- labels[!null, , drop = FALSE]
}
- if (is.null(labels) && !is.null(names(res))) {
- labels <- data.frame(.id = names(res), stringsAsFactors = FALSE)
+ names.res <- names(res)
+ if (!is.null(idname) && is.null(labels) && !is.null(names.res)) {
+ stopifnot(length(idname) == 1)
+ labels <- data.frame(.id = factor(names.res, levels = unique(names.res)))
+ names(labels) <- idname
}
# Figure out how to turn elements into a data frame
View
8 R/rdply.r
@@ -13,6 +13,8 @@
#' @param .n number of times to evaluate the expression
#' @param .expr expression to evaluate
#' @param .progress name of the progress bar to use, see \code{\link{create_progress_bar}}
+#' @param .id name of the index column, defaults to \code{".n"}. Pass
+#' \code{NULL} to avoid creation of the index column
#' @return a data frame
#' @export
#' @references Hadley Wickham (2011). The Split-Apply-Combine Strategy for
@@ -22,7 +24,7 @@
#' rdply(20, mean(runif(100)))
#' rdply(20, each(mean, var)(runif(100)))
#' rdply(20, data.frame(x = runif(2)))
-rdply <- function(.n, .expr, .progress = "none") {
+rdply <- function(.n, .expr, .progress = "none", .id = ".n") {
if (is.function(.expr)) {
f <- .expr
} else {
@@ -30,6 +32,6 @@ rdply <- function(.n, .expr, .progress = "none") {
}
res <- rlply(.n = .n, .expr = f, .progress = .progress)
- labels <- data.frame(.n = seq_len(.n))
- list_to_dataframe(res, labels)
+ names(res) <- seq_len(.n)
+ list_to_dataframe(res, idname = .id)
}
View
13 inst/tests/test-data-frame.r
@@ -60,3 +60,16 @@ test_that("label variables always preserved", {
})
+
+# Test for #140
+test_that(".id column can be renamed", {
+ l <- llply(1:4, function(i) rep(i, i))
+ names(l) <- 1:4
+ f <- function(l) data.frame(sum=sum(unlist(l)))
+
+ out1 <- ldply(l, f)
+ out2 <- ldply(l, f, .id='x')
+
+ expect_equal(names(out1), c('.id', 'sum'))
+ expect_equal(names(out2), c('x', 'sum'))
+})
View
11 inst/tests/test-list.r
@@ -19,3 +19,14 @@ test_that("list names are preserved", {
expect_that(names(llply(a)), equals(letters[1:10]))
})
+
+# Test for #142
+test_that(".n column can be renamed", {
+ f <- function() data.frame(r=runif(1))
+
+ out1 <- rdply(4, f)
+ out2 <- rdply(4, f, .id='x')
+
+ expect_equal(names(out1), c('.n', 'r'))
+ expect_equal(names(out2), c('x', 'r'))
+})
View
6 inst/tests/test-replicate.r
@@ -7,3 +7,9 @@ test_that("length of results are correct", {
expect_equal(length(a), 4)
expect_equal(length(b), 4)
})
+
+test_that("name of id column is set", {
+ df <- rdply(4, function() c(a=1), .id='index')
+
+ expect_equal(names(df), c('index', 'a'))
+})
View
22 inst/tests/test-simplify-df.r
@@ -103,15 +103,15 @@ test_that("names preserved and filled for atomic vectors", {
})
test_that("names captured from list", {
- li <- list(a = 1:5, b = 5:10, c = 5:15)
+ li <- list(c = 5:15, b = 5:10, a = 1:5)
df <- ldply(li, function(x) mean(x))
- expect_that(df$.id, equals(c("a", "b", "c")))
+ expect_that(df$.id, equals(factor(names(li), levels=names(li))))
df <- ldply(li, function(x) {
if (any(x >= 10)) mean(x)
})
- expect_that(df$.id, equals(c("b", "c")))
+ expect_that(df$.id, equals(factor(names(li)[-3], levels=names(li)[-3])))
})
test_that("correct number of rows outputted", {
@@ -121,8 +121,7 @@ test_that("correct number of rows outputted", {
expect_that(nrow(res), equals(3))
})
-
-test_that("matrices converted to data frames", {
+test_that("matrices converted to data frames, without id column", {
mat <- matrix(1:20, ncol = 4)
colnames(mat) <- letters[1:4]
@@ -130,5 +129,16 @@ test_that("matrices converted to data frames", {
df <- list_to_dataframe(li)
expect_equal(nrow(df), 2 * nrow(mat))
- expect_equal(names(df), c(".id", "a", "b", "c", "d"))
+ expect_equal(names(df), c("a", "b", "c", "d"))
+})
+
+test_that("matrices converted to data frames, with id column", {
+ mat <- matrix(1:20, ncol = 4)
+ colnames(mat) <- letters[1:4]
+
+ li <- list(a = mat, b = mat)
+ df <- list_to_dataframe(li, idname="my-id")
+
+ expect_equal(nrow(df), 2 * nrow(mat))
+ expect_equal(names(df), c("my-id", "a", "b", "c", "d"))
})
View
6 man/ldply.Rd
@@ -3,7 +3,7 @@
\title{Split list, apply function, and return results in a data frame.}
\usage{
ldply(.data, .fun = NULL, ..., .progress = "none", .inform = FALSE,
- .parallel = FALSE, .paropts = NULL)
+ .parallel = FALSE, .paropts = NULL, .id = ".id")
}
\arguments{
\item{.fun}{function to apply to each piece}
@@ -29,6 +29,10 @@ ldply(.data, .fun = NULL, ..., .progress = "none", .inform = FALSE,
slows processing speed, but is very useful for debugging}
\item{.data}{list to be processed}
+
+ \item{.id}{name of the index column (used if \code{.data}
+ is a named list), defaults to \code{".id"}. Pass
+ \code{NULL} to avoid creation of the index column}
}
\value{
A data frame, as described in the output section.
View
5 man/list_to_dataframe.Rd
@@ -2,13 +2,16 @@
\alias{list_to_dataframe}
\title{List to data frame.}
\usage{
-list_to_dataframe(res, labels = NULL)
+list_to_dataframe(res, labels = NULL, idname = NULL)
}
\arguments{
\item{res}{list of input data}
\item{labels}{a data frame of labels, one row for each
element of res}
+
+ \item{idname}{the name of the index column, \code{NULL}
+ for no index column}
}
\description{
Reduce/simplify a list of homogenous objects to a data
View
6 man/rdply.Rd
@@ -2,7 +2,7 @@
\alias{rdply}
\title{Replicate expression and return results in a data frame.}
\usage{
-rdply(.n, .expr, .progress = "none")
+rdply(.n, .expr, .progress = "none", .id = ".n")
}
\arguments{
\item{.n}{number of times to evaluate the expression}
@@ -11,6 +11,10 @@ rdply(.n, .expr, .progress = "none")
\item{.progress}{name of the progress bar to use, see
\code{\link{create_progress_bar}}}
+
+ \item{.id}{name of the index column, defaults to
+ \code{".n"}. Pass \code{NULL} to avoid creation of the
+ index column}
}
\value{
a data frame
Please sign in to comment.
Something went wrong with that request. Please try again.