Permalink
Browse files

Don't use a factor for ID column if .id parameter is not set in ldply() and rdply()

This is to ensure backward compatibility with the many packages that depend on plyr, e.g., ggplot2
  • Loading branch information...
1 parent 1182381 commit cd1633e0040bf1eecc854a101ce8798cc15e6acf @krlmlr krlmlr committed Jan 10, 2014
Showing with 78 additions and 39 deletions.
  1. +3 −2 NEWS.md
  2. +15 −9 R/ldply.r
  3. +8 −6 R/list-to-dataframe.r
  4. +20 −12 R/rdply.r
  5. +6 −1 inst/tests/test-rply.r
  6. +17 −4 inst/tests/test-simplify-df.r
  7. +4 −2 man/ldply.Rd
  8. +2 −1 man/list_to_dataframe.Rd
  9. +3 −2 man/rdply.Rd
View
@@ -6,8 +6,9 @@
* New parameter `.id` to `rdply()` that specifies the name of the index column.
(Thanks to Kirill Müller, #142)
-* The .id column in ldply() is generated as a factor to preserve the sort order.
- (Thanks to Kirill Müller, #137)
+* The .id column in `ldply()` and `rdply()` is generated as a factor to preserve
+ the sort order, but only if the new `.id` parameter is set. (Thanks to Kirill
+ Müller, #137)
* `rbind.fill` now silently drops NULL inputs (#138)
View
@@ -1,21 +1,27 @@
#' Split list, apply function, and return results in a data frame.
-#'
-#' For each element of a list, apply function then combine results into
-#' a data frame.
-#'
+#'
+#' For each element of a list, apply function then combine results into a data
+#' frame.
+#'
#' @template ply
#' @template l-
#' @template -d
-#' @param .id name of the index column (used if \code{.data} is a
-#' named list), defaults to \code{".id"}. Pass \code{NULL} to avoid
-#' creation of the index column
+#' @param .id name of the index column (used if \code{.data} is a named list).
+#' Pass \code{NULL} to avoid creation of the index column. For compatibility,
+#' omit this argument or pass \code{NA} to avoid converting the index column
+#' to a factor; in this case, \code{".id"} is used as column name.
#' @export
ldply <- function(.data, .fun = NULL, ..., .progress = "none", .inform = FALSE,
- .parallel = FALSE, .paropts = NULL, .id = ".id") {
+ .parallel = FALSE, .paropts = NULL, .id = NA) {
if (!inherits(.data, "split")) .data <- as.list(.data)
res <- llply(.data = .data, .fun = .fun, ...,
.progress = .progress, .inform = .inform,
.parallel = .parallel, .paropts = .paropts)
- list_to_dataframe(res, attr(.data, "split_labels"), .id)
+ if (is.na(.id)) {
+ .id <- ".id"
+ id_as_factor <- FALSE
+ } else
+ id_as_factor <- TRUE
+ list_to_dataframe(res, attr(.data, "split_labels"), .id, id_as_factor)
}
View
@@ -10,7 +10,7 @@
#' @param idname the name of the index column, \code{NULL} for no index
#' column
#' @keywords internal
-list_to_dataframe <- function(res, labels = NULL, idname = NULL) {
+list_to_dataframe <- function(res, labels = NULL, id_name = NULL, id_as_factor = FALSE) {
null <- vapply(res, is.null, logical(1))
res <- res[!null]
if (length(res) == 0) return(data.frame())
@@ -19,11 +19,13 @@ list_to_dataframe <- function(res, labels = NULL, idname = NULL) {
stopifnot(nrow(labels) == length(null))
labels <- labels[!null, , drop = FALSE]
}
- names.res <- names(res)
- if (!is.null(idname) && is.null(labels) && !is.null(names.res)) {
- stopifnot(length(idname) == 1)
- labels <- data.frame(.id = factor(names.res, levels = unique(names.res)))
- names(labels) <- idname
+ names_res <- names(res)
+ if (!is.null(id_name) && is.null(labels) && !is.null(names_res)) {
+ stopifnot(length(id_name) == 1)
+ if (id_as_factor)
+ names_res <- factor(names_res, levels = unique(names_res))
+ labels <- data.frame(.id = names_res, stringsAsFactors = FALSE)
+ names(labels) <- id_name
}
# Figure out how to turn elements into a data frame
View
@@ -2,31 +2,39 @@
#'
#' Evaluate expression n times then combine results into a data frame
#'
-#' This function runs an expression multiple times, and combines the
-#' result into a data frame. If there are no results, then this function
-#' returns a data frame with zero rows and columns (\code{data.frame()}).
-#' This function is equivalent to \code{\link{replicate}}, but will always
-#' return results as a data frame.
+#' This function runs an expression multiple times, and combines the result into
+#' a data frame. If there are no results, then this function returns a data
+#' frame with zero rows and columns (\code{data.frame()}). This function is
+#' equivalent to \code{\link{replicate}}, but will always return results as a
+#' data frame.
#'
#'
#' @keywords manip
#' @param .n number of times to evaluate the expression
#' @param .expr expression to evaluate
-#' @param .progress name of the progress bar to use, see \code{\link{create_progress_bar}}
-#' @param .id name of the index column, defaults to \code{".n"}. Pass
-#' \code{NULL} to avoid creation of the index column
+#' @param .progress name of the progress bar to use, see
+#' \code{\link{create_progress_bar}}
+#' @param .id name of the index column. Pass \code{NULL} to avoid creation of
+#' the index column. For compatibility, omit this argument or pass \code{NA}
+#' to avoid converting the index column to a factor; in this case, \code{".n"}
+#' is used as column name.
#' @return a data frame
#' @export
-#' @references Hadley Wickham (2011). The Split-Apply-Combine Strategy for
-#' Data Analysis. Journal of Statistical Software, 40(1), 1-29.
+#' @references Hadley Wickham (2011). The Split-Apply-Combine Strategy for Data
+#' Analysis. Journal of Statistical Software, 40(1), 1-29.
#' \url{http://www.jstatsoft.org/v40/i01/}.
#' @examples
#' rdply(20, mean(runif(100)))
#' rdply(20, each(mean, var)(runif(100)))
#' rdply(20, data.frame(x = runif(2)))
-rdply <- function(.n, .expr, .progress = "none", .id = ".n") {
+rdply <- function(.n, .expr, .progress = "none", .id = NA) {
res <- .rlply_worker(.n, .progress,
eval.parent(substitute(function() .expr)))
names(res) <- seq_len(.n)
- list_to_dataframe(res, idname = .id)
+ if (is.na(.id)) {
+ .id <- ".n"
+ id_as_factor <- FALSE
+ } else
+ id_as_factor <- TRUE
+ list_to_dataframe(res, id_name = .id, id_as_factor = id_as_factor)
}
View
@@ -113,7 +113,7 @@ test_that("Side effects for rdply", {
if (n == 0) {
exp_res <- data.frame()
} else {
- exp_res <- data.frame(.n = factor(1L:n, levels = 1L:n), i = 1L:n)
+ exp_res <- data.frame(.n = as.character(1L:n), i = 1L:n, stringsAsFactors = FALSE)
}
i <- 0
@@ -139,3 +139,8 @@ test_that("Invalid arguments for r_ply", {
expect_error(r_ply(c(1,2), identity))
expect_error(r_ply(list(5), identity))
})
+
+test_that(".id columnd for rdply", {
+ expect_equal(rdply(5, 10)$.n, as.character(1:5))
+ expect_equal(rdply(5, 10, .id=".n")$.n, factor(1:5))
+})
@@ -106,12 +106,12 @@ test_that("names captured from list", {
li <- list(c = 5:15, b = 5:10, a = 1:5)
df <- ldply(li, function(x) mean(x))
- expect_that(df$.id, equals(factor(names(li), levels=names(li))))
+ expect_that(df$.id, equals(names(li)))
df <- ldply(li, function(x) {
if (any(x >= 10)) mean(x)
})
- expect_that(df$.id, equals(factor(names(li)[-3], levels=names(li)[-3])))
+ expect_that(df$.id, equals(names(li)[-3]))
})
test_that("correct number of rows outputted", {
@@ -137,8 +137,21 @@ test_that("matrices converted to data frames, with id column", {
colnames(mat) <- letters[1:4]
li <- list(a = mat, b = mat)
- df <- list_to_dataframe(li, idname="my-id")
+ df <- plyr:::list_to_dataframe(li, id_name = "my_id")
expect_equal(nrow(df), 2 * nrow(mat))
- expect_equal(names(df), c("my-id", "a", "b", "c", "d"))
+ expect_equal(names(df), c("my_id", "a", "b", "c", "d"))
+ expect_equal(df$my_id, rep(c("a", "b"), c(5, 5)))
+})
+
+test_that("matrices converted to data frames, with id column as factor", {
+ mat <- matrix(1:20, ncol = 4)
+ colnames(mat) <- letters[1:4]
+
+ li <- list(a = mat, b = mat)
+ df <- list_to_dataframe(li, id_name = "my_id", id_as_factor = TRUE)
+
+ expect_equal(nrow(df), 2 * nrow(mat))
+ expect_equal(names(df), c("my_id", "a", "b", "c", "d"))
+ expect_equal(levels(df$my_id), c("a", "b"))
})
View
@@ -3,7 +3,7 @@
\title{Split list, apply function, and return results in a data frame.}
\usage{
ldply(.data, .fun = NULL, ..., .progress = "none", .inform = FALSE,
- .parallel = FALSE, .paropts = NULL, .id = ".id")
+ .parallel = FALSE, .paropts = NULL, .id)
}
\arguments{
\item{.fun}{function to apply to each piece}
@@ -32,7 +32,9 @@ ldply(.data, .fun = NULL, ..., .progress = "none", .inform = FALSE,
\item{.id}{name of the index column (used if \code{.data}
is a named list), defaults to \code{".id"}. Pass
- \code{NULL} to avoid creation of the index column}
+ \code{NULL} to avoid creation of the index column. For
+ compatibility, omit or pass \code{NA} to avoid converting
+ the index column to a factor.}
}
\value{
A data frame, as described in the output section.
View
@@ -2,7 +2,8 @@
\alias{list_to_dataframe}
\title{List to data frame.}
\usage{
-list_to_dataframe(res, labels = NULL, idname = NULL)
+list_to_dataframe(res, labels = NULL, id_name = NULL,
+ id_as_factor = FALSE)
}
\arguments{
\item{res}{list of input data}
View
@@ -2,7 +2,7 @@
\alias{rdply}
\title{Replicate expression and return results in a data frame.}
\usage{
-rdply(.n, .expr, .progress = "none", .id = ".n")
+rdply(.n, .expr, .progress = "none", .id)
}
\arguments{
\item{.n}{number of times to evaluate the expression}
@@ -14,7 +14,8 @@ rdply(.n, .expr, .progress = "none", .id = ".n")
\item{.id}{name of the index column, defaults to
\code{".n"}. Pass \code{NULL} to avoid creation of the
- index column}
+ index column. For compatibility, omit or pass \code{NA}
+ to avoid converting the index column to a factor.}
}
\value{
a data frame

0 comments on commit cd1633e

Please sign in to comment.