Trim trailing spaces

commit ea17d3b0b43eefde0666f1f9fe750d6592833820 (1 parent: 36e4916)
@hadley authored
Showing with 543 additions and 543 deletions.
  1. +10 −10 R/data.r
  2. +11 −11 R/dimensions.r
  3. +2 −2 R/helper-arrange.r
  4. +7 −7 R/helper-col-wise.r
  5. +8 −8 R/helper-count.r
  6. +2 −2 R/helper-data-frame.r
  7. +3 −3 R/helper-defaults.r
  8. +8 −8 R/helper-each.r
  9. +3 −3 R/helper-match-df.r
  10. +2 −2 R/helper-mutate.r
  11. +1 −1  R/helper-rename.r
  12. +2 −2 R/helper-splat.r
  13. +5 −5 R/helper-summarise.r
  14. +4 −4 R/helper-take.r
  15. +6 −6 R/helper-try.r
  16. +6 −6 R/helper-vaggregate.r
  17. +3 −3 R/id.r
  18. +10 −10 R/immutable.r
  19. +8 −8 R/indexed-array.r
  20. +3 −3 R/indexed-data-frame.r
  21. +1 −1  R/indexed.r
  22. +24 −24 R/join.r
  23. +1 −1  R/loop-apply.r
  24. +17 −17 R/ply-array.r
  25. +18 −18 R/ply-data-frame.r
  26. +10 −10 R/ply-iterator.r
  27. +16 −16 R/ply-list.r
  28. +29 −29 R/ply-mapply.r
  29. +19 −19 R/ply-null.r
  30. +28 −28 R/ply-replicate.r
  31. +20 −20 R/progress.r
  32. +22 −22 R/quote.r
  33. +17 −17 R/rbind-matrix.r
  34. +23 −23 R/rbind.r
  35. +13 −13 R/simplify-array.r
  36. +7 −7 R/simplify-data-frame.r
  37. +4 −4 R/simplify-vector.r
  38. +15 −15 R/split-array.r
  39. +11 −11 R/split-data-frame.r
  40. +1 −1  R/split-indices.r
  41. +3 −3 R/split.r
  42. +7 −7 R/utils.r
  43. +1 −1  benchmark/bench-llply.r
  44. +1 −1  benchmark/data.r
  45. +4 −4 benchmark/vis.r
  46. +18 −18 inst/tests/test-array.r
  47. +11 −11 inst/tests/test-count.r
  48. +10 −10 inst/tests/test-data-frame.r
  49. +3 −3 inst/tests/test-empty.r
  50. +13 −13 inst/tests/test-join.r
  51. +3 −3 inst/tests/test-list.r
  52. +3 −3 inst/tests/test-mapply.r
  53. +3 −3 inst/tests/test-mutate.r
  54. +1 −1  inst/tests/test-ninteraction.r
  55. +1 −1  inst/tests/test-progress.r
  56. +7 −7 inst/tests/test-quote.r
  57. +8 −8 inst/tests/test-rbind.matrix.r
  58. +18 −18 inst/tests/test-rbind.r
  59. +4 −4 inst/tests/test-rename.r
  60. +2 −2 inst/tests/test-replicate.r
  61. +1 −1  inst/tests/test-simplify-df.r
  62. +1 −1  inst/tests/test-split-data-frame.r
  63. +3 −3 inst/tests/test-split-labels.r
  64. +2 −2 inst/tests/test-summarise.r
  65. +1 −1  man-roxygen/-a.r
  66. +2 −2 man-roxygen/-d.r
  67. +1 −1  man-roxygen/-l.r
  68. +2 −2 man-roxygen/a-.r
  69. +3 −3 man-roxygen/d-.r
  70. +1 −1  man-roxygen/l-.r
  71. +3 −3 man-roxygen/ply.r
  72. +1 −1  tests/dependencies.R
  73. +1 −1  tests/test-all.R
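
Every hunk below is mechanical: a line carrying trailing whitespace is deleted and the same line is re-added without it, which is why the addition and deletion counts match exactly. The commit does not record how the cleanup was scripted; the following is only a hedged R sketch of how an equivalent pass could be run (the helper name, the regular expression and the directory list are assumptions, not taken from the commit):

# Hypothetical helper: strip trailing spaces and tabs from every R source
# file under the given directories, rewriting a file only when it changed.
trim_trailing <- function(dirs) {
  files <- list.files(dirs, pattern = "\\.[rR]$", full.names = TRUE,
                      recursive = TRUE)
  for (path in files) {
    lines <- readLines(path, warn = FALSE)
    cleaned <- sub("[ \t]+$", "", lines)  # drop whitespace at end of each line
    if (!identical(lines, cleaned)) writeLines(cleaned, path)
  }
}

# Directories touched by this commit.
trim_trailing(c("R", "inst/tests", "man-roxygen", "tests", "benchmark"))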
20 R/data.r
@@ -17,27 +17,27 @@
#' @examples
#' value <- ozone[1, 1, ]
#' time <- 1:72
-#' month.abbr <- c("Jan", "Feb", "Mar", "Apr", "May",
+#' month.abbr <- c("Jan", "Feb", "Mar", "Apr", "May",
#' "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
#' month <- factor(rep(month.abbr, length = 72), levels = month.abbr)
#' year <- rep(1:6, each = 12)
#' deseasf <- function(value) lm(value ~ month - 1)
-#'
+#'
#' models <- alply(ozone, 1:2, deseasf)
#' coefs <- laply(models, coef)
#' dimnames(coefs)[[3]] <- month.abbr
#' names(dimnames(coefs))[3] <- "month"
-#'
+#'
#' deseas <- laply(models, resid)
#' dimnames(deseas)[[3]] <- 1:72
#' names(dimnames(deseas))[3] <- "time"
-#'
+#'
#' dim(coefs)
#' dim(deseas)
NULL
#' Yearly batting records for all major league baseball players
-#'
+#'
#' This data frame contains batting statistics for a subset of players
#' collected from \url{http://www.baseball-databank.org/}. There are a total
#' of 21,699 records, covering 1,228 players from 1871 to 2007. Only players
@@ -57,7 +57,7 @@ NULL
#' \item h, hits, times reached base because of a batted, fair ball without
#' error by the defense
#' \item X2b, hits on which the batter reached second base safely
-#' \item X3b, hits on which the batter reached third base safely
+#' \item X3b, hits on which the batter reached third base safely
#' \item hr, number of home runs
#' \item rbi, runs batted in
#' \item sb, stolen bases
@@ -79,17 +79,17 @@ NULL
#' @examples
#' baberuth <- subset(baseball, id == "ruthba01")
#' baberuth$cyear <- baberuth$year - min(baberuth$year) + 1
-#'
+#'
#' calculate_cyear <- function(df) {
-#' mutate(df,
+#' mutate(df,
#' cyear = year - min(year),
#' cpercent = cyear / (max(year) - min(year))
#' )
#' }
-#'
+#'
#' baseball <- ddply(baseball, .(id), calculate_cyear)
#' baseball <- subset(baseball, ab >= 25)
-#'
+#'
#' model <- function(df) {
#' lm(rbi / ab ~ cyear, data=df)
#' }
22 R/dimensions.r
@@ -1,7 +1,7 @@
#' Number of dimensions.
#'
#' Number of dimensions of an array or vector
-#'
+#'
#' @param x array
#' @keywords internal
dims <- function(x) length(amv_dim(x))
@@ -9,30 +9,30 @@ dims <- function(x) length(amv_dim(x))
#' Dimensions.
#'
#' Consistent dimensions for vectors, matrices and arrays.
-#'
+#'
#' @param x array, matrix or vector
-#' @keywords internal
+#' @keywords internal
amv_dim <- function(x) if (is.vector(x)) length(x) else dim(x)
#' Dimension names.
#'
#' Consistent dimnames for vectors, matrices and arrays.
-#'
+#'
#' Unlike \code{\link{dimnames}} no part of the output will ever be
#' null. If a component of dimnames is omitted, \code{amv_dimnames}
#' will return an integer sequence of the appropriate length.
-#'
+#'
#' @param x array, matrix or vector
-#' @keywords internal
+#' @keywords internal
#' @export
amv_dimnames <- function(x) {
d <- if (is.vector(x)) list(names(x)) else dimnames(x)
-
+
if (is.null(d)) d <- rep(list(NULL), dims(x))
null_names <- which(unlist(llply(d, is.null)))
d[null_names] <- llply(null_names, function(i) seq.int(amv_dim(x)[i]))
-
+
# if (is.null(names(d))) names(d) <- paste("X", 1:length(d), sep="")
d
}
@@ -40,10 +40,10 @@ amv_dimnames <- function(x) {
#' Reduce dimensions.
#'
#' Remove extraneous dimensions
-#'
+#'
#' @param x array
-#' @keywords internal
+#' @keywords internal
reduce_dim <- function(x) {
- do.call("[", c(list(x), lapply(dim(x), function(x) if (x==1) 1 else TRUE), drop=TRUE))
+ do.call("[", c(list(x), lapply(dim(x), function(x) if (x==1) 1 else TRUE), drop=TRUE))
}
4 R/helper-arrange.r
@@ -1,7 +1,7 @@
#' Order a data frame by its colums.
#'
#' This function completes the subsetting, transforming and ordering triad
-#' with a function that works in a similar way to \code{\link{subset}} and
+#' with a function that works in a similar way to \code{\link{subset}} and
#' \code{\link{transform}} but for reordering a data frame by its columns.
#' This saves a lot of typing!
#'
@@ -25,7 +25,7 @@
arrange <- function(df, ...) {
ord <- eval(substitute(order(...)), df, parent.frame())
if(length(ord) != nrow(df)) {
- stop("Length of ordering vectors don't match data frame size",
+ stop("Length of ordering vectors don't match data frame size",
call. = FALSE)
}
unrowname(df[ord, , drop = FALSE])
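
The implementation above evaluates order(...) in the context of the data frame and then subsets its rows, so arrange(df, a, b) sorts df by column a, breaking ties by b. A hedged usage sketch, not part of this commit:

library(plyr)
arrange(mtcars, cyl, mpg)  # rows ordered by cyl, then by mpg within each cyl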
14 R/helper-col-wise.r
@@ -5,7 +5,7 @@
#'
#' \code{catcolwise} and \code{numcolwise} provide version that only operate
#' on discrete and numeric variables respectively.
-#'
+#'
#' @param .fun function
#' @param .cols either a function that tests columns for inclusion, or a
#' quoted object giving which columns to process
@@ -15,16 +15,16 @@
#' # Count number of missing values
#' nmissing <- function(x) sum(is.na(x))
#'
-#' # Apply to every column in a data frame
+#' # Apply to every column in a data frame
#' colwise(nmissing)(baseball)
-#' # This syntax looks a little different. It is shorthand for the
+#' # This syntax looks a little different. It is shorthand for the
#' # the following:
#' f <- colwise(nmissing)
#' f(baseball)
#'
#' # This is particularly useful in conjunction with d*ply
#' ddply(baseball, .(year), colwise(nmissing))
-#'
+#'
#' # To operate only on specified columns, supply them as the second
#' # argument. Many different forms are accepted.
#' ddply(baseball, .(year), colwise(nmissing, .(sb, cs, so)))
@@ -37,7 +37,7 @@
#' ddply(baseball, .(year), colwise(nmissing, is.numeric))
#' ddply(baseball, .(year), colwise(nmissing, is.discrete))
#'
-#' # These last two cases are particularly common, so some shortcuts are
+#' # These last two cases are particularly common, so some shortcuts are
#' # provided:
#' ddply(baseball, .(year), numcolwise(nmissing))
#' ddply(baseball, .(year), catcolwise(nmissing))
@@ -48,13 +48,13 @@ colwise <- function(.fun, .cols = true) {
} else {
filter <- function(df) Filter(.cols, df)
}
-
+
function(df, ...) {
stopifnot(is.data.frame(df))
df <- strip_splits(df)
filtered <- filter(df)
if (length(filtered) == 0) return(data.frame())
-
+
df <- quickdf(lapply(filtered, .fun, ...))
names(df) <- names(filtered)
df
16 R/helper-count.r
@@ -23,7 +23,7 @@
#' @export
#' @examples
#' # Count of each value of "id" in the first 100 cases
-#' count(baseball[1:100,], vars = "id")
+#' count(baseball[1:100,], vars = "id")
#' # Count of ids, weighted by their "g" loading
#' count(baseball[1:100,], vars = "id", wt_var = "g")
#' count(baseball, "id", "ab")
@@ -39,35 +39,35 @@ count <- function(df, vars = NULL, wt_var = NULL) {
if (is.vector(df)) {
df <- data.frame(x = df)
}
-
+
if (!is.null(vars)) {
vars <- as.quoted(vars)
df2 <- quickdf(eval.quoted(vars, df))
} else {
df2 <- df
}
-
+
id <- ninteraction(df2, drop = TRUE)
u_id <- !duplicated(id)
labels <- df2[u_id, , drop = FALSE]
labels <- labels[order(id[u_id]), , drop = FALSE]
-
+
if (is.null(wt_var) && "freq" %in% names(df)) {
message("Using freq as weighting variable")
wt_var <- "freq"
}
-
+
if (!is.null(wt_var)) {
wt_var <- as.quoted(wt_var)
if (length(wt_var) > 1) {
stop("wt_var must be a single variable", call. = FALSE)
}
-
+
wt <- eval.quoted(wt_var, df)[[1]]
freq <- vaggregate(wt, id, sum, .default = 0)
} else {
- freq <- tabulate(id, attr(id, "n"))
+ freq <- tabulate(id, attr(id, "n"))
}
-
+
unrowname(data.frame(labels, freq))
}
4 R/helper-data-frame.r
@@ -2,10 +2,10 @@
#'
#' Create a new function that returns the existing function wrapped in a
#' data.frame
-#'
+#'
#' This is useful when calling \code{*dply} functions with a function that
#' returns a vector, and you want the output in rows, rather than columns
-#'
+#'
#' @keywords manip
#' @param x function to make return a data frame
#' @param row.names necessary to match the generic, but not used
6 R/helper-defaults.r
@@ -1,11 +1,11 @@
#' Set defaults.
#'
#' Convient method for combining a list of values with their defaults.
-#'
+#'
#' @param x list of values
#' @param y defaults
-#' @keywords manip
+#' @keywords manip
#' @export
defaults <- function(x, y) {
c(x, y[setdiff(names(y), names(x))])
-}
+}
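
The one-liner above pins down the behaviour: values present in x win, and only the names missing from x are filled in from y. A small illustrative call, not taken from the diff:

library(plyr)
defaults(list(a = 1), list(a = 0, b = 2))
# returns list(a = 1, b = 2): a keeps the value from x, b is filled from y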
16 R/helper-each.r
@@ -3,7 +3,7 @@
#' Combine multiple functions into a single function returning a named vector
#' of outputs.
#' Note: you cannot supply additional parameters for the summary functions
-#'
+#'
#' @param ... functions to combine. each function should produce a single
#' number as output
#' @keywords manip
@@ -27,12 +27,12 @@ each <- function(...) {
fs <- list(...)
if (length(fs[[1]]) > 1) {
fs <- fs[[1]]
-
+
# Jump through hoops to work out names
snames <- as.list(match.call()[2])[[1]]
fnames <- unlist(lapply(as.list(snames)[-1], deparse))
}
-
+
# Find function names and replace with function objects
char <- laply(fs, is.character)
fnames[char] <- fs[char]
@@ -41,14 +41,14 @@ each <- function(...) {
unames <- names(fs)
if (is.null(unames)) unames <- fnames
unames[unames == ""] <- fnames[unames == ""]
-
+
n <- length(fs)
proto <- NULL
result <- NULL
-
+
if (n == 1) {
# If there is only one function, things are simple. We just
- # need to name the output, if appropriate.
+ # need to name the output, if appropriate.
function(x, ...) {
res <- fs[[1]](x, ...)
if (length(res) == 1) names(res) <- unames
@@ -65,9 +65,9 @@ each <- function(...) {
for(i in 1:n) result[[i]] <- fs[[i]](x, ...)
proto <<- list_to_vector(result)
} else {
- for(i in 1:n) proto[[i]] <- fs[[i]](x, ...)
+ for(i in 1:n) proto[[i]] <- fs[[i]](x, ...)
}
proto
- }
+ }
}
}
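
As documented above, each() bundles several summary functions into one function returning a named vector. A hedged usage sketch, not part of the diff:

library(plyr)
each(min, max)(1:10)
# named vector: c(min = 1, max = 10)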
6 R/helper-match-df.r
@@ -9,7 +9,7 @@
#' to both data frames.
#' @return a data frame
#' @seealso \code{\link{join}} to combine the columns from both x and y
-#' and \code{\link{match}} for the base function selecting matching items
+#' and \code{\link{match}} for the base function selecting matching items
#' @export
#' @examples
#' # count the occurrences of each id in the baseball dataframe, then get the subset with a freq >25
@@ -19,7 +19,7 @@
#' # 30 ansonca01 27
#' # 48 baineha01 27
#' # ...
-#' # Select only rows from these longterm players from the baseball dataframe
+#' # Select only rows from these longterm players from the baseball dataframe
#' # (match would default to match on shared column names, but here was explicitly set "id")
#' bb_longterm <- match_df(baseball, longterm, on="id")
#' bb_longterm[1:5,]
@@ -28,7 +28,7 @@ match_df <- function(x, y, on = NULL) {
on <- intersect(names(x), names(y))
message("Matching on: ", paste(on, collapse = ", "))
}
-
+
keys <- join.keys(x, y, on)
x[keys$x %in% keys$y, ]
}
4 R/helper-mutate.r
@@ -20,7 +20,7 @@
#' mutate(airquality, new = -Ozone, Temp = (Temp - 32) / 1.8)
#'
#' # Things transform can't do
-#' mutate(airquality, Temp = (Temp - 32) / 1.8, OzT = Ozone / Temp)
+#' mutate(airquality, Temp = (Temp - 32) / 1.8, OzT = Ozone / Temp)
#'
#' # mutate is rather faster than transform
#' system.time(transform(baseball, avg_ab = ab / g))
@@ -28,7 +28,7 @@
mutate <- function(.data, ...) {
cols <- as.list(substitute(list(...))[-1])
cols <- cols[names(cols) != ""] # Silently drop unnamed columns
-
+
for(col in names(cols)) {
.data[[col]] <- eval(cols[[col]], .data, parent.frame())
}
2  R/helper-rename.r
@@ -5,7 +5,7 @@
#' old names as names.
#' @param warn_missing print a message if any of the old names are
#' not actually present in \code{x}.
-#' Note: x is not altered: To save the result, you need to copy the returned
+#' Note: x is not altered: To save the result, you need to copy the returned
#' data into a variable.
#' @export
#' @importFrom stats setNames
4 R/helper-splat.r
@@ -2,10 +2,10 @@
#'
#' Wraps a function in do.call, so instead of taking multiple arguments, it
#' takes a single named list which will be interpreted as its arguments.
-#'
+#'
#' This is useful when you want to pass a function a row of data frame or
#' array, and don't want to manually pull it apart in your function.
-#'
+#'
#' @param flat function to splat
#' @return a function
#' @export
10 R/helper-summarise.r
@@ -4,7 +4,7 @@
#' columns to an existing data frame, it creates a new data frame. This is
#' particularly useful in conjunction with \code{\link{ddply}} as it makes it
#' easy to perform group-wise summaries.
-#'
+#'
#' @param .data the data frame to be summarised
#' @param ... further arguments of the form var = value
#' @keywords manip
@@ -13,12 +13,12 @@
#' @examples
#' # Let's extract the number of teams and total period of time
#' # covered by the baseball dataframe
-#' summarise(baseball,
-#' duration = max(year) - min(year),
+#' summarise(baseball,
+#' duration = max(year) - min(year),
#' nteams = length(unique(team)))
#' # Combine with ddply to do that for each separate id
-#' ddply(baseball, "id", summarise,
-#' duration = max(year) - min(year),
+#' ddply(baseball, "id", summarise,
+#' duration = max(year) - min(year),
#' nteams = length(unique(team)))
summarise <- function(.data, ...) {
cols <- as.list(substitute(list(...))[-1])
8 R/helper-take.r
@@ -1,8 +1,8 @@
#' Take a subset along an arbitrary dimension
-#'
+#'
#' @param x matrix or array to subset
#' @param along dimension to subset along
-#' @param indices
+#' @param indices
#' @param drop should the dimensions of the array be simplified? Defaults
#' to \code{FALSE} which is the opposite of the useful R default.
#' @export
@@ -16,10 +16,10 @@
#' take(x, 1, 1, drop = TRU)
take <- function(x, along, indices, drop = FALSE) {
nd <- length(dim(x))
-
+
index <- as.list(rep(TRUE, nd))
index[along] <- indices
-
+
eval(as.call(c(as.name("["), as.name("x"), index, drop = drop)))
}
12 R/helper-try.r
@@ -2,7 +2,7 @@
#'
#' Modify a function so that it returns a default value when there is an
#' error.
-#'
+#'
#' @param default default value
#' @param f function
#' @param quiet all error messages be suppressed?
@@ -19,7 +19,7 @@
#'
#' safef <- failwith(NULL, f)
#' safef(1)
-#' safef(2)
+#' safef(2)
failwith <- function(default = NULL, f, quiet = FALSE) {
f <- match.fun(f)
function(...) try_default(f(...), default, quiet = quiet)
@@ -28,9 +28,9 @@ failwith <- function(default = NULL, f, quiet = FALSE) {
#' Try, with default in case of error.
#'
#' \code{try_default} wraps try so that it returns a default value in the case of error.
-#'
+#'
#' \code{tryNULL} provides a useful special case when dealing with lists.
-#'
+#'
#' @param expr expression to try
#' @param default default value in case of error
#' @param quiet should errors be printed (TRUE) or ignored (FALSE, default)
@@ -41,7 +41,7 @@ failwith <- function(default = NULL, f, quiet = FALSE) {
try_default <- function(expr, default, quiet = FALSE) {
result <- default
if (quiet) {
- tryCatch(result <- expr, error = function(e) {})
+ tryCatch(result <- expr, error = function(e) {})
} else {
try(result <- expr)
}
@@ -52,7 +52,7 @@ tryNULL <- function(expr) try_default(expr, NULL, quiet = TRUE)
#' Apply with built in try.
#' Uses compact, lapply and tryNULL
-#'
+#'
#' @keywords internal
#' @export
tryapply <- function(list, fun, ...) {
12 R/helper-vaggregate.r
@@ -1,19 +1,19 @@
#' Vector aggregate.
#'
#' This function is somewhat similar to \code{tapply}, but is designed for
-#' use in conjunction with \code{id}. It is simpler in that it only
+#' use in conjunction with \code{id}. It is simpler in that it only
#' accepts a single grouping vector (use \code{\link{id}} if you have more)
#' and uses \code{\link{vapply}} internally, using the \code{.default} value
#' as the template.
-#'
+#'
#' \code{vaggregate} should be faster than \code{tapply} in most situations
#' because it avoids making a copy of the data.
-#'
+#'
#' @param .value vector of values to aggregate
#' @param .group grouping vector
#' @param .fun aggregation function
#' @param ... other arguments passed on to \code{.fun}
-#' @param .default default value used for missing groups. This argument is
+#' @param .default default value used for missing groups. This argument is
#' also used as the template for function output.
#' @param .n total number of groups
#' @export
@@ -29,7 +29,7 @@
#' # Unlike tapply, vaggregate does not support multi-d output:
#' tapply(warpbreaks$breaks, warpbreaks[,-1], sum)
#' vaggregate(warpbreaks$breaks, id(warpbreaks[,-1]), sum)
-#'
+#'
#' # But it is about 10x faster
#' x <- rnorm(1e6)
#' y1 <- sample.int(10, 1e6, replace = TRUE)
@@ -47,7 +47,7 @@ vaggregate <- function(.value, .group, .fun, ..., .default = NULL, .n = nlevels(
if (is.null(.default)) {
.default <- .fun(.value[0], ...)
}
-
+
fun <- function(i) {
if (length(i) == 0) return(.default)
.fun(.value[i], ...)
6 R/id.r
@@ -30,12 +30,12 @@ id <- function(.variables, drop = FALSE) {
p <- length(ids)
# Calculate dimensions
- ndistinct <- vapply(ids, attr, "n", FUN.VALUE = numeric(1),
+ ndistinct <- vapply(ids, attr, "n", FUN.VALUE = numeric(1),
USE.NAMES = FALSE)
n <- prod(ndistinct)
if (n > 2 ^ 31) {
# Too big for integers, have to use strings, which will be much slower :(
-
+
char_id <- do.call("paste", c(ids, sep = "\r"))
res <- match(char_id, unique(char_id))
} else {
@@ -60,7 +60,7 @@ ninteraction <- id
id_var <- function(x, drop = FALSE) {
if (length(x) == 0) return(structure(integer(), n = 0L))
if (!is.null(attr(x, "n")) && !drop) return(x)
-
+
if (is.factor(x) && !drop) {
id <- as.integer(addNA(x, ifany = TRUE))
n <- length(levels(x))
20 R/immutable.r
@@ -1,7 +1,7 @@
#' Construct an immutable data frame.
-#'
+#'
#' An immutable data frame works like an ordinary data frame, except that when
-#' you subset it, it returns a reference to the original data frame, not a
+#' you subset it, it returns a reference to the original data frame, not a
#' a copy. This makes subsetting substantially faster and has a big impact
#' when you are working with large datasets with many groups.
#'
@@ -28,7 +28,7 @@ idata.frame <- function(df) {
self$`_getters` <- lapply(names(df), function(name) {
eval(substitute(function(v) {
if (missing(v)) {
- `_data`[[name]][`_rows`]
+ `_data`[[name]][`_rows`]
} else {
stop("Immutable")
}
@@ -40,11 +40,11 @@ idata.frame <- function(df) {
environment(f) <- self
makeActiveBinding(name, f, self)
}
- structure(self,
+ structure(self,
class = c("idf", "environment"))
}
-"[.idf" <- function(x, i, j, drop = TRUE) {
+"[.idf" <- function(x, i, j, drop = TRUE) {
# Single column special cases
if (nargs() == 2) {
j <- i
@@ -55,14 +55,14 @@ idata.frame <- function(df) {
if (missing(i)) i <- TRUE
return(x[[j]][i])
}
-
+
# New rows
rows <- x$`_rows`
if (!missing(i)) {
if (is.character(i)) stop("Row names not supported")
rows <- rows[i]
}
-
+
# New cols
cols <- x$`_cols`
if (!missing(j)) {
@@ -70,9 +70,9 @@ idata.frame <- function(df) {
cols <- intersect(cols, j)
} else {
cols <- cols[j]
- }
+ }
}
-
+
# Make active bindings for functions like lm and eval that will treat this
# object as an environment or list
self <- new.env(parent = parent.env(x))
@@ -86,7 +86,7 @@ idata.frame <- function(df) {
environment(f) <- self
makeActiveBinding(col, f, self)
}
-
+
structure(self,
class = c("idf", "environment"))
}
16 R/indexed-array.r
@@ -2,7 +2,7 @@
#'
#' Create a indexed array, a space efficient way of indexing into a large
#' array.
-#'
+#'
#' @param env environment containing data frame
#' @param index list of indices
#' @keywords internal
@@ -16,24 +16,24 @@ indexed_array <- function(env, index) {
# * normal array
# * normal vector
# * list-array with inexact indexing
- #
+ #
# Situations that should use [[
# * list
# * list-array with exact indexing
-
+
if (is.list(env$data)) {
if (is.data.frame(env$data) || (is.array(env$data) && !exact)) {
subs <- c("[", "]")
} else {
- subs <- c("[[", "]]")
+ subs <- c("[[", "]]")
}
} else {
- subs <- c("[", "]")
+ subs <- c("[", "]")
}
-
+
# Don't drop if data is a data frame
drop <- !is.data.frame(env$data)
-
+
structure(
list(env = env, index = index, drop = drop, subs = subs),
class = c("indexed_array", "indexed")
@@ -50,7 +50,7 @@ length.indexed_array <- function(x) nrow(x$index)
## This is very slow because we have to create a copy to use do.call
# do.call(x$subs, c(list(x$env$data), indices, drop=TRUE))
- call <- paste("x$env$data",
+ call <- paste("x$env$data",
x$subs[1], indices, ", drop = ", x$drop, x$subs[2], sep = "")
eval(parse(text = call))
}
6 R/indexed-data-frame.r
@@ -1,13 +1,13 @@
#' An indexed data frame.
#'
#' Create a indexed list, a space efficient way of indexing into a large data frame
-#'
+#'
#' @param env environment containing data frame
#' @param index list of indices
#' @param vars a character vector giving the variables used for subsetting
#' @keywords internal
indexed_df <- function(data, index, vars) {
-
+
structure(
list(data = data, index = index, vars = vars),
class = c("indexed", "indexed_df")
@@ -18,6 +18,6 @@ indexed_df <- function(data, index, vars) {
"[[.indexed_df" <- function(x, i) {
structure(x$data[x$index[[i]], , drop = FALSE], vars = x$vars)
# x$env$data[x$index[[i]], , drop = FALSE]
- # slice(x, attr(x, "index")[[i]])
+ # slice(x, attr(x, "index")[[i]])
# subset_rows(x$env$data, x$index[[i]])
}
2  R/indexed.r
@@ -5,7 +5,7 @@ length.indexed <- function(x) length(x$index)
names.indexed <- function(x) {
labels <- attr(x, "split_labels")
labels[] <- lapply(labels, as.character)
-
+
do.call(paste, c(labels, list(sep = ".")))
}
48 R/join.r
@@ -1,10 +1,10 @@
#' Join two data frames together.
#'
#' Join, like merge, is designed for the types of problems
-#' where you would use a sql join.
+#' where you would use a sql join.
#'
#' The four join types return:
-#'
+#'
#' \itemize{
#' \item \code{inner}: only rows with matching keys in both x and y
#' \item \code{left}: all rows in x, adding matching columns from y
@@ -21,11 +21,11 @@
#' than merge, although it is somewhat less featureful - it currently offers
#' no way to rename output or merge on different variables in the x and y
#' data frames.
-#'
+#'
#' @param x data frame
#' @param y data frame
#' @param by character vector of variable names to join by
-#' @param type type of join: left (default), right, inner or full. See
+#' @param type type of join: left (default), right, inner or full. See
#' details for more information.
#' @param match how should duplicate ids be matched? Either match just the
#' \code{"first"} matching row, or match \code{"all"} matching rows.
@@ -42,12 +42,12 @@
join <- function(x, y, by = intersect(names(x), names(y)), type = "left", match = "all") {
type <- match.arg(type, c("left", "right", "inner", "full"))
match <- match.arg(match, c("first", "all"))
-
+
if (missing(by)) {
message("Joining by: ", paste(by, collapse = ", "))
}
-
- switch(match,
+
+ switch(match,
"first" = join_first(x, y, by, type),
"all" = join_all(x, y, by, type))
}
@@ -55,13 +55,13 @@ join <- function(x, y, by = intersect(names(x), names(y)), type = "left", match
join_first <- function(x, y, by, type) {
keys <- join.keys(x, y, by = by)
new.cols <- setdiff(names(y), by)
-
+
if (type == "inner") {
x.match <- match(keys$y, keys$x, 0)
y.match <- match(keys$x, keys$y, 0)
cbind(x[x.match, , drop = FALSE], y[y.match, new.cols, drop = FALSE])
- } else if (type == "left") {
+ } else if (type == "left") {
y.match <- match(keys$x, keys$y)
y.matched <- unrowname(y[y.match, new.cols, drop = FALSE])
cbind(x, y.matched)
@@ -70,12 +70,12 @@ join_first <- function(x, y, by, type) {
if (any(duplicated(keys$y))) {
stop("Duplicated key in y", call. = FALSE)
}
-
+
new.cols <- setdiff(names(x), by)
x.match <- match(keys$y, keys$x)
x.matched <- unrowname(x[x.match, , drop = FALSE])
cbind(y, x.matched[, new.cols, drop = FALSE])
-
+
} else if (type == "full") {
# x with matching y's then any unmatched ys
@@ -83,18 +83,18 @@ join_first <- function(x, y, by, type) {
y.matched <- unrowname(y[y.match, new.cols, drop = FALSE])
y.unmatch <- is.na(match(keys$y, keys$x))
-
+
rbind.fill(cbind(x, y.matched), y[y.unmatch, , drop = FALSE])
}
}
# Basic idea to perform a full cartesian product of the two data frames
-# and then evaluate which rows meet the merging criteria. But that is
+# and then evaluate which rows meet the merging criteria. But that is
# horrendously inefficient, so we do various types of hashing, implemented
# in R as split_indices
join_all <- function(x, y, by, type) {
new.cols <- setdiff(names(y), by)
-
+
if (type == "inner") {
ids <- join_ids(x, y, by)
out <- cbind(x[ids$x, , drop = FALSE], y[ids$y, new.cols, drop = FALSE])
@@ -110,36 +110,36 @@ join_all <- function(x, y, by, type) {
# x's with all matching y's, then non-matching y's - just the same as
# join.first
ids <- join_ids(x, y, by, all = TRUE)
-
- matched <- cbind(x[ids$x, , drop = FALSE],
+
+ matched <- cbind(x[ids$x, , drop = FALSE],
y[ids$y, new.cols, drop = FALSE])
unmatched <- y[setdiff(seq_len(nrow(y)), ids$y), , drop = FALSE]
out <- rbind.fill(matched, unmatched)
}
-
+
unrowname(out)
}
join_ids <- function(x, y, by, all = FALSE) {
keys <- join.keys(x, y, by = by)
-
+
ys <- split_indices(seq_along(keys$y), keys$y, keys$n)
length(ys) <- keys$n
-
+
if (all) {
# replace NULL with NA to preserve those x's without matching y's
nulls <- vapply(ys, function(x) length(x) == 0, logical(1))
- ys[nulls] <- list(NA)
+ ys[nulls] <- list(NA)
}
-
+
ys <- ys[keys$x]
xs <- rep(seq_along(keys$x), vapply(ys, length, numeric(1)))
-
+
list(x = xs, y = unlist(ys))
}
#' Join keys.
-#' Given two data frames, create a unique key for each row.
+#' Given two data frames, create a unique key for each row.
#'
#' @param x data frame
#' @param y data frame
@@ -149,7 +149,7 @@ join_ids <- function(x, y, by, all = FALSE) {
join.keys <- function(x, y, by) {
joint <- rbind.fill(x[by], y[by])
keys <- id(joint, drop = TRUE)
-
+
list(
x = keys[1:nrow(x)],
y = keys[-(1:nrow(x))],
2  R/loop-apply.r
@@ -1,6 +1,6 @@
#' Loop apply
#'
-#' An optimised version of lapply for the special case of operating on
+#' An optimised version of lapply for the special case of operating on
#' \code{seq_len(n)}
#'
#' @param n length of sequence
34 R/ply-array.r
@@ -22,11 +22,11 @@
laply <- function(.data, .fun = NULL, ..., .progress = "none", .drop = TRUE, .parallel = FALSE) {
if (is.character(.fun)) .fun <- do.call("each", as.list(.fun))
if (!is.function(.fun)) stop(".fun is not a function.")
-
+
if (!inherits(.data, "split")) .data <- as.list(.data)
- res <- llply(.data = .data, .fun = .fun, ...,
+ res <- llply(.data = .data, .fun = .fun, ...,
.progress = .progress, .parallel = .parallel)
-
+
list_to_array(res, attr(.data, "split_labels"), .drop)
}
@@ -35,8 +35,8 @@ laply <- function(.data, .fun = NULL, ..., .progress = "none", .drop = TRUE, .p
#'
#' For each subset of data frame, apply function then combine results into
#' an array. \code{daply} with a function that operates column-wise is
-#' similar to \code{\link{aggregate}}.
-#'
+#' similar to \code{\link{aggregate}}.
+#'
#' @template ply
#' @section Input: This function splits data frames by variables.
#' @section Output:
@@ -45,9 +45,9 @@ laply <- function(.data, .fun = NULL, ..., .progress = "none", .drop = TRUE, .p
#' @param .data data frame to be processed
#' @param .variables variables to split data frame by, as quoted
#' variables, a formula or character vector
-#' @param .drop_i should combinations of variables that do not appear in the
+#' @param .drop_i should combinations of variables that do not appear in the
#' input data be preserved (FALSE) or dropped (TRUE, default)
-#' @param .parallel if \code{TRUE}, apply function in parallel, using parallel
+#' @param .parallel if \code{TRUE}, apply function in parallel, using parallel
#' backend provided by foreach
#' @return if results are atomic with same type and dimensionality, a
#' vector, matrix or array; otherwise, a list-array (a list with
@@ -60,17 +60,17 @@ laply <- function(.data, .fun = NULL, ..., .progress = "none", .drop = TRUE, .p
#' @examples
#' daply(baseball, .(year), nrow)
#'
-#' # Several different ways of summarising by variables that should not be
+#' # Several different ways of summarising by variables that should not be
#' # included in the summary
-#'
+#'
#' daply(baseball[, c(2, 6:9)], .(year), colwise(mean))
#' daply(baseball[, 6:9], .(baseball$year), colwise(mean))
#' daply(baseball, .(year), function(df) colwise(mean)(df[, 6:9]))
daply <- function(.data, .variables, .fun = NULL, ..., .progress = "none", .drop_i = TRUE, .drop_o = TRUE, .parallel = FALSE) {
.variables <- as.quoted(.variables)
pieces <- splitter_d(.data, .variables, drop = .drop_i)
-
- laply(.data = pieces, .fun = .fun, ...,
+
+ laply(.data = pieces, .fun = .fun, ...,
.progress = .progress, .drop = .drop_o, .parallel = .parallel)
}
@@ -82,7 +82,7 @@ daply <- function(.data, .variables, .fun = NULL, ..., .progress = "none", .drop
#' those dimensions are added on to the highest dimensions, rather than the
#' lowest dimensions. This makes \code{aaply} idempotent, so that
#' \code{apply(input, X, identity)} is equivalent to \code{aperm(input, X)}.
-#'
+#'
#' @template ply
#' @template a-
#' @template -a
@@ -95,19 +95,19 @@ daply <- function(.data, .variables, .fun = NULL, ..., .progress = "none", .drop
#' aaply(ozone, c(1,2), mean)
#'
#' dim(aaply(ozone, c(1,2), mean))
-#' dim(aaply(ozone, c(1,2), mean, .drop = FALSE))
+#' dim(aaply(ozone, c(1,2), mean, .drop = FALSE))
#'
#' aaply(ozone, 1, each(min, max))
#' aaply(ozone, 3, each(min, max))
-#'
+#'
#' standardise <- function(x) (x - min(x)) / (max(x) - min(x))
#' aaply(ozone, 3, standardise)
#' aaply(ozone, 1:2, standardise)
-#'
+#'
#' aaply(ozone, 1:2, diff)
aaply <- function(.data, .margins, .fun = NULL, ..., .expand = TRUE, .progress = "none", .drop = TRUE, .parallel = FALSE) {
pieces <- splitter_a(.data, .margins, .expand)
-
- laply(.data = pieces, .fun = .fun, ...,
+
+ laply(.data = pieces, .fun = .fun, ...,
.progress = .progress, .drop = .drop, .parallel = .parallel)
}
36 R/ply-data-frame.r
@@ -1,7 +1,7 @@
#' Split list, apply function, and return results in a data frame.
#'
-#' For each element of a list, apply function then combine results into a data
-#' frame.
+#' For each element of a list, apply function then combine results into a data
+#' frame.
#'
#' @template ply
#' @template l-
@@ -9,9 +9,9 @@
#' @export
ldply <- function(.data, .fun = NULL, ..., .progress = "none", .parallel = FALSE) {
if (!inherits(.data, "split")) .data <- as.list(.data)
- res <- llply(.data = .data, .fun = .fun, ...,
+ res <- llply(.data = .data, .fun = .fun, ...,
.progress = .progress, .parallel = .parallel)
-
+
list_to_dataframe(res, attr(.data, "split_labels"))
}
@@ -32,13 +32,13 @@ ldply <- function(.data, .fun = NULL, ..., .progress = "none", .parallel = FALSE
#' sex <- sample(c("M", "F"), size = 29, replace = TRUE)
#' age <- runif(n = 29, min = 18, max = 54)
#' dfx <- data.frame (group, time, age)
-#'
-#' # Note the use of the '.' function to allow
+#'
+#' # Note the use of the '.' function to allow
#' # group and sex to be used without quoting
-#' ddply(dfx, .(group, sex), summarize,
-#' mean <- round(mean(age), 2),
+#' ddply(dfx, .(group, sex), summarize,
+#' mean <- round(mean(age), 2),
#' sd <- round(sd(age), 2))
-#'
+#'
#' # group sex mean sd
#' # 1 A F 35.89 8.53
#' # 2 A M 38.01 15.09
@@ -51,24 +51,24 @@ ldply <- function(.data, .fun = NULL, ..., .progress = "none", .parallel = FALSE
#' ddply(baseball[1:100,], .variables = ~year, .fun=nrow)
#' # Applying two functions; nrow and ncol
#' ddply(baseball, .(lg), c("nrow", "mean"))
-#'
+#'
#' # Calculate mean runs batted in for each year
-#' rbi <- ddply(baseball, .(year), summarise,
+#' rbi <- ddply(baseball, .(year), summarise,
#' mean_rbi = mean(rbi, na.rm = TRUE))
#' # Plot a line chart of the result
#' plot(mean_rbi ~year, type = "l", data = rbi))
-#'
-#' # make new variable career_year based on the
+#'
+#' # make new variable career_year based on the
#' # start year for each player (id)
-#' base2 <- ddply(baseball, .(id), transform,
+#' base2 <- ddply(baseball, .(id), transform,
#' career_year = year - min(year) + 1
#' )
ddply <- function(.data, .variables, .fun = NULL, ..., .progress = "none", .drop = TRUE, .parallel = FALSE) {
if (empty(.data)) return(.data)
.variables <- as.quoted(.variables)
pieces <- splitter_d(.data, .variables, drop = .drop)
-
- ldply(.data = pieces, .fun = .fun, ...,
+
+ ldply(.data = pieces, .fun = .fun, ...,
.progress = .progress, .parallel = .parallel)
}
@@ -83,7 +83,7 @@ ddply <- function(.data, .variables, .fun = NULL, ..., .progress = "none", .drop
#' @export
adply <- function(.data, .margins, .fun = NULL, ..., .expand = TRUE, .progress = "none", .parallel = FALSE) {
pieces <- splitter_a(.data, .margins, .expand)
-
- ldply(.data = pieces, .fun = .fun, ...,
+
+ ldply(.data = pieces, .fun = .fun, ...,
.progress = .progress, .parallel = .parallel)
}
20 R/ply-iterator.r
@@ -1,11 +1,11 @@
#' Experimental iterator based version of llply.
-#'
-#' Because iterators do not have known length, \code{liply} starts by
-#' allocating an output list of length 50, and then doubles that length
-#' whenever it runs out of space. This gives O(n ln n) performance rather
+#'
+#' Because iterators do not have known length, \code{liply} starts by
+#' allocating an output list of length 50, and then doubles that length
+#' whenever it runs out of space. This gives O(n ln n) performance rather
#' than the O(n ^ 2) performance from the naive strategy of growing the list
#' each time.
-#'
+#'
#' @keywords manip
#' @param .iterator iterator object
#' @param .fun function to apply to each piece
@@ -24,19 +24,19 @@
liply <- function(.iterator, .fun = NULL, ...) {
stopifnot(inherits(.iterator, "iter"))
if (is.null(.fun)) return(as.list(.iterator))
-
+
iterator <- itertools::ihasNext(.iterator)
-
+
if (is.character(.fun)) .fun <- each(.fun)
if (!is.function(.fun)) stop(".fun is not a function.")
-
+
result <- vector("list", 50)
i <- 0
while(itertools::hasNext(iterator)) {
piece <- iterators::nextElem(iterator)
res <- .fun(piece, ...)
-
+
# Double length of vector when necessary. Gives O(n ln n) performance
# instead of naive O(n^2)
i <- i + 1
@@ -46,7 +46,7 @@ liply <- function(.iterator, .fun = NULL, ...) {
if (!is.null(res)) result[[i]] <- res
}
length(result) <- i
-
+
result
}
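
The doubling strategy described in the documentation above is independent of iterators; here is a minimal standalone sketch of the same idea (names and structure are illustrative, not plyr's internals):

# Collect results from a generator of unknown length, doubling the output
# list whenever it fills up, so that repeated appends avoid the O(n^2) cost
# of growing the list one element at a time.
collect_all <- function(next_value) {
  result <- vector("list", 50)
  i <- 0
  repeat {
    val <- next_value()
    if (is.null(val)) break  # NULL is used as the end-of-stream sentinel here
    i <- i + 1
    if (i > length(result)) length(result) <- 2 * length(result)
    result[[i]] <- val
  }
  length(result) <- i
  result
}

# Example: a closure yielding 1, 2, ..., 120 and then NULL.
counter <- local({
  n <- 0
  function() { n <<- n + 1; if (n <= 120) n else NULL }
})
length(collect_all(counter))  # 120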
32 R/ply-list.r
@@ -1,9 +1,9 @@
#' Split list, apply function, and return results in a list.
#'
#' For each element of a list, apply function, keeping results as a list.
-#' \code{llply} is equivalent to \code{\link{lapply}} except that it will
+#' \code{llply} is equivalent to \code{\link{lapply}} except that it will
#' preserve labels and can display a progress bar.
-#'
+#'
#' @template ply
#' @template l-
#' @template -l
@@ -32,14 +32,14 @@ llply <- function(.data, .fun = NULL, ..., .progress = "none", .inform = FALSE,
if (fast_path) {
return(structure(lapply(pieces, .fun, ...), dim = dim(pieces)))
}
-
+
} else {
pieces <- .data
}
-
+
n <- length(pieces)
if (n == 0) return(list())
-
+
progress <- create_progress_bar(.progress)
progress$init(n)
on.exit(progress$term())
@@ -54,7 +54,7 @@ llply <- function(.data, .fun = NULL, ..., .progress = "none", .inform = FALSE,
if (inherits(res, "try-error")) {
piece <- paste(capture.output(print(piece)), collapse = "\n")
stop("with piece ", i, ": \n", piece, call. = FALSE)
- }
+ }
} else {
res <- .fun(piece, ...)
}
@@ -63,7 +63,7 @@ llply <- function(.data, .fun = NULL, ..., .progress = "none", .inform = FALSE,
}
if (.parallel) {
if (!require("foreach")) {
- stop("foreach package required for parallel plyr operation",
+ stop("foreach package required for parallel plyr operation",
call. = FALSE)
}
if (getDoParWorkers() == 1) {
@@ -73,16 +73,16 @@ llply <- function(.data, .fun = NULL, ..., .progress = "none", .inform = FALSE,
} else {
result <- loop_apply(n, do.ply)
}
-
+
attributes(result)[c("split_type", "split_labels")] <-
attributes(pieces)[c("split_type", "split_labels")]
names(result) <- names(pieces)
# Only set dimension if not null, otherwise names are removed
if (!is.null(dim(pieces))) {
- dim(result) <- dim(pieces)
+ dim(result) <- dim(pieces)
}
-
+
result
}
@@ -91,7 +91,7 @@ llply <- function(.data, .fun = NULL, ..., .progress = "none", .inform = FALSE,
#' For each subset of a data frame, apply function then combine results into a
#' list. \code{dlply} is similar to \code{\link{by}} except that the results
#' are returned in a different format.
-#'
+#'
#' @template ply
#' @template d-
#' @template -l
@@ -110,8 +110,8 @@ llply <- function(.data, .fun = NULL, ..., .progress = "none", .inform = FALSE,
dlply <- function(.data, .variables, .fun = NULL, ..., .progress = "none", .drop = TRUE, .parallel = FALSE) {
.variables <- as.quoted(.variables)
pieces <- splitter_d(.data, .variables, drop = .drop)
-
- llply(.data = pieces, .fun = .fun, ...,
+
+ llply(.data = pieces, .fun = .fun, ...,
.progress = .progress, .parallel = .parallel)
}
@@ -120,7 +120,7 @@ dlply <- function(.data, .variables, .fun = NULL, ..., .progress = "none", .drop
#' For each slice of an array, apply function then combine results into a
#' list. \code{alply} is somewhat similar to \code{\link{apply}} for cases
#' where the results are not atomic.
-#'
+#'
#' @template ply
#' @template a-
#' @template -l
@@ -130,7 +130,7 @@ dlply <- function(.data, .variables, .fun = NULL, ..., .progress = "none", .drop
#' alply(ozone, 3, function(x) table(round(x)))
alply <- function(.data, .margins, .fun = NULL, ..., .expand = TRUE, .progress = "none", .parallel = FALSE) {
pieces <- splitter_a(.data, .margins, .expand)
-
- llply(.data = pieces, .fun = .fun, ...,
+
+ llply(.data = pieces, .fun = .fun, ...,
.progress = .progress, .parallel = .parallel)
}
58 R/ply-mapply.r
@@ -1,17 +1,17 @@
#' Call function with arguments in array or data frame, returning a data frame.
#'
#' Call a multi-argument function with values taken from columns of an data frame or array, and combine results into a data frame
-#'
+#'
#' The \code{m*ply} functions are the \code{plyr} version of \code{mapply},
#' specialised according to the type of output they produce. These functions
#' are just a convenient wrapper around \code{a*ply} with \code{margins = 1}
#' and \code{.fun} wrapped in \code{\link{splat}}.
-#'
+#'
#' This function combines the result into a data frame. If there are no
#' results, then this function will return a data frame with zero rows and
#' columns (\code{data.frame()}).
-#'
-#'
+#'
+#'
#' @keywords manip
#' @param .data matrix or data frame to use as source of arguments
#' @param .fun function to be called with varying arguments
@@ -19,12 +19,12 @@
#' @param .expand should output be 1d (expand = FALSE), with an element for
#' each row; or nd (expand = TRUE), with a dimension for each variable.
#' @param .progress name of the progress bar to use, see \code{\link{create_progress_bar}}
-#' @param .parallel if \code{TRUE}, apply function in parallel, using parallel
+#' @param .parallel if \code{TRUE}, apply function in parallel, using parallel
#' backend provided by foreach
#' @return a data frame
#' @export
#' @references Hadley Wickham (2011). The Split-Apply-Combine Strategy for
-#' Data Analysis. Journal of Statistical Software, 40(1), 1-29.
+#' Data Analysis. Journal of Statistical Software, 40(1), 1-29.
#' \url{http://www.jstatsoft.org/v40/i01/}.
#' @examples
#' mdply(data.frame(mean = 1:5, sd = 1:5), rnorm, n = 2)
@@ -35,23 +35,23 @@ mdply <- function(.data, .fun = NULL, ..., .expand = TRUE, .progress = "none", .
if (is.matrix(.data) & !is.list(.data)) .data <- .matrix_to_df(.data)
f <- splat(.fun)
- adply(.data = .data, .margins = 1, .fun = f, ...,
+ adply(.data = .data, .margins = 1, .fun = f, ...,
.expand = .expand, .progress = .progress, .parallel = .parallel)
}
#' Call function with arguments in array or data frame, returning an array.
#'
#' Call a multi-argument function with values taken from columns of an data frame or array, and combine results into an array
-#'
+#'
#' The \code{m*ply} functions are the \code{plyr} version of \code{mapply},
#' specialised according to the type of output they produce. These functions
#' are just a convenient wrapper around \code{a*ply} with \code{margins = 1}
#' and \code{.fun} wrapped in \code{\link{splat}}.
-#'
+#'
#' This function combines the result into an array. If there are no results,
#' then this function will return a vector of length 0 (\code{vector()}).
-#'
-#'
+#'
+#'
#' @keywords manip
#' @param .data matrix or data frame to use as source of arguments
#' @param .fun function to be called with varying arguments
@@ -59,12 +59,12 @@ mdply <- function(.data, .fun = NULL, ..., .expand = TRUE, .progress = "none", .
#' @param .expand should output be 1d (expand = FALSE), with an element for
#' each row; or nd (expand = TRUE), with a dimension for each variable.
#' @param .progress name of the progress bar to use, see \code{\link{create_progress_bar}}
-#' @param .parallel if \code{TRUE}, apply function in parallel, using parallel
+#' @param .parallel if \code{TRUE}, apply function in parallel, using parallel
#' backend provided by foreach
#' @return if results are atomic with same type and dimensionality, a vector, matrix or array; otherwise, a list-array (a list with dimensions)
#' @export
#' @references Hadley Wickham (2011). The Split-Apply-Combine Strategy for
-#' Data Analysis. Journal of Statistical Software, 40(1), 1-29.
+#' Data Analysis. Journal of Statistical Software, 40(1), 1-29.
#' \url{http://www.jstatsoft.org/v40/i01/}.
#' @examples
#' maply(cbind(mean = 1:5, sd = 1:5), rnorm, n = 5)
@@ -72,25 +72,25 @@ mdply <- function(.data, .fun = NULL, ..., .expand = TRUE, .progress = "none", .
#' maply(cbind(1:5, 1:5), rnorm, n = 5)
maply <- function(.data, .fun = NULL, ..., .expand = TRUE, .progress = "none", .parallel = FALSE) {
if (is.matrix(.data) & !is.list(.data)) .data <- .matrix_to_df(.data)
-
+
f <- splat(.fun)
- aaply(.data = .data, .margins = 1, .fun = f, ...,
+ aaply(.data = .data, .margins = 1, .fun = f, ...,
.expand = .expand, .progress = .progress, .parallel = .parallel)
}
#' Call function with arguments in array or data frame, returning a list.
#'
#' Call a multi-argument function with values taken from columns of an data frame or array, and combine results into a list
-#'
+#'
#' The \code{m*ply} functions are the \code{plyr} version of \code{mapply},
#' specialised according to the type of output they produce. These functions
#' are just a convenient wrapper around \code{a*ply} with \code{margins = 1}
#' and \code{.fun} wrapped in \code{\link{splat}}.
-#'
+#'
#' This function combines the result into a list. If there are no results,
#' then this function will return a list of length 0 (\code{list()}).
-#'
-#'
+#'
+#'
#' @keywords manip
#' @param .data matrix or data frame to use as source of arguments
#' @param .fun function to be called with varying arguments
@@ -98,17 +98,17 @@ maply <- function(.data, .fun = NULL, ..., .expand = TRUE, .progress = "none", .
#' @param .expand should output be 1d (expand = FALSE), with an element for
#' each row; or nd (expand = TRUE), with a dimension for each variable.
#' @param .progress name of the progress bar to use, see \code{\link{create_progress_bar}}
-#' @param .parallel if \code{TRUE}, apply function in parallel, using parallel
+#' @param .parallel if \code{TRUE}, apply function in parallel, using parallel
#' backend provided by foreach
#' @return list of results
#' @export
#' @references Hadley Wickham (2011). The Split-Apply-Combine Strategy for
-#' Data Analysis. Journal of Statistical Software, 40(1), 1-29.
+#' Data Analysis. Journal of Statistical Software, 40(1), 1-29.
#' \url{http://www.jstatsoft.org/v40/i01/}.
#' @examples
#' mlply(cbind(1:4, 4:1), rep)
#' mlply(cbind(1:4, times = 4:1), rep)
-#'
+#'
#' mlply(cbind(1:4, 4:1), seq)
#' mlply(cbind(1:4, length = 4:1), seq)
#' mlply(cbind(1:4, by = 4:1), seq, to = 20)
@@ -116,22 +116,22 @@ mlply <- function(.data, .fun = NULL, ..., .expand = TRUE, .progress = "none", .
if (is.matrix(.data) & !is.list(.data)) .data <- .matrix_to_df(.data)
f <- splat(.fun)
- alply(.data = .data, .margins = 1, .fun = f, ...,
+ alply(.data = .data, .margins = 1, .fun = f, ...,
.expand = .expand, .progress = .progress, .parallel = .parallel)
}
#' Call function with arguments in array or data frame, discarding results.
#'
#' Call a multi-argument function with values taken from columns of an data frame or array, and discard results
-#'
+#'
#' The \code{m*ply} functions are the \code{plyr} version of \code{mapply},
#' specialised according to the type of output they produce. These functions
#' are just a convenient wrapper around \code{a*ply} with \code{margins = 1}
#' and \code{.fun} wrapped in \code{\link{splat}}.
-#'
+#'
#' This function combines the result into a list. If there are no results,
#' then this function will return a list of length 0 (\code{list()}).
-#'
+#'
#' @keywords manip
#' @param .data matrix or data frame to use as source of arguments
#' @param .fun function to be called with varying arguments
@@ -141,13 +141,13 @@ mlply <- function(.data, .fun = NULL, ..., .expand = TRUE, .progress = "none", .
#' @param .progress name of the progress bar to use, see \code{\link{create_progress_bar}}
#' @export
#' @references Hadley Wickham (2011). The Split-Apply-Combine Strategy for
-#' Data Analysis. Journal of Statistical Software, 40(1), 1-29.
+#' Data Analysis. Journal of Statistical Software, 40(1), 1-29.
#' \url{http://www.jstatsoft.org/v40/i01/}.
m_ply <- function(.data, .fun = NULL, ..., .expand = TRUE, .progress = "none") {
if (is.matrix(.data) & !is.list(.data)) .data <- .matrix_to_df(.data)
f <- splat(.fun)
- a_ply(.data = .data, .margins = 1, .fun = f, ...,
+ a_ply(.data = .data, .margins = 1, .fun = f, ...,
.expand = .expand, .progress = .progress)
}
@@ -156,5 +156,5 @@ m_ply <- function(.data, .fun = NULL, ..., .expand = TRUE, .progress = "none") {
if (is.null(cnames)) cnames <- rep("", ncol(.data))
.data <- as.data.frame(.data, stringsAsFactors = FALSE)
colnames(.data) <- cnames
- .data
+ .data
}
38 R/ply-null.r
@@ -1,14 +1,14 @@
#' Split list, apply function, and discard results.
#'
#' For each element of a list, apply function and discard results
-#'
+#'
#' All plyr functions use the same split-apply-combine strategy: they split the
#' input into simpler pieces, apply \code{.fun} to each piece, and then combine
#' the pieces into a single data structure. This function splits lists by
-#' elements and discards the output. This is useful for functions that you are
+#' elements and discards the output. This is useful for functions that you are
#' calling purely for their side effects like display plots and saving output.
-#'
-#'
+#'
+#'
#' @keywords manip
#' @param .data list to be processed
#' @param .fun function to apply to each piece
@@ -17,38 +17,38 @@
#' @param .print automatically print each result? (default: \code{FALSE})
#' @export
#' @references Hadley Wickham (2011). The Split-Apply-Combine Strategy for
-#' Data Analysis. Journal of Statistical Software, 40(1), 1-29.
+#' Data Analysis. Journal of Statistical Software, 40(1), 1-29.
#' \url{http://www.jstatsoft.org/v40/i01/}.
l_ply <- function(.data, .fun = NULL, ..., .progress = "none", .print = FALSE) {
if (is.character(.fun) || is.list(.fun)) .fun <- each(.fun)
if (!is.function(.fun)) stop(".fun is not a function.")
-
+
progress <- create_progress_bar(.progress)
progress$init(length(.data))
on.exit(progress$term())
-
+
.data <- as.list(.data)
for(i in seq_along(.data)) {
x <- .fun(.data[[i]], ...)
if (.print) print(x)
progress$step()
}
-
+
invisible()
}
#' Split data frame, apply function, and discard results.
#'
#' For each subset of a data frame, apply function and discard results
-#'
+#'
#' All plyr functions use the same split-apply-combine strategy: they split the
#' input into simpler pieces, apply \code{.fun} to each piece, and then combine
#' the pieces into a single data structure. This function splits data frames
#' by variable and discards the output. This is useful for functions that you
#' are calling purely for their side effects like display plots and saving
#' output.
-#'
-#'
+#'
+#'
#' @keywords manip
#' @param .data data frame to be processed
#' @param .variables variables to split data frame by, as quoted variables, a formula or character vector
@@ -58,43 +58,43 @@ l_ply <- function(.data, .fun = NULL, ..., .progress = "none", .print = FALSE) {
#' @param .print automatically print each result? (default: \code{FALSE})
#' @export
#' @references Hadley Wickham (2011). The Split-Apply-Combine Strategy for
-#' Data Analysis. Journal of Statistical Software, 40(1), 1-29.
+#' Data Analysis. Journal of Statistical Software, 40(1), 1-29.
#' \url{http://www.jstatsoft.org/v40/i01/}.
d_ply <- function(.data, .variables, .fun = NULL, ..., .progress = "none", .print = FALSE) {
.variables <- as.quoted(.variables)
pieces <- splitter_d(.data, .variables)
-
+
l_ply(.data = pieces, .fun = .fun, ..., .progress = .progress, .print = .print)
}
#' Split array, apply function, and discard results.
#'
#' For each slice of an array, apply function and discard results
-#'
+#'
#' All plyr functions use the same split-apply-combine strategy: they split the
#' input into simpler pieces, apply \code{.fun} to each piece, and then combine
#' the pieces into a single data structure. This function splits matrices,
#' arrays and data frames by dimensions and discards the output. This is
#' useful for functions that you are calling purely for their side effects like
#' display plots and saving output.
-#'
-#'
+#'
+#'
#' @keywords manip
#' @param .data matrix, array or data frame to be processed
#' @param .margins a vector giving the subscripts to split up \code{data} by. 1 splits up by rows, 2 by columns and c(1,2) by rows and columns, and so on for higher dimensions
#' @param .fun function to apply to each piece
#' @param ... other arguments passed on to \code{.fun}
-#' @param .expand if \code{.data} is a data frame, should output be 1d
+#' @param .expand if \code{.data} is a data frame, should output be 1d
#' (expand = FALSE), with an element for each row; or nd (expand = TRUE),
#' with a dimension for each variable.
#' @param .progress name of the progress bar to use, see \code{\link{create_progress_bar}}
#' @param .print automatically print each result? (default: \code{FALSE})
#' @export
#' @references Hadley Wickham (2011). The Split-Apply-Combine Strategy for
-#' Data Analysis. Journal of Statistical Software, 40(1), 1-29.
+#' Data Analysis. Journal of Statistical Software, 40(1), 1-29.
#' \url{http://www.jstatsoft.org/v40/i01/}.
a_ply <- function(.data, .margins, .fun = NULL, ..., .expand = TRUE, .progress = "none", .print = FALSE) {
pieces <- splitter_a(.data, .margins, .expand)
-
+
l_ply(.data = pieces, .fun = .fun, ..., .progress = .progress, .print = .print)
}
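
Since the *_ply functions above discard their results, the documentation recommends them only for side effects; a trivial hedged illustration, not from the commit:

library(plyr)
l_ply(1:3, function(i) cat("piece", i, "\n"))
# prints three lines; the return value is invisible NULL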
56 R/ply-replicate.r
@@ -1,13 +1,13 @@
#' Replicate expression and return results in a list.
#'
#' Evalulate expression n times then combine results into a list
-#'
-#' This function runs an expression multiple times, and combines the
+#'
+#' This function runs an expression multiple times, and combines the
#' result into a list. If there are no results, then this function will return
#' a list of length 0 (\code{list()}). This function is equivalent to
#' \code{\link{replicate}}, but will always return results as a list.
-#'
-#'
+#'
+#'
#' @keywords manip
#' @param .n number of times to evaluate the expression
#' @param .expr expression to evaluate
@@ -15,7 +15,7 @@
#' @return list of results
#' @export
#' @references Hadley Wickham (2011). The Split-Apply-Combine Strategy for
-#' Data Analysis. Journal of Statistical Software, 40(1), 1-29.
+#' Data Analysis. Journal of Statistical Software, 40(1), 1-29.
#' \url{http://www.jstatsoft.org/v40/i01/}.
#' @examples
#' mods <- rlply(100, lm(y ~ x, data=data.frame(x=rnorm(100), y=rnorm(100))))
@@ -24,10 +24,10 @@ rlply <- function(.n, .expr, .progress = "none") {
if (is.function(.expr)) {
f <- .expr
} else {
- f <- eval.parent(substitute(function() .expr))
+ f <- eval.parent(substitute(function() .expr))
}
- progress <- create_progress_bar(.progress)
+ progress <- create_progress_bar(.progress)
result <- vector("list", length = .n)
progress$init(.n)
@@ -37,21 +37,21 @@ rlply <- function(.n, .expr, .progress = "none") {
result[i] <- list(f())
progress$step()
}
-
+
result
}
#' Replicate expression and return results in a data frame.
#'
#' Evalulate expression n times then combine results into a data frame
-#'
-#' This function runs an expression multiple times, and combines the
+#'
+#' This function runs an expression multiple times, and combines the
#' result into a data frame. If there are no results, then this function
#' returns a data frame with zero rows and columns (\code{data.frame()}).
#' This function is equivalent to \code{\link{replicate}}, but will always
#' return results as a data frame.
-#'
-#'
+#'
+#'
#' @keywords manip
#' @param .n number of times to evaluate the expression
#' @param .expr expression to evaluate
@@ -59,7 +59,7 @@ rlply <- function(.n, .expr, .progress = "none") {
#' @return a data frame
#' @export
#' @references Hadley Wickham (2011). The Split-Apply-Combine Strategy for
-#' Data Analysis. Journal of Statistical Software, 40(1), 1-29.
+#' Data Analysis. Journal of Statistical Software, 40(1), 1-29.
#' \url{http://www.jstatsoft.org/v40/i01/}.
#' @examples
#' rdply(20, mean(runif(100)))
@@ -69,9 +69,9 @@ rdply <- function(.n, .expr, .progress = "none") {
if (is.function(.expr)) {
f <- .expr
} else {
- f <- eval.parent(substitute(function() .expr))
+ f <- eval.parent(substitute(function() .expr))
}
-
+
res <- rlply(.n = .n, .expr = f, .progress = .progress)
labels <- data.frame(.n = seq_len(.n))
list_to_dataframe(res, labels)
@@ -81,13 +81,13 @@ rdply <- function(.n, .expr, .progress = "none") {
#' Replicate expression and return results in a array.
#'
#' Evalulate expression n times then combine results into an array
-#'
-#' This function runs an expression multiple times, and combines the
+#'
+#' This function runs an expression multiple times, and combines the
#' result into a data frame. If there are no results, then this function
#' returns a vector of length 0 (\code{vector(0)}).
#' This function is equivalent to \code{\link{replicate}}, but will always
#' return results as a vector, matrix or array.
-#'
+#'
#' @keywords manip
#' @param .n number of times to evaluate the expression
#' @param .expr expression to evaluate
@@ -96,7 +96,7 @@ rdply <- function(.n, .expr, .progress = "none") {
#' @param .drop should extra dimensions of length 1 be dropped, simplifying the output? Defaults to \code{TRUE}.
#' @export
#' @references Hadley Wickham (2011). The Split-Apply-Combine Strategy for
-#' Data Analysis. Journal of Statistical Software, 40(1), 1-29.
+#' Data Analysis. Journal of Statistical Software, 40(1), 1-29.
#' \url{http://www.jstatsoft.org/v40/i01/}.
#' @examples
#' raply(100, mean(runif(100)))
@@ -104,7 +104,7 @@ rdply <- function(.n, .expr, .progress = "none") {
#'
#' raply(10, runif(4))
#' raply(10, matrix(runif(4), nrow=2))
-#'
+#'
#' # See the central limit theorem in action
#' hist(raply(1000, mean(rexp(10))))
#' hist(raply(1000, mean(rexp(100))))
@@ -113,9 +113,9 @@ raply <- function(.n, .expr, .progress = "none", .drop = TRUE) {
if (is.function(.expr)) {
f <- .expr
} else {
- f <- eval.parent(substitute(function() .expr))
+ f <- eval.parent(substitute(function() .expr))
}
-
+
res <- rlply(.n = .n, .expr = f, .progress = .progress)
list_to_array(res, NULL, .drop)
}
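A sketch of raply() (assumptions as above); replicates are stacked along the first dimension:

library(plyr)

# Each of the 10 replicates is a length-4 vector, so they stack into a 10 x 4 matrix.
m <- raply(10, runif(4))
dim(m)  # 10 4

# Scalar results simplify to a plain numeric vector.
means <- raply(100, mean(runif(100)))
length(means)  # 100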
@@ -123,11 +123,11 @@ raply <- function(.n, .expr, .progress = "none", .drop = TRUE) {
#' Replicate expression and discard results.
#'
#' Evaluate expression n times, then discard the results
-#'
-#' This function runs an expression multiple times, discarding the results.
+#'
+#' This function runs an expression multiple times, discarding the results.
#' This function is equivalent to \code{\link{replicate}}, but never returns
#' anything.
-#'
+#'
#' @keywords manip
#' @param .n number of times to evaluate the expression
#' @param .expr expression to evaluate
@@ -135,7 +135,7 @@ raply <- function(.n, .expr, .progress = "none", .drop = TRUE) {
#' @param .print automatically print each result? (default: \code{FALSE})
#' @export
#' @references Hadley Wickham (2011). The Split-Apply-Combine Strategy for
-#' Data Analysis. Journal of Statistical Software, 40(1), 1-29.
+#' Data Analysis. Journal of Statistical Software, 40(1), 1-29.
#' \url{http://www.jstatsoft.org/v40/i01/}.
#' @examples
#' r_ply(10, plot(runif(50)))
@@ -144,10 +144,10 @@ r_ply <- function(.n, .expr, .progress = "none", .print = FALSE) {
if (is.function(.expr)) {
f <- .expr
} else {
- f <- eval.parent(substitute(function() .expr))
+ f <- eval.parent(substitute(function() .expr))
}
- progress <- create_progress_bar(.progress)
+ progress <- create_progress_bar(.progress)
progress$init(.n)
on.exit(progress$term())
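A sketch of r_ply(), which is called only for its side effects (the message text below is illustrative):

library(plyr)

# Nothing is returned; each replicate runs purely for its side effect.
r_ply(3, message("one simulation pass finished"))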
40 R/progress.r
@@ -1,25 +1,25 @@
#' Create progress bar.
#'
#' Create progress bar object from text string.
-#'
+#'
#' Progress bars give feedback on how the apply step is proceeding. This
-#' is mainly useful for long running functions, as for short functions, the
-#' time taken up by splitting and combining may be on the same order (or
+#' is mainly useful for long running functions, as for short functions, the
+#' time taken up by splitting and combining may be on the same order (or
#' longer) as the apply step. Additionally, for short functions, the time
#' needed to update the progress bar can significantly slow down the process.
#' For the trivial examples below, using the tk progress bar slows things down
#' by a factor of a thousand.
-#'
+#'
#' Note that the progress bar is approximate, and if the time taken by
#' individual function applications is highly non-uniform, it may not be very
#' informative of the time left.
-#'
+#'
#' There are currently four types of progress bar: "none", "text", "tk", and
-#' "win". See the individual documentation for more details. In plyr
+#' "win". See the individual documentation for more details. In plyr
#' functions, these can either be specified by name, or you can create the
#' progress bar object yourself if you want more control over its appearance.
#' See the examples.
-#'
+#'
#' @param name type of progress bar to create
#' @param ... other arguments passed on to the progress bar function
#' @seealso \code{\link{progress_none}}, \code{\link{progress_text}}, \code{\link{progress_tk}}, \code{\link{progress_win}}
@@ -37,7 +37,7 @@
create_progress_bar <- function(name = "none", ...) {
if (!is.character(name)) return(name)
name <- paste("progress", name, sep="_")
-
+
if (!exists(name, mode = "function")) {
warning("Cannot find progress bar ", name, call. = FALSE)
progress_none()
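A sketch of both ways to supply a progress bar, assuming a plyr function such as llply() and a workload slow enough to watch; Sys.sleep() is only a stand-in for real work:

library(plyr)

# By name: create_progress_bar("text") is built for you.
llply(1:100, function(i) Sys.sleep(0.01), .progress = "text")

# As an object, for more control over its appearance.
llply(1:100, function(i) Sys.sleep(0.01),
      .progress = progress_text(style = 1))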
@@ -49,10 +49,10 @@ create_progress_bar <- function(name = "none", ...) {
#' Null progress bar
#'
#' A progress bar that does nothing
-#'
+#'
#' This is the default progress bar used by plyr functions. It's very simple to
#' understand - it does nothing!
-#'
+#'
#' @keywords internal
#' @family progress bars
#' @export
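A tiny sketch for completeness: "none" is the default, so these two calls should behave identically.

library(plyr)

# The null bar is the default; supplying it explicitly changes nothing.
llply(1:3, sqrt)
llply(1:3, sqrt, .progress = "none")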
@@ -69,9 +69,9 @@ progress_none <- function() {
#' Text progress bar.
#'
#' A textual progress bar
-#'
-#' This progress bar displays a textual progress bar that works on all
-#' platforms. It is a thin wrapper around the built-in
+#'
+#' This progress bar displays a textual progress bar that works on all
+#' platforms. It is a thin wrapper around the built-in
#' \code{\link{setTxtProgressBar}} and can be customised in the same way.
#'
#' @param style style of text bar, see Details section of \code{\link{txtProgressBar}}
@@ -84,7 +84,7 @@ progress_none <- function() {
progress_text <- function(style = 3, ...) {
n <- 0
txt <- NULL
-
+
list(
init = function(x) {
txt <<- txtProgressBar(max = x, style = style, ...)
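A sketch of customising the text bar; char and width are standard txtProgressBar() arguments forwarded through ..., and the sleep is only a placeholder workload:

library(plyr)

# char and width are passed on to txtProgressBar() via ... .
l_ply(1:50, function(i) Sys.sleep(0.02),
      .progress = progress_text(char = "-", width = 40))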
@@ -101,9 +101,9 @@ progress_text <- function(style = 3, ...) {
#' Graphical progress bar, powered by Tk.
#'
#' A graphical progress bar displayed in a Tk window
-#'
+#'
#' This graphical progress bar will appear in a separate window.
-#'
+#'
#' @param title window title
#' @param label progress bar label (inside window)
#' @param ... other arguments passed on to \code{\link[tcltk]{tkProgressBar}}
@@ -118,7 +118,7 @@ progress_tk <- function(title = "plyr progress", label = "Working...", ...) {
stopifnot(require("tcltk", quiet=TRUE))
n <- 0
tk <- NULL
-
+
list(
init = function(x) {
tk <<- tkProgressBar(max = x, title = title, label = label, ...)
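A sketch of the Tk bar (requires the tcltk package to be available); the title text is arbitrary:

library(plyr)

# Opens a separate Tk window with the given title while the loop runs.
l_ply(1:50, function(i) Sys.sleep(0.05),
      .progress = progress_tk(title = "simulating", label = "Working..."))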
@@ -135,9 +135,9 @@ progress_tk <- function(title = "plyr progress", label = "Working...", ...) {
#' Graphical progress bar, powered by Windows.
#'
#' A graphical progress bar displayed in a separate window
-#'
+#'
#' This graphical progress bar only works on Windows.
-#'
+#'
#' @param title window title
#' @param ... other arguments passed on to \code{winProgressBar}
#' @seealso \code{winProgressBar} for the function that powers this progress bar
@@ -151,7 +151,7 @@ progress_tk <- function(title = "plyr progress", label = "Working...", ...) {
progress_win <- function(title = "plyr progress", ...) {
n <- 0
win <- NULL
-
+
list(
init = function(x) {
win <<- winProgressBar(max = x, title = title, ...)
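A sketch of the Windows bar, guarded so it only runs where winProgressBar() exists:

library(plyr)

# winProgressBar() is only available in R on Windows.
if (.Platform$OS.type == "windows") {
  l_ply(1:50, function(i) Sys.sleep(0.05), .progress = "win")
}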
44 R/quote.r
@@ -1,19 +1,19 @@
#' Quote variables to create a list of unevaluated expressions for later
#' evaluation.
-#'
+#'
#' This function is similar to \code{\link{~}} in that it is used to
#' capture the names of variables, not their current values. This is used
#' throughout plyr to specify the names of variables (or more complicated
#' expressions).
-#'
+#'
#' Similar tricks can be performed with \code{\link{substitute}}, but when
#' functions can be called in multiple ways it becomes increasingly tricky
#' to ensure that the values are extracted from the correct frame. Substitute
#' tricks also make it difficult to program against the functions that use
-#' them, while the \code{quoted} class provides
+#' them, while the \code{quoted} class provides
#' \code{as.quoted.character} to convert strings to the appropriate
#' data structure.
-#'
+#'
#' @param ... unevaluated expressions to be recorded. Specify names if you
#' want to set the names of the resultant variables
#' @param .env environment in which unbound symbols in \code{...} should be
@@ -29,7 +29,7 @@
#' as.quoted(~ a + b + c)
#' as.quoted(a ~ b + c)
#' as.quoted(c("a", "b", "c"))
-#'
+#'
#' # Some examples using ddply - look at the column names
#' ddply(mtcars, "cyl", each(nrow, ncol))
#' ddply(mtcars, ~ cyl, each(nrow, ncol))
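One more hedged sketch of .() in a split specification; the derived-column name below (heavy) is made up for illustration:

library(plyr)

# .() records the expressions; ddply evaluates them inside mtcars.
ddply(mtcars, .(cyl), summarise, mean_mpg = mean(mpg))

# Named elements set the names of the resulting split variables.
ddply(mtcars, .(heavy = wt > 3), nrow)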
@@ -47,7 +47,7 @@ is.quoted <- function(x) inherits(x, "quoted")
#' Print quoted variables.
#'
#' Display the \code{\link{str}}ucture of quoted variables