Permalink
Browse files

Implement join_all. Fixes #29

  • Loading branch information...
1 parent 4d7c436 commit 86256dba6239ed6281bfc494d7b3a85dda41fb3b @hadley committed Oct 8, 2012
Showing with 73 additions and 10 deletions.
  1. +1 −0 DESCRIPTION
  2. +1 −0 NAMESPACE
  3. +2 −0 NEWS
  4. +9 −7 R/join.r
  5. +23 −0 R/join_all.r
  6. +3 −3 man/join.Rd
  7. +34 −0 man/join_all.Rd
View
1 DESCRIPTION
@@ -76,3 +76,4 @@ Collate:
'helper-take.r'
'plyr.r'
'parallel.r'
+ 'join_all.r'
View
1 NAMESPACE
@@ -58,6 +58,7 @@ export(is.quoted)
export(isplit2)
export(join)
export(join.keys)
+export(join_all)
export(l_ply)
export(laply)
export(ldply)
View
2 NEWS
@@ -1,6 +1,8 @@
Version 1.7.1.99
------------------------------------------------------------------------------
+* new `join_all` function recursively joins a list of data frames. (Fixes #29)
+
* `*_ply` now accepts `.parallel` argument to enable parallel processing. (Fixes #60)
* Progress bars are disabled when using parallel plyr (Fixes #32)
View
16 R/join.r
@@ -24,7 +24,8 @@
#'
#' @param x data frame
#' @param y data frame
-#' @param by character vector of variable names to join by
+#' @param by character vector of variable names to join by. If omitted, will
+#' match on all common variables.
#' @param type type of join: left (default), right, inner or full. See
#' details for more information.
#' @param match how should duplicate ids be matched? Either match just the
@@ -41,20 +42,21 @@
#' b2 <- arrange(b2, id, year, stint)
#' b3 <- arrange(b3, id, year, stint)
#' stopifnot(all.equal(b2, b3))
-join <- function(x, y, by = intersect(names(x), names(y)), type = "left", match = "all") {
+join <- function(x, y, by = NULL, type = "left", match = "all") {
type <- match.arg(type, c("left", "right", "inner", "full"))
match <- match.arg(match, c("first", "all"))
- if (missing(by)) {
+ if (is.null(by)) {
+ by <- intersect(names(x), names(y))
message("Joining by: ", paste(by, collapse = ", "))
}
switch(match,
- "first" = join_first(x, y, by, type),
- "all" = join_all(x, y, by, type))
+ "first" = .join_first(x, y, by, type),
+ "all" = .join_all(x, y, by, type))
}
-join_first <- function(x, y, by, type) {
+.join_first <- function(x, y, by, type) {
keys <- join.keys(x, y, by = by)
x.cols <- setdiff(names(x), by)
@@ -100,7 +102,7 @@ join_first <- function(x, y, by, type) {
# and then evaluate which rows meet the merging criteria. But that is
# horrendously inefficient, so we do various types of hashing, implemented
# in R as split_indices
-join_all <- function(x, y, by, type) {
+.join_all <- function(x, y, by, type) {
x.cols <- setdiff(names(x), by)
y.cols <- setdiff(names(y), by)
View
23 R/join_all.r
@@ -0,0 +1,23 @@
+#' Recursively join a list of data frames.
+#'
+#' @param dfs A list of data frames.
+#' @inheritParams join
+#' @export
+#' @examples
+#' dfs <- list(
+#' a = data.frame(x = 1:10, a = runif(10)),
+#' b = data.frame(x = 1:10, b = runif(10)),
+#' c = data.frame(x = 1:10, c = runif(10))
+#' )
+#' join_all(dfs)
+#' join_all(dfs, "x")
+join_all <- function(dfs, by = NULL, type = "left", match = "all") {
+ if (length(dfs) == 1) return(dfs[[1]])
+
+ joined <- dfs[[1]]
+ for(i in 2:length(dfs)) {
+ joined <- join(joined, dfs[[i]], by = by, type = type, match = match)
+ }
+
+ joined
+}
View
6 man/join.Rd
@@ -2,15 +2,15 @@
\alias{join}
\title{Join two data frames together.}
\usage{
- join(x, y, by = intersect(names(x), names(y)),
- type = "left", match = "all")
+ join(x, y, by = NULL, type = "left", match = "all")
}
\arguments{
\item{x}{data frame}
\item{y}{data frame}
- \item{by}{character vector of variable names to join by}
+ \item{by}{character vector of variable names to join by.
+ If omitted, will match on all common variables.}
\item{type}{type of join: left (default), right, inner or
full. See details for more information.}
View
34 man/join_all.Rd
@@ -0,0 +1,34 @@
+\name{join_all}
+\alias{join_all}
+\title{Recursively join a list of data frames.}
+\usage{
+ join_all(dfs, by = NULL, type = "left", match = "all")
+}
+\arguments{
+ \item{dfs}{A list of data frames.}
+
+ \item{by}{character vector of variable names to join by.
+ If omitted, will match on all common variables.}
+
+ \item{type}{type of join: left (default), right, inner or
+ full. See details for more information.}
+
+ \item{match}{how should duplicate ids be matched? Either
+ match just the \code{"first"} matching row, or match
+ \code{"all"} matching rows. Defaults to \code{"all"} for
+ compatibility with merge, but \code{"first"} is
+ significantly faster.}
+}
+\description{
+ Recursively join a list of data frames.
+}
+\examples{
+dfs <- list(
+ a = data.frame(x = 1:10, a = runif(10)),
+ b = data.frame(x = 1:10, b = runif(10)),
+ c = data.frame(x = 1:10, c = runif(10))
+)
+join_all(dfs)
+join_all(dfs, "x")
+}
+

0 comments on commit 86256db

Please sign in to comment.