Permalink
Browse files

Rewrite join.keys to work correctly with 0 row inputs.

Fixes #48
  • Loading branch information...
1 parent f7efa35 commit c6cc206af3511d41bf14020416dc8bcaca0120ab @hadley committed Oct 7, 2012
Showing with 37 additions and 6 deletions.
  1. +1 −1 NEWS
  2. +1 −1 R/id.r
  3. +9 −3 R/join.r
  4. +26 −1 inst/tests/test-join.r
View
@@ -3,7 +3,7 @@ Version 1.7.1.99
* `rbind.fill` now works consistently with 0 column data frames
-* `join` works correctly when no common rows (Fixes #74)
+* `join` works correctly when there are no common rows (Fixes #74), or when one input has no rows (Fixes #48)
* `split_indices`, which powers much internal splitting code (like `vaggregate`, `join` and `d*ply`), is about 2x faster. It was already incredibly fast (~0.2s for 1,000,000 obs), so this won't have much impact on overall performance
View
@@ -17,7 +17,7 @@
#' @export
id <- function(.variables, drop = FALSE) {
if (length(.variables) == 0) {
- n <- nrow(.variables) %||% 1L
+ n <- nrow(.variables) %||% 0L
return(structure(seq_len(n), n = n))
}
View
@@ -28,7 +28,9 @@
#' @param type type of join: left (default), right, inner or full. See
#' details for more information.
#' @param match how should duplicate ids be matched? Either match just the
-#' \code{"first"} matching row, or match \code{"all"} matching rows.
+#' \code{"first"} matching row, or match \code{"all"} matching rows. Defaults
+#' to \code{"all"} for compatibility with merge, but \code{"first"} is
+#' significantly faster.
#' @keywords manip
#' @export
#' @examples
@@ -64,6 +66,7 @@ join_first <- function(x, y, by, type) {
} else if (type == "left") {
y.match <- match(keys$x, keys$y)
y.matched <- unrowname(y[y.match, new.cols, drop = FALSE])
+
cbind(x, y.matched)
} else if (type == "right") {
@@ -150,9 +153,12 @@ join.keys <- function(x, y, by) {
joint <- rbind.fill(x[by], y[by])
keys <- id(joint, drop = TRUE)
+ n_x <- nrow(x)
+ n_y <- nrow(y)
+
list(
- x = keys[1:nrow(x)],
- y = keys[-(1:nrow(x))],
+ x = keys[seq_len(n_x)],
+ y = keys[n_x + seq_len(n_y)],
n = attr(keys, "n")
)
}
@@ -95,7 +95,7 @@ test_that("many potential combinations works", {
})
-test_that("joins with no common rows", {
+test_that("joins with no common rows work", {
a <- data.frame(a = 1:10)
b <- data.frame(b = 1:10)
@@ -117,3 +117,28 @@ test_that("joins with no common rows", {
expect_equal(nrow(right1), 10)
expect_equal(nrow(right2), 10)
})
+
+test_that("joins with zero row dataframe work", {
+ a <- data.frame(a = integer())
+ b <- data.frame(a = 1:10, b = letters[1:10])
+
+ full1 <- join(a, b, type = "full")
+ full2 <- join(a, b, type = "full", match = "first")
+ inner1 <- join(a, b, type = "inner")
+ inner2 <- join(a, b, type = "inner", match = "first")
+ left1 <- join(a, b, type = "left")
+ left2 <- join(a, b, type = "left", match = "first")
+ right1 <- join(a, b, type = "right")
+ right2 <- join(a, b, type = "right", match = "first")
+
+ expect_equal(nrow(full1), 10)
+ expect_equal(nrow(full2), 10)
+ expect_equal(nrow(inner1), 0)
+ expect_equal(nrow(inner2), 0)
+ expect_equal(nrow(left1), 0)
+ expect_equal(nrow(left2), 0)
+ expect_equal(nrow(right1), 10)
+ expect_equal(nrow(right2), 10)
+
+
+})

0 comments on commit c6cc206

Please sign in to comment.