From 769780697d81f91e911b5af516c24b8b4291f27d Mon Sep 17 00:00:00 2001
From: actuaryzhang <actuaryzhang10@gmail.com>
Date: Fri, 3 Mar 2017 16:53:22 -1000
Subject: [PATCH 1/8] union checks for name consistency

---
 R/pkg/R/DataFrame.R                       | 3 +++
 R/pkg/inst/tests/testthat/test_sparkSQL.R | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index cc4cfa3423ced..df8ded01bfaed 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -2666,6 +2666,9 @@ generateAliasesForIntersectedCols <- function (x, intersectedColNames, suffix) {
 setMethod("union",
           signature(x = "SparkDataFrame", y = "SparkDataFrame"),
           function(x, y) {
+            if (!all.equal(names(x), names(y))){
+              stop("Names of input data frames are different.")
+            }
             unioned <- callJMethod(x@sdf, "union", y@sdf)
             dataFrame(unioned)
           })
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index ce0f5a198a259..98b88ea27ea1f 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -1822,6 +1822,9 @@ test_that("union(), rbind(), except(), and intersect() on a DataFrame", {
   expect_equal(count(excepted), 2)
   expect_equal(first(excepted)$name, "Justin")
 
+  expected_error(union(df, df2[, c(2, 1)]),
+                 "Names of input data frames are different.")
+
   intersected <- arrange(intersect(df, df2), df$age)
   expect_is(unioned, "SparkDataFrame")
   expect_equal(count(intersected), 1)

From 293dc35fd203c0926aeb1e0b483372eb525aeec3 Mon Sep 17 00:00:00 2001
From: actuaryzhang <actuaryzhang10@gmail.com>
Date: Fri, 3 Mar 2017 22:08:35 -1000
Subject: [PATCH 2/8] fix typo in test: expected_error -> expect_error

---
 R/pkg/inst/tests/testthat/test_sparkSQL.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index 98b88ea27ea1f..40d49b076e4ad 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -1822,7 +1822,7 @@ test_that("union(), rbind(), except(), and intersect() on a DataFrame", {
   expect_equal(count(excepted), 2)
   expect_equal(first(excepted)$name, "Justin")
 
-  expected_error(union(df, df2[, c(2, 1)]),
+  expect_error(union(df, df2[, c(2, 1)]),
                  "Names of input data frames are different.")
 
   intersected <- arrange(intersect(df, df2), df$age)

From ef8450157fb6c6535f1608899bc3898974ba8454 Mon Sep 17 00:00:00 2001
From: actuaryzhang <actuaryzhang10@gmail.com>
Date: Fri, 3 Mar 2017 23:12:59 -1000
Subject: [PATCH 3/8] wrap all.equal in isTRUE when comparing column names

---
 R/pkg/R/DataFrame.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index df8ded01bfaed..230cbd516505a 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -2666,7 +2666,7 @@ generateAliasesForIntersectedCols <- function (x, intersectedColNames, suffix) {
 setMethod("union",
           signature(x = "SparkDataFrame", y = "SparkDataFrame"),
           function(x, y) {
-            if (!all.equal(names(x), names(y))){
+            if (!isTRUE(all.equal(names(x), names(y)))) {
               stop("Names of input data frames are different.")
             }
             unioned <- callJMethod(x@sdf, "union", y@sdf)

From 7ea0c4a3929630e1f3508931f300b433725cfe05 Mon Sep 17 00:00:00 2001
From: actuaryzhang <actuaryzhang10@gmail.com>
Date: Sun, 5 Mar 2017 11:16:10 -0800
Subject: [PATCH 4/8] check names in rbind rather than union

---
 R/pkg/R/DataFrame.R                       |  7 ++++---
 R/pkg/inst/tests/testthat/test_sparkSQL.R | 10 +++++++---
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 230cbd516505a..475c089b93dc5 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -2666,9 +2666,6 @@ generateAliasesForIntersectedCols <- function (x, intersectedColNames, suffix) {
 setMethod("union",
           signature(x = "SparkDataFrame", y = "SparkDataFrame"),
           function(x, y) {
-            if (!isTRUE(all.equal(names(x), names(y)))) {
-              stop("Names of input data frames are different.")
-            }
             unioned <- callJMethod(x@sdf, "union", y@sdf)
             dataFrame(unioned)
           })
@@ -2712,6 +2709,10 @@ setMethod("unionAll",
 setMethod("rbind",
           signature(... = "SparkDataFrame"),
           function(x, ..., deparse.level = 1) {
+            nm <- lapply(list(x, ...), names)
+            if (!isTRUE(Reduce(all.equal, nm))) {
+              stop("Names of input data frames are different.")
+            }            
             if (nargs() == 3) {
               union(x, ...)
             } else {
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index 40d49b076e4ad..784f51d48b09a 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -1817,14 +1817,18 @@ test_that("union(), rbind(), except(), and intersect() on a DataFrame", {
   expect_equal(count(unioned2), 12)
   expect_equal(first(unioned2)$name, "Michael")
 
+  df3 <- df2
+  names(df3)[1] <- "newName"
+  expect_error(union(df, df3),
+               "Names of input data frames are different.")
+  expect_error(union(df, df2, df3),
+               "Names of input data frames are different.")
+
   excepted <- arrange(except(df, df2), desc(df$age))
   expect_is(unioned, "SparkDataFrame")
   expect_equal(count(excepted), 2)
   expect_equal(first(excepted)$name, "Justin")
 
-  expect_error(union(df, df2[, c(2, 1)]),
-                 "Names of input data frames are different.")
-
   intersected <- arrange(intersect(df, df2), df$age)
   expect_is(unioned, "SparkDataFrame")
   expect_equal(count(intersected), 1)

From b8b96d61d48417037000372f021ed012928ee2dd Mon Sep 17 00:00:00 2001
From: actuaryzhang <actuaryzhang10@gmail.com>
Date: Sun, 5 Mar 2017 14:34:28 -0800
Subject: [PATCH 5/8] update doc and test

---
 R/pkg/R/DataFrame.R                       | 6 ++++--
 R/pkg/inst/tests/testthat/test_sparkSQL.R | 4 ++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 475c089b93dc5..24eed5db00173 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -2642,6 +2642,7 @@ generateAliasesForIntersectedCols <- function (x, intersectedColNames, suffix) {
 #'
 #' Return a new SparkDataFrame containing the union of rows in this SparkDataFrame
 #' and another SparkDataFrame. This is equivalent to \code{UNION ALL} in SQL.
+#' Input SparkDataFrames can have different schemas (names and data types).
 #'
 #' Note: This does not remove duplicate rows across the two SparkDataFrames.
 #'
@@ -2685,7 +2686,8 @@ setMethod("unionAll",
 
 #' Union two or more SparkDataFrames
 #'
-#' Union two or more SparkDataFrames. This is equivalent to \code{UNION ALL} in SQL.
+#' Union two or more SparkDataFrames by row. In constrast with \link{union}, this method
+#' requires that the SparkDataFrames to be unioned have the same column names.
 #'
 #' Note: This does not remove duplicate rows across the two SparkDataFrames.
 #'
@@ -2712,7 +2714,7 @@ setMethod("rbind",
             nm <- lapply(list(x, ...), names)
             if (!isTRUE(Reduce(all.equal, nm))) {
               stop("Names of input data frames are different.")
-            }            
+            }
             if (nargs() == 3) {
               union(x, ...)
             } else {
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index 784f51d48b09a..3e4210442c114 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -1819,9 +1819,9 @@ test_that("union(), rbind(), except(), and intersect() on a DataFrame", {
 
   df3 <- df2
   names(df3)[1] <- "newName"
-  expect_error(union(df, df3),
+  expect_error(rbind(df, df3),
                "Names of input data frames are different.")
-  expect_error(union(df, df2, df3),
+  expect_error(rbind(df, df2, df3),
                "Names of input data frames are different.")
 
   excepted <- arrange(except(df, df2), desc(df$age))

From decc4683c536e328cd040c2bd3d80ad77fed588a Mon Sep 17 00:00:00 2001
From: actuaryzhang <actuaryzhang10@gmail.com>
Date: Sun, 5 Mar 2017 14:36:02 -0800
Subject: [PATCH 6/8] update doc

---
 R/pkg/R/DataFrame.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 24eed5db00173..243302d9e405c 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -2686,8 +2686,8 @@ setMethod("unionAll",
 
 #' Union two or more SparkDataFrames
 #'
-#' Union two or more SparkDataFrames by row. In constrast with \link{union}, this method
-#' requires that the SparkDataFrames to be unioned have the same column names.
+#' Union two or more SparkDataFrames by row. In constrast to \link{union}, this method
+#' requires that the input SparkDataFrames have the same column names.
 #'
 #' Note: This does not remove duplicate rows across the two SparkDataFrames.
 #'

From cc80de34f3919c366dfb51d4e7e89e1161ea1331 Mon Sep 17 00:00:00 2001
From: actuaryzhang <actuaryzhang10@gmail.com>
Date: Sun, 5 Mar 2017 15:46:29 -0800
Subject: [PATCH 7/8] fix rbind name check: compare names with unique() instead of Reduce(all.equal)

---
 R/pkg/R/DataFrame.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 243302d9e405c..e2489019669b8 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -2712,7 +2712,7 @@ setMethod("rbind",
           signature(... = "SparkDataFrame"),
           function(x, ..., deparse.level = 1) {
             nm <- lapply(list(x, ...), names)
-            if (!isTRUE(Reduce(all.equal, nm))) {
+            if (length(unique(nm)) != 1) {
               stop("Names of input data frames are different.")
             }
             if (nargs() == 3) {

From 54427d505b7771cb558fcd3d764ce559ba764c7a Mon Sep 17 00:00:00 2001
From: actuaryzhang <actuaryzhang10@gmail.com>
Date: Sun, 5 Mar 2017 18:33:26 -0800
Subject: [PATCH 8/8] update doc

---
 R/pkg/R/DataFrame.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index e2489019669b8..7198af89e26dc 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -2686,7 +2686,7 @@ setMethod("unionAll",
 
 #' Union two or more SparkDataFrames
 #'
-#' Union two or more SparkDataFrames by row. In constrast to \link{union}, this method
+#' Union two or more SparkDataFrames by row. As in R's \code{rbind}, this method
 #' requires that the input SparkDataFrames have the same column names.
 #'
 #' Note: This does not remove duplicate rows across the two SparkDataFrames.