From 944a3ec791a8f103093e24511e895a4ce60970d8 Mon Sep 17 00:00:00 2001 From: zero323 Date: Mon, 1 May 2017 10:59:24 +0200 Subject: [PATCH 01/19] Initial implementation --- R/pkg/R/DataFrame.R | 26 +++++++++++++++++++++++ R/pkg/inst/tests/testthat/test_sparkSQL.R | 9 ++++++++ 2 files changed, 35 insertions(+) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 1c8869202f677..125dcde3a895e 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -3744,4 +3744,30 @@ setMethod("hint", stopifnot(all(sapply(parameters, is.character))) jdf <- callJMethod(x@sdf, "hint", name, parameters) dataFrame(jdf) + }) + +#' alias +#' +#' Returns a new SparkDataFrame with an alias set. +#' +#' @param object a SparkDataFrame +#' @param data new name to use +#' @return SparkDataFrame +#' @aliases alias,SparkDataFrame-method +#' @rdname alias +#' @name alias +#' @examples \dontrun{ +#' df <- alias(createDataFrame(mtcars), "mtcars") +#' avg_mpg <- alias(agg(groupBy(df, df$cyl), avg(df$mpg)), "avg_mpg") +#' +#' head(select(df, column("mtcars.mpg"))) +#' head(join(df, avg_mpg, column("mtcars.cyl") == column("avg_mpg.cyl"))) +#' } +#' @note alias since 2.3.0 +setMethod("alias", + signature(object = "SparkDataFrame"), + function(object, data) { + stopifnot(is.character(data)) + sdf <- callJMethod(object@sdf, "alias", data) + dataFrame(sdf) }) diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index 82007a5348496..1f90419f8bd46 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -2352,6 +2352,15 @@ test_that("mutate(), transform(), rename() and names()", { detach(airquality) }) +test("alias on SparkDataFrame", { + df <- alias(read.df(jsonPath, "json"), "table") + + actual <- sort(collect(select(df, column("table.name")))$name) + expected <- c("Andy", "Justin", "Michael") + + expect_equal(actual, expected) +}) + test_that("read/write ORC files", { skip_on_cran() From 
5e9f8da45c432e0752e5e78556add33e0a6d0557 Mon Sep 17 00:00:00 2001 From: zero323 Date: Tue, 2 May 2017 00:27:11 +0200 Subject: [PATCH 02/19] Adjust argument annotations - Remove param annotations from dataframe.alias - Use generic annotations for column.alias --- R/pkg/R/DataFrame.R | 4 ---- R/pkg/R/column.R | 4 ++-- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 125dcde3a895e..2f3b035fc32ae 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -3748,10 +3748,6 @@ setMethod("hint", #' alias #' -#' Returns a new SparkDataFrame with an alias set. -#' -#' @param object a SparkDataFrame -#' @param data new name to use #' @return SparkDataFrame #' @aliases alias,SparkDataFrame-method #' @rdname alias diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index 147ee4b6887b9..455d0dbf68865 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -132,9 +132,9 @@ createMethods() #' alias #' -#' Set a new name for a column +#' Set a new name for an object #' -#' @param object Column to rename +#' @param object object to rename #' @param data new name to use #' #' @rdname alias From 73133f9442ad8317fb12b600221962bf47d8a95c Mon Sep 17 00:00:00 2001 From: zero323 Date: Tue, 2 May 2017 00:31:26 +0200 Subject: [PATCH 03/19] Add usage examples to column.alias --- R/pkg/R/column.R | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index 455d0dbf68865..8f1e934b87248 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -142,6 +142,13 @@ createMethods() #' @aliases alias,Column-method #' @family colum_func #' @export +#' @examples \dontrun{ +#' df <- createDataFrame(iris) +#' +#' head(select( +#' df, alias(df$Sepal_Length, "slength"), alias(df$Petal_Length, "plength") +#' )) +#' } #' @note alias since 1.4.0 setMethod("alias", signature(object = "Column"), From 848eeefc1f18c6aabaf65e6efed259a2fa5c19c3 Mon Sep 17 00:00:00 2001 From: zero323 Date: Tue, 2 May 2017 00:34:51 +0200 
Subject: [PATCH 04/19] Remove return type annotation --- R/pkg/R/DataFrame.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 2f3b035fc32ae..5ea89ebd1e98e 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -3748,7 +3748,6 @@ setMethod("hint", #' alias #' -#' @return SparkDataFrame #' @aliases alias,SparkDataFrame-method #' @rdname alias #' @name alias From 05c0781110b42a940e06cc31650449a8715e85c9 Mon Sep 17 00:00:00 2001 From: zero323 Date: Tue, 2 May 2017 04:00:13 +0200 Subject: [PATCH 05/19] Fix typo --- R/pkg/inst/tests/testthat/test_sparkSQL.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index 1f90419f8bd46..2c9a3502b3006 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -2352,7 +2352,7 @@ test_that("mutate(), transform(), rename() and names()", { detach(airquality) }) -test("alias on SparkDataFrame", { +test_that("alias on SparkDataFrame", { df <- alias(read.df(jsonPath, "json"), "table") actual <- sort(collect(select(df, column("table.name")))$name) From 22d7cf661bb54a8f7f9c660e1d914802f1eb4153 Mon Sep 17 00:00:00 2001 From: zero323 Date: Tue, 2 May 2017 06:25:34 +0200 Subject: [PATCH 06/19] Move dontruns to their own lines --- R/pkg/R/DataFrame.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 5ea89ebd1e98e..f4860ad3e80ee 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -3751,7 +3751,8 @@ setMethod("hint", #' @aliases alias,SparkDataFrame-method #' @rdname alias #' @name alias -#' @examples \dontrun{ +#' @examples +#' \dontrun{ #' df <- alias(createDataFrame(mtcars), "mtcars") #' avg_mpg <- alias(agg(groupBy(df, df$cyl), avg(df$mpg)), "avg_mpg") #' From 22e1292557f1a5597cde6337267a099bbcdc07aa Mon Sep 17 00:00:00 2001 From: zero323 Date: Tue, 2 May 2017 
06:27:11 +0200 Subject: [PATCH 07/19] Extend param description --- R/pkg/R/column.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index 8f1e934b87248..f8f3c8b011fe2 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -134,7 +134,7 @@ createMethods() #' #' Set a new name for an object #' -#' @param object object to rename +#' @param object x a Column or a SparkDataFrame #' @param data new name to use #' #' @rdname alias From 6bb3d914960d1cf63e582a7d732ca80ed321e9c5 Mon Sep 17 00:00:00 2001 From: zero323 Date: Tue, 2 May 2017 06:33:34 +0200 Subject: [PATCH 08/19] Add type annotations to since notes --- R/pkg/R/DataFrame.R | 2 +- R/pkg/R/column.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index f4860ad3e80ee..61511b90bb1c7 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -3759,7 +3759,7 @@ setMethod("hint", #' head(select(df, column("mtcars.mpg"))) #' head(join(df, avg_mpg, column("mtcars.cyl") == column("avg_mpg.cyl"))) #' } -#' @note alias since 2.3.0 +#' @note alias(SparkDataFrame) since 2.3.0 setMethod("alias", signature(object = "SparkDataFrame"), function(object, data) { diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index f8f3c8b011fe2..e3e7d6eea2ce3 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -149,7 +149,7 @@ createMethods() #' df, alias(df$Sepal_Length, "slength"), alias(df$Petal_Length, "plength") #' )) #' } -#' @note alias since 1.4.0 +#' @note alias(Column) since 1.4.0 setMethod("alias", signature(object = "Column"), function(object, data) { From b3c1a416a16a9d32649edda2b66fc9c3476358a5 Mon Sep 17 00:00:00 2001 From: zero323 Date: Tue, 2 May 2017 06:38:51 +0200 Subject: [PATCH 09/19] Attach alias test to select-with-column test case --- R/pkg/inst/tests/testthat/test_sparkSQL.R | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R 
b/R/pkg/inst/tests/testthat/test_sparkSQL.R index 2c9a3502b3006..92cc9c104cb86 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -1223,6 +1223,12 @@ test_that("select with column", { expect_equal(columns(df4), c("name", "age")) expect_equal(count(df4), 3) + # Test select with alias + df5 <- alias(df, "table") + + expect_equal(columns(select(df5, column("table.name"))), "name") + expect_equal(columns(select(df5, "table.name")), "name") + expect_error(select(df, c("name", "age"), "name"), "To select multiple columns, use a character vector or list for col") }) @@ -2352,15 +2358,6 @@ test_that("mutate(), transform(), rename() and names()", { detach(airquality) }) -test_that("alias on SparkDataFrame", { - df <- alias(read.df(jsonPath, "json"), "table") - - actual <- sort(collect(select(df, column("table.name")))$name) - expected <- c("Andy", "Justin", "Michael") - - expect_equal(actual, expected) -}) - test_that("read/write ORC files", { skip_on_cran() From 40fedcb8c41bc84deead205aad81e84c095045b5 Mon Sep 17 00:00:00 2001 From: zero323 Date: Tue, 2 May 2017 06:44:45 +0200 Subject: [PATCH 10/19] Extend description --- R/pkg/R/column.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index e3e7d6eea2ce3..24a96a531e2fd 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -132,7 +132,7 @@ createMethods() #' alias #' -#' Set a new name for an object +#' Set a new name for an object. Equivalent to SQL "AS" keyword. 
#' #' @param object x a Column or a SparkDataFrame #' @param data new name to use From 1e1ad443751fc3dc93487e5385cc934feb93f631 Mon Sep 17 00:00:00 2001 From: zero323 Date: Wed, 3 May 2017 02:25:15 +0200 Subject: [PATCH 11/19] Move alias documentation to generics --- R/pkg/R/column.R | 7 ------- R/pkg/R/generics.R | 10 ++++++++++ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index 24a96a531e2fd..574078012adad 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -130,13 +130,6 @@ createMethods <- function() { createMethods() -#' alias -#' -#' Set a new name for an object. Equivalent to SQL "AS" keyword. -#' -#' @param object x a Column or a SparkDataFrame -#' @param data new name to use -#' #' @rdname alias #' @name alias #' @aliases alias,Column-method diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 56ef1bee93536..6a41cd2aa8fe4 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -387,6 +387,16 @@ setGeneric("value", function(bcast) { standardGeneric("value") }) #' @export setGeneric("agg", function (x, ...) { standardGeneric("agg") }) +#' alias +#' +#' Set a new name for a Column or a SparkDataFrame. Equivalent to SQL "AS" keyword. +#' +#' @name alias +#' @rdname alias +#' @param object x a Column or a SparkDataFrame +#' @param data new name to use +NULL + #' @rdname arrange #' @export setGeneric("arrange", function(x, col, ...) 
{ standardGeneric("arrange") }) From 2d5ace288f2443327696823c343c095f0d8d64ca Mon Sep 17 00:00:00 2001 From: zero323 Date: Thu, 4 May 2017 03:13:45 +0200 Subject: [PATCH 12/19] Add family annotation --- R/pkg/R/DataFrame.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 61511b90bb1c7..006b9b470b03d 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -3749,6 +3749,7 @@ setMethod("hint", #' alias #' #' @aliases alias,SparkDataFrame-method +#' @family SparkDataFrame functions #' @rdname alias #' @name alias #' @examples From 5fe5495580eb3852ea5092a34dc2334c0e45c9b7 Mon Sep 17 00:00:00 2001 From: zero323 Date: Thu, 4 May 2017 08:32:54 +0200 Subject: [PATCH 13/19] Check that stats::alias is not masked --- R/pkg/inst/tests/testthat/test_sparkSQL.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index 92cc9c104cb86..af1e19198d734 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -1229,6 +1229,10 @@ test_that("select with column", { expect_equal(columns(select(df5, column("table.name"))), "name") expect_equal(columns(select(df5, "table.name")), "name") + # Test that stats::alias is not masked + expect_is(alias(aov(yield ~ block + N*P*K, npk)), "listof") + + expect_error(select(df, c("name", "age"), "name"), "To select multiple columns, use a character vector or list for col") }) From 09f9ccaf5e66a400d26b4ab6d600d951305d5fd3 Mon Sep 17 00:00:00 2001 From: zero323 Date: Thu, 4 May 2017 09:04:52 +0200 Subject: [PATCH 14/19] Fix style --- R/pkg/inst/tests/testthat/test_sparkSQL.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index af1e19198d734..977825fcfd8ce 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ 
-1230,7 +1230,7 @@ test_that("select with column", { expect_equal(columns(select(df5, "table.name")), "name") # Test that stats::alias is not masked - expect_is(alias(aov(yield ~ block + N*P*K, npk)), "listof") + expect_is(alias(aov(yield ~ block + N * P * K, npk)), "listof") expect_error(select(df, c("name", "age"), "name"), From f1c74f338b8df865a5e8b9a6e281211aa27af7d3 Mon Sep 17 00:00:00 2001 From: zero323 Date: Thu, 4 May 2017 12:17:42 +0200 Subject: [PATCH 15/19] vim --- R/pkg/R/DataFrame.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 006b9b470b03d..0445e4a23b229 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -3744,7 +3744,7 @@ setMethod("hint", stopifnot(all(sapply(parameters, is.character))) jdf <- callJMethod(x@sdf, "hint", name, parameters) dataFrame(jdf) - }) + }) #' alias #' From 43c02bcb644a8057249d110ba057c41e68e64a31 Mon Sep 17 00:00:00 2001 From: zero323 Date: Fri, 5 May 2017 20:29:31 +0200 Subject: [PATCH 16/19] Emphasize that alias returns new DataFrame --- R/pkg/R/generics.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 6a41cd2aa8fe4..6064748dc2bb7 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -389,7 +389,7 @@ setGeneric("agg", function (x, ...) { standardGeneric("agg") }) #' alias #' -#' Set a new name for a Column or a SparkDataFrame. Equivalent to SQL "AS" keyword. +#' Returns a new SparkDataFrame or Column with an alias set. Equivalent to SQL "AS" keyword. 
#' #' @name alias #' @rdname alias From 505561ab9bac02fa70568e2dbfb0b7f2a79cd6ce Mon Sep 17 00:00:00 2001 From: zero323 Date: Fri, 5 May 2017 20:32:30 +0200 Subject: [PATCH 17/19] Add return to generic alias --- R/pkg/R/generics.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 6064748dc2bb7..5175a6414c0a3 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -395,6 +395,7 @@ setGeneric("agg", function (x, ...) { standardGeneric("agg") }) #' @rdname alias #' @param object x a Column or a SparkDataFrame #' @param data new name to use +#' @return a Column or a SparkDataFrame NULL #' @rdname arrange From 1f1e72bb1d0aa268e179762eebcbd56e15640c1f Mon Sep 17 00:00:00 2001 From: zero323 Date: Sat, 6 May 2017 07:39:50 +0200 Subject: [PATCH 18/19] Add export --- R/pkg/R/DataFrame.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 0445e4a23b229..b56dddcb9f2ef 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -3752,6 +3752,7 @@ setMethod("hint", #' @family SparkDataFrame functions #' @rdname alias #' @name alias +#' @export #' @examples #' \dontrun{ #' df <- alias(createDataFrame(mtcars), "mtcars") From 2b8f288e64ff21d5da22f6c5e9bc863c0a464854 Mon Sep 17 00:00:00 2001 From: zero323 Date: Sun, 7 May 2017 23:43:54 +0200 Subject: [PATCH 19/19] Reorder annotations --- R/pkg/R/generics.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 5175a6414c0a3..3877f1906c270 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -389,13 +389,13 @@ setGeneric("agg", function (x, ...) { standardGeneric("agg") }) #' alias #' -#' Returns a new SparkDataFrame or Column with an alias set. Equivalent to SQL "AS" keyword. +#' Returns a new SparkDataFrame or a Column with an alias set. Equivalent to SQL "AS" keyword. 
#' #' @name alias #' @rdname alias -#' @param object x a Column or a SparkDataFrame +#' @param object a SparkDataFrame or a Column #' @param data new name to use -#' @return a Column or a SparkDataFrame +#' @return a SparkDataFrame or a Column NULL #' @rdname arrange