From dcc359ff69cd45fcb4b021a348f0b75eb3e0d8d9 Mon Sep 17 00:00:00 2001 From: zero323 Date: Sat, 22 Apr 2017 01:49:58 +0200 Subject: [PATCH 01/15] Initial implementation --- R/pkg/NAMESPACE | 2 + R/pkg/R/DataFrame.R | 53 +++++++++++++ R/pkg/R/generics.R | 9 +++ R/pkg/inst/tests/testthat/test_sparkSQL.R | 90 +++++++++++++++++++++++ 4 files changed, 154 insertions(+) diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index e804e30e14b86..ad90d7856f6b1 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -101,6 +101,7 @@ exportMethods("arrange", "createOrReplaceTempView", "crossJoin", "crosstab", + "cube", "dapply", "dapplyCollect", "describe", @@ -143,6 +144,7 @@ exportMethods("arrange", "registerTempTable", "rename", "repartition", + "rollup", "sample", "sample_frac", "sampleBy", diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 88a138fd8eb1f..1eb081081dabe 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -3642,3 +3642,56 @@ setMethod("checkpoint", df <- callJMethod(x@sdf, "checkpoint", as.logical(eager)) dataFrame(df) }) + + +#' cube +#' +#' Create a multi-dimensional cube for the SparkDataFrame using the specified columns. +#' +#' @param x a SparkDataFrame. +#' @param ... variable(s) (character names(s) or Column(s)) to group on. +#' @return A GroupedData. +#' @family SparkDataFrame functions +#' @aliases cube,SparkDataFrame-method +#' @rdname cube +#' @name cube +#' @export +#' @examples +#' \dontrun{ +#' +#' } +#' @note cube since 2.3.0 +setMethod("cube", + signature(x = "SparkDataFrame"), + function(x, ...) { + cols <- list(...) + jcol <- lapply(cols, function(x) if (is.character(x)) column(x)@jc else x@jc) + sgd <- callJMethod(x@sdf, "cube", jcol) + groupedData(sgd) + }) + +#' rollup +#' +#' Create a multi-dimensional rollup for the SparkDataFrame using the specified columns. +#' +#' @param x a SparkDataFrame. +#' @param ... variable(s) (character names(s) or Column(s)) to group on. +#' @return A GroupedData. 
+#' @family SparkDataFrame functions +#' @aliases rollup,SparkDataFrame-method +#' @rdname rollup +#' @name rollup +#' @export +#' @examples +#' \dontrun{ +#' +#' } +#' @note rollup since 2.3.0 +setMethod("rollup", + signature(x = "SparkDataFrame"), + function(x, ...) { + cols <- list(...) + jcol <- lapply(cols, function(x) if (is.character(x)) column(x)@jc else x@jc) + sgd <- callJMethod(x@sdf, "rollup", jcol) + groupedData(sgd) + }) \ No newline at end of file diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 61d248ebd2e3e..005c43ff52f8c 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -483,6 +483,10 @@ setGeneric("createOrReplaceTempView", # @export setGeneric("crossJoin", function(x, y) { standardGeneric("crossJoin") }) +#' @rdname cube +#' @export +setGeneric("cube", function(x, ...) { standardGeneric("cube") }) + #' @rdname dapply #' @export setGeneric("dapply", function(x, func, schema) { standardGeneric("dapply") }) @@ -631,6 +635,11 @@ setGeneric("sample", standardGeneric("sample") }) +#' @rdname rollup +#' @export +setGeneric("rollup", + function(x, ...) 
{ standardGeneric("rollup") }) + #' @rdname sample #' @export setGeneric("sample_frac", diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index bf2093fdc475a..c41aefc48bb28 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -1782,6 +1782,96 @@ test_that("pivot GroupedData column", { expect_error(collect(sum(pivot(groupBy(df, "year"), "course", list("R", "R")), "earnings"))) }) +test_that("test multi-dimensional aggregations with cube and rollup", { + df <- createDataFrame(data.frame( + id = 1:6, + year = c(2016, 2016, 2016, 2017, 2017, 2017), + salary = c(10000, 15000, 20000, 22000, 32000, 21000), + department = c("management", "rnd", "sales", "management", "rnd", "sales") + )) + + actual_cube <- collect( + orderBy( + agg( + cube(df, "year", "department"), + expr("sum(salary) AS total_salary"), expr("avg(salary) AS average_salary") + ), + "year", "department" + ) + ) + + expected_cube <- data.frame( + year = c(rep(NA, 4), rep(2016, 4), rep(2017, 4)), + department = rep(c(NA, "management", "rnd", "sales"), times=3), + total_salary = c( + 120000, # Total + 10000 + 22000, 15000 + 32000, 20000 + 21000, # Department only + 20000 + 15000 + 10000, # 2016 + 10000, 15000, 20000, # 2016 each department + 21000 + 32000 + 22000, # 2017 + 22000, 32000, 21000 # 2017 each department + ), + average_salary = c( + # Total + mean(c(20000, 15000, 10000, 21000, 32000, 22000)), + # Mean by department + mean(c(10000, 22000)), mean(c(15000, 32000)), mean(c(20000, 21000)), + mean(c(10000, 15000, 20000)), # 2016 + 10000, 15000, 20000, # 2016 each department + mean(c(21000, 32000, 22000)), # 2017 + 22000, 32000, 21000 # 2017 each department + ), + stringsAsFactors = FALSE + ) + + expect_equal(actual_cube, expected_cube) + + # cube should accept column objects + expect_equal( + count(sum(cube(df, df$year, df$department), "salary")), + 12 + ) + + actual_rollup <- collect( + orderBy( + agg( + 
rollup(df, "year", "department"), + expr("sum(salary) AS total_salary"), expr("avg(salary) AS average_salary") + ), + "year", "department" + ) + ) + + expected_rollup <- data.frame( + year = c(NA, rep(2016, 4), rep(2017, 4)), + department = c(NA, rep(c(NA, "management", "rnd", "sales"), times=2)), + total_salary = c( + 120000, # Total + 20000 + 15000 + 10000, # 2016 + 10000, 15000, 20000, # 2016 each department + 21000 + 32000 + 22000, # 2017 + 22000, 32000, 21000 # 2017 each department + ), + average_salary = c( + # Total + mean(c(20000, 15000, 10000, 21000, 32000, 22000)), + mean(c(10000, 15000, 20000)), # 2016 + 10000, 15000, 20000, # 2016 each department + mean(c(21000, 32000, 22000)), # 2017 + 22000, 32000, 21000 # 2017 each department + ), + stringsAsFactors = FALSE + ) + + expect_equal(actual_rollup, expected_rollup) + + # cube should accept column objects + expect_equal( + count(sum(rollup(df, df$year, df$department), "salary")), + 9 + ) +}) + test_that("arrange() and orderBy() on a DataFrame", { df <- read.json(jsonPath) sorted <- arrange(df, df$age) From bc0401b660179d3a2a7ba763a9fdcd39caaeba1a Mon Sep 17 00:00:00 2001 From: zero323 Date: Sat, 22 Apr 2017 16:27:58 +0200 Subject: [PATCH 02/15] Add description to the vignette --- R/pkg/vignettes/sparkr-vignettes.Rmd | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/R/pkg/vignettes/sparkr-vignettes.Rmd b/R/pkg/vignettes/sparkr-vignettes.Rmd index f81dbab10b1e1..71f6acac09aeb 100644 --- a/R/pkg/vignettes/sparkr-vignettes.Rmd +++ b/R/pkg/vignettes/sparkr-vignettes.Rmd @@ -308,6 +308,21 @@ numCyl <- summarize(groupBy(carsDF, carsDF$cyl), count = n(carsDF$cyl)) head(numCyl) ``` +`groupBy` can be replaced with `cube` or `rollup` to compute subtotals across multiple dimensions. 
+ +```{r} +mean(cube(carsDF, "cyl", "gear", "am"), "mpg") +``` + +generates groupings for all possible combinations of grouping columns, while + +```{r} +mean(rollup(carsDF, "cyl", "gear", "am"), "mpg") +``` + +generates groupings for {(`cyl`, `gear`, `am`), (`cyl`, `gear`), (`cyl`), ()}. + + #### Operating on Columns SparkR also provides a number of functions that can directly applied to columns for data processing and during aggregation. The example below shows the use of basic arithmetic functions. From 7af59e3af87e478af1b24edc071c57dae1c3b927 Mon Sep 17 00:00:00 2001 From: zero323 Date: Sat, 22 Apr 2017 16:37:44 +0200 Subject: [PATCH 03/15] Fix tests style --- R/pkg/inst/tests/testthat/test_sparkSQL.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index c41aefc48bb28..5b569a4253792 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -1802,7 +1802,7 @@ test_that("test multi-dimensional aggregations with cube and rollup", { expected_cube <- data.frame( year = c(rep(NA, 4), rep(2016, 4), rep(2017, 4)), - department = rep(c(NA, "management", "rnd", "sales"), times=3), + department = rep(c(NA, "management", "rnd", "sales"), times = 3), total_salary = c( 120000, # Total 10000 + 22000, 15000 + 32000, 20000 + 21000, # Department only @@ -1844,7 +1844,7 @@ test_that("test multi-dimensional aggregations with cube and rollup", { expected_rollup <- data.frame( year = c(NA, rep(2016, 4), rep(2017, 4)), - department = c(NA, rep(c(NA, "management", "rnd", "sales"), times=2)), + department = c(NA, rep(c(NA, "management", "rnd", "sales"), times = 2)), total_salary = c( 120000, # Total 20000 + 15000 + 10000, # 2016 From 132099cc668baa240a0a417950f78fea4be961ec Mon Sep 17 00:00:00 2001 From: zero323 Date: Sat, 22 Apr 2017 17:34:11 +0200 Subject: [PATCH 04/15] Add missing examples --- R/pkg/R/DataFrame.R | 6 ++++-- 1 file 
changed, 4 insertions(+), 2 deletions(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 1eb081081dabe..beed73367ce76 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -3658,7 +3658,8 @@ setMethod("checkpoint", #' @export #' @examples #' \dontrun{ -#' +#' df <- createDataFrame(mtcars) +#' mean(cube(df, "cyl", "gear", "am"), "mpg") #' } #' @note cube since 2.3.0 setMethod("cube", @@ -3684,7 +3685,8 @@ setMethod("cube", #' @export #' @examples #' \dontrun{ -#' +#' df <- createDataFrame(mtcars) +#' mean(rollup(df, "cyl", "gear", "am"), "mpg") #' } #' @note rollup since 2.3.0 setMethod("rollup", From 97602398239a7a0a5dd86ece0ad1d1663ac7e4fe Mon Sep 17 00:00:00 2001 From: zero323 Date: Sun, 23 Apr 2017 23:08:01 +0200 Subject: [PATCH 05/15] Place rollup generic in a single line --- R/pkg/R/generics.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 005c43ff52f8c..ee14595589b42 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -637,8 +637,7 @@ setGeneric("sample", #' @rdname rollup #' @export -setGeneric("rollup", - function(x, ...) { standardGeneric("rollup") }) +setGeneric("rollup", function(x, ...) 
{ standardGeneric("rollup") }) #' @rdname sample #' @export From 396cf552447877f2c62f0ae1872df932102158ab Mon Sep 17 00:00:00 2001 From: zero323 Date: Sun, 23 Apr 2017 23:09:20 +0200 Subject: [PATCH 06/15] Add missing line at the end of the `DataFrame.R` --- R/pkg/R/DataFrame.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index beed73367ce76..3ab3a106f2d64 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -3696,4 +3696,4 @@ setMethod("rollup", jcol <- lapply(cols, function(x) if (is.character(x)) column(x)@jc else x@jc) sgd <- callJMethod(x@sdf, "rollup", jcol) groupedData(sgd) - }) \ No newline at end of file + }) From ab05919d256c9e7fde880bee79b769f12530284d Mon Sep 17 00:00:00 2001 From: zero323 Date: Sun, 23 Apr 2017 23:11:51 +0200 Subject: [PATCH 07/15] Adjust ellipsis description --- R/pkg/R/DataFrame.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 3ab3a106f2d64..c7f0d8e79e0cb 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -3649,7 +3649,7 @@ setMethod("checkpoint", #' Create a multi-dimensional cube for the SparkDataFrame using the specified columns. #' #' @param x a SparkDataFrame. -#' @param ... variable(s) (character names(s) or Column(s)) to group on. +#' @param ... character name(s) or Column(s) to group on. #' @return A GroupedData. #' @family SparkDataFrame functions #' @aliases cube,SparkDataFrame-method @@ -3676,7 +3676,7 @@ setMethod("cube", #' Create a multi-dimensional rollup for the SparkDataFrame using the specified columns. #' #' @param x a SparkDataFrame. -#' @param ... variable(s) (character names(s) or Column(s)) to group on. +#' @param ... character name(s) or Column(s) to group on. #' @return A GroupedData. 
#' @family SparkDataFrame functions #' @aliases rollup,SparkDataFrame-method From a3203272c5ce9dc1a9f923180dcfe00e6665d102 Mon Sep 17 00:00:00 2001 From: zero323 Date: Mon, 24 Apr 2017 02:52:41 +0200 Subject: [PATCH 08/15] Replace is.character check with class(x) == "Column" --- R/pkg/R/DataFrame.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index c7f0d8e79e0cb..e6a484d75ba37 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -3666,7 +3666,7 @@ setMethod("cube", signature(x = "SparkDataFrame"), function(x, ...) { cols <- list(...) - jcol <- lapply(cols, function(x) if (is.character(x)) column(x)@jc else x@jc) + jcol <- lapply(cols, function(x) if (class(x) == "Column") x@jc else column(x)@jc) sgd <- callJMethod(x@sdf, "cube", jcol) groupedData(sgd) }) @@ -3693,7 +3693,7 @@ setMethod("rollup", signature(x = "SparkDataFrame"), function(x, ...) { cols <- list(...) - jcol <- lapply(cols, function(x) if (is.character(x)) column(x)@jc else x@jc) + jcol <- lapply(cols, function(x) if (class(x) == "Column") x@jc else column(x)@jc) sgd <- callJMethod(x@sdf, "rollup", jcol) groupedData(sgd) }) From f4fa32f80657c66c24f6a5b4829de05f2e611e7c Mon Sep 17 00:00:00 2001 From: zero323 Date: Mon, 24 Apr 2017 21:50:10 +0200 Subject: [PATCH 09/15] Add tests for cube and rollup without groupings --- R/pkg/inst/tests/testthat/test_sparkSQL.R | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index 5b569a4253792..a097b4db7f9f4 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -1832,6 +1832,12 @@ test_that("test multi-dimensional aggregations with cube and rollup", { 12 ) + # cube without columns should result in a single aggregate + expect_equal( + collect(agg(cube(df), expr("sum(salary) as total_salary"))), + data.frame(total_salary = 120000) + ) + 
actual_rollup <- collect( orderBy( agg( @@ -1870,6 +1876,12 @@ test_that("test multi-dimensional aggregations with cube and rollup", { count(sum(rollup(df, df$year, df$department), "salary")), 9 ) + + # rollup without columns should result in a single aggregate + expect_equal( + collect(agg(rollup(df), expr("sum(salary) as total_salary"))), + data.frame(total_salary = 120000) + ) }) test_that("arrange() and orderBy() on a DataFrame", { From caeafdb13a6fc0b594510324077a85205e24807e Mon Sep 17 00:00:00 2001 From: zero323 Date: Mon, 24 Apr 2017 22:07:04 +0200 Subject: [PATCH 10/15] Clarify vignette description --- R/pkg/vignettes/sparkr-vignettes.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/pkg/vignettes/sparkr-vignettes.Rmd b/R/pkg/vignettes/sparkr-vignettes.Rmd index 71f6acac09aeb..4b9d6c3806098 100644 --- a/R/pkg/vignettes/sparkr-vignettes.Rmd +++ b/R/pkg/vignettes/sparkr-vignettes.Rmd @@ -308,7 +308,7 @@ numCyl <- summarize(groupBy(carsDF, carsDF$cyl), count = n(carsDF$cyl)) head(numCyl) ``` -`groupBy` can be replaced with `cube` or `rollup` to compute subtotals across multiple dimensions. +Use `cube` or `rollup` to compute subtotals across multiple dimensions. ```{r} mean(cube(carsDF, "cyl", "gear", "am"), "mpg") From e9bbe6f892f3b2614cce0bd5ef6791953ca1dbfb Mon Sep 17 00:00:00 2001 From: zero323 Date: Mon, 24 Apr 2017 23:30:55 +0200 Subject: [PATCH 11/15] Extend R programming guide with cube and rollup --- docs/sparkr.md | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/docs/sparkr.md b/docs/sparkr.md index a1a35a7757e57..e015ab260fca8 100644 --- a/docs/sparkr.md +++ b/docs/sparkr.md @@ -264,6 +264,36 @@ head(arrange(waiting_counts, desc(waiting_counts$count))) {% endhighlight %} +In addition to standard aggregations, SparkR supports [OLAP cube](https://en.wikipedia.org/wiki/OLAP_cube) operators `cube`: + +
+{% highlight r %} +head(agg(cube(df, "cyl", "disp", "gear"), avg(df$mpg))) +## cyl disp gear avg(mpg) +##1 NA 140.8 4 22.8 +##2 4 75.7 4 30.4 +##3 8 400.0 3 19.2 +##4 8 318.0 3 15.5 +##5 NA 351.0 NA 15.8 +##6 NA 275.8 NA 16.3 +{% endhighlight %} +
+ +and `rollup`: + +
+{% highlight r %} +head(agg(rollup(df, "cyl", "disp", "gear"), avg(df$mpg))) +## cyl disp gear avg(mpg) +##1 4 75.7 4 30.4 +##2 8 400.0 3 19.2 +##3 8 318.0 3 15.5 +##4 4 78.7 NA 32.4 +##5 8 304.0 3 15.2 +##6 4 79.0 NA 27.3 +{% endhighlight %} +
+ ### Operating on Columns SparkR also provides a number of functions that can directly applied to columns for data processing and during aggregation. The example below shows the use of basic arithmetic functions. From 7d6c6d5bf3fe700ce9e6d1dc6bad622169ffb1e6 Mon Sep 17 00:00:00 2001 From: zero323 Date: Tue, 25 Apr 2017 16:25:54 +0200 Subject: [PATCH 12/15] Describe behavior with missing grouping columns --- R/pkg/R/DataFrame.R | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index e6a484d75ba37..88206f189ad07 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -3648,6 +3648,9 @@ setMethod("checkpoint", #' #' Create a multi-dimensional cube for the SparkDataFrame using the specified columns. #' +#' If grouping expression is missing `cube` creates a single global aggregate and is equivalent to +#' direct application of \link{agg}. +#' #' @param x a SparkDataFrame. #' @param ... character name(s) or Column(s) to group on. #' @return A GroupedData. @@ -3660,6 +3663,10 @@ setMethod("checkpoint", #' \dontrun{ #' df <- createDataFrame(mtcars) #' mean(cube(df, "cyl", "gear", "am"), "mpg") +#' +#' # Following calls are equivalent +#' agg(cube(carsDF), mean(carsDF$mpg)) +#' agg(carsDF, mean(carsDF$mpg)) #' } #' @note cube since 2.3.0 setMethod("cube", @@ -3675,6 +3682,9 @@ setMethod("cube", #' #' Create a multi-dimensional rollup for the SparkDataFrame using the specified columns. #' +#' If grouping expression is missing `rollup` creates a single global aggregate and is equivalent to +#' direct application of \link{agg}. +#' #' @param x a SparkDataFrame. #' @param ... character name(s) or Column(s) to group on. #' @return A GroupedData. 
@@ -3684,9 +3694,13 @@ setMethod("cube", #' @name rollup #' @export #' @examples -#' \dontrun{ +#'\dontrun{ #' df <- createDataFrame(mtcars) #' mean(rollup(df, "cyl", "gear", "am"), "mpg") +#' +#' # Following calls are equivalent +#' agg(rollup(carsDF), mean(carsDF$mpg)) +#' agg(carsDF, mean(carsDF$mpg)) #' } #' @note rollup since 2.3.0 setMethod("rollup", From 76f12cd236844751ffd85a729a9ffe698c0ffa2a Mon Sep 17 00:00:00 2001 From: zero323 Date: Tue, 25 Apr 2017 16:30:09 +0200 Subject: [PATCH 13/15] Use seealso to link groupBy, cube and rollup and agg --- R/pkg/R/DataFrame.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 88206f189ad07..f397b52c6aa0b 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -1337,6 +1337,7 @@ setMethod("toRDD", #' agg(groupBy(df, "department", "gender"), salary="avg", "age" -> "max") #' } #' @note groupBy since 1.4.0 +#' @seealso \link{agg}, \link{cube}, \link{rollup} setMethod("groupBy", signature(x = "SparkDataFrame"), function(x, ...) { @@ -3669,6 +3670,7 @@ setMethod("checkpoint", #' agg(carsDF, mean(carsDF$mpg)) #' } #' @note cube since 2.3.0 +#' @seealso \link{agg}, \link{groupBy}, \link{rollup} setMethod("cube", signature(x = "SparkDataFrame"), function(x, ...) { @@ -3703,6 +3705,7 @@ setMethod("cube", #' agg(carsDF, mean(carsDF$mpg)) #' } #' @note rollup since 2.3.0 +#' @seealso \link{agg}, \link{cube}, \link{groupBy} setMethod("rollup", signature(x = "SparkDataFrame"), function(x, ...) { From ee73dd88794d4e3fd1c4532c54d1fa424b1d77bb Mon Sep 17 00:00:00 2001 From: zero323 Date: Tue, 25 Apr 2017 16:31:25 +0200 Subject: [PATCH 14/15] Make groupBy ... 
description consistent with cube and rollup --- R/pkg/R/DataFrame.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index f397b52c6aa0b..e58eca494de0f 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -1321,7 +1321,7 @@ setMethod("toRDD", #' Groups the SparkDataFrame using the specified columns, so we can run aggregation on them. #' #' @param x a SparkDataFrame. -#' @param ... variable(s) (character names(s) or Column(s)) to group on. +#' @param ... character name(s) or Column(s) to group on. #' @return A GroupedData. #' @family SparkDataFrame functions #' @aliases groupBy,SparkDataFrame-method From 0da03b2d1e1c0e752329b6816bcf7e076c4450cd Mon Sep 17 00:00:00 2001 From: zero323 Date: Tue, 25 Apr 2017 21:13:43 +0200 Subject: [PATCH 15/15] Remove extra whitespaces and replace backticks --- R/pkg/R/DataFrame.R | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index e58eca494de0f..cd6f03a13d7c7 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -1321,7 +1321,7 @@ setMethod("toRDD", #' Groups the SparkDataFrame using the specified columns, so we can run aggregation on them. #' #' @param x a SparkDataFrame. -#' @param ... character name(s) or Column(s) to group on. +#' @param ... character name(s) or Column(s) to group on. #' @return A GroupedData. #' @family SparkDataFrame functions #' @aliases groupBy,SparkDataFrame-method @@ -3644,12 +3644,11 @@ setMethod("checkpoint", dataFrame(df) }) - #' cube #' #' Create a multi-dimensional cube for the SparkDataFrame using the specified columns. #' -#' If grouping expression is missing `cube` creates a single global aggregate and is equivalent to +#' If grouping expression is missing \code{cube} creates a single global aggregate and is equivalent to #' direct application of \link{agg}. #' #' @param x a SparkDataFrame. 
@@ -3684,7 +3683,7 @@ setMethod("cube", #' #' Create a multi-dimensional rollup for the SparkDataFrame using the specified columns. #' -#' If grouping expression is missing `rollup` creates a single global aggregate and is equivalent to +#' If grouping expression is missing \code{rollup} creates a single global aggregate and is equivalent to #' direct application of \link{agg}. #' #' @param x a SparkDataFrame.