2 changes: 2 additions & 0 deletions R/pkg/NAMESPACE
@@ -249,6 +249,8 @@ exportMethods("%<=>%",
"getField",
"getItem",
"greatest",
"grouping_bit",
"grouping_id",
"hex",
"histogram",
"hour",
84 changes: 84 additions & 0 deletions R/pkg/R/functions.R
@@ -3890,3 +3890,87 @@ setMethod("not",
jc <- callJStatic("org.apache.spark.sql.functions", "not", x@jc)
column(jc)
})

#' grouping_bit
#'
#' Indicates whether a specified column in a GROUP BY list is aggregated or not,
#' returning 1 for aggregated or 0 for not aggregated in the result set.
#'
#' Same as \code{GROUPING} in SQL and the \code{grouping} function in Scala.
#'
#' @param x Column to compute on
#'
#' @rdname grouping_bit
#' @name grouping_bit
#' @family agg_funcs
#' @aliases grouping_bit,Column-method
#' @export
#' @examples \dontrun{
#' df <- createDataFrame(mtcars)
#'
#' # With cube
#' agg(
#' cube(df, "cyl", "gear", "am"),
#' mean(df$mpg),
#' grouping_bit(df$cyl), grouping_bit(df$gear), grouping_bit(df$am)
#' )
#'
#' # With rollup
#' agg(
#' rollup(df, "cyl", "gear", "am"),
#' mean(df$mpg),
#' grouping_bit(df$cyl), grouping_bit(df$gear), grouping_bit(df$am)
#' )
#' }
#' @note grouping_bit since 2.3.0
setMethod("grouping_bit",
signature(x = "Column"),
function(x) {
jc <- callJStatic("org.apache.spark.sql.functions", "grouping", x@jc)
column(jc)
})
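
For a concrete sense of what the new column contains, here is a minimal sketch (assuming an active SparkR session started with sparkR.session()): in the output of cube(), grouping_bit is 1 on rows where the column was rolled up into a subtotal and 0 on rows where it was grouped on.

# Illustrative sketch: grouping_bit marks the subtotal rows produced by cube()
# (assumes sparkR.session() has already been called)
df <- createDataFrame(mtcars)
gb <- agg(
  cube(df, "cyl"),
  mean(df$mpg),
  alias(grouping_bit(df$cyl), "is_subtotal")
)
head(gb)
# the grand-total row (cyl is NA) has is_subtotal == 1;
# the per-cyl rows have is_subtotal == 0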

#' grouping_id
#'
#' Returns the level of grouping.
#'
#' Equivalent to \code{
#' grouping_bit(c1) * 2^(n - 1) + grouping_bit(c2) * 2^(n - 2) + ... + grouping_bit(cn)
#' }
#'
#' @param x Column to compute on
#' @param ... additional Column(s) (optional).
#'
#' @rdname grouping_id
#' @name grouping_id
#' @family agg_funcs
#' @aliases grouping_id,Column-method
#' @export
#' @examples \dontrun{
#' df <- createDataFrame(mtcars)
#'
#' # With cube
#' agg(
#' cube(df, "cyl", "gear", "am"),
#' mean(df$mpg),
#' grouping_id(df$cyl, df$gear, df$am)
#' )
#'
#' # With rollup
#' agg(
#' rollup(df, "cyl", "gear", "am"),
#' mean(df$mpg),
#' grouping_id(df$cyl, df$gear, df$am)
#' )
#' }
#' @note grouping_id since 2.3.0
setMethod("grouping_id",
signature(x = "Column"),
function(x, ...) {
jcols <- lapply(list(x, ...), function(x) {
stopifnot(class(x) == "Column")
x@jc
})
jc <- callJStatic("org.apache.spark.sql.functions", "grouping_id", jcols)
column(jc)
})
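
A worked instance of the formula documented above, as a minimal sketch (again assuming an active SparkR session): with n = 2 grouping columns, grouping_id reduces to grouping_bit(c1) * 2 + grouping_bit(c2), so the grand-total row gets 3, a row subtotaled over the second column gets 1, a row subtotaled over the first column gets 2, and fully grouped rows get 0.

# Illustrative check of the bit-weighted encoding for n = 2 columns
# (assumes sparkR.session() has already been called)
df <- createDataFrame(mtcars)
gid <- agg(
  cube(df, "cyl", "gear"),
  alias(grouping_bit(df$cyl), "bit_cyl"),
  alias(grouping_bit(df$gear), "bit_gear"),
  alias(grouping_id(df$cyl, df$gear), "gid")
)
# every collected row satisfies gid == bit_cyl * 2 + bit_gear
res <- collect(gid)
stopifnot(all(res$gid == res$bit_cyl * 2 + res$bit_gear))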
8 changes: 8 additions & 0 deletions R/pkg/R/generics.R
@@ -1052,6 +1052,14 @@ setGeneric("from_unixtime", function(x, ...) { standardGeneric("from_unixtime")
#' @export
setGeneric("greatest", function(x, ...) { standardGeneric("greatest") })

#' @rdname grouping_bit
#' @export
setGeneric("grouping_bit", function(x) { standardGeneric("grouping_bit") })

#' @rdname grouping_id
#' @export
setGeneric("grouping_id", function(x, ...) { standardGeneric("grouping_id") })

#' @rdname hex
#' @export
setGeneric("hex", function(x) { standardGeneric("hex") })
56 changes: 54 additions & 2 deletions R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -1848,7 +1848,11 @@ test_that("test multi-dimensional aggregations with cube and rollup", {
orderBy(
agg(
cube(df, "year", "department"),
expr("sum(salary) AS total_salary"), expr("avg(salary) AS average_salary")
expr("sum(salary) AS total_salary"),
expr("avg(salary) AS average_salary"),
alias(grouping_bit(df$year), "grouping_year"),
alias(grouping_bit(df$department), "grouping_department"),
alias(grouping_id(df$year, df$department), "grouping_id")
),
"year", "department"
)
@@ -1875,6 +1879,30 @@ test_that("test multi-dimensional aggregations with cube and rollup", {
mean(c(21000, 32000, 22000)), # 2017
22000, 32000, 21000 # 2017 each department
),
grouping_year = c(
1, # global
1, 1, 1, # by department
0, # 2016
0, 0, 0, # 2016 by department
0, # 2017
0, 0, 0 # 2017 by department
),
grouping_department = c(
1, # global
0, 0, 0, # by department
1, # 2016
0, 0, 0, # 2016 by department
1, # 2017
0, 0, 0 # 2017 by department
),
grouping_id = c(
3, # 11
2, 2, 2, # 10
1, # 01
0, 0, 0, # 00
1, # 01
0, 0, 0 # 00
),
stringsAsFactors = FALSE
)
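
The expected grouping_id column above is just the two grouping bits packed into an integer, which is what the binary comments (# 11, # 10, # 01, # 00) spell out; as a sketch, with the expected data frame bound to a hypothetical name such as expected_answer (the assignment falls outside this hunk), the relationship could be checked with:

# Illustrative only; "expected_answer" is a placeholder for the data frame built above
stopifnot(all(
  expected_answer$grouping_id ==
    expected_answer$grouping_year * 2 + expected_answer$grouping_department
))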

@@ -1896,7 +1924,10 @@ test_that("test multi-dimensional aggregations with cube and rollup", {
orderBy(
agg(
rollup(df, "year", "department"),
expr("sum(salary) AS total_salary"), expr("avg(salary) AS average_salary")
expr("sum(salary) AS total_salary"), expr("avg(salary) AS average_salary"),
alias(grouping_bit(df$year), "grouping_year"),
alias(grouping_bit(df$department), "grouping_department"),
alias(grouping_id(df$year, df$department), "grouping_id")
),
"year", "department"
)
Expand All @@ -1920,6 +1951,27 @@ test_that("test multi-dimensional aggregations with cube and rollup", {
mean(c(21000, 32000, 22000)), # 2017
22000, 32000, 21000 # 2017 each department
),
grouping_year = c(
1, # global
0, # 2016
0, 0, 0, # 2016 each department
0, # 2017
0, 0, 0 # 2017 each department
),
grouping_department = c(
1, # global
1, # 2016
0, 0, 0, # 2016 each department
1, # 2017
0, 0, 0 # 2017 each department
),
grouping_id = c(
3, # 11
1, # 01
0, 0, 0, # 00
1, # 01
0, 0, 0 # 00
),
stringsAsFactors = FALSE
)
