From 2a5f08446f7a39b8a0f32e9722f764501806d732 Mon Sep 17 00:00:00 2001 From: Yu ISHIKAWA Date: Thu, 13 Aug 2015 15:08:27 +0900 Subject: [PATCH 1/6] [SPARK-9871][SparkR] Add expression functions into SparkR which have a variable parameter --- R/pkg/R/functions.R | 42 ++++++++++++++++++++++++++++++++ R/pkg/R/generics.R | 16 ++++++++++++ R/pkg/inst/tests/test_sparkSQL.R | 13 ++++++++++ 3 files changed, 71 insertions(+) diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index a15d2d5da534e..8203680625c33 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -67,6 +67,14 @@ createFunctions <- function() { createFunctions() +#' @rdname functions +#' @return Creates a Column class of literal value. +#' @export +lit <- function(x) { + jc <- callJStatic("org.apache.spark.sql.functions", "lit", ifelse(class(x) == "Column", x@jc, x)) + column(jc) +} + #' Approx Count Distinct #' #' @rdname functions @@ -93,6 +101,40 @@ setMethod("countDistinct", column(jc) }) +#' @rdname functions +#' @return Concatenates multiple input string columns together into a single string column. +setMethod("concat", + signature(x = "Column"), + function(x, ...) { + jcols <- lapply(list(x, ...), function(x) { x@jc }) + jc <- callJStatic("org.apache.spark.sql.functions", "concat", listToSeq(jcols)) + column(jc) + }) + +#' @rdname functions +#' @return Returns the greatest value of the list of column names, skipping null values. +#' This function takes at least 2 parameters. It will return null if all parameters are null. +setMethod("greatest", + signature(x = "Column"), + function(x, ...) { + stopifnot(length(list(...)) > 0) + jcols <- lapply(list(x, ...), function(x) { x@jc }) + jc <- callJStatic("org.apache.spark.sql.functions", "greatest", listToSeq(jcols)) + column(jc) + }) + +#' @rdname functions +#' @return Returns the least value of the list of column names, skipping null values. +#' This function takes at least 2 parameters. It will return null iff all parameters are null. 
+setMethod("least", + signature(x = "Column"), + function(x, ...) { + stopifnot(length(list(...)) > 0) + jcols <- lapply(list(x, ...), function(x) { x@jc }) + jc <- callJStatic("org.apache.spark.sql.functions", "least", listToSeq(jcols)) + column(jc) + }) + #' @rdname functions #' @aliases ceil setMethod("ceiling", diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index f11e7fcb6a07c..692a5474a5568 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -682,6 +682,10 @@ setGeneric("cbrt", function(x) { standardGeneric("cbrt") }) #' @export setGeneric("ceil", function(x) { standardGeneric("ceil") }) +#' @rdname column +#' @export +setGeneric("concat", function(x, ...) { standardGeneric("concat") }) + #' @rdname functions #' @export setGeneric("crc32", function(x) { standardGeneric("crc32") }) @@ -702,6 +706,10 @@ setGeneric("dayofyear", function(x) { standardGeneric("dayofyear") }) #' @export setGeneric("explode", function(x) { standardGeneric("explode") }) +#' @rdname column +#' @export +setGeneric("greatest", function(x, ...) { standardGeneric("greatest") }) + #' @rdname functions #' @export setGeneric("hex", function(x) { standardGeneric("hex") }) @@ -722,6 +730,10 @@ setGeneric("isNaN", function(x) { standardGeneric("isNaN") }) #' @export setGeneric("last_day", function(x) { standardGeneric("last_day") }) +#' @rdname column +#' @export +setGeneric("least", function(x, ...) { standardGeneric("least") }) + #' @rdname functions #' @export setGeneric("levenshtein", function(y, x) { standardGeneric("levenshtein") }) @@ -794,6 +806,10 @@ setGeneric("size", function(x) { standardGeneric("size") }) #' @export setGeneric("soundex", function(x) { standardGeneric("soundex") }) +#' @rdname column +#' @export +setGeneric("struct", function(x, ...) 
{ standardGeneric("struct") }) + #' @rdname functions #' @export setGeneric("sumDistinct", function(x) { standardGeneric("sumDistinct") }) diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index e6d3b21ff825b..26cb4c809aa33 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -560,6 +560,11 @@ test_that("select with column", { df2 <- select(df, df$age) expect_equal(columns(df2), c("age")) expect_equal(count(df2), 3) + + df3 <- select(df, lit("x")) + expect_equal(columns(df3), c("x")) + expect_equal(count(df3), 3) + expect_equal(collect(select(df3, "x"))[[1, 1]], "x") }) test_that("selectExpr() on a DataFrame", { @@ -692,6 +697,14 @@ test_that("string operators", { expect_equal(count(where(df, startsWith(df$name, "A"))), 1) expect_equal(first(select(df, substr(df$name, 1, 2)))[[1]], "Mi") expect_equal(collect(select(df, cast(df$age, "string")))[[2, 1]], "30") + expect_equal(collect(select(df, concat(df$name, lit(":"), df$age)))[[2, 1]], "Andy:30") +}) + +test_that("greatest() and least() on a DataFrame", { + l <- list(list(a = 1, b = 2), list(a = 3, b = 4)) + df <- createDataFrame(sqlContext, l) + expect_equal(collect(select(df, greatest(df$a, df$b)))[, 1], c(2, 4)) + expect_equal(collect(select(df, least(df$a, df$b)))[, 1], c(1, 3)) }) test_that("group by", { From 1915e60190b85c83f82e2bb19cdee039be8b4afc Mon Sep 17 00:00:00 2001 From: Yu ISHIKAWA Date: Sat, 15 Aug 2015 10:08:23 +0900 Subject: [PATCH 2/6] Fix the rdnames from `column` to `functions` --- R/pkg/R/generics.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 692a5474a5568..91af10594811f 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -682,7 +682,7 @@ setGeneric("cbrt", function(x) { standardGeneric("cbrt") }) #' @export setGeneric("ceil", function(x) { standardGeneric("ceil") }) -#' @rdname column +#' @rdname functions #' @export setGeneric("concat", 
function(x, ...) { standardGeneric("concat") }) @@ -706,7 +706,7 @@ setGeneric("dayofyear", function(x) { standardGeneric("dayofyear") }) #' @export setGeneric("explode", function(x) { standardGeneric("explode") }) -#' @rdname column +#' @rdname functions #' @export setGeneric("greatest", function(x, ...) { standardGeneric("greatest") }) @@ -730,7 +730,7 @@ setGeneric("isNaN", function(x) { standardGeneric("isNaN") }) #' @export setGeneric("last_day", function(x) { standardGeneric("last_day") }) -#' @rdname column +#' @rdname functions #' @export setGeneric("least", function(x, ...) { standardGeneric("least") }) @@ -806,7 +806,7 @@ setGeneric("size", function(x) { standardGeneric("size") }) #' @export setGeneric("soundex", function(x) { standardGeneric("soundex") }) -#' @rdname column +#' @rdname functions #' @export setGeneric("struct", function(x, ...) { standardGeneric("struct") }) From 50ed0e57830a1c55138df6d1b5188e8ce79d44af Mon Sep 17 00:00:00 2001 From: Yu ISHIKAWA Date: Sat, 15 Aug 2015 10:09:40 +0900 Subject: [PATCH 3/6] Remove the generic of `struct` --- R/pkg/R/generics.R | 4 ---- 1 file changed, 4 deletions(-) diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 91af10594811f..3fd24d93f9f26 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -806,10 +806,6 @@ setGeneric("size", function(x) { standardGeneric("size") }) #' @export setGeneric("soundex", function(x) { standardGeneric("soundex") }) -#' @rdname functions -#' @export -setGeneric("struct", function(x, ...) 
{ standardGeneric("struct") }) - #' @rdname functions #' @export setGeneric("sumDistinct", function(x) { standardGeneric("sumDistinct") }) From dc98a13d5bc3ba2c79713d45abebb514de1b4c13 Mon Sep 17 00:00:00 2001 From: Yu ISHIKAWA Date: Sat, 15 Aug 2015 10:33:49 +0900 Subject: [PATCH 4/6] Update NAMESPACE --- R/pkg/NAMESPACE | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index b2d92bdf4840e..bcca8133b0c2e 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -98,6 +98,7 @@ exportMethods("abs", "contains", "cos", "cosh", + "concat", "countDistinct", "desc", "endsWith", @@ -106,10 +107,12 @@ exportMethods("abs", "floor", "getField", "getItem", + "greatest", "hypot", "isNotNull", "isNull", "last", + "least", "like", "log", "log10", From bf59a84ba53bfc682a88c67a5e5bdfc43d038891 Mon Sep 17 00:00:00 2001 From: Yu ISHIKAWA Date: Mon, 17 Aug 2015 09:49:46 +0900 Subject: [PATCH 5/6] Modify `lit` function from a S3 function to a S4 function --- R/pkg/R/functions.R | 10 +++++----- R/pkg/R/generics.R | 4 ++++ 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 8203680625c33..6eef4d638ee9a 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -69,11 +69,11 @@ createFunctions() #' @rdname functions #' @return Creates a Column class of literal value. -#' @export -lit <- function(x) { - jc <- callJStatic("org.apache.spark.sql.functions", "lit", ifelse(class(x) == "Column", x@jc, x)) - column(jc) -} +setMethod("lit", signature("ANY"), + function(x) { + jc <- callJStatic("org.apache.spark.sql.functions", "lit", if (inherits(x, "Column")) x@jc else x) + column(jc) + }) #' Approx Count Distinct #' diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 3fd24d93f9f26..5c1cc98fd9e80 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -738,6 +738,10 @@ setGeneric("least", function(x, ...) 
{ standardGeneric("least") }) #' @export setGeneric("levenshtein", function(y, x) { standardGeneric("levenshtein") }) +#' @rdname functions +#' @export +setGeneric("lit", function(x) { standardGeneric("lit") }) + #' @rdname functions #' @export setGeneric("lower", function(x) { standardGeneric("lower") }) From 36407efe872f0b317db2ff40542d70fdcb0691f1 Mon Sep 17 00:00:00 2001 From: Yu ISHIKAWA Date: Mon, 17 Aug 2015 14:21:08 +0900 Subject: [PATCH 6/6] Update NAMESPACE to add `lit` --- R/pkg/NAMESPACE | 1 + 1 file changed, 1 insertion(+) diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index bcca8133b0c2e..fd9dfdf60edb3 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -111,6 +111,7 @@ exportMethods("abs", "hypot", "isNotNull", "isNull", + "lit", "last", "least", "like",