From c1961738fa366cc92d7e9a324c0e0445ca6d26b4 Mon Sep 17 00:00:00 2001 From: Yu ISHIKAWA Date: Tue, 23 Jun 2015 07:10:25 +0900 Subject: [PATCH 1/4] [SPARK-8431][SparkR] Add in operator to DataFrame Column in SparkR --- R/pkg/R/column.R | 11 +++++++++++ R/pkg/inst/tests/test_sparkSQL.R | 10 ++++++++++ 2 files changed, 21 insertions(+) diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index 80e92d3105a36..2b1b03611df52 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -91,6 +91,16 @@ createOperator <- function(op) { }) } +createInOperator <- function(op) { + setMethod("%in%", + signature(x = "Column"), + function(x, table) { + table <- listToSeq(as.list(table)) + bar <- callJMethod(x@jc, "in", table) + return(column(bar)) + }) +} + createColumnFunction1 <- function(name) { setMethod(name, signature(x = "Column"), @@ -139,6 +149,7 @@ createBinaryMathfunctions <- function(name) { } createMethods <- function() { + createInOperator() for (op in names(operators)) { createOperator(op) } diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index 8946348ef801c..c43d4503d27ba 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -693,6 +693,16 @@ test_that("filter() on a DataFrame", { filtered2 <- where(df, df$name != "Michael") expect_true(count(filtered2) == 2) expect_true(collect(filtered2)$age[2] == 19) + + # test suites for %in% + filtered3 <- filter(df, "age in (19)") + expect_equal(count(filtered3), 1) + filtered4 <- filter(df, "age in (19, 30)") + expect_equal(count(filtered4), 2) + filtered5 <- where(df, df$age %in% c(19)) + expect_equal(count(filtered5), 1) + filtered6 <- where(df, df$age %in% c(19, 30)) + expect_equal(count(filtered6), 2) }) test_that("join() on a DataFrame", { From 6e37936949b4f7dd25ed521f82e9c89d0684aef0 Mon Sep 17 00:00:00 2001 From: Yu ISHIKAWA Date: Tue, 23 Jun 2015 07:16:56 +0900 Subject: [PATCH 2/4] Modify a variable name --- R/pkg/R/column.R | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index 2b1b03611df52..41502dbf50535 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -96,8 +96,8 @@ createInOperator <- function(op) { signature(x = "Column"), function(x, table) { table <- listToSeq(as.list(table)) - bar <- callJMethod(x@jc, "in", table) - return(column(bar)) + jc <- callJMethod(x@jc, "in", table) + return(column(jc)) }) } From f4309a70c8adaf7c526a4dbf57a85e6d6bf6ab4b Mon Sep 17 00:00:00 2001 From: Yu ISHIKAWA Date: Tue, 23 Jun 2015 07:48:02 +0900 Subject: [PATCH 3/4] Make a `setMethod` for `%in%` be independent --- R/pkg/R/column.R | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index 41502dbf50535..60d7e8da13c61 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -91,16 +91,6 @@ createOperator <- function(op) { }) } -createInOperator <- function(op) { - setMethod("%in%", - signature(x = "Column"), - function(x, table) { - table <- listToSeq(as.list(table)) - jc <- callJMethod(x@jc, "in", table) - return(column(jc)) - }) -} - createColumnFunction1 <- function(name) { setMethod(name, signature(x = "Column"), @@ -149,7 +139,6 @@ createBinaryMathfunctions <- function(name) { } createMethods <- function() { - createInOperator() for (op in names(operators)) { createOperator(op) } @@ -221,6 +210,18 @@ setMethod("cast", } }) +#' Specify multiple values +#' +#' @rdname column +#' @return a matched value as a result of comparing with given values. 
+setMethod("%in%", + signature(x = "Column"), + function(x, table) { + table <- listToSeq(as.list(table)) + jc <- callJMethod(x@jc, "in", table) + return(column(jc)) + }) + #' Approx Count Distinct #' #' @rdname column From 1f644233cf933083cab2b209436c0cdca77a7b83 Mon Sep 17 00:00:00 2001 From: Yu ISHIKAWA Date: Tue, 23 Jun 2015 14:07:50 +0900 Subject: [PATCH 4/4] Modify the comment --- R/pkg/R/column.R | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index 60d7e8da13c61..8e4b0f5bf1c4d 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -210,10 +210,14 @@ setMethod("cast", } }) -#' Specify multiple values +#' Match a column with given values. #' #' @rdname column -#' @return a matched value as a result of comparing with given values. +#' @return matched values as a result of comparing with given values. +#' \dontrun{ +#' filter(df, "age in (10, 30)") +#' where(df, df$age %in% c(10, 30)) +#' } setMethod("%in%", signature(x = "Column"), function(x, table) {