From 009f82c984c559b68169bb1944d2921b0c218e9e Mon Sep 17 00:00:00 2001 From: Huaxin Gao Date: Wed, 27 Jun 2018 12:48:44 -0700 Subject: [PATCH 1/6] SPARK[23648][R][SQL]Adds more types for hint in SparkR --- R/pkg/R/DataFrame.R | 14 +++++++++++++- R/pkg/tests/fulltests/test_sparkSQL.R | 9 +++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 70eb7a874b75c..17ca5db4fe7b5 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -3905,6 +3905,18 @@ setMethod("rollup", groupedData(sgd) }) +isTypeAllowed <- function(x) { + if (is.character(x)) { + TRUE + } else if (is.list(x)) { + TRUE + } else if (is.numeric(x)) { + TRUE + } else { + FALSE + } +} + #' hint #' #' Specifies execution plan hint and return a new SparkDataFrame. @@ -3929,7 +3941,7 @@ setMethod("hint", signature(x = "SparkDataFrame", name = "character"), function(x, name, ...) { parameters <- list(...) - stopifnot(all(sapply(parameters, is.character))) + stopifnot(all(sapply(parameters, isTypeAllowed))) jdf <- callJMethod(x@sdf, "hint", name, parameters) dataFrame(jdf) }) diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index 36e0f78bb0599..52ae74ebf631f 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -2370,6 +2370,15 @@ test_that("join(), crossJoin() and merge() on a DataFrame", { expect_true(any(grepl("BroadcastHashJoin", execution_plan_broadcast))) }) +test_that("test hint", { + df <- sql("SELECT * FROM range(10e10)") + hintList <- list("hint2", "hin3", "hint4") + execution_plan_hint <- capture.output( + explain(hint(df, "hint1", 1.23456, "aaaaaaaaaa", hintList), TRUE) + ) + expect_true(any(grepl("1.23456, aaaaaaaaaa", execution_plan_hint))) +}) + test_that("toJSON() on DataFrame", { df <- as.DataFrame(cars) df_json <- toJSON(df) From d323dd00916957abb48a31440c3274c3877e5401 Mon Sep 17 00:00:00 2001 From: Huaxin Gao Date: Sat, 30 Jun 2018 10:06:18 -0700 Subject: [PATCH 2/6] add type check for elements in list --- R/pkg/R/DataFrame.R | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 17ca5db4fe7b5..94967be2b4085 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -3905,13 +3905,11 @@ setMethod("rollup", groupedData(sgd) }) -isTypeAllowed <- function(x) { - if (is.character(x)) { +isTypeAllowedForSqlHint <- function(x) { + if (is.character(x) | is.numeric(x)) { TRUE } else if (is.list(x)) { - TRUE - } else if (is.numeric(x)) { - TRUE + all (sapply(x, (function (y) is.character(y) | is.numeric(y)))) } else { FALSE } @@ -3941,7 +3939,7 @@ setMethod("hint", signature(x = "SparkDataFrame", name = "character"), function(x, name, ...) { parameters <- list(...) - stopifnot(all(sapply(parameters, isTypeAllowed))) + stopifnot(all(sapply(parameters, isTypeAllowedForSqlHint))) jdf <- callJMethod(x@sdf, "hint", name, parameters) dataFrame(jdf) }) From aec391c9dcb6362874736e663d435f9dd8400125 Mon Sep 17 00:00:00 2001 From: Huaxin Gao Date: Tue, 3 Jul 2018 10:13:06 -0700 Subject: [PATCH 3/6] change | to || for shortcut eval --- R/pkg/R/DataFrame.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 94967be2b4085..eb5fe55bda2ca 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -3906,10 +3906,10 @@ setMethod("rollup", }) isTypeAllowedForSqlHint <- function(x) { - if (is.character(x) | is.numeric(x)) { + if (is.character(x) || is.numeric(x)) { TRUE } else if (is.list(x)) { - all (sapply(x, (function (y) is.character(y) | is.numeric(y)))) + all(sapply(x, (function(y) is.character(y) || is.numeric(y)))) } else { FALSE } From 8a52e50737b778b6b6aa56010af7b7c0a9fd0d7f Mon Sep 17 00:00:00 2001 From: Huaxin Gao Date: Mon, 10 Sep 2018 10:23:16 -0700 Subject: [PATCH 4/6] address comment --- R/pkg/R/DataFrame.R | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index eb5fe55bda2ca..3658ce934689b 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -3905,16 +3905,6 @@ setMethod("rollup", groupedData(sgd) }) -isTypeAllowedForSqlHint <- function(x) { - if (is.character(x) || is.numeric(x)) { - TRUE - } else if (is.list(x)) { - all(sapply(x, (function(y) is.character(y) || is.numeric(y)))) - } else { - FALSE - } -} - #' hint #' #' Specifies execution plan hint and return a new SparkDataFrame. @@ -3939,7 +3929,15 @@ setMethod("hint", signature(x = "SparkDataFrame", name = "character"), function(x, name, ...) { parameters <- list(...) - stopifnot(all(sapply(parameters, isTypeAllowedForSqlHint))) + stopifnot(all(sapply(parameters, function(x) { + if (is.character(x) || is.numeric(x)) { + TRUE + } else if (is.list(x)) { + all(sapply(x, function(y) { is.character(y) || is.numeric(y) })) + } else { + FALSE + } + }))) jdf <- callJMethod(x@sdf, "hint", name, parameters) dataFrame(jdf) }) From f437bd9f3880d1f88f51bd2d5dfe3648d52c53d7 Mon Sep 17 00:00:00 2001 From: Huaxin Gao Date: Tue, 11 Sep 2018 09:07:57 -0700 Subject: [PATCH 5/6] address comment --- R/pkg/R/DataFrame.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 3658ce934689b..a5037873003b9 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -3929,11 +3929,11 @@ setMethod("hint", signature(x = "SparkDataFrame", name = "character"), function(x, name, ...) { parameters <- list(...) - stopifnot(all(sapply(parameters, function(x) { - if (is.character(x) || is.numeric(x)) { + stopifnot(all(sapply(parameters, function(y) { + if (is.character(y) || is.numeric(y)) { TRUE - } else if (is.list(x)) { - all(sapply(x, function(y) { is.character(y) || is.numeric(y) })) + } else if (is.list(y)) { + all(sapply(y, function(z) { is.character(z) || is.numeric(z) })) } else { FALSE } From e8d97a01ab99404fbfda42499143a8de489a3896 Mon Sep 17 00:00:00 2001 From: Huaxin Gao Date: Mon, 17 Sep 2018 10:59:38 -0700 Subject: [PATCH 6/6] change stopifnot --- R/pkg/R/DataFrame.R | 6 ++++-- R/pkg/tests/fulltests/test_sparkSQL.R | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index a5037873003b9..d288b3f22d83f 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -3929,7 +3929,7 @@ setMethod("hint", signature(x = "SparkDataFrame", name = "character"), function(x, name, ...) { parameters <- list(...) - stopifnot(all(sapply(parameters, function(y) { + if (!all(sapply(parameters, function(y) { if (is.character(y) || is.numeric(y)) { TRUE } else if (is.list(y)) { @@ -3937,7 +3937,9 @@ setMethod("hint", } else { FALSE } - }))) + }))) { + stop("sql hint should be character, numeric, or list with character or numeric.") + } jdf <- callJMethod(x@sdf, "hint", name, parameters) dataFrame(jdf) }) diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index 52ae74ebf631f..8611eaf34e3da 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -2372,7 +2372,7 @@ test_that("join(), crossJoin() and merge() on a DataFrame", { test_that("test hint", { df <- sql("SELECT * FROM range(10e10)") - hintList <- list("hint2", "hin3", "hint4") + hintList <- list("hint2", "hint3", "hint4") execution_plan_hint <- capture.output( explain(hint(df, "hint1", 1.23456, "aaaaaaaaaa", hintList), TRUE) )