From 461714d727457d0aa4a3f39c5ff046860a1c7b9a Mon Sep 17 00:00:00 2001 From: "Oscar D. Lara Yejas" Date: Thu, 24 Sep 2015 22:01:39 +0100 Subject: [PATCH 1/8] SPARK-10807. Added as.data.frame as a synonym for collect(). --- R/pkg/NAMESPACE | 2 ++ R/pkg/R/DataFrame.R | 25 +++++++++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 9d39630706436..c28c47daeac13 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -247,3 +247,5 @@ export("structField", "structType.jobj", "structType.structField", "print.structType") + +export("as.data.frame") \ No newline at end of file diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index c3c1893487334..a594339c3e7cb 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -1848,3 +1848,28 @@ setMethod("crosstab", sct <- callJMethod(statFunctions, "crosstab", col1, col2) collect(dataFrame(sct)) }) + + +#' This function downloads the contents of a DataFrame into an R's data.frame. +#' Since data.frames are held in memory, ensure that you have enough memory +#' in your system to accommodate the contents. +#' +#' @title Download data from a DataFrame into a data.frame +#' @param x a DataFrame +#' @return a data.frame +#' @rdname as.data.frame +#' @examples \dontrun{ +#' +#' irisDF <- createDataFrame(sqlContext, iris) +#' df <- as.data.frame(irisDF[irisDF$Species == "setosa", ]) +#' } +setGeneric("as.data.frame") +setMethod(f = "as.data.frame", signature = "DataFrame", definition = + function(x, ...) { + # Check if additional parameters have been passed + if (length(list(...)) > 0) { + stop("Unused argument(s): (" %++% paste(list(...), collapse=", ") %++% ")") + } + return(collect(x)) + } +) \ No newline at end of file From e9e34b54f22ad99a80ee144774fd852a6634ed4e Mon Sep 17 00:00:00 2001 From: olarayej Date: Thu, 24 Sep 2015 14:16:48 -0700 Subject: [PATCH 2/8] Removed operator %++%, which is a synonym for paste() --- R/pkg/R/DataFrame.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index a594339c3e7cb..49500b08147a3 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -1868,8 +1868,8 @@ setMethod(f = "as.data.frame", signature = "DataFrame", definition = function(x, ...) { # Check if additional parameters have been passed if (length(list(...)) > 0) { - stop("Unused argument(s): (" %++% paste(list(...), collapse=", ") %++% ")") + stop(paste("Unused argument(s): ", paste(list(...), collapse=", "))) } return(collect(x)) } -) \ No newline at end of file +) From c65b682701c84cead439473e3a48ef53a70574f2 Mon Sep 17 00:00:00 2001 From: "Oscar D. Lara Yejas" Date: Thu, 24 Sep 2015 23:53:07 +0100 Subject: [PATCH 3/8] Removed extra blank space. --- .gitignore | 1 + R/pkg/R/DataFrame.R | 16 ++++++++-------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/.gitignore b/.gitignore index debad77ec2ad3..718302500017b 100644 --- a/.gitignore +++ b/.gitignore @@ -74,3 +74,4 @@ metastore/ warehouse/ TempStatsStore/ sql/hive-thriftserver/test_warehouses +.Rproj.user diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index a594339c3e7cb..c7dfafc36d825 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -1864,12 +1864,12 @@ setMethod("crosstab", #' df <- as.data.frame(irisDF[irisDF$Species == "setosa", ]) #' } setGeneric("as.data.frame") -setMethod(f = "as.data.frame", signature = "DataFrame", definition = - function(x, ...) { - # Check if additional parameters have been passed - if (length(list(...)) > 0) { - stop("Unused argument(s): (" %++% paste(list(...), collapse=", ") %++% ")") - } - return(collect(x)) +setMethod(f = "as.data.frame", signature = "DataFrame", definition = + function(x, ...) { + # Check if additional parameters have been passed + if (length(list(...)) > 0) { + stop(paste("Unused argument(s): ", paste(list(...), collapse=", "))) } -) \ No newline at end of file + return(collect(x)) + } +) From cee871c3a1a00b9da18da535f872ea752307fa92 Mon Sep 17 00:00:00 2001 From: olarayej Date: Thu, 24 Sep 2015 15:57:45 -0700 Subject: [PATCH 4/8] Removed extra spaces to comply with R style --- R/pkg/R/DataFrame.R | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 49500b08147a3..c7dfafc36d825 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -1864,12 +1864,12 @@ setMethod("crosstab", #' df <- as.data.frame(irisDF[irisDF$Species == "setosa", ]) #' } setGeneric("as.data.frame") -setMethod(f = "as.data.frame", signature = "DataFrame", definition = - function(x, ...) { - # Check if additional parameters have been passed - if (length(list(...)) > 0) { - stop(paste("Unused argument(s): ", paste(list(...), collapse=", "))) - } - return(collect(x)) +setMethod(f = "as.data.frame", signature = "DataFrame", definition = + function(x, ...) { + # Check if additional parameters have been passed + if (length(list(...)) > 0) { + stop(paste("Unused argument(s): ", paste(list(...), collapse=", "))) } + return(collect(x)) + } ) From 085116346a5f25b703a59c1cfef579ed31c62379 Mon Sep 17 00:00:00 2001 From: "Oscar D. Lara Yejas" Date: Mon, 28 Sep 2015 19:19:43 +0100 Subject: [PATCH 5/8] Moved setGeneric declaration to generics.R. Changed setMethod stle Removed return() --- R/pkg/R/DataFrame.R | 6 +++--- R/pkg/R/generics.R | 4 ++++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index c7dfafc36d825..df580fa3e2e92 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -1863,13 +1863,13 @@ setMethod("crosstab", #' irisDF <- createDataFrame(sqlContext, iris) #' df <- as.data.frame(irisDF[irisDF$Species == "setosa", ]) #' } -setGeneric("as.data.frame") -setMethod(f = "as.data.frame", signature = "DataFrame", definition = +setMethod(f = "as.data.frame", + signature = "DataFrame", function(x, ...) { # Check if additional parameters have been passed if (length(list(...)) > 0) { stop(paste("Unused argument(s): ", paste(list(...), collapse=", "))) } - return(collect(x)) + collect(x) } ) diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 43dd8d283ab6b..3db41e0fe2bb6 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -983,3 +983,7 @@ setGeneric("glm") #' @rdname rbind #' @export setGeneric("rbind", signature = "...") + +#' @rdname as.data.frame +#' @export +setGeneric("as.data.frame") \ No newline at end of file From 7a8e62a2a578a681638b592add7927c18a051cba Mon Sep 17 00:00:00 2001 From: "Oscar D. Lara Yejas" Date: Mon, 28 Sep 2015 19:48:32 +0100 Subject: [PATCH 6/8] Added test cases for as.data.frame --- R/pkg/inst/tests/test_sparkSQL.R | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index e159a69584274..8f85eecbc4a97 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -1327,6 +1327,13 @@ test_that("SQL error message is returned from JVM", { expect_equal(grepl("Table Not Found: blah", retError), TRUE) }) +test_that("Method as.data.frame as a synonym for collect()", { + irisDF <- createDataFrame(sqlContext, iris) + expect_equal(as.data.frame(irisDF), collect(irisDF)) + irisDF2 <- irisDF[irisDF$Species == "setosa", ] + expect_equal(as.data.frame(irisDF2), collect(irisDF2)) +}) + unlink(parquetPath) unlink(jsonPath) -unlink(jsonPathNa) +unlink(jsonPathNa) \ No newline at end of file From a346cc62834ef28246505483cb76957e8b8cba0a Mon Sep 17 00:00:00 2001 From: "Oscar D. Lara Yejas" Date: Mon, 28 Sep 2015 21:33:29 +0100 Subject: [PATCH 7/8] Changed setMethod declaration to comply with standard --- R/pkg/R/DataFrame.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index df580fa3e2e92..65e368c47dd81 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -1863,8 +1863,8 @@ setMethod("crosstab", #' irisDF <- createDataFrame(sqlContext, iris) #' df <- as.data.frame(irisDF[irisDF$Species == "setosa", ]) #' } -setMethod(f = "as.data.frame", - signature = "DataFrame", +setMethod("as.data.frame", + signature(x = "DataFrame"), function(x, ...) { # Check if additional parameters have been passed if (length(list(...)) > 0) { From 6c4dcbcdc56b790b3e0207a1698f5def1b5c3ca5 Mon Sep 17 00:00:00 2001 From: "Oscar D. Lara Yejas" Date: Wed, 30 Sep 2015 11:13:10 -0700 Subject: [PATCH 8/8] Removed changes to .gitignore --- .gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/.gitignore b/.gitignore index 718302500017b..debad77ec2ad3 100644 --- a/.gitignore +++ b/.gitignore @@ -74,4 +74,3 @@ metastore/ warehouse/ TempStatsStore/ sql/hive-thriftserver/test_warehouses -.Rproj.user