Support omitting path for read.df and make the error messages better
HyukjinKwon committed Sep 26, 2016
1 parent c2a64db commit 5c3d222
Showing 4 changed files with 68 additions and 11 deletions.
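
For context, a minimal sketch of the behavior this commit enables, assuming a running SparkR session; the JSON path is a hypothetical placeholder:

library(SparkR)
sparkR.session()

# The path argument may now be omitted; when a source still requires one,
# the JVM error surfaces as a concise single line rather than a stack trace.
df <- read.df("examples/people.json", source = "json")

# Wrong argument types now fail fast on the R side with a clear message.
tryCatch(read.df(path = c(3)),
         error = function(e) message(conditionMessage(e)))
# path should be character, NULL or omitted.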
11 changes: 10 additions & 1 deletion R/pkg/R/DataFrame.R
@@ -2624,6 +2624,15 @@ setMethod("except",
setMethod("write.df",
signature(df = "SparkDataFrame"),
function(df, path = NULL, source = NULL, mode = "error", ...) {
if (!is.character(path) && !is.null(path)) {
stop("path should be character, NULL or omitted.")
}
if (!is.character(source) && !is.null(source)) {
stop("source should be character, NULL or omitted. It is 'parquet' by default.")
}
if (!is.character(mode)) {
stop("mode should be character or omitted. It is 'error' by default.")
}
if (is.null(source)) {
source <- getDefaultSqlSource()
}
@@ -2636,7 +2645,7 @@ setMethod("write.df",
write <- callJMethod(write, "format", source)
write <- callJMethod(write, "mode", jmode)
write <- callJMethod(write, "options", options)
write <- callJMethod(write, "save")
write <- tryCatch(callJMethod(write, "save"), error = captureJVMException)
})

#' @rdname write.df
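To illustrate the write.df side of the change, a short sketch assuming df is an existing SparkDataFrame (expected messages shown as comments):

# Omitting path now reaches DataFrameWriter.save(); a source that requires
# a path (e.g. csv) fails with the trimmed JVM message.
tryCatch(write.df(df, source = "csv"),
         error = function(e) message(conditionMessage(e)))
# ... 'path' is not specified

# The new type checks reject non-character arguments before calling the JVM.
tryCatch(write.df(df, mode = TRUE),
         error = function(e) message(conditionMessage(e)))
# mode should be character or omitted. It is 'error' by default.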
22 changes: 16 additions & 6 deletions R/pkg/R/SQLContext.R
@@ -771,6 +771,12 @@ dropTempView <- function(viewName) {
#' @method read.df default
#' @note read.df since 1.4.0
read.df.default <- function(path = NULL, source = NULL, schema = NULL, na.strings = "NA", ...) {
if (!is.character(path) && !is.null(path)) {
stop("path should be character, NULL or omitted.")
}
if (!is.character(source) && !is.null(source)) {
stop("source should be character, NULL or omitted. It is 'parquet' by default.")
}
sparkSession <- getSparkSession()
options <- varargsToEnv(...)
if (!is.null(path)) {
@@ -784,16 +790,20 @@ read.df.default <- function(path = NULL, source = NULL, schema = NULL, na.string
}
if (!is.null(schema)) {
stopifnot(class(schema) == "structType")
sdf <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "loadDF", sparkSession, source,
schema$jobj, options)
sdf <- tryCatch({
callJStatic("org.apache.spark.sql.api.r.SQLUtils", "loadDF", sparkSession, source,
schema$jobj, options)
}, error = captureJVMException)
} else {
sdf <- callJStatic("org.apache.spark.sql.api.r.SQLUtils",
"loadDF", sparkSession, source, options)
sdf <- tryCatch({
callJStatic("org.apache.spark.sql.api.r.SQLUtils",
"loadDF", sparkSession, source, options)
}, error = captureJVMException)
}
dataFrame(sdf)
}

read.df <- function(x, ...) {
read.df <- function(x = NULL, ...) {
dispatchFunc("read.df(path = NULL, source = NULL, schema = NULL, ...)", x, ...)
}

@@ -805,7 +815,7 @@ loadDF.default <- function(path = NULL, source = NULL, schema = NULL, ...) {
read.df(path, source, schema, ...)
}

loadDF <- function(x, ...) {
loadDF <- function(x = NULL, ...) {
dispatchFunc("loadDF(path = NULL, source = NULL, schema = NULL, ...)", x, ...)
}

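Because x now defaults to NULL in both generics, read.df and loadDF can be called with named arguments only; a sketch assuming a SparkR session, with placeholder paths:

df1 <- read.df("examples/people.json", source = "json")         # positional path
df2 <- read.df(path = "examples/people.json", source = "json")  # named path

# With no path at all, dispatch now succeeds and the JVM reports the real
# problem instead of R failing on a missing argument.
tryCatch(read.df(source = "json"),
         error = function(e) message(conditionMessage(e)))
# Unable to infer schema for JSON at . It must be specified manually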
15 changes: 15 additions & 0 deletions R/pkg/R/utils.R
@@ -698,6 +698,21 @@ isSparkRShell <- function() {
grepl(".*shell\\.R$", Sys.getenv("R_PROFILE_USER"), perl = TRUE)
}

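# Trims a JVM exception captured from callJMethod/callJStatic down to the
# first line of the Java message for the two common exception types, and
# re-raises anything else with the full stack trace.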
captureJVMException <- function(e) {
stacktrace <- as.character(e)
if (any(grep("java.lang.IllegalArgumentException: ", stacktrace))) {
msg <- strsplit(stacktrace, "java.lang.IllegalArgumentException: ", fixed = TRUE)[[1]][2]
first <- strsplit(msg, "\r?\n\tat")[[1]][1]
stop(first)
} else if (any(grep("org.apache.spark.sql.AnalysisException: ", stacktrace))) {
msg <- strsplit(stacktrace, "org.apache.spark.sql.AnalysisException: ", fixed = TRUE)[[1]][2]
first <- strsplit(msg, "\r?\n\tat")[[1]][1]
stop(first)
} else {
stop(stacktrace)
}
}

# rbind a list of rows with raw (binary) columns
#
# @param inputData a list of rows, with each row a list
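As a standalone illustration of captureJVMException, the sketch below feeds it a fabricated condition (the exception text and frame are made up for the example):

e <- simpleError(paste0(
  "java.lang.IllegalArgumentException: 'path' is not specified\n",
  "\tat org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala)"))

# Only the first line of the Java message survives; the re-raised error is
# caught here just to show its message.
tryCatch(tryCatch(stop(e), error = captureJVMException),
         error = function(e2) conditionMessage(e2))
# [1] "'path' is not specified"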
31 changes: 27 additions & 4 deletions R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -2544,13 +2544,36 @@ test_that("Spark version from SparkSession", {
expect_equal(ver, version)
})

test_that("Call DataFrameWriter.save() API in Java without path", {
test_that("Call DataFrameWriter.save() API in Java without path and check argument types", {
df <- read.df(jsonPath, "json")
# This tests if the exception is thrown from Spark side not from SparkR side.
# This tests that the exception is thrown from the JVM, not from the SparkR side.
# It makes sure that we can omit the path argument in the write.df API, which then
# calls DataFrameWriter.save() without a path.
expect_error(write.df(df, source = "csv"),
"java.lang.IllegalArgumentException: 'path' is not specified")
expect_error(write.df(df, source = "csv"), "'path' is not specified")

# Argument checks on the R side.
expect_error(write.df(df, "data.tmp", source = c(1, 2)),
"source should be character, NULL or omitted. It is 'parquet' by default.")
expect_error(write.df(df, path = c(3)),
"path should be character, NULL or omitted.")
expect_error(write.df(df, mode = TRUE),
"mode should be character or omitted. It is 'error' by default.")
})

test_that("Call DataFrameWriter.load() API in Java without path and check argument types", {
df <- read.df(jsonPath, "json")
# This tests that the exception is thrown from the JVM, not from the SparkR side.
# It makes sure that we can omit the path argument in the read.df API, which then
# calls DataFrameReader.load() without a path.
expect_error(read.df(source = "json"),
"Unable to infer schema for JSON at . It must be specified manually")
expect_error(read.df("arbitrary_path"), "Path does not exist:")

# Argument checks on the R side.
expect_error(read.df(path = c(3)),
"path should be character, NULL or omitted.")
expect_error(read.df(jsonPath, source = c(1, 2)),
"source should be character, NULL or omitted. It is 'parquet' by default.")
})

unlink(parquetPath)