Support omitting path for read.df and make the error messages better
HyukjinKwon committed Sep 26, 2016
1 parent c2a64db commit 5c3d222
Showing 4 changed files with 68 additions and 11 deletions.
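
For context, a minimal sketch of the behavior this commit enables, assuming a running SparkR session; the JSON path is a hypothetical placeholder:

library(SparkR)
sparkR.session()

# The path argument may now be omitted; when a source still requires one,
# the JVM error surfaces as a concise single line rather than a stack trace.
df <- read.df("examples/people.json", source = "json")

# Wrong argument types now fail fast on the R side with a clear message.
tryCatch(read.df(path = c(3)),
         error = function(e) message(conditionMessage(e)))
# path should be character, NULL or omitted.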
11 changes: 10 additions & 1 deletion R/pkg/R/DataFrame.R
@@ -2624,6 +2624,15 @@ setMethod("except",
setMethod("write.df",
signature(df = "SparkDataFrame"),
function(df, path = NULL, source = NULL, mode = "error", ...) {
if (!is.character(path) && !is.null(path)) {
stop("path should be character, NULL or omitted.")
}
if (!is.character(source) && !is.null(source)) {
stop("source should be character, NULL or omitted. It is 'parquet' by default.")
}
if (!is.character(mode)) {
stop("mode should be character or omitted. It is 'error' by default.")
}
if (is.null(source)) {
source <- getDefaultSqlSource()
}
@@ -2636,7 +2645,7 @@ setMethod("write.df",
write <- callJMethod(write, "format", source)
write <- callJMethod(write, "mode", jmode)
write <- callJMethod(write, "options", options)
write <- callJMethod(write, "save")
write <- tryCatch(callJMethod(write, "save"), error = captureJVMException)
})

#' @rdname write.df
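To illustrate the write.df side of the change, a short sketch assuming df is an existing SparkDataFrame (expected messages shown as comments):

# Omitting path now reaches DataFrameWriter.save(); a source that requires
# a path (e.g. csv) fails with the trimmed JVM message.
tryCatch(write.df(df, source = "csv"),
         error = function(e) message(conditionMessage(e)))
# ... 'path' is not specified

# The new type checks reject non-character arguments before calling the JVM.
tryCatch(write.df(df, mode = TRUE),
         error = function(e) message(conditionMessage(e)))
# mode should be character or omitted. It is 'error' by default.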
22 changes: 16 additions & 6 deletions R/pkg/R/SQLContext.R
@@ -771,6 +771,12 @@ dropTempView <- function(viewName) {
#' @method read.df default
#' @note read.df since 1.4.0
read.df.default <- function(path = NULL, source = NULL, schema = NULL, na.strings = "NA", ...) {
if (!is.character(path) && !is.null(path)) {
stop("path should be character, NULL or omitted.")
}
if (!is.character(source) && !is.null(source)) {
stop("source should be character, NULL or omitted. It is 'parquet' by default.")
}
sparkSession <- getSparkSession()
options <- varargsToEnv(...)
if (!is.null(path)) {
@@ -784,16 +790,20 @@ read.df.default <- function(path = NULL, source = NULL, schema = NULL, na.string
}
if (!is.null(schema)) {
stopifnot(class(schema) == "structType")
sdf <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "loadDF", sparkSession, source,
schema$jobj, options)
sdf <- tryCatch({
callJStatic("org.apache.spark.sql.api.r.SQLUtils", "loadDF", sparkSession, source,
schema$jobj, options)
}, error = captureJVMException)
} else {
sdf <- callJStatic("org.apache.spark.sql.api.r.SQLUtils",
"loadDF", sparkSession, source, options)
sdf <- tryCatch({
callJStatic("org.apache.spark.sql.api.r.SQLUtils",
"loadDF", sparkSession, source, options)
}, error = captureJVMException)
}
dataFrame(sdf)
}

read.df <- function(x, ...) {
read.df <- function(x = NULL, ...) {
dispatchFunc("read.df(path = NULL, source = NULL, schema = NULL, ...)", x, ...)
}

@@ -805,7 +815,7 @@ loadDF.default <- function(path = NULL, source = NULL, schema = NULL, ...) {
read.df(path, source, schema, ...)
}

loadDF <- function(x, ...) {
loadDF <- function(x = NULL, ...) {
dispatchFunc("loadDF(path = NULL, source = NULL, schema = NULL, ...)", x, ...)
}

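Because x now defaults to NULL in both generics, read.df and loadDF can be called with named arguments only; a sketch assuming a SparkR session, with placeholder paths:

df1 <- read.df("examples/people.json", source = "json")         # positional path
df2 <- read.df(path = "examples/people.json", source = "json")  # named path

# With no path at all, dispatch now succeeds and the JVM reports the real
# problem instead of R failing on a missing argument.
tryCatch(read.df(source = "json"),
         error = function(e) message(conditionMessage(e)))
# Unable to infer schema for JSON at . It must be specified manually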
15 changes: 15 additions & 0 deletions R/pkg/R/utils.R
@@ -698,6 +698,21 @@ isSparkRShell <- function() {
grepl(".*shell\\.R$", Sys.getenv("R_PROFILE_USER"), perl = TRUE)
}

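# Trims a JVM exception captured from callJMethod/callJStatic down to the
# first line of the Java message for the two common exception types, and
# re-raises anything else with the full stack trace.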
captureJVMException <- function(e) {
stacktrace <- as.character(e)
if (any(grep("java.lang.IllegalArgumentException: ", stacktrace))) {
msg <- strsplit(stacktrace, "java.lang.IllegalArgumentException: ", fixed = TRUE)[[1]][2]
first <- strsplit(msg, "\r?\n\tat")[[1]][1]
stop(first)
} else if (any(grep("org.apache.spark.sql.AnalysisException: ", stacktrace))) {
msg <- strsplit(stacktrace, "org.apache.spark.sql.AnalysisException: ", fixed = TRUE)[[1]][2]
first <- strsplit(msg, "\r?\n\tat")[[1]][1]
stop(first)
} else {
stop(stacktrace)
}
}

# rbind a list of rows with raw (binary) columns
#
# @param inputData a list of rows, with each row a list
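As a standalone illustration of captureJVMException, the sketch below feeds it a fabricated condition (the exception text and frame are made up for the example):

e <- simpleError(paste0(
  "java.lang.IllegalArgumentException: 'path' is not specified\n",
  "\tat org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala)"))

# Only the first line of the Java message survives; the re-raised error is
# caught here just to show its message.
tryCatch(tryCatch(stop(e), error = captureJVMException),
         error = function(e2) conditionMessage(e2))
# [1] "'path' is not specified"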
31 changes: 27 additions & 4 deletions R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -2544,13 +2544,36 @@ test_that("Spark version from SparkSession", {
expect_equal(ver, version)
})

test_that("Call DataFrameWriter.save() API in Java without path", {
test_that("Call DataFrameWriter.save() API in Java without path and check argument types", {
df <- read.df(jsonPath, "json")
# This tests if the exception is thrown from Spark side not from SparkR side.
# This tests that the exception is thrown from the JVM, not from the SparkR side.
# It makes sure that we can omit the path argument in the write.df API, which then
# calls DataFrameWriter.save() without a path.
expect_error(write.df(df, source = "csv"),
"java.lang.IllegalArgumentException: 'path' is not specified")
expect_error(write.df(df, source = "csv"), "'path' is not specified")

# Argument checks on the R side.
expect_error(write.df(df, "data.tmp", source = c(1, 2)),
"source should be character, NULL or omitted. It is 'parquet' by default.")
expect_error(write.df(df, path = c(3)),
"path should be character, NULL or omitted.")
expect_error(write.df(df, mode = TRUE),
"mode should be character or omitted. It is 'error' by default.")
})

test_that("Call DataFrameWriter.load() API in Java without path and check argument types", {
df <- read.df(jsonPath, "json")
# This tests that the exception is thrown from the JVM, not from the SparkR side.
# It makes sure that we can omit the path argument in the read.df API, which then
# calls DataFrameReader.load() without a path.
expect_error(read.df(source = "json"),
"Unable to infer schema for JSON at . It must be specified manually")
expect_error(read.df("arbitrary_path"), "Path does not exist:")

# Argument checks on the R side.
expect_error(read.df(path = c(3)),
"path should be character, NULL or omitted.")
expect_error(read.df(jsonPath, source = c(1, 2)),
"source should be character, NULL or omitted. It is 'parquet' by default.")
})

unlink(parquetPath)