Skip to content

Commit

Permalink
[SPARK-12198][SPARKR] SparkR support read.parquet and deprecate parqu…
Browse files Browse the repository at this point in the history
…etFile

SparkR now supports ```read.parquet``` and deprecates ```parquetFile```. This change is similar to #10145, which did the same for ```jsonFile```.

Author: Yanbo Liang <ybliang8@gmail.com>

Closes #10191 from yanboliang/spark-12198.

(cherry picked from commit eeb5872)
Signed-off-by: Shivaram Venkataraman <shivaram@cs.berkeley.edu>
  • Loading branch information
yanboliang authored and shivaram committed Dec 10, 2015
1 parent f939c71 commit b7b9f77
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 6 deletions.
1 change: 1 addition & 0 deletions R/pkg/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,7 @@ export("as.DataFrame",
"loadDF",
"parquetFile",
"read.df",
"read.parquet",
"sql",
"table",
"tableNames",
Expand Down
16 changes: 14 additions & 2 deletions R/pkg/R/SQLContext.R
Original file line number Diff line number Diff line change
Expand Up @@ -256,18 +256,30 @@ jsonRDD <- function(sqlContext, rdd, schema = NULL, samplingRatio = 1.0) {
}
}


#' Create a DataFrame from a Parquet file.
#'
#' Loads a Parquet file, returning the result as a DataFrame.
#'
#' @param sqlContext SQLContext to use
#' @param ... Path(s) of parquet file(s) to read.
#' @param path Path of file to read. A vector of multiple paths is allowed.
#' @return DataFrame
#' @rdname read.parquet
#' @name read.parquet
#' @export
read.parquet <- function(sqlContext, path) {
  # Expand every supplied Parquet path to its normalized form so callers may
  # pass relative or "~"-style paths. Warnings raised by normalizePath() are
  # deliberately silenced here.
  normalized <- suppressWarnings(normalizePath(path))
  # Invoke "read" on the SQLContext, then "parquet" on the returned object,
  # passing the paths as a list (one element per path).
  reader <- callJMethod(sqlContext, "read")
  dataFrame(callJMethod(reader, "parquet", as.list(normalized)))
}

#' @rdname read.parquet
#' @name parquetFile
#' @export
# TODO: Implement saveasParquetFile and write examples for both
parquetFile <- function(sqlContext, ...) {
.Deprecated("read.parquet")
# Allow the user to have a more flexible definiton of the text file path
paths <- lapply(list(...), function(x) suppressWarnings(normalizePath(x)))
sdf <- callJMethod(sqlContext, "parquetFile", paths)
Expand Down
11 changes: 7 additions & 4 deletions R/pkg/inst/tests/testthat/test_sparkSQL.R
Original file line number Diff line number Diff line change
Expand Up @@ -1420,22 +1420,25 @@ test_that("mutate(), transform(), rename() and names()", {
detach(airquality)
})

test_that("write.df() on DataFrame and works with parquetFile", {
test_that("write.df() on DataFrame and works with read.parquet", {
df <- jsonFile(sqlContext, jsonPath)
write.df(df, parquetPath, "parquet", mode="overwrite")
parquetDF <- parquetFile(sqlContext, parquetPath)
parquetDF <- read.parquet(sqlContext, parquetPath)
expect_is(parquetDF, "DataFrame")
expect_equal(count(df), count(parquetDF))
})

test_that("parquetFile works with multiple input paths", {
test_that("read.parquet()/parquetFile() works with multiple input paths", {
df <- jsonFile(sqlContext, jsonPath)
write.df(df, parquetPath, "parquet", mode="overwrite")
parquetPath2 <- tempfile(pattern = "parquetPath2", fileext = ".parquet")
write.df(df, parquetPath2, "parquet", mode="overwrite")
parquetDF <- parquetFile(sqlContext, parquetPath, parquetPath2)
parquetDF <- read.parquet(sqlContext, c(parquetPath, parquetPath2))
expect_is(parquetDF, "DataFrame")
expect_equal(count(parquetDF), count(df) * 2)
parquetDF2 <- suppressWarnings(parquetFile(sqlContext, parquetPath, parquetPath2))
expect_is(parquetDF2, "DataFrame")
expect_equal(count(parquetDF2), count(df) * 2)

# Test if varargs works with variables
saveMode <- "overwrite"
Expand Down

0 comments on commit b7b9f77

Please sign in to comment.