Skip to content

Commit

Permalink
ARROW-8433: [R] Add feather alias for ipc format in dataset API
Browse files: browse the repository at this point in the history
Closes #6920 from nealrichardson/feather-dataset

Authored-by: Neal Richardson <neal.p.richardson@gmail.com>
Signed-off-by: Wes McKinney <wesm+git@apache.org>
  • Loading branch information
nealrichardson authored and wesm committed Apr 13, 2020
1 parent 3725aaa commit 1a4caa9
Show file tree
Hide file tree
Showing 5 changed files with 19 additions and 19 deletions.
16 changes: 8 additions & 8 deletions r/R/dataset.R
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,8 @@ open_dataset <- function(sources,
#' * `filesystem`: A [FileSystem]
#' * `selector`: A [FileSelector]
#' * `format`: A string identifier of the format of the files in `path`.
#' Currently supported options are "parquet", "arrow", and "ipc" (an alias for
#' the Arrow file format)
#' Currently "parquet" and "ipc"/"arrow"/"feather" (aliases for each other)
#' are supported. For Feather, only version 2 files are supported.
#' @section Methods:
#'
#' A `Dataset` has the following methods:
Expand Down Expand Up @@ -269,7 +269,7 @@ DatasetFactory <- R6Class("DatasetFactory", inherit = ArrowObject,
)
DatasetFactory$create <- function(x,
filesystem = c("auto", "local"),
format = c("parquet", "arrow", "ipc"),
format = c("parquet", "arrow", "ipc", "feather"),
partitioning = NULL,
allow_not_found = FALSE,
recursive = TRUE,
Expand Down Expand Up @@ -334,8 +334,8 @@ DatasetFactory$create <- function(x,
#' @param filesystem A string identifier for the filesystem corresponding to
#' `x`. Currently only "local" is supported.
#' @param format A string identifier of the format of the files in `x`.
#' Currently supported options are "parquet", "arrow", and "ipc" (an alias for
#' the Arrow file format)
#' Currently "parquet" and "ipc"/"arrow"/"feather" (aliases for each other)
#' are supported. For Feather, only version 2 files are supported.
#' @param partitioning One of
#' * A `Schema`, in which case the file paths relative to `sources` will be
#' parsed, and path segments will be matched with the schema fields. For
Expand Down Expand Up @@ -400,8 +400,8 @@ FileSystemDatasetFactory$create <- function(filesystem,
#' @section Factory:
#' `FileFormat$create()` takes the following arguments:
#' * `format`: A string identifier of the format of the files in `path`.
#' Currently supported options are "parquet", "arrow", and "ipc" (an alias for
#' the Arrow file format)
#' Currently "parquet" and "ipc"/"arrow"/"feather" (aliases for each other)
#' are supported. For Feather, only version 2 files are supported.
#' * `...`: Additional format-specific options
#' format="parquet":
#' * `use_buffered_stream`: Read files through buffered input streams rather than
Expand Down Expand Up @@ -436,7 +436,7 @@ FileFormat <- R6Class("FileFormat", inherit = ArrowObject,
FileFormat$create <- function(format, ...) {
if (format == "parquet") {
ParquetFileFormat$create(...)
} else if (format %in% c("ipc", "arrow")) { # These are aliases for the same thing
} else if (format %in% c("ipc", "arrow", "feather")) { # These are aliases for the same thing
shared_ptr(IpcFileFormat, dataset___IpcFileFormat__Make())
} else {
stop("Unsupported file format: ", format, call. = FALSE)
Expand Down
4 changes: 2 additions & 2 deletions r/man/Dataset.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions r/man/FileFormat.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions r/man/dataset_factory.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions r/tests/testthat/test-dataset.R
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,8 @@ test_that("Setup (putting data in the dir)", {
# Now, an IPC format dataset
dir.create(file.path(ipc_dir, 3))
dir.create(file.path(ipc_dir, 4))
write_arrow(df1, file.path(ipc_dir, 3, "file1.arrow"))
write_arrow(df2, file.path(ipc_dir, 4, "file2.arrow"))
write_feather(df1, file.path(ipc_dir, 3, "file1.arrow"))
write_feather(df2, file.path(ipc_dir, 4, "file2.arrow"))
expect_length(dir(ipc_dir, recursive = TRUE), 2)
})

Expand Down Expand Up @@ -167,8 +167,8 @@ test_that("Partitioning inference", {
)
})

test_that("IPC/Arrow format data", {
ds <- open_dataset(ipc_dir, partitioning = "part", format = "arrow")
test_that("IPC/Feather format data", {
ds <- open_dataset(ipc_dir, partitioning = "part", format = "feather")
expect_identical(names(ds), c(names(df1), "part"))
expect_warning(
dim(ds),
Expand Down

0 comments on commit 1a4caa9

Please sign in to comment.