ARROW-7792: [R] read_* functions should close connection to file
This seems only to have manifested as an error on Windows

Closes #6769 from nealrichardson/close-file

Authored-by: Neal Richardson <neal.p.richardson@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
nealrichardson committed Mar 31, 2020
1 parent 7c7014b commit 78ee4ae
Showing 6 changed files with 55 additions and 43 deletions.
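
A hedged repro sketch of the symptom, not part of the commit itself (it assumes the arrow R package as of this change; the file name is illustrative). Windows keeps a file locked while a handle to it is open, so a reader that never closes its connection leaves the file undeletable:

# Hypothetical repro (not from the commit): before this fix, read_csv_arrow()
# left its file handle open, and on Windows that open handle locks the file.
library(arrow)

tf <- tempfile(fileext = ".csv")
write.csv(iris, tf, row.names = FALSE)
df <- read_csv_arrow(tf)
file.remove(tf)  # errored on Windows before this fix; succeeds after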
4 changes: 4 additions & 0 deletions r/R/csv.R
@@ -114,6 +114,10 @@ read_delim_arrow <- function(file,
    convert_options <- readr_to_csv_convert_options(na, quoted_na)
  }

  if (is.character(file)) {
    file <- make_readable_file(file)
    on.exit(file$close())
  }
  reader <- CsvTableReader$create(
    file,
    read_options = read_options,
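
The same three-line guard recurs in each reader touched by this commit: when `file` is a path string, it is opened with arrow's internal make_readable_file(), and on.exit() registers the close immediately, so the handle is released whether the function returns normally or errors partway through. A minimal sketch of the idiom in base R, with file()/close() standing in for the arrow helpers:

# Sketch of the on.exit() cleanup idiom (base R stand-ins, not arrow internals).
read_all_bytes <- function(path) {
  con <- file(path, open = "rb")
  on.exit(close(con))  # registered up front; runs on return and on error
  readBin(con, what = "raw", n = file.size(path))
}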
4 changes: 4 additions & 0 deletions r/R/feather.R
@@ -123,6 +123,10 @@ write_feather <- function(x,
#' df <- read_feather(tf, col_select = starts_with("Sepal"))
#' }
read_feather <- function(file, col_select = NULL, as_data_frame = TRUE, ...) {
  if (is.character(file)) {
    file <- make_readable_file(file)
    on.exit(file$close())
  }
  reader <- FeatherReader$create(file, ...)

  all_columns <- ipc___feather___Reader__column_names(reader)
71 changes: 35 additions & 36 deletions r/R/parquet.R
@@ -36,10 +36,14 @@
#' }
#' @export
read_parquet <- function(file,
                         col_select = NULL,
                         as_data_frame = TRUE,
                         props = ParquetReaderProperties$create(),
                         ...) {
  if (is.character(file)) {
    file <- make_readable_file(file)
    on.exit(file$close())
  }
  reader <- ParquetFileReader$create(file, props = props, ...)
  tab <- reader$ReadTable(!!enquo(col_select))
@@ -117,38 +121,33 @@ read_parquet <- function(file,
#' }
#' @export
write_parquet <- function(x,
                          sink,
                          chunk_size = NULL,

                          # writer properties
                          version = NULL,
                          compression = NULL,
                          compression_level = NULL,
                          use_dictionary = NULL,
                          write_statistics = NULL,
                          data_page_size = NULL,

                          properties = ParquetWriterProperties$create(
                            x,
                            version = version,
                            compression = compression,
                            compression_level = compression_level,
                            use_dictionary = use_dictionary,
                            write_statistics = write_statistics,
                            data_page_size = data_page_size
                          ),

                          # arrow writer properties
                          use_deprecated_int96_timestamps = FALSE,
                          coerce_timestamps = NULL,
                          allow_truncated_timestamps = FALSE,

                          arrow_properties = ParquetArrowWriterProperties$create(
                            use_deprecated_int96_timestamps = use_deprecated_int96_timestamps,
                            coerce_timestamps = coerce_timestamps,
                            allow_truncated_timestamps = allow_truncated_timestamps
                          )
) {
                          sink,
                          chunk_size = NULL,
                          # writer properties
                          version = NULL,
                          compression = NULL,
                          compression_level = NULL,
                          use_dictionary = NULL,
                          write_statistics = NULL,
                          data_page_size = NULL,
                          properties = ParquetWriterProperties$create(
                            x,
                            version = version,
                            compression = compression,
                            compression_level = compression_level,
                            use_dictionary = use_dictionary,
                            write_statistics = write_statistics,
                            data_page_size = data_page_size
                          ),
                          # arrow writer properties
                          use_deprecated_int96_timestamps = FALSE,
                          coerce_timestamps = NULL,
                          allow_truncated_timestamps = FALSE,
                          arrow_properties = ParquetArrowWriterProperties$create(
                            use_deprecated_int96_timestamps = use_deprecated_int96_timestamps,
                            coerce_timestamps = coerce_timestamps,
                            allow_truncated_timestamps = allow_truncated_timestamps
                          )) {
  x_out <- x
  x <- to_arrow(x)
6 changes: 0 additions & 6 deletions r/tests/testthat/test-csv.R
@@ -42,10 +42,6 @@ test_that("read_csv_arrow(as_data_frame=TRUE)", {

  tab1 <- read_csv_arrow(tf, as_data_frame = TRUE)
  expect_equivalent(iris, tab1)
  tab2 <- read_csv_arrow(mmap_open(tf), as_data_frame = TRUE)
  expect_equivalent(iris, tab2)
  tab3 <- read_csv_arrow(ReadableFile$create(tf), as_data_frame = TRUE)
  expect_equivalent(iris, tab3)
})

test_that("read_delim_arrow parsing options: delim", {
@@ -135,8 +131,6 @@ test_that("read_csv_arrow parsing options: skip_empty_rows", {
})

test_that("read_csv_arrow parsing options: na strings", {
  # There's some weird crash that happens on Appveyor in this test
  skip_on_os("windows")
  tf <- tempfile()
  on.exit(unlink(tf))
9 changes: 9 additions & 0 deletions r/tests/testthat/test-feather.R
@@ -163,4 +163,13 @@ test_that("FeatherReader", {
  expect_identical(reader2$version, 2L)
})

test_that("read_feather closes connection to file", {
  tf <- tempfile()
  write_feather(tib, sink = tf)
  expect_true(file.exists(tf))
  read_feather(tf)
  expect_error(file.remove(tf), NA)
  expect_false(file.exists(tf))
})

unlink(feather_file)
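
A note on the assertion style in the new tests: in testthat, passing NA as the second argument to expect_error() asserts that the expression completes without raising an error, so expect_error(file.remove(tf), NA) checks that the file can be deleted, i.e. that the reader released its handle. For example:

# testthat idiom: expect_error(expr, NA) asserts that expr does NOT error.
library(testthat)
expect_error(log(1), NA)    # passes: log(1) completes without error
expect_error(log("a"), NA)  # fails: log("a") raises an error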
4 changes: 3 additions & 1 deletion r/tests/testthat/test-parquet.R
@@ -30,11 +30,13 @@ test_that("reading a known Parquet file to tibble", {
test_that("simple int column roundtrip", {
  df <- tibble::tibble(x = 1:5)
  pq_tmp_file <- tempfile() # You can specify the .parquet here but that's probably not necessary
  on.exit(unlink(pq_tmp_file))

  write_parquet(df, pq_tmp_file)
  df_read <- read_parquet(pq_tmp_file)
  expect_equivalent(df, df_read)
  # Make sure file connection is cleaned up
  expect_error(file.remove(pq_tmp_file), NA)
  expect_false(file.exists(pq_tmp_file))
})

test_that("read_parquet() supports col_select", {
