Skip to content

Commit

Permalink
ARROW-8354: [R] Fix segfault in Table to Array conversion
Browse files Browse the repository at this point in the history
The Converter::Make did not like receiving empty ArrayVector. The bug was exposed in ARROW-8216 which could return an empty selection vector due to a randomly generated fixture in test-dplyr.R

Closes #6871 from fsaintjacques/ARROW-8354

Lead-authored-by: François Saint-Jacques <fsaintjacques@gmail.com>
Co-authored-by: Neal Richardson <neal.p.richardson@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
  • Loading branch information
fsaintjacques and nealrichardson committed Apr 8, 2020
1 parent c886381 commit 0365356
Show file tree
Hide file tree
Showing 7 changed files with 47 additions and 8 deletions.
2 changes: 1 addition & 1 deletion r/R/table.R
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ Table$create <- function(..., schema = NULL) {
}

#' @export
as.data.frame.Table <- function(x, row.names = NULL, optional = FALSE, use_threads = TRUE, ...){
as.data.frame.Table <- function(x, row.names = NULL, optional = FALSE, ...) {
Table__to_dataframe(x, use_threads = option_use_threads())
}

Expand Down
4 changes: 3 additions & 1 deletion r/src/array_from_vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,9 @@ std::shared_ptr<Array> MakeStructArray(SEXP df, const std::shared_ptr<DataType>&
for (int i = 0; i < n; i++) {
children[i] = Array__from_vector(VECTOR_ELT(df, i), type->child(i)->type(), true);
}
return std::make_shared<StructArray>(type, children[0]->length(), children);

int64_t rows = n ? children[0]->length() : 0;
return std::make_shared<StructArray>(type, rows, children);
}

std::shared_ptr<Array> MakeListArray(SEXP x, const std::shared_ptr<DataType>& type) {
Expand Down
13 changes: 9 additions & 4 deletions r/src/array_to_vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -656,8 +656,15 @@ class Converter_Null : public Converter {
return Status::OK();
}
};

std::shared_ptr<Converter> Converter::Make(const ArrayVector& arrays) {
switch (arrays[0]->type_id()) {
if (arrays.empty()) {
Rcpp::stop(tfm::format("Must have at least one array to create a converter"));
}

auto type = arrays[0]->type();

switch (type->id()) {
// direct support
case Type::INT32:
return std::make_shared<arrow::r::Converter_SimpleArray<INTSXP>>(arrays);
Expand Down Expand Up @@ -742,7 +749,7 @@ std::shared_ptr<Converter> Converter::Make(const ArrayVector& arrays) {
break;
}

Rcpp::stop(tfm::format("cannot handle Array of type %s", arrays[0]->type()->name()));
Rcpp::stop(tfm::format("cannot handle Array of type %s", type->name()));
return nullptr;
}

Expand Down Expand Up @@ -813,7 +820,6 @@ SEXP Array__as_vector(const std::shared_ptr<arrow::Array>& array) {

// [[arrow::export]]
SEXP ChunkedArray__as_vector(const std::shared_ptr<arrow::ChunkedArray>& chunked_array) {
// NB: this segfaults if there are 0 chunks (presumably something tries chunks[0])
return arrow::r::ArrayVector__as_vector(chunked_array->length(),
chunked_array->chunks());
}
Expand Down Expand Up @@ -849,7 +855,6 @@ Rcpp::List Table__to_dataframe(const std::shared_ptr<arrow::Table>& table,
std::vector<std::shared_ptr<arrow::r::Converter>> converters(nc);

for (int64_t i = 0; i < nc; i++) {
// This crashes if num_chunks == 0
converters[i] = arrow::r::Converter::Make(table->column(i)->chunks());
names[i] = table->field(i)->name();
}
Expand Down
18 changes: 16 additions & 2 deletions r/src/compute.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,14 @@ std::shared_ptr<arrow::Table> Table__Filter(const std::shared_ptr<arrow::Table>&
options.null_selection_behavior = arrow::compute::FilterOptions::EMIT_NULL;
}
STOP_IF_NOT_OK(arrow::compute::Filter(&context, table, filter, options, &out));
return out.table();
std::shared_ptr<arrow::Table> tab = out.table();
if (tab->num_rows() == 0) {
// Slight hack: if there are no rows in the result, instead do a 0-length
// slice so that we get chunked arrays with 1 chunk (itself length 0).
// We need that because the Arrow-to-R converter fails when there are 0 chunks.
return table->Slice(0, 0);
}
return tab;
}

// [[arrow::export]]
Expand All @@ -249,6 +256,13 @@ std::shared_ptr<arrow::Table> Table__FilterChunked(
options.null_selection_behavior = arrow::compute::FilterOptions::EMIT_NULL;
}
STOP_IF_NOT_OK(arrow::compute::Filter(&context, table, filter, options, &out));
return out.table();
std::shared_ptr<arrow::Table> tab = out.table();
if (tab->num_rows() == 0) {
// Slight hack: if there are no rows in the result, instead do a 0-length
// slice so that we get chunked arrays with 1 chunk (itself length 0).
// We need that because the Arrow-to-R converter fails when there are 0 chunks.
return table->Slice(0, 0);
}
return tab;
}
#endif
2 changes: 2 additions & 0 deletions r/tests/testthat/helper-arrow.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ if (tolower(Sys.info()[["sysname"]]) == "windows") {
options(arrow.use_threads = FALSE)
}

set.seed(1)

test_that <- function(what, code) {
testthat::test_that(what, {
skip_if(getOption("..skip.tests", TRUE), "arrow C++ library not available")
Expand Down
5 changes: 5 additions & 0 deletions r/tests/testthat/test-Array.R
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,11 @@ test_that("Array$create() can handle data frame with custom struct type (not inf
expect_error(Array$create(df, type = type), regexp = "Expecting a character vector")
})

test_that("Array$create() supports tibble with no columns (ARROW-8354)", {
df <- tibble::tibble()
expect_equal(Array$create(df)$as_vector(), df)
})

test_that("Array$create() handles vector -> list arrays (ARROW-7662)", {
# Should be able to create an empty list with a type hint.
expect_is(Array$create(list(), list_of(bool())), "ListArray")
Expand Down
11 changes: 11 additions & 0 deletions r/tests/testthat/test-dplyr.R
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ tbl <- tibble::tibble(
int = 1:10,
dbl = as.numeric(1:10),
lgl = sample(c(TRUE, FALSE, NA), 10, replace = TRUE),
false = logical(10),
chr = letters[1:10],
fct = factor(letters[1:10])
)
Expand Down Expand Up @@ -118,6 +119,16 @@ test_that("filter() with NAs in selection", {
)
})

test_that("Filter returning an empty Table should not segfault (ARROW-8354)", {
expect_dplyr_equal(
input %>%
filter(false) %>%
select(chr, int, lgl) %>%
collect(),
tbl
)
})

test_that("filtering with expression", {
char_sym <- "b"
expect_dplyr_equal(
Expand Down

0 comments on commit 0365356

Please sign in to comment.