Skip to content

Commit

Permalink
ARROW-7786: [R] Wire up check_metadata in Table.Equals method
Browse files Browse the repository at this point in the history
In tests, use `expect_equal()` to test that Tables and Schemas are the same, including metadata. Use `expect_equivalent()` to skip the metadata check.

This patch also adds the `Schema$WithMetadata()` method to set key-value metadata. It also fixes a bunch of invalid Equals methods that either errored or segfaulted when you passed a non-Arrow object to them.

I created https://issues.apache.org/jira/browse/ARROW-7891 as a followup for adding a check_metadata argument to RecordBatch->Equals in C++, since it seems like it should exist.

Closes #6459 from nealrichardson/r-table-equals and squashes the following commits:

13d8cea <Neal Richardson> Doc and lint
3b1abe4 <Neal Richardson> Fix a bunch of segfaults and faulty Equals methods
081abba <Neal Richardson> expect_equivalent does not check metadata
a4a3d95 <Neal Richardson> Wire up Table__equals(check_metadata). Add bindings/methods to let you set metadata

Authored-by: Neal Richardson <neal.p.richardson@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
  • Loading branch information
nealrichardson committed Feb 20, 2020
1 parent 12aa05a commit 9834601
Show file tree
Hide file tree
Showing 23 changed files with 146 additions and 25 deletions.
4 changes: 3 additions & 1 deletion r/R/array.R
Expand Up @@ -89,7 +89,9 @@ Array <- R6Class("Array",
IsValid = function(i) Array__IsValid(self, i),
length = function() Array__length(self),
type_id = function() Array__type_id(self),
Equals = function(other) Array__Equals(self, other),
Equals = function(other, ...) {
inherits(other, "Array") && Array__Equals(self, other)
},
ApproxEquals = function(other) Array__ApproxEquals(self, other),
data = function() shared_ptr(ArrayData, Array__data(self)),
as_vector = function() Array__as_vector(self),
Expand Down
4 changes: 2 additions & 2 deletions r/R/arrow-package.R
Expand Up @@ -96,8 +96,8 @@ Object <- R6Class("Object",
}

#' @export
all.equal.Object <- function(target, current, ...) {
target$Equals(current)
all.equal.Object <- function(target, current, ..., check.attributes = TRUE) {
target$Equals(current, check_metadata = check.attributes)
}

shared_ptr <- function(class, xp) {
Expand Down
4 changes: 4 additions & 0 deletions r/R/arrowExports.R

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion r/R/buffer.R
Expand Up @@ -39,7 +39,9 @@ Buffer <- R6Class("Buffer", inherit = Object,
public = list(
ZeroPadding = function() Buffer__ZeroPadding(self),
data = function() Buffer__data(self),
Equals = function(other) Buffer__Equals(self, other)
Equals = function(other, ...) {
inherits(other, "Buffer") && Buffer__Equals(self, other)
}
),

active = list(
Expand Down
4 changes: 2 additions & 2 deletions r/R/chunked-array.R
Expand Up @@ -116,8 +116,8 @@ ChunkedArray <- R6Class("ChunkedArray", inherit = Object,
}
out
},
Equals = function(other) {
ChunkedArray__Equals(self, other)
Equals = function(other, ...) {
inherits(other, "ChunkedArray") && ChunkedArray__Equals(self, other)
}
),
active = list(
Expand Down
2 changes: 1 addition & 1 deletion r/R/field.R
Expand Up @@ -36,7 +36,7 @@ Field <- R6Class("Field", inherit = Object,
ToString = function() {
prettier_dictionary_type(Field__ToString(self))
},
Equals = function(other) {
Equals = function(other, ...) {
inherits(other, "Field") && Field__Equals(self, other)
}
),
Expand Down
5 changes: 2 additions & 3 deletions r/R/message.R
Expand Up @@ -31,9 +31,8 @@
#' @name Message
Message <- R6Class("Message", inherit = Object,
public = list(
Equals = function(other){
assert_is(other, "Message")
ipc___Message__Equals(self, other)
Equals = function(other, ...) {
inherits(other, "Message") && ipc___Message__Equals(self, other)
},
body_length = function() ipc___Message__body_length(self),
Verify = function() ipc___Message__Verify(self)
Expand Down
7 changes: 4 additions & 3 deletions r/R/record-batch.R
Expand Up @@ -83,9 +83,10 @@ RecordBatch <- R6Class("RecordBatch", inherit = Object,
},
column_name = function(i) RecordBatch__column_name(self, i),
names = function() RecordBatch__names(self),
Equals = function(other) {
assert_is(other, "RecordBatch")
RecordBatch__Equals(self, other)
Equals = function(other, ...) {
# RecordBatch->Equals should have a check_metadata arg
# https://issues.apache.org/jira/browse/ARROW-7891
inherits(other, "RecordBatch") && RecordBatch__Equals(self, other)
},
GetColumnByName = function(name) {
assert_is(name, "character")
Expand Down
12 changes: 10 additions & 2 deletions r/R/schema.R
Expand Up @@ -36,6 +36,9 @@
#' - `$ToString()`: convert to a string
#' - `$field(i)`: returns the field at index `i` (0-based)
#' - `$GetFieldByName(x)`: returns the field with name `x`
#' - `$WithMetadata(metadata)`: returns a new `Schema` with the key-value
#' `metadata` set. Note that all list elements in `metadata` will be coerced
#' to `character`.
#'
#' @section Active bindings:
#'
Expand Down Expand Up @@ -70,8 +73,13 @@ Schema <- R6Class("Schema",
field = function(i) shared_ptr(Field, Schema__field(self, i)),
GetFieldByName = function(x) shared_ptr(Field, Schema__GetFieldByName(self, x)),
serialize = function() Schema__serialize(self),
Equals = function(other, check_metadata = TRUE) {
Schema__Equals(self, other, isTRUE(check_metadata))
WithMetadata = function(metadata = list()) {
# metadata must be a named character vector
metadata <- map_chr(metadata, as.character)
shared_ptr(Schema, Schema__WithMetadata(self, metadata))
},
Equals = function(other, check_metadata = TRUE, ...) {
inherits(other, "Schema") && Schema__Equals(self, other, isTRUE(check_metadata))
}
),
active = list(
Expand Down
8 changes: 5 additions & 3 deletions r/R/table.R
Expand Up @@ -76,10 +76,11 @@
#' - `$cast(target_schema, safe = TRUE, options = cast_options(safe))`: Alter
#' the schema of the record batch.
#'
#' There are also some active bindings
#' There are also some active bindings:
#' - `$num_columns`
#' - `$num_rows`
#' - `$schema`
#' - `$metadata`: Returns the key-value metadata of the `Schema`
#' - `$columns`: Returns a list of `ChunkedArray`s
#' @rdname Table
#' @name Table
Expand Down Expand Up @@ -162,15 +163,16 @@ Table <- R6Class("Table", inherit = Object,
shared_ptr(Table, Table__Filter(self, i))
},

Equals = function(other, check_metadata = TRUE) {
Table__Equals(self, other, isTRUE(check_metadata))
Equals = function(other, check_metadata = TRUE, ...) {
inherits(other, "Table") && Table__Equals(self, other, isTRUE(check_metadata))
}
),

active = list(
num_columns = function() Table__num_columns(self),
num_rows = function() Table__num_rows(self),
schema = function() shared_ptr(Schema, Table__schema(self)),
metadata = function() self$schema$metadata,
columns = function() map(Table__columns(self), shared_ptr, class = ChunkedArray)
)
)
Expand Down
5 changes: 2 additions & 3 deletions r/R/type.R
Expand Up @@ -34,9 +34,8 @@ DataType <- R6Class("DataType",
ToString = function() {
DataType__ToString(self)
},
Equals = function(other) {
assert_is(other, "DataType")
DataType__Equals(self, other)
Equals = function(other, ...) {
inherits(other, "DataType") && DataType__Equals(self, other)
},
num_children = function() {
DataType__num_children(self)
Expand Down
3 changes: 3 additions & 0 deletions r/man/Schema.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion r/man/Table.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 17 additions & 0 deletions r/src/arrowExports.cpp

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions r/src/schema.cpp
Expand Up @@ -73,6 +73,14 @@ std::string Schema__metadata(const std::shared_ptr<arrow::Schema>& schema) {
return schema->metadata()->ToString();
}

// [[arrow::export]]
std::shared_ptr<arrow::Schema> Schema__WithMetadata(
const std::shared_ptr<arrow::Schema>& schema, Rcpp::CharacterVector metadata) {
auto kv = std::shared_ptr<arrow::KeyValueMetadata>(new arrow::KeyValueMetadata(
metadata.names(), Rcpp::as<std::vector<std::string>>(metadata)));
return schema->WithMetadata(kv);
}

// [[arrow::export]]
Rcpp::RawVector Schema__serialize(const std::shared_ptr<arrow::Schema>& schema) {
arrow::ipc::DictionaryMemo empty_memo;
Expand Down
2 changes: 1 addition & 1 deletion r/src/table.cpp
Expand Up @@ -89,7 +89,7 @@ std::shared_ptr<arrow::Table> Table__Slice2(const std::shared_ptr<arrow::Table>&
// [[arrow::export]]
bool Table__Equals(const std::shared_ptr<arrow::Table>& lhs,
const std::shared_ptr<arrow::Table>& rhs, bool check_metadata) {
return lhs->Equals(*rhs.get());
return lhs->Equals(*rhs.get(), check_metadata);
}

// [[arrow::export]]
Expand Down
9 changes: 9 additions & 0 deletions r/tests/testthat/test-Array.R
Expand Up @@ -601,3 +601,12 @@ test_that("Dictionary array: translate to R when dict isn't string", {
"Coercing dictionary values from type double to R character factor levels"
)
})

test_that("Array$Equals", {
vec <- 11:20
a <- Array$create(vec)
b <- Array$create(vec)
expect_equal(a, b)
expect_true(a$Equals(b))
expect_false(a$Equals(vec))
})
9 changes: 9 additions & 0 deletions r/tests/testthat/test-RecordBatch.R
Expand Up @@ -286,3 +286,12 @@ test_that("record_batch() handles null type (ARROW-7064)", {
batch <- record_batch(a = 1:10, n = vctrs::unspecified(10))
expect_equal(batch$schema, schema(a = int32(), n = null()))
})

test_that("RecordBatch$Equals", {
df <- tibble::tibble(x = 1:10, y = letters[1:10])
a <- record_batch(df)
b <- record_batch(df)
expect_equal(a, b)
expect_true(a$Equals(b))
expect_false(a$Equals(df))
})
23 changes: 21 additions & 2 deletions r/tests/testthat/test-Table.R
Expand Up @@ -250,9 +250,28 @@ test_that("==.Table", {
expect_equal(tab1, tab2)
})

test_that("Table$Equals(check_metadata)", {
tab1 <- Table$create(x = 1:2, y = c("a", "b"))
tab2 <- Table$create(x = 1:2, y = c("a", "b"),
schema = tab1$schema$WithMetadata(list(some="metadata")))

expect_is(tab1, "Table")
expect_is(tab2, "Table")
expect_false(tab1$schema$HasMetadata)
expect_true(tab2$schema$HasMetadata)
expect_match(tab2$schema$metadata, "some: metadata", fixed = TRUE)

expect_false(tab1 == tab2)
expect_false(tab1$Equals(tab2))
expect_true(tab1$Equals(tab2, check_metadata = FALSE))

expect_failure(expect_equal(tab1, tab2)) # expect_equal does check_metadata
expect_equivalent(tab1, tab2) # expect_equivalent does not

expect_false(tab1$Equals(24)) # Not a Table
})

test_that("Table handles null type (ARROW-7064)", {
tab <- Table$create(a = 1:10, n = vctrs::unspecified(10))
expect_equal(tab$schema, schema(a = int32(), n = null()))
})


9 changes: 9 additions & 0 deletions r/tests/testthat/test-buffer.R
Expand Up @@ -88,3 +88,12 @@ test_that("can read remaining bytes of a RandomAccessFile", {
expect_equal(z, c(x, y))
expect_equal(a, y)
})

test_that("Buffer$Equals", {
vec <- integer(17)
buf1 <- buffer(vec)
buf2 <- buffer(vec)
expect_equal(buf1, buf2)
expect_true(buf1$Equals(buf2))
expect_false(buf1$Equals(vec))
})
9 changes: 9 additions & 0 deletions r/tests/testthat/test-chunked-array.R
Expand Up @@ -392,3 +392,12 @@ test_that("[ ChunkedArray", {
c(2, 5, 6, 8, 9, 35, 36, 38, 39, 55)
)
})

test_that("ChunkedArray$Equals", {
vec <- 11:20
a <- ChunkedArray$create(vec[1:5], vec[6:10])
b <- ChunkedArray$create(vec[1:5], vec[6:10])
expect_equal(a, b)
expect_true(a$Equals(b))
expect_false(a$Equals(vec))
})
8 changes: 8 additions & 0 deletions r/tests/testthat/test-schema.R
Expand Up @@ -85,3 +85,11 @@ test_that("Input validation when creating a table with a schema", {
"schema must be an arrow::Schema or NULL"
)
})

test_that("Schema$Equals", {
a <- schema(b = double(), c = bool())
b <- a$WithMetadata(list(some="metadata"))
expect_failure(expect_equal(a, b))
expect_equivalent(a, b)
expect_false(a$Equals(42))
})
12 changes: 12 additions & 0 deletions r/tests/testthat/test-type.R
Expand Up @@ -55,3 +55,15 @@ test_that("type() can infer struct types from data frames", {
df <- tibble::tibble(x = 1:10, y = rnorm(10), z = letters[1:10])
expect_equal(type(df), struct(x = int32(), y = float64(), z = utf8()))
})

test_that("DataType$Equals", {
a <- int32()
b <- int32()
z <- float64()
expect_true(a == b)
expect_true(a$Equals(b))
expect_false(a == z)
expect_equal(a, b)
expect_failure(expect_equal(a, z))
expect_false(a$Equals(32L))
})

0 comments on commit 9834601

Please sign in to comment.