Skip to content

Commit

Permalink
ARROW-7720: [C++][Python] Add check_metadata argument to Table.equals
Browse files Browse the repository at this point in the history
Use

```py
table.equals(other, check_metadata=False)
```

instead of

```py
table.replace_schema_metadata().equals(other)
```

Closes #6318 from kszucs/table-check-metadata and squashes the following commits:

471d007 <Antoine Pitrou> Fix typo
6bb1c82 <Antoine Pitrou> Trivial R bindings update
e2e1291 <Antoine Pitrou> Add parameter to docstring
7a00295 <Krisztián Szűcs> tests
bb18363 <Krisztián Szűcs> add check_metadata argument to Table.equals

Lead-authored-by: Antoine Pitrou <antoine@python.org>
Co-authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
  • Loading branch information
pitrou and kszucs committed Feb 7, 2020
1 parent e1bccd0 commit d03c49c
Show file tree
Hide file tree
Showing 11 changed files with 34 additions and 21 deletions.
4 changes: 2 additions & 2 deletions cpp/src/arrow/table.cc
Original file line number Diff line number Diff line change
Expand Up @@ -666,11 +666,11 @@ Result<std::shared_ptr<Table>> PromoteTableToSchema(const std::shared_ptr<Table>
return Table::Make(schema, std::move(columns));
}

bool Table::Equals(const Table& other) const {
bool Table::Equals(const Table& other, bool check_metadata) const {
if (this == &other) {
return true;
}
if (!schema_->Equals(*other.schema())) {
if (!schema_->Equals(*other.schema(), check_metadata)) {
return false;
}
if (this->num_columns() != other.num_columns()) {
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/table.h
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ class ARROW_EXPORT Table {
///
/// Two tables can be equal only if they have equal schemas.
/// However, they may be equal even if they have different chunkings.
bool Equals(const Table& other) const;
bool Equals(const Table& other, bool check_metadata = true) const;

/// \brief Make a new table by combining the chunks this table has.
///
Expand Down
6 changes: 6 additions & 0 deletions cpp/src/arrow/table_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,12 @@ TEST_F(TestTable, Equals) {

other = Table::Make(schema_, other_columns);
ASSERT_FALSE(table_->Equals(*other));

// Differing schema metadata
other_schema = schema_->WithMetadata(::arrow::key_value_metadata({"key"}, {"value"}));
other = Table::Make(other_schema, columns_);
ASSERT_FALSE(table_->Equals(*other));
ASSERT_TRUE(table_->Equals(*other, /*check_metadata=*/false));
}

TEST_F(TestTable, FromRecordBatches) {
Expand Down
2 changes: 1 addition & 1 deletion python/pyarrow/includes/libarrow.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -676,7 +676,7 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
int num_columns()
int64_t num_rows()

c_bool Equals(const CTable& other)
c_bool Equals(const CTable& other, c_bool check_metadata)

shared_ptr[CSchema] schema()
shared_ptr[CChunkedArray] column(int i)
Expand Down
6 changes: 4 additions & 2 deletions python/pyarrow/table.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -1058,13 +1058,15 @@ cdef class Table(_PandasConvertible):
except TypeError:
return NotImplemented

def equals(self, Table other):
def equals(self, Table other, bint check_metadata=True):
"""
Check if contents of two tables are equal
Parameters
----------
other : pyarrow.Table
check_metadata : bool, default True
Whether metadata equality should be checked as well.
Returns
-------
Expand All @@ -1079,7 +1081,7 @@ cdef class Table(_PandasConvertible):
return False

with nogil:
result = this_table.Equals(deref(other_table))
result = this_table.Equals(deref(other_table), check_metadata)

return result

Expand Down
10 changes: 5 additions & 5 deletions python/pyarrow/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -568,19 +568,19 @@ def _check_dataset_from_path(path, table, **kwargs):
dataset = ds.dataset(ds.source(path, **kwargs))
assert dataset.schema.equals(table.schema, check_metadata=False)
result = dataset.to_table(use_threads=False) # deterministic row order
assert result.replace_schema_metadata().equals(table)
assert result.equals(table, check_metadata=False)

# string path
dataset = ds.dataset(ds.source(str(path), **kwargs))
assert dataset.schema.equals(table.schema, check_metadata=False)
result = dataset.to_table(use_threads=False) # deterministic row order
assert result.replace_schema_metadata().equals(table)
assert result.equals(table, check_metadata=False)

# passing directly to dataset
dataset = ds.dataset(str(path), **kwargs)
assert dataset.schema.equals(table.schema, check_metadata=False)
result = dataset.to_table(use_threads=False) # deterministic row order
assert result.replace_schema_metadata().equals(table)
assert result.equals(table, check_metadata=False)


@pytest.mark.parquet
Expand Down Expand Up @@ -608,7 +608,7 @@ def test_open_dataset_list_of_files(tempdir):
ds.dataset(ds.source([str(path1), str(path2)]))]:
assert dataset.schema.equals(table.schema, check_metadata=False)
result = dataset.to_table(use_threads=False) # deterministic row order
assert result.replace_schema_metadata().equals(table)
assert result.equals(table, check_metadata=False)


@pytest.mark.skipif(sys.platform == "win32", reason="fails on windows")
Expand Down Expand Up @@ -646,7 +646,7 @@ def test_open_dataset_partitioned_directory(tempdir):
result = dataset.to_table(use_threads=False)
expected = full_table.append_column(
"part", pa.array(np.repeat([0, 1, 2], 9), type=pa.int8()))
assert result.replace_schema_metadata().equals(expected)
assert result.equals(expected, check_metadata=False)


@pytest.mark.parquet
Expand Down
4 changes: 4 additions & 0 deletions python/pyarrow/tests/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,10 @@ def test_table_equals():
# ARROW-4822
assert not table.equals(None)

other = pa.Table.from_arrays([], names=[], metadata={'key': 'value'})
assert not table.equals(other)
assert table.equals(other, check_metadata=False)


def test_table_from_batches_and_schema():
schema = pa.schema([
Expand Down
4 changes: 2 additions & 2 deletions r/R/arrowExports.R

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions r/R/table.R
Original file line number Diff line number Diff line change
Expand Up @@ -162,8 +162,8 @@ Table <- R6Class("Table", inherit = Object,
shared_ptr(Table, Table__Filter(self, i))
},

Equals = function(other) {
Table__Equals(self, other)
Equals = function(other, check_metadata = TRUE) {
Table__Equals(self, other, isTRUE(check_metadata))
}
),

Expand Down
11 changes: 6 additions & 5 deletions r/src/arrowExports.cpp

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion r/src/table.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ std::shared_ptr<arrow::Table> Table__Slice2(const std::shared_ptr<arrow::Table>&

// [[arrow::export]]
bool Table__Equals(const std::shared_ptr<arrow::Table>& lhs,
const std::shared_ptr<arrow::Table>& rhs) {
const std::shared_ptr<arrow::Table>& rhs, bool check_metadata) {
return lhs->Equals(*rhs.get());
}

Expand Down

0 comments on commit d03c49c

Please sign in to comment.