Skip to content
Permalink
Browse files
feat: make the same Table* instances equal to each other (#867)
* feat: make the same Table instances equal to each other

* Table equality should ignore metadata differences

* Compare instances through tableReference property

* Make Table instances hashable

* Make Table* classes interchangeable

If instances of these classes (Table, TableReference, TableListItem) reference the same table, they are now considered equal.
  • Loading branch information
plamut committed Aug 11, 2021
1 parent aee814c commit c1a3d4435739a21d25aa154145e36d3a7c42eeb6
Showing with 244 additions and 23 deletions.
  1. +40 −2 google/cloud/bigquery/table.py
  2. +204 −21 tests/unit/test_table.py
@@ -255,9 +255,16 @@ def _key(self):
return (self._project, self._dataset_id, self._table_id)

def __eq__(self, other):
    """Return whether ``other`` points at the same table as this object.

    ``TableReference`` operands are compared through ``_key()``. ``Table``
    and ``TableListItem`` operands are compared through their ``project``,
    ``dataset_id`` and ``table_id`` attributes. Any other type yields
    ``NotImplemented`` so that Python can try the reflected comparison.
    """
    if isinstance(other, TableReference):
        return self._key() == other._key()

    if isinstance(other, (Table, TableListItem)):
        return (
            self.project == other.project
            and self.dataset_id == other.dataset_id
            and self.table_id == other.table_id
        )

    return NotImplemented

def __ne__(self, other):
    """Return the logical negation of ``self == other``."""
    are_equal = self == other
    return not are_equal
@@ -1011,6 +1018,24 @@ def _build_resource(self, filter_fields):
"""Generate a resource for ``update``."""
return _helpers._build_resource_from_properties(self, filter_fields)

def __eq__(self, other):
    """Return whether ``other`` refers to the same table as this object.

    Two ``Table`` operands are compared through the ``"tableReference"``
    entry of their ``_properties``. ``TableReference`` and ``TableListItem``
    operands are compared attribute by attribute. Anything else yields
    ``NotImplemented``.
    """
    if isinstance(other, Table):
        own_reference = self._properties["tableReference"]
        other_reference = other._properties["tableReference"]
        return own_reference == other_reference

    if isinstance(other, (TableReference, TableListItem)):
        return (
            self.project == other.project
            and self.dataset_id == other.dataset_id
            and self.table_id == other.table_id
        )

    return NotImplemented

def __hash__(self):
    """Hash the ``(project, dataset_id, table_id)`` triple of this object."""
    identity = (self.project, self.dataset_id, self.table_id)
    return hash(identity)

def __repr__(self):
    """Return ``Table(...)`` wrapping the repr of ``self.reference``."""
    reference_text = repr(self.reference)
    return "Table({})".format(reference_text)

@@ -1229,6 +1254,19 @@ def to_api_repr(self) -> dict:
"""
return copy.deepcopy(self._properties)

def __eq__(self, other):
    """Return whether ``other`` names the same project, dataset and table.

    Only ``Table``, ``TableReference`` and ``TableListItem`` operands are
    compared; any other type yields ``NotImplemented``.
    """
    if not isinstance(other, (Table, TableReference, TableListItem)):
        return NotImplemented

    return (
        self.project == other.project
        and self.dataset_id == other.dataset_id
        and self.table_id == other.table_id
    )

def __hash__(self):
    """Hash the ``(project, dataset_id, table_id)`` triple of this object."""
    identity = (self.project, self.dataset_id, self.table_id)
    return hash(identity)


def _row_from_mapping(mapping, schema):
"""Convert a mapping to a row tuple using the schema.
@@ -115,17 +115,13 @@ def _make_one(self, *args, **kw):
return self._get_target_class()(*args, **kw)

def test_ctor_defaults(self):
    """The constructed object exposes the dataset ID and table ID it was given."""
    from google.cloud.bigquery.dataset import DatasetReference

    parent_dataset = DatasetReference("project_1", "dataset_1")

    constructed = self._make_one(parent_dataset, "table_1")

    self.assertEqual(constructed.dataset_id, parent_dataset.dataset_id)
    self.assertEqual(constructed.table_id, "table_1")

def test_to_api_repr(self):
from google.cloud.bigquery.dataset import DatasetReference

dataset_ref = DatasetReference("project_1", "dataset_1")
table_ref = self._make_one(dataset_ref, "table_1")

@@ -137,7 +133,6 @@ def test_to_api_repr(self):
)

def test_from_api_repr(self):
from google.cloud.bigquery.dataset import DatasetReference
from google.cloud.bigquery.table import TableReference

dataset_ref = DatasetReference("project_1", "dataset_1")
@@ -204,51 +199,39 @@ def test_from_string_ignores_default_project(self):
self.assertEqual(got.table_id, "string_table")

def test___eq___wrong_type(self):
    """An unrelated object is unequal, while ``mock.ANY`` still compares equal."""
    from google.cloud.bigquery.dataset import DatasetReference

    parent_dataset = DatasetReference("project_1", "dataset_1")
    subject = self._make_one(parent_dataset, "table_1")
    unrelated = object()

    self.assertNotEqual(subject, unrelated)
    self.assertEqual(subject, mock.ANY)

def test___eq___project_mismatch(self):
    """Objects whose datasets live in different projects are unequal."""
    from google.cloud.bigquery.dataset import DatasetReference

    first_dataset = DatasetReference("project_1", "dataset_1")
    second_dataset = DatasetReference("project_2", "dataset_1")

    left = self._make_one(first_dataset, "table_1")
    right = self._make_one(second_dataset, "table_1")

    self.assertNotEqual(left, right)

def test___eq___dataset_mismatch(self):
    """Objects in different datasets of the same project are unequal."""
    from google.cloud.bigquery.dataset import DatasetReference

    first_dataset = DatasetReference("project_1", "dataset_1")
    second_dataset = DatasetReference("project_1", "dataset_2")

    left = self._make_one(first_dataset, "table_1")
    right = self._make_one(second_dataset, "table_1")

    self.assertNotEqual(left, right)

def test___eq___table_mismatch(self):
    """Objects in the same dataset with different table IDs are unequal."""
    from google.cloud.bigquery.dataset import DatasetReference

    shared_dataset = DatasetReference("project_1", "dataset_1")

    left = self._make_one(shared_dataset, "table_1")
    right = self._make_one(shared_dataset, "table_2")

    self.assertNotEqual(left, right)

def test___eq___equality(self):
    """Two objects built from the same dataset and table ID are equal."""
    from google.cloud.bigquery.dataset import DatasetReference

    shared_dataset = DatasetReference("project_1", "dataset_1")

    left = self._make_one(shared_dataset, "table_1")
    right = self._make_one(shared_dataset, "table_1")

    self.assertEqual(left, right)

def test___hash__set_equality(self):
from google.cloud.bigquery.dataset import DatasetReference

dataset = DatasetReference("project_1", "dataset_1")
table1 = self._make_one(dataset, "table1")
table2 = self._make_one(dataset, "table2")
@@ -257,8 +240,6 @@ def test___hash__set_equality(self):
self.assertEqual(set_one, set_two)

def test___hash__not_equals(self):
from google.cloud.bigquery.dataset import DatasetReference

dataset = DatasetReference("project_1", "dataset_1")
table1 = self._make_one(dataset, "table1")
table2 = self._make_one(dataset, "table2")
@@ -294,8 +275,6 @@ def _get_target_class():
return Table

def _make_one(self, *args, **kw):
from google.cloud.bigquery.dataset import DatasetReference

if len(args) == 0:
dataset = DatasetReference(self.PROJECT, self.DS_ID)
table_ref = dataset.table(self.TABLE_NAME)
@@ -581,6 +560,68 @@ def test_num_rows_getter(self):
with self.assertRaises(ValueError):
getattr(table, "num_rows")

def test__eq__wrong_type(self):
    """An unrelated class sharing the same ``_properties`` is still unequal."""

    class TableWannabe:
        pass

    genuine = self._make_one("project_foo.dataset_bar.table_baz")

    impostor = TableWannabe()
    impostor._properties = genuine._properties

    # Sharing the underlying properties is not enough to compare equal.
    assert genuine != impostor

def test__eq__same_table_basic(self):
    """Two objects built from the same table ID string are equal."""
    table_id = "project_foo.dataset_bar.table_baz"
    first = self._make_one(table_id)
    second = self._make_one(table_id)

    assert first == second

def test__eq__same_table_multiple_properties(self):
    """Identically configured objects for the same table are equal."""
    from google.cloud.bigquery import SchemaField

    def build_configured_table():
        # Each call creates a fresh object with its own labels and schema.
        table = self._make_one("project_foo.dataset_bar.table_baz")
        table.require_partition_filter = True
        table.labels = {"first": "one", "second": "two"}
        table.schema = [
            SchemaField("name", "STRING", "REQUIRED"),
            SchemaField("age", "INTEGER", "NULLABLE"),
        ]
        return table

    first = build_configured_table()
    second = build_configured_table()

    assert first == second

def test__eq__same_table_property_different(self):
    """Differing descriptions do not affect equality for the same table ID."""
    table_id = "project_foo.dataset_bar.table_baz"

    first = self._make_one(table_id)
    first.description = "This is table baz"

    second = self._make_one(table_id)
    second.description = "This is also table baz"

    # The description is ignored; only the table reference matters.
    assert first == second

def test__eq__different_table(self):
    """Objects built from different table IDs are unequal."""
    first = self._make_one("project_foo.dataset_bar.table_baz")
    second = self._make_one("project_foo.dataset_bar.table_baz_2")

    assert first != second

def test_hashable(self):
    """Objects for the same table ID hash equally despite different descriptions."""
    table_id = "project_foo.dataset_bar.table_baz"

    described = self._make_one(table_id)
    described.description = "This is a table"

    described_differently = self._make_one(table_id)
    described_differently.description = "Metadata is irrelevant for hashes"

    assert hash(described) == hash(described_differently)

def test_schema_setter_non_sequence(self):
dataset = DatasetReference(self.PROJECT, self.DS_ID)
table_ref = dataset.table(self.TABLE_NAME)
@@ -1543,6 +1584,148 @@ def test_to_api_repr(self):
table = self._make_one(resource)
self.assertEqual(table.to_api_repr(), resource)

def test__eq__wrong_type(self):
    """A look-alike class with matching attributes is still unequal."""

    class FakeTableListItem:
        project = "project_foo"
        dataset_id = "dataset_bar"
        table_id = "table_baz"

    table_reference = {
        "projectId": "project_foo",
        "datasetId": "dataset_bar",
        "tableId": "table_baz",
    }
    genuine = self._make_one({"tableReference": table_reference})
    impostor = FakeTableListItem()

    # Matching attribute values are not enough to compare equal.
    assert genuine != impostor

def test__eq__same_table(self):
    """Two objects built from the same resource are equal."""
    table_reference = {
        "projectId": "project_foo",
        "datasetId": "dataset_bar",
        "tableId": "table_baz",
    }
    resource = {"tableReference": table_reference}

    first = self._make_one(resource)
    second = self._make_one(resource)

    assert first == second

def test__eq__same_table_property_different(self):
    """Differing ``friendlyName`` values do not affect equality."""
    table_ref_resource = {
        "projectId": "project_foo",
        "datasetId": "dataset_bar",
        "tableId": "table_baz",
    }

    first = self._make_one(
        {"tableReference": table_ref_resource, "friendlyName": "Table One"}
    )
    second = self._make_one(
        {"tableReference": table_ref_resource, "friendlyName": "Table Two"}
    )

    # The friendly name is ignored; only the table reference matters.
    assert first == second

def test__eq__different_table(self):
    """Resources that differ only in ``tableId`` produce unequal objects."""
    baz_reference = {
        "projectId": "project_foo",
        "datasetId": "dataset_bar",
        "tableId": "table_baz",
    }
    first = self._make_one({"tableReference": baz_reference})

    quux_reference = {
        "projectId": "project_foo",
        "datasetId": "dataset_bar",
        "tableId": "table_quux",
    }
    second = self._make_one({"tableReference": quux_reference})

    assert first != second

def test_hashable(self):
    """Two objects built from the same resource have equal hashes."""
    table_reference = {
        "projectId": "project_foo",
        "datasetId": "dataset_bar",
        "tableId": "table_baz",
    }
    resource = {"tableReference": table_reference}

    first = self._make_one(resource)
    second = self._make_one(resource)

    assert hash(first) == hash(second)


class TestTableClassesInterchangeability:
    """Cross-type equality checks between Table, TableReference and TableListItem.

    Every test compares in both directions, so both operands' ``__eq__``
    implementations are exercised.
    """

    @staticmethod
    def _make_table(*args, **kwargs):
        """Build a ``Table``; the import is deferred until the call."""
        from google.cloud.bigquery.table import Table

        return Table(*args, **kwargs)

    @staticmethod
    def _make_table_ref(*args, **kwargs):
        """Build a ``TableReference``; the import is deferred until the call."""
        from google.cloud.bigquery.table import TableReference

        return TableReference(*args, **kwargs)

    @staticmethod
    def _make_table_list_item(*args, **kwargs):
        """Build a ``TableListItem``; the import is deferred until the call."""
        from google.cloud.bigquery.table import TableListItem

        return TableListItem(*args, **kwargs)

    def test_table_eq_table_ref(self):
        """A Table and a TableReference for the same table compare equal."""
        full_table = self._make_table("project_foo.dataset_bar.table_baz")
        parent_dataset = DatasetReference("project_foo", "dataset_bar")
        reference = self._make_table_ref(parent_dataset, "table_baz")

        assert full_table == reference
        assert reference == full_table

    def test_table_eq_table_list_item(self):
        """A Table and a TableListItem for the same table compare equal."""
        full_table = self._make_table("project_foo.dataset_bar.table_baz")
        resource = {
            "tableReference": {
                "projectId": "project_foo",
                "datasetId": "dataset_bar",
                "tableId": "table_baz",
            }
        }
        list_item = self._make_table_list_item(resource)

        assert full_table == list_item
        assert list_item == full_table

    def test_table_ref_eq_table_list_item(self):
        """A TableReference and a TableListItem for the same table compare equal."""
        parent_dataset = DatasetReference("project_foo", "dataset_bar")
        reference = self._make_table_ref(parent_dataset, "table_baz")
        resource = {
            "tableReference": {
                "projectId": "project_foo",
                "datasetId": "dataset_bar",
                "tableId": "table_baz",
            }
        }
        list_item = self._make_table_list_item(resource)

        assert reference == list_item
        assert list_item == reference


class TestSnapshotDefinition:
@staticmethod

0 comments on commit c1a3d44

Please sign in to comment.