From f0bcbd6aa375c12153fde2571baed31cf434bff1 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 31 Jan 2020 08:18:29 +0000 Subject: [PATCH] fix(bigquery): fix inserting missing repeated fields (#10196) * fix(bigquery): do not insert missing fields as explicit None * Omit all None values from JSON request body * Add an extra test for all missing row values * Flatten a block of code a bit --- bigquery/google/cloud/bigquery/_helpers.py | 11 ++++---- bigquery/tests/unit/test__helpers.py | 29 ++++++++++++++++++++-- bigquery/tests/unit/test_client.py | 20 ++++++++------- 3 files changed, 44 insertions(+), 16 deletions(-) diff --git a/bigquery/google/cloud/bigquery/_helpers.py b/bigquery/google/cloud/bigquery/_helpers.py index 98eadb0a2f8e..21a8e3636d24 100644 --- a/bigquery/google/cloud/bigquery/_helpers.py +++ b/bigquery/google/cloud/bigquery/_helpers.py @@ -424,11 +424,12 @@ def _record_field_to_json(fields, row_value): for subindex, subfield in enumerate(fields): subname = subfield.name - if isdict: - subvalue = row_value.get(subname) - else: - subvalue = row_value[subindex] - record[subname] = _field_to_json(subfield, subvalue) + subvalue = row_value.get(subname) if isdict else row_value[subindex] + + # None values are unconditionally omitted + if subvalue is not None: + record[subname] = _field_to_json(subfield, subvalue) + return record diff --git a/bigquery/tests/unit/test__helpers.py b/bigquery/tests/unit/test__helpers.py index 6d92b4de73ba..fa6d27c981d8 100644 --- a/bigquery/tests/unit/test__helpers.py +++ b/bigquery/tests/unit/test__helpers.py @@ -856,14 +856,39 @@ def test_w_non_empty_dict(self): converted = self._call_fut(fields, original) self.assertEqual(converted, {"one": "42", "two": "two"}) - def test_w_missing_nullable(self): + def test_w_some_missing_nullables(self): fields = [ _make_field("INT64", name="one", mode="NULLABLE"), _make_field("STRING", name="two", mode="NULLABLE"), ] original = {"one": 42} converted = self._call_fut(fields, original) - self.assertEqual(converted, {"one": "42", "two": None}) + + # missing fields should not be converted to an explicit None + self.assertEqual(converted, {"one": "42"}) + + def test_w_all_missing_nullables(self): + fields = [ + _make_field("INT64", name="one", mode="NULLABLE"), + _make_field("STRING", name="two", mode="NULLABLE"), + ] + original = {} + converted = self._call_fut(fields, original) + + # we should get an empty dict, not None + self.assertEqual(converted, {}) + + def test_w_explicit_none_value(self): + fields = [ + _make_field("INT64", name="one", mode="NULLABLE"), + _make_field("STRING", name="two", mode="NULLABLE"), + _make_field("BOOL", name="three", mode="REPEATED"), + ] + original = {"three": None, "one": 42, "two": None} + converted = self._call_fut(fields, original) + + # None values should be dropped regardless of the field type + self.assertEqual(converted, {"one": "42"}) class Test_field_to_json(unittest.TestCase): diff --git a/bigquery/tests/unit/test_client.py b/bigquery/tests/unit/test_client.py index 952c876dff39..2227183a9236 100644 --- a/bigquery/tests/unit/test_client.py +++ b/bigquery/tests/unit/test_client.py @@ -4668,10 +4668,13 @@ def test_insert_rows_w_schema(self): ] def _row_data(row): + result = {"full_name": row[0], "age": str(row[1])} joined = row[2] - if isinstance(row[2], datetime.datetime): + if isinstance(joined, datetime.datetime): joined = _microseconds_from_datetime(joined) * 1e-6 - return {"full_name": row[0], "age": str(row[1]), "joined": joined} + if joined is not None: + result["joined"] = joined + return result SENT = { "rows": [ @@ -4740,7 +4743,10 @@ def test_insert_rows_w_list_of_dictionaries(self): def _row_data(row): joined = row["joined"] - if isinstance(joined, datetime.datetime): + if joined is None: + row = copy.deepcopy(row) + del row["joined"] + elif isinstance(joined, datetime.datetime): row["joined"] = _microseconds_from_datetime(joined) * 1e-6 row["age"] = str(row["age"]) return row @@ -4959,9 +4965,8 @@ def test_insert_rows_w_repeated_fields(self): }, { "json": { - "color": None, "items": [], - "structs": [{"score": None, "times": [], "distances": [3.5]}], + "structs": [{"times": [], "distances": [3.5]}], }, "insertId": "1", }, @@ -5028,10 +5033,7 @@ def test_insert_rows_w_record_schema(self): }, "insertId": "1", }, - { - "json": {"full_name": "Wylma Phlyntstone", "phone": None}, - "insertId": "2", - }, + {"json": {"full_name": "Wylma Phlyntstone"}, "insertId": "2"}, ] }