From a9a0b30dc2784745130930a4efda23710bea3a21 Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Thu, 12 Oct 2017 14:31:04 -0400 Subject: [PATCH] bigquery: generate row IDs in create_rows (#4173) If the user doesn't provide row IDs, create unique IDs for them. --- bigquery/google/cloud/bigquery/client.py | 6 ++-- bigquery/tests/unit/test_client.py | 42 +++++++++++++++++------- 2 files changed, 35 insertions(+), 13 deletions(-) diff --git a/bigquery/google/cloud/bigquery/client.py b/bigquery/google/cloud/bigquery/client.py index 488b409ff77c..6a312bff4514 100644 --- a/bigquery/google/cloud/bigquery/client.py +++ b/bigquery/google/cloud/bigquery/client.py @@ -860,7 +860,7 @@ def create_rows(self, table, rows, row_ids=None, selected_fields=None, :type row_ids: list of string :param row_ids: (Optional) Unique ids, one per row being inserted. - If not passed, no de-duplication occurs. + If omitted, unique IDs are created. :type selected_fields: list of :class:`SchemaField` :param selected_fields: @@ -923,7 +923,8 @@ def create_rows(self, table, rows, row_ids=None, selected_fields=None, info = {'json': row_info} if row_ids is not None: info['insertId'] = row_ids[index] - + else: + info['insertId'] = str(uuid.uuid4()) rows_info.append(info) if skip_invalid_rows is not None: @@ -935,6 +936,7 @@ def create_rows(self, table, rows, row_ids=None, selected_fields=None, if template_suffix is not None: data['templateSuffix'] = template_suffix + # TODO(jba): use self._call_api here after #4148 is merged. response = self._connection.api_request( method='POST', path='%s/insertAll' % table.path, diff --git a/bigquery/tests/unit/test_client.py b/bigquery/tests/unit/test_client.py index 22df27c6358c..bb5517207ffc 100644 --- a/bigquery/tests/unit/test_client.py +++ b/bigquery/tests/unit/test_client.py @@ -1889,10 +1889,14 @@ def _row_data(row): 'joined': joined} SENT = { - 'rows': [{'json': _row_data(row)} for row in ROWS], + 'rows': [{ + 'json': _row_data(row), + 'insertId': str(i), + } for i, row in enumerate(ROWS)], } - errors = client.create_rows(table, ROWS) + with mock.patch('uuid.uuid4', side_effect=map(str, range(len(ROWS)))): + errors = client.create_rows(table, ROWS) self.assertEqual(len(errors), 0) self.assertEqual(len(conn._requested), 1) @@ -1950,10 +1954,14 @@ def _row_data(row): return row SENT = { - 'rows': [{'json': _row_data(row)} for row in ROWS], + 'rows': [{ + 'json': _row_data(row), + 'insertId': str(i), + } for i, row in enumerate(ROWS)], } - errors = client.create_rows(table, ROWS) + with mock.patch('uuid.uuid4', side_effect=map(str, range(len(ROWS)))): + errors = client.create_rows(table, ROWS) self.assertEqual(len(errors), 0) self.assertEqual(len(conn._requested), 1) @@ -1990,10 +1998,14 @@ def _row_data(row): return {'full_name': row[0], 'age': str(row[1])} SENT = { - 'rows': [{'json': _row_data(row)} for row in ROWS], + 'rows': [{ + 'json': _row_data(row), + 'insertId': str(i), + } for i, row in enumerate(ROWS)], } - errors = client.create_rows(table, ROWS) + with mock.patch('uuid.uuid4', side_effect=map(str, range(len(ROWS)))): + errors = client.create_rows(table, ROWS) self.assertEqual(len(errors), 0) self.assertEqual(len(conn._requested), 1) @@ -2095,10 +2107,14 @@ def _row_data(row): 'struct': row[1]} SENT = { - 'rows': [{'json': _row_data(row)} for row in ROWS], + 'rows': [{ + 'json': _row_data(row), + 'insertId': str(i), + } for i, row in enumerate(ROWS)], } - errors = client.create_rows(table, ROWS) + with mock.patch('uuid.uuid4', side_effect=map(str, range(len(ROWS)))): + errors = client.create_rows(table, ROWS) self.assertEqual(len(errors), 0) self.assertEqual(len(conn._requested), 1) @@ -2138,11 +2154,15 @@ def _row_data(row): 'phone': row[1]} SENT = { - 'rows': [{'json': _row_data(row)} for row in ROWS], + 'rows': [{ + 'json': _row_data(row), + 'insertId': str(i), + } for i, row in enumerate(ROWS)], } - errors = client.create_rows(self.TABLE_REF, ROWS, - selected_fields=[full_name, phone]) + with mock.patch('uuid.uuid4', side_effect=map(str, range(len(ROWS)))): + errors = client.create_rows(self.TABLE_REF, ROWS, + selected_fields=[full_name, phone]) self.assertEqual(len(errors), 0) self.assertEqual(len(conn._requested), 1)