diff --git a/bigquery/docs/usage/tables.rst b/bigquery/docs/usage/tables.rst
index 6a6cbd356639..d58dcc5d9ac4 100644
--- a/bigquery/docs/usage/tables.rst
+++ b/bigquery/docs/usage/tables.rst
@@ -122,6 +122,20 @@ Insert rows into a table's data with the
    :start-after: [START bigquery_table_insert_rows]
    :end-before: [END bigquery_table_insert_rows]
 
+Insert rows into a table's data with the
+:func:`~google.cloud.bigquery.client.Client.insert_rows` method, achieving
+a higher write limit:
+
+.. literalinclude:: ../samples/table_insert_rows_explicit_none_insert_ids.py
+   :language: python
+   :dedent: 4
+   :start-after: [START bigquery_table_insert_rows_explicit_none_insert_ids]
+   :end-before: [END bigquery_table_insert_rows_explicit_none_insert_ids]
+
+Mind that inserting data with ``None`` row insert IDs can come at the expense of
+more duplicate inserts. See also:
+`Streaming inserts <https://cloud.google.com/bigquery/quotas#streaming_inserts>`_.
+
 Add an empty column to the existing table with the
 :func:`~google.cloud.bigquery.update_table` method:
 
diff --git a/bigquery/google/cloud/bigquery/client.py b/bigquery/google/cloud/bigquery/client.py
index 02bfc651af0d..bae4359300f8 100644
--- a/bigquery/google/cloud/bigquery/client.py
+++ b/bigquery/google/cloud/bigquery/client.py
@@ -2264,29 +2264,32 @@ def insert_rows_json(
             table (Union[ \
                 google.cloud.bigquery.table.Table \
                 google.cloud.bigquery.table.TableReference, \
-                str, \
+                str \
             ]):
                 The destination table for the row data, or a reference to it.
             json_rows (Sequence[Dict]):
                 Row data to be inserted. Keys must match the table schema fields
                 and values must be JSON-compatible representations.
-            row_ids (Sequence[str]):
-                (Optional) Unique ids, one per row being inserted. If omitted,
-                unique IDs are created.
-            skip_invalid_rows (bool):
-                (Optional) Insert all valid rows of a request, even if invalid
-                rows exist. The default value is False, which causes the entire
-                request to fail if any invalid rows exist.
-            ignore_unknown_values (bool):
-                (Optional) Accept rows that contain values that do not match the
-                schema. The unknown values are ignored. Default is False, which
+            row_ids (Optional[Sequence[Optional[str]]]):
+                Unique IDs, one per row being inserted. An ID can also be
+                ``None``, indicating that an explicit insert ID should **not**
+                be used for that row. If the argument is omitted altogether,
+                unique IDs are created automatically.
+            skip_invalid_rows (Optional[bool]):
+                Insert all valid rows of a request, even if invalid rows exist.
+                The default value is ``False``, which causes the entire request
+                to fail if any invalid rows exist.
+            ignore_unknown_values (Optional[bool]):
+                Accept rows that contain values that do not match the schema.
+                The unknown values are ignored. Default is ``False``, which
                 treats unknown values as errors.
-            template_suffix (str):
-                (Optional) treat ``name`` as a template table and provide a suffix.
-                BigQuery will create the table ``<destination> + <suffix>`` based
-                on the schema of the template table. See
+            template_suffix (Optional[str]):
+                Treat ``name`` as a template table and provide a suffix.
+                BigQuery will create the table ``<destination> + <suffix>``
+                based on the schema of the template table. See
                 https://cloud.google.com/bigquery/streaming-data-into-bigquery#template-tables
-            retry (google.api_core.retry.Retry): (Optional) How to retry the RPC.
+            retry (Optional[google.api_core.retry.Retry]):
+                How to retry the RPC.
 
         Returns:
             Sequence[Mappings]:
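
The new ``row_ids`` contract documented above supports three modes. Below is a
minimal sketch of all three, assuming an existing table; the table ID and row
payloads are hypothetical, not part of this change:

    from google.cloud import bigquery

    client = bigquery.Client()
    table_id = "your-project.your_dataset.your_table"  # hypothetical table
    rows = [{"full_name": "Phred Phlyntstone", "age": 32}]

    # Omitted row_ids: a unique insert ID is generated per row, enabling
    # best-effort de-duplication on the backend.
    client.insert_rows_json(table_id, rows)

    # Explicit string IDs: de-duplication is keyed on caller-chosen IDs.
    client.insert_rows_json(table_id, rows, row_ids=["my-row-0"])

    # Explicit None IDs (this change): no de-duplication, in exchange for
    # the higher streaming insert quota.
    client.insert_rows_json(table_id, rows, row_ids=[None] * len(rows))
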
diff --git a/bigquery/samples/table_insert_rows_explicit_none_insert_ids.py b/bigquery/samples/table_insert_rows_explicit_none_insert_ids.py
new file mode 100644
index 000000000000..953e7e210312
--- /dev/null
+++ b/bigquery/samples/table_insert_rows_explicit_none_insert_ids.py
@@ -0,0 +1,36 @@
+# Copyright 2019 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def table_insert_rows_explicit_none_insert_ids(client, table_id):
+
+    # [START bigquery_table_insert_rows_explicit_none_insert_ids]
+    # TODO(developer): Import the client library.
+    # from google.cloud import bigquery
+
+    # TODO(developer): Construct a BigQuery client object.
+    # client = bigquery.Client()
+
+    # TODO(developer): Set table_id to the ID of the table to fetch.
+    # table_id = "your-project.your_dataset.your_table"
+
+    table = client.get_table(table_id)  # Make an API request.
+    rows_to_insert = [(u"Phred Phlyntstone", 32), (u"Wylma Phlyntstone", 29)]
+
+    errors = client.insert_rows(
+        table, rows_to_insert, row_ids=[None] * len(rows_to_insert)
+    )  # Make an API request.
+    if errors == []:
+        print("New rows have been added.")
+    # [END bigquery_table_insert_rows_explicit_none_insert_ids]
diff --git a/bigquery/samples/tests/test_table_insert_rows_explicit_none_insert_ids.py b/bigquery/samples/tests/test_table_insert_rows_explicit_none_insert_ids.py
new file mode 100644
index 000000000000..6a59609baacf
--- /dev/null
+++ b/bigquery/samples/tests/test_table_insert_rows_explicit_none_insert_ids.py
@@ -0,0 +1,33 @@
+# Copyright 2019 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from google.cloud import bigquery
+
+from .. import table_insert_rows_explicit_none_insert_ids as mut
+
+
+def test_table_insert_rows_explicit_none_insert_ids(capsys, client, random_table_id):
+
+    schema = [
+        bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"),
+        bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
+    ]
+
+    table = bigquery.Table(random_table_id, schema=schema)
+    table = client.create_table(table)
+
+    mut.table_insert_rows_explicit_none_insert_ids(client, random_table_id)
+    out, err = capsys.readouterr()
+    assert "New rows have been added." in out
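
The sample test above relies on the ``capsys`` pytest fixture plus ``client``
and ``random_table_id`` fixtures from the samples' shared ``conftest.py``.
Roughly, such fixtures could look like the sketch below; the dataset name and
cleanup behavior are assumptions for illustration, not the repository's actual
conftest:

    import uuid

    import pytest
    from google.cloud import bigquery


    @pytest.fixture(scope="module")
    def client():
        return bigquery.Client()


    @pytest.fixture
    def random_table_id(client):
        # Hypothetical dataset name; the real conftest manages its own dataset.
        table_id = "{}.bqsamples_tests.table_{}".format(
            client.project, uuid.uuid4().hex
        )
        yield table_id
        client.delete_table(table_id, not_found_ok=True)  # clean up after the test
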
diff --git a/bigquery/tests/unit/test_client.py b/bigquery/tests/unit/test_client.py
index 91b9bc642187..b4e5e96f1e8e 100644
--- a/bigquery/tests/unit/test_client.py
+++ b/bigquery/tests/unit/test_client.py
@@ -4572,6 +4572,40 @@ def test_insert_rows_w_record_schema(self):
             method="POST", path="/%s" % PATH, data=SENT
         )
 
+    def test_insert_rows_w_explicit_none_insert_ids(self):
+        from google.cloud.bigquery.schema import SchemaField
+        from google.cloud.bigquery.table import Table
+
+        PATH = "projects/{}/datasets/{}/tables/{}/insertAll".format(
+            self.PROJECT, self.DS_ID, self.TABLE_ID,
+        )
+        creds = _make_credentials()
+        http = object()
+        client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)
+        conn = client._connection = make_connection({})
+        schema = [
+            SchemaField("full_name", "STRING", mode="REQUIRED"),
+            SchemaField("age", "INTEGER", mode="REQUIRED"),
+        ]
+        table = Table(self.TABLE_REF, schema=schema)
+        ROWS = [
+            {"full_name": "Phred Phlyntstone", "age": 32},
+            {"full_name": "Bharney Rhubble", "age": 33},
+        ]
+
+        def _row_data(row):
+            row["age"] = str(row["age"])
+            return row
+
+        SENT = {"rows": [{"json": _row_data(row), "insertId": None} for row in ROWS]}
+
+        errors = client.insert_rows(table, ROWS, row_ids=[None] * len(ROWS))
+
+        self.assertEqual(len(errors), 0)
+        conn.api_request.assert_called_once_with(
+            method="POST", path="/{}".format(PATH), data=SENT
+        )
+
     def test_insert_rows_errors(self):
         from google.cloud.bigquery.table import Table
 
@@ -4765,6 +4799,55 @@ def test_insert_rows_from_dataframe_many_columns(self):
         assert len(actual_calls) == 1
         assert actual_calls[0] == expected_call
 
+    @unittest.skipIf(pandas is None, "Requires `pandas`")
+    def test_insert_rows_from_dataframe_w_explicit_none_insert_ids(self):
+        from google.cloud.bigquery.table import SchemaField
+        from google.cloud.bigquery.table import Table
+
+        API_PATH = "/projects/{}/datasets/{}/tables/{}/insertAll".format(
+            self.PROJECT, self.DS_ID, self.TABLE_REF.table_id
+        )
+
+        dataframe = pandas.DataFrame(
+            [
+                {"name": u"Little One", "adult": False},
+                {"name": u"Young Gun", "adult": True},
+            ]
+        )
+
+        # create client
+        creds = _make_credentials()
+        http = object()
+        client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)
+        conn = client._connection = make_connection({}, {})
+
+        # create table
+        schema = [
+            SchemaField("name", "STRING", mode="REQUIRED"),
+            SchemaField("adult", "BOOLEAN", mode="REQUIRED"),
+        ]
+        table = Table(self.TABLE_REF, schema=schema)
+
+        error_info = client.insert_rows_from_dataframe(
+            table, dataframe, row_ids=[None] * len(dataframe)
+        )
+
+        self.assertEqual(len(error_info), 1)
+        assert error_info[0] == []  # no chunk errors
+
+        EXPECTED_SENT_DATA = {
+            "rows": [
+                {"insertId": None, "json": {"name": "Little One", "adult": "false"}},
+                {"insertId": None, "json": {"name": "Young Gun", "adult": "true"}},
+            ]
+        }
+
+        actual_calls = conn.api_request.call_args_list
+        assert len(actual_calls) == 1
+        assert actual_calls[0] == mock.call(
+            method="POST", path=API_PATH, data=EXPECTED_SENT_DATA
+        )
+
     def test_insert_rows_json(self):
         from google.cloud.bigquery.table import Table, SchemaField
         from google.cloud.bigquery.dataset import DatasetReference
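
Per the expected payloads asserted in the tests above, passing explicit
``None`` IDs still emits an ``insertId`` key for every row, just with a
``None`` value (serialized as JSON ``null`` on the wire). A tiny
self-contained illustration of that payload shape, independent of any client:

    import json

    rows = [{"col1": "val1"}, {"col2": "val2"}]

    # Mirrors the SENT / EXPECTED_SENT_DATA structures in the tests above.
    payload = {"rows": [{"json": row, "insertId": None} for row in rows]}

    assert payload["rows"][0]["insertId"] is None
    print(json.dumps(payload))  # insertId renders as null in the request body
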
@@ -4833,6 +4916,27 @@ def test_insert_rows_json_with_string_id(self):
             data=expected,
         )
 
+    def test_insert_rows_json_w_explicit_none_insert_ids(self):
+        rows = [{"col1": "val1"}, {"col2": "val2"}]
+        creds = _make_credentials()
+        http = object()
+        client = self._make_one(
+            project="default-project", credentials=creds, _http=http
+        )
+        conn = client._connection = make_connection({})
+
+        errors = client.insert_rows_json(
+            "proj.dset.tbl", rows, row_ids=[None] * len(rows),
+        )
+
+        self.assertEqual(len(errors), 0)
+        expected = {"rows": [{"json": row, "insertId": None} for row in rows]}
+        conn.api_request.assert_called_once_with(
+            method="POST",
+            path="/projects/proj/datasets/dset/tables/tbl/insertAll",
+            data=expected,
+        )
+
     def test_list_partitions(self):
         from google.cloud.bigquery.table import Table
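
Because each ``row_ids`` element is ``Optional[str]``, caller-supplied and
absent insert IDs can also be mixed within a single request. A short usage
sketch, assuming the destination table already exists; the table ID and row ID
values are hypothetical:

    from google.cloud import bigquery

    client = bigquery.Client()
    rows = [
        {"full_name": "Phred Phlyntstone", "age": 32},
        {"full_name": "Wylma Phlyntstone", "age": 29},
    ]

    # De-duplicate the first row on a caller-supplied ID, while opting the
    # second row out of insert IDs entirely.
    errors = client.insert_rows_json(
        "your-project.your_dataset.your_table",  # hypothetical table ID
        rows,
        row_ids=["phred-insert-1", None],
    )
    if not errors:
        print("Rows streamed successfully.")
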