From 20a0768e80967c54fee72a355937dd7773fa05cc Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Wed, 15 Oct 2025 15:16:28 +0000 Subject: [PATCH 1/2] feat: Allow loading a DataFrame with a subset of BigQuery columns This change modifies the behavior when a DataFrame is loaded to BigQuery with a schema that contains fields not present in the DataFrame. Instead of raising a `ValueError`, a `UserWarning` is now issued, and the extra fields are appended to the schema. This allows for more flexible data loading scenarios. --- pandas_gbq/schema/pandas_to_bigquery.py | 14 +++++---- tests/unit/schema/test_pandas_to_bigquery.py | 33 +++++++++++++++----- 2 files changed, 34 insertions(+), 13 deletions(-) diff --git a/pandas_gbq/schema/pandas_to_bigquery.py b/pandas_gbq/schema/pandas_to_bigquery.py index 5afae356..f2a56f52 100644 --- a/pandas_gbq/schema/pandas_to_bigquery.py +++ b/pandas_gbq/schema/pandas_to_bigquery.py @@ -139,14 +139,16 @@ def dataframe_to_bigquery_fields( bq_schema_out.append(bq_field) unknown_type_fields.append(bq_field) - # Catch any schema mismatch. The developer explicitly asked to serialize a - # column, but it was not found. + # Append any fields from the BigQuery schema that are not in the + # DataFrame. if override_fields_unused: - raise ValueError( - "Provided BigQuery fields contain field(s) not present in DataFrame: {}".format( - override_fields_unused - ) + warnings.warn( + "Provided BigQuery fields contain field(s) not present in " + "DataFrame: {}".format(sorted(override_fields_unused)), + UserWarning, ) + for field_name in sorted(override_fields_unused): + bq_schema_out.append(override_fields_by_name[field_name]) # If schema detection was not successful for all columns, also try with # pyarrow, if available. diff --git a/tests/unit/schema/test_pandas_to_bigquery.py b/tests/unit/schema/test_pandas_to_bigquery.py index f3c4410b..fcba213e 100644 --- a/tests/unit/schema/test_pandas_to_bigquery.py +++ b/tests/unit/schema/test_pandas_to_bigquery.py @@ -179,16 +179,35 @@ def test_dataframe_to_bigquery_fields_fallback_needed_w_pyarrow(module_under_tes def test_dataframe_to_bigquery_fields_w_extra_fields(module_under_test): - with pytest.raises(ValueError) as exc_context: - module_under_test.dataframe_to_bigquery_fields( - pandas.DataFrame(), - override_bigquery_fields=(schema.SchemaField("not_in_df", "STRING"),), + dataframe = pandas.DataFrame({"in_df": [1, 2, 3]}) + bq_schema = ( + schema.SchemaField("in_df", "INTEGER"), + schema.SchemaField("not_in_df", "STRING"), + schema.SchemaField("also_not_in_df", "INTEGER"), + ) + + with pytest.warns(UserWarning) as record: + returned_schema = module_under_test.dataframe_to_bigquery_fields( + dataframe, override_bigquery_fields=bq_schema ) - message = str(exc_context.value) + + assert len(record) == 1 + message = str(record[0].message) assert ( - "Provided BigQuery fields contain field(s) not present in DataFrame:" in message + "Provided BigQuery fields contain field(s) not present in DataFrame" + in message ) - assert "not_in_df" in message + # Note: The field names are sorted in the warning message. + assert "['also_not_in_df', 'not_in_df']" in message + + expected_schema = ( + schema.SchemaField("in_df", "INTEGER"), + # Note: The fields are sorted by name as they are added from the set of + # unused fields. + schema.SchemaField("also_not_in_df", "INTEGER"), + schema.SchemaField("not_in_df", "STRING"), + ) + assert returned_schema == expected_schema def test_dataframe_to_bigquery_fields_geography(module_under_test): From 1d15e16dd1b6755c366574bf6091c7a214a6ed77 Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Wed, 15 Oct 2025 16:24:23 +0000 Subject: [PATCH 2/2] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20po?= =?UTF-8?q?st-processor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- tests/unit/schema/test_pandas_to_bigquery.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/unit/schema/test_pandas_to_bigquery.py b/tests/unit/schema/test_pandas_to_bigquery.py index fcba213e..4f3be85c 100644 --- a/tests/unit/schema/test_pandas_to_bigquery.py +++ b/tests/unit/schema/test_pandas_to_bigquery.py @@ -194,8 +194,7 @@ def test_dataframe_to_bigquery_fields_w_extra_fields(module_under_test): assert len(record) == 1 message = str(record[0].message) assert ( - "Provided BigQuery fields contain field(s) not present in DataFrame" - in message + "Provided BigQuery fields contain field(s) not present in DataFrame" in message ) # Note: The field names are sorted in the warning message. assert "['also_not_in_df', 'not_in_df']" in message