diff --git a/pandas_gbq/schema/pandas_to_bigquery.py b/pandas_gbq/schema/pandas_to_bigquery.py index 5afae356..f2a56f52 100644 --- a/pandas_gbq/schema/pandas_to_bigquery.py +++ b/pandas_gbq/schema/pandas_to_bigquery.py @@ -139,14 +139,16 @@ def dataframe_to_bigquery_fields( bq_schema_out.append(bq_field) unknown_type_fields.append(bq_field) - # Catch any schema mismatch. The developer explicitly asked to serialize a - # column, but it was not found. + # Append any fields from the BigQuery schema that are not in the + # DataFrame. if override_fields_unused: - raise ValueError( - "Provided BigQuery fields contain field(s) not present in DataFrame: {}".format( - override_fields_unused - ) + warnings.warn( + "Provided BigQuery fields contain field(s) not present in " + "DataFrame: {}".format(sorted(override_fields_unused)), + UserWarning, ) + for field_name in sorted(override_fields_unused): + bq_schema_out.append(override_fields_by_name[field_name]) # If schema detection was not successful for all columns, also try with # pyarrow, if available. diff --git a/tests/unit/schema/test_pandas_to_bigquery.py b/tests/unit/schema/test_pandas_to_bigquery.py index f3c4410b..4f3be85c 100644 --- a/tests/unit/schema/test_pandas_to_bigquery.py +++ b/tests/unit/schema/test_pandas_to_bigquery.py @@ -179,16 +179,34 @@ def test_dataframe_to_bigquery_fields_fallback_needed_w_pyarrow(module_under_tes def test_dataframe_to_bigquery_fields_w_extra_fields(module_under_test): - with pytest.raises(ValueError) as exc_context: - module_under_test.dataframe_to_bigquery_fields( - pandas.DataFrame(), - override_bigquery_fields=(schema.SchemaField("not_in_df", "STRING"),), + dataframe = pandas.DataFrame({"in_df": [1, 2, 3]}) + bq_schema = ( + schema.SchemaField("in_df", "INTEGER"), + schema.SchemaField("not_in_df", "STRING"), + schema.SchemaField("also_not_in_df", "INTEGER"), + ) + + with pytest.warns(UserWarning) as record: + returned_schema = module_under_test.dataframe_to_bigquery_fields( + dataframe, override_bigquery_fields=bq_schema ) - message = str(exc_context.value) + + assert len(record) == 1 + message = str(record[0].message) assert ( - "Provided BigQuery fields contain field(s) not present in DataFrame:" in message + "Provided BigQuery fields contain field(s) not present in DataFrame" in message ) - assert "not_in_df" in message + # Note: The field names are sorted in the warning message. + assert "['also_not_in_df', 'not_in_df']" in message + + expected_schema = ( + schema.SchemaField("in_df", "INTEGER"), + # Note: The fields are sorted by name as they are added from the set of + # unused fields. + schema.SchemaField("also_not_in_df", "INTEGER"), + schema.SchemaField("not_in_df", "STRING"), + ) + assert returned_schema == expected_schema def test_dataframe_to_bigquery_fields_geography(module_under_test):