Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 8 additions & 6 deletions pandas_gbq/schema/pandas_to_bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,14 +139,16 @@ def dataframe_to_bigquery_fields(
bq_schema_out.append(bq_field)
unknown_type_fields.append(bq_field)

# Catch any schema mismatch. The developer explicitly asked to serialize a
# column, but it was not found.
# Warn about any fields from the BigQuery schema that are not in the
# DataFrame, then append them to the output schema sorted by field name.
if override_fields_unused:
raise ValueError(
"Provided BigQuery fields contain field(s) not present in DataFrame: {}".format(
override_fields_unused
)
warnings.warn(
"Provided BigQuery fields contain field(s) not present in "
"DataFrame: {}".format(sorted(override_fields_unused)),
UserWarning,
)
for field_name in sorted(override_fields_unused):
bq_schema_out.append(override_fields_by_name[field_name])

# If schema detection was not successful for all columns, also try with
# pyarrow, if available.
Expand Down
32 changes: 25 additions & 7 deletions tests/unit/schema/test_pandas_to_bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,16 +179,34 @@ def test_dataframe_to_bigquery_fields_fallback_needed_w_pyarrow(module_under_tes


def test_dataframe_to_bigquery_fields_w_extra_fields(module_under_test):
with pytest.raises(ValueError) as exc_context:
module_under_test.dataframe_to_bigquery_fields(
pandas.DataFrame(),
override_bigquery_fields=(schema.SchemaField("not_in_df", "STRING"),),
dataframe = pandas.DataFrame({"in_df": [1, 2, 3]})
bq_schema = (
schema.SchemaField("in_df", "INTEGER"),
schema.SchemaField("not_in_df", "STRING"),
schema.SchemaField("also_not_in_df", "INTEGER"),
)

with pytest.warns(UserWarning) as record:
returned_schema = module_under_test.dataframe_to_bigquery_fields(
dataframe, override_bigquery_fields=bq_schema
)
message = str(exc_context.value)

assert len(record) == 1
message = str(record[0].message)
assert (
"Provided BigQuery fields contain field(s) not present in DataFrame:" in message
"Provided BigQuery fields contain field(s) not present in DataFrame" in message
)
assert "not_in_df" in message
# Note: The field names are sorted in the warning message.
assert "['also_not_in_df', 'not_in_df']" in message

expected_schema = (
schema.SchemaField("in_df", "INTEGER"),
# Note: The extra fields come after the DataFrame columns and are sorted
# by name, because they are appended from the sorted unused field names.
schema.SchemaField("also_not_in_df", "INTEGER"),
schema.SchemaField("not_in_df", "STRING"),
)
assert returned_schema == expected_schema


def test_dataframe_to_bigquery_fields_geography(module_under_test):
Expand Down