Skip to content

Commit

Permalink
fix: load_table_from_dataframe does not error out when nan in a required column
Browse files Browse the repository at this point in the history
  • Loading branch information
Gaurang033 committed Oct 31, 2023
1 parent f22eff2 commit d06a2ac
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 0 deletions.
14 changes: 14 additions & 0 deletions google/cloud/bigquery/_pandas_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,19 @@ def bq_to_arrow_array(series, bq_field):
return pyarrow.Array.from_pandas(series, type=arrow_type)


def _check_nullability(arrow_fields, dataframe):
    """Raise if a non-nullable field has null values in ``dataframe``.

    Args:
        arrow_fields:
            Iterable of field objects exposing ``name`` and ``nullable``
            attributes. Entries may be ``None`` (the caller checks for
            ``None`` fields only after this function runs); such entries
            are skipped.
        dataframe:
            The pandas DataFrame to validate. It is only read, never
            modified.

    Raises:
        ValueError: If a field with ``nullable`` false maps to a column or
            named index level that contains at least one null value.
        KeyError: If a non-nullable field names neither a column nor an
            index level of ``dataframe``.
    """
    for arrow_field in arrow_fields:
        # Fields whose type could not be determined are represented as None;
        # there is nothing to validate for them.
        if arrow_field is None or arrow_field.nullable:
            continue

        col_name = arrow_field.name
        if col_name in dataframe.columns:
            values = dataframe[col_name]
        else:
            # Look the name up among the index levels instead of copying the
            # index into the caller's dataframe. This also covers MultiIndex
            # levels, and raises KeyError when the name is unknown.
            values = dataframe.index.get_level_values(col_name).to_series()

        if values.isnull().values.any():
            raise ValueError(f"required field {col_name} can not be nulls")


def get_column_or_index(dataframe, name):
"""Return a column or index as a pandas series."""
if name in dataframe.columns:
Expand Down Expand Up @@ -587,6 +600,7 @@ def dataframe_to_arrow(dataframe, bq_schema):
)
arrow_fields.append(bq_to_arrow_field(bq_field, arrow_arrays[-1].type))

_check_nullability(arrow_fields, dataframe)
if all((field is not None for field in arrow_fields)):
return pyarrow.Table.from_arrays(
arrow_arrays, schema=pyarrow.schema(arrow_fields)
Expand Down
26 changes: 26 additions & 0 deletions tests/unit/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -8677,6 +8677,32 @@ def test_load_table_from_dataframe_w_nulls(self):
assert sent_config.schema == schema
assert sent_config.source_format == job.SourceFormat.PARQUET

@unittest.skipIf(pandas is None, "Requires `pandas`")
@unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
def test_load_table_from_dataframe_w_nulls_for_required_cols(self):
    """Loading a DataFrame must fail when a REQUIRED column holds nulls.

    Both columns contain only nulls, but just ``age`` is REQUIRED in the
    schema, so the error is expected to name ``age``.
    See: https://github.com/googleapis/python-bigquery/issues/1692
    """
    from google.cloud.bigquery import job
    from google.cloud.bigquery.schema import SchemaField

    client = self._make_client()

    # Two rows where every value is null.
    null_row = {"name": None, "age": None}
    dataframe = pandas.DataFrame(
        [dict(null_row), dict(null_row)], columns=["name", "age"]
    )

    name_field = SchemaField("name", "STRING")
    age_field = SchemaField("age", "INTEGER", mode="REQUIRED")
    job_config = job.LoadJobConfig(schema=[name_field, age_field])

    with pytest.raises(ValueError) as exc_info:
        client.load_table_from_dataframe(
            dataframe,
            self.TABLE_REF,
            job_config=job_config,
            location=self.LOCATION,
        )

    expected_message = "required field age can not be nulls"
    assert str(exc_info.value) == expected_message

@unittest.skipIf(pandas is None, "Requires `pandas`")
def test_load_table_from_dataframe_w_invaild_job_config(self):
from google.cloud.bigquery import job
Expand Down

0 comments on commit d06a2ac

Please sign in to comment.