fix: load_table_from_dataframe method for issue 1692 #1698

Closed
15 changes: 15 additions & 0 deletions google/cloud/bigquery/_pandas_helpers.py
@@ -302,6 +302,20 @@ def bq_to_arrow_array(series, bq_field):
    return pyarrow.Array.from_pandas(series, type=arrow_type)


def _check_nullability(arrow_fields, dataframe):
    """Raise a ValueError if the dataframe contains nulls in a column whose schema field is not nullable."""
    if dataframe.index.name:
@Linchin (Contributor) on Nov 6, 2023:

Could you please help me understand what lines 307-308 are for?

@Gaurang033 (Contributor, Author):

It's hard to point you to the exact code, as I am on vacation. But when a dataframe with an index is used, the index name is transformed into an arrow column name. There were two ways to fix it: either raise an exception for this case, or create another column with the index name. I chose the second option as it's easier. Without this, the dataframe unit test cases that use index names would fail.
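To make this concrete, here is a minimal sketch (illustrative only; the variable names are hypothetical) of how a named pandas index surfaces as an Arrow column, which is why the helper mirrors the index into a regular column before checking for nulls:

```python
import pandas as pd
import pyarrow as pa

# A dataframe whose index carries a name.
df = pd.DataFrame({"age": [1, None]}, index=pd.Index(["a", "b"], name="name"))

# pyarrow materializes the named index as an ordinary column...
table = pa.Table.from_pandas(df)
print(table.column_names)  # ['age', 'name']

# ...so to look up nulls by column name, the helper copies the index
# into the dataframe's columns first:
if df.index.name:
    df[df.index.name] = df.index
print(df.columns.tolist())  # ['age', 'name']
```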

@Linchin (Contributor):


Thank you, and I hope you are having a good time on vacation! I played with the dataframe's index a little, and I think there are several corner cases (likely not a comprehensive list) that we need to cover; a few are sketched below:

  • An index that doesn't have a name: does it still get converted into arrow?
  • Multiple indexes
  • An index with the same name as a column, which is possible with dataframes
  • Index columns that share the same names (also possible)
  • A MultiIndex
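For reference, a quick pandas sketch of a few of these corner cases (purely illustrative):

```python
import pandas as pd

# An index without a name (the default RangeIndex).
df1 = pd.DataFrame({"a": [1, 2]})
assert df1.index.name is None

# A MultiIndex with multiple named levels.
df2 = pd.DataFrame(
    {"a": [1, 2]},
    index=pd.MultiIndex.from_tuples([("x", 1), ("y", 2)], names=["k1", "k2"]),
)

# An index sharing its name with an existing column: pandas allows this,
# but `df3[df3.index.name] = df3.index` would silently overwrite column "a".
df3 = pd.DataFrame({"a": [1, 2]}, index=pd.Index([10, 20], name="a"))
```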

Contributor:


@Gaurang033 Thanks for offering up this PR.
@Linchin I appreciate this summary of additional edge cases that may not be covered by this solution.

I too worry about the edge cases, but more importantly, I worry about spending too much time and energy trying to create a workaround for what we all agree is a problem in pyarrow. This feels like it creates greater complexity in our code, increased fragility, and a higher maintenance burden in the long run. Am I missing something?

Contributor:


I am also uncertain whether we should add logic in our repo to correct an issue with pyarrow. I have been thinking of this PR as more of a temporary patch that may be reverted later but helps our customers for now. However, if the logic covering the corner cases gets too convoluted with the behaviors of pyarrow, I agree that it may be better to open an issue with pyarrow instead.
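For context, a minimal sketch of the underlying pyarrow behavior the thread is describing: as far as I can tell, pyarrow treats a field's nullable flag as metadata and does not enforce it at table-construction time, so the bad rows are only rejected later, server-side (observed behavior; worth re-verifying against the pyarrow version in use):

```python
import pyarrow as pa

# Schema marks "age" non-nullable, mirroring a REQUIRED BigQuery field.
schema = pa.schema([pa.field("age", pa.int64(), nullable=False)])

# The array contains a null, yet constructing the table does not raise,
# which is why this PR adds an explicit client-side check.
table = pa.Table.from_arrays(
    [pa.array([1, None], type=pa.int64())], schema=schema
)
print(table.schema.field("age").nullable)  # False
print(table.column("age").null_count)      # 1
```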

        # Mirror a named index into the columns so it can be checked by name.
        dataframe[dataframe.index.name] = dataframe.index
    for arrow_field in arrow_fields:
        if arrow_field:
            col_name = arrow_field.name
            if (
                not arrow_field.nullable
                and dataframe[col_name].isnull().values.any()
            ):
                raise ValueError(f"required field {col_name} cannot be null")


def get_column_or_index(dataframe, name):
    """Return a column or index as a pandas series."""
    if name in dataframe.columns:
@@ -587,6 +601,7 @@ def dataframe_to_arrow(dataframe, bq_schema):
        )
        arrow_fields.append(bq_to_arrow_field(bq_field, arrow_arrays[-1].type))

    _check_nullability(arrow_fields, dataframe)
    if all((field is not None for field in arrow_fields)):
        return pyarrow.Table.from_arrays(
            arrow_arrays, schema=pyarrow.schema(arrow_fields)
26 changes: 26 additions & 0 deletions tests/unit/test_client.py
@@ -8677,6 +8677,32 @@ def test_load_table_from_dataframe_w_nulls(self):
        assert sent_config.schema == schema
        assert sent_config.source_format == job.SourceFormat.PARQUET

    @unittest.skipIf(pandas is None, "Requires `pandas`")
    @unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
    def test_load_table_from_dataframe_w_nulls_for_required_cols(self):
        """Test that a DataFrame with nulls raises an error when the
        corresponding field in the BigQuery schema is REQUIRED.

        See: https://github.com/googleapis/python-bigquery/issues/1692
        """
        from google.cloud.bigquery.schema import SchemaField
        from google.cloud.bigquery import job

        client = self._make_client()
        records = [{"name": None, "age": None}, {"name": None, "age": None}]
        dataframe = pandas.DataFrame(records, columns=["name", "age"])
        schema = [
            SchemaField("name", "STRING"),
            SchemaField("age", "INTEGER", mode="REQUIRED"),
        ]
        job_config = job.LoadJobConfig(schema=schema)
        with pytest.raises(ValueError) as e:
            client.load_table_from_dataframe(
                dataframe, self.TABLE_REF, job_config=job_config, location=self.LOCATION
            )

        assert str(e.value) == "required field age cannot be null"

    @unittest.skipIf(pandas is None, "Requires `pandas`")
    def test_load_table_from_dataframe_w_invaild_job_config(self):
        from google.cloud.bigquery import job