Fix test coverage.

googleapis · Aug 20, 2019 · a6126b7
1 parent b901cf2
commit a6126b7
Show file tree

Hide file tree

Showing 2 changed files with 74 additions and 3 deletions.
diff --git a/bigquery/google/cloud/bigquery/client.py b/bigquery/google/cloud/bigquery/client.py
@@ -1532,7 +1532,13 @@ def load_table_from_dataframe(
             location = self.location
 
         if not job_config.schema:
-            job_config.schema = _pandas_helpers.dataframe_to_bq_schema(dataframe)
+            autodetected_schema = _pandas_helpers.dataframe_to_bq_schema(dataframe)
+
+            # Only use an explicit schema if we were able to determine one
+            # matching the dataframe. If not, fall back to the pandas
+            # to_parquet method.
+            if autodetected_schema:
+                job_config.schema = autodetected_schema
 
         tmpfd, tmppath = tempfile.mkstemp(suffix="_job_{}.parquet".format(job_id[:8]))
         os.close(tmpfd)

diff --git a/bigquery/tests/unit/test_client.py b/bigquery/tests/unit/test_client.py
@@ -5325,9 +5325,74 @@ def test_load_table_from_dataframe_w_custom_job_config(self):
         )
 
         sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
-        assert sent_config is job_config
         assert sent_config.source_format == job.SourceFormat.PARQUET
 
+    @unittest.skipIf(pandas is None, "Requires `pandas`")
+    @unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
+    def test_load_table_from_dataframe_w_automatic_schema(self):
+        from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
+        from google.cloud.bigquery import job
+        from google.cloud.bigquery.schema import SchemaField
+
+        client = self._make_client()
+        dt_col = pandas.Series(
+            [
+                datetime.datetime(2010, 1, 2, 3, 44, 50),
+                datetime.datetime(2011, 2, 3, 14, 50, 59),
+                datetime.datetime(2012, 3, 14, 15, 16),
+            ],
+            dtype="datetime64[ns]",
+        )
+        ts_col = pandas.Series(
+            [
+                datetime.datetime(2010, 1, 2, 3, 44, 50),
+                datetime.datetime(2011, 2, 3, 14, 50, 59),
+                datetime.datetime(2012, 3, 14, 15, 16),
+            ],
+            dtype="datetime64[ns]",
+        ).dt.tz_localize(pytz.utc)
+        df_data = {
+            "int_col": [1, 2, 3],
+            "float_col": [1.0, 2.0, 3.0],
+            "bool_col": [True, False, True],
+            "dt_col": dt_col,
+            "ts_col": ts_col,
+        }
+        dataframe = pandas.DataFrame(
+            df_data, columns=["int_col", "float_col", "bool_col", "dt_col", "ts_col"]
+        )
+        load_patch = mock.patch(
+            "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True
+        )
+
+        with load_patch as load_table_from_file:
+            client.load_table_from_dataframe(
+                dataframe, self.TABLE_REF, location=self.LOCATION
+            )
+
+        load_table_from_file.assert_called_once_with(
+            client,
+            mock.ANY,
+            self.TABLE_REF,
+            num_retries=_DEFAULT_NUM_RETRIES,
+            rewind=True,
+            job_id=mock.ANY,
+            job_id_prefix=None,
+            location=self.LOCATION,
+            project=None,
+            job_config=mock.ANY,
+        )
+
+        sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
+        assert sent_config.source_format == job.SourceFormat.PARQUET
+        assert tuple(sent_config.schema) == (
+            SchemaField("int_col", "INTEGER"),
+            SchemaField("float_col", "FLOAT"),
+            SchemaField("bool_col", "BOOLEAN"),
+            SchemaField("dt_col", "DATETIME"),
+            SchemaField("ts_col", "TIMESTAMP"),
+        )
+
     @unittest.skipIf(pandas is None, "Requires `pandas`")
     @unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
     def test_load_table_from_dataframe_w_schema_wo_pyarrow(self):
@@ -5475,7 +5540,7 @@ def test_load_table_from_dataframe_w_nulls(self):
         )
 
         sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
-        assert sent_config is job_config
+        assert sent_config.schema == schema
         assert sent_config.source_format == job.SourceFormat.PARQUET
 
     # Low-level tests