deps: expand pyarrow dependencies to include version 2 (#368)
Pyarrow 2.0 includes several bug fixes. The wire format remains the same, so it continues to be compatible with the BigQuery Storage API.
tswast committed Nov 10, 2020
1 parent 30de15f commit cd9febd
Showing 2 changed files with 17 additions and 3 deletions.
@@ -46,12 +46,12 @@
         # grpc.Channel.close() method isn't added until 1.32.0.
         # https://github.com/grpc/grpc/pull/15254
         "grpcio >= 1.32.0, < 2.0dev",
-        "pyarrow >= 1.0.0, < 2.0dev",
+        "pyarrow >= 1.0.0, < 3.0dev",
     ],
     "pandas": [
         "pandas>=0.23.0",
         # pyarrow 1.0.0 is required for the use of timestamp_as_object keyword.
-        "pyarrow >= 1.0.0, < 2.0dev",
+        "pyarrow >= 1.0.0, < 3.0dev",
     ],
     "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"],
     "opentelemetry": [
@@ -19,6 +19,7 @@
 import warnings
 
 import mock
+import pkg_resources
 import pytest
 import six
 
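The comment on the pandas extra in the dependency hunk above refers to the timestamp_as_object keyword of pyarrow.Table.to_pandas(), added in pyarrow 1.0.0. Passing timestamp_as_object=True converts timestamp columns to plain Python datetime objects instead of pandas nanosecond Timestamps, so values outside pandas' representable range (roughly years 1677 to 2262) survive the conversion. A minimal illustrative sketch, not part of this commit (the column name is borrowed from the test below):

import datetime as dt

import pyarrow as pa

# A timestamp far beyond the range of pandas' nanosecond Timestamp.
table = pa.table(
    {"some_timestamp": pa.array([dt.datetime(9999, 12, 31)], type=pa.timestamp("us"))}
)

# With timestamp_as_object=True (pyarrow >= 1.0.0), out-of-range values come
# back as plain Python datetime objects instead of overflowing.
df = table.to_pandas(timestamp_as_object=True)
print(df["some_timestamp"][0])  # datetime.datetime(9999, 12, 31, 0, 0)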
@@ -41,8 +42,11 @@
try:
import pyarrow
import pyarrow.types

PYARROW_VERSION = pkg_resources.parse_version(pyarrow.__version__)
except ImportError: # pragma: NO COVER
pyarrow = None
PYARROW_VERSION = pkg_resources.parse_version("0.0.1")

try:
from tqdm import tqdm
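The PYARROW_VERSION parsing added above lets the tests branch on the installed pyarrow version. pkg_resources.parse_version compares versions semantically rather than lexically, and the "0.0.1" fallback keeps the comparison well-defined when pyarrow is absent. A small sketch of the pattern (version values hypothetical):

import pkg_resources

# parse_version yields objects that compare by version semantics, so
# "10.0.0" sorts after "9.0.0" (a plain string comparison would not).
assert pkg_resources.parse_version("10.0.0") > pkg_resources.parse_version("9.0.0")

PYARROW_TIMESTAMP_VERSION = pkg_resources.parse_version("2.0.0")
installed = pkg_resources.parse_version("2.0.1")  # e.g. pyarrow.__version__

if installed >= PYARROW_TIMESTAMP_VERSION:
    print("expect timezone-aware datetimes from to_pandas()")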
@@ -52,6 +56,9 @@
 from google.cloud.bigquery.dataset import DatasetReference
 
 
+PYARROW_TIMESTAMP_VERSION = pkg_resources.parse_version("2.0.0")
+
+
 def _mock_client():
     from google.cloud.bigquery import client
 
@@ -2339,12 +2346,19 @@ def test_to_dataframe_timestamp_out_of_pyarrow_bounds(self):
 
         df = row_iterator.to_dataframe(create_bqstorage_client=False)
 
+        tzinfo = None
+        if PYARROW_VERSION >= PYARROW_TIMESTAMP_VERSION:
+            tzinfo = dt.timezone.utc
+
         self.assertIsInstance(df, pandas.DataFrame)
         self.assertEqual(len(df), 2)  # verify the number of rows
         self.assertEqual(list(df.columns), ["some_timestamp"])
         self.assertEqual(
             list(df["some_timestamp"]),
-            [dt.datetime(4567, 1, 1), dt.datetime(9999, 12, 31)],
+            [
+                dt.datetime(4567, 1, 1, tzinfo=tzinfo),
+                dt.datetime(9999, 12, 31, tzinfo=tzinfo),
+            ],
         )
 
     @pytest.mark.xfail(

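The test change above tracks a behavior difference in pyarrow 2.0: converting a timezone-aware (UTC) timestamp column with timestamp_as_object=True yields datetimes carrying tzinfo=datetime.timezone.utc, while pyarrow 1.x returned naive datetimes. A hedged sketch of the difference (illustrative, not code from this commit):

import datetime as dt

import pyarrow as pa

# BigQuery TIMESTAMP values are UTC, so model the column as tz-aware.
arr = pa.array(
    [dt.datetime(4567, 1, 1), dt.datetime(9999, 12, 31)],
    type=pa.timestamp("us", tz="UTC"),
)
df = pa.table({"some_timestamp": arr}).to_pandas(timestamp_as_object=True)

value = df["some_timestamp"][0]
# pyarrow >= 2.0: value.tzinfo == dt.timezone.utc
# pyarrow 1.x:    value.tzinfo is None (naive datetime)
print(value.tzinfo)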
2 comments on commit cd9febd

@emkornfield
Just a note: there is an issue with Arrow 2.0 when writing nested Parquet data. We are discussing a patch release: ARROW-10493

@tswast (Contributor, Author) commented on cd9febd Nov 11, 2020

Thanks for the heads-up. We don't officially support nested data yet, so none of our system or sample tests failed because of this.
