deps: expand pyarrow dependencies to include version 2 (#368)
Pyarrow 2.0 includes several bug fixes. The wire format remains the same, so it continues to be compatible with the BigQuery Storage API.
tswast committed Nov 10, 2020
1 parent 30de15f commit cd9febd20c34983781386c3bf603e5fca7135695
Showing with 17 additions and 3 deletions.
  1. +2 −2 setup.py
  2. +15 −1 tests/unit/test_table.py
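
As a quick illustrative check of what the widened pin accepts (a sketch using pkg_resources, not part of the commit):

import pkg_resources

# The new requirement range introduced by this commit.
req = pkg_resources.Requirement.parse("pyarrow >= 1.0.0, < 3.0dev")

assert "1.0.0" in req  # still allowed
assert "2.0.0" in req  # newly allowed by this commit
assert "3.0.0" not in req  # the next major version stays excluded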
setup.py
@@ -46,12 +46,12 @@
         # grpc.Channel.close() method isn't added until 1.32.0.
         # https://github.com/grpc/grpc/pull/15254
         "grpcio >= 1.32.0, < 2.0dev",
-        "pyarrow >= 1.0.0, < 2.0dev",
+        "pyarrow >= 1.0.0, < 3.0dev",
     ],
     "pandas": [
         "pandas>=0.23.0",
         # pyarrow 1.0.0 is required for the use of timestamp_as_object keyword.
-        "pyarrow >= 1.0.0, < 2.0dev",
+        "pyarrow >= 1.0.0, < 3.0dev",
     ],
     "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"],
     "opentelemetry": [
tests/unit/test_table.py
@@ -19,6 +19,7 @@
 import warnings
 
 import mock
+import pkg_resources
 import pytest
 import six
 
@@ -41,8 +42,11 @@
 try:
     import pyarrow
     import pyarrow.types
+
+    PYARROW_VERSION = pkg_resources.parse_version(pyarrow.__version__)
 except ImportError:  # pragma: NO COVER
     pyarrow = None
+    PYARROW_VERSION = pkg_resources.parse_version("0.0.1")
 
 try:
     from tqdm import tqdm
@@ -52,6 +56,9 @@
 from google.cloud.bigquery.dataset import DatasetReference
 
 
+PYARROW_TIMESTAMP_VERSION = pkg_resources.parse_version("2.0.0")
+
+
 def _mock_client():
     from google.cloud.bigquery import client
 
@@ -2339,12 +2346,19 @@ def test_to_dataframe_timestamp_out_of_pyarrow_bounds(self):
 
         df = row_iterator.to_dataframe(create_bqstorage_client=False)
 
+        tzinfo = None
+        if PYARROW_VERSION >= PYARROW_TIMESTAMP_VERSION:
+            tzinfo = dt.timezone.utc
+
         self.assertIsInstance(df, pandas.DataFrame)
         self.assertEqual(len(df), 2)  # verify the number of rows
         self.assertEqual(list(df.columns), ["some_timestamp"])
         self.assertEqual(
             list(df["some_timestamp"]),
-            [dt.datetime(4567, 1, 1), dt.datetime(9999, 12, 31)],
+            [
+                dt.datetime(4567, 1, 1, tzinfo=tzinfo),
+                dt.datetime(9999, 12, 31, tzinfo=tzinfo),
+            ],
         )
 
     @pytest.mark.xfail(
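
For context on the test change: with timestamp_as_object=True, pyarrow 1.x returned naive datetimes, while pyarrow 2.0 attaches UTC tzinfo, which is why the expected values are now version-gated. A minimal standalone sketch of that difference (illustrative only, not part of the commit; it mirrors the test's column name and values):

import datetime as dt

import pkg_resources
import pyarrow

# A tz-aware Arrow timestamp column whose values fall outside pandas'
# nanosecond Timestamp range, like the BigQuery TIMESTAMPs in the test.
arr = pyarrow.array(
    [
        dt.datetime(4567, 1, 1, tzinfo=dt.timezone.utc),
        dt.datetime(9999, 12, 31, tzinfo=dt.timezone.utc),
    ],
    type=pyarrow.timestamp("us", tz="UTC"),
)
table = pyarrow.Table.from_arrays([arr], names=["some_timestamp"])

# timestamp_as_object=True keeps out-of-range values as Python datetimes.
df = table.to_pandas(timestamp_as_object=True)

expected_tzinfo = None
if pkg_resources.parse_version(pyarrow.__version__) >= pkg_resources.parse_version("2.0.0"):
    expected_tzinfo = dt.timezone.utc

# Aware datetimes compare by absolute time, so this holds whether pyarrow
# hands back datetime.timezone.utc or another UTC tzinfo implementation.
assert list(df["some_timestamp"]) == [
    dt.datetime(4567, 1, 1, tzinfo=expected_tzinfo),
    dt.datetime(9999, 12, 31, tzinfo=expected_tzinfo),
]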

2 comments on commit cd9febd

@emkornfield replied Nov 11, 2020

Just a note: there is an issue with Arrow 2.0 when writing nested Parquet data. We are discussing a patch release: ARROW-10493
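
For reference, "nested Parquet data" means list- or struct-typed columns. A minimal sketch of the kind of write the issue concerns (illustrative only; the file name is hypothetical):

import pyarrow
import pyarrow.parquet

# A list<int64> column -- a nested type in Parquet terms.
table = pyarrow.table({"scores": pyarrow.array([[1, 2], [3], []])})

# Writing nested columns like this is the operation ARROW-10493 affects.
pyarrow.parquet.write_table(table, "nested.parquet")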

@tswast (Contributor, Author) replied Nov 11, 2020

Thanks for the heads up. We don't officially support nested data yet, so none of our system or sample tests failed because of this.
