Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ARROW-7950: [Python] Determine + test minimal pandas version + raise error when pandas is too old #6992

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/python.yml
Expand Up @@ -83,7 +83,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python: [3.6]
python: [3.8]
env:
PYTHON: ${{ matrix.python }}
steps:
Expand Down Expand Up @@ -126,8 +126,8 @@ jobs:
strategy:
fail-fast: false
matrix:
python: [3.8]
pandas: ["latest"]
python: [3.6]
pandas: ["latest", "0.23"]
env:
PYTHON: ${{ matrix.python }}
PANDAS: ${{ matrix.pandas }}
Expand Down
16 changes: 16 additions & 0 deletions dev/tasks/tasks.yml
Expand Up @@ -2026,6 +2026,22 @@ tasks:
run:
- conda-python-pandas

test-conda-python-3.6-pandas-0.23:
ci: circle
platform: linux
template: docker-tests/circle.linux.yml
params:
env:
PYTHON: 3.6
PANDAS: 0.23
build:
- conda-cpp
- conda-python
nocache:
- conda-python-pandas
run:
- conda-python-pandas

test-conda-python-3.7-dask-latest:
ci: circle
platform: linux
Expand Down
46 changes: 24 additions & 22 deletions python/pyarrow/pandas-shim.pxi
Expand Up @@ -17,6 +17,9 @@

# pandas lazy-loading API shim that reduces API call and import overhead

import warnings


cdef class _PandasAPIShim(object):
"""
Lazy pandas importer that isolates usages of pandas APIs and avoids
Expand Down Expand Up @@ -55,39 +58,38 @@ cdef class _PandasAPIShim(object):
from distutils.version import LooseVersion
self._loose_version = LooseVersion(pd.__version__)

if self._loose_version < LooseVersion('0.23.0'):
self._have_pandas = False
if raise_:
raise ImportError(
"pyarrow requires pandas 0.23.0 or above, pandas {} is "
"installed".format(self._version)
)
else:
warnings.warn(
"pyarrow requires pandas 0.23.0 or above, pandas {} is "
"installed. Therefore, pandas-specific integration is not "
"used.".format(self._version), stacklevel=2)
return
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we perhaps emit a warning here? I don't think users expect their pandas installation to be silently ignored.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that's a good idea.


self._compat_module = pdcompat
self._data_frame = pd.DataFrame
self._index = pd.Index
self._categorical_type = pd.Categorical
self._series = pd.Series
if self._loose_version >= LooseVersion('0.23.0'):
self._extension_array = pd.api.extensions.ExtensionArray
self._array_like_types = (
self._series, self._index, self._categorical_type,
self._extension_array)
self._extension_dtype = pd.api.extensions.ExtensionDtype
else:
self._extension_array = None
self._array_like_types = (
self._series, self._index, self._categorical_type)
self._extension_dtype = None
self._extension_array = pd.api.extensions.ExtensionArray
self._array_like_types = (
self._series, self._index, self._categorical_type,
self._extension_array)
self._extension_dtype = pd.api.extensions.ExtensionDtype
if self._loose_version >= LooseVersion('0.24.0'):
self._is_extension_array_dtype = \
pd.api.types.is_extension_array_dtype
else:
self._is_extension_array_dtype = None

if self._loose_version >= LooseVersion('0.20.0'):
from pandas.api.types import DatetimeTZDtype
self._types_api = pd.api.types
elif self._loose_version >= LooseVersion('0.19.0'):
from pandas.types.dtypes import DatetimeTZDtype
self._types_api = pd.api.types
else:
from pandas.types.dtypes import DatetimeTZDtype
self._types_api = pd.core.common

self._datetimetz_type = DatetimeTZDtype
self._types_api = pd.api.types
self._datetimetz_type = pd.api.types.DatetimeTZDtype
self._have_pandas = True

if self._loose_version > LooseVersion('0.25'):
Expand Down
13 changes: 8 additions & 5 deletions python/pyarrow/tests/test_pandas.py
Expand Up @@ -2675,8 +2675,8 @@ class A:

df = pd.DataFrame({'a': [A(), A()]})

expected_msg = 'Conversion failed for column a with type object'
with pytest.raises(ValueError, match=expected_msg):
msg = 'Conversion failed for column a with type object'
with pytest.raises(ValueError, match=msg):
pa.Table.from_pandas(df)

# period unsupported for pandas <= 0.25
Expand All @@ -2685,8 +2685,8 @@ class A:
'a': pd.period_range('2000-01-01', periods=20),
})

expected_msg = 'Conversion failed for column a with type period'
with pytest.raises(TypeError, match=expected_msg):
msg = 'Conversion failed for column a with type (period|object)'
with pytest.raises((TypeError, ValueError), match=msg):
pa.Table.from_pandas(df)


Expand Down Expand Up @@ -3560,7 +3560,7 @@ def test_array_protocol_pandas_extension_types(monkeypatch):
# ARROW-7022 - ensure protocol works for Period / Interval extension dtypes

if LooseVersion(pd.__version__) < '0.24.0':
pytest.skip(reason='Period/IntervalArray only introduced in 0.24')
pytest.skip('Period/IntervalArray only introduced in 0.24')

storage = pa.array([1, 2, 3], type=pa.int64())
expected = pa.ExtensionArray.from_storage(DummyExtensionType(), storage)
Expand Down Expand Up @@ -3654,6 +3654,9 @@ def test_conversion_extensiontype_to_extensionarray(monkeypatch):
# converting extension type to linked pandas ExtensionDtype/Array
import pandas.core.internals as _int

if LooseVersion(pd.__version__) < "0.24.0":
pytest.skip("ExtensionDtype introduced in pandas 0.24")

storage = pa.array([1, 2, 3, 4], pa.int64())
arr = pa.ExtensionArray.from_storage(MyCustomIntegerType(), storage)
table = pa.table({'a': arr})
Expand Down