Skip to content

Commit

Permalink
COMPAT: pyarrow >= 0.7.0 compat (pandas-dev#17588)
Browse files Browse the repository at this point in the history
  • Loading branch information
jreback committed Sep 19, 2017
1 parent 0e85ca7 commit 6630c4e
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 8 deletions.
2 changes: 1 addition & 1 deletion doc/source/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4492,7 +4492,7 @@ Several caveats.
- The format will NOT write an ``Index``, or ``MultiIndex`` for the ``DataFrame`` and will raise an
error if a non-default one is provided. You can ``.reset_index()`` to store the index as columns, or ``.reset_index(drop=True)`` to discard it.
- Duplicate column names and non-string column names are not supported.
- Categorical dtypes are currently not-supported (for ``pyarrow``).
- Categorical dtypes can be serialized to parquet, but will de-serialize as ``object`` dtype.
- Unsupported types include ``Period`` and actual Python object types. These will raise a helpful error message
on an attempt at serialization.

Expand Down
44 changes: 37 additions & 7 deletions pandas/tests/io/test_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@

import pytest
import datetime
from distutils.version import LooseVersion
from warnings import catch_warnings

import numpy as np
import pandas as pd
from pandas.compat import PY3, is_platform_windows
from pandas.compat import PY3
from pandas.io.parquet import (to_parquet, read_parquet, get_engine,
PyArrowImpl, FastParquetImpl)
from pandas.util import testing as tm
Expand Down Expand Up @@ -42,8 +43,24 @@ def engine(request):
def pa():
if not _HAVE_PYARROW:
pytest.skip("pyarrow is not installed")
if is_platform_windows():
pytest.skip("pyarrow-parquet not building on windows")
return 'pyarrow'


@pytest.fixture
def pa_lt_070():
    """Provide the ``'pyarrow'`` engine name for pyarrow older than 0.7.0.

    Calls ``pytest.skip`` when ``_HAVE_PYARROW`` is false or when the
    installed pyarrow version is 0.7.0 or newer.
    """
    if not _HAVE_PYARROW:
        pytest.skip("pyarrow is not installed")

    installed = LooseVersion(pyarrow.__version__)
    if installed >= '0.7.0':
        pytest.skip("pyarrow is >= 0.7.0")

    return 'pyarrow'


@pytest.fixture
def pa_ge_070():
    """Provide the ``'pyarrow'`` engine name for pyarrow 0.7.0 or newer.

    Calls ``pytest.skip`` when ``_HAVE_PYARROW`` is false or when the
    installed pyarrow version is older than 0.7.0.
    """
    if not _HAVE_PYARROW:
        pytest.skip("pyarrow is not installed")

    installed = LooseVersion(pyarrow.__version__)
    if installed < '0.7.0':
        pytest.skip("pyarrow is < 0.7.0")

    return 'pyarrow'


Expand Down Expand Up @@ -302,10 +319,6 @@ def test_unsupported(self, pa):
df = pd.DataFrame({'a': pd.period_range('2013', freq='M', periods=3)})
self.check_error_on_write(df, pa, ValueError)

# categorical
df = pd.DataFrame({'a': pd.Categorical(list('abc'))})
self.check_error_on_write(df, pa, NotImplementedError)

# timedelta
df = pd.DataFrame({'a': pd.timedelta_range('1 day',
periods=3)})
Expand All @@ -315,6 +328,23 @@ def test_unsupported(self, pa):
df = pd.DataFrame({'a': ['a', 1, 2.0]})
self.check_error_on_write(df, pa, ValueError)

def test_categorical(self, pa_ge_070):
    """Round-trip a categorical column using pyarrow >= 0.7.0.

    The expected frame passed to ``check_round_trip`` holds the same
    values with the column cast to ``object`` dtype.
    """
    # categorical columns can be written with pyarrow >= 0.7.0
    frame = pd.DataFrame({'a': pd.Categorical(list('abc'))})

    # the column is expected to de-serialize as object dtype
    as_object = frame.a.astype(object)
    expected = frame.assign(a=as_object)
    self.check_round_trip(frame, pa_ge_070, expected)

def test_categorical_unsupported(self, pa_lt_070):
    """Check that a categorical column is rejected with pyarrow < 0.7.0.

    ``check_error_on_write`` is called expecting ``NotImplementedError``.
    """
    # categoricals are only supported in pyarrow >= 0.7.0, so the
    # write is expected to fail on the older versions this fixture selects
    frame = pd.DataFrame({'a': pd.Categorical(list('abc'))})
    self.check_error_on_write(frame, pa_lt_070, NotImplementedError)


class TestParquetFastParquet(Base):

Expand Down

0 comments on commit 6630c4e

Please sign in to comment.