Skip to content

Commit

Permalink
ARROW-8416: [Python] Add feather alias for ipc format in dataset API
Browse files Browse the repository at this point in the history
Closes #6919 from jorisvandenbossche/ARROW-8416

Authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Signed-off-by: Benjamin Kietzman <bengilgit@gmail.com>
  • Loading branch information
jorisvandenbossche authored and bkietz committed Apr 13, 2020
1 parent 1a4caa9 commit a5ee89a
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 3 deletions.
5 changes: 3 additions & 2 deletions python/pyarrow/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ def _ensure_format(obj):
return obj
elif obj == "parquet":
return ParquetFileFormat()
elif obj == "ipc":
elif obj in {"ipc", "arrow", "feather"}:
return IpcFileFormat()
else:
raise ValueError("format '{}' is not supported".format(obj))
Expand Down Expand Up @@ -326,7 +326,8 @@ def dataset(paths_or_factories, filesystem=None, partitioning=None,
function. A flavor string can be used as shortcut, and with a list of
field names a DirectoryPartitioning will be inferred.
format : str
Currently only "parquet" is supported.
Currently "parquet" and "ipc"/"arrow"/"feather" are supported. For
Feather, only version 2 files are supported.
schema : Schema, optional
Optionally provide the Schema for the Dataset, in which case it will
not be inferred from the source.
Expand Down
27 changes: 26 additions & 1 deletion python/pyarrow/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -1234,6 +1234,31 @@ def test_ipc_format(tempdir):
result = dataset.to_table()
assert result.equals(table)

dataset = ds.dataset(path, format="ipc")
for format_str in ["ipc", "arrow"]:
dataset = ds.dataset(path, format=format_str)
result = dataset.to_table()
assert result.equals(table)


def test_feather_format(tempdir):
    """Check that a Feather file can be read through the dataset API.

    A table is written with ``write_feather`` using its default arguments
    and read back twice: once with an explicit ``ds.IpcFileFormat()``
    instance and once with the ``"feather"`` format string. Both reads must
    return a table equal to the one written. A second file written with
    ``version=1`` is then added to the same directory, after which reading
    the directory is expected to raise ``ValueError``.
    """
    from pyarrow.feather import write_feather

    expected = pa.table({'a': pa.array([1, 2, 3], type="int8"),
                         'b': pa.array([.1, .2, .3], type="float64")})

    dataset_dir = tempdir / "feather_dataset"
    dataset_dir.mkdir()
    write_feather(expected, str(dataset_dir / "data.feather"))

    # The explicit format object and the string alias must both round-trip
    # the written table unchanged.
    for file_format in (ds.IpcFileFormat(), "feather"):
        loaded = ds.dataset(dataset_dir, format=file_format).to_table()
        assert loaded.equals(expected)

    # error with Feather v1 files
    write_feather(expected, str(dataset_dir / "data1.feather"), version=1)
    with pytest.raises(ValueError):
        ds.dataset(dataset_dir, format="feather").to_table()

0 comments on commit a5ee89a

Please sign in to comment.