Skip to content

Commit

Permalink
ARROW-7892: [Python] Add FileSystemDataset.format attribute
Browse files Browse the repository at this point in the history
Closes #6462 from jorisvandenbossche/dataset-various-api and squashes the following commits:

2a40709 <Joris Van den Bossche> add more informative repr to Expression
865ccb5 <Joris Van den Bossche> ARROW-7892: Add FilesystemSource.format attribute

Authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Signed-off-by: Wes McKinney <wesm+git@apache.org>
  • Loading branch information
jorisvandenbossche authored and wesm committed Mar 6, 2020
1 parent 6ff1569 commit 7830ce3
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 1 deletion.
12 changes: 11 additions & 1 deletion python/pyarrow/_dataset.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,11 @@ cdef class FileSystemDataset(Dataset):
cdef vector[c_string] files = self.filesystem_dataset.files()
return [frombytes(f) for f in files]

@property
def format(self):
    """
    The FileFormat of this filesystem dataset.

    The format handle obtained from the underlying ``filesystem_dataset``
    is passed to ``FileFormat.wrap`` and the resulting object is returned.
    """
    wrapped_format = FileFormat.wrap(self.filesystem_dataset.format())
    return wrapped_format


cdef class FileFormat:

Expand All @@ -327,7 +332,7 @@ cdef class FileFormat:
self.format = sp.get()

@staticmethod
cdef wrap(shared_ptr[CFileFormat]& sp):
cdef wrap(const shared_ptr[CFileFormat]& sp):
cdef FileFormat self

typ = frombytes(sp.get().type_name())
Expand Down Expand Up @@ -1139,6 +1144,11 @@ cdef class Expression:
def __str__(self):
# Render the wrapped expression as text: the result of the underlying
# expr.ToString() call is passed through frombytes and returned.
return frombytes(self.expr.ToString())

def __repr__(self):
    """
    Return a debugging representation of this expression.

    The result has the form ``<pyarrow.dataset.ClassName text>``, where
    ``ClassName`` is the name of the instance's class and ``text`` is
    whatever ``str(self)`` produces.
    """
    class_name = self.__class__.__name__
    expression_text = str(self)
    return "<pyarrow.dataset.{0} {1}>".format(class_name, expression_text)

def validate(self, Schema schema not None):
"""Validate this expression for execution against a schema.
Expand Down
1 change: 1 addition & 0 deletions python/pyarrow/includes/libarrow_dataset.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,7 @@ cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil:
vector[c_string] files()
shared_ptr[CFragmentIterator] GetFragments(
shared_ptr[CScanOptions] options)
const shared_ptr[CFileFormat] format()

cdef cppclass CParquetFileFormatReaderOptions \
"arrow::dataset::ParquetFileFormat::ReaderOptions":
Expand Down
2 changes: 2 additions & 0 deletions python/pyarrow/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ def test_filesystem_dataset(mockfs):
paths_or_selector=paths,
partitions=partitions
)
assert isinstance(source.format, ds.ParquetFileFormat)

root_partition = ds.ComparisonExpression(
ds.CompareOperator.Equal,
Expand Down Expand Up @@ -360,6 +361,7 @@ def test_expression():
assert condition.assume(i64_is_5).equals(ds.ScalarExpression(False))
assert condition.assume(i64_is_7).equals(ds.ScalarExpression(True))
assert str(condition) == "(i64 > 5:int64)"
assert "(i64 > 5:int64)" in repr(condition)


def test_expression_ergonomics():
Expand Down

0 comments on commit 7830ce3

Please sign in to comment.