Skip to content

Commit

Permalink
ARROW-7311: [Python] Return filesystem and path from URI
Browse files Browse the repository at this point in the history
This should supersede #5977

Closes #6197 from kszucs/fs-from-uri and squashes the following commits:

523972f <Krisztián Szűcs> relative paths
4b1f80b <Krisztián Szűcs> filesystem from uri
4ec33b1 <Krisztián Szűcs> from URI

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
  • Loading branch information
kszucs authored and jorisvandenbossche committed Jan 16, 2020
1 parent 68b0e9c commit 6642791
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 0 deletions.
23 changes: 23 additions & 0 deletions python/pyarrow/_fs.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,29 @@ cdef class FileSystem:
"the subclasses instead: LocalFileSystem or "
"SubTreeFileSystem")

@staticmethod
def from_uri(uri):
    """Create a new FileSystem from a URI.

    A URI without a scheme is treated as a local filesystem path.
    Recognized schemes are "file", "mock", "hdfs" and "viewfs".

    Parameters
    ----------
    uri : string
        URI-based path, for example: file:///some/local/path

    Returns
    -------
    tuple of (filesystem, path)
        The FileSystem instance together with the abstract path inside
        that FileSystem instance.
    """
    cdef c_string c_path
    cdef CResult[shared_ptr[CFileSystem]] maybe_fs

    # The C++ factory receives a pointer to c_path as its second argument;
    # c_path is decoded below to form the second element of the result.
    maybe_fs = CFileSystemFromUri(tobytes(uri), &c_path)
    filesystem = FileSystem.wrap(GetResultValue(maybe_fs))
    return filesystem, frombytes(c_path)

# Attach this wrapper object to an existing C++ filesystem handle.
cdef init(self, const shared_ptr[CFileSystem]& wrapped):
# Keep the shared_ptr itself, and also store the raw pointer obtained
# from its get() call in self.fs.
self.wrapped = wrapped
self.fs = wrapped.get()
Expand Down
3 changes: 3 additions & 0 deletions python/pyarrow/includes/libarrow_fs.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,9 @@ cdef extern from "arrow/filesystem/api.h" namespace "arrow::fs" nogil:
CResult[shared_ptr[COutputStream]] OpenAppendStream(
const c_string& path)

# Declares the C++ function arrow::fs::FileSystemFromUri under the Cython
# name CFileSystemFromUri. It takes the URI string plus a pointer to a
# string (out_path) and returns a CResult holding a shared filesystem pointer.
CResult[shared_ptr[CFileSystem]] CFileSystemFromUri \
"arrow::fs::FileSystemFromUri"(const c_string& uri, c_string* out_path)

cdef cppclass CLocalFileSystemOptions "arrow::fs::LocalFileSystemOptions":
c_bool use_mmap

Expand Down
21 changes: 21 additions & 0 deletions python/pyarrow/tests/test_fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -590,3 +590,24 @@ def test_hdfs_options(hdfs_server):
uri = "hdfs://{}:{}/?user={}".format(host, port, user)
fs = HadoopFileSystem(uri)
assert fs.get_target_stats(FileSelector('/'))


@pytest.mark.parametrize('uri, expected_klass, expected_path', [
    # Leading slashes are stripped on purpose for the mock scheme, because
    # MockFileSystem makes no distinction between relative and absolute
    # paths.
    ('mock:', _MockFileSystem, ''),
    ('mock:foo/bar', _MockFileSystem, 'foo/bar'),
    ('mock:/foo/bar', _MockFileSystem, 'foo/bar'),
    ('mock:///foo/bar', _MockFileSystem, 'foo/bar'),
    # Explicit "file" scheme.
    ('file:', LocalFileSystem, ''),
    ('file:foo/bar', LocalFileSystem, 'foo/bar'),
    ('file:/foo/bar', LocalFileSystem, '/foo/bar'),
    ('file:///foo/bar', LocalFileSystem, '/foo/bar'),
    # No scheme at all.
    ('', LocalFileSystem, ''),
    ('foo/bar', LocalFileSystem, 'foo/bar'),
    ('/foo/bar', LocalFileSystem, '/foo/bar'),
])
def test_filesystem_from_uri(uri, expected_klass, expected_path):
    """Check the filesystem class and path returned by FileSystem.from_uri."""
    filesystem, inner_path = FileSystem.from_uri(uri)
    assert isinstance(filesystem, expected_klass)
    assert inner_path == expected_path

0 comments on commit 6642791

Please sign in to comment.