Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ARROW-7311: [Python] Return filesystem and path from URI #6197

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
23 changes: 23 additions & 0 deletions python/pyarrow/_fs.pyx
Expand Up @@ -190,6 +190,29 @@ cdef class FileSystem:
"the subclasses instead: LocalFileSystem or "
"SubTreeFileSystem")

@staticmethod
def from_uri(uri):
    """
    Instantiate a FileSystem from a URI and return it with the path.

    A URI without a scheme is treated as a local filesystem path.
    Recognized schemes are "file", "mock", "hdfs" and "viewfs".

    Parameters
    ----------
    uri : string
        URI-based path, for example: file:///some/local/path

    Returns
    -------
    tuple of (FileSystem, str)
        The filesystem instance together with the abstract path inside
        that filesystem.
    """
    cdef:
        c_string c_path
        shared_ptr[CFileSystem] c_fs

    # The C++ factory returns the filesystem wrapped in a Result and fills
    # c_path through the out-parameter; unwrap the Result before wrapping
    # the pointer in its Python-level class.
    c_fs = GetResultValue(CFileSystemFromUri(tobytes(uri), &c_path))
    return FileSystem.wrap(c_fs), frombytes(c_path)

cdef init(self, const shared_ptr[CFileSystem]& wrapped):
self.wrapped = wrapped
self.fs = wrapped.get()
Expand Down
3 changes: 3 additions & 0 deletions python/pyarrow/includes/libarrow_fs.pxd
Expand Up @@ -79,6 +79,9 @@ cdef extern from "arrow/filesystem/api.h" namespace "arrow::fs" nogil:
CResult[shared_ptr[COutputStream]] OpenAppendStream(
const c_string& path)

# Binds the C++ function arrow::fs::FileSystemFromUri under the Cython name
# CFileSystemFromUri. It takes the URI string plus a pointer to a string
# (out_path) and returns a Result holding a shared FileSystem pointer.
CResult[shared_ptr[CFileSystem]] CFileSystemFromUri \
"arrow::fs::FileSystemFromUri"(const c_string& uri, c_string* out_path)

cdef cppclass CLocalFileSystemOptions "arrow::fs::LocalFileSystemOptions":
c_bool use_mmap

Expand Down
19 changes: 19 additions & 0 deletions python/pyarrow/tests/test_fs.py
Expand Up @@ -590,3 +590,22 @@ def test_hdfs_options(hdfs_server):
uri = "hdfs://{}:{}/?user={}".format(host, port, user)
fs = HadoopFileSystem(uri)
assert fs.get_target_stats(FileSelector('/'))


@pytest.mark.parametrize(('uri', 'expected_klass', 'expected_path'), [
    # leading slashes are removed intentionally, because MockFileSystem
    # doesn't have a distinction between relative and absolute paths
    ('mock:', _MockFileSystem, ''),
    ('mock:foo/bar', _MockFileSystem, 'foo/bar'),
    ('mock:/foo/bar', _MockFileSystem, 'foo/bar'),
    ('mock:///foo/bar', _MockFileSystem, 'foo/bar'),
    ('file:', LocalFileSystem, ''),
    ('file:/foo/bar', LocalFileSystem, '/foo/bar'),
    ('file:///foo/bar', LocalFileSystem, '/foo/bar'),
    # scheme-less input is expected to resolve to the local filesystem
    ('', LocalFileSystem, ''),
    ('/foo/bar', LocalFileSystem, '/foo/bar'),
])
def test_filesystem_from_uri(uri, expected_klass, expected_path):
    """Check the filesystem class and path returned by FileSystem.from_uri.

    Each parametrized case supplies a URI, the filesystem class the result
    must be an instance of, and the path string that must be returned.
    """
    fs, path = FileSystem.from_uri(uri)
    assert isinstance(fs, expected_klass)
    assert path == expected_path