Skip to content

Commit

Permalink
ARROW-8213: [Python][Dataset] Opening a dataset with a local incorrect path gives confusing error message
Browse files Browse the repository at this point in the history

Workaround until it is properly handled in the C++ implementation.

Closes #6854 from kszucs/ARROW-8213

Lead-authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Co-authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Wes McKinney <wesm+git@apache.org>
  • Loading branch information
2 people authored and wesm committed Apr 8, 2020
1 parent 241e79d commit e279a7e
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 6 deletions.
22 changes: 16 additions & 6 deletions python/pyarrow/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,15 +159,24 @@ def _ensure_fs(filesystem, path):
FileSystem, LocalFileSystem, FileType, _normalize_path)

if filesystem is None:
# first check if the file exists as a local (relative) file path
# First check if the file exists as a local (relative) file path
filesystem = LocalFileSystem()
try:
infos = filesystem.get_file_info([path])[0]
except OSError:
return FileSystem.from_uri(path)

if infos.type == FileType.NotFound:
return FileSystem.from_uri(path)
local_path_exists = False
else:
local_path_exists = (infos.type != FileType.NotFound)

if not local_path_exists:
# Perhaps it's a URI?
try:
return FileSystem.from_uri(path)
except ValueError as e:
if "empty scheme" not in str(e):
raise
# ARROW-8213: not a URI, assume local path
# to get a nice error message.

# ensure we have a proper path (eg no backslashes on Windows)
path = _normalize_path(filesystem, path)
Expand All @@ -179,7 +188,8 @@ def _ensure_fs_and_paths(path, filesystem=None):
# Return filesystem and list of string paths or FileSelector
from pyarrow.fs import FileType, FileSelector

filesystem, path = _ensure_fs(filesystem, _stringify_path(path))
path = _stringify_path(path)
filesystem, path = _ensure_fs(filesystem, path)
infos = filesystem.get_file_info([path])[0]
if infos.type == FileType.Directory:
# for directory, pass a selector
Expand Down
7 changes: 7 additions & 0 deletions python/pyarrow/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -1042,6 +1042,13 @@ def test_open_dataset_from_source_additional_kwargs(multisourcefs):
ds.dataset(child, format="parquet")


def test_open_dataset_non_existing_file():
    # Regression test for ARROW-8213: opening a dataset from a local path
    # that does not exist is expected to raise FileNotFoundError.
    missing_path = 'i-am-not-existing.parquet'
    with pytest.raises(FileNotFoundError):
        ds.dataset(missing_path, format='parquet')


@pytest.mark.parquet
@pytest.mark.s3
def test_open_dataset_from_uri_s3(s3_connection, s3_server):
Expand Down

0 comments on commit e279a7e

Please sign in to comment.