From da7754870bb9ab30c99fcd6dccdd39421f420691 Mon Sep 17 00:00:00 2001 From: Andreas Poehlmann Date: Tue, 7 May 2024 19:18:24 +0200 Subject: [PATCH] LocalFileSystem restore _strip_protocol signature (#1567) --- ci/environment-py38.yml | 2 +- fsspec/implementations/local.py | 121 +++++---- fsspec/implementations/tests/test_local.py | 277 +++++++++++++++++++-- fsspec/tests/test_utils.py | 38 ++- fsspec/utils.py | 2 - 5 files changed, 373 insertions(+), 67 deletions(-) diff --git a/ci/environment-py38.yml b/ci/environment-py38.yml index 25f68e5dd..9c514de98 100644 --- a/ci/environment-py38.yml +++ b/ci/environment-py38.yml @@ -3,7 +3,7 @@ channels: - conda-forge dependencies: - pip - - git + - git <2.45.0 - py - pip: - hadoop-test-cluster diff --git a/fsspec/implementations/local.py b/fsspec/implementations/local.py index 7938a9147..9881606f1 100644 --- a/fsspec/implementations/local.py +++ b/fsspec/implementations/local.py @@ -15,12 +15,6 @@ logger = logging.getLogger("fsspec.local") -def _remove_prefix(text: str, prefix: str): - if text.startswith(prefix): - return text[len(prefix) :] - return text - - class LocalFileSystem(AbstractFileSystem): """Interface to files on local storage @@ -121,8 +115,8 @@ def lexists(self, path, **kwargs): return osp.lexists(path) def cp_file(self, path1, path2, **kwargs): - path1 = self._strip_protocol(path1, remove_trailing_slash=True) - path2 = self._strip_protocol(path2, remove_trailing_slash=True) + path1 = self._strip_protocol(path1) + path2 = self._strip_protocol(path2) if self.auto_mkdir: self.makedirs(self._parent(path2), exist_ok=True) if self.isfile(path1): @@ -151,8 +145,8 @@ def put_file(self, path1, path2, callback=None, **kwargs): return self.cp_file(path1, path2, **kwargs) def mv(self, path1, path2, **kwargs): - path1 = self._strip_protocol(path1, remove_trailing_slash=True) - path2 = self._strip_protocol(path2, remove_trailing_slash=True) + path1 = self._strip_protocol(path1) + path2 = self._strip_protocol(path2) shutil.move(path1, path2) def link(self, src, dst, **kwargs): @@ -176,7 +170,7 @@ def rm(self, path, recursive=False, maxdepth=None): path = [path] for p in path: - p = self._strip_protocol(p, remove_trailing_slash=True) + p = self._strip_protocol(p) if self.isdir(p): if not recursive: raise ValueError("Cannot delete directory, set recursive=True") @@ -219,7 +213,7 @@ def modified(self, path): @classmethod def _parent(cls, path): - path = cls._strip_protocol(path, remove_trailing_slash=True) + path = cls._strip_protocol(path) if os.sep == "/": # posix native return path.rsplit("/", 1)[0] or "/" @@ -234,17 +228,43 @@ def _parent(cls, path): return path_ @classmethod - def _strip_protocol(cls, path, remove_trailing_slash=False): + def _strip_protocol(cls, path): path = stringify_path(path) - if path.startswith("file:"): - path = _remove_prefix(_remove_prefix(path, "file://"), "file:") - if os.sep == "\\": - path = path.lstrip("/") + if path.startswith("file://"): + path = path[7:] + elif path.startswith("file:"): + path = path[5:] + elif path.startswith("local://"): + path = path[8:] elif path.startswith("local:"): - path = _remove_prefix(_remove_prefix(path, "local://"), "local:") - if os.sep == "\\": - path = path.lstrip("/") - return make_path_posix(path, remove_trailing_slash) + path = path[6:] + + path = make_path_posix(path) + if os.sep != "/": + # This code-path is a stripped down version of + # > drive, path = ntpath.splitdrive(path) + if path[1:2] == ":": + # Absolute drive-letter path, e.g. X:\Windows + # Relative path with drive, e.g. X:Windows + drive, path = path[:2], path[2:] + elif path[:2] == "//": + # UNC drives, e.g. \\server\share or \\?\UNC\server\share + # Device drives, e.g. \\.\device or \\?\device + if (index1 := path.find("/", 2)) == -1 or ( + index2 := path.find("/", index1 + 1) + ) == -1: + drive, path = path, "" + else: + drive, path = path[:index2], path[index2:] + else: + # Relative path, e.g. Windows + drive = "" + + path = path.rstrip("/") or cls.root_marker + return drive + path + + else: + return path.rstrip("/") or cls.root_marker def _isfilestore(self): # Inheriting from DaskFileSystem makes this False (S3, etc. were) @@ -257,42 +277,55 @@ def chmod(self, path, mode): return os.chmod(path, mode) -def make_path_posix(path, remove_trailing_slash=False): - """Make path generic for current OS""" +def make_path_posix(path): + """Make path generic and absolute for current OS""" if not isinstance(path, str): if isinstance(path, (list, set, tuple)): - return type(path)(make_path_posix(p, remove_trailing_slash) for p in path) + return type(path)(make_path_posix(p) for p in path) else: - path = str(stringify_path(path)) + path = stringify_path(path) + if not isinstance(path, str): + raise TypeError(f"could not convert {path!r} to string") if os.sep == "/": # Native posix if path.startswith("/"): # most common fast case for posix - return path.rstrip("/") or "/" if remove_trailing_slash else path + return path elif path.startswith("~"): - return make_path_posix(osp.expanduser(path), remove_trailing_slash) + return osp.expanduser(path) elif path.startswith("./"): path = path[2:] - path = f"{os.getcwd()}/{path}" - return path.rstrip("/") or "/" if remove_trailing_slash else path + elif path == ".": + path = "" return f"{os.getcwd()}/{path}" else: # NT handling - if len(path) > 1: - if path[1] == ":": - # windows full path like "C:\\local\\path" - if len(path) <= 3: - # nt root (something like c:/) - return path[0] + ":/" - path = path.replace("\\", "/").replace("//", "/") - return path.rstrip("/") if remove_trailing_slash else path - elif path[0] == "~": - return make_path_posix(osp.expanduser(path), remove_trailing_slash) - elif path.startswith(("\\\\", "//")): - # windows UNC/DFS-style paths - path = "//" + path[2:].replace("\\", "/").replace("//", "/") - return path.rstrip("/") if remove_trailing_slash else path - return make_path_posix(osp.abspath(path), remove_trailing_slash) + if path[0:1] == "/" and path[2:3] == ":": + # path is like "/c:/local/path" + path = path[1:] + if path[1:2] == ":": + # windows full path like "C:\\local\\path" + if len(path) <= 3: + # nt root (something like c:/) + return path[0] + ":/" + path = path.replace("\\", "/") + return path + elif path[0:1] == "~": + return make_path_posix(osp.expanduser(path)) + elif path.startswith(("\\\\", "//")): + # windows UNC/DFS-style paths + return "//" + path[2:].replace("\\", "/") + elif path.startswith(("\\", "/")): + # windows relative path with root + path = path.replace("\\", "/") + return f"{osp.splitdrive(os.getcwd())[0]}{path}" + else: + path = path.replace("\\", "/") + if path.startswith("./"): + path = path[2:] + elif path == ".": + path = "" + return f"{make_path_posix(os.getcwd())}/{path}" def trailing_sep(path): diff --git a/fsspec/implementations/tests/test_local.py b/fsspec/implementations/tests/test_local.py index 428abc980..ef3927912 100644 --- a/fsspec/implementations/tests/test_local.py +++ b/fsspec/implementations/tests/test_local.py @@ -40,6 +40,35 @@ odir = os.getcwd() +@pytest.fixture() +def cwd(): + pth = os.getcwd().replace("\\", "/") + assert not pth.endswith("/") + yield pth + + +@pytest.fixture() +def current_drive(cwd): + drive = os.path.splitdrive(cwd)[0] + assert not drive or (len(drive) == 2 and drive.endswith(":")) + yield drive + + +@pytest.fixture() +def user_home(): + pth = os.path.expanduser("~").replace("\\", "/") + assert not pth.endswith("/") + yield pth + + +def winonly(*args): + return pytest.param(*args, marks=pytest.mark.skipif(not WIN, reason="Windows only")) + + +def posixonly(*args): + return pytest.param(*args, marks=pytest.mark.skipif(WIN, reason="Posix only")) + + @contextmanager def filetexts(d, open=open, mode="t"): """Dumps a number of textfiles to disk @@ -472,7 +501,6 @@ def test_make_path_posix(): assert make_path_posix("/posix") == f"{drive}:/posix" # Windows drive requires trailing slash assert make_path_posix("C:\\") == "C:/" - assert make_path_posix("C:\\", remove_trailing_slash=True) == "C:/" else: assert make_path_posix("/a/posix/path") == "/a/posix/path" assert make_path_posix("/posix") == "/posix" @@ -505,6 +533,71 @@ def test_make_path_posix(): assert userpath.endswith("/path") +@pytest.mark.parametrize( + "path", + [ + "/abc/def", + "abc/def", + "", + ".", + "//server/share/", + "\\\\server\\share\\", + "C:\\", + "d:/abc/def", + "e:", + pytest.param( + "\\\\server\\share", + marks=[ + pytest.mark.xfail( + WIN and sys.version_info < (3, 11), + reason="requires py3.11+ see: python/cpython#96290", + ) + ], + ), + pytest.param( + "f:foo", + marks=[pytest.mark.xfail(WIN, reason="unsupported")], + id="relative-path-with-drive", + ), + ], +) +def test_make_path_posix_returns_absolute_paths(path): + posix_pth = make_path_posix(path) + assert os.path.isabs(posix_pth) + + +@pytest.mark.parametrize("container_cls", [list, set, tuple]) +def test_make_path_posix_set_list_tuple(container_cls): + paths = container_cls( + [ + "/foo/bar", + "bar/foo", + ] + ) + posix_paths = make_path_posix(paths) + assert isinstance(posix_paths, container_cls) + assert posix_paths == container_cls( + [ + make_path_posix("/foo/bar"), + make_path_posix("bar/foo"), + ] + ) + + +@pytest.mark.parametrize( + "obj", + [ + 1, + True, + None, + object(), + ], +) +def test_make_path_posix_wrong_type(obj): + with pytest.raises(TypeError): + make_path_posix(obj) + + def test_parent(): if WIN: assert LocalFileSystem._parent("C:\\file or folder") == "C:/" @@ -514,6 +607,48 @@ def test_parent(): assert LocalFileSystem._parent("/") == "/" +@pytest.mark.parametrize( + "path,parent", + [ + ("C:\\", "C:/"), + ("C:\\.", "C:/"), + ("C:\\.\\", "C:/"), + ("file:C:/", "C:/"), + ("file://C:/", "C:/"), + ("local:C:/", "C:/"), + ("local://C:/", "C:/"), + ("\\\\server\\share", "//server/share"), + ("\\\\server\\share\\", "//server/share"), + ("\\\\server\\share\\path", "//server/share"), + ("//server/share", "//server/share"), + ("//server/share/", "//server/share"), + ("//server/share/path", "//server/share"), + ("C:\\file or folder", "C:/"), + ("C:\\file or folder\\", "C:/"), + ("file:///", "{current_drive}/"), + ("file:///path", "{current_drive}/"), + ] + if WIN + else [ + ("/", "/"), + ("/.", "/"), + ("/./", "/"), + ("file:/", "/"), + ("file:///", "/"), + ("local:/", "/"), + ("local:///", "/"), + ("/file or folder", "/"), + ("/file or folder/", "/"), + ("file:///path", "/"), + ("file://c/", "{cwd}"), + ], +) +def test_parent_edge_cases(path, parent, cwd, current_drive): + parent = parent.format(cwd=cwd, current_drive=current_drive) + + assert LocalFileSystem._parent(path) == parent + + def test_linked_files(tmpdir): tmpdir = str(tmpdir) fn0 = os.path.join(tmpdir, "target") @@ -647,26 +782,130 @@ def test_pickle(tmpdir): assert f2.read() == f.read() -def test_strip_protocol_expanduser(): - path = "file://~\\foo\\bar" if WIN else "file://~/foo/bar" - stripped = LocalFileSystem._strip_protocol(path) - assert path != stripped - assert "~" not in stripped - assert "file://" not in stripped - assert stripped.startswith(os.path.expanduser("~").replace("\\", "/")) - path = LocalFileSystem._strip_protocol("./", remove_trailing_slash=True) - assert not path.endswith("/") +@pytest.mark.parametrize( + "uri, expected", + [ + ("file://~/foo/bar", "{user_home}/foo/bar"), + ("~/foo/bar", "{user_home}/foo/bar"), + winonly("~\\foo\\bar", "{user_home}/foo/bar"), + winonly("file://~\\foo\\bar", "{user_home}/foo/bar"), + ], +) +def test_strip_protocol_expanduser(uri, expected, user_home): + expected = expected.format(user_home=user_home) + stripped = LocalFileSystem._strip_protocol(uri) + assert expected == stripped -def test_strip_protocol_no_authority(): - path = "file:\\foo\\bar" if WIN else "file:/foo/bar" - stripped = LocalFileSystem._strip_protocol(path) - assert "file:" not in stripped - assert stripped.endswith("/foo/bar") - if WIN: - assert ( - LocalFileSystem._strip_protocol("file://C:\\path\\file") == "C:/path/file" - ) + +@pytest.mark.parametrize( + "uri, expected", + [ + ("file://", "{cwd}"), + ("file://.", "{cwd}"), + ("file://./", "{cwd}"), + ("./", "{cwd}"), + ("file:path", "{cwd}/path"), + ("file://path", "{cwd}/path"), + ("path", "{cwd}/path"), + ("./path", "{cwd}/path"), + winonly(".\\", "{cwd}"), + winonly("file://.\\path", "{cwd}/path"), + ], +) +def test_strip_protocol_relative_paths(uri, expected, cwd): + expected = expected.format(cwd=cwd) + + stripped = LocalFileSystem._strip_protocol(uri) + assert expected == stripped + + +@pytest.mark.parametrize( + "uri, expected", + [ + posixonly("file:/foo/bar", "/foo/bar"), + winonly("file:/foo/bar", "{current_drive}/foo/bar"), + winonly("file:\\foo\\bar", "{current_drive}/foo/bar"), + winonly("file:D:\\path\\file", "D:/path/file"), + winonly("file:/D:\\path\\file", "D:/path/file"), + winonly("file://D:\\path\\file", "D:/path/file"), + ], +) +def test_strip_protocol_no_authority(uri, expected, cwd, current_drive): + expected = expected.format(cwd=cwd, current_drive=current_drive) + + stripped = LocalFileSystem._strip_protocol(uri) + assert expected == stripped + + +@pytest.mark.parametrize( + "uri, expected", + [ + ("file:/path", "/path"), + ("file:///path", "/path"), + ("file:////path", "//path"), + ("local:/path", "/path"), + ("s3://bucket/key", "{cwd}/s3://bucket/key"), + ("/path", "/path"), + ("file:///", "/"), + ] + if not WIN + else [ + ("file:c:/path", "c:/path"), + ("file:/c:/path", "c:/path"), + ("file:/C:/path", "C:/path"), + ("file://c:/path", "c:/path"), + ("file:///c:/path", "c:/path"), + ("local:/path", "{current_drive}/path"), + ("s3://bucket/key", "{cwd}/s3://bucket/key"), + ("c:/path", "c:/path"), + ("c:\\path", "c:/path"), + ("file:///", "{current_drive}/"), + pytest.param( + "file://localhost/c:/path", + "c:/path", + marks=pytest.mark.xfail( + reason="rfc8089 section3 'localhost uri' not supported" + ), + ), + ], +) +def test_strip_protocol_absolute_paths(uri, expected, current_drive, cwd): + expected = expected.format(current_drive=current_drive, cwd=cwd) + + stripped = LocalFileSystem._strip_protocol(uri) + assert expected == stripped + + +@pytest.mark.parametrize( + "uri, expected", + [ + ("file:c|/path", "c:/path"), + ("file:/D|/path", "D:/path"), + ("file:///C|/path", "C:/path"), + ], +) +@pytest.mark.skipif(not WIN, reason="Windows only") +@pytest.mark.xfail(WIN, reason="legacy dos uris not supported") +def test_strip_protocol_legacy_dos_uris(uri, expected): + stripped = LocalFileSystem._strip_protocol(uri) + assert expected == stripped + + +@pytest.mark.parametrize( + "uri, stripped", + [ + ("file://remote/share/pth", "{cwd}/remote/share/pth"), + ("file:////remote/share/pth", "//remote/share/pth"), + ("file://///remote/share/pth", "///remote/share/pth"), + ("//remote/share/pth", "//remote/share/pth"), + winonly("\\\\remote\\share\\pth", "//remote/share/pth"), + ], +) +def test_strip_protocol_windows_remote_shares(uri, stripped, cwd): + stripped = stripped.format(cwd=cwd) + + assert LocalFileSystem._strip_protocol(uri) == stripped def test_mkdir_twice_faile(tmpdir): diff --git a/fsspec/tests/test_utils.py b/fsspec/tests/test_utils.py index 0efd5d91d..10fa89a2d 100644 --- a/fsspec/tests/test_utils.py +++ b/fsspec/tests/test_utils.py @@ -1,6 +1,6 @@ import io import sys -from pathlib import Path +from pathlib import Path, PurePath from unittest.mock import Mock import pytest @@ -440,3 +440,39 @@ def test_size(): f = io.BytesIO(b"hello") assert fsspec.utils.file_size(f) == 5 assert f.tell() == 0 + + +class _HasFspath: + def __fspath__(self): + return "foo" + + +class _HasPathAttr: + def __init__(self): + self.path = "foo" + + +@pytest.mark.parametrize( + "path,expected", + [ + # coerce to string + ("foo", "foo"), + (Path("foo"), "foo"), + (PurePath("foo"), "foo"), + (_HasFspath(), "foo"), + (_HasPathAttr(), "foo"), + # passthrough + (b"bytes", b"bytes"), + (None, None), + (1, 1), + (True, True), + (o := object(), o), + ([], []), + ((), ()), + (set(), set()), + ], +) +def test_stringify_path(path, expected): + path = fsspec.utils.stringify_path(path) + + assert path == expected diff --git a/fsspec/utils.py b/fsspec/utils.py index ba3f80be3..6e130aa75 100644 --- a/fsspec/utils.py +++ b/fsspec/utils.py @@ -350,8 +350,6 @@ def stringify_path(filepath: str | os.PathLike[str] | pathlib.Path) -> str: return filepath elif hasattr(filepath, "__fspath__"): return filepath.__fspath__() - elif isinstance(filepath, pathlib.Path): - return str(filepath) elif hasattr(filepath, "path"): return filepath.path else: