Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LocalFileSystem fix _strip_protocol for root directory #1477

Merged
merged 34 commits into from
Mar 18, 2024
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
93d92c8
Fix _strip_protocol for windows root directory
fleming79 Dec 22, 2023
f6d44d4
Update tests for windows root and remove_trailing_slash
fleming79 Dec 23, 2023
5b77125
FSMap._key_to_str now always strips trailing separator
fleming79 Dec 23, 2023
ffb2eb8
make_path_posix now uses sep to identify os type
fleming79 Dec 23, 2023
00b742f
LocalFileSystem - switch from using os.name to sep
fleming79 Dec 23, 2023
9d01d9b
local.py - make_path_posix removed temporary nested function _make_p…
fleming79 Dec 23, 2023
e15955d
Fixed LocalFileSystem._parent & added test
fleming79 Dec 24, 2023
c0c7399
make_path_posix - made slightly faster in recursive calls by calling …
fleming79 Dec 24, 2023
7abb0c4
Divide make_path_posix and _parent method into native posix and nt ve…
fleming79 Dec 26, 2023
6e2df0f
FSMap - use new `remove_trailing_slash` to get root
fleming79 Dec 26, 2023
16c2c82
make_path_posix
fleming79 Dec 26, 2023
0b4d069
Merge branch 'master' of https://github.com/fsspec/filesystem_spec
fleming79 Jan 3, 2024
9fa75a9
make_path_posix - use stringify_path
fleming79 Jan 3, 2024
4b25084
Merge branch 'fsspec:master' into master
fleming79 Jan 4, 2024
97ee256
LocalFileSystem._strip_protocol - will now lstrip for 'local' on NT.
fleming79 Jan 4, 2024
a0ee144
_strip_protocol fix for 3.8 string missing removeprefix
Jan 8, 2024
a68e536
Merge branch 'fsspec:master' into master
fleming79 Jan 8, 2024
41147e6
Fix version check
Jan 9, 2024
2e902a3
Merge branch 'fsspec:master' into master
fleming79 Jan 15, 2024
aaefdc5
Drop version specific version of _strip_protocol
Jan 16, 2024
778cd95
remove sep as argument for make_path_posix , _strip_protocol, _parent
Jan 24, 2024
7570de8
fix test_make_path_posix
fleming79 Jan 24, 2024
502413f
Merge branch 'fsspec:master' into master
fleming79 Feb 6, 2024
64433f5
Merge branch 'fsspec:master' into master
fleming79 Feb 9, 2024
fe93b1d
Merge branch 'fsspec:master' into master
fleming79 Feb 23, 2024
ac990da
Merge branch 'fsspec:master' into master
fleming79 Mar 1, 2024
e0de0a6
Fix ruff lint PIE810
Mar 1, 2024
05f073a
Merge branch 'fsspec:master' into master
fleming79 Mar 5, 2024
7799d4e
Merge branch 'fsspec:master' into master
fleming79 Mar 7, 2024
c2352bf
Re-enable jupyter test_simple for windows
fleming79 Mar 8, 2024
006718d
Merge branch 'fsspec:master' into master
fleming79 Mar 8, 2024
ec56ea4
Merge branch 'master' of https://github.com/fleming79/filesystem_spec
fleming79 Mar 8, 2024
03b3106
Revert setting of root in 'FSMap' and added line to test_strip_protoc…
fleming79 Mar 9, 2024
082b544
Merge branch 'fsspec:master' into master
fleming79 Mar 15, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 62 additions & 49 deletions fsspec/implementations/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import logging
import os
import os.path as osp
import re

import shutil
import stat
import tempfile
Expand Down Expand Up @@ -116,8 +116,8 @@ def lexists(self, path, **kwargs):
return osp.lexists(path)

def cp_file(self, path1, path2, **kwargs):
path1 = self._strip_protocol(path1).rstrip("/")
path2 = self._strip_protocol(path2).rstrip("/")
path1 = self._strip_protocol(path1, remove_trailing_slash=True)
path2 = self._strip_protocol(path2, remove_trailing_slash=True)
if self.auto_mkdir:
self.makedirs(self._parent(path2), exist_ok=True)
if self.isfile(path1):
Expand All @@ -138,8 +138,8 @@ def put_file(self, path1, path2, callback=None, **kwargs):
return self.cp_file(path1, path2, **kwargs)

def mv_file(self, path1, path2, **kwargs):
path1 = self._strip_protocol(path1).rstrip("/")
path2 = self._strip_protocol(path2).rstrip("/")
path1 = self._strip_protocol(path1, remove_trailing_slash=True)
path2 = self._strip_protocol(path2, remove_trailing_slash=True)
shutil.move(path1, path2)

def link(self, src, dst, **kwargs):
Expand All @@ -163,7 +163,7 @@ def rm(self, path, recursive=False, maxdepth=None):
path = [path]

for p in path:
p = self._strip_protocol(p).rstrip("/")
p = self._strip_protocol(p, remove_trailing_slash=True)
if self.isdir(p):
if not recursive:
raise ValueError("Cannot delete directory, set recursive=True")
Expand All @@ -179,6 +179,8 @@ def unstrip_protocol(self, name):

def _open(self, path, mode="rb", block_size=None, **kwargs):
path = self._strip_protocol(path)
if path.endswith("/"):
fleming79 marked this conversation as resolved.
Show resolved Hide resolved
raise IsADirectoryError(path)
if self.auto_mkdir and "w" in mode:
self.makedirs(self._parent(path), exist_ok=True)
return LocalFileOpener(path, mode, fs=self, **kwargs)
Expand All @@ -205,25 +207,36 @@ def modified(self, path):
return datetime.datetime.fromtimestamp(info["mtime"], tz=datetime.timezone.utc)

@classmethod
def _parent(cls, path):
path = cls._strip_protocol(path).rstrip("/")
if "/" in path:
return path.rsplit("/", 1)[0]
def _parent(cls, path, sep=os.sep):
path = cls._strip_protocol(path, sep=sep, remove_trailing_slash=True)
if sep == "/":
# posix native
return path.rsplit("/", 1)[0] or "/"
else:
return cls.root_marker
# NT
path_ = path.rsplit("/", 1)[0]
if len(path_) <= 3:
if path_[1:2] == ":":
# nt root (something like c:/)
return path_[0] + ":/"
raise NotImplementedError(path)
fleming79 marked this conversation as resolved.
Show resolved Hide resolved
# More cases may be required here
return path_

@classmethod
def _strip_protocol(cls, path):
def _strip_protocol(cls, path, sep=os.sep, remove_trailing_slash=False):
path = stringify_path(path)
if path.startswith("file://"):
path = path[7:]
if sep == "\\":
path = path.lstrip("/")
martindurant marked this conversation as resolved.
Show resolved Hide resolved
elif path.startswith("file:"):
path = path[5:]
elif path.startswith("local://"):
path = path[8:]
elif path.startswith("local:"):
path = path[6:]
return make_path_posix(path).rstrip("/") or cls.root_marker
return make_path_posix(path, sep, remove_trailing_slash)

def _isfilestore(self):
# Inheriting from DaskFileSystem makes this False (S3, etc. were)
Expand All @@ -236,47 +249,47 @@ def chmod(self, path, mode):
return os.chmod(path, mode)


def make_path_posix(path, sep=os.sep):
def make_path_posix(path, sep=os.sep, remove_trailing_slash=False):
"""Make path generic"""
if isinstance(path, (list, set, tuple)):
return type(path)(make_path_posix(p) for p in path)
if "~" in path:
path = osp.expanduser(path)
if not isinstance(path, str):
if isinstance(path, (list, set, tuple)):
return type(path)(
make_path_posix(p, sep, remove_trailing_slash) for p in path
)
else:
path = str(stringify_path(path))
if sep == "/":
# most common fast case for posix
# Native posix
if path.startswith("/"):
return path
if path.startswith("./"):
# most common fast case for posix
return path.rstrip("/") or "/" if remove_trailing_slash else path
elif path.startswith("~"):
return make_path_posix(osp.expanduser(path), sep, remove_trailing_slash)
elif path.startswith("./"):
path = path[2:]
path = f"{os.getcwd()}/{path}"
return path.rstrip("/") or "/" if remove_trailing_slash else path
return f"{os.getcwd()}/{path}"
if (
(sep not in path and "/" not in path)
or (sep == "/" and not path.startswith("/"))
or (sep == "\\" and ":" not in path and not path.startswith("\\\\"))
):
# relative path like "path" or "rel\\path" (win) or rel/path"
if os.sep == "\\":
# abspath made some more '\\' separators
return make_path_posix(osp.abspath(path))
else:
return f"{os.getcwd()}/{path}"
if path.startswith("file://"):
path = path[7:]
if re.match("/[A-Za-z]:", path):
# for windows file URI like "file:///C:/folder/file"
# or "file:///C:\\dir\\file"
path = path[1:].replace("\\", "/").replace("//", "/")
if path.startswith("\\\\"):
# special case for windows UNC/DFS-style paths, do nothing,
# just flip the slashes around (case below does not work!)
return path.replace("\\", "/")
if re.match("[A-Za-z]:", path):
# windows full path like "C:\\local\\path"
return path.lstrip("\\").replace("\\", "/").replace("//", "/")
if path.startswith("\\"):
# windows network path like "\\server\\path"
return "/" + path.lstrip("\\").replace("\\", "/").replace("//", "/")
return path
else:
# NT handling
if len(path) > 1:
if path[1] == ":":
# windows full path like "C:\\local\\path"
if len(path) <= 3:
# nt root (something like c:/)
return path[0] + ":/"
path = path.replace("\\", "/").replace("//", "/")
return path.rstrip("/") if remove_trailing_slash else path
elif path.startswith("\\\\") or path.startswith("//"):
# windows UNC/DFS-style paths
path = "//" + path[2:].replace("\\", "/").replace("//", "/")
return path.rstrip("/") if remove_trailing_slash else path
elif path.startswith("file://"):
path = path.removeprefix("file://").lstrip("/")
return make_path_posix(path, sep, remove_trailing_slash)
elif path[0] == "~":
return make_path_posix(osp.expanduser(path), sep, remove_trailing_slash)
return make_path_posix(osp.abspath(path), sep, remove_trailing_slash)


def trailing_sep(path):
Expand Down
51 changes: 34 additions & 17 deletions fsspec/implementations/tests/test_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -472,33 +472,49 @@ def test_make_path_posix():
drive = cwd[0]
assert make_path_posix("/a/posix/path") == f"{drive}:/a/posix/path"
assert make_path_posix("/posix") == f"{drive}:/posix"
# Windows drive requires trailing slash
assert make_path_posix("C:\\") == "C:/"
assert make_path_posix("C:\\", remove_trailing_slash=True) == "C:/"
else:
assert make_path_posix("/a/posix/path") == "/a/posix/path"
assert make_path_posix("/posix") == "/posix"
assert make_path_posix("relpath") == posixpath.join(make_path_posix(cwd), "relpath")
assert make_path_posix("rel/path") == posixpath.join(
make_path_posix(cwd), "rel/path"
)
if WIN:
assert make_path_posix("C:\\path") == "C:/path"
assert make_path_posix("file://C:\\path\\file") == "C:/path/file"
martindurant marked this conversation as resolved.
Show resolved Hide resolved
if WIN:
assert (
make_path_posix(
"\\\\windows-server\\someshare\\path\\more\\path\\dir\\foo.parquet"
)
== "//windows-server/someshare/path/more/path/dir/foo.parquet"
# NT style
assert make_path_posix("C:\\path", sep="\\") == "C:/path"
assert make_path_posix("file://C:\\path\\file", sep="\\") == "C:/path/file"
assert (
make_path_posix(
"\\\\windows-server\\someshare\\path\\more\\path\\dir\\foo.parquet",
sep="\\",
)
assert (
make_path_posix(
r"\\SERVER\UserHomeFolder$\me\My Documents\project1\data\filen.csv"
)
== "//SERVER/UserHomeFolder$/me/My Documents/project1/data/filen.csv"
== "//windows-server/someshare/path/more/path/dir/foo.parquet"
)
assert (
make_path_posix(
"\\\\SERVER\\UserHomeFolder$\\me\\My Documents\\project1\\data\\filen.csv",
sep="\\",
)
== "//SERVER/UserHomeFolder$/me/My Documents/project1/data/filen.csv"
)
assert "/" in make_path_posix("rel\\path")

# Relative
pp = make_path_posix("./path")
assert "./" not in pp and ".\\" not in pp
cd = make_path_posix(cwd)
assert pp == cd + "/path"
# Userpath
userpath = make_path_posix("~/path")
assert userpath.endswith("/path")


def test_parent():
assert LocalFileSystem._parent("/file or folder", sep="/") == "/"
martindurant marked this conversation as resolved.
Show resolved Hide resolved
assert LocalFileSystem._parent("/", sep="/") == "/"
# NT
assert LocalFileSystem._parent("C:\\file or folder", sep="\\") == "C:/"
assert LocalFileSystem._parent("C:\\", sep="\\") == "C:/"


def test_linked_files(tmpdir):
Expand Down Expand Up @@ -640,7 +656,8 @@ def test_strip_protocol_expanduser():
assert path != stripped
assert "file://" not in stripped
assert stripped.startswith(os.path.expanduser("~").replace("\\", "/"))
assert not LocalFileSystem._strip_protocol("./").endswith("/")
path = LocalFileSystem._strip_protocol("./", remove_trailing_slash=True)
assert not path.endswith("/")


def test_strip_protocol_no_authority():
Expand Down
7 changes: 5 additions & 2 deletions fsspec/mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,10 @@ class FSMap(MutableMapping):

def __init__(self, root, fs, check=False, create=False, missing_exceptions=None):
self.fs = fs
self.root = fs._strip_protocol(root).rstrip("/")
try:
self.root = fs._strip_protocol(root, remove_trailing_slash=True)
except TypeError:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is unfortunate; in what case does rstrip fail? In this case, I think we might be explicitly joining on the separator anyway.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On Windows if the root is something like c:/, rstrip will cause a root of c: which is treated like '.', the current working directory instead of the root.
On posix rstrip will make / change to .

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the only place that the special case for LocalFileSystem leaks, and I don't like it :|. I think it may be better to let through strange behaviour if the user thinks they want to may their whole C drive (and gets only cwd files instead).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've changed this back.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But without .rstrip("/")

self.root = fs._strip_protocol(root).rstrip("/")
self._root_key_to_str = fs._strip_protocol(posixpath.join(root, "x"))[:-1]
if missing_exceptions is None:
missing_exceptions = (
Expand Down Expand Up @@ -138,7 +141,7 @@ def _key_to_str(self, key):
if isinstance(key, list):
key = tuple(key)
key = str(key)
return f"{self._root_key_to_str}{key}"
return f"{self._root_key_to_str}{key}".rstrip("/")

def _str_to_key(self, s):
"""Strip path of to leave key name"""
Expand Down
20 changes: 12 additions & 8 deletions fsspec/tests/test_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,19 +200,23 @@ def test_fsmap_error_on_protocol_keys():
_ = m[f"memory://{root}/a"]


# on Windows opening a directory will raise PermissionError
# see: https://bugs.python.org/issue43095
@pytest.mark.skipif(
platform.system() == "Windows", reason="raises PermissionError on windows"
)
def test_fsmap_access_with_suffix(tmp_path):
tmp_path.joinpath("b").mkdir()
tmp_path.joinpath("b", "a").write_bytes(b"data")
m = fsspec.get_mapper(f"file://{tmp_path}")

if platform.system() == "Windows":
# on Windows opening a directory will raise PermissionError
# see: https://bugs.python.org/issue43095
missing_exceptions = (
FileNotFoundError,
IsADirectoryError,
NotADirectoryError,
PermissionError,
)
else:
None
m = fsspec.get_mapper(f"file://{tmp_path}", missing_exceptions=missing_exceptions)
with pytest.raises(KeyError):
_ = m["b/"]

assert m["b/a/"] == b"data"


Expand Down