Skip to content

Commit

Permalink
LocalFileSystem fix _strip_protocol for root directory (fsspec#1477)
Browse files Browse the repository at this point in the history
  • Loading branch information
fleming79 authored Mar 18, 2024
1 parent ecc5765 commit 1653552
Show file tree
Hide file tree
Showing 5 changed files with 109 additions and 77 deletions.
122 changes: 65 additions & 57 deletions fsspec/implementations/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import logging
import os
import os.path as osp
import re
import shutil
import stat
import tempfile
Expand All @@ -16,6 +15,12 @@
logger = logging.getLogger("fsspec.local")


def _remove_prefix(text: str, prefix: str):
if text.startswith(prefix):
return text[len(prefix) :]
return text


class LocalFileSystem(AbstractFileSystem):
"""Interface to files on local storage
Expand Down Expand Up @@ -116,8 +121,8 @@ def lexists(self, path, **kwargs):
return osp.lexists(path)

def cp_file(self, path1, path2, **kwargs):
path1 = self._strip_protocol(path1).rstrip("/")
path2 = self._strip_protocol(path2).rstrip("/")
path1 = self._strip_protocol(path1, remove_trailing_slash=True)
path2 = self._strip_protocol(path2, remove_trailing_slash=True)
if self.auto_mkdir:
self.makedirs(self._parent(path2), exist_ok=True)
if self.isfile(path1):
Expand All @@ -138,8 +143,8 @@ def put_file(self, path1, path2, callback=None, **kwargs):
return self.cp_file(path1, path2, **kwargs)

def mv_file(self, path1, path2, **kwargs):
path1 = self._strip_protocol(path1).rstrip("/")
path2 = self._strip_protocol(path2).rstrip("/")
path1 = self._strip_protocol(path1, remove_trailing_slash=True)
path2 = self._strip_protocol(path2, remove_trailing_slash=True)
shutil.move(path1, path2)

def link(self, src, dst, **kwargs):
Expand All @@ -163,7 +168,7 @@ def rm(self, path, recursive=False, maxdepth=None):
path = [path]

for p in path:
p = self._strip_protocol(p).rstrip("/")
p = self._strip_protocol(p, remove_trailing_slash=True)
if self.isdir(p):
if not recursive:
raise ValueError("Cannot delete directory, set recursive=True")
Expand Down Expand Up @@ -206,24 +211,32 @@ def modified(self, path):

@classmethod
def _parent(cls, path):
path = cls._strip_protocol(path).rstrip("/")
if "/" in path:
return path.rsplit("/", 1)[0]
path = cls._strip_protocol(path, remove_trailing_slash=True)
if os.sep == "/":
# posix native
return path.rsplit("/", 1)[0] or "/"
else:
return cls.root_marker
# NT
path_ = path.rsplit("/", 1)[0]
if len(path_) <= 3:
if path_[1:2] == ":":
# nt root (something like c:/)
return path_[0] + ":/"
# More cases may be required here
return path_

@classmethod
def _strip_protocol(cls, path):
def _strip_protocol(cls, path, remove_trailing_slash=False):
path = stringify_path(path)
if path.startswith("file://"):
path = path[7:]
elif path.startswith("file:"):
path = path[5:]
elif path.startswith("local://"):
path = path[8:]
if path.startswith("file:"):
path = _remove_prefix(_remove_prefix(path, "file://"), "file:")
if os.sep == "\\":
path = path.lstrip("/")
elif path.startswith("local:"):
path = path[6:]
return make_path_posix(path).rstrip("/") or cls.root_marker
path = _remove_prefix(_remove_prefix(path, "local://"), "local:")
if os.sep == "\\":
path = path.lstrip("/")
return make_path_posix(path, remove_trailing_slash)

def _isfilestore(self):
# Inheriting from DaskFileSystem makes this False (S3, etc. were)
Expand All @@ -236,47 +249,42 @@ def chmod(self, path, mode):
return os.chmod(path, mode)


def make_path_posix(path, sep=os.sep):
"""Make path generic"""
if isinstance(path, (list, set, tuple)):
return type(path)(make_path_posix(p) for p in path)
if "~" in path:
path = osp.expanduser(path)
if sep == "/":
# most common fast case for posix
def make_path_posix(path, remove_trailing_slash=False):
"""Make path generic for current OS"""
if not isinstance(path, str):
if isinstance(path, (list, set, tuple)):
return type(path)(make_path_posix(p, remove_trailing_slash) for p in path)
else:
path = str(stringify_path(path))
if os.sep == "/":
# Native posix
if path.startswith("/"):
return path
if path.startswith("./"):
# most common fast case for posix
return path.rstrip("/") or "/" if remove_trailing_slash else path
elif path.startswith("~"):
return make_path_posix(osp.expanduser(path), remove_trailing_slash)
elif path.startswith("./"):
path = path[2:]
path = f"{os.getcwd()}/{path}"
return path.rstrip("/") or "/" if remove_trailing_slash else path
return f"{os.getcwd()}/{path}"
if (
(sep not in path and "/" not in path)
or (sep == "/" and not path.startswith("/"))
or (sep == "\\" and ":" not in path and not path.startswith("\\\\"))
):
# relative path like "path" or "rel\\path" (win) or rel/path"
if os.sep == "\\":
# abspath made some more '\\' separators
return make_path_posix(osp.abspath(path))
else:
return f"{os.getcwd()}/{path}"
if path.startswith("file://"):
path = path[7:]
if re.match("/[A-Za-z]:", path):
# for windows file URI like "file:///C:/folder/file"
# or "file:///C:\\dir\\file"
path = path[1:].replace("\\", "/").replace("//", "/")
if path.startswith("\\\\"):
# special case for windows UNC/DFS-style paths, do nothing,
# just flip the slashes around (case below does not work!)
return path.replace("\\", "/")
if re.match("[A-Za-z]:", path):
# windows full path like "C:\\local\\path"
return path.lstrip("\\").replace("\\", "/").replace("//", "/")
if path.startswith("\\"):
# windows network path like "\\server\\path"
return "/" + path.lstrip("\\").replace("\\", "/").replace("//", "/")
return path
else:
# NT handling
if len(path) > 1:
if path[1] == ":":
# windows full path like "C:\\local\\path"
if len(path) <= 3:
# nt root (something like c:/)
return path[0] + ":/"
path = path.replace("\\", "/").replace("//", "/")
return path.rstrip("/") if remove_trailing_slash else path
elif path[0] == "~":
return make_path_posix(osp.expanduser(path), remove_trailing_slash)
elif path.startswith(("\\\\", "//")):
# windows UNC/DFS-style paths
path = "//" + path[2:].replace("\\", "/").replace("//", "/")
return path.rstrip("/") if remove_trailing_slash else path
return make_path_posix(osp.abspath(path), remove_trailing_slash)


def trailing_sep(path):
Expand Down
3 changes: 1 addition & 2 deletions fsspec/implementations/tests/test_jupyter.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,10 @@

@pytest.fixture()
def jupyter(tmpdir):

tmpdir = str(tmpdir)
os.environ["JUPYTER_TOKEN"] = "blah"
try:
cmd = f"jupyter notebook --notebook-dir={tmpdir} --no-browser --port=5566"
cmd = f'jupyter notebook --notebook-dir="{tmpdir}" --no-browser --port=5566'
P = subprocess.Popen(shlex.split(cmd))
except FileNotFoundError:
pytest.skip("notebook not installed correctly")
Expand Down
37 changes: 29 additions & 8 deletions fsspec/implementations/tests/test_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -472,33 +472,48 @@ def test_make_path_posix():
drive = cwd[0]
assert make_path_posix("/a/posix/path") == f"{drive}:/a/posix/path"
assert make_path_posix("/posix") == f"{drive}:/posix"
# Windows drive requires trailing slash
assert make_path_posix("C:\\") == "C:/"
assert make_path_posix("C:\\", remove_trailing_slash=True) == "C:/"
else:
assert make_path_posix("/a/posix/path") == "/a/posix/path"
assert make_path_posix("/posix") == "/posix"
assert make_path_posix("relpath") == posixpath.join(make_path_posix(cwd), "relpath")
assert make_path_posix("rel/path") == posixpath.join(
make_path_posix(cwd), "rel/path"
)
# NT style
if WIN:
assert make_path_posix("C:\\path") == "C:/path"
assert make_path_posix("file://C:\\path\\file") == "C:/path/file"
if WIN:
assert (
make_path_posix(
"\\\\windows-server\\someshare\\path\\more\\path\\dir\\foo.parquet"
"\\\\windows-server\\someshare\\path\\more\\path\\dir\\foo.parquet",
)
== "//windows-server/someshare/path/more/path/dir/foo.parquet"
)
assert (
make_path_posix(
r"\\SERVER\UserHomeFolder$\me\My Documents\project1\data\filen.csv"
"\\\\SERVER\\UserHomeFolder$\\me\\My Documents\\proj\\data\\fname.csv",
)
== "//SERVER/UserHomeFolder$/me/My Documents/project1/data/filen.csv"
== "//SERVER/UserHomeFolder$/me/My Documents/proj/data/fname.csv"
)
assert "/" in make_path_posix("rel\\path")

# Relative
pp = make_path_posix("./path")
assert "./" not in pp and ".\\" not in pp
cd = make_path_posix(cwd)
assert pp == cd + "/path"
# Userpath
userpath = make_path_posix("~/path")
assert userpath.endswith("/path")


def test_parent():
if WIN:
assert LocalFileSystem._parent("C:\\file or folder") == "C:/"
assert LocalFileSystem._parent("C:\\") == "C:/"
else:
assert LocalFileSystem._parent("/file or folder") == "/"
assert LocalFileSystem._parent("/") == "/"


def test_linked_files(tmpdir):
Expand Down Expand Up @@ -638,16 +653,22 @@ def test_strip_protocol_expanduser():
path = "file://~\\foo\\bar" if WIN else "file://~/foo/bar"
stripped = LocalFileSystem._strip_protocol(path)
assert path != stripped
assert "~" not in stripped
assert "file://" not in stripped
assert stripped.startswith(os.path.expanduser("~").replace("\\", "/"))
assert not LocalFileSystem._strip_protocol("./").endswith("/")
path = LocalFileSystem._strip_protocol("./", remove_trailing_slash=True)
assert not path.endswith("/")


def test_strip_protocol_no_authority():
path = "file:\\foo\\bar" if WIN else "file:/foo/bar"
stripped = LocalFileSystem._strip_protocol(path)
assert "file:" not in stripped
assert stripped.endswith("/foo/bar")
if WIN:
assert (
LocalFileSystem._strip_protocol("file://C:\\path\\file") == "C:/path/file"
)


def test_mkdir_twice_faile(tmpdir):
Expand Down
4 changes: 2 additions & 2 deletions fsspec/mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class FSMap(MutableMapping):

def __init__(self, root, fs, check=False, create=False, missing_exceptions=None):
self.fs = fs
self.root = fs._strip_protocol(root).rstrip("/")
self.root = fs._strip_protocol(root)
self._root_key_to_str = fs._strip_protocol(posixpath.join(root, "x"))[:-1]
if missing_exceptions is None:
missing_exceptions = (
Expand Down Expand Up @@ -142,7 +142,7 @@ def _key_to_str(self, key):
if isinstance(key, list):
key = tuple(key)
key = str(key)
return f"{self._root_key_to_str}{key}"
return f"{self._root_key_to_str}{key}".rstrip("/")

def _str_to_key(self, s):
"""Strip path of to leave key name"""
Expand Down
20 changes: 12 additions & 8 deletions fsspec/tests/test_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,19 +200,23 @@ def test_fsmap_error_on_protocol_keys():
_ = m[f"memory://{root}/a"]


# on Windows opening a directory will raise PermissionError
# see: https://bugs.python.org/issue43095
@pytest.mark.skipif(
platform.system() == "Windows", reason="raises PermissionError on windows"
)
def test_fsmap_access_with_suffix(tmp_path):
tmp_path.joinpath("b").mkdir()
tmp_path.joinpath("b", "a").write_bytes(b"data")
m = fsspec.get_mapper(f"file://{tmp_path}")

if platform.system() == "Windows":
# on Windows opening a directory will raise PermissionError
# see: https://bugs.python.org/issue43095
missing_exceptions = (
FileNotFoundError,
IsADirectoryError,
NotADirectoryError,
PermissionError,
)
else:
missing_exceptions = None
m = fsspec.get_mapper(f"file://{tmp_path}", missing_exceptions=missing_exceptions)
with pytest.raises(KeyError):
_ = m["b/"]

assert m["b/a/"] == b"data"


Expand Down

0 comments on commit 1653552

Please sign in to comment.