Skip to content

Commit

Permalink
Remove trailing "/" from path names in archives (#1445)
Browse files Browse the repository at this point in the history
* Remove trailing "/" from path names in ZIP

* Fix path names in all archives

* update tests
  • Loading branch information
martindurant committed Dec 5, 2023
1 parent 5cf9cd9 commit b6536be
Show file tree
Hide file tree
Showing 8 changed files with 36 additions and 18 deletions.
7 changes: 7 additions & 0 deletions docs/source/changelog.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
Changelog
=========

2023.12.1
---------

Fixes

- Remove trailing "/" from directory names in archive filesystems (zip, tar, libarchive) (#1445)

2023.12.0
---------

Expand Down
4 changes: 2 additions & 2 deletions fsspec/archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def info(self, path, **kwargs):
self._get_dirs()
path = self._strip_protocol(path)
if path in {"", "/"} and self.dir_cache:
return {"name": "/", "type": "directory", "size": 0}
return {"name": "", "type": "directory", "size": 0}
if path in self.dir_cache:
return self.dir_cache[path]
elif path + "/" in self.dir_cache:
Expand All @@ -64,7 +64,7 @@ def ls(self, path, detail=True, **kwargs):
# root directory entry
ppath = p.rstrip("/").split("/", 1)[0]
if ppath not in paths:
out = {"name": ppath + "/", "size": 0, "type": "directory"}
out = {"name": ppath, "size": 0, "type": "directory"}
paths[ppath] = out
out = sorted(paths.values(), key=lambda _: _["name"])
if detail:
Expand Down
5 changes: 2 additions & 3 deletions fsspec/implementations/libarchive.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,8 +164,7 @@ def _get_dirs(self):
continue
self.dir_cache.update(
{
dirname
+ "/": {"name": dirname + "/", "size": 0, "type": "directory"}
dirname: {"name": dirname, "size": 0, "type": "directory"}
for dirname in self._all_dirnames(set(entry.name))
}
)
Expand All @@ -178,7 +177,7 @@ def _get_dirs(self):
# not in all formats), so get the directory names from the file names
self.dir_cache.update(
{
dirname + "/": {"name": dirname + "/", "size": 0, "type": "directory"}
dirname: {"name": dirname, "size": 0, "type": "directory"}
for dirname in self._all_dirnames(list_names)
}
)
Expand Down
3 changes: 2 additions & 1 deletion fsspec/implementations/tar.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,11 +106,12 @@ def _get_dirs(self):

# This enables ls to get directories as children as well as files
self.dir_cache = {
dirname + "/": {"name": dirname + "/", "size": 0, "type": "directory"}
dirname: {"name": dirname, "size": 0, "type": "directory"}
for dirname in self._all_dirnames(self.tar.getnames())
}
for member in self.tar.getmembers():
info = member.get_info()
info["name"] = info["name"].rstrip("/")
info["type"] = typemap.get(info["type"], "file")
self.dir_cache[info["name"]] = info

Expand Down
10 changes: 5 additions & 5 deletions fsspec/implementations/tests/test_archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,10 +276,10 @@ def test_ls(self, scenario: ArchiveTestScenario):
with scenario.provider(archive_data) as archive:
fs = fsspec.filesystem(scenario.protocol, fo=archive)

assert fs.ls("", detail=False) == ["a", "b", "deeply/"]
assert fs.ls("", detail=False) == ["a", "b", "deeply"]
assert fs.ls("/") == fs.ls("")

assert fs.ls("deeply", detail=False) == ["deeply/nested/"]
assert fs.ls("deeply", detail=False) == ["deeply/nested"]
assert fs.ls("deeply/") == fs.ls("deeply")

assert fs.ls("deeply/nested", detail=False) == ["deeply/nested/path"]
Expand All @@ -293,8 +293,8 @@ def test_find(self, scenario: ArchiveTestScenario):
assert fs.find("", withdirs=True) == [
"a",
"b",
"deeply/",
"deeply/nested/",
"deeply",
"deeply/nested",
"deeply/nested/path",
]

Expand Down Expand Up @@ -347,7 +347,7 @@ def project(mapping, keys):
# Iterate over all directories.
for d in fs._all_dirnames(archive_data.keys()):
lhs = project(fs.info(d), ["name", "size", "type"])
expected = {"name": f"{d}/", "size": 0, "type": "directory"}
expected = {"name": f"{d}", "size": 0, "type": "directory"}
assert lhs == expected

# Iterate over all files.
Expand Down
8 changes: 4 additions & 4 deletions fsspec/implementations/tests/test_tar.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def test_info():
lhs = fs.info(d)
del lhs["chksum"]
expected = {
"name": f"{d}/",
"name": f"{d}",
"size": 0,
"type": "directory",
"devmajor": 0,
Expand Down Expand Up @@ -234,10 +234,10 @@ def test_ls_with_folders(compression: str, tmp_path: Path):
fs = TarFileSystem(fd)
assert fs.find("/", withdirs=True) == [
"a.pdf",
"b/",
"b",
"b/c.pdf",
"d/",
"d/e/",
"d",
"d/e",
"d/e/f.pdf",
"d/g.pdf",
]
15 changes: 13 additions & 2 deletions fsspec/implementations/tests/test_zip.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ def test_not_cached():
def test_root_info():
with tempzip(archive_data) as z:
fs = fsspec.filesystem("zip", fo=z)
assert fs.info("/") == {"name": "/", "type": "directory", "size": 0}
assert fs.info("") == {"name": "/", "type": "directory", "size": 0}
assert fs.info("/") == {"name": "", "type": "directory", "size": 0}
assert fs.info("") == {"name": "", "type": "directory", "size": 0}


def test_write_seek(m):
Expand Down Expand Up @@ -83,3 +83,14 @@ def test_mapper(m):
# fails because this is write mode and we cannot also read
mapper["a"]
assert "a" in mapper # but we can list


def test_zip_glob_star(m):
    """Check that ``glob("*")`` at the root of a zip archive gives one match.

    A single member, ``adir/afile``, is written into a zip archive stored at
    ``memory://out.zip``; globbing the archive root must then return exactly
    one entry. Presumably this is a regression test for directory names that
    carried a trailing "/" (#1445) -- confirm against the issue.
    """
    # NOTE(review): ``m`` is a fixture supplied by the test suite and is not
    # referenced directly here; presumably it sets up the in-memory
    # filesystem -- verify against the suite's conftest.
    target = "zip://adir/afile::memory://out.zip"
    with fsspec.open(target, mode="wb", zip={"mode": "w"}) as handle:
        handle.write(b"data")

    fs, _unused_path = fsspec.core.url_to_fs("zip::memory://out.zip")
    matches = fs.glob("*")
    assert len(matches) == 1
2 changes: 1 addition & 1 deletion fsspec/implementations/zip.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def _get_dirs(self):
# not read from the file.
files = self.zip.infolist()
self.dir_cache = {
dirname + "/": {"name": dirname + "/", "size": 0, "type": "directory"}
dirname: {"name": dirname, "size": 0, "type": "directory"}
for dirname in self._all_dirnames(self.zip.namelist())
}
for z in files:
Expand Down

0 comments on commit b6536be

Please sign in to comment.