From e936502ddebe858a2402e68db0e2d684ee0bed33 Mon Sep 17 00:00:00 2001 From: Ryan Grout Date: Tue, 22 Oct 2024 16:03:03 -0600 Subject: [PATCH 1/4] Check if cls is a caching file system. --- fsspec/core.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fsspec/core.py b/fsspec/core.py index 1954667fe..b8bf519b2 100644 --- a/fsspec/core.py +++ b/fsspec/core.py @@ -329,6 +329,9 @@ def open_files( def _un_chain(path, kwargs): + # Avoid a circular import + from fsspec.implementations.cached import CachingFileSystem + x = re.compile(".*[^a-z]+.*") # test for non protocol-like single word bits = ( [p if "://" in p or x.match(p) else p + "://" for p in path.split("::")] @@ -352,8 +355,8 @@ def _un_chain(path, kwargs): ) bit = cls._strip_protocol(bit) if ( - protocol in {"blockcache", "filecache", "simplecache"} - and "target_protocol" not in kw + "target_protocol" not in kw + and issubclass(cls, CachingFileSystem) ): bit = previous_bit out.append((bit, protocol, kw)) From 046c9892a05476cb37995b1ad2bcc21b5f094d07 Mon Sep 17 00:00:00 2001 From: Ryan Grout Date: Tue, 22 Oct 2024 16:06:40 -0600 Subject: [PATCH 2/4] Expand statement for better readability/performance. --- fsspec/core.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/fsspec/core.py b/fsspec/core.py index b8bf519b2..fd16fe4e2 100644 --- a/fsspec/core.py +++ b/fsspec/core.py @@ -332,12 +332,16 @@ def _un_chain(path, kwargs): # Avoid a circular import from fsspec.implementations.cached import CachingFileSystem - x = re.compile(".*[^a-z]+.*") # test for non protocol-like single word - bits = ( - [p if "://" in p or x.match(p) else p + "://" for p in path.split("::")] - if "::" in path - else [path] - ) + if "::" in path: + x = re.compile(".*[^a-z]+.*") # test for non protocol-like single word + bits = [] + for p in path.split("::"): + if "://" in p or x.match(p): + bits.append(p) + else: + bits.append(p + "://") + else: + bits = [path] # [[url, protocol, kwargs], ...] out = [] previous_bit = None From 77aa0f6c5a227691c79e532d5526cdbef469807b Mon Sep 17 00:00:00 2001 From: Ryan Grout Date: Tue, 22 Oct 2024 16:14:50 -0600 Subject: [PATCH 3/4] Test all builtin names for caches. --- fsspec/implementations/tests/test_cached.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/fsspec/implementations/tests/test_cached.py b/fsspec/implementations/tests/test_cached.py index 67edbc8df..3ce18f02f 100644 --- a/fsspec/implementations/tests/test_cached.py +++ b/fsspec/implementations/tests/test_cached.py @@ -267,7 +267,7 @@ def test_blockcache_workflow(ftp_writable, tmp_path, force_save_pickle): assert f.read(5) == b"test\n" -@pytest.mark.parametrize("impl", ["filecache", "blockcache"]) +@pytest.mark.parametrize("impl", ["filecache", "blockcache", "cached"]) def test_workflow(ftp_writable, impl): host, port, user, pw = ftp_writable fs = FTPFileSystem(host, port, user, pw) @@ -295,7 +295,7 @@ def test_workflow(ftp_writable, impl): ) # new value, because we overwrote the cached location -@pytest.mark.parametrize("impl", ["simplecache", "blockcache"]) +@pytest.mark.parametrize("impl", ["simplecache", "blockcache", "cached"]) def test_glob(ftp_writable, impl): host, port, user, pw = ftp_writable fs = FTPFileSystem(host, port, user, pw) @@ -622,7 +622,7 @@ def open_raise(*_, **__): assert "Cache save failed due to interpreter shutdown" in caplog.text -@pytest.mark.parametrize("impl", ["filecache", "simplecache", "blockcache"]) +@pytest.mark.parametrize("impl", ["filecache", "simplecache", "blockcache", "cached"]) def test_local_filecache_creates_dir_if_needed(impl): import tempfile @@ -875,7 +875,7 @@ def test_filecache_with_checks(): assert fs.cat(f1) == data * 2 # changed, since origin changed -@pytest.mark.parametrize("impl", ["filecache", "simplecache", "blockcache"]) +@pytest.mark.parametrize("impl", ["filecache", "simplecache", "blockcache", "cached"]) @pytest.mark.parametrize("fs", ["local", "multi"], indirect=["fs"]) def test_filecache_takes_fs_instance(impl, fs): origin = tempfile.mkdtemp() @@ -889,7 +889,7 @@ def test_filecache_takes_fs_instance(impl, fs): assert fs2.cat(f1) == data -@pytest.mark.parametrize("impl", ["filecache", "simplecache", "blockcache"]) +@pytest.mark.parametrize("impl", ["filecache", "simplecache", "blockcache", "cached"]) @pytest.mark.parametrize("fs", ["local", "multi"], indirect=["fs"]) def test_filecache_serialization(impl, fs): fs1 = fsspec.filesystem(impl, fs=fs) @@ -1031,7 +1031,7 @@ def test_multi_cache(protocol): assert f.read() == b"hello" -@pytest.mark.parametrize("protocol", ["simplecache", "filecache", "blockcache"]) +@pytest.mark.parametrize("protocol", ["simplecache", "filecache", "blockcache", "cached"]) def test_multi_cat(protocol, ftp_writable): host, port, user, pw = ftp_writable fs = FTPFileSystem(host, port, user, pw) @@ -1064,7 +1064,7 @@ def test_multi_cache_chain(protocol): assert files[0].read() == b"hello" -@pytest.mark.parametrize("protocol", ["blockcache", "simplecache", "filecache"]) +@pytest.mark.parametrize("protocol", ["blockcache", "cached", "simplecache", "filecache"]) def test_strip(protocol): fs = fsspec.filesystem(protocol, target_protocol="memory") url1 = "memory://afile" @@ -1235,9 +1235,9 @@ def test_cache_dir_auto_deleted(temp_cache, tmpdir): assert local.exists(cache_dir) -@pytest.mark.parametrize("protocol", ["filecache", "blockcache", "simplecache"]) +@pytest.mark.parametrize("protocol", ["filecache", "blockcache", "cached", "simplecache"]) def test_cache_size(tmpdir, protocol): - if win and protocol == "blockcache": + if win and protocol in {"blockcache", "cached"}: pytest.skip("Windows file locking affects blockcache size tests") source = os.path.join(tmpdir, "source") From 03224eeb7069118a00e78bf9e3ee249309b7ac61 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Wed, 23 Oct 2024 10:10:04 -0400 Subject: [PATCH 4/4] lint --- fsspec/core.py | 5 +---- fsspec/implementations/tests/test_cached.py | 12 +++++++++--- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/fsspec/core.py b/fsspec/core.py index fd16fe4e2..79580bee9 100644 --- a/fsspec/core.py +++ b/fsspec/core.py @@ -358,10 +358,7 @@ def _un_chain(path, kwargs): **kws, ) bit = cls._strip_protocol(bit) - if ( - "target_protocol" not in kw - and issubclass(cls, CachingFileSystem) - ): + if "target_protocol" not in kw and issubclass(cls, CachingFileSystem): bit = previous_bit out.append((bit, protocol, kw)) previous_bit = bit diff --git a/fsspec/implementations/tests/test_cached.py b/fsspec/implementations/tests/test_cached.py index 3ce18f02f..c9222d5b5 100644 --- a/fsspec/implementations/tests/test_cached.py +++ b/fsspec/implementations/tests/test_cached.py @@ -1031,7 +1031,9 @@ def test_multi_cache(protocol): assert f.read() == b"hello" -@pytest.mark.parametrize("protocol", ["simplecache", "filecache", "blockcache", "cached"]) +@pytest.mark.parametrize( + "protocol", ["simplecache", "filecache", "blockcache", "cached"] +) def test_multi_cat(protocol, ftp_writable): host, port, user, pw = ftp_writable fs = FTPFileSystem(host, port, user, pw) @@ -1064,7 +1066,9 @@ def test_multi_cache_chain(protocol): assert files[0].read() == b"hello" -@pytest.mark.parametrize("protocol", ["blockcache", "cached", "simplecache", "filecache"]) +@pytest.mark.parametrize( + "protocol", ["blockcache", "cached", "simplecache", "filecache"] +) def test_strip(protocol): fs = fsspec.filesystem(protocol, target_protocol="memory") url1 = "memory://afile" @@ -1235,7 +1239,9 @@ def test_cache_dir_auto_deleted(temp_cache, tmpdir): assert local.exists(cache_dir) -@pytest.mark.parametrize("protocol", ["filecache", "blockcache", "cached", "simplecache"]) +@pytest.mark.parametrize( + "protocol", ["filecache", "blockcache", "cached", "simplecache"] +) def test_cache_size(tmpdir, protocol): if win and protocol in {"blockcache", "cached"}: pytest.skip("Windows file locking affects blockcache size tests")