From 14a4ae43c2e80453e6319b2fff539ce4017bcfbc Mon Sep 17 00:00:00 2001 From: Tim Paine <3105306+timkpaine@users.noreply.github.com> Date: Tue, 7 Oct 2025 10:57:47 -0400 Subject: [PATCH 1/5] Add marker for chained filesystems for path passthrough --- docs/source/features.rst | 4 +++- fsspec/core.py | 4 ++-- fsspec/implementations/cached.py | 5 +++-- fsspec/implementations/chained.py | 9 ++++++++ fsspec/tests/test_chained.py | 37 +++++++++++++++++++++++++++++++ 5 files changed, 54 insertions(+), 5 deletions(-) create mode 100644 fsspec/implementations/chained.py create mode 100644 fsspec/tests/test_chained.py diff --git a/docs/source/features.rst b/docs/source/features.rst index 949e47107..badc1fb24 100644 --- a/docs/source/features.rst +++ b/docs/source/features.rst @@ -241,7 +241,9 @@ reads the same zip-file, but extracts the CSV files and stores them locally in t **For developers**: this "chaining" methods works by formatting the arguments passed to ``open_*`` into ``target_protocol`` (a simple string) and ``target_options`` (a dict) and also optionally ``fo`` (target path, if a specific file is required). In order for an implementation to chain -successfully like this, it must look for exactly those named arguments. +successfully like this, it must look for exactly those named arguments. Implementations that +require access to the target path of their nested targets should inherit from ``ChainedFileSystem``, +which will trigger pass-through of the nested path automatically. Caching Files Locally --------------------- diff --git a/fsspec/core.py b/fsspec/core.py index d8e75572b..a7db808ae 100644 --- a/fsspec/core.py +++ b/fsspec/core.py @@ -330,7 +330,7 @@ def open_files( def _un_chain(path, kwargs): # Avoid a circular import - from fsspec.implementations.cached import CachingFileSystem + from fsspec.implementations.chained import ChainedFileSystem if "::" in path: x = re.compile(".*[^a-z]+.*") # test for non protocol-like single word @@ -358,7 +358,7 @@ def _un_chain(path, kwargs): **kws, ) bit = cls._strip_protocol(bit) - if "target_protocol" not in kw and issubclass(cls, CachingFileSystem): + if "target_protocol" not in kw and issubclass(cls, ChainedFileSystem): bit = previous_bit out.append((bit, protocol, kw)) previous_bit = bit diff --git a/fsspec/implementations/cached.py b/fsspec/implementations/cached.py index 74e6a59e4..8e4d0a0ba 100644 --- a/fsspec/implementations/cached.py +++ b/fsspec/implementations/cached.py @@ -9,13 +9,14 @@ from shutil import rmtree from typing import TYPE_CHECKING, Any, Callable, ClassVar -from fsspec import AbstractFileSystem, filesystem +from fsspec import filesystem from fsspec.callbacks import DEFAULT_CALLBACK from fsspec.compression import compr from fsspec.core import BaseCache, MMapCache from fsspec.exceptions import BlocksizeMismatchError from fsspec.implementations.cache_mapper import create_cache_mapper from fsspec.implementations.cache_metadata import CacheMetadata +from fsspec.implementations.chained import ChainedFileSystem from fsspec.implementations.local import LocalFileSystem from fsspec.spec import AbstractBufferedFile from fsspec.transaction import Transaction @@ -39,7 +40,7 @@ def complete(self, commit=True): self.fs = None # break cycle -class CachingFileSystem(AbstractFileSystem): +class CachingFileSystem(ChainedFileSystem): """Locally caching filesystem, layer over any other FS This class implements chunk-wise local storage of remote files, for quick diff --git a/fsspec/implementations/chained.py b/fsspec/implementations/chained.py new file mode 100644 index 000000000..aedf3b4b4 --- /dev/null +++ b/fsspec/implementations/chained.py @@ -0,0 +1,9 @@ +from typing import ClassVar + +from fsspec import AbstractFileSystem + +__all__ = ("ChainedFileSystem",) + + +class ChainedFileSystem(AbstractFileSystem): + chained: ClassVar[str] = "chained" diff --git a/fsspec/tests/test_chained.py b/fsspec/tests/test_chained.py new file mode 100644 index 000000000..345769709 --- /dev/null +++ b/fsspec/tests/test_chained.py @@ -0,0 +1,37 @@ +import pytest + +from fsspec import AbstractFileSystem, filesystem +from fsspec import url_to_fs, register_implementation +from fsspec.implementations.cached import ChainedFileSystem + + +class MyChainedFS(ChainedFileSystem): + protocol = "mychain" + + def __init__(self, target_protocol="", target_options=None, **kwargs): + super().__init__(**kwargs) + self.fs = filesystem(target_protocol, **target_options) + +class MyNonChainedFS(AbstractFileSystem): + protocol = "mynonchain" + +@pytest.fixture(scope="module") +def register_fs(): + register_implementation(MyChainedFS.protocol, MyChainedFS) + register_implementation(MyNonChainedFS.protocol, MyNonChainedFS) + yield + +def test_token_passthrough_to_chained(register_fs): + # First, run a sanity check + fs, rest = url_to_fs("mynonchain://path/to/file") + assert isinstance(fs, MyNonChainedFS) + assert fs.protocol == "mynonchain" + assert rest == "path/to/file" + + # Now test that the chained FS works + fs, rest = url_to_fs("mychain::mynonchain://path/to/file") + assert isinstance(fs, MyChainedFS) + assert fs.protocol == "mychain" + assert rest == "path/to/file" + assert isinstance(fs.fs, MyNonChainedFS) + assert fs.fs.protocol == "mynonchain" From 710f9cd3ce86be438e2de8fe0731b04243f16666 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Tue, 7 Oct 2025 14:15:32 -0400 Subject: [PATCH 2/5] lint --- fsspec/tests/test_chained.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fsspec/tests/test_chained.py b/fsspec/tests/test_chained.py index 345769709..0c7579481 100644 --- a/fsspec/tests/test_chained.py +++ b/fsspec/tests/test_chained.py @@ -1,7 +1,6 @@ import pytest -from fsspec import AbstractFileSystem, filesystem -from fsspec import url_to_fs, register_implementation +from fsspec import AbstractFileSystem, filesystem, register_implementation, url_to_fs from fsspec.implementations.cached import ChainedFileSystem @@ -12,15 +11,18 @@ def __init__(self, target_protocol="", target_options=None, **kwargs): super().__init__(**kwargs) self.fs = filesystem(target_protocol, **target_options) + class MyNonChainedFS(AbstractFileSystem): protocol = "mynonchain" + @pytest.fixture(scope="module") def register_fs(): register_implementation(MyChainedFS.protocol, MyChainedFS) register_implementation(MyNonChainedFS.protocol, MyNonChainedFS) yield + def test_token_passthrough_to_chained(register_fs): # First, run a sanity check fs, rest = url_to_fs("mynonchain://path/to/file") From 159cd69ced3fdd9258539d1c004330eafd1d8064 Mon Sep 17 00:00:00 2001 From: Tim Paine <3105306+timkpaine@users.noreply.github.com> Date: Tue, 7 Oct 2025 14:42:48 -0400 Subject: [PATCH 3/5] Update fsspec/implementations/chained.py Co-authored-by: Martin Durant --- fsspec/implementations/chained.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fsspec/implementations/chained.py b/fsspec/implementations/chained.py index aedf3b4b4..169353287 100644 --- a/fsspec/implementations/chained.py +++ b/fsspec/implementations/chained.py @@ -6,4 +6,4 @@ class ChainedFileSystem(AbstractFileSystem): - chained: ClassVar[str] = "chained" + protocol: ClassVar[str] = "chained" From 8ec8133fd409b46ea7dc2a334a65948d65683075 Mon Sep 17 00:00:00 2001 From: Tim Paine <3105306+timkpaine@users.noreply.github.com> Date: Thu, 9 Oct 2025 15:08:23 -0400 Subject: [PATCH 4/5] Add docstring --- fsspec/implementations/chained.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/fsspec/implementations/chained.py b/fsspec/implementations/chained.py index 169353287..e46d0c44a 100644 --- a/fsspec/implementations/chained.py +++ b/fsspec/implementations/chained.py @@ -6,4 +6,17 @@ class ChainedFileSystem(AbstractFileSystem): + """Chained filesystem base class. + + A chained filesystem is designed to be layered over another FS. + This is useful to implement things like caching. + + This base class does very little on its own, but is used as a marker + that the class is designed for chaining. + + Right now this is only used in `url_to_fs` to provide the path argument + (`fo`) to the chained filesystem from the underlying filesystem. + + Additional functionality may be added in the future. + """ protocol: ClassVar[str] = "chained" From 1c1528f1cd6fe32c06580df9bb4fd83309bb985a Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Thu, 9 Oct 2025 15:11:26 -0400 Subject: [PATCH 5/5] lint --- fsspec/implementations/chained.py | 1 + 1 file changed, 1 insertion(+) diff --git a/fsspec/implementations/chained.py b/fsspec/implementations/chained.py index e46d0c44a..bfce64334 100644 --- a/fsspec/implementations/chained.py +++ b/fsspec/implementations/chained.py @@ -19,4 +19,5 @@ class ChainedFileSystem(AbstractFileSystem): Additional functionality may be added in the future. """ + protocol: ClassVar[str] = "chained"