From 064f22156e540be629636fbd9222c12323fb5fee Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Wed, 12 Nov 2025 13:18:44 +0200 Subject: [PATCH 1/7] Add support for Python 3.14 and drop EOL 3.9 --- .github/workflows/main.yaml | 10 +++++----- .github/workflows/pypipublish.yaml | 4 ++-- README.md | 2 +- docs/environment.yml | 2 +- fsspec/implementations/tests/test_dbfs.py | 4 +--- pyproject.toml | 4 ++-- 6 files changed, 12 insertions(+), 14 deletions(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 0f7967487..a12dc5a24 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -14,18 +14,18 @@ jobs: fail-fast: false matrix: PY: - - "3.9" - "3.10" - "3.11" - "3.12" - "3.13" + - "3.14" env: CIRUN: true steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@v5 with: fetch-depth: 0 @@ -50,7 +50,7 @@ jobs: steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@v5 with: fetch-depth: 0 @@ -81,7 +81,7 @@ jobs: steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@v5 with: fetch-depth: 0 @@ -124,7 +124,7 @@ jobs: steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@v5 - name: Setup conda uses: conda-incubator/setup-miniconda@v3 diff --git a/.github/workflows/pypipublish.yaml b/.github/workflows/pypipublish.yaml index 068c52393..821a4bcc9 100644 --- a/.github/workflows/pypipublish.yaml +++ b/.github/workflows/pypipublish.yaml @@ -8,9 +8,9 @@ jobs: deploy: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v6 with: python-version: "3.x" - name: Install dependencies diff --git a/README.md b/README.md index 9b85e00cd..0969ac68b 100644 --- a/README.md +++ b/README.md @@ -47,7 +47,7 @@ CI runtime. For local use, pick a version suitable for you. ```bash # For a new environment (mamba / conda). -mamba create -n fsspec -c conda-forge python=3.9 -y +mamba create -n fsspec -c conda-forge python=3.10 -y conda activate fsspec # Standard dev install with docs and tests. diff --git a/docs/environment.yml b/docs/environment.yml index fe05bd7e9..8c3631221 100644 --- a/docs/environment.yml +++ b/docs/environment.yml @@ -2,4 +2,4 @@ name: fsspec channels: - defaults dependencies: - - python=3.9 + - python=3.10 diff --git a/fsspec/implementations/tests/test_dbfs.py b/fsspec/implementations/tests/test_dbfs.py index 9884d64f9..f1f9d35b2 100644 --- a/fsspec/implementations/tests/test_dbfs.py +++ b/fsspec/implementations/tests/test_dbfs.py @@ -23,7 +23,6 @@ """ import os -import sys from urllib.parse import urlparse import numpy @@ -31,8 +30,7 @@ import fsspec -if sys.version_info >= (3, 10): - pytest.skip("These tests need to be re-recorded.", allow_module_level=True) +pytest.skip("These tests need to be re-recorded.", allow_module_level=True) DUMMY_INSTANCE = "my_instance.com" INSTANCE = os.getenv("DBFS_INSTANCE", DUMMY_INSTANCE) diff --git a/pyproject.toml b/pyproject.toml index e60b924d3..60c423c24 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,18 +9,18 @@ description = "File-system specification" readme = "README.md" license = "BSD-3-Clause" license-files = ["LICENSE"] -requires-python = ">=3.9" +requires-python = ">=3.10" maintainers = [{ name = "Martin Durant", email = "mdurant@anaconda.com" }] keywords = ["file"] classifiers = [ "Development Status :: 4 - Beta", "Intended Audience :: Developers", "Operating System :: OS Independent", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", ] [project.optional-dependencies] From 4dee5695fc13b9586d4cedf71d82cf2b3202e946 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Wed, 12 Nov 2025 13:24:15 +0200 Subject: [PATCH 2/7] ruff check --select UP --fix --- fsspec/caching.py | 11 ++--------- fsspec/implementations/cache_metadata.py | 6 ++---- fsspec/implementations/cached.py | 3 ++- fsspec/implementations/data.py | 3 +-- fsspec/implementations/libarchive.py | 2 +- fsspec/implementations/reference.py | 2 +- fsspec/implementations/tests/test_cached.py | 6 +++--- fsspec/implementations/tests/test_git.py | 2 +- fsspec/json.py | 19 +++++++------------ fsspec/tests/test_core.py | 6 +++--- fsspec/tests/test_fuse.py | 2 +- fsspec/utils.py | 13 +++---------- 12 files changed, 27 insertions(+), 48 deletions(-) diff --git a/fsspec/caching.py b/fsspec/caching.py index de6a4e340..bccf8ebb6 100644 --- a/fsspec/caching.py +++ b/fsspec/caching.py @@ -8,18 +8,11 @@ import threading import warnings from collections import OrderedDict +from collections.abc import Callable from concurrent.futures import Future, ThreadPoolExecutor from itertools import groupby from operator import itemgetter -from typing import ( - TYPE_CHECKING, - Any, - Callable, - ClassVar, - Generic, - NamedTuple, - TypeVar, -) +from typing import TYPE_CHECKING, Any, ClassVar, Generic, NamedTuple, TypeVar if TYPE_CHECKING: import mmap diff --git a/fsspec/implementations/cache_metadata.py b/fsspec/implementations/cache_metadata.py index 4a519158d..baa21ad06 100644 --- a/fsspec/implementations/cache_metadata.py +++ b/fsspec/implementations/cache_metadata.py @@ -15,9 +15,7 @@ if TYPE_CHECKING: from collections.abc import Iterator - from typing import Any, Literal - - from typing_extensions import TypeAlias + from typing import Any, Literal, TypeAlias from .cached import CachingFileSystem @@ -57,7 +55,7 @@ def __init__(self, storage: list[str]): def _load(self, fn: str) -> Detail: """Low-level function to load metadata from specific file""" try: - with open(fn, "r") as f: + with open(fn) as f: loaded = json.load(f) except ValueError: with open(fn, "rb") as f: diff --git a/fsspec/implementations/cached.py b/fsspec/implementations/cached.py index cc587c3e0..a55888bdc 100644 --- a/fsspec/implementations/cached.py +++ b/fsspec/implementations/cached.py @@ -6,8 +6,9 @@ import tempfile import time import weakref +from collections.abc import Callable from shutil import rmtree -from typing import TYPE_CHECKING, Any, Callable, ClassVar +from typing import TYPE_CHECKING, Any, ClassVar from fsspec import filesystem from fsspec.callbacks import DEFAULT_CALLBACK diff --git a/fsspec/implementations/data.py b/fsspec/implementations/data.py index 519032305..f11542b48 100644 --- a/fsspec/implementations/data.py +++ b/fsspec/implementations/data.py @@ -1,6 +1,5 @@ import base64 import io -from typing import Optional from urllib.parse import unquote from fsspec import AbstractFileSystem @@ -50,7 +49,7 @@ def _open( return io.BytesIO(self.cat_file(path)) @staticmethod - def encode(data: bytes, mime: Optional[str] = None): + def encode(data: bytes, mime: str | None = None): """Format the given data into data-URL syntax This version always base64 encodes, even when the data is ascii/url-safe. diff --git a/fsspec/implementations/libarchive.py b/fsspec/implementations/libarchive.py index eb6f14535..6f8e75000 100644 --- a/fsspec/implementations/libarchive.py +++ b/fsspec/implementations/libarchive.py @@ -195,7 +195,7 @@ def _open( if mode != "rb": raise NotImplementedError - data = bytes() + data = b"" with self._open_archive() as arc: for entry in arc: if entry.pathname != path: diff --git a/fsspec/implementations/reference.py b/fsspec/implementations/reference.py index d33215ad3..54e81224b 100644 --- a/fsspec/implementations/reference.py +++ b/fsspec/implementations/reference.py @@ -219,7 +219,7 @@ def create(root, storage_options=None, fs=None, record_size=10000, **kwargs): fs.pipe("/".join([root, ".zmetadata"]), json.dumps(met).encode()) return LazyReferenceMapper(root, fs, **kwargs) - @lru_cache() + @lru_cache def listdir(self): """List top-level directories""" dirs = (p.rsplit("/", 1)[0] for p in self.zmetadata if not p.startswith(".z")) diff --git a/fsspec/implementations/tests/test_cached.py b/fsspec/implementations/tests/test_cached.py index 814929d8c..249e044b4 100644 --- a/fsspec/implementations/tests/test_cached.py +++ b/fsspec/implementations/tests/test_cached.py @@ -185,7 +185,7 @@ def test_metadata_replace_pickle_with_json(tmpdir): assert f.read(5) == b"test" # Confirm metadata is in json format - with open(cache_fn, "r") as f: + with open(cache_fn) as f: metadata = json.load(f) assert list(metadata.keys()) == [make_path_posix(afile)] @@ -253,7 +253,7 @@ def test_blockcache_workflow(ftp_writable, tmp_path, force_save_pickle): with open(tmp_path / "cache", "rb") as f: cache = pickle.load(f) else: - with open(tmp_path / "cache", "r") as f: + with open(tmp_path / "cache") as f: cache = json.load(f) assert "/out" in cache assert cache["/out"]["blocks"] == [0, 1] @@ -370,7 +370,7 @@ def __ager(cache_fn, fn, del_fn=False): with open(cache_fn, "rb") as f: cached_files = pickle.load(f) else: - with open(cache_fn, "r") as f: + with open(cache_fn) as f: cached_files = json.load(f) fn_posix = pathlib.Path(fn).as_posix() cached_files[fn_posix]["time"] = cached_files[fn_posix]["time"] - 691200 diff --git a/fsspec/implementations/tests/test_git.py b/fsspec/implementations/tests/test_git.py index 2aeb544a1..0182ae855 100644 --- a/fsspec/implementations/tests/test_git.py +++ b/fsspec/implementations/tests/test_git.py @@ -24,7 +24,7 @@ def repo(): open(os.path.join(d, "file1"), "wb").write(b"data0") subprocess.call("git add file1", shell=True, cwd=d) subprocess.call('git commit -m "init"', shell=True, cwd=d) - sha = open(os.path.join(d, ".git/refs/heads/master"), "r").read().strip() + sha = open(os.path.join(d, ".git/refs/heads/master")).read().strip() open(os.path.join(d, "file1"), "wb").write(b"data00") subprocess.check_output('git commit -a -m "tagger"', shell=True, cwd=d) subprocess.call('git tag -a thetag -m "make tag"', shell=True, cwd=d) diff --git a/fsspec/json.py b/fsspec/json.py index 3bd2485ef..5c53a2491 100644 --- a/fsspec/json.py +++ b/fsspec/json.py @@ -1,13 +1,8 @@ import json -from collections.abc import Mapping, Sequence +from collections.abc import Callable, Mapping, Sequence from contextlib import suppress from pathlib import PurePath -from typing import ( - Any, - Callable, - ClassVar, - Optional, -) +from typing import Any, ClassVar from .registry import _import_class, get_filesystem_class from .spec import AbstractFileSystem @@ -45,12 +40,12 @@ class FilesystemJSONDecoder(json.JSONDecoder): def __init__( self, *, - object_hook: Optional[Callable[[dict[str, Any]], Any]] = None, - parse_float: Optional[Callable[[str], Any]] = None, - parse_int: Optional[Callable[[str], Any]] = None, - parse_constant: Optional[Callable[[str], Any]] = None, + object_hook: Callable[[dict[str, Any]], Any] | None = None, + parse_float: Callable[[str], Any] | None = None, + parse_int: Callable[[str], Any] | None = None, + parse_constant: Callable[[str], Any] | None = None, strict: bool = True, - object_pairs_hook: Optional[Callable[[list[tuple[str, Any]]], Any]] = None, + object_pairs_hook: Callable[[list[tuple[str, Any]]], Any] | None = None, ) -> None: self.original_object_hook = object_hook diff --git a/fsspec/tests/test_core.py b/fsspec/tests/test_core.py index 0dacd3830..1c67fb6c0 100644 --- a/fsspec/tests/test_core.py +++ b/fsspec/tests/test_core.py @@ -304,14 +304,14 @@ def test_open_file_write_with_special_characters(tmp_path, char, monkeypatch): with fsspec.open(file_path, "w", expand=False) as f: f.write(expected_content) - with open(file_path, "r") as f: + with open(file_path) as f: actual_content = f.read() monkeypatch.setattr(fsspec.core, "DEFAULT_EXPAND", False) with fsspec.open(file_path, "w") as f: f.write(expected_content * 2) - with open(file_path, "r") as f: + with open(file_path) as f: assert f.read() == actual_content * 2 assert actual_content == expected_content @@ -347,7 +347,7 @@ def test_open_files_write_with_special_characters(tmp_path, char): )[0] as f: f.write(expected_content) - with open(file_path, "r") as f: + with open(file_path) as f: actual_content = f.read() assert actual_content == expected_content diff --git a/fsspec/tests/test_fuse.py b/fsspec/tests/test_fuse.py index ef3005367..6fb6d2772 100644 --- a/fsspec/tests/test_fuse.py +++ b/fsspec/tests/test_fuse.py @@ -139,7 +139,7 @@ def test_seek_rw(mount_local): fh.write("st") fh.close() - fh = open(mount_dir / "text", "r") + fh = open(mount_dir / "text") assert fh.read() == "test" fh.seek(2) assert fh.read() == "st" diff --git a/fsspec/utils.py b/fsspec/utils.py index 208d0f7d8..1e5e5e70b 100644 --- a/fsspec/utils.py +++ b/fsspec/utils.py @@ -7,23 +7,16 @@ import re import sys import tempfile -from collections.abc import Iterable, Iterator, Sequence +from collections.abc import Callable, Iterable, Iterator, Sequence from functools import partial from hashlib import md5 from importlib.metadata import version -from typing import ( - IO, - TYPE_CHECKING, - Any, - Callable, - TypeVar, -) +from typing import IO, TYPE_CHECKING, Any, TypeVar from urllib.parse import urlsplit if TYPE_CHECKING: import pathlib - - from typing_extensions import TypeGuard + from typing import TypeGuard from fsspec.spec import AbstractFileSystem From a9a2d8432ad11b84adf2778fb4054629119f00da Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Wed, 12 Nov 2025 13:25:56 +0200 Subject: [PATCH 3/7] Fix B905 zip() without an explicit strict= parameter --- fsspec/archive.py | 4 ++- fsspec/asyn.py | 17 +++++++----- fsspec/caching.py | 2 +- fsspec/generic.py | 4 +-- fsspec/implementations/cache_metadata.py | 8 ++++-- fsspec/implementations/cached.py | 16 ++++++------ fsspec/implementations/reference.py | 27 +++++++++++--------- fsspec/implementations/tests/test_archive.py | 2 +- fsspec/implementations/tests/test_local.py | 10 +++++--- fsspec/implementations/zip.py | 2 +- fsspec/mapping.py | 2 +- fsspec/parquet.py | 2 +- fsspec/spec.py | 8 +++--- fsspec/tests/abstract/copy.py | 12 ++++++--- fsspec/tests/abstract/get.py | 12 ++++++--- fsspec/tests/abstract/put.py | 12 ++++++--- fsspec/tests/test_spec.py | 2 +- fsspec/utils.py | 5 ++-- 18 files changed, 88 insertions(+), 59 deletions(-) diff --git a/fsspec/archive.py b/fsspec/archive.py index 13a4da8df..36b6d1fd4 100644 --- a/fsspec/archive.py +++ b/fsspec/archive.py @@ -61,7 +61,9 @@ def ls(self, path, detail=True, **kwargs): paths[p] = f elif all( (a == b) - for a, b in zip(path.split("/"), [""] + p.strip("/").split("/")) + for a, b in zip( + path.split("/"), [""] + p.strip("/").split("/"), strict=False + ) ): # root directory entry ppath = p.rstrip("/").split("/", 1)[0] diff --git a/fsspec/asyn.py b/fsspec/asyn.py index 837728394..5ed1532ff 100644 --- a/fsspec/asyn.py +++ b/fsspec/asyn.py @@ -397,7 +397,10 @@ async def _copy( ) batch_size = batch_size or self.batch_size - coros = [self._cp_file(p1, p2, **kwargs) for p1, p2 in zip(paths1, paths2)] + coros = [ + self._cp_file(p1, p2, **kwargs) + for p1, p2 in zip(paths1, paths2, strict=False) + ] result = await _run_coros_in_chunks( coros, batch_size=batch_size, return_exceptions=True, nofiles=True ) @@ -469,7 +472,7 @@ async def _cat( ): return { k: v - for k, v in zip(paths, out) + for k, v in zip(paths, out, strict=False) if on_error != "omit" or not is_exception(v) } else: @@ -509,7 +512,7 @@ async def _cat_ranges( raise ValueError coros = [ self._cat_file(p, start=s, end=e, **kwargs) - for p, s, e in zip(paths, starts, ends) + for p, s, e in zip(paths, starts, ends, strict=False) ] batch_size = batch_size or self.batch_size return await _run_coros_in_chunks( @@ -577,8 +580,10 @@ async def _put( ) is_dir = {l: os.path.isdir(l) for l in lpaths} - rdirs = [r for l, r in zip(lpaths, rpaths) if is_dir[l]] - file_pairs = [(l, r) for l, r in zip(lpaths, rpaths) if not is_dir[l]] + rdirs = [r for l, r in zip(lpaths, rpaths, strict=False) if is_dir[l]] + file_pairs = [ + (l, r) for l, r in zip(lpaths, rpaths, strict=False) if not is_dir[l] + ] await asyncio.gather(*[self._makedirs(d, exist_ok=True) for d in rdirs]) batch_size = batch_size or self.batch_size @@ -662,7 +667,7 @@ async def _get( coros = [] callback.set_size(len(lpaths)) - for lpath, rpath in zip(lpaths, rpaths): + for lpath, rpath in zip(lpaths, rpaths, strict=False): get_file = callback.branch_coro(self._get_file) coros.append(get_file(rpath, lpath, **kwargs)) return await _run_coros_in_chunks( diff --git a/fsspec/caching.py b/fsspec/caching.py index bccf8ebb6..cb36ce592 100644 --- a/fsspec/caching.py +++ b/fsspec/caching.py @@ -642,7 +642,7 @@ def __init__( offsets.append((start, stop)) blocks.append(data.pop((start, stop))) - self.data = dict(zip(offsets, blocks)) + self.data = dict(zip(offsets, blocks, strict=False)) else: self.data = {} diff --git a/fsspec/generic.py b/fsspec/generic.py index 0a641b0e2..83a4cefa7 100644 --- a/fsspec/generic.py +++ b/fsspec/generic.py @@ -135,7 +135,7 @@ def rsync( allfiles[k] = otherfile logger.debug(f"{len(allfiles)} files to copy") if allfiles: - source_files, target_files = zip(*allfiles.items()) + source_files, target_files = zip(*allfiles.items(), strict=False) fs.cp(source_files, target_files, **kwargs) logger.debug(f"{len(to_delete)} files to delete") if delete_missing and to_delete: @@ -361,7 +361,7 @@ async def copy_file_op( u2, os.path.join(tempdir, uuid.uuid4().hex), ) - for u1, u2 in zip(url1, url2) + for u1, u2 in zip(url1, url2, strict=False) ] out = await _run_coros_in_chunks( coros, batch_size=batch_size, return_exceptions=True diff --git a/fsspec/implementations/cache_metadata.py b/fsspec/implementations/cache_metadata.py index baa21ad06..f8b59e565 100644 --- a/fsspec/implementations/cache_metadata.py +++ b/fsspec/implementations/cache_metadata.py @@ -105,7 +105,9 @@ def check_file( perform extra checks to reject possible matches, such as if they are too old. """ - for (fn, base, _), cache in zip(self._scan_locations(), self.cached_files): + for (fn, base, _), cache in zip( + self._scan_locations(), self.cached_files, strict=False + ): if path not in cache: continue detail = cache[path].copy() @@ -192,7 +194,9 @@ def pop_file(self, path: str) -> str | None: def save(self) -> None: """Save metadata to disk""" - for (fn, _, writable), cache in zip(self._scan_locations(), self.cached_files): + for (fn, _, writable), cache in zip( + self._scan_locations(), self.cached_files, strict=False + ): if not writable: continue diff --git a/fsspec/implementations/cached.py b/fsspec/implementations/cached.py index a55888bdc..b4f453609 100644 --- a/fsspec/implementations/cached.py +++ b/fsspec/implementations/cached.py @@ -574,12 +574,12 @@ def open_many(self, open_files, **kwargs): if self.compression: raise NotImplementedError details = [self._check_file(sp) for sp in paths] - downpath = [p for p, d in zip(paths, details) if not d] + downpath = [p for p, d in zip(paths, details, strict=False) if not d] downfn0 = [ os.path.join(self.storage[-1], self._mapper(p)) - for p, d in zip(paths, details) + for p, d in zip(paths, details, strict=False) ] # keep these path names for opening later - downfn = [fn for fn, d in zip(downfn0, details) if not d] + downfn = [fn for fn, d in zip(downfn0, details, strict=False) if not d] if downpath: # skip if all files are already cached and up to date self.fs.get(downpath, downfn) @@ -595,7 +595,7 @@ def open_many(self, open_files, **kwargs): } for path in downpath ] - for path, detail in zip(downpath, newdetail): + for path, detail in zip(downpath, newdetail, strict=False): self._metadata.update_file(path, detail) self.save_cache() @@ -605,7 +605,7 @@ def firstpart(fn): return [ open(firstpart(fn0) if fn0 else fn1, mode=open_files.mode) - for fn0, fn1 in zip(details, downfn0) + for fn0, fn1 in zip(details, downfn0, strict=False) ] def commit_many(self, open_files): @@ -670,7 +670,7 @@ def cat( self.save_cache() callback.set_size(len(paths)) - for p, fn in zip(paths, fns): + for p, fn in zip(paths, fns, strict=False): with open(fn, "rb") as f: out[p] = f.read() callback.relative_update(1) @@ -886,8 +886,8 @@ def cat_ranges( ): logger.debug("cat ranges %s", paths) lpaths = [self._check_file(p) for p in paths] - rpaths = [p for l, p in zip(lpaths, paths) if l is False] - lpaths = [l for l, p in zip(lpaths, paths) if l is False] + rpaths = [p for l, p in zip(lpaths, paths, strict=False) if l is False] + lpaths = [l for l, p in zip(lpaths, paths, strict=False) if l is False] self.fs.get(rpaths, lpaths) paths = [self._check_file(p) for p in paths] return LocalFileSystem().cat_ranges( diff --git a/fsspec/implementations/reference.py b/fsspec/implementations/reference.py index 54e81224b..52e1e16a7 100644 --- a/fsspec/implementations/reference.py +++ b/fsspec/implementations/reference.py @@ -76,13 +76,13 @@ def __iter__(self): class RefsItemsView(collections.abc.ItemsView): def __iter__(self): - return zip(self._mapping.keys(), self._mapping.values()) + return zip(self._mapping.keys(), self._mapping.values(), strict=False) def ravel_multi_index(idx, sizes): val = 0 mult = 1 - for i, s in zip(idx[::-1], sizes[::-1]): + for i, s in zip(idx[::-1], sizes[::-1], strict=False): val += i * mult mult *= s return val @@ -286,7 +286,7 @@ def ls(self, path="", detail=True): recs = self._generate_all_records(field) recinfo = [ {"name": name, "type": "file", "size": rec[-1]} - for name, rec in zip(keys, recs) + for name, rec in zip(keys, recs, strict=False) if rec[0] # filters out path==None, deleted/missing ] return fileinfo + recinfo @@ -349,7 +349,8 @@ def _get_chunk_sizes(self, field): if field not in self.chunk_sizes: zarray = self.zmetadata[f"{field}/.zarray"] size_ratio = [ - math.ceil(s / c) for s, c in zip(zarray["shape"], zarray["chunks"]) + math.ceil(s / c) + for s, c in zip(zarray["shape"], zarray["chunks"], strict=False) ] self.chunk_sizes[field] = size_ratio or [1] return self.chunk_sizes[field] @@ -357,7 +358,7 @@ def _get_chunk_sizes(self, field): def _generate_record(self, field, record): """The references for a given parquet file of a given field""" refs = self.open_refs(field, record) - it = iter(zip(*refs.values())) + it = iter(zip(*refs.values(), strict=False)) if len(refs) == 3: # All urls return (list(t) for t in it) @@ -878,7 +879,7 @@ def get(self, rpath, lpath, recursive=False, **kwargs): data = self.cat([r for r in rpath if not self.isdir(r)]) else: data = self.cat(rpath) - for remote, local in zip(rpath, targets): + for remote, local in zip(rpath, targets, strict=False): if remote in data: fs.pipe_file(local, data[remote]) @@ -918,7 +919,7 @@ def cat(self, path, recursive=False, on_error="raise", **kwargs): ends2 = [] paths2 = [] whole_files = set() - for u, s, e, p in zip(urls, starts, ends, valid_paths): + for u, s, e, p in zip(urls, starts, ends, valid_paths, strict=False): if isinstance(u, bytes): # data out[p] = u @@ -930,7 +931,7 @@ def cat(self, path, recursive=False, on_error="raise", **kwargs): starts2.append(s) ends2.append(e) paths2.append(p) - for u, s, e, p in zip(urls, starts, ends, valid_paths): + for u, s, e, p in zip(urls, starts, ends, valid_paths, strict=False): # second run to account for files that are to be loaded whole if s is not None and u not in whole_files: urls2.append(u) @@ -950,10 +951,12 @@ def cat(self, path, recursive=False, on_error="raise", **kwargs): bytes_out = fs.cat_ranges(new_paths, new_starts, new_ends) # unbundle from merged bytes - simple approach - for u, s, e, p in zip(urls, starts, ends, valid_paths): + for u, s, e, p in zip(urls, starts, ends, valid_paths, strict=False): if p in out: continue # was bytes, already handled - for np, ns, ne, b in zip(new_paths, new_starts, new_ends, bytes_out): + for np, ns, ne, b in zip( + new_paths, new_starts, new_ends, bytes_out, strict=False + ): if np == u and (ns is None or ne is None): if isinstance(b, Exception): out[p] = b @@ -1061,7 +1064,7 @@ def _process_gen(self, gens): for k, v in gen["dimensions"].items() } products = ( - dict(zip(dimension.keys(), values)) + dict(zip(dimension.keys(), values, strict=False)) for values in itertools.product(*dimension.values()) ) for pr in products: @@ -1105,7 +1108,7 @@ def _dircache_from_items(self): subdirs.append(par0) subdirs.reverse() - for parent, child in zip(subdirs, subdirs[1:]): + for parent, child in zip(subdirs, subdirs[1:], strict=False): # register newly discovered directories assert child not in self.dircache assert parent in self.dircache diff --git a/fsspec/implementations/tests/test_archive.py b/fsspec/implementations/tests/test_archive.py index 457714b39..ed72bb559 100644 --- a/fsspec/implementations/tests/test_archive.py +++ b/fsspec/implementations/tests/test_archive.py @@ -322,7 +322,7 @@ def test_walk(self, scenario: ArchiveTestScenario, topdown, prune_nested): # prior py3.10 zip() does not support strict=True, we need # a manual len check here assert len(result) == len(expected) - for lhs, rhs in zip(result, expected): + for lhs, rhs in zip(result, expected, strict=False): assert lhs[0] == rhs[0] assert sorted(lhs[1]) == sorted(rhs[1]) assert sorted(lhs[2]) == sorted(rhs[2]) diff --git a/fsspec/implementations/tests/test_local.py b/fsspec/implementations/tests/test_local.py index fd6b656bd..1c539eecb 100644 --- a/fsspec/implementations/tests/test_local.py +++ b/fsspec/implementations/tests/test_local.py @@ -172,11 +172,13 @@ def test_urlpath_expand_write(): """Make sure * is expanded in file paths when writing.""" _, _, paths = get_fs_token_paths("prefix-*.csv", mode="wb", num=2) assert all( - p.endswith(pa) for p, pa in zip(paths, ["/prefix-0.csv", "/prefix-1.csv"]) + p.endswith(pa) + for p, pa in zip(paths, ["/prefix-0.csv", "/prefix-1.csv"], strict=False) ) _, _, paths = get_fs_token_paths(["prefix-*.csv"], mode="wb", num=2) assert all( - p.endswith(pa) for p, pa in zip(paths, ["/prefix-0.csv", "/prefix-1.csv"]) + p.endswith(pa) + for p, pa in zip(paths, ["/prefix-0.csv", "/prefix-1.csv"], strict=False) ) # we can read with multiple masks, but not write with pytest.raises(ValueError): @@ -189,7 +191,7 @@ def test_open_files(): with filetexts(files, mode="b"): myfiles = open_files("./.test.accounts.*") assert len(myfiles) == len(files) - for lazy_file, data_file in zip(myfiles, sorted(files)): + for lazy_file, data_file in zip(myfiles, sorted(files), strict=False): with lazy_file as f: x = f.read() assert x == files[data_file] @@ -291,7 +293,7 @@ def test_pickability_of_lazy_files(tmpdir): myfiles = open_files("./.test.accounts.*") myfiles2 = cloudpickle.loads(cloudpickle.dumps(myfiles)) - for f, f2 in zip(myfiles, myfiles2): + for f, f2 in zip(myfiles, myfiles2, strict=False): assert f.path == f2.path assert isinstance(f.fs, type(f2.fs)) with f as f_open, f2 as f2_open: diff --git a/fsspec/implementations/zip.py b/fsspec/implementations/zip.py index 6db3ae278..d1336ef48 100644 --- a/fsspec/implementations/zip.py +++ b/fsspec/implementations/zip.py @@ -145,7 +145,7 @@ def find(self, path, maxdepth=None, withdirs=False, detail=False, **kwargs): def _matching_starts(file_path): file_parts = filter(lambda s: bool(s), file_path.split("/")) - return all(a == b for a, b in zip(path_parts, file_parts)) + return all(a == b for a, b in zip(path_parts, file_parts, strict=False)) self._get_dirs() diff --git a/fsspec/mapping.py b/fsspec/mapping.py index 752eef352..0188bd517 100644 --- a/fsspec/mapping.py +++ b/fsspec/mapping.py @@ -113,7 +113,7 @@ def getitems(self, keys, on_error="raise"): } return { key: out[k2] if on_error == "raise" else out.get(k2, KeyError(k2)) - for key, k2 in zip(keys, keys2) + for key, k2 in zip(keys, keys2, strict=False) if on_error == "return" or not isinstance(out[k2], BaseException) } diff --git a/fsspec/parquet.py b/fsspec/parquet.py index faedb7b9e..71d4d6b3a 100644 --- a/fsspec/parquet.py +++ b/fsspec/parquet.py @@ -328,7 +328,7 @@ def _get_parquet_byte_ranges_from_metadata( def _transfer_ranges(fs, blocks, paths, starts, ends): # Use cat_ranges to gather the data byte_ranges ranges = (paths, starts, ends) - for path, start, stop, data in zip(*ranges, fs.cat_ranges(*ranges)): + for path, start, stop, data in zip(*ranges, fs.cat_ranges(*ranges), strict=False): blocks[path][(start, stop)] = data diff --git a/fsspec/spec.py b/fsspec/spec.py index b67d5c16f..1891e1319 100644 --- a/fsspec/spec.py +++ b/fsspec/spec.py @@ -866,7 +866,7 @@ def cat_ranges( if len(starts) != len(paths) or len(ends) != len(paths): raise ValueError out = [] - for p, s, e in zip(paths, starts, ends): + for p, s, e in zip(paths, starts, ends, strict=False): try: out.append(self.cat_file(p, s, e)) except Exception as e: @@ -1006,7 +1006,7 @@ def get( ) callback.set_size(len(lpaths)) - for lpath, rpath in callback.wrap(zip(lpaths, rpaths)): + for lpath, rpath in callback.wrap(zip(lpaths, rpaths, strict=False)): with callback.branched(rpath, lpath) as child: self.get_file(rpath, lpath, callback=child, **kwargs) @@ -1098,7 +1098,7 @@ def put( ) callback.set_size(len(rpaths)) - for lpath, rpath in callback.wrap(zip(lpaths, rpaths)): + for lpath, rpath in callback.wrap(zip(lpaths, rpaths, strict=False)): with callback.branched(lpath, rpath) as child: self.put_file(lpath, rpath, callback=child, **kwargs) @@ -1165,7 +1165,7 @@ def copy( flatten=not source_is_str, ) - for p1, p2 in zip(paths1, paths2): + for p1, p2 in zip(paths1, paths2, strict=False): try: self.cp_file(p1, p2, **kwargs) except FileNotFoundError: diff --git a/fsspec/tests/abstract/copy.py b/fsspec/tests/abstract/copy.py index e39e57e5f..354c1417c 100644 --- a/fsspec/tests/abstract/copy.py +++ b/fsspec/tests/abstract/copy.py @@ -120,7 +120,9 @@ def test_copy_directory_to_existing_directory( fs.touch(dummy) assert fs.isdir(target) - for source_slash, target_slash in zip([False, True], [False, True]): + for source_slash, target_slash in zip( + [False, True], [False, True], strict=False + ): s = fs_join(source, "subdir") if source_slash: s += "/" @@ -201,7 +203,9 @@ def test_copy_directory_to_new_directory( target = fs_target fs.mkdir(target) - for source_slash, target_slash in zip([False, True], [False, True]): + for source_slash, target_slash in zip( + [False, True], [False, True], strict=False + ): s = fs_join(source, "subdir") if source_slash: s += "/" @@ -282,7 +286,7 @@ def test_copy_glob_to_existing_directory( ) # With recursive - for glob, recursive in zip(["*", "**"], [True, False]): + for glob, recursive in zip(["*", "**"], [True, False], strict=False): fs.cp(fs_join(source, "subdir", glob), t, recursive=recursive) assert fs.isfile(fs_join(target, "subfile1")) assert fs.isfile(fs_join(target, "subfile2")) @@ -350,7 +354,7 @@ def test_copy_glob_to_new_directory( assert not fs.exists(fs_join(target, "newdir")) # With recursive - for glob, recursive in zip(["*", "**"], [True, False]): + for glob, recursive in zip(["*", "**"], [True, False], strict=False): fs.cp(fs_join(source, "subdir", glob), t, recursive=recursive) assert fs.isdir(fs_join(target, "newdir")) assert fs.isfile(fs_join(target, "newdir", "subfile1")) diff --git a/fsspec/tests/abstract/get.py b/fsspec/tests/abstract/get.py index 851ab81ee..6e16db15f 100644 --- a/fsspec/tests/abstract/get.py +++ b/fsspec/tests/abstract/get.py @@ -127,7 +127,9 @@ def test_get_directory_to_existing_directory( local_fs.mkdir(target) assert local_fs.isdir(target) - for source_slash, target_slash in zip([False, True], [False, True]): + for source_slash, target_slash in zip( + [False, True], [False, True], strict=False + ): s = fs_join(source, "subdir") if source_slash: s += "/" @@ -205,7 +207,9 @@ def test_get_directory_to_new_directory( target = local_target local_fs.mkdir(target) - for source_slash, target_slash in zip([False, True], [False, True]): + for source_slash, target_slash in zip( + [False, True], [False, True], strict=False + ): s = fs_join(source, "subdir") if source_slash: s += "/" @@ -278,7 +282,7 @@ def test_get_glob_to_existing_directory( assert local_fs.ls(target) == [] # With recursive - for glob, recursive in zip(["*", "**"], [True, False]): + for glob, recursive in zip(["*", "**"], [True, False], strict=False): fs.get(fs_join(source, "subdir", glob), t, recursive=recursive) assert local_fs.isfile(local_join(target, "subfile1")) assert local_fs.isfile(local_join(target, "subfile2")) @@ -350,7 +354,7 @@ def test_get_glob_to_new_directory( assert local_fs.ls(target) == [] # With recursive - for glob, recursive in zip(["*", "**"], [True, False]): + for glob, recursive in zip(["*", "**"], [True, False], strict=False): fs.get(fs_join(source, "subdir", glob), t, recursive=recursive) assert local_fs.isdir(local_join(target, "newdir")) assert local_fs.isfile(local_join(target, "newdir", "subfile1")) diff --git a/fsspec/tests/abstract/put.py b/fsspec/tests/abstract/put.py index 9fc349977..8cffe1f4c 100644 --- a/fsspec/tests/abstract/put.py +++ b/fsspec/tests/abstract/put.py @@ -123,7 +123,9 @@ def test_put_directory_to_existing_directory( fs.touch(dummy) assert fs.isdir(target) - for source_slash, target_slash in zip([False, True], [False, True]): + for source_slash, target_slash in zip( + [False, True], [False, True], strict=False + ): s = fs_join(source, "subdir") if source_slash: s += "/" @@ -204,7 +206,9 @@ def test_put_directory_to_new_directory( target = fs_target fs.mkdir(target) - for source_slash, target_slash in zip([False, True], [False, True]): + for source_slash, target_slash in zip( + [False, True], [False, True], strict=False + ): s = fs_join(source, "subdir") if source_slash: s += "/" @@ -286,7 +290,7 @@ def test_put_glob_to_existing_directory( ) # With recursive - for glob, recursive in zip(["*", "**"], [True, False]): + for glob, recursive in zip(["*", "**"], [True, False], strict=False): fs.put(local_join(source, "subdir", glob), t, recursive=recursive) assert fs.isfile(fs_join(target, "subfile1")) assert fs.isfile(fs_join(target, "subfile2")) @@ -357,7 +361,7 @@ def test_put_glob_to_new_directory( assert not fs.exists(fs_join(target, "newdir")) # With recursive - for glob, recursive in zip(["*", "**"], [True, False]): + for glob, recursive in zip(["*", "**"], [True, False], strict=False): fs.put(local_join(source, "subdir", glob), t, recursive=recursive) assert fs.isdir(fs_join(target, "newdir")) assert fs.isfile(fs_join(target, "newdir", "subfile1")) diff --git a/fsspec/tests/test_spec.py b/fsspec/tests/test_spec.py index dafb0d004..39c8086ae 100644 --- a/fsspec/tests/test_spec.py +++ b/fsspec/tests/test_spec.py @@ -1258,7 +1258,7 @@ def test_dummy_callbacks_files_branched(tmpdir): def check_events(lpaths, rpaths): from fsspec.implementations.local import make_path_posix - base_keys = zip(make_path_posix(lpaths), make_path_posix(rpaths)) + base_keys = zip(make_path_posix(lpaths), make_path_posix(rpaths), strict=False) assert set(callback.events.keys()) == {("top-level",), *base_keys} assert callback.events["top-level",] == imitate_transfer(10, 10, file=False) diff --git a/fsspec/utils.py b/fsspec/utils.py index 1e5e5e70b..0c021118c 100644 --- a/fsspec/utils.py +++ b/fsspec/utils.py @@ -562,8 +562,9 @@ def merge_offset_ranges( list(v) for v in zip( *sorted( - zip(paths, starts, ends), - ) + zip(paths, starts, ends, strict=False), + ), + strict=False, ) ) From 09911e9774d0f75cfba20d2824750b58b370af43 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Wed, 12 Nov 2025 17:37:42 +0200 Subject: [PATCH 4/7] Ignore pytest.PytestRemovedIn9Warning for gcsfs to pass --- .github/workflows/main.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index a12dc5a24..710840585 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -145,5 +145,5 @@ jobs: shell: bash -l {0} run: | cd ${{ matrix.FRIEND }} - pytest -v + pytest -v -W ignore::pytest.PytestRemovedIn9Warning cd .. From 385ed727b2a8b1e3ae2c7d17f9f12f710c469017 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Thu, 13 Nov 2025 21:45:03 +0200 Subject: [PATCH 5/7] Revert "Fix B905 zip() without an explicit strict= parameter" This reverts commit a9a2d8432ad11b84adf2778fb4054629119f00da. --- fsspec/archive.py | 4 +-- fsspec/asyn.py | 17 +++++------- fsspec/caching.py | 2 +- fsspec/generic.py | 4 +-- fsspec/implementations/cache_metadata.py | 8 ++---- fsspec/implementations/cached.py | 16 ++++++------ fsspec/implementations/reference.py | 27 +++++++++----------- fsspec/implementations/tests/test_archive.py | 2 +- fsspec/implementations/tests/test_local.py | 10 +++----- fsspec/implementations/zip.py | 2 +- fsspec/mapping.py | 2 +- fsspec/parquet.py | 2 +- fsspec/spec.py | 8 +++--- fsspec/tests/abstract/copy.py | 12 +++------ fsspec/tests/abstract/get.py | 12 +++------ fsspec/tests/abstract/put.py | 12 +++------ fsspec/tests/test_spec.py | 2 +- fsspec/utils.py | 5 ++-- 18 files changed, 59 insertions(+), 88 deletions(-) diff --git a/fsspec/archive.py b/fsspec/archive.py index 36b6d1fd4..13a4da8df 100644 --- a/fsspec/archive.py +++ b/fsspec/archive.py @@ -61,9 +61,7 @@ def ls(self, path, detail=True, **kwargs): paths[p] = f elif all( (a == b) - for a, b in zip( - path.split("/"), [""] + p.strip("/").split("/"), strict=False - ) + for a, b in zip(path.split("/"), [""] + p.strip("/").split("/")) ): # root directory entry ppath = p.rstrip("/").split("/", 1)[0] diff --git a/fsspec/asyn.py b/fsspec/asyn.py index 5ed1532ff..837728394 100644 --- a/fsspec/asyn.py +++ b/fsspec/asyn.py @@ -397,10 +397,7 @@ async def _copy( ) batch_size = batch_size or self.batch_size - coros = [ - self._cp_file(p1, p2, **kwargs) - for p1, p2 in zip(paths1, paths2, strict=False) - ] + coros = [self._cp_file(p1, p2, **kwargs) for p1, p2 in zip(paths1, paths2)] result = await _run_coros_in_chunks( coros, batch_size=batch_size, return_exceptions=True, nofiles=True ) @@ -472,7 +469,7 @@ async def _cat( ): return { k: v - for k, v in zip(paths, out, strict=False) + for k, v in zip(paths, out) if on_error != "omit" or not is_exception(v) } else: @@ -512,7 +509,7 @@ async def _cat_ranges( raise ValueError coros = [ self._cat_file(p, start=s, end=e, **kwargs) - for p, s, e in zip(paths, starts, ends, strict=False) + for p, s, e in zip(paths, starts, ends) ] batch_size = batch_size or self.batch_size return await _run_coros_in_chunks( @@ -580,10 +577,8 @@ async def _put( ) is_dir = {l: os.path.isdir(l) for l in lpaths} - rdirs = [r for l, r in zip(lpaths, rpaths, strict=False) if is_dir[l]] - file_pairs = [ - (l, r) for l, r in zip(lpaths, rpaths, strict=False) if not is_dir[l] - ] + rdirs = [r for l, r in zip(lpaths, rpaths) if is_dir[l]] + file_pairs = [(l, r) for l, r in zip(lpaths, rpaths) if not is_dir[l]] await asyncio.gather(*[self._makedirs(d, exist_ok=True) for d in rdirs]) batch_size = batch_size or self.batch_size @@ -667,7 +662,7 @@ async def _get( coros = [] callback.set_size(len(lpaths)) - for lpath, rpath in zip(lpaths, rpaths, strict=False): + for lpath, rpath in zip(lpaths, rpaths): get_file = callback.branch_coro(self._get_file) coros.append(get_file(rpath, lpath, **kwargs)) return await _run_coros_in_chunks( diff --git a/fsspec/caching.py b/fsspec/caching.py index cb36ce592..bccf8ebb6 100644 --- a/fsspec/caching.py +++ b/fsspec/caching.py @@ -642,7 +642,7 @@ def __init__( offsets.append((start, stop)) blocks.append(data.pop((start, stop))) - self.data = dict(zip(offsets, blocks, strict=False)) + self.data = dict(zip(offsets, blocks)) else: self.data = {} diff --git a/fsspec/generic.py b/fsspec/generic.py index 83a4cefa7..0a641b0e2 100644 --- a/fsspec/generic.py +++ b/fsspec/generic.py @@ -135,7 +135,7 @@ def rsync( allfiles[k] = otherfile logger.debug(f"{len(allfiles)} files to copy") if allfiles: - source_files, target_files = zip(*allfiles.items(), strict=False) + source_files, target_files = zip(*allfiles.items()) fs.cp(source_files, target_files, **kwargs) logger.debug(f"{len(to_delete)} files to delete") if delete_missing and to_delete: @@ -361,7 +361,7 @@ async def copy_file_op( u2, os.path.join(tempdir, uuid.uuid4().hex), ) - for u1, u2 in zip(url1, url2, strict=False) + for u1, u2 in zip(url1, url2) ] out = await _run_coros_in_chunks( coros, batch_size=batch_size, return_exceptions=True diff --git a/fsspec/implementations/cache_metadata.py b/fsspec/implementations/cache_metadata.py index f8b59e565..baa21ad06 100644 --- a/fsspec/implementations/cache_metadata.py +++ b/fsspec/implementations/cache_metadata.py @@ -105,9 +105,7 @@ def check_file( perform extra checks to reject possible matches, such as if they are too old. """ - for (fn, base, _), cache in zip( - self._scan_locations(), self.cached_files, strict=False - ): + for (fn, base, _), cache in zip(self._scan_locations(), self.cached_files): if path not in cache: continue detail = cache[path].copy() @@ -194,9 +192,7 @@ def pop_file(self, path: str) -> str | None: def save(self) -> None: """Save metadata to disk""" - for (fn, _, writable), cache in zip( - self._scan_locations(), self.cached_files, strict=False - ): + for (fn, _, writable), cache in zip(self._scan_locations(), self.cached_files): if not writable: continue diff --git a/fsspec/implementations/cached.py b/fsspec/implementations/cached.py index b4f453609..a55888bdc 100644 --- a/fsspec/implementations/cached.py +++ b/fsspec/implementations/cached.py @@ -574,12 +574,12 @@ def open_many(self, open_files, **kwargs): if self.compression: raise NotImplementedError details = [self._check_file(sp) for sp in paths] - downpath = [p for p, d in zip(paths, details, strict=False) if not d] + downpath = [p for p, d in zip(paths, details) if not d] downfn0 = [ os.path.join(self.storage[-1], self._mapper(p)) - for p, d in zip(paths, details, strict=False) + for p, d in zip(paths, details) ] # keep these path names for opening later - downfn = [fn for fn, d in zip(downfn0, details, strict=False) if not d] + downfn = [fn for fn, d in zip(downfn0, details) if not d] if downpath: # skip if all files are already cached and up to date self.fs.get(downpath, downfn) @@ -595,7 +595,7 @@ def open_many(self, open_files, **kwargs): } for path in downpath ] - for path, detail in zip(downpath, newdetail, strict=False): + for path, detail in zip(downpath, newdetail): self._metadata.update_file(path, detail) self.save_cache() @@ -605,7 +605,7 @@ def firstpart(fn): return [ open(firstpart(fn0) if fn0 else fn1, mode=open_files.mode) - for fn0, fn1 in zip(details, downfn0, strict=False) + for fn0, fn1 in zip(details, downfn0) ] def commit_many(self, open_files): @@ -670,7 +670,7 @@ def cat( self.save_cache() callback.set_size(len(paths)) - for p, fn in zip(paths, fns, strict=False): + for p, fn in zip(paths, fns): with open(fn, "rb") as f: out[p] = f.read() callback.relative_update(1) @@ -886,8 +886,8 @@ def cat_ranges( ): logger.debug("cat ranges %s", paths) lpaths = [self._check_file(p) for p in paths] - rpaths = [p for l, p in zip(lpaths, paths, strict=False) if l is False] - lpaths = [l for l, p in zip(lpaths, paths, strict=False) if l is False] + rpaths = [p for l, p in zip(lpaths, paths) if l is False] + lpaths = [l for l, p in zip(lpaths, paths) if l is False] self.fs.get(rpaths, lpaths) paths = [self._check_file(p) for p in paths] return LocalFileSystem().cat_ranges( diff --git a/fsspec/implementations/reference.py b/fsspec/implementations/reference.py index 52e1e16a7..54e81224b 100644 --- a/fsspec/implementations/reference.py +++ b/fsspec/implementations/reference.py @@ -76,13 +76,13 @@ def __iter__(self): class RefsItemsView(collections.abc.ItemsView): def __iter__(self): - return zip(self._mapping.keys(), self._mapping.values(), strict=False) + return zip(self._mapping.keys(), self._mapping.values()) def ravel_multi_index(idx, sizes): val = 0 mult = 1 - for i, s in zip(idx[::-1], sizes[::-1], strict=False): + for i, s in zip(idx[::-1], sizes[::-1]): val += i * mult mult *= s return val @@ -286,7 +286,7 @@ def ls(self, path="", detail=True): recs = self._generate_all_records(field) recinfo = [ {"name": name, "type": "file", "size": rec[-1]} - for name, rec in zip(keys, recs, strict=False) + for name, rec in zip(keys, recs) if rec[0] # filters out path==None, deleted/missing ] return fileinfo + recinfo @@ -349,8 +349,7 @@ def _get_chunk_sizes(self, field): if field not in self.chunk_sizes: zarray = self.zmetadata[f"{field}/.zarray"] size_ratio = [ - math.ceil(s / c) - for s, c in zip(zarray["shape"], zarray["chunks"], strict=False) + math.ceil(s / c) for s, c in zip(zarray["shape"], zarray["chunks"]) ] self.chunk_sizes[field] = size_ratio or [1] return self.chunk_sizes[field] @@ -358,7 +357,7 @@ def _get_chunk_sizes(self, field): def _generate_record(self, field, record): """The references for a given parquet file of a given field""" refs = self.open_refs(field, record) - it = iter(zip(*refs.values(), strict=False)) + it = iter(zip(*refs.values())) if len(refs) == 3: # All urls return (list(t) for t in it) @@ -879,7 +878,7 @@ def get(self, rpath, lpath, recursive=False, **kwargs): data = self.cat([r for r in rpath if not self.isdir(r)]) else: data = self.cat(rpath) - for remote, local in zip(rpath, targets, strict=False): + for remote, local in zip(rpath, targets): if remote in data: fs.pipe_file(local, data[remote]) @@ -919,7 +918,7 @@ def cat(self, path, recursive=False, on_error="raise", **kwargs): ends2 = [] paths2 = [] whole_files = set() - for u, s, e, p in zip(urls, starts, ends, valid_paths, strict=False): + for u, s, e, p in zip(urls, starts, ends, valid_paths): if isinstance(u, bytes): # data out[p] = u @@ -931,7 +930,7 @@ def cat(self, path, recursive=False, on_error="raise", **kwargs): starts2.append(s) ends2.append(e) paths2.append(p) - for u, s, e, p in zip(urls, starts, ends, valid_paths, strict=False): + for u, s, e, p in zip(urls, starts, ends, valid_paths): # second run to account for files that are to be loaded whole if s is not None and u not in whole_files: urls2.append(u) @@ -951,12 +950,10 @@ def cat(self, path, recursive=False, on_error="raise", **kwargs): bytes_out = fs.cat_ranges(new_paths, new_starts, new_ends) # unbundle from merged bytes - simple approach - for u, s, e, p in zip(urls, starts, ends, valid_paths, strict=False): + for u, s, e, p in zip(urls, starts, ends, valid_paths): if p in out: continue # was bytes, already handled - for np, ns, ne, b in zip( - new_paths, new_starts, new_ends, bytes_out, strict=False - ): + for np, ns, ne, b in zip(new_paths, new_starts, new_ends, bytes_out): if np == u and (ns is None or ne is None): if isinstance(b, Exception): out[p] = b @@ -1064,7 +1061,7 @@ def _process_gen(self, gens): for k, v in gen["dimensions"].items() } products = ( - dict(zip(dimension.keys(), values, strict=False)) + dict(zip(dimension.keys(), values)) for values in itertools.product(*dimension.values()) ) for pr in products: @@ -1108,7 +1105,7 @@ def _dircache_from_items(self): subdirs.append(par0) subdirs.reverse() - for parent, child in zip(subdirs, subdirs[1:], strict=False): + for parent, child in zip(subdirs, subdirs[1:]): # register newly discovered directories assert child not in self.dircache assert parent in self.dircache diff --git a/fsspec/implementations/tests/test_archive.py b/fsspec/implementations/tests/test_archive.py index ed72bb559..457714b39 100644 --- a/fsspec/implementations/tests/test_archive.py +++ b/fsspec/implementations/tests/test_archive.py @@ -322,7 +322,7 @@ def test_walk(self, scenario: ArchiveTestScenario, topdown, prune_nested): # prior py3.10 zip() does not support strict=True, we need # a manual len check here assert len(result) == len(expected) - for lhs, rhs in zip(result, expected, strict=False): + for lhs, rhs in zip(result, expected): assert lhs[0] == rhs[0] assert sorted(lhs[1]) == sorted(rhs[1]) assert sorted(lhs[2]) == sorted(rhs[2]) diff --git a/fsspec/implementations/tests/test_local.py b/fsspec/implementations/tests/test_local.py index 1c539eecb..fd6b656bd 100644 --- a/fsspec/implementations/tests/test_local.py +++ b/fsspec/implementations/tests/test_local.py @@ -172,13 +172,11 @@ def test_urlpath_expand_write(): """Make sure * is expanded in file paths when writing.""" _, _, paths = get_fs_token_paths("prefix-*.csv", mode="wb", num=2) assert all( - p.endswith(pa) - for p, pa in zip(paths, ["/prefix-0.csv", "/prefix-1.csv"], strict=False) + p.endswith(pa) for p, pa in zip(paths, ["/prefix-0.csv", "/prefix-1.csv"]) ) _, _, paths = get_fs_token_paths(["prefix-*.csv"], mode="wb", num=2) assert all( - p.endswith(pa) - for p, pa in zip(paths, ["/prefix-0.csv", "/prefix-1.csv"], strict=False) + p.endswith(pa) for p, pa in zip(paths, ["/prefix-0.csv", "/prefix-1.csv"]) ) # we can read with multiple masks, but not write with pytest.raises(ValueError): @@ -191,7 +189,7 @@ def test_open_files(): with filetexts(files, mode="b"): myfiles = open_files("./.test.accounts.*") assert len(myfiles) == len(files) - for lazy_file, data_file in zip(myfiles, sorted(files), strict=False): + for lazy_file, data_file in zip(myfiles, sorted(files)): with lazy_file as f: x = f.read() assert x == files[data_file] @@ -293,7 +291,7 @@ def test_pickability_of_lazy_files(tmpdir): myfiles = open_files("./.test.accounts.*") myfiles2 = cloudpickle.loads(cloudpickle.dumps(myfiles)) - for f, f2 in zip(myfiles, myfiles2, strict=False): + for f, f2 in zip(myfiles, myfiles2): assert f.path == f2.path assert isinstance(f.fs, type(f2.fs)) with f as f_open, f2 as f2_open: diff --git a/fsspec/implementations/zip.py b/fsspec/implementations/zip.py index d1336ef48..6db3ae278 100644 --- a/fsspec/implementations/zip.py +++ b/fsspec/implementations/zip.py @@ -145,7 +145,7 @@ def find(self, path, maxdepth=None, withdirs=False, detail=False, **kwargs): def _matching_starts(file_path): file_parts = filter(lambda s: bool(s), file_path.split("/")) - return all(a == b for a, b in zip(path_parts, file_parts, strict=False)) + return all(a == b for a, b in zip(path_parts, file_parts)) self._get_dirs() diff --git a/fsspec/mapping.py b/fsspec/mapping.py index 0188bd517..752eef352 100644 --- a/fsspec/mapping.py +++ b/fsspec/mapping.py @@ -113,7 +113,7 @@ def getitems(self, keys, on_error="raise"): } return { key: out[k2] if on_error == "raise" else out.get(k2, KeyError(k2)) - for key, k2 in zip(keys, keys2, strict=False) + for key, k2 in zip(keys, keys2) if on_error == "return" or not isinstance(out[k2], BaseException) } diff --git a/fsspec/parquet.py b/fsspec/parquet.py index 71d4d6b3a..faedb7b9e 100644 --- a/fsspec/parquet.py +++ b/fsspec/parquet.py @@ -328,7 +328,7 @@ def _get_parquet_byte_ranges_from_metadata( def _transfer_ranges(fs, blocks, paths, starts, ends): # Use cat_ranges to gather the data byte_ranges ranges = (paths, starts, ends) - for path, start, stop, data in zip(*ranges, fs.cat_ranges(*ranges), strict=False): + for path, start, stop, data in zip(*ranges, fs.cat_ranges(*ranges)): blocks[path][(start, stop)] = data diff --git a/fsspec/spec.py b/fsspec/spec.py index 1891e1319..b67d5c16f 100644 --- a/fsspec/spec.py +++ b/fsspec/spec.py @@ -866,7 +866,7 @@ def cat_ranges( if len(starts) != len(paths) or len(ends) != len(paths): raise ValueError out = [] - for p, s, e in zip(paths, starts, ends, strict=False): + for p, s, e in zip(paths, starts, ends): try: out.append(self.cat_file(p, s, e)) except Exception as e: @@ -1006,7 +1006,7 @@ def get( ) callback.set_size(len(lpaths)) - for lpath, rpath in callback.wrap(zip(lpaths, rpaths, strict=False)): + for lpath, rpath in callback.wrap(zip(lpaths, rpaths)): with callback.branched(rpath, lpath) as child: self.get_file(rpath, lpath, callback=child, **kwargs) @@ -1098,7 +1098,7 @@ def put( ) callback.set_size(len(rpaths)) - for lpath, rpath in callback.wrap(zip(lpaths, rpaths, strict=False)): + for lpath, rpath in callback.wrap(zip(lpaths, rpaths)): with callback.branched(lpath, rpath) as child: self.put_file(lpath, rpath, callback=child, **kwargs) @@ -1165,7 +1165,7 @@ def copy( flatten=not source_is_str, ) - for p1, p2 in zip(paths1, paths2, strict=False): + for p1, p2 in zip(paths1, paths2): try: self.cp_file(p1, p2, **kwargs) except FileNotFoundError: diff --git a/fsspec/tests/abstract/copy.py b/fsspec/tests/abstract/copy.py index 354c1417c..e39e57e5f 100644 --- a/fsspec/tests/abstract/copy.py +++ b/fsspec/tests/abstract/copy.py @@ -120,9 +120,7 @@ def test_copy_directory_to_existing_directory( fs.touch(dummy) assert fs.isdir(target) - for source_slash, target_slash in zip( - [False, True], [False, True], strict=False - ): + for source_slash, target_slash in zip([False, True], [False, True]): s = fs_join(source, "subdir") if source_slash: s += "/" @@ -203,9 +201,7 @@ def test_copy_directory_to_new_directory( target = fs_target fs.mkdir(target) - for source_slash, target_slash in zip( - [False, True], [False, True], strict=False - ): + for source_slash, target_slash in zip([False, True], [False, True]): s = fs_join(source, "subdir") if source_slash: s += "/" @@ -286,7 +282,7 @@ def test_copy_glob_to_existing_directory( ) # With recursive - for glob, recursive in zip(["*", "**"], [True, False], strict=False): + for glob, recursive in zip(["*", "**"], [True, False]): fs.cp(fs_join(source, "subdir", glob), t, recursive=recursive) assert fs.isfile(fs_join(target, "subfile1")) assert fs.isfile(fs_join(target, "subfile2")) @@ -354,7 +350,7 @@ def test_copy_glob_to_new_directory( assert not fs.exists(fs_join(target, "newdir")) # With recursive - for glob, recursive in zip(["*", "**"], [True, False], strict=False): + for glob, recursive in zip(["*", "**"], [True, False]): fs.cp(fs_join(source, "subdir", glob), t, recursive=recursive) assert fs.isdir(fs_join(target, "newdir")) assert fs.isfile(fs_join(target, "newdir", "subfile1")) diff --git a/fsspec/tests/abstract/get.py b/fsspec/tests/abstract/get.py index 6e16db15f..851ab81ee 100644 --- a/fsspec/tests/abstract/get.py +++ b/fsspec/tests/abstract/get.py @@ -127,9 +127,7 @@ def test_get_directory_to_existing_directory( local_fs.mkdir(target) assert local_fs.isdir(target) - for source_slash, target_slash in zip( - [False, True], [False, True], strict=False - ): + for source_slash, target_slash in zip([False, True], [False, True]): s = fs_join(source, "subdir") if source_slash: s += "/" @@ -207,9 +205,7 @@ def test_get_directory_to_new_directory( target = local_target local_fs.mkdir(target) - for source_slash, target_slash in zip( - [False, True], [False, True], strict=False - ): + for source_slash, target_slash in zip([False, True], [False, True]): s = fs_join(source, "subdir") if source_slash: s += "/" @@ -282,7 +278,7 @@ def test_get_glob_to_existing_directory( assert local_fs.ls(target) == [] # With recursive - for glob, recursive in zip(["*", "**"], [True, False], strict=False): + for glob, recursive in zip(["*", "**"], [True, False]): fs.get(fs_join(source, "subdir", glob), t, recursive=recursive) assert local_fs.isfile(local_join(target, "subfile1")) assert local_fs.isfile(local_join(target, "subfile2")) @@ -354,7 +350,7 @@ def test_get_glob_to_new_directory( assert local_fs.ls(target) == [] # With recursive - for glob, recursive in zip(["*", "**"], [True, False], strict=False): + for glob, recursive in zip(["*", "**"], [True, False]): fs.get(fs_join(source, "subdir", glob), t, recursive=recursive) assert local_fs.isdir(local_join(target, "newdir")) assert local_fs.isfile(local_join(target, "newdir", "subfile1")) diff --git a/fsspec/tests/abstract/put.py b/fsspec/tests/abstract/put.py index 8cffe1f4c..9fc349977 100644 --- a/fsspec/tests/abstract/put.py +++ b/fsspec/tests/abstract/put.py @@ -123,9 +123,7 @@ def test_put_directory_to_existing_directory( fs.touch(dummy) assert fs.isdir(target) - for source_slash, target_slash in zip( - [False, True], [False, True], strict=False - ): + for source_slash, target_slash in zip([False, True], [False, True]): s = fs_join(source, "subdir") if source_slash: s += "/" @@ -206,9 +204,7 @@ def test_put_directory_to_new_directory( target = fs_target fs.mkdir(target) - for source_slash, target_slash in zip( - [False, True], [False, True], strict=False - ): + for source_slash, target_slash in zip([False, True], [False, True]): s = fs_join(source, "subdir") if source_slash: s += "/" @@ -290,7 +286,7 @@ def test_put_glob_to_existing_directory( ) # With recursive - for glob, recursive in zip(["*", "**"], [True, False], strict=False): + for glob, recursive in zip(["*", "**"], [True, False]): fs.put(local_join(source, "subdir", glob), t, recursive=recursive) assert fs.isfile(fs_join(target, "subfile1")) assert fs.isfile(fs_join(target, "subfile2")) @@ -361,7 +357,7 @@ def test_put_glob_to_new_directory( assert not fs.exists(fs_join(target, "newdir")) # With recursive - for glob, recursive in zip(["*", "**"], [True, False], strict=False): + for glob, recursive in zip(["*", "**"], [True, False]): fs.put(local_join(source, "subdir", glob), t, recursive=recursive) assert fs.isdir(fs_join(target, "newdir")) assert fs.isfile(fs_join(target, "newdir", "subfile1")) diff --git a/fsspec/tests/test_spec.py b/fsspec/tests/test_spec.py index 39c8086ae..dafb0d004 100644 --- a/fsspec/tests/test_spec.py +++ b/fsspec/tests/test_spec.py @@ -1258,7 +1258,7 @@ def test_dummy_callbacks_files_branched(tmpdir): def check_events(lpaths, rpaths): from fsspec.implementations.local import make_path_posix - base_keys = zip(make_path_posix(lpaths), make_path_posix(rpaths), strict=False) + base_keys = zip(make_path_posix(lpaths), make_path_posix(rpaths)) assert set(callback.events.keys()) == {("top-level",), *base_keys} assert callback.events["top-level",] == imitate_transfer(10, 10, file=False) diff --git a/fsspec/utils.py b/fsspec/utils.py index 0c021118c..1e5e5e70b 100644 --- a/fsspec/utils.py +++ b/fsspec/utils.py @@ -562,9 +562,8 @@ def merge_offset_ranges( list(v) for v in zip( *sorted( - zip(paths, starts, ends, strict=False), - ), - strict=False, + zip(paths, starts, ends), + ) ) ) From fdecd1f213abd9d491cd16d1b81cb43e94dd6eb9 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Thu, 13 Nov 2025 21:46:30 +0200 Subject: [PATCH 6/7] Allow zip without explicit strict keyword --- pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 60c423c24..9eabf1842 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -194,6 +194,8 @@ ignore = [ "B026", # No explicit `stacklevel` keyword argument found "B028", + # `zip` without explicit `strict` keyword + "B905", # Assigning lambda expression "E731", # Ambiguous variable names From 56c44fc2e83a5f5f39298d990ccd47953d3163d6 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Thu, 13 Nov 2025 21:54:33 +0200 Subject: [PATCH 7/7] Revert removing the mode from open() --- fsspec/implementations/cache_metadata.py | 2 +- fsspec/implementations/tests/test_cached.py | 6 +++--- fsspec/implementations/tests/test_git.py | 2 +- fsspec/tests/test_core.py | 6 +++--- fsspec/tests/test_fuse.py | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/fsspec/implementations/cache_metadata.py b/fsspec/implementations/cache_metadata.py index baa21ad06..9d1f7eb7f 100644 --- a/fsspec/implementations/cache_metadata.py +++ b/fsspec/implementations/cache_metadata.py @@ -55,7 +55,7 @@ def __init__(self, storage: list[str]): def _load(self, fn: str) -> Detail: """Low-level function to load metadata from specific file""" try: - with open(fn) as f: + with open(fn, "r") as f: loaded = json.load(f) except ValueError: with open(fn, "rb") as f: diff --git a/fsspec/implementations/tests/test_cached.py b/fsspec/implementations/tests/test_cached.py index 249e044b4..814929d8c 100644 --- a/fsspec/implementations/tests/test_cached.py +++ b/fsspec/implementations/tests/test_cached.py @@ -185,7 +185,7 @@ def test_metadata_replace_pickle_with_json(tmpdir): assert f.read(5) == b"test" # Confirm metadata is in json format - with open(cache_fn) as f: + with open(cache_fn, "r") as f: metadata = json.load(f) assert list(metadata.keys()) == [make_path_posix(afile)] @@ -253,7 +253,7 @@ def test_blockcache_workflow(ftp_writable, tmp_path, force_save_pickle): with open(tmp_path / "cache", "rb") as f: cache = pickle.load(f) else: - with open(tmp_path / "cache") as f: + with open(tmp_path / "cache", "r") as f: cache = json.load(f) assert "/out" in cache assert cache["/out"]["blocks"] == [0, 1] @@ -370,7 +370,7 @@ def __ager(cache_fn, fn, del_fn=False): with open(cache_fn, "rb") as f: cached_files = pickle.load(f) else: - with open(cache_fn) as f: + with open(cache_fn, "r") as f: cached_files = json.load(f) fn_posix = pathlib.Path(fn).as_posix() cached_files[fn_posix]["time"] = cached_files[fn_posix]["time"] - 691200 diff --git a/fsspec/implementations/tests/test_git.py b/fsspec/implementations/tests/test_git.py index 0182ae855..2aeb544a1 100644 --- a/fsspec/implementations/tests/test_git.py +++ b/fsspec/implementations/tests/test_git.py @@ -24,7 +24,7 @@ def repo(): open(os.path.join(d, "file1"), "wb").write(b"data0") subprocess.call("git add file1", shell=True, cwd=d) subprocess.call('git commit -m "init"', shell=True, cwd=d) - sha = open(os.path.join(d, ".git/refs/heads/master")).read().strip() + sha = open(os.path.join(d, ".git/refs/heads/master"), "r").read().strip() open(os.path.join(d, "file1"), "wb").write(b"data00") subprocess.check_output('git commit -a -m "tagger"', shell=True, cwd=d) subprocess.call('git tag -a thetag -m "make tag"', shell=True, cwd=d) diff --git a/fsspec/tests/test_core.py b/fsspec/tests/test_core.py index 1c67fb6c0..0dacd3830 100644 --- a/fsspec/tests/test_core.py +++ b/fsspec/tests/test_core.py @@ -304,14 +304,14 @@ def test_open_file_write_with_special_characters(tmp_path, char, monkeypatch): with fsspec.open(file_path, "w", expand=False) as f: f.write(expected_content) - with open(file_path) as f: + with open(file_path, "r") as f: actual_content = f.read() monkeypatch.setattr(fsspec.core, "DEFAULT_EXPAND", False) with fsspec.open(file_path, "w") as f: f.write(expected_content * 2) - with open(file_path) as f: + with open(file_path, "r") as f: assert f.read() == actual_content * 2 assert actual_content == expected_content @@ -347,7 +347,7 @@ def test_open_files_write_with_special_characters(tmp_path, char): )[0] as f: f.write(expected_content) - with open(file_path) as f: + with open(file_path, "r") as f: actual_content = f.read() assert actual_content == expected_content diff --git a/fsspec/tests/test_fuse.py b/fsspec/tests/test_fuse.py index 6fb6d2772..ef3005367 100644 --- a/fsspec/tests/test_fuse.py +++ b/fsspec/tests/test_fuse.py @@ -139,7 +139,7 @@ def test_seek_rw(mount_local): fh.write("st") fh.close() - fh = open(mount_dir / "text") + fh = open(mount_dir / "text", "r") assert fh.read() == "test" fh.seek(2) assert fh.read() == "st"