From db97bd1b4707cacc8975c37823e3932cb3803b0f Mon Sep 17 00:00:00 2001 From: Benedikt Best <63287233+btbest@users.noreply.github.com> Date: Fri, 23 May 2025 15:15:04 +0200 Subject: [PATCH 1/4] Add option to expose connection errors in httpfs.exists --- fsspec/implementations/http.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fsspec/implementations/http.py b/fsspec/implementations/http.py index 093fa29be..8fa6a129d 100644 --- a/fsspec/implementations/http.py +++ b/fsspec/implementations/http.py @@ -326,7 +326,7 @@ async def gen_chunks(): async with meth(self.encode_url(rpath), data=gen_chunks(), **kw) as resp: self._raise_not_found_for_status(resp, rpath) - async def _exists(self, path, **kwargs): + async def _exists(self, path, strict=False, **kwargs): kw = self.kwargs.copy() kw.update(kwargs) try: @@ -336,7 +336,10 @@ async def _exists(self, path, **kwargs): async with r: return r.status < 400 except aiohttp.ClientError: - return False + if strict: + raise + else: + return False async def _isfile(self, path, **kwargs): return await self._exists(path, **kwargs) From 841cf3b07b7900fca6d4c4ca81c39bbfa086c1c0 Mon Sep 17 00:00:00 2001 From: Benedikt Best <63287233+btbest@users.noreply.github.com> Date: Fri, 21 Nov 2025 10:35:14 +0100 Subject: [PATCH 2/4] fixup! Add option to expose connection errors in httpfs.exists --- fsspec/implementations/http.py | 8 ++++++-- fsspec/implementations/http_sync.py | 13 ++++++++++--- fsspec/implementations/tests/test_http.py | 8 ++++++++ fsspec/implementations/tests/test_http_sync.py | 10 +++++++++- fsspec/tests/conftest.py | 3 +++ 5 files changed, 36 insertions(+), 6 deletions(-) diff --git a/fsspec/implementations/http.py b/fsspec/implementations/http.py index 8fa6a129d..6aa761c6f 100644 --- a/fsspec/implementations/http.py +++ b/fsspec/implementations/http.py @@ -334,9 +334,13 @@ async def _exists(self, path, strict=False, **kwargs): session = await self.set_session() r = await session.get(self.encode_url(path), **kw) async with r: + if strict: + self._raise_not_found_for_status(r, path) return r.status < 400 - except aiohttp.ClientError: - if strict: + except (FileNotFoundError, aiohttp.ClientError) as e: + if strict and isinstance(e, FileNotFoundError): + return False + elif strict: raise else: return False diff --git a/fsspec/implementations/http_sync.py b/fsspec/implementations/http_sync.py index 08799f20a..463d1e8b9 100644 --- a/fsspec/implementations/http_sync.py +++ b/fsspec/implementations/http_sync.py @@ -463,15 +463,22 @@ def _process_limits(self, url, start, end): end -= 1 # bytes range is inclusive return f"bytes={start}-{end}" - def exists(self, path, **kwargs): + def exists(self, path, strict=False, **kwargs): kw = self.kwargs.copy() kw.update(kwargs) try: logger.debug(path) r = self.session.get(self.encode_url(path), **kw) + if strict: + self._raise_not_found_for_status(r, path) return r.status_code < 400 - except Exception: - return False + except Exception as e: + if strict and isinstance(e, FileNotFoundError): + return False + elif strict: + raise + else: + return False def isfile(self, path, **kwargs): return self.exists(path, **kwargs) diff --git a/fsspec/implementations/tests/test_http.py b/fsspec/implementations/tests/test_http.py index d014d1155..856b51811 100644 --- a/fsspec/implementations/tests/test_http.py +++ b/fsspec/implementations/tests/test_http.py @@ -163,6 +163,14 @@ def test_exists(server): h.cat(server.address + "/notafile") +def test_exists_strict(server): + h = fsspec.filesystem("http") + assert not h.exists(server.address + "/notafile", strict=True) + with pytest.raises(aiohttp.ClientResponseError) as e: + h.exists(server.address + "/unauthorized", strict=True) + assert e.value.status == 401 + + def test_read(server): h = fsspec.filesystem("http") out = server.realfile diff --git a/fsspec/implementations/tests/test_http_sync.py b/fsspec/implementations/tests/test_http_sync.py index 330cf4d07..c802cba4d 100644 --- a/fsspec/implementations/tests/test_http_sync.py +++ b/fsspec/implementations/tests/test_http_sync.py @@ -6,7 +6,7 @@ import pytest import fsspec.utils -from fsspec.tests.conftest import data, reset_files, server, win # noqa: F401 +from fsspec.tests.conftest import data, reset_files, server, win, requests # noqa: F401 @pytest.fixture() @@ -147,6 +147,14 @@ def test_exists(server, sync): h.cat(server.address + "/notafile") +def test_exists_strict(server, sync): + h = fsspec.filesystem("http") + assert not h.exists(server.address + "/notafile", strict=True) + with pytest.raises(requests.exceptions.HTTPError) as e: + h.exists(server.address + "/unauthorized", strict=True) + assert e.value.response.status_code == 401 + + def test_read(server, sync): h = fsspec.filesystem("http") out = server.address + "/index/realfile" diff --git a/fsspec/tests/conftest.py b/fsspec/tests/conftest.py index ccf7bdccf..0a56521ed 100644 --- a/fsspec/tests/conftest.py +++ b/fsspec/tests/conftest.py @@ -54,6 +54,7 @@ class HTTPTestHandler(BaseHTTPRequestHandler): "/simple/file": data, "/simple/dir/": _make_listing("/simple/dir/file"), "/simple/dir/file": data, + "/unauthorized": AssertionError("shouldn't access"), } dynamic_files = {} @@ -85,6 +86,8 @@ def do_GET(self): if "redirect" in self.headers and file_path != "/index/realfile": new_url = _make_realfile(baseurl) return self._respond(301, {"Location": new_url}) + if file_path == "/unauthorized": + return self._respond(401) if file_data is None: return self._respond(404) From 3860ee637881e65794521adcfe821bef7c7aed10 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Fri, 21 Nov 2025 09:26:42 -0500 Subject: [PATCH 3/4] lint --- fsspec/implementations/tests/test_http_sync.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fsspec/implementations/tests/test_http_sync.py b/fsspec/implementations/tests/test_http_sync.py index c802cba4d..5b0efab41 100644 --- a/fsspec/implementations/tests/test_http_sync.py +++ b/fsspec/implementations/tests/test_http_sync.py @@ -6,7 +6,7 @@ import pytest import fsspec.utils -from fsspec.tests.conftest import data, reset_files, server, win, requests # noqa: F401 +from fsspec.tests.conftest import data, requests, reset_files, server, win # noqa: F401 @pytest.fixture() From 90456269fc0cb9c24f89927317f708708c4f5302 Mon Sep 17 00:00:00 2001 From: Benedikt Best <63287233+btbest@users.noreply.github.com> Date: Fri, 21 Nov 2025 18:04:22 +0100 Subject: [PATCH 4/4] fixup! Add option to expose connection errors in httpfs.exists --- fsspec/implementations/http.py | 11 +++++------ fsspec/implementations/http_sync.py | 11 +++++------ 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/fsspec/implementations/http.py b/fsspec/implementations/http.py index 6aa761c6f..41a49cc3d 100644 --- a/fsspec/implementations/http.py +++ b/fsspec/implementations/http.py @@ -337,13 +337,12 @@ async def _exists(self, path, strict=False, **kwargs): if strict: self._raise_not_found_for_status(r, path) return r.status < 400 - except (FileNotFoundError, aiohttp.ClientError) as e: - if strict and isinstance(e, FileNotFoundError): - return False - elif strict: + except FileNotFoundError: + return False + except aiohttp.ClientError: + if strict: raise - else: - return False + return False async def _isfile(self, path, **kwargs): return await self._exists(path, **kwargs) diff --git a/fsspec/implementations/http_sync.py b/fsspec/implementations/http_sync.py index 463d1e8b9..a67ea3ea5 100644 --- a/fsspec/implementations/http_sync.py +++ b/fsspec/implementations/http_sync.py @@ -472,13 +472,12 @@ def exists(self, path, strict=False, **kwargs): if strict: self._raise_not_found_for_status(r, path) return r.status_code < 400 - except Exception as e: - if strict and isinstance(e, FileNotFoundError): - return False - elif strict: + except FileNotFoundError: + return False + except Exception: + if strict: raise - else: - return False + return False def isfile(self, path, **kwargs): return self.exists(path, **kwargs)