From 4876d19d0e831c37f1e69a91f0ab78eeb8777354 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Sun, 24 May 2026 23:03:57 -0500 Subject: [PATCH] drop dependency on 'urllib3' --- .pre-commit-config.yaml | 1 - .../environment-mindeps.yaml | 1 - .../recipes/distributed/meta.yaml | 1 - distributed/preloading.py | 32 +++++++++++-------- distributed/tests/test_preload.py | 20 +++++------- pixi.toml | 1 - pyproject.toml | 1 - 7 files changed, 26 insertions(+), 31 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 424fc9edd16..64fdf955dab 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -38,7 +38,6 @@ repos: - pytest - tornado - pyarrow - - urllib3 - git+https://github.com/dask/dask - git+https://github.com/dask/zict diff --git a/continuous_integration/environment-mindeps.yaml b/continuous_integration/environment-mindeps.yaml index 5337d70fba7..c45cffb70c5 100644 --- a/continuous_integration/environment-mindeps.yaml +++ b/continuous_integration/environment-mindeps.yaml @@ -19,7 +19,6 @@ dependencies: - tblib=1.6.0 - toolz=0.12.0 - tornado=6.2.0 - - urllib3=1.26.5 - zict=3.0.0 # Distributed depends on the latest version of Dask - pip diff --git a/continuous_integration/recipes/distributed/meta.yaml b/continuous_integration/recipes/distributed/meta.yaml index 7e84c25a114..abbf683673b 100644 --- a/continuous_integration/recipes/distributed/meta.yaml +++ b/continuous_integration/recipes/distributed/meta.yaml @@ -41,7 +41,6 @@ requirements: - tblib >=1.6.0,!=3.2.0,!=3.2.1 - toolz >=0.10.0 - tornado >=6.2.0 - - urllib3 >=1.26.5 - zict >=3.0.0 run_constrained: - openssl !=1.1.1e diff --git a/distributed/preloading.py b/distributed/preloading.py index 90f55e4846f..83de6e94416 100644 --- a/distributed/preloading.py +++ b/distributed/preloading.py @@ -6,6 +6,9 @@ import os import shutil import sys +import time +import urllib.error +import urllib.request from collections.abc import Iterable, Sequence from importlib import import_module from types import ModuleType @@ -129,21 +132,22 @@ def _import_module(name: str, file_dir: str | None = None) -> ModuleType: def _download_module(url: str) -> ModuleType: logger.info("Downloading preload at %s", url) assert is_webaddress(url) - # This is the only place where urllib3 is used and it is a relatively heavy - # import. Do lazy import to reduce import time - import urllib3 - - with urllib3.PoolManager() as http: - response = http.request( - method="GET", - url=url, - retries=urllib3.util.Retry( - status_forcelist=[429, 504, 503, 502], - backoff_factor=0.2, - ), - ) - source = response.data + retryable_codes = {429, 502, 503, 504} + backoff_factor = 0.2 + max_retries = 3 + + for attempt in range(max_retries + 1): + try: + with urllib.request.urlopen(url) as response: + source = response.read() + break + except urllib.error.HTTPError as e: + if e.code in retryable_codes and attempt < max_retries: + retry_delay_seconds = backoff_factor * (2**attempt) + time.sleep(retry_delay_seconds) + continue + raise compiled = compile(source, url, "exec") module = ModuleType(url) diff --git a/distributed/tests/test_preload.py b/distributed/tests/test_preload.py index b1b7ce84df4..115803f0f68 100644 --- a/distributed/tests/test_preload.py +++ b/distributed/tests/test_preload.py @@ -176,13 +176,13 @@ async def test_preload_import_time(): async def test_web_preload(): with ( mock.patch( - "urllib3.PoolManager.request", + "urllib.request.urlopen", **{ - "return_value.data": b"def dask_setup(dask_server):" + "return_value.__enter__.return_value.read.return_value": b"def dask_setup(dask_server):" b"\n dask_server.foo = 1" b"\n" }, - ) as request, + ) as mock_urlopen, captured_logger("distributed.preloading") as log, ): async with Scheduler( @@ -200,9 +200,7 @@ async def test_web_preload(): ) is not None ) - assert request.mock_calls == [ - mock.call(method="GET", url="http://example.com/preload", retries=mock.ANY) - ] + assert mock_urlopen.call_args_list == [mock.call("http://example.com/preload")] @gen_cluster(nthreads=[]) @@ -233,15 +231,13 @@ async def test_web_preload_worker(): dask.config.set(scheduler_address="tcp://127.0.0.1:{port}") """).encode() with mock.patch( - "urllib3.PoolManager.request", - **{"return_value.data": data}, - ) as request: + "urllib.request.urlopen", + **{"return_value.__enter__.return_value.read.return_value": data}, + ) as mock_urlopen: async with Scheduler(port=port, host="localhost", dashboard_address=":0") as s: async with Nanny(preload_nanny=["http://example.com/preload"]) as nanny: assert nanny.scheduler_addr == s.address - assert request.mock_calls == [ - mock.call(method="GET", url="http://example.com/preload", retries=mock.ANY) - ] + assert mock_urlopen.call_args_list == [mock.call("http://example.com/preload")] # This test is blocked on https://github.com/dask/distributed/issues/5819 diff --git a/pixi.toml b/pixi.toml index 4e42aed97d4..5f44678aae1 100644 --- a/pixi.toml +++ b/pixi.toml @@ -30,5 +30,4 @@ psutil = ">=5.8.0" sortedcontainers = ">=2.0.5" tblib = ">=1.6.0,!=3.2.0,!=3.2.1" tornado = ">=6.2.0" -urllib3 = ">=1.26.5" zict = ">=3.0.0" diff --git a/pyproject.toml b/pyproject.toml index bad74c7ede8..9066ef79764 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,7 +40,6 @@ dependencies = [ "tblib >= 1.6.0,!=3.2.0,!=3.2.1", "toolz >= 0.12.0", "tornado >= 6.2.0", - "urllib3 >= 1.26.5", "zict >= 3.0.0", ] dynamic = ["version"]