From 34f0b006c6e541f0c65a8c64844da1b88835c44c Mon Sep 17 00:00:00 2001 From: fjetter Date: Wed, 16 Apr 2025 15:06:04 +0200 Subject: [PATCH 1/3] Avoid importing critical packages on process startup --- distributed/tests/test_versions.py | 11 +++---- distributed/utils.py | 2 +- distributed/versions.py | 48 +++++++++++------------------- 3 files changed, 24 insertions(+), 37 deletions(-) diff --git a/distributed/tests/test_versions.py b/distributed/tests/test_versions.py index 8d7b8704124..4a3710055d4 100644 --- a/distributed/tests/test_versions.py +++ b/distributed/tests/test_versions.py @@ -6,6 +6,7 @@ import pytest import tornado +import distributed from distributed import Client, Worker from distributed.utils_test import gen_cluster from distributed.versions import error_message, get_versions @@ -156,15 +157,15 @@ def test_version_custom_pkgs(): out = get_versions( [ # Use custom function - ("distributed", lambda mod: "123"), + "distributed", # Use version_of_package "notexist", - ("pytest", None), # has __version__ - "tornado", # has version - "math", # has nothing + "pytest", + "tornado", + "math", ] )["packages"] - assert out["distributed"] == "123" + assert out["distributed"] == distributed.__version__ assert out["notexist"] is None assert out["pytest"] == pytest.__version__ assert out["tornado"] == tornado.version diff --git a/distributed/utils.py b/distributed/utils.py index bd8711d6ca6..d8fb8565ff7 100644 --- a/distributed/utils.py +++ b/distributed/utils.py @@ -124,7 +124,7 @@ def get_mp_context(): from distributed.versions import optional_packages, required_packages - for pkg, _ in required_packages + optional_packages: + for pkg in required_packages + optional_packages: try: importlib.import_module(pkg) except ImportError: diff --git a/distributed/versions.py b/distributed/versions.py index 7a3a7a5b055..60910a95375 100644 --- a/distributed/versions.py +++ b/distributed/versions.py @@ -1,13 +1,13 @@ -""" utilities for package version introspection """ +"""utilities for package version introspection""" from __future__ import annotations -import importlib import os import platform import struct import sys -from collections.abc import Callable, Iterable +from collections.abc import Iterable +from importlib.metadata import version from itertools import chain from types import ModuleType from typing import Any @@ -17,23 +17,23 @@ BOKEH_REQUIREMENT = Requirement("bokeh>=3.1.0") required_packages = [ - ("dask", lambda p: p.__version__), - ("distributed", lambda p: p.__version__), - ("msgpack", lambda p: ".".join([str(v) for v in p.version])), - ("cloudpickle", lambda p: p.__version__), - ("tornado", lambda p: p.version), - ("toolz", lambda p: p.__version__), + "dask", + "distributed", + "msgpack", + "cloudpickle", + "tornado", + "toolz", ] optional_packages = [ - ("numpy", lambda p: p.__version__), - ("pandas", lambda p: p.__version__), - ("lz4", lambda p: p.__version__), + "numpy", + "pandas", + "lz4", ] # only these scheduler packages will be checked for version mismatch -scheduler_relevant_packages = {pkg for pkg, _ in required_packages} | { +scheduler_relevant_packages = set(required_packages) | { "lz4", "python", } - {"msgpack"} @@ -44,9 +44,7 @@ def get_versions( - packages: ( - Iterable[str | tuple[str, Callable[[ModuleType], str | None]]] | None - ) = None + packages: Iterable[str] | None = None, ) -> dict[str, dict[str, Any]]: """Return basic information on our software installation, and our installed versions of packages @@ -87,27 +85,15 @@ def version_of_package(pkg: ModuleType) -> str | None: return None -def get_package_info( - pkgs: Iterable[str | tuple[str, Callable[[ModuleType], str | None] | None]] -) -> dict[str, str | None]: +def get_package_info(pkgs: Iterable[str]) -> dict[str, str | None]: """get package versions for the passed required & optional packages""" pversions: dict[str, str | None] = {"python": ".".join(map(str, sys.version_info))} for pkg in pkgs: - if isinstance(pkg, (tuple, list)): - modname, ver_f = pkg - if ver_f is None: - ver_f = version_of_package - else: - modname = pkg - ver_f = version_of_package - try: - mod = importlib.import_module(modname) - pversions[modname] = ver_f(mod) + pversions[pkg] = version(pkg) except Exception: - pversions[modname] = None - + pversions[pkg] = None return pversions From 345c0eb5134d30bfa35ea91c80d332ad33d1c6a7 Mon Sep 17 00:00:00 2001 From: fjetter Date: Wed, 16 Apr 2025 17:30:43 +0200 Subject: [PATCH 2/3] cache the version lookup --- distributed/versions.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/distributed/versions.py b/distributed/versions.py index 60910a95375..0403118579f 100644 --- a/distributed/versions.py +++ b/distributed/versions.py @@ -7,6 +7,7 @@ import struct import sys from collections.abc import Iterable +from functools import cache from importlib.metadata import version from itertools import chain from types import ModuleType @@ -85,15 +86,25 @@ def version_of_package(pkg: ModuleType) -> str | None: return None +@cache +def _version_cached(pkg: str) -> str | None: + """Try to get the version of a package from the cache""" + try: + # using importlib.metadata.version is much faster than importing the + # actual package. + # However, it is much slower than using pkg.__version__ iff the package + # is already imported, e.g. in our test suite! Therefore, cache this. + return version(pkg) + except Exception: + return None + + def get_package_info(pkgs: Iterable[str]) -> dict[str, str | None]: """get package versions for the passed required & optional packages""" pversions: dict[str, str | None] = {"python": ".".join(map(str, sys.version_info))} for pkg in pkgs: - try: - pversions[pkg] = version(pkg) - except Exception: - pversions[pkg] = None + pversions[pkg] = _version_cached(pkg) return pversions From 6200a25669afaeb473cf0143f588b51d52d22751 Mon Sep 17 00:00:00 2001 From: fjetter Date: Thu, 17 Apr 2025 14:28:17 +0200 Subject: [PATCH 3/3] dead code --- distributed/tests/test_versions.py | 2 -- distributed/versions.py | 14 -------------- 2 files changed, 16 deletions(-) diff --git a/distributed/tests/test_versions.py b/distributed/tests/test_versions.py index 4a3710055d4..99397547ee8 100644 --- a/distributed/tests/test_versions.py +++ b/distributed/tests/test_versions.py @@ -156,9 +156,7 @@ def test_python_version(): def test_version_custom_pkgs(): out = get_versions( [ - # Use custom function "distributed", - # Use version_of_package "notexist", "pytest", "tornado", diff --git a/distributed/versions.py b/distributed/versions.py index 0403118579f..1dbe202a2c5 100644 --- a/distributed/versions.py +++ b/distributed/versions.py @@ -10,7 +10,6 @@ from functools import cache from importlib.metadata import version from itertools import chain -from types import ModuleType from typing import Any from packaging.requirements import Requirement @@ -73,19 +72,6 @@ def get_system_info() -> dict[str, Any]: } -def version_of_package(pkg: ModuleType) -> str | None: - """Try a variety of common ways to get the version of a package""" - from contextlib import suppress - - with suppress(AttributeError): - return pkg.__version__ - with suppress(AttributeError): - return str(pkg.version) - with suppress(AttributeError): - return ".".join(map(str, pkg.version_info)) - return None - - @cache def _version_cached(pkg: str) -> str | None: """Try to get the version of a package from the cache"""