From d4186e099e847e1c143b620719c45943508621ec Mon Sep 17 00:00:00 2001 From: Chandrasekharan M <117059509+chandrasekharan-zipstack@users.noreply.github.com> Date: Tue, 21 Apr 2026 11:58:49 +0530 Subject: [PATCH] [HOTFIX] Use importlib.util.find_spec for pluggable worker discovery (#1918) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * [FIX] Use importlib.util.find_spec for pluggable worker discovery _verify_pluggable_worker_exists() previously checked for the literal file `pluggable_worker//worker.py` on disk, which breaks when the plugin has been compiled to a .so (Nuitka, Cython, or any C extension) — the module is perfectly importable but the pre-check rejects it because only the .py extension is considered. Replace the filesystem check with importlib.util.find_spec(), which is Python's standard way to ask "is this module resolvable by the import system?". It honors every registered finder — source .py, compiled .so, bytecode .pyc, namespace packages, zipimports — so the function now matches what its docstring claims: verifying the module can be loaded, not that a specific file extension is present. Behavior is preserved for existing deployments: - Images with no `pluggable_worker//` subpackage → find_spec raises ModuleNotFoundError (ImportError subclass) → returns False. - Images with source .py → find_spec resolves the .py → returns True. - Images with compiled .so → find_spec resolves the .so → returns True. Co-Authored-By: Claude Opus 4.7 (1M context) * [FIX] Handle ValueError from find_spec in pluggable worker verification Greptile-flagged edge case: importlib.util.find_spec() can raise ValueError (not just ImportError) when sys.modules has a partially initialised module entry with __spec__ = None from a prior failed import. Broaden the except to catch both. Co-Authored-By: Claude Opus 4.7 (1M context) * [FIX] Resolve api-deployment worker directory from enum import path worker.py:452 did worker_type.value.replace("-", "_") to derive the on-disk dir name. All WorkerType enum values already use underscores, so the replace was a no-op; for API_DEPLOYMENT whose dir is "api-deployment" (hyphen), it resolved to "api_deployment" and the os.path.exists() check failed. Boot then logged a spurious "❌ Worker directory not found: /app/api_deployment" at ERROR level. The task registration path (builder + celery autodiscover via to_import_path) is unaffected, so this was purely log noise — but noise at ERROR level that masks real failures in log scans. Fix: derive the directory from the authoritative to_import_path() which already handles the hyphen case (api_deployment -> api-deployment). Co-Authored-By: Claude Opus 4.7 (1M context) --------- Co-authored-by: Claude Opus 4.7 (1M context) --- .../shared/infrastructure/config/builder.py | 31 +++++++++---------- workers/worker.py | 6 ++-- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/workers/shared/infrastructure/config/builder.py b/workers/shared/infrastructure/config/builder.py index 2d9491177f..859d823b98 100644 --- a/workers/shared/infrastructure/config/builder.py +++ b/workers/shared/infrastructure/config/builder.py @@ -323,27 +323,26 @@ def _verify_pluggable_worker_exists(worker_type: WorkerType) -> bool: try: import importlib - from pathlib import Path - - # Check if the worker.py file exists - # Path resolution: builder.py is at /app/workers/shared/infrastructure/config/ - # We need to get to /app/workers/, so go up 3 levels - pluggable_worker_path = ( - Path(__file__).resolve().parents[3] - / "pluggable_worker" - / worker_type.value - / "worker.py" - ) + import importlib.util + + # Ask Python's import system whether the module is resolvable. + # find_spec() consults all registered finders and handles every + # module representation (source .py, Nuitka/Cython .so, .pyc, + # namespace packages, zipimports) — unlike a filesystem check + # for a specific file extension, which breaks for compiled plugins. + module_path = f"pluggable_worker.{worker_type.value}.worker" - if not pluggable_worker_path.exists(): - logger.error(f"Pluggable worker file not found: {pluggable_worker_path}") + if importlib.util.find_spec(module_path) is None: + logger.error(f"Pluggable worker module not importable: {module_path}") return False - # Try to import the module to verify it's valid - module_path = f"pluggable_worker.{worker_type.value}.worker" + # Load it to catch findable-but-broken modules (e.g. import errors + # inside worker.py that find_spec wouldn't surface). importlib.import_module(module_path) - except ImportError: + except (ImportError, ValueError): + # ValueError: find_spec raises this when sys.modules[module_path] is + # populated but has __spec__ = None (from a prior failed import). logger.exception(f"Failed to import pluggable worker {worker_type.value}") return False except (OSError, AttributeError): diff --git a/workers/worker.py b/workers/worker.py index ed2a605848..3822e81a16 100755 --- a/workers/worker.py +++ b/workers/worker.py @@ -448,8 +448,10 @@ def on_task_postrun(sender=None, task_id=None, **kwargs): worker_directory = os.path.join("pluggable_worker", worker_type.value) worker_path = os.path.join(base_dir, worker_directory) else: - # Core workers use their value directly (with hyphens converted to underscores where needed) - worker_directory = worker_type.value.replace("-", "_") + # Enum values use underscores (Python module names); a few on-disk dirs + # still use hyphens (e.g. api-deployment). Derive the directory from the + # authoritative import-path map on WorkerType instead of a blind replace. + worker_directory = worker_type.to_import_path().rsplit(".", 1)[0] worker_path = os.path.join(base_dir, worker_directory) # Add worker directory to path for task imports