diff --git a/distributed/distributed.yaml b/distributed/distributed.yaml index 6ef7555376..18d1bd6c39 100644 --- a/distributed/distributed.yaml +++ b/distributed/distributed.yaml @@ -312,7 +312,7 @@ distributed: disk: true # Monitor host-wide disk I/O host-cpu: false # Monitor host-wide CPU usage, with very granular breakdown gil: - enabled: false # Monitor GIL contention + enabled: true # Monitor GIL contention interval: "1ms" # Frequency to poll GIL event-loop: tornado rmm: diff --git a/distributed/http/scheduler/tests/test_scheduler_http.py b/distributed/http/scheduler/tests/test_scheduler_http.py index bc6f8cd13a..944aa8aeb0 100644 --- a/distributed/http/scheduler/tests/test_scheduler_http.py +++ b/distributed/http/scheduler/tests/test_scheduler_http.py @@ -107,6 +107,7 @@ async def test_prometheus(c, s, a, b): expected_metrics = { "dask_scheduler_clients", "dask_scheduler_desired_workers", + "dask_scheduler_gil_contention", "dask_scheduler_workers", "dask_scheduler_last_time", "dask_scheduler_tasks", diff --git a/distributed/http/worker/tests/test_worker_http.py b/distributed/http/worker/tests/test_worker_http.py index 3291883368..c1ad5048b6 100644 --- a/distributed/http/worker/tests/test_worker_http.py +++ b/distributed/http/worker/tests/test_worker_http.py @@ -32,6 +32,7 @@ async def test_prometheus(c, s, a): ) expected_metrics = { "dask_worker_concurrent_fetch_requests", + "dask_worker_gil_contention_total", "dask_worker_latency_seconds", "dask_worker_memory_bytes", "dask_worker_spill_bytes_total", diff --git a/distributed/system_monitor.py b/distributed/system_monitor.py index 85bdae0d42..9d4834db98 100644 --- a/distributed/system_monitor.py +++ b/distributed/system_monitor.py @@ -233,7 +233,4 @@ def range_query(self, start: int) -> dict[str, list]: def close(self) -> None: if self.monitor_gil_contention: - try: - self._gilknocker.stop() - except ValueError: # Wasn't started or already stopped - pass + self._gilknocker.stop() diff --git a/distributed/tests/test_system_monitor.py b/distributed/tests/test_system_monitor.py index 6c3389ec7a..912267e1d3 100644 --- a/distributed/tests/test_system_monitor.py +++ b/distributed/tests/test_system_monitor.py @@ -101,20 +101,22 @@ def test_host_cpu(): def test_gil_contention(): pytest.importorskip("gilknocker") + # Default enabled if gilknocker installed sm = SystemMonitor() a = sm.update() - assert "gil_contention" not in a - - sm = SystemMonitor(monitor_gil_contention=True) - a = sm.update() assert "gil_contention" in a - with dask.config.set({"distributed.admin.system-monitor.gil.enabled": True}): - sm = SystemMonitor() - a = sm.update() - assert "gil_contention" in a - assert sm._gilknocker.is_running sm.close() sm.close() # Idempotent assert not sm._gilknocker.is_running + + sm = SystemMonitor(monitor_gil_contention=False) + a = sm.update() + assert "gil_contention" not in a + + assert dask.config.get("distributed.admin.system-monitor.gil.enabled") + with dask.config.set({"distributed.admin.system-monitor.gil.enabled": False}): + sm = SystemMonitor() + a = sm.update() + assert "gil_contention" not in a