From dc6aed1b86d800bfd9f949e47769243ae6d686e1 Mon Sep 17 00:00:00 2001 From: Kefu Chai Date: Tue, 5 May 2026 09:35:00 +0800 Subject: [PATCH 1/2] mgr: narrow get_metadata return type with @overload Enable type narrowing for get_metadata() when a non-None default is provided. Previously, the return type was always `Optional[Dict[str, str]]`, forcing callers to use defensive `assert metadata` checks even when a result was guaranteed. The wrapper returns either the metadata from `_ceph_get_metadata()` or the caller-supplied default. Providing an `@overload` allows type checkers to prove the result is non-None, avoiding invalid assertions for falsy defaults (like an empty defaultdict). This is a hygienic change with no runtime impact. Signed-off-by: Kefu Chai (cherry picked from commit 5722e489f8fbacb865eabca5f94221c36d7eea00) --- src/pybind/mgr/mgr_module.py | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/src/pybind/mgr/mgr_module.py b/src/pybind/mgr/mgr_module.py index e929447497a7a..638ef7c21503a 100644 --- a/src/pybind/mgr/mgr_module.py +++ b/src/pybind/mgr/mgr_module.py @@ -11,6 +11,7 @@ NamedTuple, no_type_check, Optional, + overload, Sequence, Set, TYPE_CHECKING, @@ -1729,6 +1730,26 @@ def list_servers(self) -> List[ServerInfoT]: """ return cast(List[ServerInfoT], self._ceph_get_server(None)) + @overload + def get_metadata(self, + svc_type: str, + svc_id: str) -> Optional[Dict[str, str]]: + ... + + @overload + def get_metadata(self, + svc_type: str, + svc_id: str, + default: None) -> Optional[Dict[str, str]]: + ... + + @overload + def get_metadata(self, + svc_type: str, + svc_id: str, + default: Dict[str, str]) -> Dict[str, str]: + ... + def get_metadata(self, svc_type: str, svc_id: str, @@ -1738,12 +1759,14 @@ def get_metadata(self, ceph-mgr fetches metadata asynchronously, so are windows of time during addition/removal of services where the metadata is not available to - modules. ``None`` is returned if no metadata is available. + modules. ``None`` is returned if no metadata is available, unless + ``default`` is provided, in which case ``default`` is returned. :param str svc_type: service type (e.g., 'mds', 'osd', 'mon') :param str svc_id: service id. convert OSD integer IDs to strings when calling this - :rtype: dict, or None if no metadata found + :param default: value to return when no metadata is available + :rtype: dict, or None if no metadata found and no default given """ metadata = self._ceph_get_metadata(svc_type, svc_id) if not metadata: From d4fbddfb1adec6ae17f071b31ae5474a2535e632 Mon Sep 17 00:00:00 2001 From: Kefu Chai Date: Tue, 5 May 2026 09:36:01 +0800 Subject: [PATCH 2/2] pybind/mgr/status: drop asserts that fight the defaultdict defaults The 'assert metadata' checks in the status module were actually fighting against our own defaults. Since an empty defaultdict is falsy, these asserts would blow up the whole command if a single daemon was down after a mgr restart. This drops those four grumpy asserts. Now, instead of a traceback, `ceph osd status` and `ceph fs status` will just show a blank hostname or "unknown" version as intended. The trigger is common in practice: any mgr restart leaves daemons that are currently down without metadata in daemon_state, since they never reconnect via MMgrOpen to repopulate it. After such a restart, `ceph osd status` and `ceph fs status` blow up: ``` Error EINVAL: Traceback (most recent call last): ... File ".../status/module.py", line 340, in handle_osd_status assert metadata AssertionError ``` The bug was introduced in 5ac2901f54ff Fixes: https://tracker.ceph.com/issues/76416 Reported-by: Maximiliano Sandoval Signed-off-by: Kefu Chai (cherry picked from commit 1c5a7395776d77a5ea7677c31c7040dcd66ff5ed) --- src/pybind/mgr/status/module.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/pybind/mgr/status/module.py b/src/pybind/mgr/status/module.py index 7e7264a762f60..2e7b2539c909a 100644 --- a/src/pybind/mgr/status/module.py +++ b/src/pybind/mgr/status/module.py @@ -109,7 +109,6 @@ def handle_fs_status(self, metadata = self.get_metadata('mds', info['name'], default=defaultdict(lambda: 'unknown')) - assert metadata mds_versions[metadata['ceph_version']].append(info['name']) if output_format in ('json', 'json-pretty'): @@ -159,7 +158,6 @@ def handle_fs_status(self, metadata = self.get_metadata('mds', daemon_info['name'], default=defaultdict(lambda: 'unknown')) - assert metadata mds_versions[metadata['ceph_version']].append(daemon_info['name']) if output_format in ('json', 'json-pretty'): @@ -234,7 +232,6 @@ def handle_fs_status(self, for standby in fsmap['standbys']: metadata = self.get_metadata('mds', standby['name'], default=defaultdict(lambda: 'unknown')) - assert metadata mds_versions[metadata['ceph_version']].append(standby['name']) if output_format in ('json', 'json-pretty'): @@ -337,7 +334,6 @@ def handle_osd_status(self, bucket: Optional[str] = None, format: str = 'plain') if osd_id in osd_stats: metadata = self.get_metadata('osd', str(osd_id), default=defaultdict(str)) stats = osd_stats[osd_id] - assert metadata hostname = metadata['hostname'] kb_used = stats['kb_used'] * 1024 kb_avail = stats['kb_avail'] * 1024