canonical · TheRealFalcon · Feb 2, 2024 · Feb 2, 2024 · Feb 2, 2024 · blackboxsw
diff --git a/cloudinit/cmd/status.py b/cloudinit/cmd/status.py
@@ -93,6 +93,36 @@ class StatusDetails(NamedTuple):
 {description}"""
 
 
+def query_systemctl(
+    systemctl_args: List[str],
+    *,
+    wait: bool,
+    existing_status: Optional[UXAppStatus] = None,
+) -> str:
+    """Query systemd with retries and return output."""
+    while True:
+        try:
+            return subp.subp(["systemctl", *systemctl_args]).stdout.strip()
+        except subp.ProcessExecutionError as e:
+            if existing_status and existing_status in (
+                UXAppStatus.DEGRADED_RUNNING,
+                UXAppStatus.RUNNING,
+            ):
+                return ""
+            last_exception = e
+            if wait:
+                sleep(0.25)
+            else:
+                break
+    print(
+        "Failed to get status from systemd. "
+        "Cloud-init status may be inaccurate. ",
+        f"Error from systemctl: {last_exception.stderr}",
+        file=sys.stderr,
+    )
+    return ""
+
+
 def get_parser(parser=None):
     """Build or extend an arg parser for status utility.
 
@@ -229,7 +259,9 @@ def handle_status_args(name, args) -> int:
     return 0
 
 
-def get_bootstatus(disable_file, paths) -> Tuple[UXAppBootStatusCode, str]:
+def get_bootstatus(
+    disable_file, paths, wait
+) -> Tuple[UXAppBootStatusCode, str]:
     """Report whether cloud-init current boot status
 
     @param disable_file: The path to the cloud-init disable file.
@@ -253,7 +285,7 @@ def get_bootstatus(disable_file, paths) -> Tuple[UXAppBootStatusCode, str]:
     elif "cloud-init=disabled" in os.environ.get("KERNEL_CMDLINE", "") or (
         uses_systemd()
         and "cloud-init=disabled"
-        in subp.subp(["systemctl", "show-environment"]).stdout
+        in query_systemctl(["show-environment"], wait=wait)
     ):
         bootstatus_code = UXAppBootStatusCode.DISABLED_BY_ENV_VARIABLE
         reason = (
@@ -272,7 +304,9 @@ def get_bootstatus(disable_file, paths) -> Tuple[UXAppBootStatusCode, str]:
     return (bootstatus_code, reason)
 
 
-def _get_error_or_running_from_systemd() -> Optional[UXAppStatus]:
+def _get_error_or_running_from_systemd(
+    existing_status: UXAppStatus, wait: bool
+) -> Optional[UXAppStatus]:
     """Get if systemd is in error or running state.
 
     Using systemd, we can get more fine-grained status of the
@@ -288,14 +322,18 @@ def _get_error_or_running_from_systemd() -> Optional[UXAppStatus]:
         "cloud-init.service",
         "cloud-init-local.service",
     ]:
-        stdout = subp.subp(
+        stdout = query_systemctl(
             [
-                "systemctl",
                 "show",
                 "--property=ActiveState,UnitFileState,SubState,MainPID",
                 service,
             ],
-        ).stdout
+            wait=wait,
+            existing_status=existing_status,
+        )
+        if not stdout:
+            # Systemd isn't ready
+            return None
         states = dict(
             [[x.strip() for x in r.split("=")] for r in stdout.splitlines()]
         )
@@ -327,39 +365,6 @@ def _get_error_or_running_from_systemd() -> Optional[UXAppStatus]:
     return None
 
 
-def _get_error_or_running_from_systemd_with_retry(
-    existing_status: UXAppStatus, *, wait: bool
-) -> Optional[UXAppStatus]:
-    """Get systemd status and retry if dbus isn't ready.
-
-    If cloud-init has determined that we're still running, then we can
-    ignore the status from systemd. However, if cloud-init has detected error,
-    then we should retry on systemd status so we don't incorrectly report
-    error state while cloud-init is still running.
-    """
-    while True:
-        try:
-            return _get_error_or_running_from_systemd()
-        except subp.ProcessExecutionError as e:
-            last_exception = e
-            if existing_status in (
-                UXAppStatus.DEGRADED_RUNNING,
-                UXAppStatus.RUNNING,
-            ):
-                return None
-            if wait:
-                sleep(0.25)
-            else:
-                break
-    print(
-        "Failed to get status from systemd. "
-        "Cloud-init status may be inaccurate. ",
-        f"Error from systemctl: {last_exception.stderr}",
-        file=sys.stderr,
-    )
-    return None
-
-
 def get_status_details(
     paths: Optional[Paths] = None, wait: bool = False
 ) -> StatusDetails:
@@ -380,7 +385,7 @@ def get_status_details(
     result_file = os.path.join(paths.run_dir, "result.json")
 
     boot_status_code, description = get_bootstatus(
-        CLOUDINIT_DISABLED_FILE, paths
+        CLOUDINIT_DISABLED_FILE, paths, wait
     )
     if boot_status_code in DISABLED_BOOT_CODES:
         status = UXAppStatus.DISABLED
@@ -433,9 +438,7 @@ def get_status_details(
         UXAppStatus.NOT_RUN,
         UXAppStatus.DISABLED,
     ):
-        systemd_status = _get_error_or_running_from_systemd_with_retry(
-            status, wait=wait
-        )
+        systemd_status = _get_error_or_running_from_systemd(status, wait=wait)
         if systemd_status:
             status = systemd_status
 

diff --git a/tests/unittests/cmd/test_status.py b/tests/unittests/cmd/test_status.py
@@ -15,7 +15,6 @@
 from cloudinit.cmd.status import (
     UXAppStatus,
     _get_error_or_running_from_systemd,
-    _get_error_or_running_from_systemd_with_retry,
 )
 from cloudinit.subp import SubpResult
 from cloudinit.util import ensure_file
@@ -65,7 +64,7 @@ class TestStatus:
         ),
     )
     @mock.patch(
-        f"{M_PATH}_get_error_or_running_from_systemd_with_retry",
+        f"{M_PATH}_get_error_or_running_from_systemd",
         return_value=None,
     )
     def test_get_status_details_ds_none(
@@ -188,6 +187,7 @@ def test_get_bootstatus(
         failure_msg: str,
         expected_reason: Union[str, Callable],
         config: Config,
+        mocker,
     ):
         if ensured_file is not None:
             ensure_file(ensured_file(config))
@@ -201,15 +201,10 @@ def test_get_bootstatus(
                 stderr=None,
             ),
         ):
-            code, reason = wrap_and_call(
-                M_NAME,
-                {
-                    "uses_systemd": uses_systemd,
-                    "get_cmdline": get_cmdline,
-                },
-                status.get_bootstatus,
-                config.disable_file,
-                config.paths,
+            mocker.patch(f"{M_PATH}uses_systemd", return_value=uses_systemd)
+            mocker.patch(f"{M_PATH}get_cmdline", return_value=get_cmdline)
+            code, reason = status.get_bootstatus(
+                config.disable_file, config.paths, False
             )
         assert code == expected_bootstatus, failure_msg
         if isinstance(expected_reason, str):
@@ -713,7 +708,7 @@ def fakeexists(filepath):
     )
     @mock.patch(M_PATH + "read_cfg_paths")
     @mock.patch(
-        f"{M_PATH}_get_error_or_running_from_systemd_with_retry",
+        f"{M_PATH}_get_error_or_running_from_systemd",
         return_value=None,
     )
     def test_status_output(
@@ -757,7 +752,7 @@ def test_status_output(
 
     @mock.patch(M_PATH + "read_cfg_paths")
     @mock.patch(
-        f"{M_PATH}_get_error_or_running_from_systemd_with_retry",
+        f"{M_PATH}_get_error_or_running_from_systemd",
         return_value=None,
     )
     def test_status_wait_blocks_until_done(
@@ -811,7 +806,7 @@ def fake_sleep(interval):
 
     @mock.patch(M_PATH + "read_cfg_paths")
     @mock.patch(
-        f"{M_PATH}_get_error_or_running_from_systemd_with_retry",
+        f"{M_PATH}_get_error_or_running_from_systemd",
         return_value=None,
     )
     def test_status_wait_blocks_until_error(
@@ -867,7 +862,7 @@ def fake_sleep(interval):
 
     @mock.patch(M_PATH + "read_cfg_paths")
     @mock.patch(
-        f"{M_PATH}_get_error_or_running_from_systemd_with_retry",
+        f"{M_PATH}_get_error_or_running_from_systemd",
         return_value=None,
     )
     def test_status_main(
@@ -948,7 +943,10 @@ def test_get_error_or_running_from_systemd(
                 stderr=None,
             ),
         ):
-            assert _get_error_or_running_from_systemd() == status
+            assert (
+                _get_error_or_running_from_systemd(UXAppStatus.RUNNING, False)
+                == status
+            )
 
     def test_exception_while_running(self, mocker, capsys):
         m_subp = mocker.patch(
@@ -959,9 +957,7 @@ def test_exception_while_running(self, mocker, capsys):
             ),
         )
         assert (
-            _get_error_or_running_from_systemd_with_retry(
-                UXAppStatus.RUNNING, wait=True
-            )
+            _get_error_or_running_from_systemd(UXAppStatus.RUNNING, wait=True)
             is None
         )
         assert 1 == m_subp.call_count
@@ -987,9 +983,7 @@ def test_retry(self, mocker, capsys):
             ],
         )
         assert (
-            _get_error_or_running_from_systemd_with_retry(
-                UXAppStatus.ERROR, wait=True
-            )
+            _get_error_or_running_from_systemd(UXAppStatus.ERROR, wait=True)
             is UXAppStatus.RUNNING
         )
         assert 3 == m_subp.call_count
@@ -1005,13 +999,68 @@ def test_retry_no_wait(self, mocker, capsys):
         )
         mocker.patch("time.time", side_effect=[1, 2, 50])
         assert (
-            _get_error_or_running_from_systemd_with_retry(
-                UXAppStatus.ERROR, wait=False
-            )
+            _get_error_or_running_from_systemd(UXAppStatus.ERROR, wait=False)
             is None
         )
         assert 1 == m_subp.call_count
         assert (
             "Failed to get status from systemd. "
             "Cloud-init status may be inaccurate."
         ) in capsys.readouterr().err
+
+
+class TestQuerySystemctl:
+    def test_query_systemctl(self, mocker):
+        m_subp = mocker.patch(
+            f"{M_PATH}subp.subp",
+            return_value=SubpResult(stdout="hello", stderr=None),
+        )
+        assert status.query_systemctl(["some", "args"], wait=False) == "hello"
+        m_subp.assert_called_once_with(["systemctl", "some", "args"])
+
+    def test_query_systemctl_with_exception(self, mocker, capsys):
+        m_subp = mocker.patch(
+            f"{M_PATH}subp.subp",
+            side_effect=subp.ProcessExecutionError(
+                "Message recipient disconnected", stderr="oh noes!"
+            ),
+        )
+        assert status.query_systemctl(["some", "args"], wait=False) == ""
+        m_subp.assert_called_once_with(["systemctl", "some", "args"])
+        assert "Error from systemctl: oh noes!" in capsys.readouterr().err
+
+    def test_query_systemctl_wait_with_exception(self, mocker):
+        m_sleep = mocker.patch(f"{M_PATH}sleep")
+        m_subp = mocker.patch(
+            f"{M_PATH}subp.subp",
+            side_effect=[
+                subp.ProcessExecutionError("Message recipient disconnected"),
+                subp.ProcessExecutionError("Message recipient disconnected"),
+                subp.ProcessExecutionError("Message recipient disconnected"),
+                SubpResult(stdout="hello", stderr=None),
+            ],
+        )
+
+        assert status.query_systemctl(["some", "args"], wait=True) == "hello"
+        assert m_subp.call_count == 4
+        assert m_sleep.call_count == 3
+
+    def test_query_systemctl_wait_with_exception_status(self, mocker):
+        m_sleep = mocker.patch(f"{M_PATH}sleep")
+        m_subp = mocker.patch(
+            f"{M_PATH}subp.subp",
+            side_effect=subp.ProcessExecutionError(
+                "Message recipient disconnected"
+            ),
+        )
+
+        assert (
+            status.query_systemctl(
+                ["some", "args"],
+                wait=True,
+                existing_status=UXAppStatus.RUNNING,
+            )
+            == ""
+        )
+        assert m_subp.call_count == 1
+        assert m_sleep.call_count == 0