diff --git a/CHANGELOG.md b/CHANGELOG.md index fe3e8b7e..b581e203 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,8 +6,8 @@ This file is used to list changes made in each version of the aws-parallelcluste 3.13.1 ------ -**BUG FIXES** -- Fix a bug that was causing compute node self‑termination to hang on Ubuntu 24.04 nodes. +**CHANGES** +- There were no changes for this version. 3.13.0 ------ diff --git a/src/slurm_plugin/computemgtd.py b/src/slurm_plugin/computemgtd.py index 5c88a891..3ae4f326 100644 --- a/src/slurm_plugin/computemgtd.py +++ b/src/slurm_plugin/computemgtd.py @@ -125,34 +125,14 @@ def _read_nodename_from_file(nodename_file_path): raise -def _is_ubuntu2404(): - """Return True if the OS is Ubuntu 24.04.""" - try: - with open("/etc/os-release", "r") as f: - info = dict(line.strip().split("=", 1) for line in f if "=" in line) - os_id = info.get("ID", "").strip('"').lower() - version = info.get("VERSION_ID", "").strip('"') - return os_id == "ubuntu" and version.startswith("24.04") - except Exception as e: - log.warning("Unable to detect OS version from /etc/os-release: %s", e) - return False - - @log_exception(log, "self terminating compute instance", catch_exception=CalledProcessError, raise_on_error=False) def _self_terminate(): """Self terminate the instance.""" # Sleep for 10 seconds so termination log entries are uploaded to CW logs log.info("Preparing to self terminate the instance in 10 seconds!") time.sleep(10) - if _is_ubuntu2404(): - shutdown_cmd = "sudo systemctl poweroff --force" - log.info("Detected Ubuntu 24.04 – using `%s`", shutdown_cmd) - else: - shutdown_cmd = "sudo shutdown -h now" - log.info("Using default shutdown command `%s`", shutdown_cmd) - log.info("Self terminating instance now!") - run_command(shutdown_cmd) + run_command("sudo shutdown -h now") @retry(stop_max_attempt_number=3, wait_fixed=1500) diff --git a/tests/slurm_plugin/test_computemgtd.py b/tests/slurm_plugin/test_computemgtd.py index d2835421..ba974560 100644 --- a/tests/slurm_plugin/test_computemgtd.py +++ b/tests/slurm_plugin/test_computemgtd.py @@ -12,12 +12,11 @@ import logging import os -from unittest.mock import mock_open import pytest import slurm_plugin from assertpy import assert_that -from slurm_plugin.computemgtd import ComputemgtdConfig, _is_self_node_down, _is_ubuntu2404, _self_terminate +from slurm_plugin.computemgtd import ComputemgtdConfig, _is_self_node_down, _self_terminate from slurm_plugin.slurm_resources import DynamicNode @@ -104,38 +103,13 @@ def test_is_self_node_down(mock_node_info, expected_result, mocker): assert_that(_is_self_node_down("queue1-st-c5xlarge-1")).is_equal_to(expected_result) -@pytest.mark.parametrize( - ("is_ubuntu2404", "expected_cmd"), - [ - (True, "sudo systemctl poweroff --force"), - (False, "sudo shutdown -h now"), - ], -) -def test_self_terminate(mocker, caplog, is_ubuntu2404, expected_cmd): +def test_self_terminate(mocker, caplog): """Verify self-termination is implemented via a shutdown command rather than calling TerminateInstances.""" - mocker.patch("slurm_plugin.computemgtd._is_ubuntu2404", return_value=is_ubuntu2404) run_command_patch = mocker.patch("slurm_plugin.computemgtd.run_command") sleep_patch = mocker.patch("slurm_plugin.computemgtd.time.sleep") with caplog.at_level(logging.INFO): _self_terminate() assert_that(caplog.text).contains("Preparing to self terminate the instance in 10 seconds!") assert_that(caplog.text).contains("Self terminating instance now!") - run_command_patch.assert_called_with(expected_cmd) + run_command_patch.assert_called_with("sudo shutdown -h now") sleep_patch.assert_called_with(10) - - -@pytest.mark.parametrize( - ("file_content", "expected"), - [ - ('ID=ubuntu\nVERSION_ID="24.04"\n', True), - ('ID=ubuntu\nVERSION_ID="24.04.2"\n', True), - ('ID=ubuntu\nVERSION_ID="22.04"\n', False), - ('ID=rocky\nVERSION_ID="9.3"\n', False), - ("ID=ubuntu\n", False), - ], -) -def test_is_ubuntu2404(file_content, expected, mocker): - m = mock_open(read_data=file_content) - mocker.patch("builtins.open", m) - - assert _is_ubuntu2404() is expected