From c3b4c5526f3ff58559ce8c458de0ca6a70741b81 Mon Sep 17 00:00:00 2001 From: Jacopo De Amicis Date: Tue, 10 Jan 2023 14:11:17 +0100 Subject: [PATCH 1/4] Add integration test for scontrol update nodelist sorting bug Slurm 22.05 (up to 22.05.7) sorts the nodes in nodelist provided as nodename field to the `scontrol update` command. If `scontrol update nodename=nodelist nodeaddr=nodeaddrlist` is given, this causes a mismatch between the nodenames and the nodeaddrs because the order of nodeaddrlist is not changed to match the reordering of nodelist. The bug above will be fixed in Slurm 22.05.8 and later. The added test checks that the sorting of the nodelist does not happen. Signed-off-by: Jacopo De Amicis --- .../configs/common/common.yaml | 6 +++ .../tests/schedulers/test_slurm.py | 52 +++++++++++++++++++ .../pcluster.config.yaml | 31 +++++++++++ 3 files changed, 89 insertions(+) create mode 100644 tests/integration-tests/tests/schedulers/test_slurm/test_scontrol_update_nodelist_sorting/pcluster.config.yaml diff --git a/tests/integration-tests/configs/common/common.yaml b/tests/integration-tests/configs/common/common.yaml index 6b7d68e151..ebd6cd1a60 100644 --- a/tests/integration-tests/configs/common/common.yaml +++ b/tests/integration-tests/configs/common/common.yaml @@ -511,6 +511,12 @@ schedulers: instances: {{ common.INSTANCES_DEFAULT_X86 }} oss: ["ubuntu2004"] schedulers: ["slurm"] + test_slurm.py::test_scontrol_update_nodelist_sorting: + dimensions: + - regions: ["ca-central-2"] + instances: {{ common.INSTANCES_DEFAULT_X86 }} + oss: ["alinux2"] + schedulers: ["slurm"] test_slurm_accounting.py::test_slurm_accounting: dimensions: - regions: ["us-east-1", "ap-south-1"] diff --git a/tests/integration-tests/tests/schedulers/test_slurm.py b/tests/integration-tests/tests/schedulers/test_slurm.py index c8feca56b1..113ccde757 100644 --- a/tests/integration-tests/tests/schedulers/test_slurm.py +++ b/tests/integration-tests/tests/schedulers/test_slurm.py @@ -560,6 +560,58 @@ def test_update_slurm_reconfigure_race_condition( ) +@pytest.mark.usefixtures("region", "os", "instance", "scheduler") +def test_scontrol_update_nodelist_sorting( + pcluster_config_reader, + clusters_factory, + test_datadir, + scheduler_commands_factory, +): + """ + Test that scontrol update node follows the order of the nodelist provided by the user. + + In Slurm 22.05 the scontrol update node logic was modified and a sorting routine was + introduced, which modified the order of the nodes in the nodelist. + If `scontrol update node nodename=nodelist nodeaddr=nodeaddrlist` is called, only the + nodelist was sorted (not the nodeaddrlist). This causes mismatches between the Slurm + nodenames and the assigned addresses. + + See https://bugs.schedmd.com/show_bug.cgi?id=15731 + """ + + max_count_cr1 = max_count_cr2 = 4 + + cluster_config = pcluster_config_reader( + config_file="pcluster.config.yaml", + output_file="pcluster.config.initial.yaml", + max_count_cr1=max_count_cr1, + max_count_cr2=max_count_cr2, + ) + cluster = clusters_factory(cluster_config) + remote_command_executor = RemoteCommandExecutor(cluster) + slurm_commands = scheduler_commands_factory(remote_command_executor) + + assert_compute_node_states(slurm_commands, compute_nodes=None, expected_states=["idle~"]) + + nodes_in_queue1 = slurm_commands.get_compute_nodes("queue1", all_nodes=True) + nodes_in_queue2 = slurm_commands.get_compute_nodes("queue2", all_nodes=True) + + # Create an unsorted list of nodes to be updated (queue2 is alphabetically after queue1)``:s + nodelist = f"{nodes_in_queue2[0]},{nodes_in_queue1[0]}" + + # Stop clustermgtd since it may fix the situation under the hood if it calls scontrol update + # with a sorted list of nodes + remote_command_executor.run_remote_command(f"sudo systemctl stop supervisord") + + # Run scontrol update with unsorted list of nodes + remote_command_executor.run_remote_command(f"sudo -i scontrol update nodename={nodelist} nodeaddr={nodelist}") + + assert_that(slurm_commands.get_node_attribute(nodes_in_queue1[0], "NodeAddr")).is_equal_to(nodes_in_queue1[0]) + assert_that(slurm_commands.get_node_attribute(nodes_in_queue2[0], "NodeAddr")).is_equal_to(nodes_in_queue2[0]) + + remote_command_executor.run_remote_command(f"sudo systemctl start supervisord") + + @pytest.mark.usefixtures("region", "os", "instance", "scheduler") def test_slurm_overrides( scheduler, diff --git a/tests/integration-tests/tests/schedulers/test_slurm/test_scontrol_update_nodelist_sorting/pcluster.config.yaml b/tests/integration-tests/tests/schedulers/test_slurm/test_scontrol_update_nodelist_sorting/pcluster.config.yaml new file mode 100644 index 0000000000..213caeda29 --- /dev/null +++ b/tests/integration-tests/tests/schedulers/test_slurm/test_scontrol_update_nodelist_sorting/pcluster.config.yaml @@ -0,0 +1,31 @@ +Image: + Os: {{ os }} +HeadNode: + InstanceType: {{ instance }} + Networking: + SubnetId: {{ public_subnet_id }} + Ssh: + KeyName: {{ key_name }} +Scheduling: + Scheduler: slurm + SlurmQueues: + - Name: queue1 + Networking: + SubnetIds: + - {{ private_subnet_id }} + ComputeResources: + - Name: resource1 + Instances: + - InstanceType: {{ instance }} + MinCount: 0 + MaxCount: {{ max_count_cr1 }} + - Name: queue2 + Networking: + SubnetIds: + - {{ private_subnet_id }} + ComputeResources: + - Name: resource2 + Instances: + - InstanceType: {{ instance }} + MinCount: 0 + MaxCount: {{ max_count_cr2 }} From a69968005e4cee2149381641a0bb2894ae667784 Mon Sep 17 00:00:00 2001 From: Jacopo De Amicis Date: Tue, 10 Jan 2023 15:59:12 +0100 Subject: [PATCH 2/4] Add entry to the CHANGELOG.md Signed-off-by: Jacopo De Amicis --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ccaf232d83..745e9fb90a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ CHANGELOG ========= + +3.4.1 +----- + +**BUG FIXES** +- Fix an issue with the Slurm scheduler that might incorrectly apply updates to its internal registry of compute nodes. This might result in EC2 instances to become inaccessible or backed by an incorrect instance type. + 3.4.0 ----- From f6b9eaf110d6f2db8527c9547b2bef6e6b257b97 Mon Sep 17 00:00:00 2001 From: Jacopo De Amicis Date: Tue, 10 Jan 2023 16:11:55 +0100 Subject: [PATCH 3/4] Fix code-linters failures Signed-off-by: Jacopo De Amicis --- tests/integration-tests/tests/schedulers/test_slurm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration-tests/tests/schedulers/test_slurm.py b/tests/integration-tests/tests/schedulers/test_slurm.py index 113ccde757..cf4c7d331f 100644 --- a/tests/integration-tests/tests/schedulers/test_slurm.py +++ b/tests/integration-tests/tests/schedulers/test_slurm.py @@ -601,7 +601,7 @@ def test_scontrol_update_nodelist_sorting( # Stop clustermgtd since it may fix the situation under the hood if it calls scontrol update # with a sorted list of nodes - remote_command_executor.run_remote_command(f"sudo systemctl stop supervisord") + remote_command_executor.run_remote_command("sudo systemctl stop supervisord") # Run scontrol update with unsorted list of nodes remote_command_executor.run_remote_command(f"sudo -i scontrol update nodename={nodelist} nodeaddr={nodelist}") @@ -609,7 +609,7 @@ def test_scontrol_update_nodelist_sorting( assert_that(slurm_commands.get_node_attribute(nodes_in_queue1[0], "NodeAddr")).is_equal_to(nodes_in_queue1[0]) assert_that(slurm_commands.get_node_attribute(nodes_in_queue2[0], "NodeAddr")).is_equal_to(nodes_in_queue2[0]) - remote_command_executor.run_remote_command(f"sudo systemctl start supervisord") + remote_command_executor.run_remote_command("sudo systemctl start supervisord") @pytest.mark.usefixtures("region", "os", "instance", "scheduler") From 513acf24d2f446bb91f322691bb6a791d49b6a84 Mon Sep 17 00:00:00 2001 From: Jacopo De Amicis Date: Tue, 10 Jan 2023 18:13:54 +0100 Subject: [PATCH 4/4] Remove useless restart of supervisord Signed-off-by: Jacopo De Amicis --- tests/integration-tests/tests/schedulers/test_slurm.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/integration-tests/tests/schedulers/test_slurm.py b/tests/integration-tests/tests/schedulers/test_slurm.py index cf4c7d331f..503f0bf218 100644 --- a/tests/integration-tests/tests/schedulers/test_slurm.py +++ b/tests/integration-tests/tests/schedulers/test_slurm.py @@ -609,8 +609,6 @@ def test_scontrol_update_nodelist_sorting( assert_that(slurm_commands.get_node_attribute(nodes_in_queue1[0], "NodeAddr")).is_equal_to(nodes_in_queue1[0]) assert_that(slurm_commands.get_node_attribute(nodes_in_queue2[0], "NodeAddr")).is_equal_to(nodes_in_queue2[0]) - remote_command_executor.run_remote_command("sudo systemctl start supervisord") - @pytest.mark.usefixtures("region", "os", "instance", "scheduler") def test_slurm_overrides(