diff --git a/tests/integration-tests/conftest.py b/tests/integration-tests/conftest.py
index 445da74e9a..a87dfde063 100644
--- a/tests/integration-tests/conftest.py
+++ b/tests/integration-tests/conftest.py
@@ -48,6 +48,7 @@
     delete_s3_bucket,
     generate_stack_name,
     get_architecture_supported_by_instance_type,
+    get_instance_info,
     get_vpc_snakecase_value,
     random_alphanumeric,
     set_credentials,
@@ -826,6 +827,24 @@ def architecture(request, instance, region):
     return supported_architecture
 
 
+@pytest.fixture()
+def default_threads_per_core(request, instance, region):
+    """Return the default threads per core for the given instance type."""
+    # NOTE: currently, .metal instances do not contain the DefaultThreadsPerCore
+    # attribute in their VCpuInfo section. This is a known limitation of the
+    # EC2 DescribeInstanceTypes API. For these instance types, an assumption
+    # is made that if the instance's supported architectures list includes
+    # x86_64 then the default is 2, otherwise it's 1.
+    logging.info(f"Getting default threads per core for instance type {instance}")
+    instance_type_data = get_instance_info(instance, region)
+    threads_per_core = instance_type_data.get("VCpuInfo", {}).get("DefaultThreadsPerCore")
+    if threads_per_core is None:
+        supported_architectures = instance_type_data.get("ProcessorInfo", {}).get("SupportedArchitectures", [])
+        threads_per_core = 2 if "x86_64" in supported_architectures else 1
+    logging.info(f"Default threads per core for instance type {instance}: {threads_per_core}")
+    return threads_per_core
+
+
 @pytest.fixture(scope="session")
 def key_name(request):
     """Return the EC2 key pair name to be used."""
diff --git a/tests/integration-tests/tests/disable_hyperthreading/test_disable_hyperthreading.py b/tests/integration-tests/tests/disable_hyperthreading/test_disable_hyperthreading.py
index 4f34f62640..faf405e6ee 100644
--- a/tests/integration-tests/tests/disable_hyperthreading/test_disable_hyperthreading.py
+++ b/tests/integration-tests/tests/disable_hyperthreading/test_disable_hyperthreading.py
@@ -28,14 +28,22 @@
 # HT disabled via CpuOptions
 @pytest.mark.dimensions("sa-east-1", "c5.xlarge", "alinux2", "sge")
 @pytest.mark.dimensions("sa-east-1", "c5.xlarge", "centos7", "torque")
-def test_sit_disable_hyperthreading(region, scheduler, instance, os, pcluster_config_reader, clusters_factory):
+def test_sit_disable_hyperthreading(
+    region, scheduler, instance, os, pcluster_config_reader, clusters_factory, default_threads_per_core
+):
     """Test Disable Hyperthreading for SIT clusters."""
     slots_per_instance = fetch_instance_slots(region, instance)
     cluster_config = pcluster_config_reader()
     cluster = clusters_factory(cluster_config)
     remote_command_executor = RemoteCommandExecutor(cluster)
     scheduler_commands = get_scheduler_commands(scheduler, remote_command_executor)
-    _test_disable_hyperthreading_settings(remote_command_executor, scheduler_commands, slots_per_instance, scheduler)
+    _test_disable_hyperthreading_settings(
+        remote_command_executor,
+        scheduler_commands,
+        slots_per_instance,
+        scheduler,
+        default_threads_per_core=default_threads_per_core,
+    )
 
     assert_no_errors_in_logs(remote_command_executor, scheduler)
 
@@ -47,7 +55,9 @@ def test_sit_disable_hyperthreading(region, scheduler, instance, os, pcluster_co
 @pytest.mark.dimensions("us-west-2", "m4.xlarge", "centos8", "slurm")
 # HT disabled via CpuOptions
 @pytest.mark.dimensions("us-west-1", "c5.xlarge", "ubuntu1804", "slurm")
-def test_hit_disable_hyperthreading(region, scheduler, instance, os, pcluster_config_reader, clusters_factory):
+def test_hit_disable_hyperthreading(
+    region, scheduler, instance, os, pcluster_config_reader, clusters_factory, default_threads_per_core
+):
     """Test Disable Hyperthreading for HIT clusters."""
     slots_per_instance = fetch_instance_slots(region, instance)
     cluster_config = pcluster_config_reader()
@@ -61,6 +71,7 @@ def test_hit_disable_hyperthreading(region, scheduler, instance, os, pcluster_co
         scheduler,
         hyperthreading_disabled=False,
         partition="ht-enabled",
+        default_threads_per_core=default_threads_per_core,
     )
     _test_disable_hyperthreading_settings(
         remote_command_executor,
@@ -69,6 +80,7 @@ def test_hit_disable_hyperthreading(region, scheduler, instance, os, pcluster_co
         scheduler,
         hyperthreading_disabled=True,
         partition="ht-disabled",
+        default_threads_per_core=default_threads_per_core,
     )
 
     assert_no_errors_in_logs(remote_command_executor, scheduler)
@@ -81,9 +93,12 @@ def _test_disable_hyperthreading_settings(
     scheduler,
     hyperthreading_disabled=True,
     partition=None,
+    default_threads_per_core=2,
 ):
-    expected_cpus_per_instance = slots_per_instance // 2 if hyperthreading_disabled else slots_per_instance
-    expected_threads_per_core = 1 if hyperthreading_disabled else 2
+    expected_cpus_per_instance = (
+        slots_per_instance // default_threads_per_core if hyperthreading_disabled else slots_per_instance
+    )
+    expected_threads_per_core = 1 if hyperthreading_disabled else default_threads_per_core
 
     # Test disable hyperthreading on head node
     logging.info("Test Disable Hyperthreading on head node")
@@ -91,7 +106,7 @@ def _test_disable_hyperthreading_settings(
     if partition:
         # If partition is supplied, assume this is HIT setting where ht settings are at the queue level
         # In this case, ht is not disabled on head node
-        assert_that(result.stdout).matches(r"Thread\(s\) per core:\s+{0}".format(2))
+        assert_that(result.stdout).matches(r"Thread\(s\) per core:\s+{0}".format(default_threads_per_core))
         _assert_active_cpus(result.stdout, slots_per_instance)
     else:
         assert_that(result.stdout).matches(r"Thread\(s\) per core:\s+{0}".format(expected_threads_per_core))
@@ -128,10 +143,12 @@ def _test_disable_hyperthreading_settings(
     # check scale up to 2 nodes
     if partition:
         result = scheduler_commands.submit_command(
-            "hostname > /shared/hostname.out", slots=slots_per_instance, partition=partition
+            "hostname > /shared/hostname.out", slots=2 * expected_cpus_per_instance, partition=partition
        )
     else:
-        result = scheduler_commands.submit_command("hostname > /shared/hostname.out", slots=slots_per_instance)
+        result = scheduler_commands.submit_command(
+            "hostname > /shared/hostname.out", slots=2 * expected_cpus_per_instance
+        )
     job_id = scheduler_commands.assert_job_submitted(result.stdout)
     scheduler_commands.wait_job_completed(job_id)
     scheduler_commands.assert_job_succeeded(job_id)
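
For context, the fallback applied by the new default_threads_per_core fixture can be exercised in isolation. The sketch below mirrors the fixture's logic as a plain function, assuming get_instance_info returns the raw instance-type entry from the EC2 DescribeInstanceTypes API; the sample payloads are illustrative stand-ins (only the keys the fixture reads are shown), not real API responses.

# Minimal, standalone sketch of the fixture's fallback logic (illustrative only).
def default_threads_per_core(instance_type_data):
    # Use DefaultThreadsPerCore when the API provides it.
    threads_per_core = instance_type_data.get("VCpuInfo", {}).get("DefaultThreadsPerCore")
    if threads_per_core is None:
        # .metal instances may omit the attribute; fall back on the architecture list.
        supported_architectures = instance_type_data.get("ProcessorInfo", {}).get("SupportedArchitectures", [])
        threads_per_core = 2 if "x86_64" in supported_architectures else 1
    return threads_per_core

# c5.xlarge-like payload: attribute present, used directly.
assert default_threads_per_core({"VCpuInfo": {"DefaultThreadsPerCore": 2}}) == 2
# x86_64 .metal-like payload: attribute missing, assumed 2 threads per core.
assert default_threads_per_core({"ProcessorInfo": {"SupportedArchitectures": ["x86_64"]}}) == 2
# ARM-based .metal-like payload: attribute missing, assumed 1 thread per core.
assert default_threads_per_core({"ProcessorInfo": {"SupportedArchitectures": ["arm64"]}}) == 1

With this value available, the tests derive expected_cpus_per_instance as slots_per_instance // default_threads_per_core when hyperthreading is disabled, rather than hard-coding a divisor of 2, so instance types whose default is 1 thread per core are handled correctly.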