From 48e8f4b384723cb328643aaedb475aaac96fa503 Mon Sep 17 00:00:00 2001 From: Hanwen Date: Wed, 27 Dec 2023 08:35:05 -0800 Subject: [PATCH 1/8] Add support for RHEL9 and Rocky9 Signed-off-by: Hanwen --- CHANGELOG.md | 2 ++ cli/src/pcluster/cli/commands/dcv_util.py | 2 +- cli/src/pcluster/constants.py | 6 +++++- .../resources/imagebuilder/parallelcluster.yaml | 8 ++++++-- .../resources/imagebuilder/parallelcluster_tag.yaml | 4 ++++ .../resources/imagebuilder/parallelcluster_test.yaml | 4 ++++ .../imagebuilder/parallelcluster_validate.yaml | 6 +++++- .../resources/imagebuilder/update_and_reboot.yaml | 10 +++++++--- cli/src/pcluster/templates/cw_dashboard_builder.py | 2 +- pc_support/os_3.9.0.json | 4 ++++ 10 files changed, 39 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b60a50c715..55ff70bf1e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ CHANGELOG stop the compute fleet. It's now possible to update them by setting `Scheduling/SlurmSettings/QueueUpdateStrategy` to TERMINATE. ParallelCluster will terminate only the nodes removed during a resize of the cluster capacity performed through a cluster update. +- Add support for RHEL9. +- Add support for Rocky Linux 9 as `CustomAmi` created through `build-image` process. No public official ParallelCluster Rocky9 Linux AMI is made available at this time. - Remove `CommunicationParameters` from the Custom Slurm Settings deny list. - Add `DeploymentSettings/DisableSudoAccessForDefaultUser` parameter to disable sudo access of default user in supported OSes. 
diff --git a/cli/src/pcluster/cli/commands/dcv_util.py b/cli/src/pcluster/cli/commands/dcv_util.py index 53345621ff..6cd96bfe84 100644 --- a/cli/src/pcluster/cli/commands/dcv_util.py +++ b/cli/src/pcluster/cli/commands/dcv_util.py @@ -16,6 +16,6 @@ def get_supported_dcv_os(architecture): """Return a list of all the operating system supported by DCV.""" architectures_dict = { "x86_64": SUPPORTED_OSES, - "arm64": ["alinux2", "centos7", "rhel8", "rocky8"], + "arm64": ["alinux2", "centos7", "rhel8", "rocky8", "rhel9", "rocky9"], } return architectures_dict.get(architecture, []) diff --git a/cli/src/pcluster/constants.py b/cli/src/pcluster/constants.py index 78e1d83db4..a4afbe49f1 100644 --- a/cli/src/pcluster/constants.py +++ b/cli/src/pcluster/constants.py @@ -23,7 +23,7 @@ SUPPORTED_SCHEDULERS = ["slurm", "awsbatch"] SCHEDULERS_SUPPORTING_IMDS_SECURED = ["slurm"] -SUPPORTED_OSES = ["alinux2", "centos7", "ubuntu2004", "ubuntu2204", "rhel8", "rocky8"] +SUPPORTED_OSES = ["alinux2", "centos7", "ubuntu2004", "ubuntu2204", "rhel8", "rocky8", "rhel9", "rocky9"] SUPPORTED_OSES_FOR_SCHEDULER = {"slurm": SUPPORTED_OSES, "awsbatch": ["alinux2"]} DELETE_POLICY = "Delete" RETAIN_POLICY = "Retain" @@ -42,6 +42,8 @@ "ubuntu2204": {"user": "ubuntu"}, "rhel8": {"user": "ec2-user"}, "rocky8": {"user": "rocky"}, + "rhel9": {"user": "ec2-user"}, + "rocky9": {"user": "rocky"}, } OS_TO_IMAGE_NAME_PART_MAP = { @@ -51,6 +53,8 @@ "ubuntu2204": "ubuntu-2204-lts-hvm", "rhel8": "rhel8-hvm", "rocky8": "rocky8-hvm", + "rhel9": "rhel9-hvm", + "rocky9": "rocky9-hvm", } # We do not publicly publish/release Parallelcluster AMI of below OSSes PRIVATE_OSES = ["rocky8"] diff --git a/cli/src/pcluster/resources/imagebuilder/parallelcluster.yaml b/cli/src/pcluster/resources/imagebuilder/parallelcluster.yaml index eca3fdca8b..c2dd9d10d1 100644 --- a/cli/src/pcluster/resources/imagebuilder/parallelcluster.yaml +++ b/cli/src/pcluster/resources/imagebuilder/parallelcluster.yaml @@ -94,6 +94,10 @@ phases: 
OS='rhel8' elif [ `echo "${!RELEASE}" | grep '^rocky\.8'` ]; then OS='rocky8' + elif [ `echo "${!RELEASE}" | grep '^rhel\.9'` ]; then + OS='rhel9' + elif [ `echo "${!RELEASE}" | grep '^rocky\.9'` ]; then + OS='rocky9' else echo "Operating System '${!RELEASE}' is not supported. Failing build." exit {{ FailExitCode }} @@ -153,10 +157,10 @@ phases: exit {{ FailExitCode }} fi - # This component only supports aarch64 CPUs on Amazon Linux 2, Ubuntu2004, Ubuntu2204, Centos7, RHEL8 and Rocky8 + # This component only supports aarch64 CPUs on Amazon Linux 2, Ubuntu2004, Ubuntu2204, Centos7, RHEL8, Rocky8, RHEL9 and Rocky9 ARCH=$(uname -m) if [[ `echo ${!ARCH}` == 'aarch64' ]]; then - if [ `echo "${!RELEASE}" | grep -Ev '^(amzn\.2|centos\.7|ubuntu\.20\.04|ubuntu\.22\.04|rhel\.8|rocky\.8)'` ]; then + if [ `echo "${!RELEASE}" | grep -Ev '^(amzn\.2|centos\.7|ubuntu\.20\.04|ubuntu\.22\.04|rhel\.8|rocky\.8|rhel\.9|rocky\.9)'` ]; then echo "This component does not support '${!RELEASE}' on ARM64 CPUs. Failing build." 
exit {{ FailExitCode }} fi diff --git a/cli/src/pcluster/resources/imagebuilder/parallelcluster_tag.yaml b/cli/src/pcluster/resources/imagebuilder/parallelcluster_tag.yaml index 79bc885782..743f15331f 100644 --- a/cli/src/pcluster/resources/imagebuilder/parallelcluster_tag.yaml +++ b/cli/src/pcluster/resources/imagebuilder/parallelcluster_tag.yaml @@ -57,6 +57,10 @@ phases: OS='rhel8' elif [ $(echo "${RELEASE}" | grep '^rocky\.8') ]; then OS='rocky8' + elif [ $(echo "${RELEASE}" | grep '^rhel\.9') ]; then + OS='rhel9' + elif [ $(echo "${RELEASE}" | grep '^rocky\.9') ]; then + OS='rocky9' fi echo ${OS} diff --git a/cli/src/pcluster/resources/imagebuilder/parallelcluster_test.yaml b/cli/src/pcluster/resources/imagebuilder/parallelcluster_test.yaml index d2c683e2ad..bb62602043 100644 --- a/cli/src/pcluster/resources/imagebuilder/parallelcluster_test.yaml +++ b/cli/src/pcluster/resources/imagebuilder/parallelcluster_test.yaml @@ -46,6 +46,10 @@ phases: OS='rhel8' elif [ `echo "${RELEASE}" | grep '^rocky\.8'` ]; then OS='rocky8' + elif [ `echo "${RELEASE}" | grep '^rhel\.9'` ]; then + OS='rhel9' + elif [ `echo "${RELEASE}" | grep '^rocky\.9'` ]; then + OS='rocky9' else echo "Operating System '${RELEASE}' is not supported. Failing build." && exit 1 fi diff --git a/cli/src/pcluster/resources/imagebuilder/parallelcluster_validate.yaml b/cli/src/pcluster/resources/imagebuilder/parallelcluster_validate.yaml index f340def9ec..c30a8f39a4 100644 --- a/cli/src/pcluster/resources/imagebuilder/parallelcluster_validate.yaml +++ b/cli/src/pcluster/resources/imagebuilder/parallelcluster_validate.yaml @@ -46,6 +46,10 @@ phases: OS='rhel8' elif [ `echo "${RELEASE}" | grep '^rocky\.8'` ]; then OS='rocky8' + elif [ `echo "${RELEASE}" | grep '^rhel\.9'` ]; then + OS='rhel9' + elif [ `echo "${RELEASE}" | grep '^rocky\.9'` ]; then + OS='rocky9' else echo "Operating System '${RELEASE}' is not supported. Failing build." 
&& exit 1 fi @@ -126,7 +130,7 @@ phases: set -v ARCHITECTURE='{{ validate.OperatingSystemArchitecture.outputs.stdout }}' OS='{{ validate.OperatingSystemName.outputs.stdout }}' - if [ ${ARCHITECTURE} == 'arm64' ] && [[ ${OS} =~ ^(ubuntu(20|22)04|alinux2|rhel8|rocky8)$ ]] || [ ${ARCHITECTURE} == 'x86_64' ]; then + if [ ${ARCHITECTURE} == 'arm64' ] && [[ ${OS} =~ ^(ubuntu(20|22)04|alinux2|rhel8|rocky8|rhel9|rocky9)$ ]] || [ ${ARCHITECTURE} == 'x86_64' ]; then echo "true" else echo "false" diff --git a/cli/src/pcluster/resources/imagebuilder/update_and_reboot.yaml b/cli/src/pcluster/resources/imagebuilder/update_and_reboot.yaml index 4e911fa6d3..eb74a13488 100644 --- a/cli/src/pcluster/resources/imagebuilder/update_and_reboot.yaml +++ b/cli/src/pcluster/resources/imagebuilder/update_and_reboot.yaml @@ -47,6 +47,10 @@ phases: OS='rhel8' elif [ `echo "${!RELEASE}" | grep '^rocky\.8'` ]; then OS='rocky8' + elif [ `echo "${!RELEASE}" | grep '^rhel\.9'` ]; then + OS='rhel9' + elif [ `echo "${!RELEASE}" | grep '^rocky\.9'` ]; then + OS='rocky9' else echo "Operating System '${!RELEASE}' is not supported. Failing build." exit {{ FailExitCode }} @@ -84,10 +88,10 @@ phases: exit {{ FailExitCode }} fi - # This component only supports aarch64 CPUs on Amazon Linux 2, Ubuntu2004, Ubuntu2204, Centos7, RHEL8 and Rocky8 + # This component only supports aarch64 CPUs on Amazon Linux 2, Ubuntu2004, Ubuntu2204, Centos7, RHEL8, Rocky8, RHEL9 and Rocky9 ARCH=$(uname -m) if [[ `echo ${!ARCH}` == 'aarch64' ]]; then - if [ `echo "${!RELEASE}" | grep -Ev '^(amzn\.2|centos\.7|ubuntu\.20\.04|ubuntu\.22\.04|rhel\.8|rocky\.8)'` ]; then + if [ `echo "${!RELEASE}" | grep -Ev '^(amzn\.2|centos\.7|ubuntu\.20\.04|ubuntu\.22\.04|rhel\.8|rocky\.8|rhel\.9|rocky\.9)'` ]; then echo "This component does not support '${!RELEASE}' on ARM64 CPUs. Failing build." 
exit {{ FailExitCode }} fi @@ -238,7 +242,7 @@ phases: if [[ ${!PLATFORM} == RHEL ]]; then yum -y update - if [[ ${!OS} == "rhel8" ]] || [[ ${!OS} == "rocky8" ]] ; then + if [[ ${!OS} == "rhel8" ]] || [[ ${!OS} == "rocky8" ]] || [[ ${!OS} == "rhel9" ]] || [[ ${!OS} == "rocky9" ]] ; then # package-cleanup has changed in RHEL8 and it works differently # RHEL8 keeps at least 2 kernel for fallback reason https://access.redhat.com/solutions/1227 # The kernel cleanup should be performed manually diff --git a/cli/src/pcluster/templates/cw_dashboard_builder.py b/cli/src/pcluster/templates/cw_dashboard_builder.py index 883397b6dc..13d8b1d7f2 100644 --- a/cli/src/pcluster/templates/cw_dashboard_builder.py +++ b/cli/src/pcluster/templates/cw_dashboard_builder.py @@ -733,7 +733,7 @@ def _add_cw_log(self): [ self._new_cw_log_widget( title="system-messages", - conditions=[Condition(["alinux2", "centos7", "rhel8", "rocky8"], base_os)], + conditions=[Condition(["alinux2", "centos7", "rhel8", "rocky8", "rhel9", "rocky9"], base_os)], filters=[self._new_filter(pattern=f"{head_private_ip}.*system-messages")], ), self._new_cw_log_widget( diff --git a/pc_support/os_3.9.0.json b/pc_support/os_3.9.0.json index 9c80eb05d0..93e50d2692 100644 --- a/pc_support/os_3.9.0.json +++ b/pc_support/os_3.9.0.json @@ -19,6 +19,10 @@ { "name": "rhel8", "description": "Red Hat Enterprise Linux 8" + }, + { + "name": "rhel9", + "description": "Red Hat Enterprise Linux 9" } ] } From 32a29cd344864243da5980a96a860f92925b22ac Mon Sep 17 00:00:00 2001 From: Hanwen Date: Wed, 3 Jan 2024 08:25:26 -0800 Subject: [PATCH 2/8] [Integ-tests] Add support for RHEL9 and Rocky9 Signed-off-by: Hanwen --- tests/integration-tests/configs/common.jinja2 | 14 +++++++------- .../cloudwatch_logging/test_cloudwatch_logging.py | 4 ++-- tests/integration-tests/tests/common/utils.py | 12 ++++++++++-- tests/integration-tests/utils.py | 2 ++ 4 files changed, 21 insertions(+), 11 deletions(-) diff --git 
a/tests/integration-tests/configs/common.jinja2 b/tests/integration-tests/configs/common.jinja2 index b852609e5c..d499659512 100644 --- a/tests/integration-tests/configs/common.jinja2 +++ b/tests/integration-tests/configs/common.jinja2 @@ -5,19 +5,19 @@ {%- set SCHEDULERS_ALL = ["slurm", "awsbatch"] -%} {%- set SCHEDULERS_TRAD = ["slurm"] -%} {%- set OSS_BATCH = ["alinux2"] -%} -{%- set OSS_COMMERCIAL_X86 = ["alinux2", "centos7", "ubuntu2004", "ubuntu2204", "rhel8"] -%} -{%- set OSS_CHINA_X86 = ["alinux2", "ubuntu2004", "ubuntu2204", "rhel8", "rocky8"] -%} -{%- set OSS_GOVCLOUD_X86 = ["alinux2", "ubuntu2004", "ubuntu2204", "rhel8", "rocky8"] -%} -{%- set OSS_COMMERCIAL_ARM = ["alinux2", "ubuntu2004", "ubuntu2204", "rhel8"] -%} -{%- set OSS_CHINA_ARM = ["alinux2", "ubuntu2004", "ubuntu2204", "rhel8"] -%} -{%- set OSS_GOVCLOUD_ARM = ["alinux2", "ubuntu2004", "ubuntu2204", "rhel8"] -%} +{%- set OSS_COMMERCIAL_X86 = ["alinux2", "centos7", "ubuntu2004", "ubuntu2204", "rhel8", "rhel9"] -%} +{%- set OSS_CHINA_X86 = ["alinux2", "ubuntu2004", "ubuntu2204", "rhel8", "rocky8", "rhel9", "rocky9"] -%} +{%- set OSS_GOVCLOUD_X86 = ["alinux2", "ubuntu2004", "ubuntu2204", "rhel8", "rocky8", "rhel9", "rocky9"] -%} +{%- set OSS_COMMERCIAL_ARM = ["alinux2", "ubuntu2004", "ubuntu2204", "rhel8", "rhel9"] -%} +{%- set OSS_CHINA_ARM = ["alinux2", "ubuntu2004", "ubuntu2204", "rhel8", "rhel9"] -%} +{%- set OSS_GOVCLOUD_ARM = ["alinux2", "ubuntu2004", "ubuntu2204", "rhel8", "rhel9"] -%} {%- set OSS_ONE_PER_DISTRO = ["centos7", "alinux2", "ubuntu2004", "rhel8", "rocky8"] -%} {%- set INSTANCES_DEFAULT_X86 = ["c5.xlarge"] -%} {%- set INSTANCES_DEFAULT_ARM = ["m6g.xlarge"] -%} # m6g.xlarge is not supported in af-south-1, eu-south-1, eu-west-3, me-south-1 {%- set INSTANCES_DEFAULT = ["c5.xlarge", "m6g.xlarge"] -%} {%- set INSTANCES_EFA_SUPPORTED_X86 = ["c5n.9xlarge"] -%} {%- set INSTANCES_EFA_UNSUPPORTED_X86 = ["t2.micro"] -%} -{%- set NOT_RELEASED_OSES = ["rocky8"] -%} +{%- set NOT_RELEASED_OSES 
= ["rocky8", "rocky9"] -%} {%- macro instance(instance_key) -%} {%- if additional_instance_types_map -%} diff --git a/tests/integration-tests/tests/cloudwatch_logging/test_cloudwatch_logging.py b/tests/integration-tests/tests/cloudwatch_logging/test_cloudwatch_logging.py index 6ec9cbbafb..d262b8f8f8 100644 --- a/tests/integration-tests/tests/cloudwatch_logging/test_cloudwatch_logging.py +++ b/tests/integration-tests/tests/cloudwatch_logging/test_cloudwatch_logging.py @@ -150,9 +150,9 @@ def _base_os_to_platform(base_os): # cookbooks/aws-parallelcluster-config/files/default/cloudwatch the configurations refers to: # * "alinux2" as platform "amazon" # * "rhel8" as platform "redhat" - if base_os == "alinux2": + if "alinux" in base_os: return "amazon" - elif base_os == "rhel8": + elif "rhel" in base_os: return "redhat" else: return base_os.rstrip(string.digits) diff --git a/tests/integration-tests/tests/common/utils.py b/tests/integration-tests/tests/common/utils.py index b02a04ebf1..4633a7660d 100644 --- a/tests/integration-tests/tests/common/utils.py +++ b/tests/integration-tests/tests/common/utils.py @@ -31,6 +31,8 @@ SYSTEM_ANALYZER_SCRIPT = pathlib.Path(__file__).parent / "data/system-analyzer.sh" +RHEL_OWNERS = ["309956199498", "841258680906", "219670896067"] + OS_TO_OFFICIAL_AMI_NAME_OWNER_MAP = { "alinux2": {"name": "amzn2-ami-kernel-5.10-hvm-*.*.*.*-*-gp2", "owners": ["amazon"]}, # TODO: use marketplace AMI if possible @@ -46,9 +48,11 @@ # FIXME: unpin once Lustre client is available for RHEL8.9 # FIXME: when fixed upstream, unpin the timestamp introduced because the `kernel-devel` package was missing for # the kernel released in 20231127 RHEL 8.8 AMI - "rhel8": {"name": "RHEL-8.8*_HVM-202309*", "owners": ["309956199498", "841258680906", "219670896067"]}, + "rhel8": {"name": "RHEL-8.8*_HVM-202309*", "owners": RHEL_OWNERS}, # FIXME: unpin once Lustre client is available for Rocky 8.9 "rocky8": {"name": "Rocky-8-EC2-Base-8.8*", "owners": ["792107900819"]}, # 
TODO add china and govcloud accounts + "rhel9": {"name": "RHEL-9.3*_HVM-*", "owners": RHEL_OWNERS}, + "rocky9": {"name": "Rocky-9-EC2-Base-9.3*", "owners": ["792107900819"]}, # TODO add china and govcloud accounts } # Remarkable AMIs are latest deep learning base AMI and FPGA developer AMI without pcluster infrastructure @@ -60,9 +64,11 @@ # FIXME: unpin once Lustre client is available for RHEL8.9 # FIXME: when fixed upstream, unpin the timestamp introduced because the `kernel-devel` package was missing for # the kernel released in 20231127 RHEL 8.8 AMI - "rhel8": {"name": "RHEL-8.8*_HVM-202309*", "owners": ["309956199498", "841258680906", "219670896067"]}, + "rhel8": {"name": "RHEL-8.8*_HVM-202309*", "owners": RHEL_OWNERS}, # FIXME: unpin once Lustre client is available for Rocky 8.9 "rocky8": {"name": "Rocky-8-EC2-Base-8.8*", "owners": ["792107900819"]}, # TODO add china and govcloud accounts + "rhel9": {"name": "RHEL-9.3*_HVM-*", "owners": RHEL_OWNERS}, + "rocky9": {"name": "Rocky-9-EC2-Base-9.3*", "owners": ["792107900819"]}, # TODO add china and govcloud accounts } OS_TO_KERNEL4_AMI_NAME_OWNER_MAP = { @@ -79,6 +85,8 @@ "ubuntu2204": {"name": "ubuntu-2204-lts-hvm-*-*", "owners": PCLUSTER_AMI_OWNERS}, "rhel8": {"name": "rhel8-hvm-*-*", "owners": PCLUSTER_AMI_OWNERS}, "rocky8": {"name": "rocky8-hvm-*-*", "owners": PCLUSTER_AMI_OWNERS}, + "rhel9": {"name": "rhel9-hvm-*-*", "owners": PCLUSTER_AMI_OWNERS}, + "rocky9": {"name": "rocky9-hvm-*-*", "owners": PCLUSTER_AMI_OWNERS}, } AMI_TYPE_DICT = { diff --git a/tests/integration-tests/utils.py b/tests/integration-tests/utils.py index c02585d624..b16e62511a 100644 --- a/tests/integration-tests/utils.py +++ b/tests/integration-tests/utils.py @@ -551,6 +551,8 @@ def get_username_for_os(os): "ubuntu2204": "ubuntu", "rhel8": "ec2-user", "rocky8": "rocky", + "rhel9": "ec2-user", + "rocky9": "rocky", } return usernames.get(os) From fb0e07efa4621c2142d14b8145e185b9f92e3eee Mon Sep 17 00:00:00 2001 From: Hanwen Date: Tue, 9 Jan 
2024 06:12:24 -0800 Subject: [PATCH 3/8] Remove test_fsx_lustre from new_os test test_multiple_fsx covers the checks in test_fsx_lustre Signed-off-by: Hanwen --- tests/integration-tests/configs/new_os.yaml | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/tests/integration-tests/configs/new_os.yaml b/tests/integration-tests/configs/new_os.yaml index 67bcea24b6..a5c24bd87b 100644 --- a/tests/integration-tests/configs/new_os.yaml +++ b/tests/integration-tests/configs/new_os.yaml @@ -201,16 +201,6 @@ test-suites: schedulers: ["slurm"] storage: # Commercial regions that can't test FSx: ap-northeast-1, ap-southeast-1, ap-southeast-2, eu-central-1, eu-north-1, eu-west-1, eu-west-2, us-east-1, us-east-2, us-west-1, us-west-2 - test_fsx_lustre.py::test_fsx_lustre: - dimensions: - - regions: ["eu-west-2"] - instances: {{ common.INSTANCES_DEFAULT_X86 }} - oss: {{ NEW_OS }} - schedulers: ["slurm"] - - regions: ["eu-north-1"] - instances: {{ common.INSTANCES_DEFAULT_ARM }} - oss: {{ NEW_OS }} - schedulers: ["slurm"] # The checks performed in test_multiple_fsx is the same as test_fsx_lustre. # We should consider this when assigning dimensions to each test. 
test_fsx_lustre.py::test_multiple_fsx: From 63be7ec6bd414b73d9148b2bc928b9a05119305a Mon Sep 17 00:00:00 2001 From: Hanwen Date: Thu, 11 Jan 2024 08:17:20 -0800 Subject: [PATCH 4/8] [Integ-tests] Use common instance type for test_multi_az_create_and_update While testing RHEL9, we found t2.micro does not have enough memory for RHEL9 Signed-off-by: Hanwen --- tests/integration-tests/configs/common/common.yaml | 0 tests/integration-tests/configs/develop.yaml | 1 + tests/integration-tests/configs/new_os.yaml | 1 + tests/integration-tests/conftest.py | 4 ++-- tests/integration-tests/tests/update/test_update.py | 2 +- .../pcluster_create.config.yaml | 6 +++--- .../pcluster_update_1.config.yaml | 6 +++--- .../pcluster_update_2.config.yaml | 6 +++--- 8 files changed, 14 insertions(+), 12 deletions(-) create mode 100644 tests/integration-tests/configs/common/common.yaml diff --git a/tests/integration-tests/configs/common/common.yaml b/tests/integration-tests/configs/common/common.yaml new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/integration-tests/configs/develop.yaml b/tests/integration-tests/configs/develop.yaml index 6abbce3b85..8ca5f13614 100644 --- a/tests/integration-tests/configs/develop.yaml +++ b/tests/integration-tests/configs/develop.yaml @@ -831,6 +831,7 @@ test-suites: - regions: ["eu-west-2"] schedulers: ["slurm"] oss: ["alinux2"] + instances: {{ common.INSTANCES_DEFAULT_X86 }} users: test_default_user_home.py::test_default_user_local_home: dimensions: diff --git a/tests/integration-tests/configs/new_os.yaml b/tests/integration-tests/configs/new_os.yaml index a5c24bd87b..a3d9a95571 100644 --- a/tests/integration-tests/configs/new_os.yaml +++ b/tests/integration-tests/configs/new_os.yaml @@ -285,3 +285,4 @@ test-suites: - regions: [ "eu-west-2" ] schedulers: [ "slurm" ] oss: {{ NEW_OS }} + instances: {{ common.INSTANCES_DEFAULT_X86 }} diff --git a/tests/integration-tests/conftest.py b/tests/integration-tests/conftest.py index 
febbe96dd9..2135528424 100644 --- a/tests/integration-tests/conftest.py +++ b/tests/integration-tests/conftest.py @@ -1407,14 +1407,14 @@ def odcr_stack( AvailabilityZone=availability_zone_1, InstanceCount=2, InstancePlatform="Linux/UNIX", - InstanceType="t3.micro", + InstanceType="c5.xlarge", ) az2_odcr = ec2.CapacityReservation( "az2Odcr", AvailabilityZone=availability_zone_2, InstanceCount=2, InstancePlatform="Linux/UNIX", - InstanceType="t3.micro", + InstanceType="c5.xlarge", ) multi_az_odcr_group = resourcegroups.Group( "multiAzOdcrGroup", diff --git a/tests/integration-tests/tests/update/test_update.py b/tests/integration-tests/tests/update/test_update.py index 7c10972cca..fc6fbb23c1 100644 --- a/tests/integration-tests/tests/update/test_update.py +++ b/tests/integration-tests/tests/update/test_update.py @@ -1536,7 +1536,7 @@ def _test_shared_storage_rollback( boto3.client("fsx", region).describe_file_systems(FileSystemIds=managed_fsx) -@pytest.mark.usefixtures("os") +@pytest.mark.usefixtures("os", "instance") def test_multi_az_create_and_update( region, pcluster_config_reader, clusters_factory, odcr_stack, scheduler_commands_factory, test_datadir ): diff --git a/tests/integration-tests/tests/update/test_update/test_multi_az_create_and_update/pcluster_create.config.yaml b/tests/integration-tests/tests/update/test_update/test_multi_az_create_and_update/pcluster_create.config.yaml index 2fd8010660..e386765228 100644 --- a/tests/integration-tests/tests/update/test_update/test_multi_az_create_and_update/pcluster_create.config.yaml +++ b/tests/integration-tests/tests/update/test_update/test_multi_az_create_and_update/pcluster_create.config.yaml @@ -1,7 +1,7 @@ Image: Os: {{ os }} HeadNode: - InstanceType: t2.micro + InstanceType: {{ instance }} Networking: SubnetId: {{ public_subnet_ids[0] }} Ssh: @@ -13,7 +13,7 @@ Scheduling: ComputeResources: - Name: compute-resource-1 Instances: - - InstanceType: t3.micro + - InstanceType: {{ instance }} MinCount: 6 MaxCount: 
10 CapacityReservationTarget: @@ -26,7 +26,7 @@ Scheduling: ComputeResources: - Name: compute-resource-2 Instances: - - InstanceType: t2.micro + - InstanceType: {{ instance }} MinCount: 0 MaxCount: 4 Networking: diff --git a/tests/integration-tests/tests/update/test_update/test_multi_az_create_and_update/pcluster_update_1.config.yaml b/tests/integration-tests/tests/update/test_update/test_multi_az_create_and_update/pcluster_update_1.config.yaml index d37d29bce2..ca5a32a22b 100644 --- a/tests/integration-tests/tests/update/test_update/test_multi_az_create_and_update/pcluster_update_1.config.yaml +++ b/tests/integration-tests/tests/update/test_update/test_multi_az_create_and_update/pcluster_update_1.config.yaml @@ -1,7 +1,7 @@ Image: Os: {{ os }} HeadNode: - InstanceType: t2.micro + InstanceType: {{ instance }} Networking: SubnetId: {{ public_subnet_ids[0] }} Ssh: @@ -13,7 +13,7 @@ Scheduling: ComputeResources: - Name: compute-resource-1 Instances: - - InstanceType: t3.micro + - InstanceType: {{ instance }} MinCount: 6 MaxCount: 10 CapacityReservationTarget: @@ -26,7 +26,7 @@ Scheduling: ComputeResources: - Name: compute-resource-2 Instances: - - InstanceType: t2.micro + - InstanceType: {{ instance }} MinCount: 0 MaxCount: 4 Networking: diff --git a/tests/integration-tests/tests/update/test_update/test_multi_az_create_and_update/pcluster_update_2.config.yaml b/tests/integration-tests/tests/update/test_update/test_multi_az_create_and_update/pcluster_update_2.config.yaml index 2fd8010660..e386765228 100644 --- a/tests/integration-tests/tests/update/test_update/test_multi_az_create_and_update/pcluster_update_2.config.yaml +++ b/tests/integration-tests/tests/update/test_update/test_multi_az_create_and_update/pcluster_update_2.config.yaml @@ -1,7 +1,7 @@ Image: Os: {{ os }} HeadNode: - InstanceType: t2.micro + InstanceType: {{ instance }} Networking: SubnetId: {{ public_subnet_ids[0] }} Ssh: @@ -13,7 +13,7 @@ Scheduling: ComputeResources: - Name: compute-resource-1 
Instances: - - InstanceType: t3.micro + - InstanceType: {{ instance }} MinCount: 6 MaxCount: 10 CapacityReservationTarget: @@ -26,7 +26,7 @@ Scheduling: ComputeResources: - Name: compute-resource-2 Instances: - - InstanceType: t2.micro + - InstanceType: {{ instance }} MinCount: 0 MaxCount: 4 Networking: From d52950774f96143f1fdc03d19ad127844b85a8d5 Mon Sep 17 00:00:00 2001 From: Hanwen Date: Thu, 11 Jan 2024 11:58:33 -0800 Subject: [PATCH 5/8] [Integ-tests] Upgrade Lustre server to 2.12 FYI: the version of Lustre file systems created by pcluster CLI has already been upgraded to 2.12. We forgot to update integration tests code. Signed-off-by: Hanwen --- cloudformation/storage/storage-stack.yaml | 1 + tests/integration-tests/tests/storage/test_fsx_lustre.py | 1 + 2 files changed, 2 insertions(+) diff --git a/cloudformation/storage/storage-stack.yaml b/cloudformation/storage/storage-stack.yaml index f4b03c4f01..ca36d8c889 100644 --- a/cloudformation/storage/storage-stack.yaml +++ b/cloudformation/storage/storage-stack.yaml @@ -195,6 +195,7 @@ Resources: Condition: CreateFsxLustre Properties: FileSystemType: LUSTRE + FileSystemTypeVersion: '2.12' LustreConfiguration: DeploymentType: PERSISTENT_1 ExportPath: !Ref FsxLustreExportPath diff --git a/tests/integration-tests/tests/storage/test_fsx_lustre.py b/tests/integration-tests/tests/storage/test_fsx_lustre.py index dfeacdc81e..6dc156bdb7 100644 --- a/tests/integration-tests/tests/storage/test_fsx_lustre.py +++ b/tests/integration-tests/tests/storage/test_fsx_lustre.py @@ -525,6 +525,7 @@ def _create_fsx_lustre_volume_ids(num_existing_fsx_lustre, fsx_factory, import_p DeploymentType="PERSISTENT_1", PerUnitStorageThroughput=200, ), + FileSystemTypeVersion="2.12", ) From 93e87e627c92d624cc3b0ecbc0b167e5a7d65d0f Mon Sep 17 00:00:00 2001 From: Hanwen Date: Thu, 11 Jan 2024 11:58:17 -0800 Subject: [PATCH 6/8] [Integ-tests] Use right permission to operate on log files In many of the operating systems ParallelCluster supported, 
`root` user could bypass any file permissions. However, on RHEL9 and Rocky9, the `root` user cannot bypass file permissions by default. Therefore, this commit changes the integration tests to operate on files as the user that owns them. Signed-off-by: Hanwen --- tests/integration-tests/remote_command_executor.py | 12 +++++++++++- .../cloudwatch_logging/test_cloudwatch_logging.py | 3 ++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/tests/integration-tests/remote_command_executor.py b/tests/integration-tests/remote_command_executor.py index 901e494d32..8c314fc3a8 100644 --- a/tests/integration-tests/remote_command_executor.py +++ b/tests/integration-tests/remote_command_executor.py @@ -233,7 +233,7 @@ def get_remote_files(self, *args, **kwargs): def clear_log_file(self, path: str): """Clear a log file in a specific path.""" - self.run_remote_command(f"sudo truncate -s 0 {path}") + self.run_remote_command(f"sudo -u {self.get_user_to_operate_on_file(path)} truncate -s 0 {path}") def clear_clustermgtd_log(self): """Clear clustermgtd log file.""" @@ -249,3 +249,13 @@ def clear_slurmctld_log(self): """Clear slurmctld log file.""" self.clear_log_file("/var/log/slurmctld.log") + + def get_user_to_operate_on_file(self, path): + """Get the user to operate on the file.""" + + file_owner_result = self.run_remote_command(f"stat -c '%U' {path}", raise_on_error=False) + if file_owner_result.failed: + # If failed, it means the `path` does not contain files/directories. Use root as the default owner. 
+ return "root" + else: + return file_owner_result.stdout.strip() diff --git a/tests/integration-tests/tests/cloudwatch_logging/test_cloudwatch_logging.py b/tests/integration-tests/tests/cloudwatch_logging/test_cloudwatch_logging.py index d262b8f8f8..5f298f6222 100644 --- a/tests/integration-tests/tests/cloudwatch_logging/test_cloudwatch_logging.py +++ b/tests/integration-tests/tests/cloudwatch_logging/test_cloudwatch_logging.py @@ -356,7 +356,8 @@ def _write_dummy_message_to_log(self, log_path, node_type): dummy_log_entry = "CloudWatch logs integ test - ensuring critical log file is not empty" self._run_command_on_head_node(f"echo '{dummy_log_entry}' > {dummy_log_message_path}") # Append the dummy entry to the log - cmd = f"sudo tee -a {log_path} < {dummy_log_message_path}" + log_file_user = self.remote_command_executor.get_user_to_operate_on_file(log_path) + cmd = f"sudo -u {log_file_user} tee -a {log_path} < {dummy_log_message_path}" if node_type == HEAD_NODE_ROLE_NAME: self._run_command_on_head_node(cmd) elif node_type == COMPUTE_NODE_ROLE_NAME: From e9acc05aa68f4749986ed53453280975b8ca23f5 Mon Sep 17 00:00:00 2001 From: Hanwen Date: Wed, 17 Jan 2024 06:46:56 -0800 Subject: [PATCH 7/8] [unit-tests] Add support for RHEL9 and Rocky9 Signed-off-by: Hanwen --- .../test_disabled_efa_no_placement_group/output.txt | 2 ++ .../test_efa_not_supported/output.txt | 2 ++ .../test_enabled_efa_default_placement_group/output.txt | 2 ++ .../test_enabled_efa_existing_placement_group/output.txt | 2 ++ .../output.txt | 2 ++ .../test_filtered_subnets_by_az/output.txt | 2 ++ .../test_no_automation_no_awsbatch_no_errors/output.txt | 2 ++ .../test_no_input_no_automation_no_errors/output.txt | 2 ++ .../test_subnet_automation_no_awsbatch_no_errors/output.txt | 2 ++ .../output.txt | 2 ++ .../test_vpc_automation_no_awsbatch_no_errors/output.txt | 2 ++ .../test_vpc_automation_no_vpc_in_region/output.txt | 2 ++ .../test_vpc_automation_no_vpc_in_region_public/output.txt | 2 ++ 
.../test_pcluster_configure/test_with_region_arg/output.txt | 2 ++ cli/tests/pcluster/test_utils.py | 6 +++--- 15 files changed, 31 insertions(+), 3 deletions(-) diff --git a/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_disabled_efa_no_placement_group/output.txt b/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_disabled_efa_no_placement_group/output.txt index 3c767bcb38..78340cf77f 100644 --- a/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_disabled_efa_no_placement_group/output.txt +++ b/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_disabled_efa_no_placement_group/output.txt @@ -36,6 +36,8 @@ Allowed values for Operating System: 4. ubuntu2204 5. rhel8 6. rocky8 +7. rhel9 +8. rocky9 The EC2 instance selected supports enhanced networking capabilities using Elastic Fabric Adapter (EFA). EFA enables you to run applications requiring high levels of inter-node communications at scale on AWS at no additional charge (https://docs.aws.amazon.com/parallelcluster/latest/ug/efa-v3.html). Allowed values for VPC ID: # id name number_of_subnets diff --git a/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_efa_not_supported/output.txt b/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_efa_not_supported/output.txt index c4d16ff91e..b87947ec7b 100644 --- a/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_efa_not_supported/output.txt +++ b/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_efa_not_supported/output.txt @@ -36,6 +36,8 @@ Allowed values for Operating System: 4. ubuntu2204 5. rhel8 6. rocky8 +7. rhel9 +8. 
rocky9 Allowed values for VPC ID: # id name number_of_subnets --- ------------ --------------------------------- ------------------- diff --git a/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_enabled_efa_default_placement_group/output.txt b/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_enabled_efa_default_placement_group/output.txt index 40a5362b67..92157a83c9 100644 --- a/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_enabled_efa_default_placement_group/output.txt +++ b/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_enabled_efa_default_placement_group/output.txt @@ -36,6 +36,8 @@ Allowed values for Operating System: 4. ubuntu2204 5. rhel8 6. rocky8 +7. rhel9 +8. rocky9 The EC2 instance selected supports enhanced networking capabilities using Elastic Fabric Adapter (EFA). EFA enables you to run applications requiring high levels of inter-node communications at scale on AWS at no additional charge (https://docs.aws.amazon.com/parallelcluster/latest/ug/efa-v3.html). Enabling EFA requires compute instances to be placed within a Placement Group. Please specify an existing Placement Group name or leave it blank for ParallelCluster to create one. Allowed values for VPC ID: diff --git a/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_enabled_efa_existing_placement_group/output.txt b/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_enabled_efa_existing_placement_group/output.txt index 40a5362b67..92157a83c9 100644 --- a/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_enabled_efa_existing_placement_group/output.txt +++ b/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_enabled_efa_existing_placement_group/output.txt @@ -36,6 +36,8 @@ Allowed values for Operating System: 4. ubuntu2204 5. rhel8 6. rocky8 +7. rhel9 +8. rocky9 The EC2 instance selected supports enhanced networking capabilities using Elastic Fabric Adapter (EFA). 
EFA enables you to run applications requiring high levels of inter-node communications at scale on AWS at no additional charge (https://docs.aws.amazon.com/parallelcluster/latest/ug/efa-v3.html). Enabling EFA requires compute instances to be placed within a Placement Group. Please specify an existing Placement Group name or leave it blank for ParallelCluster to create one. Allowed values for VPC ID: diff --git a/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_enabled_efa_non_existent_placement_group/output.txt b/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_enabled_efa_non_existent_placement_group/output.txt index 79f824d180..6ceb079b46 100644 --- a/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_enabled_efa_non_existent_placement_group/output.txt +++ b/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_enabled_efa_non_existent_placement_group/output.txt @@ -36,6 +36,8 @@ Allowed values for Operating System: 4. ubuntu2204 5. rhel8 6. rocky8 +7. rhel9 +8. rocky9 The EC2 instance selected supports enhanced networking capabilities using Elastic Fabric Adapter (EFA). EFA enables you to run applications requiring high levels of inter-node communications at scale on AWS at no additional charge (https://docs.aws.amazon.com/parallelcluster/latest/ug/efa-v3.html). Enabling EFA requires compute instances to be placed within a Placement Group. Please specify an existing Placement Group name or leave it blank for ParallelCluster to create one. 
ERROR: non-existent-test-pg is not an acceptable value for Placement Group name diff --git a/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_filtered_subnets_by_az/output.txt b/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_filtered_subnets_by_az/output.txt index 95614586c5..bc322d589d 100644 --- a/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_filtered_subnets_by_az/output.txt +++ b/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_filtered_subnets_by_az/output.txt @@ -36,6 +36,8 @@ Allowed values for Operating System: 4. ubuntu2204 5. rhel8 6. rocky8 +7. rhel9 +8. rocky9 Allowed values for VPC ID: # id name number_of_subnets --- ------------ --------------------------------- ------------------- diff --git a/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_no_automation_no_awsbatch_no_errors/output.txt b/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_no_automation_no_awsbatch_no_errors/output.txt index c4d16ff91e..b87947ec7b 100644 --- a/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_no_automation_no_awsbatch_no_errors/output.txt +++ b/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_no_automation_no_awsbatch_no_errors/output.txt @@ -36,6 +36,8 @@ Allowed values for Operating System: 4. ubuntu2204 5. rhel8 6. rocky8 +7. rhel9 +8. 
rocky9 Allowed values for VPC ID: # id name number_of_subnets --- ------------ --------------------------------- ------------------- diff --git a/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_no_input_no_automation_no_errors/output.txt b/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_no_input_no_automation_no_errors/output.txt index 8491bc28fc..36e30a95bf 100644 --- a/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_no_input_no_automation_no_errors/output.txt +++ b/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_no_input_no_automation_no_errors/output.txt @@ -36,6 +36,8 @@ Allowed values for Operating System: 4. ubuntu2204 5. rhel8 6. rocky8 +7. rhel9 +8. rocky9 Allowed values for VPC ID: # id name number_of_subnets --- ------------ --------------------------------- ------------------- diff --git a/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_subnet_automation_no_awsbatch_no_errors/output.txt b/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_subnet_automation_no_awsbatch_no_errors/output.txt index f8ce58a54d..09a69811cf 100644 --- a/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_subnet_automation_no_awsbatch_no_errors/output.txt +++ b/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_subnet_automation_no_awsbatch_no_errors/output.txt @@ -36,6 +36,8 @@ Allowed values for Operating System: 4. ubuntu2204 5. rhel8 6. rocky8 +7. rhel9 +8. 
rocky9 Allowed values for VPC ID: # id name number_of_subnets --- ------------ --------------------------------- ------------------- diff --git a/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_subnet_automation_no_awsbatch_no_errors_empty_vpc/output.txt b/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_subnet_automation_no_awsbatch_no_errors_empty_vpc/output.txt index 944a45aabb..152124bb11 100644 --- a/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_subnet_automation_no_awsbatch_no_errors_empty_vpc/output.txt +++ b/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_subnet_automation_no_awsbatch_no_errors_empty_vpc/output.txt @@ -36,6 +36,8 @@ Allowed values for Operating System: 4. ubuntu2204 5. rhel8 6. rocky8 +7. rhel9 +8. rocky9 Allowed values for VPC ID: # id name number_of_subnets --- ------------ --------------------------------- ------------------- diff --git a/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_vpc_automation_no_awsbatch_no_errors/output.txt b/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_vpc_automation_no_awsbatch_no_errors/output.txt index 0c67e1bdcb..1811eefc71 100644 --- a/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_vpc_automation_no_awsbatch_no_errors/output.txt +++ b/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_vpc_automation_no_awsbatch_no_errors/output.txt @@ -36,6 +36,8 @@ Allowed values for Operating System: 4. ubuntu2204 5. rhel8 6. rocky8 +7. rhel9 +8. rocky9 Allowed values for Availability Zone: 1. eu-west-1a 2. 
eu-west-1b diff --git a/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_vpc_automation_no_vpc_in_region/output.txt b/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_vpc_automation_no_vpc_in_region/output.txt index 961f70a8f9..e5c1844997 100644 --- a/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_vpc_automation_no_vpc_in_region/output.txt +++ b/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_vpc_automation_no_vpc_in_region/output.txt @@ -36,6 +36,8 @@ Allowed values for Operating System: 4. ubuntu2204 5. rhel8 6. rocky8 +7. rhel9 +8. rocky9 There are no VPC for the given region. Starting automatic creation of VPC and subnets... Allowed values for Availability Zone: 1. eu-west-1a diff --git a/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_vpc_automation_no_vpc_in_region_public/output.txt b/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_vpc_automation_no_vpc_in_region_public/output.txt index 961f70a8f9..e5c1844997 100644 --- a/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_vpc_automation_no_vpc_in_region_public/output.txt +++ b/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_vpc_automation_no_vpc_in_region_public/output.txt @@ -36,6 +36,8 @@ Allowed values for Operating System: 4. ubuntu2204 5. rhel8 6. rocky8 +7. rhel9 +8. rocky9 There are no VPC for the given region. Starting automatic creation of VPC and subnets... Allowed values for Availability Zone: 1. eu-west-1a diff --git a/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_with_region_arg/output.txt b/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_with_region_arg/output.txt index ef47c4e193..e8c27189b4 100644 --- a/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_with_region_arg/output.txt +++ b/cli/tests/pcluster/cli/configure/test_pcluster_configure/test_with_region_arg/output.txt @@ -19,6 +19,8 @@ Allowed values for Operating System: 4. ubuntu2204 5. 
rhel8 6. rocky8 +7. rhel9 +8. rocky9 Allowed values for VPC ID: # id name number_of_subnets --- ------------ --------------------------------- ------------------- diff --git a/cli/tests/pcluster/test_utils.py b/cli/tests/pcluster/test_utils.py index 2a092ab0b9..ad92f28559 100644 --- a/cli/tests/pcluster/test_utils.py +++ b/cli/tests/pcluster/test_utils.py @@ -86,8 +86,8 @@ def test_generate_random_prefix(): @pytest.mark.parametrize( "architecture, supported_oses", [ - ("x86_64", ["alinux2", "centos7", "ubuntu2004", "ubuntu2204", "rhel8", "rocky8"]), - ("arm64", ["alinux2", "centos7", "ubuntu2004", "ubuntu2204", "rhel8", "rocky8"]), + ("x86_64", ["alinux2", "centos7", "ubuntu2004", "ubuntu2204", "rhel8", "rocky8", "rhel9", "rocky9"]), + ("arm64", ["alinux2", "centos7", "ubuntu2004", "ubuntu2204", "rhel8", "rocky8", "rhel9", "rocky9"]), ], ) def test_get_supported_os_for_architecture(architecture, supported_oses): @@ -100,7 +100,7 @@ def test_get_supported_os_for_architecture(architecture, supported_oses): @pytest.mark.parametrize( "scheduler, supported_oses", [ - ("slurm", ["alinux2", "centos7", "ubuntu2004", "ubuntu2204", "rhel8", "rocky8"]), + ("slurm", ["alinux2", "centos7", "ubuntu2004", "ubuntu2204", "rhel8", "rocky8", "rhel9", "rocky9"]), ("awsbatch", ["alinux2"]), ], ) From 7bfddc2514547043f6fe8759db589b75933d5a73 Mon Sep 17 00:00:00 2001 From: Hanwen Date: Wed, 17 Jan 2024 11:55:18 -0800 Subject: [PATCH 8/8] Use command from RedHat guide to clean up old kernels The commands prior to this PR do not work with RHEL9. Therefore, we change the logic to use the command from the verified guide. I discussed the change with Giordano. We didn't have any concern. 
Signed-off-by: Hanwen --- .../resources/imagebuilder/update_and_reboot.yaml | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/cli/src/pcluster/resources/imagebuilder/update_and_reboot.yaml b/cli/src/pcluster/resources/imagebuilder/update_and_reboot.yaml index eb74a13488..ebb464f23a 100644 --- a/cli/src/pcluster/resources/imagebuilder/update_and_reboot.yaml +++ b/cli/src/pcluster/resources/imagebuilder/update_and_reboot.yaml @@ -243,19 +243,8 @@ phases: yum -y update if [[ ${!OS} == "rhel8" ]] || [[ ${!OS} == "rocky8" ]] || [[ ${!OS} == "rhel9" ]] || [[ ${!OS} == "rocky9" ]] ; then - # package-cleanup has changed in RHEL8 and it works differently - # RHEL8 keeps at least 2 kernel for fallback reason https://access.redhat.com/solutions/1227 - # The kernel cleanup should be performed manually - - # get the default kernel for the next boot - LAST_KERNEL_VERSION=$(grubby --default-kernel | sed -e "s/.*-\(4.18.0-.*\).$(uname -m)/\1/g") - - # get all the installed kernel versions except the one for the next boot - KERNEL_VERSIONS_CLEANUP=$(rpm -q --qf "%{VERSION}-%{RELEASE}\n" kernel | grep -v "$LAST_KERNEL_VERSION") - for VERSION in $KERNEL_VERSIONS_CLEANUP; do - echo "Removing kernel-$VERSION kernel-core-$VERSION kernel-modules-$VERSION" - rpm -e kernel-$VERSION kernel-core-$VERSION kernel-modules-$VERSION; - done + # package-cleanup has changed in RHEL8 and it works differently https://access.redhat.com/solutions/1227 + yum remove $(yum repoquery --installonly --latest-limit=-2 -q) else package-cleanup -y --oldkernels --count=1 fi