From 0f44cd657997fd39e3489e741fb50c40f0a87839 Mon Sep 17 00:00:00 2001 From: chenwany Date: Thu, 20 Oct 2022 17:08:22 -0700 Subject: [PATCH 1/3] Upgrade Intel MPI to version 2021.6.0 Backport https://github.com/aws/aws-parallelcluster-cookbook/pull/1503/files Signed-off-by: chenwany --- CHANGELOG.md | 6 ++++++ attributes/default.rb | 6 +++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 75a7a608ed..b496c07ced 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,12 @@ aws-parallelcluster-cookbook CHANGELOG This file is used to list changes made in each version of the AWS ParallelCluster cookbook. +2.11.8 +----- + +**CHANGES** +- Upgrade Intel MPI Library to 2021.6.0.602. + 2.11.7 ----- diff --git a/attributes/default.rb b/attributes/default.rb index 1112c9aecd..c708af8bbd 100644 --- a/attributes/default.rb +++ b/attributes/default.rb @@ -63,10 +63,10 @@ default['cfncluster']['intelpython3']['version'] = '2020.2-902' # Intel MPI -default['cfncluster']['intelmpi']['version'] = '2021.4.0' -default['cfncluster']['intelmpi']['full_version'] = "#{node['cfncluster']['intelmpi']['version']}.441" +default['cfncluster']['intelmpi']['version'] = '2021.6.0' +default['cfncluster']['intelmpi']['full_version'] = "#{node['cfncluster']['intelmpi']['version']}.602" default['cfncluster']['intelmpi']['modulefile'] = "/opt/intel/mpi/#{node['cfncluster']['intelmpi']['version']}/modulefiles/mpi" -default['cfncluster']['intelmpi']['kitchen_test_string'] = 'Version 2021.4' +default['cfncluster']['intelmpi']['kitchen_test_string'] = 'Version 2021.6' default['cfncluster']['intelmpi']['qt_version'] = '5.15.2' # Arm Performance Library From bdd5ed31508a1301609be7b6b40c19d0a0f9e362 Mon Sep 17 00:00:00 2001 From: chenwany Date: Thu, 20 Oct 2022 17:21:58 -0700 Subject: [PATCH 2/3] Backport upgrade EFA to 1.18.0 Signed-off-by: chenwany --- CHANGELOG.md | 7 +++++++ attributes/default.rb | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b496c07ced..d79539f2cf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,13 @@ This file is used to list changes made in each version of the AWS ParallelCluste **CHANGES** - Upgrade Intel MPI Library to 2021.6.0.602. +- Upgrade EFA installer to `1.18.0` + - Efa-driver: `efa-1.16.0-1` + - Efa-config: `efa-config-1.11-1` + - Efa-profile: `efa-profile-1.5-1` + - Libfabric-aws: `libfabric-aws-1.16.0~amzn4.0-1` + - Rdma-core: `rdma-core-41.0-2` + - Open MPI: `openmpi40-aws-4.1.4-2` 2.11.7 ----- diff --git a/attributes/default.rb b/attributes/default.rb index c708af8bbd..1eda51e394 100644 --- a/attributes/default.rb +++ b/attributes/default.rb @@ -145,7 +145,7 @@ ) # EFA -default['cfncluster']['efa']['installer_version'] = '1.14.1' +default['cfncluster']['efa']['installer_version'] = '1.18.0' default['cfncluster']['efa']['installer_url'] = "https://efa-installer.amazonaws.com/aws-efa-installer-#{node['cfncluster']['efa']['installer_version']}.tar.gz" default['cfncluster']['efa']['unsupported_aarch64_oses'] = %w[centos7] From cdce0070b2e409811bd5e40450991cbd52a0e741 Mon Sep 17 00:00:00 2001 From: chenwany Date: Thu, 20 Oct 2022 17:23:16 -0700 Subject: [PATCH 3/3] Backport Remove check on EFA GDR Backport https://github.com/aws/aws-parallelcluster-cookbook/pull/1466 Signed-off-by: chenwany --- recipes/tests.rb | 7 ------- 1 file changed, 7 deletions(-) diff --git a/recipes/tests.rb b/recipes/tests.rb index 8e9ba8faed..1fb1e49ad8 100644 --- a/recipes/tests.rb +++ b/recipes/tests.rb @@ -310,13 +310,6 @@ module load intelmpi && mpirun --help | grep '#{node['cfncluster']['intelmpi'][' grep "EFA installer version: #{node['cfncluster']['efa']['installer_version']}" /opt/amazon/efa_installed_packages EFA end - # GDR (GPUDirect RDMA) - if node['conditions']['efa_supported'] - execute 'check efa gdr installed' do - command "modinfo efa | grep 'gdr:\ *Y'" - user node['cfncluster']['cfn_cluster_user'] - end - end end ###################