From 46daffb8275c26e59e14dceadec7a0373c76a964 Mon Sep 17 00:00:00 2001 From: Helena Greebe Date: Fri, 28 Jun 2024 13:26:10 -0400 Subject: [PATCH 1/7] [ADC Build Image] Get build dependencies through awscli --- .../recipes/awsbatch_virtualenv.rb | 20 ------------- .../recipes/install.rb | 25 +++++++++++++++-- .../install/custom_parallelcluster_node.rb | 15 +++++----- .../recipes/install/parallelcluster_node.rb | 24 ++++++++++++++-- .../recipes/install/cfn_bootstrap.rb | 11 ++------ .../cloudwatch/partial/_cloudwatch_common.rb | 2 +- .../resources/efa/efa_alinux2.rb | 2 +- .../resources/efa/partial/_common.rb | 4 +++ .../resources/lustre/lustre_redhat8.rb | 6 ++-- .../recipes/install/awscli.rb | 18 ++++++++++-- .../recipes/install/cookbook_virtualenv.rb | 9 +----- .../recipes/install/cuda.rb | 28 ++++++++++++------- .../recipes/install/intel_mpi.rb | 19 ++++++++----- .../resources/dcv/partial/_dcv_common.rb | 15 ++++++---- .../partial/_fabric_manager_install_rhel.rb | 14 ++-------- .../gdrcopy/partial/_gdrcopy_common.rb | 14 ++++++---- .../gdrcopy/partial/_gdrcopy_common_rhel.rb | 2 +- .../install_packages_amazon2.rb | 2 +- .../partial/_install_packages_common.rb | 2 +- .../nvidia_dcgm/partial/_nvidia_dcgm_rhel.rb | 12 ++------ .../partial/_nvidia_driver_common.rb | 13 ++++++--- .../attributes/environment.rb | 3 +- .../recipes/install/install_jwt.rb | 16 ++++++----- .../recipes/install/install_pmix.rb | 14 ++++++---- .../recipes/install/install_slurm.rb | 14 ++++++---- .../resources/munge/partial/_munge_actions.rb | 16 ++++++----- .../mysql_client/partial/_setup_rhel_based.rb | 19 ++++++++----- 27 files changed, 194 insertions(+), 145 deletions(-) diff --git a/cookbooks/aws-parallelcluster-awsbatch/recipes/awsbatch_virtualenv.rb b/cookbooks/aws-parallelcluster-awsbatch/recipes/awsbatch_virtualenv.rb index cbcd61cf00..936a15edd9 100644 --- a/cookbooks/aws-parallelcluster-awsbatch/recipes/awsbatch_virtualenv.rb +++ b/cookbooks/aws-parallelcluster-awsbatch/recipes/awsbatch_virtualenv.rb @@ -28,25 +28,5 @@ not_if { ::File.exist?("#{virtualenv_path}/bin/activate") } end -remote_file "#{node['cluster']['base_dir']}/awsbatch-dependencies.tgz" do - source "#{node['cluster']['artifacts_s3_url']}/dependencies/PyPi/#{node['kernel']['machine']}/awsbatch-dependencies.tgz" - mode '0644' - retries 3 - retry_delay 5 - action :create_if_missing -end - -bash 'pip install' do - user 'root' - group 'root' - cwd "#{node['cluster']['base_dir']}" - code <<-REQ - set -e - tar xzf awsbatch-dependencies.tgz - cd awsbatch - #{virtualenv_path}/bin/pip install * -f ./ --no-index - REQ -end - node.default['cluster']['awsbatch_virtualenv_path'] = virtualenv_path node_attributes "dump node attributes" diff --git a/cookbooks/aws-parallelcluster-awsbatch/recipes/install.rb b/cookbooks/aws-parallelcluster-awsbatch/recipes/install.rb index 0057c6bb65..64be5fb28c 100644 --- a/cookbooks/aws-parallelcluster-awsbatch/recipes/install.rb +++ b/cookbooks/aws-parallelcluster-awsbatch/recipes/install.rb @@ -42,6 +42,13 @@ curl --retry 3 -L -o aws-parallelcluster.tgz ${custom_package_url} mkdir aws-parallelcluster-awsbatch-cli tar -xzf aws-parallelcluster.tgz --directory aws-parallelcluster-awsbatch-cli + + aws s3 cp #{node['cluster']['artifacts_build_url']}/PyPi/#{node['kernel']['machine']}/awsbatch-dependencies.tgz awsbatch-dependencies.tgz --region #{node['cluster']['region']} + tar xzf awsbatch-dependencies.tgz + cd awsbatch + #{node['cluster']['awsbatch_virtualenv_path']}/bin/pip install * -f ./ --no-index + cd .. + cd aws-parallelcluster-awsbatch-cli/*aws-parallelcluster-* #{node['cluster']['awsbatch_virtualenv_path']}/bin/pip install awsbatch-cli/ @@ -49,7 +56,21 @@ end else # Install aws-parallelcluster-awsbatch-cli package - execute "pip_install_parallelcluster_awsbatch_cli" do - command "#{node['cluster']['awsbatch_virtualenv_path']}/bin/pip install aws-parallelcluster-awsbatch-cli==#{node['cluster']['parallelcluster-awsbatch-cli-version']}" + bash "install aws-parallelcluster-awsbatch-cli" do + cwd Chef::Config[:file_cache_path] + code <<-CLI + set -e + package_url=#{node['cluster']['artifacts_build_url']}/awsbatch/aws-parallelcluster.tgz + aws s3 cp ${package_url} aws-parallelcluster.tgz --region #{node['cluster']['region']} + mkdir aws-parallelcluster-awsbatch-cli + tar -xzf aws-parallelcluster.tgz --directory aws-parallelcluster-awsbatch-cli + aws s3 cp #{node['cluster']['artifacts_build_url']}/PyPi/#{node['kernel']['machine']}/awsbatch-dependencies.tgz awsbatch-dependencies.tgz --region #{node['cluster']['region']} + tar xzf awsbatch-dependencies.tgz + cd awsbatch + #{node['cluster']['awsbatch_virtualenv_path']}/bin/pip install * -f ./ --no-index + cd .. + cd aws-parallelcluster-awsbatch-cli/*aws-parallelcluster-* + #{node['cluster']['awsbatch_virtualenv_path']}/bin/pip install awsbatch-cli/ + CLI end end diff --git a/cookbooks/aws-parallelcluster-computefleet/recipes/install/custom_parallelcluster_node.rb b/cookbooks/aws-parallelcluster-computefleet/recipes/install/custom_parallelcluster_node.rb index 9bdee2da57..751f829fb1 100644 --- a/cookbooks/aws-parallelcluster-computefleet/recipes/install/custom_parallelcluster_node.rb +++ b/cookbooks/aws-parallelcluster-computefleet/recipes/install/custom_parallelcluster_node.rb @@ -20,14 +20,6 @@ # TODO: once the pyenv Chef resource supports installing packages from a path (e.g. `pip install .`), convert the # bash block to a recipe that uses the pyenv resource. -remote_file "#{Chef::Config[:file_cache_path]}/node-dependencies.tgz" do - source "#{node['cluster']['artifacts_s3_url']}/dependencies/PyPi/#{node['kernel']['machine']}/node-dependencies.tgz" - mode '0644' - retries 3 - retry_delay 5 - action :create_if_missing -end - bash "install custom aws-parallelcluster-node" do cwd Chef::Config[:file_cache_path] code <<-NODE @@ -45,6 +37,13 @@ rm -fr aws-parallelcluster-custom-node mkdir aws-parallelcluster-custom-node tar -xzf aws-parallelcluster-node.tgz --directory aws-parallelcluster-custom-node + + aws s3 cp #{node['cluster']['artifacts_build_url']}/PyPi/#{node['kernel']['machine']}/node-dependencies.tgz node-dependencies.tgz --region #{node['cluster']['region']} + tar xzf node-dependencies.tgz + cd node + #{node_virtualenv_path}/bin/pip install * -f ./ --no-index + cd .. + cd aws-parallelcluster-custom-node/*aws-parallelcluster-node-* pip install . deactivate diff --git a/cookbooks/aws-parallelcluster-computefleet/recipes/install/parallelcluster_node.rb b/cookbooks/aws-parallelcluster-computefleet/recipes/install/parallelcluster_node.rb index 8b3f0ebed5..ae7b70b222 100644 --- a/cookbooks/aws-parallelcluster-computefleet/recipes/install/parallelcluster_node.rb +++ b/cookbooks/aws-parallelcluster-computefleet/recipes/install/parallelcluster_node.rb @@ -36,7 +36,27 @@ if is_custom_node? include_recipe 'aws-parallelcluster-computefleet::custom_parallelcluster_node' else - execute "install official aws-parallelcluster-node" do - command "#{virtualenv_path}/bin/pip install aws-parallelcluster-node==#{node['cluster']['parallelcluster-node-version']}" + bash "install official aws-parallelcluster-node" do + cwd Chef::Config[:file_cache_path] + code <<-NODE + set -e + [[ ":$PATH:" != *":/usr/local/bin:"* ]] && PATH="/usr/local/bin:${PATH}" + echo "PATH is $PATH" + source #{node_virtualenv_path}/bin/activate + pip uninstall --yes aws-parallelcluster-node + node_url=#{node['cluster']['artifacts_build_url']}/node/aws-parallelcluster-node.tgz + aws s3 cp ${node_url} aws-parallelcluster-node.tgz --region #{node['cluster']['region']} + rm -fr aws-parallelcluster-node + mkdir aws-parallelcluster-node + tar -xzf aws-parallelcluster-node.tgz --directory aws-parallelcluster-node + aws s3 cp #{node['cluster']['artifacts_build_url']}/PyPi/#{node['kernel']['machine']}/node-dependencies.tgz node-dependencies.tgz --region #{node['cluster']['region']} + tar xzf node-dependencies.tgz + cd node + #{node_virtualenv_path}/bin/pip install * -f ./ --no-index + cd .. + cd aws-parallelcluster-node/*aws-parallelcluster-node-* + pip install . + deactivate + NODE end end diff --git a/cookbooks/aws-parallelcluster-environment/recipes/install/cfn_bootstrap.rb b/cookbooks/aws-parallelcluster-environment/recipes/install/cfn_bootstrap.rb index 616b4c93a7..471354f0a3 100644 --- a/cookbooks/aws-parallelcluster-environment/recipes/install/cfn_bootstrap.rb +++ b/cookbooks/aws-parallelcluster-environment/recipes/install/cfn_bootstrap.rb @@ -33,20 +33,13 @@ not_if { ::File.exist?("#{virtualenv_path}/bin/activate") } end -remote_file "#{node['cluster']['base_dir']}/cfn-dependencies.tgz" do - source "#{node['cluster']['artifacts_s3_url']}/dependencies/PyPi/#{node['kernel']['machine']}/cfn-dependencies.tgz" - mode '0644' - retries 3 - retry_delay 5 - action :create_if_missing -end - bash 'pip install' do user 'root' group 'root' cwd "#{node['cluster']['base_dir']}" code <<-REQ set -e + aws s3 cp #{node['cluster']['artifacts_build_url']}/PyPi/#{node['kernel']['machine']}/cfn-dependencies.tgz cfn-dependencies.tgz --region #{node['cluster']['region']} tar xzf cfn-dependencies.tgz cd cfn #{virtualenv_path}/bin/pip install * -f ./ --no-index @@ -57,7 +50,7 @@ cfnbootstrap_package = "aws-cfn-bootstrap-py3-#{cfnbootstrap_version}.tar.gz" region = node['cluster']['region'] -bucket = region.start_with?('cn-') ? 's3.cn-north-1.amazonaws.com.cn/cn-north-1-aws-parallelcluster' : "s3.amazonaws.com" +bucket = region.start_with?('cn-') ? 's3.cn-north-1.amazonaws.com.cn/cn-north-1-aws-parallelcluster' : "s3.#{aws_region}.#{aws_domain}" remote_file "/tmp/#{cfnbootstrap_package}" do source "https://#{bucket}/cloudformation-examples/#{cfnbootstrap_package}" diff --git a/cookbooks/aws-parallelcluster-environment/resources/cloudwatch/partial/_cloudwatch_common.rb b/cookbooks/aws-parallelcluster-environment/resources/cloudwatch/partial/_cloudwatch_common.rb index 856ec99de2..09310fb70b 100644 --- a/cookbooks/aws-parallelcluster-environment/resources/cloudwatch/partial/_cloudwatch_common.rb +++ b/cookbooks/aws-parallelcluster-environment/resources/cloudwatch/partial/_cloudwatch_common.rb @@ -29,7 +29,7 @@ public_key_local_path = "#{node['cluster']['sources_dir']}/amazon-cloudwatch-agent.gpg" remote_file public_key_local_path do - source 'https://s3.amazonaws.com/amazoncloudwatch-agent/assets/amazon-cloudwatch-agent.gpg' + source "https://s3.#{aws_region}.#{aws_domain}/amazoncloudwatch-agent/assets/amazon-cloudwatch-agent.gpg" retries 3 retry_delay 5 action :create_if_missing diff --git a/cookbooks/aws-parallelcluster-environment/resources/efa/efa_alinux2.rb b/cookbooks/aws-parallelcluster-environment/resources/efa/efa_alinux2.rb index e79a777858..a3ad4defd2 100644 --- a/cookbooks/aws-parallelcluster-environment/resources/efa/efa_alinux2.rb +++ b/cookbooks/aws-parallelcluster-environment/resources/efa/efa_alinux2.rb @@ -24,6 +24,6 @@ def conflicting_packages end def prerequisites - %w(environment-modules libibverbs-utils librdmacm-utils) + %w(libibverbs-utils librdmacm-utils) end end diff --git a/cookbooks/aws-parallelcluster-environment/resources/efa/partial/_common.rb b/cookbooks/aws-parallelcluster-environment/resources/efa/partial/_common.rb index ff23e9bd87..2235863d3d 100644 --- a/cookbooks/aws-parallelcluster-environment/resources/efa/partial/_common.rb +++ b/cookbooks/aws-parallelcluster-environment/resources/efa/partial/_common.rb @@ -55,7 +55,11 @@ action :download_and_install do # Get EFA Installer + region = aws_region efa_installer_url = "https://efa-installer.amazonaws.com/aws-efa-installer-#{new_resource.efa_version}.tar.gz" + if region.start_with?("us-iso") + efa_installer_url = "https://aws-efa-installer.s3.#{aws_region}.#{aws_domain}/aws-efa-installer-#{new_resource.efa_version}.tar.gz" + end remote_file efa_tarball do source efa_installer_url mode '0644' diff --git a/cookbooks/aws-parallelcluster-environment/resources/lustre/lustre_redhat8.rb b/cookbooks/aws-parallelcluster-environment/resources/lustre/lustre_redhat8.rb index 839740c032..8b727392ad 100644 --- a/cookbooks/aws-parallelcluster-environment/resources/lustre/lustre_redhat8.rb +++ b/cookbooks/aws-parallelcluster-environment/resources/lustre/lustre_redhat8.rb @@ -44,10 +44,8 @@ action_class do def base_url # https://docs.aws.amazon.com/fsx/latest/LustreGuide/install-lustre-client.html#lustre-client-rhel - "https://fsx-lustre-client-repo.s3.amazonaws.com/el/#{node['platform_version']}/$basearch" - end + "https://fsx-lustre-client-repo.s3.#{aws_region}.#{aws_domain}/el/#{node['platform_version']}/$basearch" end def public_key - "https://fsx-lustre-client-repo-public-keys.s3.amazonaws.com/fsx-rpm-public-key.asc" - end + "https://fsx-lustre-client-repo-public-keys.s3.#{aws_region}.#{aws_domain}/fsx-rpm-public-key.asc" end end diff --git a/cookbooks/aws-parallelcluster-platform/recipes/install/awscli.rb b/cookbooks/aws-parallelcluster-platform/recipes/install/awscli.rb index 9f6fb6aacf..8322ae959a 100644 --- a/cookbooks/aws-parallelcluster-platform/recipes/install/awscli.rb +++ b/cookbooks/aws-parallelcluster-platform/recipes/install/awscli.rb @@ -17,12 +17,18 @@ # See the License for the specific language governing permissions and limitations under the License. return if ::File.exist?("/usr/local/bin/aws") || redhat_on_docker? +return if platform?('amazon') file_cache_path = Chef::Config[:file_cache_path] +region = aws_region +awscli_url = "https://s3.amazonaws.com/aws-cli/awscli-bundle.zip" +if region.start_with?("us-iso") + awscli_url ="https://aws-sdk-common-infra-dca-prod-deployment-bucket.s3.#{aws_region}.#{aws_domain}/aws-cli-v2/linux/x86_64/awscli-exe-linux-x86_64.zip" +end remote_file 'download awscli bundle from s3' do path "#{file_cache_path}/awscli-bundle.zip" - source 'https://s3.amazonaws.com/aws-cli/awscli-bundle.zip' + source awscli_url path retries 5 retry_delay 5 @@ -34,6 +40,12 @@ overwrite true end -bash 'install awscli' do - code "#{cookbook_virtualenv_path}/bin/python #{file_cache_path}/awscli/awscli-bundle/install -i /usr/local/aws -b /usr/local/bin/aws" +if region.start_with?("us-iso") + bash 'install awscli' do + code "#{file_cache_path}/awscli/aws/install -i /usr/local/aws -b /usr/local/bin/aws" + end +else + bash 'install awscli' do + code "#{cookbook_virtualenv_path}/bin/python#{node['cluster']['python-major-minor-version']} #{file_cache_path}/awscli/awscli-bundle/install -i /usr/local/aws -b /usr/local/bin/aws" + end end diff --git a/cookbooks/aws-parallelcluster-platform/recipes/install/cookbook_virtualenv.rb b/cookbooks/aws-parallelcluster-platform/recipes/install/cookbook_virtualenv.rb index 63fe8b5ecf..4eacee3874 100644 --- a/cookbooks/aws-parallelcluster-platform/recipes/install/cookbook_virtualenv.rb +++ b/cookbooks/aws-parallelcluster-platform/recipes/install/cookbook_virtualenv.rb @@ -27,20 +27,13 @@ not_if { ::File.exist?("#{cookbook_virtualenv_path}/bin/activate") } end -remote_file "#{node['cluster']['base_dir']}/cookbook-dependencies.tgz" do - source "#{node['cluster']['artifacts_s3_url']}/dependencies/PyPi/#{node['kernel']['machine']}/cookbook-dependencies.tgz" - mode '0644' - retries 3 - retry_delay 5 - action :create_if_missing -end - bash 'pip install' do user 'root' group 'root' cwd "#{node['cluster']['base_dir']}" code <<-REQ set -e + aws s3 cp #{node['cluster']['artifacts_build_url']}/PyPi/#{node['kernel']['machine']}/cookbook-dependencies.tgz cookbook-dependencies.tgz --region #{node['cluster']['region']} tar xzf cookbook-dependencies.tgz cd dependencies #{virtualenv_path}/bin/pip install * -f ./ --no-index diff --git a/cookbooks/aws-parallelcluster-platform/recipes/install/cuda.rb b/cookbooks/aws-parallelcluster-platform/recipes/install/cuda.rb index cca8a2f443..9c414e6c1a 100644 --- a/cookbooks/aws-parallelcluster-platform/recipes/install/cuda.rb +++ b/cookbooks/aws-parallelcluster-platform/recipes/install/cuda.rb @@ -24,9 +24,7 @@ cuda_complete_version = "#{cuda_version}.#{cuda_patch}" cuda_version_suffix = '535.104.05' cuda_arch = arm_instance? ? 'linux_sbsa' : 'linux' -cuda_url = "#{node['cluster']['artifacts_s3_url']}/dependencies/cuda/cuda_#{cuda_complete_version}_#{cuda_version_suffix}_#{cuda_arch}.run" cuda_samples_version = '12.2' -cuda_samples_url = "#{node['cluster']['artifacts_s3_url']}/dependencies/cuda/samples/v#{cuda_samples_version}.tar.gz" tmp_cuda_run = '/tmp/cuda.run' tmp_cuda_sample_archive = '/tmp/cuda-sample.tar.gz' @@ -35,12 +33,17 @@ node_attributes 'Save cuda and cuda samples versions for InSpec tests' # Get CUDA run file -remote_file tmp_cuda_run do - source cuda_url - mode '0755' +bash 'Get CUDA run file from s3' do + user 'root' + group 'root' + cwd "#{node['cluster']['sources_dir']}" + code <<-CUDA + set -e + aws s3 cp #{node['cluster']['artifacts_build_url']}/cuda/cuda_#{cuda_complete_version}_#{cuda_version_suffix}_#{cuda_arch}.run #{tmp_cuda_run} --region #{node['cluster']['region']} + chmod 755 #{tmp_cuda_run} + CUDA retries 3 retry_delay 5 - not_if { ::File.exist?("/usr/local/cuda-#{cuda_version}") } end # Install CUDA driver @@ -59,12 +62,17 @@ end # Get CUDA Sample Files -remote_file tmp_cuda_sample_archive do - source cuda_samples_url - mode '0644' +bash 'get CUDA Sample Files from s3' do + user 'root' + group 'root' + cwd "#{node['cluster']['sources_dir']}" + code <<-CUDA + set -e + aws s3 cp #{node['cluster']['artifacts_build_url']}/cuda/samples/v#{cuda_samples_version}.tar.gz #{tmp_cuda_sample_archive} --region #{node['cluster']['region']} + chmod 644 #{tmp_cuda_sample_archive} + CUDA retries 3 retry_delay 5 - not_if { ::File.exist?("/usr/local/cuda-#{cuda_version}/samples") } end # Unpack CUDA Samples diff --git a/cookbooks/aws-parallelcluster-platform/recipes/install/intel_mpi.rb b/cookbooks/aws-parallelcluster-platform/recipes/install/intel_mpi.rb index f70a58f27f..e6e465af79 100644 --- a/cookbooks/aws-parallelcluster-platform/recipes/install/intel_mpi.rb +++ b/cookbooks/aws-parallelcluster-platform/recipes/install/intel_mpi.rb @@ -29,7 +29,7 @@ intelmpi_installation_path = "/opt/intel/mpi/#{intelmpi_version}" intelmpi_installer = "l_mpi_oneapi_p_#{intelmpi_full_version}_offline.sh" intelmpi_installer_path = "#{node['cluster']['sources_dir']}/#{intelmpi_installer}" -intelmpi_installer_url = "#{node['cluster']['artifacts_s3_url']}/impi/#{intelmpi_installer}" +intelmpi_installer_url = "#{node['cluster']['base_build_url']}/archives/impi/#{intelmpi_installer}" intelmpi_qt_version = '6.5.3' # Prerequisite for module install @@ -39,12 +39,17 @@ end # fetch intelmpi installer script -remote_file intelmpi_installer_path do - source intelmpi_installer_url - mode '0744' - retries 3 - retry_delay 5 - not_if { ::File.exist?(intelmpi_installation_path.to_s) } +bash 'get intelmpi from s3' do + user 'root' + group 'root' + cwd "#{node['cluster']['sources_dir']}" + code <<-IMPI + set -e + aws s3 cp #{intelmpi_installer_url} #{intelmpi_installer_path} --region #{node['cluster']['region']} + chmod 744 #{intelmpi_installer_path} + IMPI + retries 5 + retry_delay 10 end bash "install intel mpi" do diff --git a/cookbooks/aws-parallelcluster-platform/resources/dcv/partial/_dcv_common.rb b/cookbooks/aws-parallelcluster-platform/resources/dcv/partial/_dcv_common.rb index 3f8172ea99..c777305f39 100644 --- a/cookbooks/aws-parallelcluster-platform/resources/dcv/partial/_dcv_common.rb +++ b/cookbooks/aws-parallelcluster-platform/resources/dcv/partial/_dcv_common.rb @@ -147,10 +147,15 @@ def optionally_disable_rnd # Extract DCV packages unless ::File.exist?(dcv_tarball) - remote_file dcv_tarball do - source dcv_url - checksum dcv_sha256sum - mode '0644' + bash 'get dcv from s3' do + user 'root' + group 'root' + cwd "#{node['cluster']['sources_dir']}" + code <<-DCV + set -e + aws s3 cp #{dcv_url} #{dcv_tarball} --region #{node['cluster']['region']} + chmod 644 #{dcv_tarball} + DCV retries 3 retry_delay 5 end @@ -276,7 +281,7 @@ def dcv_url end def dcv_tarball - "#{node['cluster']['sources_dir']}/dcv-#{node['cluster']['dcv']['version']}.tgz" + "#{node['cluster']['artifacts_build_url']}/dcv/#{dcv_package}.tgz" end def dcvauth_virtualenv diff --git a/cookbooks/aws-parallelcluster-platform/resources/fabric_manager/partial/_fabric_manager_install_rhel.rb b/cookbooks/aws-parallelcluster-platform/resources/fabric_manager/partial/_fabric_manager_install_rhel.rb index 7a7b49ea2a..aa48c47331 100644 --- a/cookbooks/aws-parallelcluster-platform/resources/fabric_manager/partial/_fabric_manager_install_rhel.rb +++ b/cookbooks/aws-parallelcluster-platform/resources/fabric_manager/partial/_fabric_manager_install_rhel.rb @@ -13,22 +13,14 @@ # See the License for the specific language governing permissions and limitations under the License. action :install_package do - remote_file "#{node['cluster']['sources_dir']}/#{fabric_manager_package}-#{fabric_manager_version}.rpm" do - source "#{fabric_manager_url}" - mode '0644' - retries 3 - retry_delay 5 - action :create_if_missing - end - package 'yum-plugin-versionlock' bash "Install #{fabric_manager_package}" do user 'root' cwd node['cluster']['sources_dir'] code <<-FABRIC_MANAGER_INSTALL set -e - yum install -y #{fabric_manager_package}-#{fabric_manager_version}.rpm - yum versionlock #{fabric_manager_package} + aws s3 cp #{fabric_manager_url} #{fabric_manager_package}-#{fabric_manager_version}.rpm --region #{node['cluster']['region']} + yum install -y #{fabric_manager_package}-#{fabric_manager_version}.rpm yum versionlock #{fabric_manager_package} FABRIC_MANAGER_INSTALL retries 3 retry_delay 5 @@ -40,5 +32,5 @@ def arch_suffix end def fabric_manager_url - "#{node['cluster']['artifacts_s3_url']}/dependencies/nvidia_fabric/#{platform}/#{fabric_manager_package}-#{fabric_manager_version}-1.#{arch_suffix}.rpm" + "#{node['cluster']['artifacts_build_url']}/nvidia_fabric/#{platform}/#{fabric_manager_package}-#{fabric_manager_version}-1.#{arch_suffix}.rpm" end diff --git a/cookbooks/aws-parallelcluster-platform/resources/gdrcopy/partial/_gdrcopy_common.rb b/cookbooks/aws-parallelcluster-platform/resources/gdrcopy/partial/_gdrcopy_common.rb index bef0644160..0dd6fbd8b2 100644 --- a/cookbooks/aws-parallelcluster-platform/resources/gdrcopy/partial/_gdrcopy_common.rb +++ b/cookbooks/aws-parallelcluster-platform/resources/gdrcopy/partial/_gdrcopy_common.rb @@ -38,13 +38,17 @@ def gdrcopy_checksum recursive true end - remote_file gdrcopy_tarball do - source gdrcopy_url - mode '0644' + bash 'get gdrcopy from s3' do + user 'root' + group 'root' + cwd "#{node['cluster']['sources_dir']}" + code <<-GDR + set -e + aws s3 cp #{node['cluster']['artifacts_build_url']}/gdr_copy/v#{gdrcopy_version}.tar.gz #{gdrcopy_tarball} --region #{node['cluster']['region']} + chmod 644 #{gdrcopy_tarball} + GDR retries 3 retry_delay 5 - checksum gdrcopy_checksum - action :create_if_missing end package_repos 'update package repos' do diff --git a/cookbooks/aws-parallelcluster-platform/resources/gdrcopy/partial/_gdrcopy_common_rhel.rb b/cookbooks/aws-parallelcluster-platform/resources/gdrcopy/partial/_gdrcopy_common_rhel.rb index 824df60cfc..2d32d25e84 100644 --- a/cookbooks/aws-parallelcluster-platform/resources/gdrcopy/partial/_gdrcopy_common_rhel.rb +++ b/cookbooks/aws-parallelcluster-platform/resources/gdrcopy/partial/_gdrcopy_common_rhel.rb @@ -17,7 +17,7 @@ def gdrcopy_service end def gdrcopy_build_dependencies - %w(dkms rpm-build make check check-devel subunit subunit-devel) + %w(dkms rpm-build make check check-devel) end def installation_code diff --git a/cookbooks/aws-parallelcluster-platform/resources/install_packages/install_packages_amazon2.rb b/cookbooks/aws-parallelcluster-platform/resources/install_packages/install_packages_amazon2.rb index 87d2d6c49a..9b3afc0f24 100644 --- a/cookbooks/aws-parallelcluster-platform/resources/install_packages/install_packages_amazon2.rb +++ b/cookbooks/aws-parallelcluster-platform/resources/install_packages/install_packages_amazon2.rb @@ -31,7 +31,7 @@ def default_packages gcc-gfortran git indent intltool patchutils rcs subversion swig systemtap curl jq wget python-pip NetworkManager-config-routing-rules python3 python3-pip iptables libcurl-devel yum-plugin-versionlock - coreutils moreutils environment-modules bzip2) + coreutils environment-modules bzip2) end action :install_extras do diff --git a/cookbooks/aws-parallelcluster-platform/resources/install_packages/partial/_install_packages_common.rb b/cookbooks/aws-parallelcluster-platform/resources/install_packages/partial/_install_packages_common.rb index 6b4f1cadfb..81424cfabd 100644 --- a/cookbooks/aws-parallelcluster-platform/resources/install_packages/partial/_install_packages_common.rb +++ b/cookbooks/aws-parallelcluster-platform/resources/install_packages/partial/_install_packages_common.rb @@ -39,7 +39,7 @@ end action :setup do + action_install_extras action_install_kernel_source action_install_base_packages - action_install_extras end diff --git a/cookbooks/aws-parallelcluster-platform/resources/nvidia_dcgm/partial/_nvidia_dcgm_rhel.rb b/cookbooks/aws-parallelcluster-platform/resources/nvidia_dcgm/partial/_nvidia_dcgm_rhel.rb index 997762acd1..7ea74facd7 100644 --- a/cookbooks/aws-parallelcluster-platform/resources/nvidia_dcgm/partial/_nvidia_dcgm_rhel.rb +++ b/cookbooks/aws-parallelcluster-platform/resources/nvidia_dcgm/partial/_nvidia_dcgm_rhel.rb @@ -13,19 +13,11 @@ # See the License for the specific language governing permissions and limitations under the License. action :install_package do - remote_file "#{node['cluster']['sources_dir']}/#{dcgm_package}-#{package_version}.rpm" do - source "#{dcgm_url}" - mode '0644' - retries 3 - retry_delay 5 - action :create_if_missing - end - bash "Install #{dcgm_package}" do user 'root' - cwd node['cluster']['sources_dir'] code <<-DCGM_INSTALL set -e + aws s3 cp #{dcgm_url} #{dcgm_package}-#{package_version}.rpm --region #{node['cluster']['region']} yum install -y #{dcgm_package}-#{package_version}.rpm DCGM_INSTALL retries 3 @@ -34,7 +26,7 @@ end def dcgm_url - "#{node['cluster']['artifacts_s3_url']}/dependencies/nvidia_dcgm/#{platform}/#{dcgm_package}-#{package_version}-1-#{arch_suffix}.rpm" + "#{node['cluster']['artifacts_build_url']}/nvidia_dcgm/#{platform}/#{dcgm_package}-#{package_version}-1-#{arch_suffix}.rpm" end def dcgm_package diff --git a/cookbooks/aws-parallelcluster-platform/resources/nvidia_driver/partial/_nvidia_driver_common.rb b/cookbooks/aws-parallelcluster-platform/resources/nvidia_driver/partial/_nvidia_driver_common.rb index 947e68f49f..ca39c1909d 100644 --- a/cookbooks/aws-parallelcluster-platform/resources/nvidia_driver/partial/_nvidia_driver_common.rb +++ b/cookbooks/aws-parallelcluster-platform/resources/nvidia_driver/partial/_nvidia_driver_common.rb @@ -27,12 +27,17 @@ node.default['cluster']['nvidia']['driver_version'] = _nvidia_driver_version node_attributes "Save Nvidia driver version for Inspec tests" - remote_file tmp_nvidia_run do - source nvidia_driver_url - mode '0755' + bash 'get nvidia driver from s3' do + user 'root' + group 'root' + cwd "#{node['cluster']['sources_dir']}" + code <<-NVIDIA + set -e + aws s3 cp #{node['cluster']['artifacts_build_url']}/nvidia_driver/NVIDIA-Linux-#{nvidia_arch}-#{_nvidia_driver_version}.run #{tmp_nvidia_run} --region #{node['cluster']['region']} + chmod 755 #{tmp_nvidia_run} + NVIDIA retries 3 retry_delay 5 - not_if { ::File.exist?('/usr/bin/nvidia-smi') } end # Make sure nouveau kernel module is unloaded, otherwise installation of NVIDIA driver fails diff --git a/cookbooks/aws-parallelcluster-shared/attributes/environment.rb b/cookbooks/aws-parallelcluster-shared/attributes/environment.rb index e8916e7683..5992a98886 100644 --- a/cookbooks/aws-parallelcluster-shared/attributes/environment.rb +++ b/cookbooks/aws-parallelcluster-shared/attributes/environment.rb @@ -5,5 +5,6 @@ # URL for ParallelCluster Artifacts stored in public S3 buckets # ['cluster']['region'] will need to be defined by image_dna.json during AMI build. -default['cluster']['artifacts_build_url'] = "s3://#{node['cluster']['region']}-aws-parallelcluster/archives/dependencies" +default['cluster']['base_build_url'] = "s3://aws-parallelcluster-dev-build-dependencies" default['cluster']['artifacts_s3_url'] = "https://#{node['cluster']['region']}-aws-parallelcluster.s3.#{node['cluster']['region']}.#{node['cluster']['aws_domain']}/archives" +default['cluster']['artifacts_build_url'] = "#{node['cluster']['base_build_url']}/archives/dependencies" \ No newline at end of file diff --git a/cookbooks/aws-parallelcluster-slurm/recipes/install/install_jwt.rb b/cookbooks/aws-parallelcluster-slurm/recipes/install/install_jwt.rb index 5dd551857e..487d86fe51 100644 --- a/cookbooks/aws-parallelcluster-slurm/recipes/install/install_jwt.rb +++ b/cookbooks/aws-parallelcluster-slurm/recipes/install/install_jwt.rb @@ -16,17 +16,19 @@ # limitations under the License. jwt_version = '1.15.3' -jwt_url = "#{node['cluster']['artifacts_s3_url']}/dependencies/jwt/v#{jwt_version}.tar.gz" jwt_tarball = "#{node['cluster']['sources_dir']}/libjwt-#{jwt_version}.tar.gz" -jwt_sha256 = 'cb2fd95123689e7d209a3a8c060e02f68341c9a5ded524c0cd881a8cd20d711f' -remote_file jwt_tarball do - source jwt_url - mode '0644' +bash 'get jwt from s3' do + user 'root' + group 'root' + cwd "#{node['cluster']['sources_dir']}" + code <<-JWT + set -e + aws s3 cp #{node['cluster']['artifacts_build_url']}/jwt/v#{jwt_version}.tar.gz #{jwt_tarball} --region #{node['cluster']['region']} + chmod 644 #{jwt_tarball} + JWT retries 3 retry_delay 5 - checksum jwt_sha256 - action :create_if_missing end jwt_dependencies 'Install jwt dependencies' diff --git a/cookbooks/aws-parallelcluster-slurm/recipes/install/install_pmix.rb b/cookbooks/aws-parallelcluster-slurm/recipes/install/install_pmix.rb index 6548b203e7..126bc1f680 100644 --- a/cookbooks/aws-parallelcluster-slurm/recipes/install/install_pmix.rb +++ b/cookbooks/aws-parallelcluster-slurm/recipes/install/install_pmix.rb @@ -21,13 +21,17 @@ pmix_sha256 = node['cluster']['pmix']['sha256'] pmix_tarball = "#{node['cluster']['sources_dir']}/pmix-#{pmix_version}.tar.gz" -remote_file pmix_tarball do - source pmix_url - mode '0644' +bash 'get pmix from s3' do + user 'root' + group 'root' + cwd "#{node['cluster']['sources_dir']}" + code <<-PMIX + set -e + aws s3 cp #{node['cluster']['artifacts_build_url']}/pmix/pmix-#{pmix_version}.tar.gz #{pmix_tarball} --region #{node['cluster']['region']} + chmod 644 #{pmix_tarball} + PMIX retries 3 retry_delay 5 - checksum pmix_sha256 - action :create_if_missing end bash 'Install PMIx' do diff --git a/cookbooks/aws-parallelcluster-slurm/recipes/install/install_slurm.rb b/cookbooks/aws-parallelcluster-slurm/recipes/install/install_slurm.rb index 656c5b33ae..e7e269cd70 100644 --- a/cookbooks/aws-parallelcluster-slurm/recipes/install/install_slurm.rb +++ b/cookbooks/aws-parallelcluster-slurm/recipes/install/install_slurm.rb @@ -38,13 +38,17 @@ include_recipe 'aws-parallelcluster-slurm::slurm_users' # Get slurm tarball -remote_file slurm_tarball do - source slurm_url - mode '0644' +bash 'get slurm from s3' do + user 'root' + group 'root' + cwd "#{node['cluster']['sources_dir']}" + code <<-SLURM + set -e + aws s3 cp #{node['cluster']['artifacts_build_url']}/slurm/#{slurm_tar_name}.tar.gz #{slurm_tarball} --region #{node['cluster']['region']} + chmod 644 #{slurm_tarball} + SLURM retries 3 retry_delay 5 - checksum slurm_sha256 - action :create_if_missing end # Copy Slurm patches diff --git a/cookbooks/aws-parallelcluster-slurm/resources/munge/partial/_munge_actions.rb b/cookbooks/aws-parallelcluster-slurm/resources/munge/partial/_munge_actions.rb index b97e037eaf..37f89500b9 100644 --- a/cookbooks/aws-parallelcluster-slurm/resources/munge/partial/_munge_actions.rb +++ b/cookbooks/aws-parallelcluster-slurm/resources/munge/partial/_munge_actions.rb @@ -20,13 +20,11 @@ default_action :setup munge_version = node['cluster']['munge']['munge_version'] -munge_url = "#{node['cluster']['munge']['base_url']}/munge-#{munge_version}.tar.gz" munge_tarball = "#{node['cluster']['sources_dir']}/munge-#{munge_version}.tar.gz" munge_user = node['cluster']['munge']['user'] munge_user_id = node['cluster']['munge']['user_id'] munge_group = node['cluster']['munge']['group'] munge_group_id = node['cluster']['munge']['group_id'] -munge_sha256 = node['cluster']['munge']['sha256'] action :setup do directory node['cluster']['sources_dir'] do @@ -62,13 +60,17 @@ action :download_source_code do # Get munge tarball - remote_file munge_tarball do - source munge_url - mode '0644' + bash 'get munge from s3' do + user 'root' + group 'root' + cwd "#{node['cluster']['sources_dir']}" + code <<-MUNGE + set -e + aws s3 cp #{node['cluster']['artifacts_build_url']}/munge/munge-#{munge_version}.tar.gz #{munge_tarball} --region #{node['cluster']['region']} + chmod 644 #{munge_tarball} + MUNGE retries 3 retry_delay 5 - checksum munge_sha256 - action :create_if_missing end end diff --git a/cookbooks/aws-parallelcluster-slurm/resources/mysql_client/partial/_setup_rhel_based.rb b/cookbooks/aws-parallelcluster-slurm/resources/mysql_client/partial/_setup_rhel_based.rb index 38310a9a59..cdaa3cf58c 100644 --- a/cookbooks/aws-parallelcluster-slurm/resources/mysql_client/partial/_setup_rhel_based.rb +++ b/cookbooks/aws-parallelcluster-slurm/resources/mysql_client/partial/_setup_rhel_based.rb @@ -14,7 +14,7 @@ # See the License for the specific language governing permissions and limitations under the License. action :setup do - mysql_archive_url = package_archive(node['cluster']['artifacts_s3_url']) + mysql_archive_url = package_archive("#{node['cluster']['base_build_url']}/archives") mysql_tar_file = "/tmp/#{package_filename}" log "Downloading MySQL packages archive from #{mysql_archive_url}" @@ -22,12 +22,17 @@ # Add MySQL source file action_create_source_link - remote_file mysql_tar_file do - source mysql_archive_url - mode '0644' - retries 3 - retry_delay 5 - action :create_if_missing + bash 'get mysql from s3' do + user 'root' + group 'root' + cwd "#{node['cluster']['sources_dir']}" + code <<-MYSQL + set -e + aws s3 cp #{mysql_archive_url} #{mysql_tar_file} --region #{node['cluster']['region']} + chmod 644 #{mysql_tar_file} + MYSQL + retries 5 + retry_delay 10 end bash 'Install MySQL packages' do From 12e0a5dd86eb8fb4f22a4134c4c912a729b7ec80 Mon Sep 17 00:00:00 2001 From: Helena Greebe Date: Mon, 1 Jul 2024 19:59:57 -0400 Subject: [PATCH 2/7] [ADC Build Image] Disable epel repo for alinux2 --- .../recipes/install/cfn_bootstrap.rb | 8 +++++++- .../cloudwatch/partial/_cloudwatch_common.rb | 9 ++++++++- .../resources/dcv/partial/_dcv_common.rb | 4 ++-- .../resources/install_pyenv.rb | 15 ++------------- .../package_repos/package_repos_alinux2.rb | 10 ++++++++++ 5 files changed, 29 insertions(+), 17 deletions(-) diff --git a/cookbooks/aws-parallelcluster-environment/recipes/install/cfn_bootstrap.rb b/cookbooks/aws-parallelcluster-environment/recipes/install/cfn_bootstrap.rb index 471354f0a3..77d7bb05c0 100644 --- a/cookbooks/aws-parallelcluster-environment/recipes/install/cfn_bootstrap.rb +++ b/cookbooks/aws-parallelcluster-environment/recipes/install/cfn_bootstrap.rb @@ -50,7 +50,13 @@ cfnbootstrap_package = "aws-cfn-bootstrap-py3-#{cfnbootstrap_version}.tar.gz" region = node['cluster']['region'] -bucket = region.start_with?('cn-') ? 's3.cn-north-1.amazonaws.com.cn/cn-north-1-aws-parallelcluster' : "s3.#{aws_region}.#{aws_domain}" +bucket = "s3.amazonaws.com" + +if region.start_with?('cn-') + bucket = 's3.cn-north-1.amazonaws.com.cn/cn-north-1-aws-parallelcluster' +elsif region.start_with?("us-iso") + bucket = "s3.#{aws_region}.#{aws_domain}" +end remote_file "/tmp/#{cfnbootstrap_package}" do source "https://#{bucket}/cloudformation-examples/#{cfnbootstrap_package}" diff --git a/cookbooks/aws-parallelcluster-environment/resources/cloudwatch/partial/_cloudwatch_common.rb b/cookbooks/aws-parallelcluster-environment/resources/cloudwatch/partial/_cloudwatch_common.rb index 09310fb70b..8fc917d19d 100644 --- a/cookbooks/aws-parallelcluster-environment/resources/cloudwatch/partial/_cloudwatch_common.rb +++ b/cookbooks/aws-parallelcluster-environment/resources/cloudwatch/partial/_cloudwatch_common.rb @@ -27,9 +27,16 @@ action_cloudwatch_prerequisite + region = node['cluster']['region'] + key_path = "/amazoncloudwatch-agent/assets/amazon-cloudwatch-agent.gpg" + cloudwatch_key_url = "https://s3.amazonaws.com/#{key_path}" + if region.start_with?("us-iso") + cloudwatch_key_url = "https://s3.#{aws_region}.#{aws_domain}/#{key_path}" + end + public_key_local_path = "#{node['cluster']['sources_dir']}/amazon-cloudwatch-agent.gpg" remote_file public_key_local_path do - source "https://s3.#{aws_region}.#{aws_domain}/amazoncloudwatch-agent/assets/amazon-cloudwatch-agent.gpg" + source cloudwatch_key_url retries 3 retry_delay 5 action :create_if_missing diff --git a/cookbooks/aws-parallelcluster-platform/resources/dcv/partial/_dcv_common.rb b/cookbooks/aws-parallelcluster-platform/resources/dcv/partial/_dcv_common.rb index c777305f39..388b400dd2 100644 --- a/cookbooks/aws-parallelcluster-platform/resources/dcv/partial/_dcv_common.rb +++ b/cookbooks/aws-parallelcluster-platform/resources/dcv/partial/_dcv_common.rb @@ -277,11 +277,11 @@ def dcv_gpu_accel_supported? end def dcv_url - "#{node['cluster']['artifacts_s3_url']}/dependencies/dcv/#{dcv_package}.tgz" + "#{node['cluster']['artifacts_build_url']}/dcv/#{dcv_package}.tgz" end def dcv_tarball - "#{node['cluster']['artifacts_build_url']}/dcv/#{dcv_package}.tgz" + "#{node['cluster']['sources_dir']}/dcv-#{node['cluster']['dcv']['version']}.tgz" end def dcvauth_virtualenv diff --git a/cookbooks/aws-parallelcluster-shared/resources/install_pyenv.rb b/cookbooks/aws-parallelcluster-shared/resources/install_pyenv.rb index 9c09b509a0..64a151b8c9 100644 --- a/cookbooks/aws-parallelcluster-shared/resources/install_pyenv.rb +++ b/cookbooks/aws-parallelcluster-shared/resources/install_pyenv.rb @@ -12,11 +12,7 @@ action :run do python_version = new_resource.python_version || node['cluster']['python-version'] - python_url = "#{node['cluster']['artifacts_s3_url']}/dependencies/python/Python-#{python_version}.tgz" - - if new_resource.python_version - python_url = "https://www.python.org/ftp/python/#{python_version}/Python-#{python_version}.tgz" - end + python_url = "#{node['cluster']['artifacts_build_url']}/python/Python-#{python_version}.tgz" if new_resource.user_only raise "user property is required for resource install_pyenv when user_only is set to true" unless new_resource.user @@ -29,14 +25,6 @@ recursive true end - remote_file "#{prefix}/Python-#{python_version}.tgz" do - source python_url - mode '0644' - retries 3 - retry_delay 5 - action :create_if_missing - end - user = new_resource.user || 'root' bash "install python #{python_version}" do @@ -45,6 +33,7 @@ cwd "#{prefix}" code <<-VENV set -e + aws s3 cp #{python_url} Python-#{python_version}.tgz --region #{node['cluster']['region']} tar -xzf Python-#{python_version}.tgz cd Python-#{python_version} ./configure --prefix=#{prefix}/versions/#{python_version} diff --git a/cookbooks/aws-parallelcluster-shared/resources/package_repos/package_repos_alinux2.rb b/cookbooks/aws-parallelcluster-shared/resources/package_repos/package_repos_alinux2.rb index e6ad8b9676..d8c10e3b16 100644 --- a/cookbooks/aws-parallelcluster-shared/resources/package_repos/package_repos_alinux2.rb +++ b/cookbooks/aws-parallelcluster-shared/resources/package_repos/package_repos_alinux2.rb @@ -22,6 +22,16 @@ action :setup do include_recipe 'yum' alinux_extras_topic 'epel' + if aws_region.start_with?("us-iso") + bash "Disable epel repo" do + user 'root' + group 'root' + code <<-EPEL + set -e + yum-config-manager --disable epel + EPEL + end + end end action :update do From df3adb6c14934972707396188a7003495bc81b81 Mon Sep 17 00:00:00 2001 From: Helena Greebe Date: Tue, 2 Jul 2024 09:53:47 -0400 Subject: [PATCH 3/7] [ADC Build Image] Install missing yum dependencies from s3 --- .../install_packages/install_packages_amazon2.rb | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/cookbooks/aws-parallelcluster-platform/resources/install_packages/install_packages_amazon2.rb b/cookbooks/aws-parallelcluster-platform/resources/install_packages/install_packages_amazon2.rb index 9b3afc0f24..cca896fbd4 100644 --- a/cookbooks/aws-parallelcluster-platform/resources/install_packages/install_packages_amazon2.rb +++ b/cookbooks/aws-parallelcluster-platform/resources/install_packages/install_packages_amazon2.rb @@ -40,4 +40,16 @@ def default_packages ['R3.4'].each do |topic| alinux_extras_topic topic end + + bash 'yum install missing deps' do + user 'root' + group 'root' + code <<-REQ + set -e + aws s3 cp #{node['cluster']['artifacts_build_url']}/epel/rhel7/#{node['kernel']['machine']}/epel_deps.tar.gz epel_deps.tar.gz --region #{node['cluster']['region']} + tar xzf epel_deps.tar.gz + cd epel + yum install -y * 2>/dev/null + REQ + end end From 7f97a39e289c480b556fbdc2f119051131de9eeb Mon Sep 17 00:00:00 2001 From: Helena Greebe Date: Tue, 16 Jul 2024 13:17:21 -0400 Subject: [PATCH 4/7] [ADC Build Image] Get dependencies using https --- .../recipes/awsbatch_virtualenv.rb | 20 +++++++++ .../recipes/install.rb | 23 ++++------ .../install/custom_parallelcluster_node.rb | 7 --- .../recipes/install/parallelcluster_node.rb | 35 ++++++++++++--- .../recipes/install/cfn_bootstrap.rb | 9 +++- .../recipes/install/awscli.rb | 5 ++- .../recipes/install/cookbook_virtualenv.rb | 9 +++- .../recipes/install/cuda.rb | 28 +++++------- .../recipes/install/intel_mpi.rb | 19 +++----- .../resources/dcv/partial/_dcv_common.rb | 17 +++----- .../partial/_fabric_manager_install_rhel.rb | 14 ++++-- .../resources/gdrcopy/gdrcopy_redhat8.rb | 4 ++ .../gdrcopy/partial/_gdrcopy_common.rb | 14 +++--- .../install_packages_amazon2.rb | 9 +++- .../install_packages_redhat8.rb | 43 ++++++++++++++++--- .../nvidia_dcgm/partial/_nvidia_dcgm_rhel.rb | 12 +++++- .../partial/_nvidia_driver_common.rb | 18 +++----- .../attributes/environment.rb | 4 +- .../resources/install_pyenv.rb | 20 ++++++--- .../package_repos/package_repos_redhat8.rb | 2 +- .../recipes/install/install_jwt.rb | 18 ++++---- .../recipes/install/install_pmix.rb | 14 +++--- .../recipes/install/install_slurm.rb | 14 +++--- .../resources/munge/partial/_munge_actions.rb | 16 +++---- .../mysql_client/partial/_setup_rhel_based.rb | 19 +++----- .../slurm_dependencies_redhat8.rb | 2 +- 26 files changed, 229 insertions(+), 166 deletions(-) diff --git a/cookbooks/aws-parallelcluster-awsbatch/recipes/awsbatch_virtualenv.rb b/cookbooks/aws-parallelcluster-awsbatch/recipes/awsbatch_virtualenv.rb index 936a15edd9..cbcd61cf00 100644 --- a/cookbooks/aws-parallelcluster-awsbatch/recipes/awsbatch_virtualenv.rb +++ b/cookbooks/aws-parallelcluster-awsbatch/recipes/awsbatch_virtualenv.rb @@ -28,5 +28,25 @@ not_if { ::File.exist?("#{virtualenv_path}/bin/activate") } end +remote_file "#{node['cluster']['base_dir']}/awsbatch-dependencies.tgz" do + source "#{node['cluster']['artifacts_s3_url']}/dependencies/PyPi/#{node['kernel']['machine']}/awsbatch-dependencies.tgz" + mode '0644' + retries 3 + retry_delay 5 + action :create_if_missing +end + +bash 'pip install' do + user 'root' + group 'root' + cwd "#{node['cluster']['base_dir']}" + code <<-REQ + set -e + tar xzf awsbatch-dependencies.tgz + cd awsbatch + #{virtualenv_path}/bin/pip install * -f ./ --no-index + REQ +end + node.default['cluster']['awsbatch_virtualenv_path'] = virtualenv_path node_attributes "dump node attributes" diff --git a/cookbooks/aws-parallelcluster-awsbatch/recipes/install.rb b/cookbooks/aws-parallelcluster-awsbatch/recipes/install.rb index 64be5fb28c..2ecd0ef06a 100644 --- a/cookbooks/aws-parallelcluster-awsbatch/recipes/install.rb +++ b/cookbooks/aws-parallelcluster-awsbatch/recipes/install.rb @@ -43,32 +43,27 @@ mkdir aws-parallelcluster-awsbatch-cli tar -xzf aws-parallelcluster.tgz --directory aws-parallelcluster-awsbatch-cli - aws s3 cp #{node['cluster']['artifacts_build_url']}/PyPi/#{node['kernel']['machine']}/awsbatch-dependencies.tgz awsbatch-dependencies.tgz --region #{node['cluster']['region']} - tar xzf awsbatch-dependencies.tgz - cd awsbatch - #{node['cluster']['awsbatch_virtualenv_path']}/bin/pip install * -f ./ --no-index - cd .. - cd aws-parallelcluster-awsbatch-cli/*aws-parallelcluster-* - #{node['cluster']['awsbatch_virtualenv_path']}/bin/pip install awsbatch-cli/ CLI end else # Install aws-parallelcluster-awsbatch-cli package + remote_file "#{Chef::Config[:file_cache_path]}/aws-parallelcluster.tgz" do + source "#{node['cluster']['artifacts_s3_url']}/dependencies/awsbatch/aws-parallelcluster.tgz" + mode '0644' + retries 3 + retry_delay 5 + action :create_if_missing + end + bash "install aws-parallelcluster-awsbatch-cli" do cwd Chef::Config[:file_cache_path] code <<-CLI set -e - package_url=#{node['cluster']['artifacts_build_url']}/awsbatch/aws-parallelcluster.tgz - aws s3 cp ${package_url} aws-parallelcluster.tgz --region #{node['cluster']['region']} mkdir aws-parallelcluster-awsbatch-cli tar -xzf aws-parallelcluster.tgz --directory aws-parallelcluster-awsbatch-cli - aws s3 cp #{node['cluster']['artifacts_build_url']}/PyPi/#{node['kernel']['machine']}/awsbatch-dependencies.tgz awsbatch-dependencies.tgz --region #{node['cluster']['region']} - tar xzf awsbatch-dependencies.tgz - cd awsbatch - #{node['cluster']['awsbatch_virtualenv_path']}/bin/pip install * -f ./ --no-index - cd .. + cd aws-parallelcluster-awsbatch-cli/*aws-parallelcluster-* #{node['cluster']['awsbatch_virtualenv_path']}/bin/pip install awsbatch-cli/ CLI diff --git a/cookbooks/aws-parallelcluster-computefleet/recipes/install/custom_parallelcluster_node.rb b/cookbooks/aws-parallelcluster-computefleet/recipes/install/custom_parallelcluster_node.rb index 751f829fb1..d137b1849e 100644 --- a/cookbooks/aws-parallelcluster-computefleet/recipes/install/custom_parallelcluster_node.rb +++ b/cookbooks/aws-parallelcluster-computefleet/recipes/install/custom_parallelcluster_node.rb @@ -37,13 +37,6 @@ rm -fr aws-parallelcluster-custom-node mkdir aws-parallelcluster-custom-node tar -xzf aws-parallelcluster-node.tgz --directory aws-parallelcluster-custom-node - - aws s3 cp #{node['cluster']['artifacts_build_url']}/PyPi/#{node['kernel']['machine']}/node-dependencies.tgz node-dependencies.tgz --region #{node['cluster']['region']} - tar xzf node-dependencies.tgz - cd node - #{node_virtualenv_path}/bin/pip install * -f ./ --no-index - cd .. - cd aws-parallelcluster-custom-node/*aws-parallelcluster-node-* pip install . deactivate diff --git a/cookbooks/aws-parallelcluster-computefleet/recipes/install/parallelcluster_node.rb b/cookbooks/aws-parallelcluster-computefleet/recipes/install/parallelcluster_node.rb index ae7b70b222..69b9169e93 100644 --- a/cookbooks/aws-parallelcluster-computefleet/recipes/install/parallelcluster_node.rb +++ b/cookbooks/aws-parallelcluster-computefleet/recipes/install/parallelcluster_node.rb @@ -33,9 +33,37 @@ not_if { ::File.exist?("#{virtualenv_path}/bin/activate") } end +remote_file "#{node['cluster']['base_dir']}/node-dependencies.tgz" do + source "#{node['cluster']['artifacts_s3_url']}/dependencies/PyPi/#{node['kernel']['machine']}/node-dependencies.tgz" + mode '0644' + retries 3 + retry_delay 5 + action :create_if_missing +end + +bash 'pip install' do + user 'root' + group 'root' + cwd "#{node['cluster']['base_dir']}" + code <<-REQ + set -e + tar xzf node-dependencies.tgz + cd node + #{virtualenv_path}/bin/pip install * -f ./ --no-index + REQ +end + if is_custom_node? include_recipe 'aws-parallelcluster-computefleet::custom_parallelcluster_node' else + remote_file "#{Chef::Config[:file_cache_path]}/aws-parallelcluster-node.tgz" do + source "#{node['cluster']['artifacts_s3_url']}/dependencies/node/aws-parallelcluster-node.tgz" + mode '0644' + retries 3 + retry_delay 5 + action :create_if_missing + end + bash "install official aws-parallelcluster-node" do cwd Chef::Config[:file_cache_path] code <<-NODE @@ -44,16 +72,9 @@ echo "PATH is $PATH" source #{node_virtualenv_path}/bin/activate pip uninstall --yes aws-parallelcluster-node - node_url=#{node['cluster']['artifacts_build_url']}/node/aws-parallelcluster-node.tgz - aws s3 cp ${node_url} aws-parallelcluster-node.tgz --region #{node['cluster']['region']} rm -fr aws-parallelcluster-node mkdir aws-parallelcluster-node tar -xzf aws-parallelcluster-node.tgz --directory aws-parallelcluster-node - aws s3 cp #{node['cluster']['artifacts_build_url']}/PyPi/#{node['kernel']['machine']}/node-dependencies.tgz node-dependencies.tgz --region #{node['cluster']['region']} - tar xzf node-dependencies.tgz - cd node - #{node_virtualenv_path}/bin/pip install * -f ./ --no-index - cd .. cd aws-parallelcluster-node/*aws-parallelcluster-node-* pip install . deactivate diff --git a/cookbooks/aws-parallelcluster-environment/recipes/install/cfn_bootstrap.rb b/cookbooks/aws-parallelcluster-environment/recipes/install/cfn_bootstrap.rb index 77d7bb05c0..8dec833544 100644 --- a/cookbooks/aws-parallelcluster-environment/recipes/install/cfn_bootstrap.rb +++ b/cookbooks/aws-parallelcluster-environment/recipes/install/cfn_bootstrap.rb @@ -33,13 +33,20 @@ not_if { ::File.exist?("#{virtualenv_path}/bin/activate") } end +remote_file "#{node['cluster']['base_dir']}/cfn-dependencies.tgz" do + source "#{node['cluster']['artifacts_s3_url']}/dependencies/PyPi/#{node['kernel']['machine']}/cfn-dependencies.tgz" + mode '0644' + retries 3 + retry_delay 5 + action :create_if_missing +end + bash 'pip install' do user 'root' group 'root' cwd "#{node['cluster']['base_dir']}" code <<-REQ set -e - aws s3 cp #{node['cluster']['artifacts_build_url']}/PyPi/#{node['kernel']['machine']}/cfn-dependencies.tgz cfn-dependencies.tgz --region #{node['cluster']['region']} tar xzf cfn-dependencies.tgz cd cfn #{virtualenv_path}/bin/pip install * -f ./ --no-index diff --git a/cookbooks/aws-parallelcluster-platform/recipes/install/awscli.rb b/cookbooks/aws-parallelcluster-platform/recipes/install/awscli.rb index 8322ae959a..655eeabcb4 100644 --- a/cookbooks/aws-parallelcluster-platform/recipes/install/awscli.rb +++ b/cookbooks/aws-parallelcluster-platform/recipes/install/awscli.rb @@ -22,8 +22,11 @@ file_cache_path = Chef::Config[:file_cache_path] region = aws_region awscli_url = "https://s3.amazonaws.com/aws-cli/awscli-bundle.zip" -if region.start_with?("us-iso") + +if region.start_with?("us-iso-") awscli_url ="https://aws-sdk-common-infra-dca-prod-deployment-bucket.s3.#{aws_region}.#{aws_domain}/aws-cli-v2/linux/x86_64/awscli-exe-linux-x86_64.zip" +elsif region.start_with?("us-isob-") + awscli_url ="https://aws-sdk-common-infra-lck-prod-deployment-bucket.s3.#{aws_region}.#{aws_domain}/aws-cli-v2/linux/x86_64/awscli-exe-linux-x86_64.zip" end remote_file 'download awscli bundle from s3' do diff --git a/cookbooks/aws-parallelcluster-platform/recipes/install/cookbook_virtualenv.rb b/cookbooks/aws-parallelcluster-platform/recipes/install/cookbook_virtualenv.rb index 4eacee3874..63fe8b5ecf 100644 --- a/cookbooks/aws-parallelcluster-platform/recipes/install/cookbook_virtualenv.rb +++ b/cookbooks/aws-parallelcluster-platform/recipes/install/cookbook_virtualenv.rb @@ -27,13 +27,20 @@ not_if { ::File.exist?("#{cookbook_virtualenv_path}/bin/activate") } end +remote_file "#{node['cluster']['base_dir']}/cookbook-dependencies.tgz" do + source "#{node['cluster']['artifacts_s3_url']}/dependencies/PyPi/#{node['kernel']['machine']}/cookbook-dependencies.tgz" + mode '0644' + retries 3 + retry_delay 5 + action :create_if_missing +end + bash 'pip install' do user 'root' group 'root' cwd "#{node['cluster']['base_dir']}" code <<-REQ set -e - aws s3 cp #{node['cluster']['artifacts_build_url']}/PyPi/#{node['kernel']['machine']}/cookbook-dependencies.tgz cookbook-dependencies.tgz --region #{node['cluster']['region']} tar xzf cookbook-dependencies.tgz cd dependencies #{virtualenv_path}/bin/pip install * -f ./ --no-index diff --git a/cookbooks/aws-parallelcluster-platform/recipes/install/cuda.rb b/cookbooks/aws-parallelcluster-platform/recipes/install/cuda.rb index 9c414e6c1a..90d9ccf5a7 100644 --- a/cookbooks/aws-parallelcluster-platform/recipes/install/cuda.rb +++ b/cookbooks/aws-parallelcluster-platform/recipes/install/cuda.rb @@ -24,8 +24,10 @@ cuda_complete_version = "#{cuda_version}.#{cuda_patch}" cuda_version_suffix = '535.104.05' cuda_arch = arm_instance? ? 'linux_sbsa' : 'linux' +cuda_url = "#{node['cluster']['artifacts_s3_url']}/dependencies/cuda/cuda_#{cuda_complete_version}_#{cuda_version_suffix}_#{cuda_arch}.run" cuda_samples_version = '12.2' tmp_cuda_run = '/tmp/cuda.run' +cuda_samples_url = "#{node['cluster']['artifacts_s3_url']}/dependencies/cuda/samples/v#{cuda_samples_version}.tar.gz" tmp_cuda_sample_archive = '/tmp/cuda-sample.tar.gz' node.default['cluster']['nvidia']['cuda']['version'] = cuda_version @@ -33,17 +35,12 @@ node_attributes 'Save cuda and cuda samples versions for InSpec tests' # Get CUDA run file -bash 'Get CUDA run file from s3' do - user 'root' - group 'root' - cwd "#{node['cluster']['sources_dir']}" - code <<-CUDA - set -e - aws s3 cp #{node['cluster']['artifacts_build_url']}/cuda/cuda_#{cuda_complete_version}_#{cuda_version_suffix}_#{cuda_arch}.run #{tmp_cuda_run} --region #{node['cluster']['region']} - chmod 755 #{tmp_cuda_run} - CUDA +remote_file tmp_cuda_run do + source cuda_url + mode '0755' retries 3 retry_delay 5 + not_if { ::File.exist?("/usr/local/cuda-#{cuda_version}") } end # Install CUDA driver @@ -62,17 +59,12 @@ end # Get CUDA Sample Files -bash 'get CUDA Sample Files from s3' do - user 'root' - group 'root' - cwd "#{node['cluster']['sources_dir']}" - code <<-CUDA - set -e - aws s3 cp #{node['cluster']['artifacts_build_url']}/cuda/samples/v#{cuda_samples_version}.tar.gz #{tmp_cuda_sample_archive} --region #{node['cluster']['region']} - chmod 644 #{tmp_cuda_sample_archive} - CUDA +remote_file tmp_cuda_sample_archive do + source cuda_samples_url + mode '0644' retries 3 retry_delay 5 + not_if { ::File.exist?("/usr/local/cuda-#{cuda_version}/samples") } end # Unpack CUDA Samples diff --git a/cookbooks/aws-parallelcluster-platform/recipes/install/intel_mpi.rb b/cookbooks/aws-parallelcluster-platform/recipes/install/intel_mpi.rb index e6e465af79..f70a58f27f 100644 --- a/cookbooks/aws-parallelcluster-platform/recipes/install/intel_mpi.rb +++ b/cookbooks/aws-parallelcluster-platform/recipes/install/intel_mpi.rb @@ -29,7 +29,7 @@ intelmpi_installation_path = "/opt/intel/mpi/#{intelmpi_version}" intelmpi_installer = "l_mpi_oneapi_p_#{intelmpi_full_version}_offline.sh" intelmpi_installer_path = "#{node['cluster']['sources_dir']}/#{intelmpi_installer}" -intelmpi_installer_url = "#{node['cluster']['base_build_url']}/archives/impi/#{intelmpi_installer}" +intelmpi_installer_url = "#{node['cluster']['artifacts_s3_url']}/impi/#{intelmpi_installer}" intelmpi_qt_version = '6.5.3' # Prerequisite for module install @@ -39,17 +39,12 @@ end # fetch intelmpi installer script -bash 'get intelmpi from s3' do - user 'root' - group 'root' - cwd "#{node['cluster']['sources_dir']}" - code <<-IMPI - set -e - aws s3 cp #{intelmpi_installer_url} #{intelmpi_installer_path} --region #{node['cluster']['region']} - chmod 744 #{intelmpi_installer_path} - IMPI - retries 5 - retry_delay 10 +remote_file intelmpi_installer_path do + source intelmpi_installer_url + mode '0744' + retries 3 + retry_delay 5 + not_if { ::File.exist?(intelmpi_installation_path.to_s) } end bash "install intel mpi" do diff --git a/cookbooks/aws-parallelcluster-platform/resources/dcv/partial/_dcv_common.rb b/cookbooks/aws-parallelcluster-platform/resources/dcv/partial/_dcv_common.rb index 388b400dd2..894000cc96 100644 --- a/cookbooks/aws-parallelcluster-platform/resources/dcv/partial/_dcv_common.rb +++ b/cookbooks/aws-parallelcluster-platform/resources/dcv/partial/_dcv_common.rb @@ -147,15 +147,10 @@ def optionally_disable_rnd # Extract DCV packages unless ::File.exist?(dcv_tarball) - bash 'get dcv from s3' do - user 'root' - group 'root' - cwd "#{node['cluster']['sources_dir']}" - code <<-DCV - set -e - aws s3 cp #{dcv_url} #{dcv_tarball} --region #{node['cluster']['region']} - chmod 644 #{dcv_tarball} - DCV + remote_file dcv_tarball do + source dcv_url + checksum dcv_sha256sum + mode '0644' retries 3 retry_delay 5 end @@ -277,7 +272,7 @@ def dcv_gpu_accel_supported? end def dcv_url - "#{node['cluster']['artifacts_build_url']}/dcv/#{dcv_package}.tgz" + "#{node['cluster']['artifacts_s3_url']}/dependencies/dcv/#{dcv_package}.tgz" end def dcv_tarball @@ -290,4 +285,4 @@ def dcvauth_virtualenv def dcvauth_virtualenv_path node['cluster']['dcv']['authenticator']['virtualenv_path'] -end +end \ No newline at end of file diff --git a/cookbooks/aws-parallelcluster-platform/resources/fabric_manager/partial/_fabric_manager_install_rhel.rb b/cookbooks/aws-parallelcluster-platform/resources/fabric_manager/partial/_fabric_manager_install_rhel.rb index aa48c47331..7a7b49ea2a 100644 --- a/cookbooks/aws-parallelcluster-platform/resources/fabric_manager/partial/_fabric_manager_install_rhel.rb +++ b/cookbooks/aws-parallelcluster-platform/resources/fabric_manager/partial/_fabric_manager_install_rhel.rb @@ -13,14 +13,22 @@ # See the License for the specific language governing permissions and limitations under the License. action :install_package do + remote_file "#{node['cluster']['sources_dir']}/#{fabric_manager_package}-#{fabric_manager_version}.rpm" do + source "#{fabric_manager_url}" + mode '0644' + retries 3 + retry_delay 5 + action :create_if_missing + end + package 'yum-plugin-versionlock' bash "Install #{fabric_manager_package}" do user 'root' cwd node['cluster']['sources_dir'] code <<-FABRIC_MANAGER_INSTALL set -e - aws s3 cp #{fabric_manager_url} #{fabric_manager_package}-#{fabric_manager_version}.rpm --region #{node['cluster']['region']} - yum install -y #{fabric_manager_package}-#{fabric_manager_version}.rpm yum versionlock #{fabric_manager_package} + yum install -y #{fabric_manager_package}-#{fabric_manager_version}.rpm + yum versionlock #{fabric_manager_package} FABRIC_MANAGER_INSTALL retries 3 retry_delay 5 @@ -32,5 +40,5 @@ def arch_suffix end def fabric_manager_url - "#{node['cluster']['artifacts_build_url']}/nvidia_fabric/#{platform}/#{fabric_manager_package}-#{fabric_manager_version}-1.#{arch_suffix}.rpm" + "#{node['cluster']['artifacts_s3_url']}/dependencies/nvidia_fabric/#{platform}/#{fabric_manager_package}-#{fabric_manager_version}-1.#{arch_suffix}.rpm" end diff --git a/cookbooks/aws-parallelcluster-platform/resources/gdrcopy/gdrcopy_redhat8.rb b/cookbooks/aws-parallelcluster-platform/resources/gdrcopy/gdrcopy_redhat8.rb index 7a66fbdd9e..ad71c03387 100644 --- a/cookbooks/aws-parallelcluster-platform/resources/gdrcopy/gdrcopy_redhat8.rb +++ b/cookbooks/aws-parallelcluster-platform/resources/gdrcopy/gdrcopy_redhat8.rb @@ -23,6 +23,10 @@ def gdrcopy_enabled? nvidia_enabled? end +def gdrcopy_build_dependencies + %w(rpm-build make check check-devel) +end + def gdrcopy_platform "el#{node['platform_version'].to_i}" end diff --git a/cookbooks/aws-parallelcluster-platform/resources/gdrcopy/partial/_gdrcopy_common.rb b/cookbooks/aws-parallelcluster-platform/resources/gdrcopy/partial/_gdrcopy_common.rb index 0dd6fbd8b2..bef0644160 100644 --- a/cookbooks/aws-parallelcluster-platform/resources/gdrcopy/partial/_gdrcopy_common.rb +++ b/cookbooks/aws-parallelcluster-platform/resources/gdrcopy/partial/_gdrcopy_common.rb @@ -38,17 +38,13 @@ def gdrcopy_checksum recursive true end - bash 'get gdrcopy from s3' do - user 'root' - group 'root' - cwd "#{node['cluster']['sources_dir']}" - code <<-GDR - set -e - aws s3 cp #{node['cluster']['artifacts_build_url']}/gdr_copy/v#{gdrcopy_version}.tar.gz #{gdrcopy_tarball} --region #{node['cluster']['region']} - chmod 644 #{gdrcopy_tarball} - GDR + remote_file gdrcopy_tarball do + source gdrcopy_url + mode '0644' retries 3 retry_delay 5 + checksum gdrcopy_checksum + action :create_if_missing end package_repos 'update package repos' do diff --git a/cookbooks/aws-parallelcluster-platform/resources/install_packages/install_packages_amazon2.rb b/cookbooks/aws-parallelcluster-platform/resources/install_packages/install_packages_amazon2.rb index cca896fbd4..cbf0b35715 100644 --- a/cookbooks/aws-parallelcluster-platform/resources/install_packages/install_packages_amazon2.rb +++ b/cookbooks/aws-parallelcluster-platform/resources/install_packages/install_packages_amazon2.rb @@ -41,12 +41,19 @@ def default_packages alinux_extras_topic topic end + remote_file "epel_deps.tar.gz" do + source "#{node['cluster']['artifacts_s3_url']}/dependencies/epel/rhel7/#{node['kernel']['machine']}/epel_deps.tar.gz" + mode '0644' + retries 3 + retry_delay 5 + action :create_if_missing + end + bash 'yum install missing deps' do user 'root' group 'root' code <<-REQ set -e - aws s3 cp #{node['cluster']['artifacts_build_url']}/epel/rhel7/#{node['kernel']['machine']}/epel_deps.tar.gz epel_deps.tar.gz --region #{node['cluster']['region']} tar xzf epel_deps.tar.gz cd epel yum install -y * 2>/dev/null diff --git a/cookbooks/aws-parallelcluster-platform/resources/install_packages/install_packages_redhat8.rb b/cookbooks/aws-parallelcluster-platform/resources/install_packages/install_packages_redhat8.rb index c30f17c0c6..b22860dba5 100644 --- a/cookbooks/aws-parallelcluster-platform/resources/install_packages/install_packages_redhat8.rb +++ b/cookbooks/aws-parallelcluster-platform/resources/install_packages/install_packages_redhat8.rb @@ -23,12 +23,41 @@ def default_packages # environment-modules required by EFA, Intel MPI and ARM PL # Removed libssh2-devel from base_packages since is not shipped by RedHat 8 and in conflict with package libssh-0.9.6-3.el8.x86_64 # iptables needed for IMDS setup - %w(vim ksh tcsh zsh openssl-devel ncurses-devel pam-devel net-tools openmotif-devel - libXmu-devel hwloc-devel libdb-devel tcl-devel automake autoconf libtool - httpd boost-devel mlocate R atlas-devel - blas-devel libffi-devel dkms libedit-devel jq + # %w(vim ksh tcsh zsh openssl-devel ncurses-devel pam-devel net-tools openmotif-devel + # libXmu-devel hwloc-devel libdb-devel tcl-devel automake autoconf libtool + # httpd boost-devel mlocate R atlas-devel + # blas-devel libffi-devel dkms libedit-devel jq + # libical-devel sendmail libxml2-devel libglvnd-devel + # libgcrypt-devel libevent-devel glibc-static bind-utils + # iproute NetworkManager-config-routing-rules python3 python3-pip iptables libcurl-devel yum-plugin-versionlock + # coreutils moreutils curl environment-modules gcc gcc-c++ bzip2) R + %w(vim ksh tcsh zsh openssl-devel ncurses-devel pam-devel net-tools + libXmu-devel libdb-devel tcl-devel automake autoconf libtool + httpd boost-devel mlocate atlas-devel + libffi-devel jq libical-devel sendmail libxml2-devel libglvnd-devel - libgcrypt-devel libevent-devel glibc-static bind-utils - iproute NetworkManager-config-routing-rules python3 python3-pip iptables libcurl-devel yum-plugin-versionlock - coreutils moreutils curl environment-modules gcc gcc-c++ bzip2) + libgcrypt-devel libevent-devel bind-utils + iproute python3 python3-pip iptables libcurl-devel + coreutils curl environment-modules gcc gcc-c++ bzip2) end + +action :install_extras do + remote_file "epel_deps.tar.gz" do + source "#{node['cluster']['artifacts_s3_url']}/dependencies/epel/rhel8/x86_64/epel_deps.tar.gz" + mode '0644' + retries 3 + retry_delay 5 + action :create_if_missing + end + + bash 'yum install missing deps' do + user 'root' + group 'root' + code <<-REQ + set -e + tar xzf epel_deps.tar.gz + cd epel + yum install -y * 2>/dev/null + REQ + end +end \ No newline at end of file diff --git a/cookbooks/aws-parallelcluster-platform/resources/nvidia_dcgm/partial/_nvidia_dcgm_rhel.rb b/cookbooks/aws-parallelcluster-platform/resources/nvidia_dcgm/partial/_nvidia_dcgm_rhel.rb index 7ea74facd7..997762acd1 100644 --- a/cookbooks/aws-parallelcluster-platform/resources/nvidia_dcgm/partial/_nvidia_dcgm_rhel.rb +++ b/cookbooks/aws-parallelcluster-platform/resources/nvidia_dcgm/partial/_nvidia_dcgm_rhel.rb @@ -13,11 +13,19 @@ # See the License for the specific language governing permissions and limitations under the License. action :install_package do + remote_file "#{node['cluster']['sources_dir']}/#{dcgm_package}-#{package_version}.rpm" do + source "#{dcgm_url}" + mode '0644' + retries 3 + retry_delay 5 + action :create_if_missing + end + bash "Install #{dcgm_package}" do user 'root' + cwd node['cluster']['sources_dir'] code <<-DCGM_INSTALL set -e - aws s3 cp #{dcgm_url} #{dcgm_package}-#{package_version}.rpm --region #{node['cluster']['region']} yum install -y #{dcgm_package}-#{package_version}.rpm DCGM_INSTALL retries 3 @@ -26,7 +34,7 @@ end def dcgm_url - "#{node['cluster']['artifacts_build_url']}/nvidia_dcgm/#{platform}/#{dcgm_package}-#{package_version}-1-#{arch_suffix}.rpm" + "#{node['cluster']['artifacts_s3_url']}/dependencies/nvidia_dcgm/#{platform}/#{dcgm_package}-#{package_version}-1-#{arch_suffix}.rpm" end def dcgm_package diff --git a/cookbooks/aws-parallelcluster-platform/resources/nvidia_driver/partial/_nvidia_driver_common.rb b/cookbooks/aws-parallelcluster-platform/resources/nvidia_driver/partial/_nvidia_driver_common.rb index ca39c1909d..0707eedbf7 100644 --- a/cookbooks/aws-parallelcluster-platform/resources/nvidia_driver/partial/_nvidia_driver_common.rb +++ b/cookbooks/aws-parallelcluster-platform/resources/nvidia_driver/partial/_nvidia_driver_common.rb @@ -27,17 +27,12 @@ node.default['cluster']['nvidia']['driver_version'] = _nvidia_driver_version node_attributes "Save Nvidia driver version for Inspec tests" - bash 'get nvidia driver from s3' do - user 'root' - group 'root' - cwd "#{node['cluster']['sources_dir']}" - code <<-NVIDIA - set -e - aws s3 cp #{node['cluster']['artifacts_build_url']}/nvidia_driver/NVIDIA-Linux-#{nvidia_arch}-#{_nvidia_driver_version}.run #{tmp_nvidia_run} --region #{node['cluster']['region']} - chmod 755 #{tmp_nvidia_run} - NVIDIA + remote_file tmp_nvidia_run do + source nvidia_driver_url + mode '0755' retries 3 retry_delay 5 + not_if { ::File.exist?('/usr/bin/nvidia-smi') } end # Make sure nouveau kernel module is unloaded, otherwise installation of NVIDIA driver fails @@ -71,9 +66,6 @@ owner 'root' group 'root' mode '0644' - variables( - compiler_path: compiler_path - ) end end @@ -131,4 +123,4 @@ def nvidia_kernel_module else "kernel-open" end -end +end \ No newline at end of file diff --git a/cookbooks/aws-parallelcluster-shared/attributes/environment.rb b/cookbooks/aws-parallelcluster-shared/attributes/environment.rb index 5992a98886..302ee07e32 100644 --- a/cookbooks/aws-parallelcluster-shared/attributes/environment.rb +++ b/cookbooks/aws-parallelcluster-shared/attributes/environment.rb @@ -5,6 +5,4 @@ # URL for ParallelCluster Artifacts stored in public S3 buckets # ['cluster']['region'] will need to be defined by image_dna.json during AMI build. -default['cluster']['base_build_url'] = "s3://aws-parallelcluster-dev-build-dependencies" -default['cluster']['artifacts_s3_url'] = "https://#{node['cluster']['region']}-aws-parallelcluster.s3.#{node['cluster']['region']}.#{node['cluster']['aws_domain']}/archives" -default['cluster']['artifacts_build_url'] = "#{node['cluster']['base_build_url']}/archives/dependencies" \ No newline at end of file +default['cluster']['artifacts_s3_url'] = "https://#{node['cluster']['region']}-aws-parallelcluster.s3.#{node['cluster']['region']}.#{node['cluster']['aws_domain']}/archives" \ No newline at end of file diff --git a/cookbooks/aws-parallelcluster-shared/resources/install_pyenv.rb b/cookbooks/aws-parallelcluster-shared/resources/install_pyenv.rb index 64a151b8c9..bfd8cf66ca 100644 --- a/cookbooks/aws-parallelcluster-shared/resources/install_pyenv.rb +++ b/cookbooks/aws-parallelcluster-shared/resources/install_pyenv.rb @@ -4,15 +4,17 @@ unified_mode true # Resource:: to create a Python virtual environment for a given user -property :user_only, [true, false], default: false -property :user, String + property :python_version, String property :prefix, String +property :user_only, [true, false], default: false +property :user, String + default_action :run action :run do python_version = new_resource.python_version || node['cluster']['python-version'] - python_url = "#{node['cluster']['artifacts_build_url']}/python/Python-#{python_version}.tgz" + python_url = "#{node['cluster']['artifacts_s3_url']}/dependencies/python/Python-#{python_version}.tgz" if new_resource.user_only raise "user property is required for resource install_pyenv when user_only is set to true" unless new_resource.user @@ -25,6 +27,14 @@ recursive true end + remote_file "#{prefix}/Python-#{python_version}.tgz" do + source python_url + mode '0644' + retries 3 + retry_delay 5 + action :create_if_missing + end + user = new_resource.user || 'root' bash "install python #{python_version}" do @@ -33,7 +43,6 @@ cwd "#{prefix}" code <<-VENV set -e - aws s3 cp #{python_url} Python-#{python_version}.tgz --region #{node['cluster']['region']} tar -xzf Python-#{python_version}.tgz cd Python-#{python_version} ./configure --prefix=#{prefix}/versions/#{python_version} @@ -41,4 +50,5 @@ make install VENV end -end + +end \ No newline at end of file diff --git a/cookbooks/aws-parallelcluster-shared/resources/package_repos/package_repos_redhat8.rb b/cookbooks/aws-parallelcluster-shared/resources/package_repos/package_repos_redhat8.rb index 9e5ffac37b..9433c4c21b 100644 --- a/cookbooks/aws-parallelcluster-shared/resources/package_repos/package_repos_redhat8.rb +++ b/cookbooks/aws-parallelcluster-shared/resources/package_repos/package_repos_redhat8.rb @@ -23,7 +23,7 @@ action :setup do include_recipe 'yum' - include_recipe "yum-epel" + # include_recipe "yum-epel" package 'yum-utils' do retries 3 diff --git a/cookbooks/aws-parallelcluster-slurm/recipes/install/install_jwt.rb b/cookbooks/aws-parallelcluster-slurm/recipes/install/install_jwt.rb index 487d86fe51..cafc069560 100644 --- a/cookbooks/aws-parallelcluster-slurm/recipes/install/install_jwt.rb +++ b/cookbooks/aws-parallelcluster-slurm/recipes/install/install_jwt.rb @@ -16,19 +16,17 @@ # limitations under the License. jwt_version = '1.15.3' +jwt_url = "#{node['cluster']['artifacts_s3_url']}/dependencies/jwt/v#{jwt_version}.tar.gz" jwt_tarball = "#{node['cluster']['sources_dir']}/libjwt-#{jwt_version}.tar.gz" +jwt_sha256 = 'cb2fd95123689e7d209a3a8c060e02f68341c9a5ded524c0cd881a8cd20d711f' -bash 'get jwt from s3' do - user 'root' - group 'root' - cwd "#{node['cluster']['sources_dir']}" - code <<-JWT - set -e - aws s3 cp #{node['cluster']['artifacts_build_url']}/jwt/v#{jwt_version}.tar.gz #{jwt_tarball} --region #{node['cluster']['region']} - chmod 644 #{jwt_tarball} - JWT +remote_file jwt_tarball do + source jwt_url + mode '0644' retries 3 retry_delay 5 + checksum jwt_sha256 + action :create_if_missing end jwt_dependencies 'Install jwt dependencies' @@ -47,4 +45,4 @@ make -j $CORES sudo make install LIBJWT -end unless redhat_on_docker? +end unless redhat_on_docker? \ No newline at end of file diff --git a/cookbooks/aws-parallelcluster-slurm/recipes/install/install_pmix.rb b/cookbooks/aws-parallelcluster-slurm/recipes/install/install_pmix.rb index 126bc1f680..6548b203e7 100644 --- a/cookbooks/aws-parallelcluster-slurm/recipes/install/install_pmix.rb +++ b/cookbooks/aws-parallelcluster-slurm/recipes/install/install_pmix.rb @@ -21,17 +21,13 @@ pmix_sha256 = node['cluster']['pmix']['sha256'] pmix_tarball = "#{node['cluster']['sources_dir']}/pmix-#{pmix_version}.tar.gz" -bash 'get pmix from s3' do - user 'root' - group 'root' - cwd "#{node['cluster']['sources_dir']}" - code <<-PMIX - set -e - aws s3 cp #{node['cluster']['artifacts_build_url']}/pmix/pmix-#{pmix_version}.tar.gz #{pmix_tarball} --region #{node['cluster']['region']} - chmod 644 #{pmix_tarball} - PMIX +remote_file pmix_tarball do + source pmix_url + mode '0644' retries 3 retry_delay 5 + checksum pmix_sha256 + action :create_if_missing end bash 'Install PMIx' do diff --git a/cookbooks/aws-parallelcluster-slurm/recipes/install/install_slurm.rb b/cookbooks/aws-parallelcluster-slurm/recipes/install/install_slurm.rb index e7e269cd70..656c5b33ae 100644 --- a/cookbooks/aws-parallelcluster-slurm/recipes/install/install_slurm.rb +++ b/cookbooks/aws-parallelcluster-slurm/recipes/install/install_slurm.rb @@ -38,17 +38,13 @@ include_recipe 'aws-parallelcluster-slurm::slurm_users' # Get slurm tarball -bash 'get slurm from s3' do - user 'root' - group 'root' - cwd "#{node['cluster']['sources_dir']}" - code <<-SLURM - set -e - aws s3 cp #{node['cluster']['artifacts_build_url']}/slurm/#{slurm_tar_name}.tar.gz #{slurm_tarball} --region #{node['cluster']['region']} - chmod 644 #{slurm_tarball} - SLURM +remote_file slurm_tarball do + source slurm_url + mode '0644' retries 3 retry_delay 5 + checksum slurm_sha256 + action :create_if_missing end # Copy Slurm patches diff --git a/cookbooks/aws-parallelcluster-slurm/resources/munge/partial/_munge_actions.rb b/cookbooks/aws-parallelcluster-slurm/resources/munge/partial/_munge_actions.rb index 37f89500b9..b97e037eaf 100644 --- a/cookbooks/aws-parallelcluster-slurm/resources/munge/partial/_munge_actions.rb +++ b/cookbooks/aws-parallelcluster-slurm/resources/munge/partial/_munge_actions.rb @@ -20,11 +20,13 @@ default_action :setup munge_version = node['cluster']['munge']['munge_version'] +munge_url = "#{node['cluster']['munge']['base_url']}/munge-#{munge_version}.tar.gz" munge_tarball = "#{node['cluster']['sources_dir']}/munge-#{munge_version}.tar.gz" munge_user = node['cluster']['munge']['user'] munge_user_id = node['cluster']['munge']['user_id'] munge_group = node['cluster']['munge']['group'] munge_group_id = node['cluster']['munge']['group_id'] +munge_sha256 = node['cluster']['munge']['sha256'] action :setup do directory node['cluster']['sources_dir'] do @@ -60,17 +62,13 @@ action :download_source_code do # Get munge tarball - bash 'get munge from s3' do - user 'root' - group 'root' - cwd "#{node['cluster']['sources_dir']}" - code <<-MUNGE - set -e - aws s3 cp #{node['cluster']['artifacts_build_url']}/munge/munge-#{munge_version}.tar.gz #{munge_tarball} --region #{node['cluster']['region']} - chmod 644 #{munge_tarball} - MUNGE + remote_file munge_tarball do + source munge_url + mode '0644' retries 3 retry_delay 5 + checksum munge_sha256 + action :create_if_missing end end diff --git a/cookbooks/aws-parallelcluster-slurm/resources/mysql_client/partial/_setup_rhel_based.rb b/cookbooks/aws-parallelcluster-slurm/resources/mysql_client/partial/_setup_rhel_based.rb index cdaa3cf58c..38310a9a59 100644 --- a/cookbooks/aws-parallelcluster-slurm/resources/mysql_client/partial/_setup_rhel_based.rb +++ b/cookbooks/aws-parallelcluster-slurm/resources/mysql_client/partial/_setup_rhel_based.rb @@ -14,7 +14,7 @@ # See the License for the specific language governing permissions and limitations under the License. action :setup do - mysql_archive_url = package_archive("#{node['cluster']['base_build_url']}/archives") + mysql_archive_url = package_archive(node['cluster']['artifacts_s3_url']) mysql_tar_file = "/tmp/#{package_filename}" log "Downloading MySQL packages archive from #{mysql_archive_url}" @@ -22,17 +22,12 @@ # Add MySQL source file action_create_source_link - bash 'get mysql from s3' do - user 'root' - group 'root' - cwd "#{node['cluster']['sources_dir']}" - code <<-MYSQL - set -e - aws s3 cp #{mysql_archive_url} #{mysql_tar_file} --region #{node['cluster']['region']} - chmod 644 #{mysql_tar_file} - MYSQL - retries 5 - retry_delay 10 + remote_file mysql_tar_file do + source mysql_archive_url + mode '0644' + retries 3 + retry_delay 5 + action :create_if_missing end bash 'Install MySQL packages' do diff --git a/cookbooks/aws-parallelcluster-slurm/resources/slurm_dependencies/slurm_dependencies_redhat8.rb b/cookbooks/aws-parallelcluster-slurm/resources/slurm_dependencies/slurm_dependencies_redhat8.rb index d448c24d6d..4e6db7c4fa 100644 --- a/cookbooks/aws-parallelcluster-slurm/resources/slurm_dependencies/slurm_dependencies_redhat8.rb +++ b/cookbooks/aws-parallelcluster-slurm/resources/slurm_dependencies/slurm_dependencies_redhat8.rb @@ -15,5 +15,5 @@ use 'partial/_slurm_dependencies_common' def dependencies - %w(json-c-devel http-parser-devel lua-devel perl dbus-devel) + %w(json-c-devel perl dbus-devel) end From 6b278d2d8c6cfb68b80d74846baf90e626f9d27a Mon Sep 17 00:00:00 2001 From: Helena Greebe Date: Tue, 23 Jul 2024 11:21:44 -0400 Subject: [PATCH 5/7] [ADC Build Image] Patch AWS_CA_BUNDLE --- .../recipes/install.rb | 2 +- .../resources/efs/partial/_install_from_tar.rb | 2 +- .../files/isolated/iso-ca-bundle-config.sh | 15 +++++++++++++++ .../recipes/install/awscli.rb | 18 +++++++++++++++--- 4 files changed, 32 insertions(+), 5 deletions(-) create mode 100644 cookbooks/aws-parallelcluster-platform/files/isolated/iso-ca-bundle-config.sh diff --git a/cookbooks/aws-parallelcluster-environment/recipes/install.rb b/cookbooks/aws-parallelcluster-environment/recipes/install.rb index a369754a80..6e70f010f0 100644 --- a/cookbooks/aws-parallelcluster-environment/recipes/install.rb +++ b/cookbooks/aws-parallelcluster-environment/recipes/install.rb @@ -21,7 +21,7 @@ raid 'Install RAID prerequisite packages' lustre 'Install FSx options' efs 'Install efs-utils' -stunnel 'Install stunnel' +# stunnel 'Install stunnel' system_authentication "Install packages required for directory service integration" # TODO: Disable spack until the feature is complete # spack 'Install Spack' diff --git a/cookbooks/aws-parallelcluster-environment/resources/efs/partial/_install_from_tar.rb b/cookbooks/aws-parallelcluster-environment/resources/efs/partial/_install_from_tar.rb index e6c9a805ba..cf65440cf0 100644 --- a/cookbooks/aws-parallelcluster-environment/resources/efs/partial/_install_from_tar.rb +++ b/cookbooks/aws-parallelcluster-environment/resources/efs/partial/_install_from_tar.rb @@ -32,7 +32,7 @@ package_name = "amazon-efs-utils" package_version = new_resource.efs_utils_version efs_utils_tarball = "#{node['cluster']['sources_dir']}/efs-utils-#{package_version}.tar.gz" - efs_utils_url = "https://github.com/aws/efs-utils/archive/v#{package_version}.tar.gz" + efs_utils_url = "#{node['cluster']['artifacts_s3_url']}/dependencies/efs/v#{package_version}.tar.gz" # Do not install efs-utils if a same or newer version is already installed. return if already_installed?(package_name, package_version) diff --git a/cookbooks/aws-parallelcluster-platform/files/isolated/iso-ca-bundle-config.sh b/cookbooks/aws-parallelcluster-platform/files/isolated/iso-ca-bundle-config.sh new file mode 100644 index 0000000000..5476a51272 --- /dev/null +++ b/cookbooks/aws-parallelcluster-platform/files/isolated/iso-ca-bundle-config.sh @@ -0,0 +1,15 @@ +#!/bin/bash +set -ex + +function get_instance_region { + local _token=$(curl -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 3600") + curl -H "X-aws-ec2-metadata-token: $_token" -v "http://169.254.169.254/latest/meta-data/placement/region" 2> /dev/null +} + +REGION="$(get_instance_region)" + +echo -e "export AWS_CA_BUNDLE=/etc/pki/${REGION}/certs/ca-bundle.pem" >> /etc/profile.d/aws-cli-default-config.sh + +echo "export AWS_DEFAULT_REGION=${REGION}" >> /etc/profile.d/aws-cli-default-config.sh + +echo "Defaults env_keep += \"AWS_DEFAULT_REGION AWS_CA_BUNDLE\"" > /etc/sudoers.d/pcluster-aws-cli-envkeep diff --git a/cookbooks/aws-parallelcluster-platform/recipes/install/awscli.rb b/cookbooks/aws-parallelcluster-platform/recipes/install/awscli.rb index 655eeabcb4..40ef17bdc9 100644 --- a/cookbooks/aws-parallelcluster-platform/recipes/install/awscli.rb +++ b/cookbooks/aws-parallelcluster-platform/recipes/install/awscli.rb @@ -17,7 +17,6 @@ # See the License for the specific language governing permissions and limitations under the License. return if ::File.exist?("/usr/local/bin/aws") || redhat_on_docker? -return if platform?('amazon') file_cache_path = Chef::Config[:file_cache_path] region = aws_region @@ -45,10 +44,23 @@ if region.start_with?("us-iso") bash 'install awscli' do - code "#{file_cache_path}/awscli/aws/install -i /usr/local/aws -b /usr/local/bin/aws" + code "#{file_cache_path}/awscli/aws/install -i /usr/local/aws -b /usr/local/bin" + end + + cookbook_file "#{node['cluster']['scripts_dir']}/iso-ca-bundle-config.sh" do + source 'isolated/iso-ca-bundle-config.sh' + cookbook 'aws-parallelcluster-platform' + owner 'root' + group 'root' + mode '0755' + action :create_if_missing + end + + execute "patch ca bundle" do + command "sh #{node['cluster']['scripts_dir']}/iso-ca-bundle-config.sh" end else bash 'install awscli' do code "#{cookbook_virtualenv_path}/bin/python#{node['cluster']['python-major-minor-version']} #{file_cache_path}/awscli/awscli-bundle/install -i /usr/local/aws -b /usr/local/bin/aws" end -end +end \ No newline at end of file From 357fd1e254c6ad1977658ebfb5830869d45a55c9 Mon Sep 17 00:00:00 2001 From: Helena Greebe Date: Wed, 24 Jul 2024 22:26:46 -0400 Subject: [PATCH 6/7] [ADC Build Image] Include differing paths for iso regions --- .../recipes/install.rb | 25 ++----- .../install/custom_parallelcluster_node.rb | 19 ++++++ .../recipes/install/parallelcluster_node.rb | 45 ++----------- .../recipes/install.rb | 2 +- .../cloudwatch/partial/_cloudwatch_common.rb | 2 +- .../resources/efa/efa_alinux2.rb | 6 +- .../resources/lustre/lustre_redhat8.rb | 14 +++- .../spec/unit/resources/efs_spec.rb | 4 +- .../files/isolated/iso-ca-bundle-config.sh | 2 +- .../recipes/install/awscli.rb | 8 +-- .../recipes/install/cuda.rb | 2 +- .../resources/dcv/partial/_dcv_common.rb | 2 +- .../resources/gdrcopy/gdrcopy_amazon2.rb | 8 +++ .../resources/gdrcopy/gdrcopy_redhat8.rb | 6 +- .../gdrcopy/partial/_gdrcopy_common_rhel.rb | 2 +- .../install_packages_amazon2.rb | 42 +++++++----- .../install_packages_redhat8.rb | 65 ++++++++++--------- .../partial/_nvidia_driver_common.rb | 5 +- .../attributes/environment.rb | 2 +- .../resources/install_pyenv.rb | 7 +- .../package_repos/package_repos_alinux2.rb | 10 +-- .../package_repos/package_repos_redhat8.rb | 5 +- .../recipes/install/install_jwt.rb | 2 +- .../slurm_dependencies_redhat8.rb | 8 ++- 24 files changed, 156 insertions(+), 137 deletions(-) diff --git a/cookbooks/aws-parallelcluster-awsbatch/recipes/install.rb b/cookbooks/aws-parallelcluster-awsbatch/recipes/install.rb index 2ecd0ef06a..bffc4528db 100644 --- a/cookbooks/aws-parallelcluster-awsbatch/recipes/install.rb +++ b/cookbooks/aws-parallelcluster-awsbatch/recipes/install.rb @@ -28,6 +28,9 @@ # Check whether install a custom aws-parallelcluster-awsbatch-cli package or the standard one # Install awsbatch cli into awsbatch virtual env +if aws_region.start_with?("us-iso") && !node['cluster']['custom_awsbatchcli_package'].empty? + node.default['cluster']['custom_awsbatchcli_package'] = "#{node['cluster']['artifacts_s3_url']}/dependencies/awsbatch/aws-parallelcluster.tgz" +end if !node['cluster']['custom_awsbatchcli_package'].nil? && !node['cluster']['custom_awsbatchcli_package'].empty? # Install custom aws-parallelcluster package bash "install aws-parallelcluster-awsbatch-cli" do @@ -42,30 +45,14 @@ curl --retry 3 -L -o aws-parallelcluster.tgz ${custom_package_url} mkdir aws-parallelcluster-awsbatch-cli tar -xzf aws-parallelcluster.tgz --directory aws-parallelcluster-awsbatch-cli - cd aws-parallelcluster-awsbatch-cli/*aws-parallelcluster-* + #{node['cluster']['awsbatch_virtualenv_path']}/bin/pip install awsbatch-cli/ CLI end else # Install aws-parallelcluster-awsbatch-cli package - remote_file "#{Chef::Config[:file_cache_path]}/aws-parallelcluster.tgz" do - source "#{node['cluster']['artifacts_s3_url']}/dependencies/awsbatch/aws-parallelcluster.tgz" - mode '0644' - retries 3 - retry_delay 5 - action :create_if_missing - end - - bash "install aws-parallelcluster-awsbatch-cli" do - cwd Chef::Config[:file_cache_path] - code <<-CLI - set -e - mkdir aws-parallelcluster-awsbatch-cli - tar -xzf aws-parallelcluster.tgz --directory aws-parallelcluster-awsbatch-cli - - cd aws-parallelcluster-awsbatch-cli/*aws-parallelcluster-* - #{node['cluster']['awsbatch_virtualenv_path']}/bin/pip install awsbatch-cli/ - CLI + execute "pip_install_parallelcluster_awsbatch_cli" do + command "#{node['cluster']['awsbatch_virtualenv_path']}/bin/pip install aws-parallelcluster-awsbatch-cli==#{node['cluster']['parallelcluster-awsbatch-cli-version']}" end end diff --git a/cookbooks/aws-parallelcluster-computefleet/recipes/install/custom_parallelcluster_node.rb b/cookbooks/aws-parallelcluster-computefleet/recipes/install/custom_parallelcluster_node.rb index d137b1849e..097ca9bf8d 100644 --- a/cookbooks/aws-parallelcluster-computefleet/recipes/install/custom_parallelcluster_node.rb +++ b/cookbooks/aws-parallelcluster-computefleet/recipes/install/custom_parallelcluster_node.rb @@ -19,6 +19,25 @@ # TODO: once the pyenv Chef resource supports installing packages from a path (e.g. `pip install .`), convert the # bash block to a recipe that uses the pyenv resource. +remote_file "#{node['cluster']['base_dir']}/node-dependencies.tgz" do + source "#{node['cluster']['artifacts_s3_url']}/dependencies/PyPi/#{node['kernel']['machine']}/node-dependencies.tgz" + mode '0644' + retries 3 + retry_delay 5 + action :create_if_missing +end + +bash 'pip install' do + user 'root' + group 'root' + cwd "#{node['cluster']['base_dir']}" + code <<-REQ + set -e + tar xzf node-dependencies.tgz + cd node + #{node_virtualenv_path}/bin/pip install * -f ./ --no-index + REQ +end bash "install custom aws-parallelcluster-node" do cwd Chef::Config[:file_cache_path] diff --git a/cookbooks/aws-parallelcluster-computefleet/recipes/install/parallelcluster_node.rb b/cookbooks/aws-parallelcluster-computefleet/recipes/install/parallelcluster_node.rb index 69b9169e93..82bf5fc294 100644 --- a/cookbooks/aws-parallelcluster-computefleet/recipes/install/parallelcluster_node.rb +++ b/cookbooks/aws-parallelcluster-computefleet/recipes/install/parallelcluster_node.rb @@ -33,51 +33,14 @@ not_if { ::File.exist?("#{virtualenv_path}/bin/activate") } end -remote_file "#{node['cluster']['base_dir']}/node-dependencies.tgz" do - source "#{node['cluster']['artifacts_s3_url']}/dependencies/PyPi/#{node['kernel']['machine']}/node-dependencies.tgz" - mode '0644' - retries 3 - retry_delay 5 - action :create_if_missing -end - -bash 'pip install' do - user 'root' - group 'root' - cwd "#{node['cluster']['base_dir']}" - code <<-REQ - set -e - tar xzf node-dependencies.tgz - cd node - #{virtualenv_path}/bin/pip install * -f ./ --no-index - REQ +if aws_region.start_with?("us-iso") && !is_custom_node? + node.default['cluster']['custom_node_package'] = "#{node['cluster']['artifacts_s3_url']}/dependencies/node/aws-parallelcluster-node.tgz" end if is_custom_node? include_recipe 'aws-parallelcluster-computefleet::custom_parallelcluster_node' else - remote_file "#{Chef::Config[:file_cache_path]}/aws-parallelcluster-node.tgz" do - source "#{node['cluster']['artifacts_s3_url']}/dependencies/node/aws-parallelcluster-node.tgz" - mode '0644' - retries 3 - retry_delay 5 - action :create_if_missing - end - - bash "install official aws-parallelcluster-node" do - cwd Chef::Config[:file_cache_path] - code <<-NODE - set -e - [[ ":$PATH:" != *":/usr/local/bin:"* ]] && PATH="/usr/local/bin:${PATH}" - echo "PATH is $PATH" - source #{node_virtualenv_path}/bin/activate - pip uninstall --yes aws-parallelcluster-node - rm -fr aws-parallelcluster-node - mkdir aws-parallelcluster-node - tar -xzf aws-parallelcluster-node.tgz --directory aws-parallelcluster-node - cd aws-parallelcluster-node/*aws-parallelcluster-node-* - pip install . - deactivate - NODE + execute "install official aws-parallelcluster-node" do + command "#{virtualenv_path}/bin/pip install aws-parallelcluster-node==#{node['cluster']['parallelcluster-node-version']}" end end diff --git a/cookbooks/aws-parallelcluster-environment/recipes/install.rb b/cookbooks/aws-parallelcluster-environment/recipes/install.rb index 6e70f010f0..a369754a80 100644 --- a/cookbooks/aws-parallelcluster-environment/recipes/install.rb +++ b/cookbooks/aws-parallelcluster-environment/recipes/install.rb @@ -21,7 +21,7 @@ raid 'Install RAID prerequisite packages' lustre 'Install FSx options' efs 'Install efs-utils' -# stunnel 'Install stunnel' +stunnel 'Install stunnel' system_authentication "Install packages required for directory service integration" # TODO: Disable spack until the feature is complete # spack 'Install Spack' diff --git a/cookbooks/aws-parallelcluster-environment/resources/cloudwatch/partial/_cloudwatch_common.rb b/cookbooks/aws-parallelcluster-environment/resources/cloudwatch/partial/_cloudwatch_common.rb index 8fc917d19d..df0accab7f 100644 --- a/cookbooks/aws-parallelcluster-environment/resources/cloudwatch/partial/_cloudwatch_common.rb +++ b/cookbooks/aws-parallelcluster-environment/resources/cloudwatch/partial/_cloudwatch_common.rb @@ -28,7 +28,7 @@ action_cloudwatch_prerequisite region = node['cluster']['region'] - key_path = "/amazoncloudwatch-agent/assets/amazon-cloudwatch-agent.gpg" + key_path = "amazoncloudwatch-agent/assets/amazon-cloudwatch-agent.gpg" cloudwatch_key_url = "https://s3.amazonaws.com/#{key_path}" if region.start_with?("us-iso") cloudwatch_key_url = "https://s3.#{aws_region}.#{aws_domain}/#{key_path}" diff --git a/cookbooks/aws-parallelcluster-environment/resources/efa/efa_alinux2.rb b/cookbooks/aws-parallelcluster-environment/resources/efa/efa_alinux2.rb index a3ad4defd2..13396dc9d4 100644 --- a/cookbooks/aws-parallelcluster-environment/resources/efa/efa_alinux2.rb +++ b/cookbooks/aws-parallelcluster-environment/resources/efa/efa_alinux2.rb @@ -24,6 +24,10 @@ def conflicting_packages end def prerequisites - %w(libibverbs-utils librdmacm-utils) + if aws_region.start_with?("us-iso") + %w(libibverbs-utils librdmacm-utils) + else + %w(environment-modules libibverbs-utils librdmacm-utils) + end end end diff --git a/cookbooks/aws-parallelcluster-environment/resources/lustre/lustre_redhat8.rb b/cookbooks/aws-parallelcluster-environment/resources/lustre/lustre_redhat8.rb index 8b727392ad..b4c549489f 100644 --- a/cookbooks/aws-parallelcluster-environment/resources/lustre/lustre_redhat8.rb +++ b/cookbooks/aws-parallelcluster-environment/resources/lustre/lustre_redhat8.rb @@ -44,8 +44,18 @@ action_class do def base_url # https://docs.aws.amazon.com/fsx/latest/LustreGuide/install-lustre-client.html#lustre-client-rhel - "https://fsx-lustre-client-repo.s3.#{aws_region}.#{aws_domain}/el/#{node['platform_version']}/$basearch" end + if aws_region.start_with?("us-iso") + "https://fsx-lustre-client-repo.s3.#{aws_region}.#{aws_domain}/el/#{node['platform_version']}/$basearch" + else + "https://fsx-lustre-client-repo.s3.amazonaws.com/el/#{node['platform_version']}/$basearch" + end + end def public_key - "https://fsx-lustre-client-repo-public-keys.s3.#{aws_region}.#{aws_domain}/fsx-rpm-public-key.asc" end + if aws_region.start_with?("us-iso") + "https://fsx-lustre-client-repo-public-keys.s3.#{aws_region}.#{aws_domain}/fsx-rpm-public-key.asc" + else + "https://fsx-lustre-client-repo-public-keys.s3.amazonaws.com/fsx-rpm-public-key.asc" + end + end end diff --git a/cookbooks/aws-parallelcluster-environment/spec/unit/resources/efs_spec.rb b/cookbooks/aws-parallelcluster-environment/spec/unit/resources/efs_spec.rb index 1fc4c19ce5..c34f00fbb7 100644 --- a/cookbooks/aws-parallelcluster-environment/spec/unit/resources/efs_spec.rb +++ b/cookbooks/aws-parallelcluster-environment/spec/unit/resources/efs_spec.rb @@ -86,7 +86,7 @@ def mock_already_installed(package, expected_version, installed) cached(:source_dir) { 'SOURCE DIR' } cached(:utils_version) { '1.2.3' } cached(:tarball_path) { "#{source_dir}/efs-utils-#{utils_version}.tar.gz" } - cached(:tarball_url) { "https://github.com/aws/efs-utils/archive/v#{utils_version}.tar.gz" } + cached(:tarball_url) { "https://#{node['cluster']['region']}-aws-parallelcluster.s3.#{node['cluster']['region']}.test_aws_domain/archives/dependencies/efs/v#{utils_version}.tar.gz" } cached(:tarball_checksum) { 'TARBALL CHECKSUM' } cached(:bash_code) do <<-EFSUTILSINSTALL @@ -166,7 +166,7 @@ def mock_already_installed(package, expected_version, installed) cached(:source_dir) { 'SOURCE DIR' } cached(:utils_version) { '1.2.3' } cached(:tarball_path) { "#{source_dir}/efs-utils-#{utils_version}.tar.gz" } - cached(:tarball_url) { "https://github.com/aws/efs-utils/archive/v#{utils_version}.tar.gz" } + cached(:tarball_url) { "https://#{node['cluster']['region']}-aws-parallelcluster.s3.#{node['cluster']['region']}.test_aws_domain/archives/dependencies/efs/v#{utils_version}.tar.gz" } cached(:tarball_checksum) { 'TARBALL CHECKSUM' } cached(:bash_code) do <<-EFSUTILSINSTALL diff --git a/cookbooks/aws-parallelcluster-platform/files/isolated/iso-ca-bundle-config.sh b/cookbooks/aws-parallelcluster-platform/files/isolated/iso-ca-bundle-config.sh index 5476a51272..991daa72fb 100644 --- a/cookbooks/aws-parallelcluster-platform/files/isolated/iso-ca-bundle-config.sh +++ b/cookbooks/aws-parallelcluster-platform/files/isolated/iso-ca-bundle-config.sh @@ -8,7 +8,7 @@ function get_instance_region { REGION="$(get_instance_region)" -echo -e "export AWS_CA_BUNDLE=/etc/pki/${REGION}/certs/ca-bundle.pem" >> /etc/profile.d/aws-cli-default-config.sh +echo "export AWS_CA_BUNDLE=/etc/pki/${REGION}/certs/ca-bundle.pem" >> /etc/profile.d/aws-cli-default-config.sh echo "export AWS_DEFAULT_REGION=${REGION}" >> /etc/profile.d/aws-cli-default-config.sh diff --git a/cookbooks/aws-parallelcluster-platform/recipes/install/awscli.rb b/cookbooks/aws-parallelcluster-platform/recipes/install/awscli.rb index 40ef17bdc9..677a80ba3f 100644 --- a/cookbooks/aws-parallelcluster-platform/recipes/install/awscli.rb +++ b/cookbooks/aws-parallelcluster-platform/recipes/install/awscli.rb @@ -23,9 +23,9 @@ awscli_url = "https://s3.amazonaws.com/aws-cli/awscli-bundle.zip" if region.start_with?("us-iso-") - awscli_url ="https://aws-sdk-common-infra-dca-prod-deployment-bucket.s3.#{aws_region}.#{aws_domain}/aws-cli-v2/linux/x86_64/awscli-exe-linux-x86_64.zip" + awscli_url = "https://aws-sdk-common-infra-dca-prod-deployment-bucket.s3.#{aws_region}.#{aws_domain}/aws-cli-v2/linux/x86_64/awscli-exe-linux-x86_64.zip" elsif region.start_with?("us-isob-") - awscli_url ="https://aws-sdk-common-infra-lck-prod-deployment-bucket.s3.#{aws_region}.#{aws_domain}/aws-cli-v2/linux/x86_64/awscli-exe-linux-x86_64.zip" + awscli_url = "https://aws-sdk-common-infra-lck-prod-deployment-bucket.s3.#{aws_region}.#{aws_domain}/aws-cli-v2/linux/x86_64/awscli-exe-linux-x86_64.zip" end remote_file 'download awscli bundle from s3' do @@ -61,6 +61,6 @@ end else bash 'install awscli' do - code "#{cookbook_virtualenv_path}/bin/python#{node['cluster']['python-major-minor-version']} #{file_cache_path}/awscli/awscli-bundle/install -i /usr/local/aws -b /usr/local/bin/aws" + code "#{cookbook_virtualenv_path}/bin/python #{file_cache_path}/awscli/awscli-bundle/install -i /usr/local/aws -b /usr/local/bin/aws" end -end \ No newline at end of file +end diff --git a/cookbooks/aws-parallelcluster-platform/recipes/install/cuda.rb b/cookbooks/aws-parallelcluster-platform/recipes/install/cuda.rb index 90d9ccf5a7..cca8a2f443 100644 --- a/cookbooks/aws-parallelcluster-platform/recipes/install/cuda.rb +++ b/cookbooks/aws-parallelcluster-platform/recipes/install/cuda.rb @@ -26,8 +26,8 @@ cuda_arch = arm_instance? ? 'linux_sbsa' : 'linux' cuda_url = "#{node['cluster']['artifacts_s3_url']}/dependencies/cuda/cuda_#{cuda_complete_version}_#{cuda_version_suffix}_#{cuda_arch}.run" cuda_samples_version = '12.2' -tmp_cuda_run = '/tmp/cuda.run' cuda_samples_url = "#{node['cluster']['artifacts_s3_url']}/dependencies/cuda/samples/v#{cuda_samples_version}.tar.gz" +tmp_cuda_run = '/tmp/cuda.run' tmp_cuda_sample_archive = '/tmp/cuda-sample.tar.gz' node.default['cluster']['nvidia']['cuda']['version'] = cuda_version diff --git a/cookbooks/aws-parallelcluster-platform/resources/dcv/partial/_dcv_common.rb b/cookbooks/aws-parallelcluster-platform/resources/dcv/partial/_dcv_common.rb index 894000cc96..3f8172ea99 100644 --- a/cookbooks/aws-parallelcluster-platform/resources/dcv/partial/_dcv_common.rb +++ b/cookbooks/aws-parallelcluster-platform/resources/dcv/partial/_dcv_common.rb @@ -285,4 +285,4 @@ def dcvauth_virtualenv def dcvauth_virtualenv_path node['cluster']['dcv']['authenticator']['virtualenv_path'] -end \ No newline at end of file +end diff --git a/cookbooks/aws-parallelcluster-platform/resources/gdrcopy/gdrcopy_amazon2.rb b/cookbooks/aws-parallelcluster-platform/resources/gdrcopy/gdrcopy_amazon2.rb index 793d8bf0a1..fec40d1f15 100644 --- a/cookbooks/aws-parallelcluster-platform/resources/gdrcopy/gdrcopy_amazon2.rb +++ b/cookbooks/aws-parallelcluster-platform/resources/gdrcopy/gdrcopy_amazon2.rb @@ -25,6 +25,14 @@ def gdrcopy_platform 'amzn-2' end +def gdrcopy_build_dependencies + if aws_region.start_with?("us-iso") + %w(dkms rpm-build make check check-devel) + else + %w(dkms rpm-build make check check-devel subunit subunit-devel) + end +end + def gdrcopy_arch arm_instance? ? 'aarch64' : 'x86_64' end diff --git a/cookbooks/aws-parallelcluster-platform/resources/gdrcopy/gdrcopy_redhat8.rb b/cookbooks/aws-parallelcluster-platform/resources/gdrcopy/gdrcopy_redhat8.rb index ad71c03387..4efe69c126 100644 --- a/cookbooks/aws-parallelcluster-platform/resources/gdrcopy/gdrcopy_redhat8.rb +++ b/cookbooks/aws-parallelcluster-platform/resources/gdrcopy/gdrcopy_redhat8.rb @@ -24,7 +24,11 @@ def gdrcopy_enabled? end def gdrcopy_build_dependencies - %w(rpm-build make check check-devel) + if aws_region.start_with?("us-iso") + %w(rpm-build make check check-devel) + else + %w(dkms rpm-build make check check-devel subunit subunit-devel) + end end def gdrcopy_platform diff --git a/cookbooks/aws-parallelcluster-platform/resources/gdrcopy/partial/_gdrcopy_common_rhel.rb b/cookbooks/aws-parallelcluster-platform/resources/gdrcopy/partial/_gdrcopy_common_rhel.rb index 2d32d25e84..824df60cfc 100644 --- a/cookbooks/aws-parallelcluster-platform/resources/gdrcopy/partial/_gdrcopy_common_rhel.rb +++ b/cookbooks/aws-parallelcluster-platform/resources/gdrcopy/partial/_gdrcopy_common_rhel.rb @@ -17,7 +17,7 @@ def gdrcopy_service end def gdrcopy_build_dependencies - %w(dkms rpm-build make check check-devel) + %w(dkms rpm-build make check check-devel subunit subunit-devel) end def installation_code diff --git a/cookbooks/aws-parallelcluster-platform/resources/install_packages/install_packages_amazon2.rb b/cookbooks/aws-parallelcluster-platform/resources/install_packages/install_packages_amazon2.rb index cbf0b35715..09b3200f18 100644 --- a/cookbooks/aws-parallelcluster-platform/resources/install_packages/install_packages_amazon2.rb +++ b/cookbooks/aws-parallelcluster-platform/resources/install_packages/install_packages_amazon2.rb @@ -22,7 +22,7 @@ def default_packages # environment-modules required by EFA, Intel MPI and ARM PL # iptables needed for IMDS setup - %w(vim ksh tcsh zsh openssl-devel ncurses-devel pam-devel net-tools openmotif-devel + packages = %w(vim ksh tcsh zsh openssl-devel ncurses-devel pam-devel net-tools openmotif-devel libXmu-devel hwloc-devel libdb-devel tcl-devel automake autoconf pyparted libtool httpd boost-devel system-lsb mlocate atlas-devel glibc-static iproute libffi-devel dkms libedit-devel sendmail cmake byacc libglvnd-devel libgcrypt-devel libevent-devel @@ -32,6 +32,12 @@ def default_packages jq wget python-pip NetworkManager-config-routing-rules python3 python3-pip iptables libcurl-devel yum-plugin-versionlock coreutils environment-modules bzip2) + + if aws_region.start_with?("us-iso") + packages -= %w(moreutils) + end + + packages end action :install_extras do @@ -41,22 +47,24 @@ def default_packages alinux_extras_topic topic end - remote_file "epel_deps.tar.gz" do - source "#{node['cluster']['artifacts_s3_url']}/dependencies/epel/rhel7/#{node['kernel']['machine']}/epel_deps.tar.gz" - mode '0644' - retries 3 - retry_delay 5 - action :create_if_missing - end + if aws_region.start_with?("us-iso") + remote_file "epel_deps.tar.gz" do + source "#{node['cluster']['artifacts_s3_url']}/dependencies/epel/rhel7/#{node['kernel']['machine']}/epel_deps.tar.gz" + mode '0644' + retries 3 + retry_delay 5 + action :create_if_missing + end - bash 'yum install missing deps' do - user 'root' - group 'root' - code <<-REQ - set -e - tar xzf epel_deps.tar.gz - cd epel - yum install -y * 2>/dev/null - REQ + bash 'yum install missing deps' do + user 'root' + group 'root' + code <<-REQ + set -e + tar xzf epel_deps.tar.gz + cd epel + yum install -y * 2>/dev/null + REQ + end end end diff --git a/cookbooks/aws-parallelcluster-platform/resources/install_packages/install_packages_redhat8.rb b/cookbooks/aws-parallelcluster-platform/resources/install_packages/install_packages_redhat8.rb index b22860dba5..fdd6b71795 100644 --- a/cookbooks/aws-parallelcluster-platform/resources/install_packages/install_packages_redhat8.rb +++ b/cookbooks/aws-parallelcluster-platform/resources/install_packages/install_packages_redhat8.rb @@ -23,41 +23,42 @@ def default_packages # environment-modules required by EFA, Intel MPI and ARM PL # Removed libssh2-devel from base_packages since is not shipped by RedHat 8 and in conflict with package libssh-0.9.6-3.el8.x86_64 # iptables needed for IMDS setup - # %w(vim ksh tcsh zsh openssl-devel ncurses-devel pam-devel net-tools openmotif-devel - # libXmu-devel hwloc-devel libdb-devel tcl-devel automake autoconf libtool - # httpd boost-devel mlocate R atlas-devel - # blas-devel libffi-devel dkms libedit-devel jq - # libical-devel sendmail libxml2-devel libglvnd-devel - # libgcrypt-devel libevent-devel glibc-static bind-utils - # iproute NetworkManager-config-routing-rules python3 python3-pip iptables libcurl-devel yum-plugin-versionlock - # coreutils moreutils curl environment-modules gcc gcc-c++ bzip2) R - %w(vim ksh tcsh zsh openssl-devel ncurses-devel pam-devel net-tools - libXmu-devel libdb-devel tcl-devel automake autoconf libtool - httpd boost-devel mlocate atlas-devel - libffi-devel jq + packages = %w(vim ksh tcsh zsh openssl-devel ncurses-devel pam-devel net-tools openmotif-devel + libXmu-devel hwloc-devel libdb-devel tcl-devel automake autoconf libtool + httpd boost-devel mlocate R atlas-devel + blas-devel libffi-devel dkms libedit-devel jq libical-devel sendmail libxml2-devel libglvnd-devel - libgcrypt-devel libevent-devel bind-utils - iproute python3 python3-pip iptables libcurl-devel - coreutils curl environment-modules gcc gcc-c++ bzip2) + libgcrypt-devel libevent-devel glibc-static bind-utils + iproute NetworkManager-config-routing-rules python3 python3-pip iptables libcurl-devel yum-plugin-versionlock + coreutils moreutils curl environment-modules gcc gcc-c++ bzip2) + + if aws_region.start_with?("us-iso") + packages -= %w(openmotif-devel hwloc-devel R blas-devel dkms libedit-devel glibc-static + NetworkManager-config-routing-rules yum-plugin-versionlock moreutils) + end + + packages end action :install_extras do - remote_file "epel_deps.tar.gz" do - source "#{node['cluster']['artifacts_s3_url']}/dependencies/epel/rhel8/x86_64/epel_deps.tar.gz" - mode '0644' - retries 3 - retry_delay 5 - action :create_if_missing - end + if aws_region.start_with?("us-iso") + remote_file "epel_deps.tar.gz" do + source "#{node['cluster']['artifacts_s3_url']}/dependencies/epel/rhel8/x86_64/epel_deps.tar.gz" + mode '0644' + retries 3 + retry_delay 5 + action :create_if_missing + end - bash 'yum install missing deps' do - user 'root' - group 'root' - code <<-REQ - set -e - tar xzf epel_deps.tar.gz - cd epel - yum install -y * 2>/dev/null - REQ + bash 'yum install missing deps' do + user 'root' + group 'root' + code <<-REQ + set -e + tar xzf epel_deps.tar.gz + cd epel + yum install -y * 2>/dev/null + REQ + end end -end \ No newline at end of file +end diff --git a/cookbooks/aws-parallelcluster-platform/resources/nvidia_driver/partial/_nvidia_driver_common.rb b/cookbooks/aws-parallelcluster-platform/resources/nvidia_driver/partial/_nvidia_driver_common.rb index 0707eedbf7..947e68f49f 100644 --- a/cookbooks/aws-parallelcluster-platform/resources/nvidia_driver/partial/_nvidia_driver_common.rb +++ b/cookbooks/aws-parallelcluster-platform/resources/nvidia_driver/partial/_nvidia_driver_common.rb @@ -66,6 +66,9 @@ owner 'root' group 'root' mode '0644' + variables( + compiler_path: compiler_path + ) end end @@ -123,4 +126,4 @@ def nvidia_kernel_module else "kernel-open" end -end \ No newline at end of file +end diff --git a/cookbooks/aws-parallelcluster-shared/attributes/environment.rb b/cookbooks/aws-parallelcluster-shared/attributes/environment.rb index 302ee07e32..6e11dce797 100644 --- a/cookbooks/aws-parallelcluster-shared/attributes/environment.rb +++ b/cookbooks/aws-parallelcluster-shared/attributes/environment.rb @@ -5,4 +5,4 @@ # URL for ParallelCluster Artifacts stored in public S3 buckets # ['cluster']['region'] will need to be defined by image_dna.json during AMI build. -default['cluster']['artifacts_s3_url'] = "https://#{node['cluster']['region']}-aws-parallelcluster.s3.#{node['cluster']['region']}.#{node['cluster']['aws_domain']}/archives" \ No newline at end of file +default['cluster']['artifacts_s3_url'] = "https://#{node['cluster']['region']}-aws-parallelcluster.s3.#{node['cluster']['region']}.#{node['cluster']['aws_domain']}/archives" diff --git a/cookbooks/aws-parallelcluster-shared/resources/install_pyenv.rb b/cookbooks/aws-parallelcluster-shared/resources/install_pyenv.rb index bfd8cf66ca..1f7afb1266 100644 --- a/cookbooks/aws-parallelcluster-shared/resources/install_pyenv.rb +++ b/cookbooks/aws-parallelcluster-shared/resources/install_pyenv.rb @@ -16,6 +16,10 @@ python_version = new_resource.python_version || node['cluster']['python-version'] python_url = "#{node['cluster']['artifacts_s3_url']}/dependencies/python/Python-#{python_version}.tgz" + if !aws_region.start_with?("us-iso") && new_resource.python_version + python_url = "https://www.python.org/ftp/python/#{python_version}/Python-#{python_version}.tgz" + end + if new_resource.user_only raise "user property is required for resource install_pyenv when user_only is set to true" unless new_resource.user prefix = new_resource.prefix || "#{::File.expand_path("~#{user}")}/.pyenv" @@ -50,5 +54,4 @@ make install VENV end - -end \ No newline at end of file +end diff --git a/cookbooks/aws-parallelcluster-shared/resources/package_repos/package_repos_alinux2.rb b/cookbooks/aws-parallelcluster-shared/resources/package_repos/package_repos_alinux2.rb index d8c10e3b16..42c0be0197 100644 --- a/cookbooks/aws-parallelcluster-shared/resources/package_repos/package_repos_alinux2.rb +++ b/cookbooks/aws-parallelcluster-shared/resources/package_repos/package_repos_alinux2.rb @@ -23,15 +23,15 @@ include_recipe 'yum' alinux_extras_topic 'epel' if aws_region.start_with?("us-iso") - bash "Disable epel repo" do - user 'root' - group 'root' - code <<-EPEL + bash "Disable epel repo" do + user 'root' + group 'root' + code <<-EPEL set -e yum-config-manager --disable epel EPEL - end end + end end action :update do diff --git a/cookbooks/aws-parallelcluster-shared/resources/package_repos/package_repos_redhat8.rb b/cookbooks/aws-parallelcluster-shared/resources/package_repos/package_repos_redhat8.rb index 9433c4c21b..c2fe584ca4 100644 --- a/cookbooks/aws-parallelcluster-shared/resources/package_repos/package_repos_redhat8.rb +++ b/cookbooks/aws-parallelcluster-shared/resources/package_repos/package_repos_redhat8.rb @@ -23,7 +23,10 @@ action :setup do include_recipe 'yum' - # include_recipe "yum-epel" + + unless aws_region.start_with?("us-iso") + include_recipe "yum-epel" + end package 'yum-utils' do retries 3 diff --git a/cookbooks/aws-parallelcluster-slurm/recipes/install/install_jwt.rb b/cookbooks/aws-parallelcluster-slurm/recipes/install/install_jwt.rb index cafc069560..5dd551857e 100644 --- a/cookbooks/aws-parallelcluster-slurm/recipes/install/install_jwt.rb +++ b/cookbooks/aws-parallelcluster-slurm/recipes/install/install_jwt.rb @@ -45,4 +45,4 @@ make -j $CORES sudo make install LIBJWT -end unless redhat_on_docker? \ No newline at end of file +end unless redhat_on_docker? diff --git a/cookbooks/aws-parallelcluster-slurm/resources/slurm_dependencies/slurm_dependencies_redhat8.rb b/cookbooks/aws-parallelcluster-slurm/resources/slurm_dependencies/slurm_dependencies_redhat8.rb index 4e6db7c4fa..ce136389bc 100644 --- a/cookbooks/aws-parallelcluster-slurm/resources/slurm_dependencies/slurm_dependencies_redhat8.rb +++ b/cookbooks/aws-parallelcluster-slurm/resources/slurm_dependencies/slurm_dependencies_redhat8.rb @@ -15,5 +15,11 @@ use 'partial/_slurm_dependencies_common' def dependencies - %w(json-c-devel perl dbus-devel) + packages = %w(json-c-devel http-parser-devel lua-devel perl dbus-devel) + + if aws_region.start_with?("us-iso") + packages -= %w(http-parser-devel lua-devel) + end + + packages end From 05b9c185cf9fb3d3df799736c6117cae779bcd9e Mon Sep 17 00:00:00 2001 From: Helena Greebe Date: Thu, 25 Jul 2024 08:25:22 -0400 Subject: [PATCH 7/7] [ADC Build Image] Update changelog --- CHANGELOG.md | 2 +- .../spec/unit/resources/efs_spec.rb | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 965cc1a001..74c4a0c7e7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ This file is used to list changes made in each version of the AWS ParallelCluste **ENHANCEMENTS** - Add support for external Slurmdbd. -- Allow build-image to be run in an isolated network. +- Add support for build-image to be run in an isolated network and ADC regions - Add support for Amazon Linux 2023. **CHANGES** diff --git a/cookbooks/aws-parallelcluster-environment/spec/unit/resources/efs_spec.rb b/cookbooks/aws-parallelcluster-environment/spec/unit/resources/efs_spec.rb index c34f00fbb7..ec508c3e40 100644 --- a/cookbooks/aws-parallelcluster-environment/spec/unit/resources/efs_spec.rb +++ b/cookbooks/aws-parallelcluster-environment/spec/unit/resources/efs_spec.rb @@ -86,7 +86,7 @@ def mock_already_installed(package, expected_version, installed) cached(:source_dir) { 'SOURCE DIR' } cached(:utils_version) { '1.2.3' } cached(:tarball_path) { "#{source_dir}/efs-utils-#{utils_version}.tar.gz" } - cached(:tarball_url) { "https://#{node['cluster']['region']}-aws-parallelcluster.s3.#{node['cluster']['region']}.test_aws_domain/archives/dependencies/efs/v#{utils_version}.tar.gz" } + cached(:tarball_url) { "https://#{aws_region}-aws-parallelcluster.s3.#{aws_region}.test_aws_domain/archives/dependencies/efs/v#{utils_version}.tar.gz" } cached(:tarball_checksum) { 'TARBALL CHECKSUM' } cached(:bash_code) do <<-EFSUTILSINSTALL @@ -104,6 +104,7 @@ def mock_already_installed(package, expected_version, installed) runner = runner(platform: platform, version: version, step_into: ['efs']) do |node| node.override['cluster']['efs_utils']['tarball_path'] = tarball_path node.override['cluster']['sources_dir'] = source_dir + node.override['cluster']['region'] = aws_region end ConvergeEfs.install_utils(runner, efs_utils_version: utils_version, tarball_checksum: tarball_checksum) end @@ -166,7 +167,7 @@ def mock_already_installed(package, expected_version, installed) cached(:source_dir) { 'SOURCE DIR' } cached(:utils_version) { '1.2.3' } cached(:tarball_path) { "#{source_dir}/efs-utils-#{utils_version}.tar.gz" } - cached(:tarball_url) { "https://#{node['cluster']['region']}-aws-parallelcluster.s3.#{node['cluster']['region']}.test_aws_domain/archives/dependencies/efs/v#{utils_version}.tar.gz" } + cached(:tarball_url) { "https://#{aws_region}-aws-parallelcluster.s3.#{aws_region}.test_aws_domain/archives/dependencies/efs/v#{utils_version}.tar.gz" } cached(:tarball_checksum) { 'TARBALL CHECKSUM' } cached(:bash_code) do <<-EFSUTILSINSTALL @@ -191,6 +192,7 @@ def mock_already_installed(package, expected_version, installed) runner = runner(platform: platform, version: version, step_into: ['efs']) do |node| node.override['cluster']['efs_utils']['tarball_path'] = tarball_path node.override['cluster']['sources_dir'] = source_dir + node.override['cluster']['region'] = aws_region end ConvergeEfs.install_utils(runner, efs_utils_version: utils_version, tarball_checksum: tarball_checksum) end