From a34fb0ac8eacfe8f3d245018d364d386d5a8148f Mon Sep 17 00:00:00 2001 From: Himani Deshpande Date: Mon, 4 Mar 2024 12:26:58 -0500 Subject: [PATCH] [DFSM]Using cluster-config-version file to be used during Init and Update phase of the clusters Moving login_nodes_keys.rb in init folder as it is invoked during init phase of cluster creation --- .../{config => init}/login_nodes_keys.rb | 6 ++--- .../unit/recipes/login_nodes_keys_spec.rb | 6 ++--- .../resources/fetch_config.rb | 23 ---------------- .../spec/unit/resources/fetch_config_spec.rb | 6 ++--- .../libraries/helpers.rb | 26 +++++++++---------- 5 files changed, 22 insertions(+), 45 deletions(-) rename cookbooks/aws-parallelcluster-environment/recipes/{config => init}/login_nodes_keys.rb (91%) diff --git a/cookbooks/aws-parallelcluster-environment/recipes/config/login_nodes_keys.rb b/cookbooks/aws-parallelcluster-environment/recipes/init/login_nodes_keys.rb similarity index 91% rename from cookbooks/aws-parallelcluster-environment/recipes/config/login_nodes_keys.rb rename to cookbooks/aws-parallelcluster-environment/recipes/init/login_nodes_keys.rb index e30df9877c..1a834bc434 100644 --- a/cookbooks/aws-parallelcluster-environment/recipes/config/login_nodes_keys.rb +++ b/cookbooks/aws-parallelcluster-environment/recipes/init/login_nodes_keys.rb @@ -21,7 +21,7 @@ script_dir = "#{keys_dir}/scripts" script_path = "#{script_dir}/keys-manager.sh" -sync_file_path = "#{keys_dir}/.login_nodes_keys_sync_file" +sync_file_path = "#{keys_dir}/cluster-config-version" case node['cluster']['node_type'] when 'ComputeFleet' @@ -46,11 +46,11 @@ user 'root' end - write_sync_file(sync_file_path) + write_config_version_file(sync_file_path) when 'LoginNode' - wait_sync_file(sync_file_path) + wait_cluster_config_file(sync_file_path) execute 'Import Login Nodes keys' do command "bash #{script_path} --import --folder-path #{keys_dir}" diff --git a/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/login_nodes_keys_spec.rb 
b/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/login_nodes_keys_spec.rb index 5603982d57..e857a32e26 100644 --- a/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/login_nodes_keys_spec.rb +++ b/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/login_nodes_keys_spec.rb @@ -2,7 +2,7 @@ describe 'aws-parallelcluster-environment::login_nodes_keys' do SHARED_DIR_LOGIN_NODES = "/SHARED_DIR_LOGIN_NODES".freeze - SYNC_FILE = "#{SHARED_DIR_LOGIN_NODES}/.login_nodes_keys_sync_file".freeze + SYNC_FILE = "#{SHARED_DIR_LOGIN_NODES}/cluster-config-version".freeze CLUSTER_CONFIG_VERSION = "CLUSTER_CONFIG_VERSION".freeze for_all_oses do |platform, version| @@ -94,10 +94,10 @@ cached(:node) { chef_run.node } it "waits for cluster config version file" do - is_expected.to run_bash("Wait for synchronization file at #{SYNC_FILE} to be written for version #{CLUSTER_CONFIG_VERSION}").with( + is_expected.to run_bash("Wait for file at #{SYNC_FILE} to be updated by the head node").with( code: "[[ \"$(cat #{SYNC_FILE})\" == \"#{CLUSTER_CONFIG_VERSION}\" ]] || exit 1", retries: 30, - retry_delay: 10, + retry_delay: 15, timeout: 5 ) end diff --git a/cookbooks/aws-parallelcluster-platform/resources/fetch_config.rb b/cookbooks/aws-parallelcluster-platform/resources/fetch_config.rb index 0f2b492ad0..9232b9ed93 100644 --- a/cookbooks/aws-parallelcluster-platform/resources/fetch_config.rb +++ b/cookbooks/aws-parallelcluster-platform/resources/fetch_config.rb @@ -162,27 +162,4 @@ def fetch_instance_type_data fetch_s3_object("copy_instance_type_data_from_s3", node['cluster']['instance_types_data_s3_key'], node['cluster']['instance_types_data_path'], instance_version_id) end end - - def write_config_version_file(path) - # Write the cluster config version into the specified file. - # This file is used as a synchronization point between the head node and the other cluster nodes. 
- # In particular, the head node uses this file to signal to other cluster nodes that all files - # in the shared folder related to the cluster config have been updated with the current config version. - file path do - content node['cluster']['cluster_config_version'] - mode '0644' - owner 'root' - group 'root' - end - end - - def wait_cluster_config_file(path) - # Wait for the config version file to contain the current cluster config version. - bash "Wait cluster config files to be updated by the head node" do - code "[[ \"$(cat #{path})\" == \"#{node['cluster']['cluster_config_version']}\" ]] || exit 1" - retries 30 - retry_delay 15 - timeout 5 - end - end end diff --git a/cookbooks/aws-parallelcluster-platform/spec/unit/resources/fetch_config_spec.rb b/cookbooks/aws-parallelcluster-platform/spec/unit/resources/fetch_config_spec.rb index daa80aa4a1..6e840d9103 100644 --- a/cookbooks/aws-parallelcluster-platform/spec/unit/resources/fetch_config_spec.rb +++ b/cookbooks/aws-parallelcluster-platform/spec/unit/resources/fetch_config_spec.rb @@ -59,7 +59,7 @@ end it "does not wait for cluster config version file" do - is_expected.not_to run_execute("Wait cluster config files to be updated by the head node") + is_expected.not_to run_execute("Wait for file at /shared_dir_login_nodes/cluster-config-version to be updated by the head node") end end @@ -90,7 +90,7 @@ end it "does not wait for cluster config version file" do - is_expected.not_to run_execute("Wait cluster config files to be updated by the head node") + is_expected.not_to run_execute("Wait for file at #{cluster_config_path} to be updated by the head node") end it "reads config from shared folder" do @@ -146,7 +146,7 @@ else raise "Unsupported node_type #{node_type}" end - is_expected.to run_bash("Wait cluster config files to be updated by the head node").with( + is_expected.to run_bash("Wait for file at #{config_version_file} to be updated by the head node").with( code: "[[ \"$(cat #{config_version_file})\" == 
\"cluster_config_version\" ]] || exit 1", retries: 30, retry_delay: 15, diff --git a/cookbooks/aws-parallelcluster-shared/libraries/helpers.rb b/cookbooks/aws-parallelcluster-shared/libraries/helpers.rb index 993e2f8a5b..cd24afd920 100644 --- a/cookbooks/aws-parallelcluster-shared/libraries/helpers.rb +++ b/cookbooks/aws-parallelcluster-shared/libraries/helpers.rb @@ -82,28 +82,28 @@ def is_custom_node? !custom_node_package.nil? && !custom_node_package.empty? end -def write_sync_file(path) - # Write a synchronization file containing the current cluster config version. - # Synchronization files are used as a synchronization point between cluster nodes - # to signal that a group of actions have been completed. +def write_config_version_file(path) + # Write the cluster config version into the specified file. + # This file is used as a synchronization point between the head node and the other cluster nodes. + # In particular, the head node uses this file to signal to other cluster nodes that all files + # in the shared folder related to the cluster config have been updated with the current config version. file path do - content node["cluster"]["cluster_config_version"] - mode "0644" - owner "root" - group "root" + content node['cluster']['cluster_config_version'] + mode '0644' + owner 'root' + group 'root' end end -def wait_sync_file(path) +def wait_cluster_config_file(path) # Wait for a synchronization file to be written for the current cluster config version. # Synchronization files are used as a synchronization point between cluster nodes # to signal that a group of actions have been completed. - cluster_config_version = node["cluster"]["cluster_config_version"] # Wait for the config version file to contain the current cluster config version. 
- bash "Wait for synchronization file at #{path} to be written for version #{cluster_config_version}" do - code "[[ \"$(cat #{path})\" == \"#{cluster_config_version}\" ]] || exit 1" + bash "Wait for file at #{path} to be updated by the head node" do + code "[[ \"$(cat #{path})\" == \"#{node['cluster']['cluster_config_version']}\" ]] || exit 1" retries 30 - retry_delay 10 + retry_delay 15 timeout 5 end end