Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
script_dir = "#{keys_dir}/scripts"
script_path = "#{script_dir}/keys-manager.sh"

sync_file_path = "#{keys_dir}/.login_nodes_keys_sync_file"
sync_file_path = "#{keys_dir}/cluster-config-version"

case node['cluster']['node_type']
when 'ComputeFleet'
Expand All @@ -46,11 +46,11 @@
user 'root'
end

write_sync_file(sync_file_path)
write_config_version_file(sync_file_path)

when 'LoginNode'

wait_sync_file(sync_file_path)
wait_cluster_config_file(sync_file_path)

execute 'Import Login Nodes keys' do
command "bash #{script_path} --import --folder-path #{keys_dir}"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

describe 'aws-parallelcluster-environment::login_nodes_keys' do
SHARED_DIR_LOGIN_NODES = "/SHARED_DIR_LOGIN_NODES".freeze
SYNC_FILE = "#{SHARED_DIR_LOGIN_NODES}/.login_nodes_keys_sync_file".freeze
SYNC_FILE = "#{SHARED_DIR_LOGIN_NODES}/cluster-config-version".freeze
CLUSTER_CONFIG_VERSION = "CLUSTER_CONFIG_VERSION".freeze

for_all_oses do |platform, version|
Expand Down Expand Up @@ -94,10 +94,10 @@
cached(:node) { chef_run.node }

it "waits for cluster config version file" do
is_expected.to run_bash("Wait for synchronization file at #{SYNC_FILE} to be written for version #{CLUSTER_CONFIG_VERSION}").with(
is_expected.to run_bash("Wait for file at #{SYNC_FILE} to be updated by the head node").with(
code: "[[ \"$(cat #{SYNC_FILE})\" == \"#{CLUSTER_CONFIG_VERSION}\" ]] || exit 1",
retries: 30,
retry_delay: 10,
retry_delay: 15,
timeout: 5
)
end
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -162,27 +162,4 @@ def fetch_instance_type_data
fetch_s3_object("copy_instance_type_data_from_s3", node['cluster']['instance_types_data_s3_key'], node['cluster']['instance_types_data_path'], instance_version_id)
end
end

# Record the current cluster config version in the file at +path+.
#
# The file acts as a synchronization point between the head node and the other
# cluster nodes: the head node writes it to signal that every file in the shared
# folder related to the cluster config has been updated to the current version.
#
# @param path [String] location of the config version file to write
def write_config_version_file(path)
  # Read the version once, up front, so the resource body only declares properties.
  config_version = node['cluster']['cluster_config_version']

  file path do
    owner 'root'
    group 'root'
    mode '0644'
    content config_version
  end
end

# Wait until the config version file at +path+ contains the current cluster
# config version.
#
# Declares a bash resource whose command exits with status 1 whenever the file
# content differs from node['cluster']['cluster_config_version']; the resource
# is configured with 30 retries, a retry delay of 15 and a timeout of 5.
#
# @param path [String] location of the config version file to poll
def wait_cluster_config_file(path)
  expected_version = node['cluster']['cluster_config_version']
  # Shell test: succeed only when the file content equals the expected version.
  version_check = %([[ "$(cat #{path})" == "#{expected_version}" ]] || exit 1)

  bash "Wait cluster config files to be updated by the head node" do
    timeout 5
    retry_delay 15
    retries 30
    code version_check
  end
end
end
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
end

it "does not wait for cluster config version file" do
is_expected.not_to run_execute("Wait cluster config files to be updated by the head node")
is_expected.not_to run_execute("Wait for file at /shared_dir_login_nodes/cluster-config-version to be updated by the head node")
end
end

Expand Down Expand Up @@ -90,7 +90,7 @@
end

it "does not wait for cluster config version file" do
is_expected.not_to run_execute("Wait cluster config files to be updated by the head node")
is_expected.not_to run_execute("Wait for file at #{cluster_config_path} to be updated by the head node")
end

it "reads config from shared folder" do
Expand Down Expand Up @@ -146,7 +146,7 @@
else
raise "Unsupported node_type #{node_type}"
end
is_expected.to run_bash("Wait cluster config files to be updated by the head node").with(
is_expected.to run_bash("Wait for file at #{config_version_file} to be updated by the head node").with(
code: "[[ \"$(cat #{config_version_file})\" == \"cluster_config_version\" ]] || exit 1",
retries: 30,
retry_delay: 15,
Expand Down
26 changes: 13 additions & 13 deletions cookbooks/aws-parallelcluster-shared/libraries/helpers.rb
Original file line number Diff line number Diff line change
Expand Up @@ -82,28 +82,28 @@ def is_custom_node?
!custom_node_package.nil? && !custom_node_package.empty?
end

def write_sync_file(path)
# Write a synchronization file containing the current cluster config version.
# Synchronization files are used as a synchronization point between cluster nodes
# to signal that a group of actions have been completed.
def write_config_version_file(path)
# Write the cluster config version into the specified file.
# This file is used as a synchronization point between the head node and the other cluster nodes.
# In particular, the head node uses this file to signal to other cluster nodes that all files
# in the shared folder related to the cluster config have been updated with the current config version.
file path do
content node["cluster"]["cluster_config_version"]
mode "0644"
owner "root"
group "root"
content node['cluster']['cluster_config_version']
mode '0644'
owner 'root'
group 'root'
end
end

def wait_sync_file(path)
def wait_cluster_config_file(path)
# Wait for a synchronization file to be written for the current cluster config version.
# Synchronization files are used as a synchronization point between cluster nodes
# to signal that a group of actions have been completed.
cluster_config_version = node["cluster"]["cluster_config_version"]
# Wait for the config version file to contain the current cluster config version.
bash "Wait for synchronization file at #{path} to be written for version #{cluster_config_version}" do
code "[[ \"$(cat #{path})\" == \"#{cluster_config_version}\" ]] || exit 1"
bash "Wait for file at #{path} to be updated by the head node" do
code "[[ \"$(cat #{path})\" == \"#{node['cluster']['cluster_config_version']}\" ]] || exit 1"
retries 30
retry_delay 10
retry_delay 15
timeout 5
end
end