From e8377077073363d89a651ee609ba45739551ee10 Mon Sep 17 00:00:00 2001 From: Xuanqi He Date: Mon, 20 May 2024 01:17:52 -0400 Subject: [PATCH 1/9] feat: Add data integrity checks before removing backup directories - Updated restore_home_shared_data.rb to include a diff check before removing /tmp/home/ directory. - Ensures data in the new home directory is the same as the original before deleting the temporary backup. - Updated restore_internal_use_shared_data.rb to include a diff check before removing /tmp directories for each internal shared directory. - Ensures data in the new directory is the same as the original before deleting the temporary backup. - Added detailed comments to explain the new data integrity checks. This update prevents potential data loss or inconsistency during the restoration process. --- .../recipes/init/restore_home_shared_data.rb | 13 +++++++++++-- .../init/restore_internal_use_shared_data.rb | 13 +++++++++++-- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/cookbooks/aws-parallelcluster-environment/recipes/init/restore_home_shared_data.rb b/cookbooks/aws-parallelcluster-environment/recipes/init/restore_home_shared_data.rb index b75c04cb8c..7a228a92c4 100644 --- a/cookbooks/aws-parallelcluster-environment/recipes/init/restore_home_shared_data.rb +++ b/cookbooks/aws-parallelcluster-environment/recipes/init/restore_home_shared_data.rb @@ -19,13 +19,22 @@ # This is necessary to preserve any data in these directories that was # generated during the build of ParallelCluster AMIs after converting to # shared storage and backed up to a temporary location previously - # Remove the backup after the copy is done + # Before removing the backup, ensure the data in the new home is the same + # as the original to avoid any data loss or inconsistency. This is done + # by using rsync to copy the data and diff to check for differences. + # Remove the backup after the copy is done and the data integrity is verified. bash "Restore /home" do user 'root' group 'root' code <<-EOH rsync -a --ignore-existing /tmp/home/ /home - rm -rf /tmp/home/ + diff -r /tmp/home/ /home + if [ $? -eq 0 ]; then + rm -rf /tmp/home/ + else + echo "Data integrity check failed for /home" + exit 1 + fi EOH end end diff --git a/cookbooks/aws-parallelcluster-environment/recipes/init/restore_internal_use_shared_data.rb b/cookbooks/aws-parallelcluster-environment/recipes/init/restore_internal_use_shared_data.rb index 83b0d71ec7..686304f19c 100644 --- a/cookbooks/aws-parallelcluster-environment/recipes/init/restore_internal_use_shared_data.rb +++ b/cookbooks/aws-parallelcluster-environment/recipes/init/restore_internal_use_shared_data.rb @@ -19,14 +19,23 @@ # This is necessary to preserve any data in these directories that was # generated during the build of ParallelCluster AMIs after converting to # shared storage and backed up to a temporary location previously - # Remove the backup after the copy is done + # Before removing the backup, ensure the data in the new directory is the same + # as the original to avoid any data loss or inconsistency. This is done + # by using rsync to copy the data and diff to check for differences. + # Remove the backup after the copy is done and the data integrity is verified. node['cluster']['internal_shared_dirs'].each do |dir| bash "Restore #{dir}" do user 'root' group 'root' code <<-EOH rsync -a --ignore-existing /tmp#{dir}/ #{dir} - rm -rf /tmp#{dir}/ + diff -r /tmp#{dir}/ #{dir} + if [ $? -eq 0 ]; then + rm -rf /tmp#{dir}/ + else + echo "Data integrity check failed for #{dir}" + exit 1 + fi EOH end end From f0dc2ad97fdefad7069e80bdd6bfebfc197a43fc Mon Sep 17 00:00:00 2001 From: Xuanqi He Date: Mon, 20 May 2024 12:37:16 -0400 Subject: [PATCH 2/9] Add data integrity check when moving the default home directory, update error messages and update spec tests - Modified 'config_default_user_home.rb' to include data integrity checks during the process of moving the cluster user's default home directory. This ensures the data in the new directory matches the original before deletion. - Updated error message when data integrity checks fail to show the difference. - Enhanced tests in 'config_default_user_home_spec.rb' to verify the data integrity check logic in the 'config_default_user_home.rb' recipe. - Enhanced tests in 'mount_internal_use_efs_spec.rb' to verify the data integrity check logic in the 'restore_internal_use_shared_data.rb' recipe. --- .../recipes/init/config_default_user_home.rb | 20 ++++++++++-- .../recipes/init/restore_home_shared_data.rb | 4 +-- .../init/restore_internal_use_shared_data.rb | 4 +-- .../recipes/config_default_user_home_spec.rb | 24 ++++++++++++++ .../recipes/mount_internal_use_efs_spec.rb | 32 +++++++++++++++++++ 5 files changed, 77 insertions(+), 7 deletions(-) diff --git a/cookbooks/aws-parallelcluster-environment/recipes/init/config_default_user_home.rb b/cookbooks/aws-parallelcluster-environment/recipes/init/config_default_user_home.rb index 3902a4022d..2dfde7e5c5 100644 --- a/cookbooks/aws-parallelcluster-environment/recipes/init/config_default_user_home.rb +++ b/cookbooks/aws-parallelcluster-environment/recipes/init/config_default_user_home.rb @@ -44,7 +44,15 @@ EOH end -# move the cluster user's default home directory +# Move the cluster user's default home directory +# This script performs the following actions: +# 1. Creates the new local home directory for the cluster user if it doesn't already exist. +# 2. Copies the data from the temporary backup directory (/tmp/cluster_user_home) to the new local home directory. +# 3. Updates the cluster user's home directory path to the new local home directory. +# 4. Changes the ownership of the new local home directory to the cluster user. +# 5. Verifies data integrity by comparing the temporary backup directory and the new local home directory. +# 6. If the data integrity check passes, it removes both the temporary backup directory and the original home directory. +# 7. If the data integrity check fails, it outputs an error message and exits with an error code. bash "Move #{node['cluster']['cluster_user_home']}" do user 'root' group 'root' @@ -54,8 +62,14 @@ rsync -a /tmp#{node['cluster']['cluster_user_home']}/ #{node['cluster']['cluster_user_local_home']} usermod -d #{node['cluster']['cluster_user_local_home']} #{node['cluster']['cluster_user']} chown -R #{node['cluster']['cluster_user']}: #{node['cluster']['cluster_user_local_home']} - rm -rf /tmp#{node['cluster']['cluster_user_home']} - rm -rf #{node['cluster']['cluster_user_home']} + diff_output=$(diff -r /tmp#{node['cluster']['cluster_user_home']} #{node['cluster']['cluster_user_local_home']}) + if [ $? -eq 0 ]; then + rm -rf /tmp#{node['cluster']['cluster_user_home']} + rm -rf #{node['cluster']['cluster_user_home']} + else + echo "Data integrity check failed comparing #{node['cluster']['cluster_user_home']} and /tmp#{node['cluster']['cluster_user_home']}: $diff_output" + exit 1 + fi EOH end diff --git a/cookbooks/aws-parallelcluster-environment/recipes/init/restore_home_shared_data.rb b/cookbooks/aws-parallelcluster-environment/recipes/init/restore_home_shared_data.rb index 7a228a92c4..df13ca3c32 100644 --- a/cookbooks/aws-parallelcluster-environment/recipes/init/restore_home_shared_data.rb +++ b/cookbooks/aws-parallelcluster-environment/recipes/init/restore_home_shared_data.rb @@ -28,11 +28,11 @@ group 'root' code <<-EOH rsync -a --ignore-existing /tmp/home/ /home - diff -r /tmp/home/ /home + diff_output=$(diff -r /tmp/home/ /home) if [ $? -eq 0 ]; then rm -rf /tmp/home/ else - echo "Data integrity check failed for /home" + echo "Data integrity check failed comparing /home and /tmp/home: $diff_output" exit 1 fi EOH diff --git a/cookbooks/aws-parallelcluster-environment/recipes/init/restore_internal_use_shared_data.rb b/cookbooks/aws-parallelcluster-environment/recipes/init/restore_internal_use_shared_data.rb index 686304f19c..39dc93d9c2 100644 --- a/cookbooks/aws-parallelcluster-environment/recipes/init/restore_internal_use_shared_data.rb +++ b/cookbooks/aws-parallelcluster-environment/recipes/init/restore_internal_use_shared_data.rb @@ -29,11 +29,11 @@ group 'root' code <<-EOH rsync -a --ignore-existing /tmp#{dir}/ #{dir} - diff -r /tmp#{dir}/ #{dir} + diff_output=$(diff -r /tmp#{dir}/ #{dir}) if [ $? -eq 0 ]; then rm -rf /tmp#{dir}/ else - echo "Data integrity check failed for #{dir}" + echo "Data integrity check failed comparing #{dir} and /tmp#{dir}: $diff_output" exit 1 fi EOH diff --git a/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/config_default_user_home_spec.rb b/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/config_default_user_home_spec.rb index 34da35d701..3f01692c52 100644 --- a/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/config_default_user_home_spec.rb +++ b/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/config_default_user_home_spec.rb @@ -22,7 +22,31 @@ expect(chef_run.node['cluster']['cluster_user_home']).to eq('/local/home/user') is_expected.to start_service("sshd") end + + it 'moves the cluster user home directory with data integrity check' do + user_home = "/home/user" + user_local_home = "/local/home/user" + cluster_user = chef_run.node['cluster']['cluster_user'] + expect(chef_run).to run_bash("Move #{user_home}").with( + code: <<-CODE + set -e + mkdir -p #{user_local_home} + rsync -a /tmp#{user_home}/ #{user_local_home} + usermod -d #{user_local_home} #{cluster_user} + chown -R #{cluster_user}: #{user_local_home} + diff_output=$(diff -r /tmp#{user_home} #{user_local_home}) + if [ $? -eq 0 ]; then + rm -rf /tmp#{user_home} + rm -rf #{user_home} + else + echo "Data integrity check failed comparing #{user_home} and /tmp#{user_home}: $diff_output" + exit 1 + fi + CODE + ) + end end + context 'when shared' do cached(:chef_run) do runner = runner(platform: platform, version: version) do |node| diff --git a/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/mount_internal_use_efs_spec.rb b/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/mount_internal_use_efs_spec.rb index 0198e32618..34123f56b6 100644 --- a/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/mount_internal_use_efs_spec.rb +++ b/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/mount_internal_use_efs_spec.rb @@ -17,6 +17,38 @@ describe 'call efs for mounting' do it { is_expected.to mount_efs('mount internal shared efs') } end + + context "when node type is HeadNode" do + cached(:chef_run) do + runner = runner(platform: platform, version: version) do |node| + node.override['cluster']['head_node_private_ip'] = '0.0.0.0' + node.override['cluster']['node_type'] = 'HeadNode' + node.override['cluster']['internal_shared_dirs'] = %w(/opt/slurm /opt/intel) + node.override['cluster']['efs_shared_dirs'] = "/opt/parallelcluster/init_shared" + end + runner.converge(described_recipe) + end + cached(:node) { chef_run.node } + + describe 'restore internal use shared data with integrity check' do + it 'restores internal shared dirs with data integrity check' do + chef_run.node['cluster']['internal_shared_dirs'].each do |dir| + expect(chef_run).to run_bash("Restore #{dir}").with( + code: <<-CODE + rsync -a --ignore-existing /tmp#{dir}/ #{dir} + diff_output=$(diff -r /tmp#{dir}/ #{dir}) + if [ $? -eq 0 ]; then + rm -rf /tmp#{dir}/ + else + echo "Data integrity check failed comparing #{dir} and /tmp#{dir}: $diff_output" + exit 1 + fi + CODE + ) + end + end + end + end end end end From d9844d09dd76eeba16734791f3b57415f5955eb4 Mon Sep 17 00:00:00 2001 From: Xuanqi He Date: Tue, 21 May 2024 22:56:44 -0400 Subject: [PATCH 3/9] Add the manual test of the unhappy case step and result in the description comments. Output error messages to stderr to ensure we can see it in chef-client.log. Add restart sshd service in config_default_user_home.rb, or after failed, user will can not ssh into the instance. --- .../recipes/init/config_default_user_home.rb | 10 ++++++++-- .../spec/unit/recipes/config_default_user_home_spec.rb | 1 + 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/cookbooks/aws-parallelcluster-environment/recipes/init/config_default_user_home.rb b/cookbooks/aws-parallelcluster-environment/recipes/init/config_default_user_home.rb index 2dfde7e5c5..eb7a7e3069 100644 --- a/cookbooks/aws-parallelcluster-environment/recipes/init/config_default_user_home.rb +++ b/cookbooks/aws-parallelcluster-environment/recipes/init/config_default_user_home.rb @@ -52,7 +52,12 @@ # 4. Changes the ownership of the new local home directory to the cluster user. # 5. Verifies data integrity by comparing the temporary backup directory and the new local home directory. # 6. If the data integrity check passes, it removes both the temporary backup directory and the original home directory. -# 7. If the data integrity check fails, it outputs an error message and exits with an error code. +# 7. If the data integrity check fails, it outputs an error message, restart sshd service and exits with an error code. +# 8. Unhappy Path Manually test passed successfully: +# Simulate a failure in the data integrity check by creating expected `node['cluster']['cluster_user_local_home']` +# Modify files in the expected directory to differ from the original. +# Manually run `config_default_user_home` recipe. +# Verified that the script fails and outputs the correct error message in chef-client.log. bash "Move #{node['cluster']['cluster_user_home']}" do user 'root' group 'root' @@ -67,7 +72,8 @@ rm -rf /tmp#{node['cluster']['cluster_user_home']} rm -rf #{node['cluster']['cluster_user_home']} else - echo "Data integrity check failed comparing #{node['cluster']['cluster_user_home']} and /tmp#{node['cluster']['cluster_user_home']}: $diff_output" + echo "Data integrity check failed comparing #{node['cluster']['cluster_user_home']} and /tmp#{node['cluster']['cluster_user_home']}: $diff_output" >&2 + systemctl start sshd exit 1 fi EOH diff --git a/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/config_default_user_home_spec.rb b/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/config_default_user_home_spec.rb index 3f01692c52..f6768333d9 100644 --- a/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/config_default_user_home_spec.rb +++ b/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/config_default_user_home_spec.rb @@ -40,6 +40,7 @@ rm -rf #{user_home} else echo "Data integrity check failed comparing #{user_home} and /tmp#{user_home}: $diff_output" + systemctl start sshd exit 1 fi CODE From 366af3a02848f68708e6d3c627d85382bfeab359 Mon Sep 17 00:00:00 2001 From: Xuanqi He Date: Tue, 21 May 2024 23:31:15 -0400 Subject: [PATCH 4/9] Modify the diff command to compare cluster_user_home and cluster_user_local_home --- .../recipes/init/config_default_user_home.rb | 4 ++-- .../spec/unit/recipes/config_default_user_home_spec.rb | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cookbooks/aws-parallelcluster-environment/recipes/init/config_default_user_home.rb b/cookbooks/aws-parallelcluster-environment/recipes/init/config_default_user_home.rb index eb7a7e3069..714122dfa7 100644 --- a/cookbooks/aws-parallelcluster-environment/recipes/init/config_default_user_home.rb +++ b/cookbooks/aws-parallelcluster-environment/recipes/init/config_default_user_home.rb @@ -67,12 +67,12 @@ rsync -a /tmp#{node['cluster']['cluster_user_home']}/ #{node['cluster']['cluster_user_local_home']} usermod -d #{node['cluster']['cluster_user_local_home']} #{node['cluster']['cluster_user']} chown -R #{node['cluster']['cluster_user']}: #{node['cluster']['cluster_user_local_home']} - diff_output=$(diff -r /tmp#{node['cluster']['cluster_user_home']} #{node['cluster']['cluster_user_local_home']}) + diff_output=$(diff -r #{node['cluster']['cluster_user_home']} #{node['cluster']['cluster_user_local_home']}) if [ $? -eq 0 ]; then rm -rf /tmp#{node['cluster']['cluster_user_home']} rm -rf #{node['cluster']['cluster_user_home']} else - echo "Data integrity check failed comparing #{node['cluster']['cluster_user_home']} and /tmp#{node['cluster']['cluster_user_home']}: $diff_output" >&2 + echo "Data integrity check failed comparing #{node['cluster']['cluster_user_local_home']} and #{node['cluster']['cluster_user_home']}: $diff_output" >&2 systemctl start sshd exit 1 fi diff --git a/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/config_default_user_home_spec.rb b/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/config_default_user_home_spec.rb index f6768333d9..6551fe0cc9 100644 --- a/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/config_default_user_home_spec.rb +++ b/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/config_default_user_home_spec.rb @@ -34,12 +34,12 @@ rsync -a /tmp#{user_home}/ #{user_local_home} usermod -d #{user_local_home} #{cluster_user} chown -R #{cluster_user}: #{user_local_home} - diff_output=$(diff -r /tmp#{user_home} #{user_local_home}) + diff_output=$(diff -r #{user_home} #{user_local_home}) if [ $? -eq 0 ]; then rm -rf /tmp#{user_home} rm -rf #{user_home} else - echo "Data integrity check failed comparing #{user_home} and /tmp#{user_home}: $diff_output" + echo "Data integrity check failed comparing #{user_local_home} and #{user_home}: $diff_output" >&2 systemctl start sshd exit 1 fi From 675028c0f22c479a7aac0469b1d269e3c7106432 Mon Sep 17 00:00:00 2001 From: Xuanqi He Date: Wed, 22 May 2024 11:19:40 -0400 Subject: [PATCH 5/9] Remove set -e, or it will end the script at diff -r #{user_home} #{user_local_home}) so that the script will not reach the if else part. --- .../recipes/init/config_default_user_home.rb | 5 ++--- .../spec/unit/recipes/config_default_user_home_spec.rb | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/cookbooks/aws-parallelcluster-environment/recipes/init/config_default_user_home.rb b/cookbooks/aws-parallelcluster-environment/recipes/init/config_default_user_home.rb index 714122dfa7..cda4e8aac9 100644 --- a/cookbooks/aws-parallelcluster-environment/recipes/init/config_default_user_home.rb +++ b/cookbooks/aws-parallelcluster-environment/recipes/init/config_default_user_home.rb @@ -62,18 +62,17 @@ user 'root' group 'root' code <<-EOH - set -e mkdir -p #{node['cluster']['cluster_user_local_home']} rsync -a /tmp#{node['cluster']['cluster_user_home']}/ #{node['cluster']['cluster_user_local_home']} usermod -d #{node['cluster']['cluster_user_local_home']} #{node['cluster']['cluster_user']} chown -R #{node['cluster']['cluster_user']}: #{node['cluster']['cluster_user_local_home']} diff_output=$(diff -r #{node['cluster']['cluster_user_home']} #{node['cluster']['cluster_user_local_home']}) - if [ $? -eq 0 ]; then + diff_exit_code=$? + if [ $diff_exit_code -eq 0 ]; then rm -rf /tmp#{node['cluster']['cluster_user_home']} rm -rf #{node['cluster']['cluster_user_home']} else echo "Data integrity check failed comparing #{node['cluster']['cluster_user_local_home']} and #{node['cluster']['cluster_user_home']}: $diff_output" >&2 - systemctl start sshd exit 1 fi EOH diff --git a/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/config_default_user_home_spec.rb b/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/config_default_user_home_spec.rb index 6551fe0cc9..ba965c9acb 100644 --- a/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/config_default_user_home_spec.rb +++ b/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/config_default_user_home_spec.rb @@ -29,18 +29,17 @@ cluster_user = chef_run.node['cluster']['cluster_user'] expect(chef_run).to run_bash("Move #{user_home}").with( code: <<-CODE - set -e mkdir -p #{user_local_home} rsync -a /tmp#{user_home}/ #{user_local_home} usermod -d #{user_local_home} #{cluster_user} chown -R #{cluster_user}: #{user_local_home} diff_output=$(diff -r #{user_home} #{user_local_home}) - if [ $? -eq 0 ]; then + diff_exit_code=$? + if [ $diff_exit_code -eq 0 ]; then rm -rf /tmp#{user_home} rm -rf #{user_home} else echo "Data integrity check failed comparing #{user_local_home} and #{user_home}: $diff_output" >&2 - systemctl start sshd exit 1 fi CODE From eee24d682a30c5a573a593f71dd6b99fdd778c1a Mon Sep 17 00:00:00 2001 From: Xuanqi He Date: Wed, 22 May 2024 15:16:22 -0400 Subject: [PATCH 6/9] Move the data integrity check to a separate bash resource. Add back set -e for the existing bash script. --- .../recipes/init/config_default_user_home.rb | 25 +++++++++++++------ .../recipes/config_default_user_home_spec.rb | 7 +----- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/cookbooks/aws-parallelcluster-environment/recipes/init/config_default_user_home.rb b/cookbooks/aws-parallelcluster-environment/recipes/init/config_default_user_home.rb index cda4e8aac9..6831bb271c 100644 --- a/cookbooks/aws-parallelcluster-environment/recipes/init/config_default_user_home.rb +++ b/cookbooks/aws-parallelcluster-environment/recipes/init/config_default_user_home.rb @@ -50,22 +50,31 @@ # 2. Copies the data from the temporary backup directory (/tmp/cluster_user_home) to the new local home directory. # 3. Updates the cluster user's home directory path to the new local home directory. # 4. Changes the ownership of the new local home directory to the cluster user. -# 5. Verifies data integrity by comparing the temporary backup directory and the new local home directory. -# 6. If the data integrity check passes, it removes both the temporary backup directory and the original home directory. -# 7. If the data integrity check fails, it outputs an error message, restart sshd service and exits with an error code. -# 8. Unhappy Path Manually test passed successfully: -# Simulate a failure in the data integrity check by creating expected `node['cluster']['cluster_user_local_home']` -# Modify files in the expected directory to differ from the original. -# Manually run `config_default_user_home` recipe. -# Verified that the script fails and outputs the correct error message in chef-client.log. bash "Move #{node['cluster']['cluster_user_home']}" do user 'root' group 'root' code <<-EOH + set -e mkdir -p #{node['cluster']['cluster_user_local_home']} rsync -a /tmp#{node['cluster']['cluster_user_home']}/ #{node['cluster']['cluster_user_local_home']} usermod -d #{node['cluster']['cluster_user_local_home']} #{node['cluster']['cluster_user']} chown -R #{node['cluster']['cluster_user']}: #{node['cluster']['cluster_user_local_home']} + EOH +end + +# Data integrity check +# 1. Verifies data integrity by comparing the temporary backup directory and the new local home directory. +# 2. If the data integrity check passes, it removes both the temporary backup directory and the original home directory. +# 3. If the data integrity check fails, it outputs an error message and exits with an error code 1. +# 4. Unhappy Path Manually test passed successfully: +# Simulate a failure in the data integrity check by creating expected `node['cluster']['cluster_user_local_home']` +# Modify files in the expected directory to differ from the original. +# Manually run `config_default_user_home` recipe. +# Verified that the script fails and outputs the correct error message in chef-client.log. +bash "Verify data integrity for #{node['cluster']['cluster_user_home']}" do + user 'root' + group 'root' + code <<-EOH diff_output=$(diff -r #{node['cluster']['cluster_user_home']} #{node['cluster']['cluster_user_local_home']}) diff_exit_code=$? if [ $diff_exit_code -eq 0 ]; then diff --git a/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/config_default_user_home_spec.rb b/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/config_default_user_home_spec.rb index ba965c9acb..eb30174877 100644 --- a/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/config_default_user_home_spec.rb +++ b/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/config_default_user_home_spec.rb @@ -26,13 +26,8 @@ it 'moves the cluster user home directory with data integrity check' do user_home = "/home/user" user_local_home = "/local/home/user" - cluster_user = chef_run.node['cluster']['cluster_user'] - expect(chef_run).to run_bash("Move #{user_home}").with( + expect(chef_run).to run_bash("Verify data integrity for #{user_home}").with( code: <<-CODE - mkdir -p #{user_local_home} - rsync -a /tmp#{user_home}/ #{user_local_home} - usermod -d #{user_local_home} #{cluster_user} - chown -R #{cluster_user}: #{user_local_home} diff_output=$(diff -r #{user_home} #{user_local_home}) diff_exit_code=$? if [ $diff_exit_code -eq 0 ]; then From eaacb00bdbab951d24f6d0e2b08c1cff953dfc19 Mon Sep 17 00:00:00 2001 From: Xuanqi He Date: Thu, 23 May 2024 13:44:05 -0400 Subject: [PATCH 7/9] Remove useless defination of diff_exit_code to be consistent. --- .../recipes/init/config_default_user_home.rb | 3 +-- .../spec/unit/recipes/config_default_user_home_spec.rb | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/cookbooks/aws-parallelcluster-environment/recipes/init/config_default_user_home.rb b/cookbooks/aws-parallelcluster-environment/recipes/init/config_default_user_home.rb index 6831bb271c..521ef66657 100644 --- a/cookbooks/aws-parallelcluster-environment/recipes/init/config_default_user_home.rb +++ b/cookbooks/aws-parallelcluster-environment/recipes/init/config_default_user_home.rb @@ -76,8 +76,7 @@ group 'root' code <<-EOH diff_output=$(diff -r #{node['cluster']['cluster_user_home']} #{node['cluster']['cluster_user_local_home']}) - diff_exit_code=$? - if [ $diff_exit_code -eq 0 ]; then + if [ $? -eq 0 ]; then rm -rf /tmp#{node['cluster']['cluster_user_home']} rm -rf #{node['cluster']['cluster_user_home']} else diff --git a/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/config_default_user_home_spec.rb b/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/config_default_user_home_spec.rb index eb30174877..971415c359 100644 --- a/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/config_default_user_home_spec.rb +++ b/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/config_default_user_home_spec.rb @@ -29,8 +29,7 @@ expect(chef_run).to run_bash("Verify data integrity for #{user_home}").with( code: <<-CODE diff_output=$(diff -r #{user_home} #{user_local_home}) - diff_exit_code=$? - if [ $diff_exit_code -eq 0 ]; then + if [ $? -eq 0 ]; then rm -rf /tmp#{user_home} rm -rf #{user_home} else From 57b62782d4cf6a9ae06ebb4ea1bf49718310c75f Mon Sep 17 00:00:00 2001 From: Xuanqi He Date: Fri, 24 May 2024 08:35:17 -0400 Subject: [PATCH 8/9] Minor changes in comments. --- .../recipes/init/config_default_user_home.rb | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/cookbooks/aws-parallelcluster-environment/recipes/init/config_default_user_home.rb b/cookbooks/aws-parallelcluster-environment/recipes/init/config_default_user_home.rb index 521ef66657..550828626b 100644 --- a/cookbooks/aws-parallelcluster-environment/recipes/init/config_default_user_home.rb +++ b/cookbooks/aws-parallelcluster-environment/recipes/init/config_default_user_home.rb @@ -62,15 +62,10 @@ EOH end -# Data integrity check +# Data integrity check and cleanup for temporary backup and original home directory # 1. Verifies data integrity by comparing the temporary backup directory and the new local home directory. # 2. If the data integrity check passes, it removes both the temporary backup directory and the original home directory. # 3. If the data integrity check fails, it outputs an error message and exits with an error code 1. -# 4. Unhappy Path Manually test passed successfully: -# Simulate a failure in the data integrity check by creating expected `node['cluster']['cluster_user_local_home']` -# Modify files in the expected directory to differ from the original. -# Manually run `config_default_user_home` recipe. -# Verified that the script fails and outputs the correct error message in chef-client.log. bash "Verify data integrity for #{node['cluster']['cluster_user_home']}" do user 'root' group 'root' From 1983c84ce277aa6e708a39fe10b1ff22bb735715 Mon Sep 17 00:00:00 2001 From: Xuanqi He Date: Fri, 24 May 2024 09:29:31 -0400 Subject: [PATCH 9/9] Add back start SSHD service logic. --- .../recipes/init/config_default_user_home.rb | 1 + .../spec/unit/recipes/config_default_user_home_spec.rb | 1 + 2 files changed, 2 insertions(+) diff --git a/cookbooks/aws-parallelcluster-environment/recipes/init/config_default_user_home.rb b/cookbooks/aws-parallelcluster-environment/recipes/init/config_default_user_home.rb index 550828626b..9607007b07 100644 --- a/cookbooks/aws-parallelcluster-environment/recipes/init/config_default_user_home.rb +++ b/cookbooks/aws-parallelcluster-environment/recipes/init/config_default_user_home.rb @@ -76,6 +76,7 @@ rm -rf #{node['cluster']['cluster_user_home']} else echo "Data integrity check failed comparing #{node['cluster']['cluster_user_local_home']} and #{node['cluster']['cluster_user_home']}: $diff_output" >&2 + systemctl start sshd exit 1 fi EOH diff --git a/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/config_default_user_home_spec.rb b/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/config_default_user_home_spec.rb index 971415c359..20ee258d29 100644 --- a/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/config_default_user_home_spec.rb +++ b/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/config_default_user_home_spec.rb @@ -34,6 +34,7 @@ rm -rf #{user_home} else echo "Data integrity check failed comparing #{user_local_home} and #{user_home}: $diff_output" >&2 + systemctl start sshd exit 1 fi CODE