diff --git a/cookbooks/aws-parallelcluster-environment/recipes/init/config_default_user_home.rb b/cookbooks/aws-parallelcluster-environment/recipes/init/config_default_user_home.rb index 9607007b07..39d1cf5878 100644 --- a/cookbooks/aws-parallelcluster-environment/recipes/init/config_default_user_home.rb +++ b/cookbooks/aws-parallelcluster-environment/recipes/init/config_default_user_home.rb @@ -62,20 +62,24 @@ EOH end -# Data integrity check and cleanup for temporary backup and original home directory -# 1. Verifies data integrity by comparing the temporary backup directory and the new local home directory. +# Data integrity check and cleanup for temporary backup directory +# 1. Verifies data integrity by comparing the original home directory and the new local home directory. -# 2. If the data integrity check passes, it removes both the temporary backup directory and the original home directory. +# 2. If the data integrity check passes, it removes only the temporary backup directory; the original home directory is kept. # 3. If the data integrity check fails, it outputs an error message and exits with an error code 1. -bash "Verify data integrity for #{node['cluster']['cluster_user_home']}" do +# To avoid any confusion, ['cluster_user_home'] is the original dir, ['cluster_user_local_home'] is the destination dir. +# To avoid any potential file system data loss risks, we decided to keep node['cluster']['cluster_user_home']. +bash "Verify data integrity for #{node['cluster']['cluster_user_local_home']}" do user 'root' group 'root' code <<-EOH diff_output=$(diff -r #{node['cluster']['cluster_user_home']} #{node['cluster']['cluster_user_local_home']}) - if [ $? 
-eq 0 ]; then + if [[ $diff_output != *"Only in #{node['cluster']['cluster_user_home']}"* ]]; then + echo "Data integrity check succeeded, removing temporary directory /tmp#{node['cluster']['cluster_user_home']}" rm -rf /tmp#{node['cluster']['cluster_user_home']} - rm -rf #{node['cluster']['cluster_user_home']} else - echo "Data integrity check failed comparing #{node['cluster']['cluster_user_local_home']} and #{node['cluster']['cluster_user_home']}: $diff_output" >&2 + only_in_cluster_user_home=$(echo "$diff_output" | grep "Only in #{node['cluster']['cluster_user_home']}") + echo "Data integrity check failed comparing #{node['cluster']['cluster_user_local_home']} and #{node['cluster']['cluster_user_home']}. Differences:" + echo "$only_in_cluster_user_home" systemctl start sshd exit 1 fi diff --git a/cookbooks/aws-parallelcluster-environment/recipes/init/restore_home_shared_data.rb b/cookbooks/aws-parallelcluster-environment/recipes/init/restore_home_shared_data.rb index df13ca3c32..ab694587bb 100644 --- a/cookbooks/aws-parallelcluster-environment/recipes/init/restore_home_shared_data.rb +++ b/cookbooks/aws-parallelcluster-environment/recipes/init/restore_home_shared_data.rb @@ -19,7 +19,7 @@ # This is necessary to preserve any data in these directories that was # generated during the build of ParallelCluster AMIs after converting to # shared storage and backed up to a temporary location previously - # Before removing the backup, ensure the data in the new home is the same + # Before removing the backup, ensure the new data in the new home is the same # as the original to avoid any data loss or inconsistency. This is done # by using rsync to copy the data and diff to check for differences. # Remove the backup after the copy is done and the data integrity is verified. @@ -29,10 +29,13 @@ code <<-EOH rsync -a --ignore-existing /tmp/home/ /home diff_output=$(diff -r /tmp/home/ /home) - if [ $? 
-eq 0 ]; then + if [[ $diff_output != *"Only in /tmp/home"* ]]; then + echo "Data integrity check succeeded, removing temporary directory /tmp/home" rm -rf /tmp/home/ else - echo "Data integrity check failed comparing /home and /tmp/home: $diff_output" + only_in_tmp=$(echo "$diff_output" | grep "Only in /tmp/home") + echo "Data integrity check failed comparing /home and /tmp/home. Differences:" + echo "$only_in_tmp" exit 1 fi EOH diff --git a/cookbooks/aws-parallelcluster-environment/recipes/init/restore_internal_use_shared_data.rb b/cookbooks/aws-parallelcluster-environment/recipes/init/restore_internal_use_shared_data.rb index 39dc93d9c2..5351d0274f 100644 --- a/cookbooks/aws-parallelcluster-environment/recipes/init/restore_internal_use_shared_data.rb +++ b/cookbooks/aws-parallelcluster-environment/recipes/init/restore_internal_use_shared_data.rb @@ -19,7 +19,7 @@ # This is necessary to preserve any data in these directories that was # generated during the build of ParallelCluster AMIs after converting to # shared storage and backed up to a temporary location previously - # Before removing the backup, ensure the data in the new directory is the same + # Before removing the backup, ensure the new data in the new directory is the same # as the original to avoid any data loss or inconsistency. This is done # by using rsync to copy the data and diff to check for differences. # Remove the backup after the copy is done and the data integrity is verified. @@ -30,10 +30,13 @@ code <<-EOH rsync -a --ignore-existing /tmp#{dir}/ #{dir} diff_output=$(diff -r /tmp#{dir}/ #{dir}) - if [ $? 
-eq 0 ]; then - rm -rf /tmp#{dir}/ + if [[ $diff_output != *"Only in /tmp#{dir}"* ]]; then + echo "Data integrity check succeeded, removing temporary directory /tmp#{dir}" + rm -rf /tmp#{dir} else - echo "Data integrity check failed comparing #{dir} and /tmp#{dir}: $diff_output" + only_in_tmp=$(echo "$diff_output" | grep "Only in /tmp#{dir}") + echo "Data integrity check failed comparing #{dir} and /tmp#{dir}. Differences:" + echo "$only_in_tmp" exit 1 fi EOH diff --git a/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/config_default_user_home_spec.rb b/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/config_default_user_home_spec.rb index 20ee258d29..141baed4ef 100644 --- a/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/config_default_user_home_spec.rb +++ b/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/config_default_user_home_spec.rb @@ -24,16 +24,19 @@ end it 'moves the cluster user home directory with data integrity check' do - user_home = "/home/user" - user_local_home = "/local/home/user" - expect(chef_run).to run_bash("Verify data integrity for #{user_home}").with( + # To avoid any confusion, user_home is the original dir, user_local_home is the destination dir. + original_user_home = "/home/user" + destination_user_local_home = "/local/home/user" + expect(chef_run).to run_bash("Verify data integrity for #{destination_user_local_home}").with( code: <<-CODE - diff_output=$(diff -r #{user_home} #{user_local_home}) - if [ $? 
-eq 0 ]; then - rm -rf /tmp#{user_home} - rm -rf #{user_home} + diff_output=$(diff -r #{original_user_home} #{destination_user_local_home}) + if [[ $diff_output != *"Only in #{original_user_home}"* ]]; then + echo "Data integrity check succeeded, removing temporary directory /tmp#{original_user_home}" + rm -rf /tmp#{original_user_home} else - echo "Data integrity check failed comparing #{user_local_home} and #{user_home}: $diff_output" >&2 + only_in_cluster_user_home=$(echo "$diff_output" | grep "Only in #{original_user_home}") + echo "Data integrity check failed comparing #{destination_user_local_home} and #{original_user_home}. Differences:" + echo "$only_in_cluster_user_home" systemctl start sshd exit 1 fi diff --git a/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/mount_internal_use_efs_spec.rb b/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/mount_internal_use_efs_spec.rb index 34123f56b6..17e690de59 100644 --- a/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/mount_internal_use_efs_spec.rb +++ b/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/mount_internal_use_efs_spec.rb @@ -37,10 +37,13 @@ code: <<-CODE rsync -a --ignore-existing /tmp#{dir}/ #{dir} diff_output=$(diff -r /tmp#{dir}/ #{dir}) - if [ $? -eq 0 ]; then - rm -rf /tmp#{dir}/ + if [[ $diff_output != *"Only in /tmp#{dir}"* ]]; then + echo "Data integrity check succeeded, removing temporary directory /tmp#{dir}" + rm -rf /tmp#{dir} else - echo "Data integrity check failed comparing #{dir} and /tmp#{dir}: $diff_output" + only_in_tmp=$(echo "$diff_output" | grep "Only in /tmp#{dir}") + echo "Data integrity check failed comparing #{dir} and /tmp#{dir}. Differences:" + echo "$only_in_tmp" exit 1 fi CODE