From 618e44ab7a7d747db50188449e85c289867652bc Mon Sep 17 00:00:00 2001 From: Himani Anil Deshpande Date: Thu, 20 Feb 2025 14:17:43 -0500 Subject: [PATCH] [Scaling] Improving share_compute_fleet_dna.py * adding Logs which would be visible in chef-client.log --- .../files/cfn_hup_configuration/share_compute_fleet_dna.py | 7 +++++++ .../resources/cfn_hup_configuration.rb | 5 ++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/cookbooks/aws-parallelcluster-environment/files/cfn_hup_configuration/share_compute_fleet_dna.py b/cookbooks/aws-parallelcluster-environment/files/cfn_hup_configuration/share_compute_fleet_dna.py index 729a610fb7..726b746011 100644 --- a/cookbooks/aws-parallelcluster-environment/files/cfn_hup_configuration/share_compute_fleet_dna.py +++ b/cookbooks/aws-parallelcluster-environment/files/cfn_hup_configuration/share_compute_fleet_dna.py @@ -71,6 +71,7 @@ def get_compute_launch_template_ids(lt_config_file_name): """ lt_config = None try: + logger.info("Getting LaunchTemplate ID and versions from %s", lt_config_file_name) with open(lt_config_file_name, "r", encoding="utf-8") as file: lt_config = json.loads(file.read()) except Exception as err: @@ -117,6 +118,7 @@ def get_user_data(lt_id, lt_version, region_name): proxy_config = parse_proxy_config() ec2_client = boto3.client("ec2", region_name=region_name, config=proxy_config) + logger.info("Running EC2 DescribeLaunchTemplateVersions API for %s version %s", lt_id, lt_version) response = ec2_client.describe_launch_template_versions( LaunchTemplateId=lt_id, Versions=[ @@ -139,6 +141,7 @@ def get_write_directives_section(user_data): write_directives_section = None try: data = message_from_string(user_data) + logger.info("Parsing UserData to get write_files section") for cloud_config_section in data.walk(): if cloud_config_section.get_content_type() == "text/cloud-config": write_directives_section = yaml.safe_load(cloud_config_section._payload).get("write_files") @@ -160,6 +163,7 @@ def write_dna_files(write_files_section, shared_storage_loc): for data in write_files_section: if data["path"] in ["/tmp/dna.json"]: # nosec B108 with open(file_path, "w", encoding="utf-8") as file: + logger.info("Writing %s", file_path) file.write(json.dumps(json.loads(data["content"]), indent=4)) except Exception as err: if hasattr(err, "message"): @@ -190,6 +194,7 @@ def cleanup(directory_loc): f_path = os.path.join(directory_loc, f) try: if os.path.isfile(f_path): + logger.info("Cleaning up %s", f_path) os.remove(f_path) except Exception as err: logger.warning("Unable to delete %s due to %s", f_path, err) @@ -238,6 +243,8 @@ def main(): ) raise SystemExit(0) + logger.info("All dna.json files have been shared!") + if __name__ == "__main__": main() diff --git a/cookbooks/aws-parallelcluster-environment/resources/cfn_hup_configuration.rb b/cookbooks/aws-parallelcluster-environment/resources/cfn_hup_configuration.rb index 60ecc760af..6ea028dc82 100644 --- a/cookbooks/aws-parallelcluster-environment/resources/cfn_hup_configuration.rb +++ b/cookbooks/aws-parallelcluster-environment/resources/cfn_hup_configuration.rb @@ -59,11 +59,14 @@ group 'root' mode '0400' variables( + # Common variable + launch_template_resource_id: node['cluster']['launch_template_id'], + # HeadNode and LoginNode specific variables stack_id: node['cluster']['stack_arn'], region: node['cluster']['region'], cloudformation_url: cloudformation_url, cfn_init_role: instance_role_name, - launch_template_resource_id: node['cluster']['launch_template_id'], + # ComputeFleet specific variables update_hook_script_dir: node['cluster']['scripts_dir'], node_bootstrap_timeout: node['cluster']['compute_node_bootstrap_timeout'] || node['cluster']['Timeout'] )