From 8bcd7e8dcb518c1a73c3c297f1ff6231db5decf5 Mon Sep 17 00:00:00 2001
From: Hanwen
Date: Thu, 22 Jul 2021 14:21:25 -0700
Subject: [PATCH] Fix cluster configuration schema

1. Allow configuring DisableSimultaneousMultithreading only for Slurm compute
   nodes, since pcluster is currently not able to configure it for AWS Batch
   compute nodes.
2. Allow updating the RootVolume section. This means users can add the section
   to the configuration file after the cluster is created and then adjust the
   updatable parameters in the section.
3. Allow updating the Roles section under the IAM section, for the same reason
   as #2.
4. Require Key and Value in the Tags section.
5. Modify awsbatch.full.yaml, awsbatch.simple.yaml and slurm.full.yaml to
   remove unnecessary comments.

A configuration snippet illustrating these schema changes is sketched after
the patch.

Signed-off-by: Hanwen
---
 cli/src/pcluster/config/cluster_config.py     |  8 +-
 cli/src/pcluster/schemas/cluster_schema.py    |  6 +-
 cli/src/pcluster/schemas/common_schema.py     |  8 +-
 .../awsbatch.full.yaml                        | 91 +++++++++----
 .../awsbatch.simple.yaml                      |  4 +-
 .../test_cluster_schema_slurm/slurm.full.yaml | 34 +++----
 6 files changed, 75 insertions(+), 76 deletions(-)

diff --git a/cli/src/pcluster/config/cluster_config.py b/cli/src/pcluster/config/cluster_config.py
index 40ce3d779d..3c37733229 100644
--- a/cli/src/pcluster/config/cluster_config.py
+++ b/cli/src/pcluster/config/cluster_config.py
@@ -837,13 +837,9 @@ class BaseComputeResource(Resource):
     def __init__(
         self,
         name: str,
-        disable_simultaneous_multithreading: bool = None,
     ):
         super().__init__()
         self.name = Resource.init_param(name)
-        self.disable_simultaneous_multithreading = Resource.init_param(
-            disable_simultaneous_multithreading, default=False
-        )

     def _register_validators(self):
         self._register_validator(NameValidator, name=self.name)
@@ -1258,6 +1254,7 @@ def __init__(
         min_count: int = None,
         spot_price: float = None,
         efa: Efa = None,
+        disable_simultaneous_multithreading: bool = None,
         **kwargs,
     ):
         super().__init__(**kwargs)
@@ -1265,6 +1262,9 @@
         self.max_count = Resource.init_param(max_count, default=DEFAULT_MAX_COUNT)
         self.min_count = Resource.init_param(min_count, default=DEFAULT_MIN_COUNT)
         self.spot_price = Resource.init_param(spot_price)
+        self.disable_simultaneous_multithreading = Resource.init_param(
+            disable_simultaneous_multithreading, default=False
+        )
         self.__instance_type_info = None
         efa_supported = self.instance_type_info.is_efa_supported()
         self.efa = efa or Efa(enabled=efa_supported, implied=True)
diff --git a/cli/src/pcluster/schemas/cluster_schema.py b/cli/src/pcluster/schemas/cluster_schema.py
index d03a48caf2..066bfbd558 100644
--- a/cli/src/pcluster/schemas/cluster_schema.py
+++ b/cli/src/pcluster/schemas/cluster_schema.py
@@ -239,7 +239,7 @@ def make_resource(self, data, **kwargs):
 class HeadNodeStorageSchema(BaseSchema):
     """Represent the schema of storage attached to a node."""

-    root_volume = fields.Nested(HeadNodeRootVolumeSchema, metadata={"update_policy": UpdatePolicy.UNSUPPORTED})
+    root_volume = fields.Nested(HeadNodeRootVolumeSchema, metadata={"update_policy": UpdatePolicy.SUPPORTED})
     ephemeral_volume = fields.Nested(
         HeadNodeEphemeralVolumeSchema, metadata={"update_policy": UpdatePolicy.UNSUPPORTED}
     )
@@ -705,7 +705,7 @@ def make_resource(self, data, **kwargs):
 class ClusterIamSchema(BaseSchema):
     """Represent the schema of IAM for Cluster."""

-    roles = fields.Nested(RolesSchema)
+    roles = fields.Nested(RolesSchema, metadata={"update_policy": UpdatePolicy.SUPPORTED})
     permissions_boundary = fields.Str(
         metadata={"update_policy": UpdatePolicy.SUPPORTED}, validate=validate.Regexp("^arn:.*:policy/")
     )
@@ -905,7 +905,6 @@ class _ComputeResourceSchema(BaseSchema):
     """Represent the schema of the ComputeResource."""

     name = fields.Str(required=True, metadata={"update_policy": UpdatePolicy.UNSUPPORTED})
-    disable_simultaneous_multithreading = fields.Bool(metadata={"update_policy": UpdatePolicy.COMPUTE_FLEET_STOP})


 class SlurmComputeResourceSchema(_ComputeResourceSchema):
@@ -916,6 +915,7 @@ class SlurmComputeResourceSchema(_ComputeResourceSchema):
     min_count = fields.Int(validate=validate.Range(min=0), metadata={"update_policy": UpdatePolicy.COMPUTE_FLEET_STOP})
     spot_price = fields.Float(validate=validate.Range(min=0), metadata={"update_policy": UpdatePolicy.SUPPORTED})
     efa = fields.Nested(EfaSchema, metadata={"update_policy": UpdatePolicy.COMPUTE_FLEET_STOP})
+    disable_simultaneous_multithreading = fields.Bool(metadata={"update_policy": UpdatePolicy.COMPUTE_FLEET_STOP})

     @post_load
     def make_resource(self, data, **kwargs):
diff --git a/cli/src/pcluster/schemas/common_schema.py b/cli/src/pcluster/schemas/common_schema.py
index 10c2628024..42eaa98480 100644
--- a/cli/src/pcluster/schemas/common_schema.py
+++ b/cli/src/pcluster/schemas/common_schema.py
@@ -136,8 +136,12 @@ def _is_implied(resource, attr, value):
 class TagSchema(BaseSchema):
     """Represent the schema of Tag section."""

-    key = fields.Str(validate=validate.Length(max=128), metadata={"update_policy": UpdatePolicy.UNSUPPORTED})
-    value = fields.Str(validate=validate.Length(max=256), metadata={"update_policy": UpdatePolicy.SUPPORTED})
+    key = fields.Str(
+        required=True, validate=validate.Length(max=128), metadata={"update_policy": UpdatePolicy.UNSUPPORTED}
+    )
+    value = fields.Str(
+        required=True, validate=validate.Length(max=256), metadata={"update_policy": UpdatePolicy.SUPPORTED}
+    )

     @post_load
     def make_resource(self, data, **kwargs):
diff --git a/cli/tests/pcluster/schemas/test_cluster_schema/test_cluster_schema_awsbatch/awsbatch.full.yaml b/cli/tests/pcluster/schemas/test_cluster_schema/test_cluster_schema_awsbatch/awsbatch.full.yaml
index 045cd2a73d..0428b4fca1 100644
--- a/cli/tests/pcluster/schemas/test_cluster_schema/test_cluster_schema_awsbatch/awsbatch.full.yaml
+++ b/cli/tests/pcluster/schemas/test_cluster_schema/test_cluster_schema_awsbatch/awsbatch.full.yaml
@@ -1,43 +1,43 @@
 Image:
   Os: alinux2 # alinux2 | centos7 | ubuntu1804 | ubuntu2004
-  CustomAmi: ami-12345678 # ami-xxx
+  CustomAmi: ami-12345678
 HeadNode:
-  InstanceType: String # t2.micro
+  InstanceType: t2.micro
   Networking:
-    SubnetId: subnet-12345678 # subnet-xxx
+    SubnetId: subnet-12345678
     ElasticIp: String # true|false|EIP-id
     AssignPublicIp: true # true|false
     AdditionalSecurityGroups:
-      - sg-12345678 # sg-xxxx
+      - sg-12345678
       - sg-23456789
     Proxy:
-      HttpProxyAddress: String # https://proxy-address:port
+      HttpProxyAddress: https://proxy-address:port
   DisableSimultaneousMultithreading: true
   CustomActions:
     OnNodeStart:
       Script: https://test.tgz # s3:// | https:// | file://*
       Args:
-        - String # arg1
-        - stirng2
+        - arg1
+        - arg2
     OnNodeConfigured:
       Script: https://test.tgz # s3:// | https:// | file://*
       Args:
-        - String # arg1
-        - stirng2
+        - arg1
+        - arg2
   Ssh:
-    KeyName: String # ec2-key-name
-    AllowedIps: 1.2.3.4/32 # 1.2.3.4/32
+    KeyName: ec2-key-name
+    AllowedIps: 1.2.3.4/32
   LocalStorage:
     RootVolume:
-      Size: 37 # 35
-      Encrypted: true # true
+      Size: 37
+      Encrypted: true
       DeleteOnTermination: true
     EphemeralVolume:
-      Encrypted: true # true
-      MountDir: String # /scratch
+      Encrypted: true
+      MountDir: /scratch
   Dcv:
-    Enabled: true # true
-    Port: 8443 # 8443
+    Enabled: true
+    Port: 8443
     AllowedIps: 0.0.0.0/0 # 0.0.0.0/0
 Scheduling:
   Scheduler: awsbatch
@@ -54,7 +54,7 @@ Scheduling:
           Enabled: true # true
           Id: String
         Proxy:
-          HttpProxyAddress: String # https://proxy-address:port
+          HttpProxyAddress: https://proxy-address:port
       ComputeResources: # this maps to a Batch compute environment (initially we support only 1)
         - Name: compute_resource
           InstanceTypes:
@@ -65,7 +65,6 @@ Scheduling:
           DesiredvCpus: 10
           MaxvCpus: 20
           SpotBidPercentage: 50
-          DisableSimultaneousMultithreading: false
 SharedStorage:
   - MountDir: /my/mount/point
     StorageType: Ebs
@@ -90,8 +89,8 @@ SharedStorage:
       KmsKeyId: String # id-xxx
       PerformanceMode: maxIO # generalPurpose | maxIO
       ThroughputMode: provisioned # bursting | provisioned
-      ProvisionedThroughput: 1024 # 1024
-      FileSystemId: fs-12345678 # fs-xxxx
+      ProvisionedThroughput: 1024
+      FileSystemId: fs-12345678
   - MountDir: /my/mount/point3
     StorageType: FsxLustre
     Name: name3
@@ -99,48 +98,44 @@ SharedStorage:
       StorageCapacity: 3600 # 3600
       DeploymentType: SCRATCH_1 # PERSISTENT_1 | SCRATCH_1 | SCRATCH_2
       DataCompressionType: LZ4
-      ImportedFileChunkSize: 10 # 1024
-      ExportPath: String # s3://bucket/folder
-      ImportPath: String # s3://bucket
-      WeeklyMaintenanceStartTime: "1:00:00" # "1:00:00"
-      AutomaticBackupRetentionDays: 1 # 0
-      CopyTagsToBackups: true # true
-      DailyAutomaticBackupStartTime: 01:03 # 01:03
-      PerUnitStorageThroughput: 50 # 200
+      ImportedFileChunkSize: 1024
+      ExportPath: s3://bucket/folder
+      ImportPath: s3://bucket
+      WeeklyMaintenanceStartTime: "1:00:00"
+      AutomaticBackupRetentionDays: 1
+      CopyTagsToBackups: true
+      DailyAutomaticBackupStartTime: 01:03
+      PerUnitStorageThroughput: 50
      #BackupId: backup-12345678 # BackupId cannot coexist with some of the fields
-      KmsKeyId: String # xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
-      #FileSystemId: fs-12345678123456789 # FileSystemId cannot coexist with some of the fields
+      KmsKeyId: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
+      #FileSystemId: fs-12345678123456789 # FileSystemId cannot coexist with other fields
       AutoImportPolicy: NEW_CHANGED # NEW | NEW_CHANGED
       DriveCacheType: READ # READ
       StorageType: HDD # HDD | SSD
 Iam:
   Roles:
-    CustomLambdaResources: String # arn:aws:iam::aws:role/CustomResourcesLambdaRole
+    CustomLambdaResources: arn:aws:iam::aws:role/CustomResourcesLambdaRole
 Monitoring:
-  DetailedMonitoring: true # false
+  DetailedMonitoring: true
   Logs:
     CloudWatch:
-      Enabled: true # true
-      RetentionInDays: 14 # 14
+      Enabled: true
+      RetentionInDays: 14
       DeletionPolicy: Delete
   Dashboards:
     CloudWatch:
-      Enabled: true # true
-#AdditionalPackages:
-# IntelSelectSolutions:
-# InstallIntelSoftware: true # false
-#ClusterS3ResourceBucket: String
+      Enabled: true
+CustomS3Bucket: String
 Tags:
   - Key: String
     Value: String
   - Key: two
     Value: two22
-#CustomS3Bucket: String
-#AdditionalResources: String # https://template.url
-#DevSettings:
+AdditionalResources: https://template.url
+DevSettings:
 # ClusterTemplate: String
-# Cookbook:
-# ChefCookbook: String
-# ExtraChefAttributes: Object
-# AwsBatchCliPackage: String
-# NodePackage: String
+  Cookbook:
+    ChefCookbook: https://template.url
+    ExtraChefAttributes: '{ "cluster" : { "custom_node_package" : "https://s3.eu-west-1.amazonaws.com/cfncluster-fdm-build-artifacts-eu-west-1/node/aws-parallelcluster-node-2.8.1.tgz", "skip_install_recipes": "no" } }'
+  AwsBatchCliPackage: https://batchcli.url
+  NodePackage: https://nodepackage.url
diff --git a/cli/tests/pcluster/schemas/test_cluster_schema/test_cluster_schema_awsbatch/awsbatch.simple.yaml b/cli/tests/pcluster/schemas/test_cluster_schema/test_cluster_schema_awsbatch/awsbatch.simple.yaml
index 4cdc0f5a18..9fa323cb8c 100644
--- a/cli/tests/pcluster/schemas/test_cluster_schema/test_cluster_schema_awsbatch/awsbatch.simple.yaml
+++ b/cli/tests/pcluster/schemas/test_cluster_schema/test_cluster_schema_awsbatch/awsbatch.simple.yaml
@@ -1,9 +1,9 @@
 Image:
   Os: alinux2
 HeadNode:
-  InstanceType: String # t2.micro
+  InstanceType: t2.micro
   Ssh:
-    KeyName: String # ec2-key-name
+    KeyName: ec2-key-name
   Networking:
     SubnetId: subnet-12345678
 Scheduling:
diff --git a/cli/tests/pcluster/schemas/test_cluster_schema/test_cluster_schema_slurm/slurm.full.yaml b/cli/tests/pcluster/schemas/test_cluster_schema/test_cluster_schema_slurm/slurm.full.yaml
index 2fc6367796..62c97f8761 100644
--- a/cli/tests/pcluster/schemas/test_cluster_schema/test_cluster_schema_slurm/slurm.full.yaml
+++ b/cli/tests/pcluster/schemas/test_cluster_schema/test_cluster_schema_slurm/slurm.full.yaml
@@ -2,16 +2,16 @@ Image:
   Os: centos7
   CustomAmi: ami-12345678
 HeadNode:
-  InstanceType: t2.micro # t2.micro
+  InstanceType: t2.micro
   Networking:
-    SubnetId: subnet-12345678 # subnet-xxx
+    SubnetId: subnet-12345678
     ElasticIp: true # true|false|EIP-id
     AssignPublicIp: True # true|false
     AdditionalSecurityGroups:
       - sg-34567890
       - sg-45678901
     Proxy:
-      HttpProxyAddress: String # https://proxy-address:port
+      HttpProxyAddress: https://proxy-address:port
   DisableSimultaneousMultithreading: false
   Ssh:
     KeyName: ec2-key-name
@@ -34,13 +34,13 @@ HeadNode:
     OnNodeStart:
       Script: https://test.tgz # s3:// | https:// | file://*
       Args:
-        - String # arg1
-        - stirng2
+        - arg1
+        - arg2
     OnNodeConfigured:
       Script: https://test.tgz # s3:// | https:// | file://*
       Args:
-        - String # arg1
-        - stirng2
+        - arg1
+        - arg2
   Iam:
     InstanceRole: arn:aws:iam::aws:role/CustomHeadNodeRole
     S3Access:
@@ -70,13 +70,13 @@ Scheduling:
         OnNodeStart:
           Script: https://test.tgz # s3:// | https:// | file://*
           Args:
-            - String # arg1
-            - stirng2
+            - arg1
+            - arg2
         OnNodeConfigured:
           Script: https://test.tgz # s3:// | https:// | file://*
           Args:
-            - String # arg1
-            - stirng2
+            - arg1
+            - arg2
       Iam:
         S3Access:
           - BucketName: string1
@@ -104,7 +104,7 @@ Scheduling:
           Enabled: true
           Id: String
         Proxy:
-          HttpProxyAddress: String # https://proxy-address:port
+          HttpProxyAddress: https://proxy-address:port
       ComputeResources:
         - Name: compute-resource-1
           InstanceType: c4.2xlarge
@@ -146,16 +146,16 @@ SharedStorage:
       DeploymentType: PERSISTENT_1 # PERSISTENT_1 | SCRATCH_1 | SCRATCH_2
       ImportedFileChunkSize: 1024
       DataCompressionType: LZ4
-      ExportPath: String # s3://bucket/folder
-      ImportPath: String # s3://bucket
+      ExportPath: s3://bucket/folder
+      ImportPath: s3://bucket
       WeeklyMaintenanceStartTime: "1:00:00"
       AutomaticBackupRetentionDays: 0
       CopyTagsToBackups: true
       DailyAutomaticBackupStartTime: 01:03
       PerUnitStorageThroughput: 200
       # BackupId: backup-fedcba98 # BackupId cannot coexist with some of the fields
-      KmsKeyId: String # xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
-      # FileSystemId: fs-12345678123456789 # FileSystemId cannot coexist with some of the fields
+      KmsKeyId: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
+      # FileSystemId: fs-12345678123456789 # FileSystemId cannot coexist with other fields
       AutoImportPolicy: NEW # NEW | NEW_CHANGED
       DriveCacheType: READ # READ
       StorageType: HDD # HDD | SSD
@@ -181,7 +181,7 @@ Tags:
   - Key: two
     Value: two22
 CustomS3Bucket: String
-AdditionalResources: String # https://template.url
+AdditionalResources: https://template.url
 DevSettings:
   ClusterTemplate: file:///tests/aws-parallelcluster-template-3.0.tgz
   Cookbook:
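
For illustration, a minimal Slurm cluster configuration exercising the schema changes above could look like the sketch below. It is not taken from the patch itself; the instance types, subnet ID, key name, role ARN and tag values are placeholders borrowed from the test files.

Image:
  Os: alinux2
HeadNode:
  InstanceType: t2.micro
  Networking:
    SubnetId: subnet-12345678
  Ssh:
    KeyName: ec2-key-name
  LocalStorage:
    RootVolume:            # section can now be added and updated after cluster creation
      Size: 50
      Encrypted: true
Scheduling:
  Scheduler: slurm
  SlurmQueues:
    - Name: queue1
      Networking:
        SubnetIds:
          - subnet-12345678
      ComputeResources:
        - Name: compute-resource-1
          InstanceType: c4.2xlarge
          DisableSimultaneousMultithreading: true   # now accepted only per Slurm compute resource
Iam:
  Roles:                   # Roles can now be updated on an existing cluster
    CustomLambdaResources: arn:aws:iam::aws:role/CustomResourcesLambdaRole
Tags:
  - Key: project           # Key and Value are now both required
    Value: demo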