diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000000..e0464dd62c
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,74 @@
+name: ParallelCluster CI
+
+on: [push, pull_request]
+
+jobs:
+  build:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest]
+        name:
+          - Python 2.7 Tests
+          - Python 3.4 Tests
+          - Python 3.5 Tests
+          - Python 3.6 Tests
+          - Python 3.7 Tests
+          - Python 3.8 Tests
+          - Python 3.9 Tests
+          - Python 3.8 Tests Coverage
+          - Code Checks
+          - CloudFormation Templates Checks
+        include:
+          - name: Python 2.7 Tests
+            python: 2.7
+            toxdir: cli
+            toxenv: py27-nocov
+          - name: Python 3.4 Tests
+            python: 3.4
+            toxdir: cli
+            toxenv: py34-nocov
+          - name: Python 3.5 Tests
+            python: 3.5
+            toxdir: cli
+            toxenv: py35-nocov
+          - name: Python 3.6 Tests
+            python: 3.6
+            toxdir: cli
+            toxenv: py36-nocov
+          - name: Python 3.7 Tests
+            python: 3.7
+            toxdir: cli
+            toxenv: py37-nocov
+          - name: Python 3.8 Tests
+            python: 3.8
+            toxdir: cli
+            toxenv: py38-nocov
+          - name: Python 3.9 Tests
+            python: 3.9
+            toxdir: cli
+            toxenv: py39-nocov
+          - name: Python 3.8 Tests Coverage
+            python: 3.8
+            toxdir: cli
+            toxenv: py38-cov
+          - name: Code Checks
+            python: 3.6
+            toxdir: cli
+            toxenv: code-linters
+          - name: CloudFormation Templates Checks
+            python: 3.6
+            toxdir: cli
+            toxenv: cfn-format-check,cfn-lint,cfn-tests
+
+    steps:
+      - uses: actions/checkout@v2
+      - name: Setup Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python }}
+      - name: Install Tox and any other packages
+        run: pip install tox
+      - name: Run Tox
+        run: cd ${{ matrix.toxdir }} && tox -e ${{ matrix.toxenv }}
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index 61643625e6..0000000000
--- a/.travis.yml
+++ /dev/null
@@ -1,32 +0,0 @@
-language: python
-sudo: required
-dist: xenial
-
-python:
-  - "2.7"
-  - "3.4"
-  - "3.5"
-  - "3.6"
-  - "3.7"
-  - "3.8"
-
-matrix:
-  include:
-    - name: Code Checks
-      python: 3.6
-      stage: linters
-      env: TOXENV=code-linters
-    - name: CloudFormation Templates Checks
-      python: 3.6
-      stage: linters
-      env: TOXENV=cfn-format-check,cfn-lint,cfn-tests
-    - name: Validate integration tests configs
-      python: 3.6
-      stage: linters
-      env: TOXENV=validate-test-configs
-      script: cd tests/integration-tests && tox
-
-install:
-  - pip install tox-travis
-
-script: cd cli && tox
diff --git a/CHANGELOG.md b/CHANGELOG.md
index eb9bd30d3e..2ce782e751 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,59 @@
 CHANGELOG
 =========
 
+2.10.1
+------
+
+**ENHANCEMENTS**
+
+- Add support for me-south-1 region (Bahrain), af-south-1 region (Cape Town) and eu-south-1 region (Milan)
+  - At the time of this version launch:
+    - Amazon FSx for Lustre and ARM instance types are not supported in me-south-1, af-south-1 and eu-south-1
+    - AWS Batch is not supported in af-south-1
+    - EBS io2 is not supported in af-south-1 and eu-south-1
+- Install Arm Performance Libraries (APL) 20.2.1 on ARM AMIs (CentOS8, Alinux2, Ubuntu1804).
+- Install EFA kernel module on ARM instances with `alinux2` and `ubuntu1804`. This enables support for `c6gn` instances.
+- Add support for the io2 and gp3 EBS volume types.
+- Add `iam_lambda_role` parameter under `cluster` section to allow specifying an existing IAM role to
+  be used by AWS Lambda functions in CloudFormation.
+  When using `sge`, `torque`, or `slurm` as the scheduler,
+  `pcluster` will not create any IAM role if both `ec2_iam_role` and `iam_lambda_role` are provided.
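+  For example, in the `cluster` section of the config file (a minimal sketch; the role
+  names below are illustrative placeholders):
+  ```ini
+  [cluster default]
+  scheduler = slurm
+  ec2_iam_role = my-existing-ec2-role
+  iam_lambda_role = my-existing-lambda-role
+  ```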
+- Improve robustness of a Slurm cluster when clustermgtd is down. +- Configure NFS threads to be max(8, num_cores) for performance. This enhancement will not take effect on Ubuntu 16.04. +- Optimize calls to DescribeInstanceTypes EC2 API when validating cluster configuration. + +**CHANGES** + +- Upgrade EFA installer to version 1.11.1. + - EFA configuration: ``efa-config-1.7`` (from efa-config-1.5) + - EFA profile: ``efa-profile-1.3`` (from efa-profile-1.1) + - EFA kernel module: ``efa-1.10.2`` (no change) + - RDMA core: ``rdma-core-31.2amzn`` (from rdma-core-31.amzn0) + - Libfabric: ``libfabric-1.11.1amzn1.0`` (from libfabric-1.11.1amzn1.1) + - Open MPI: ``openmpi40-aws-4.1.0`` (from openmpi40-aws-4.0.5) +- Upgrade Intel MPI to version U8. +- Upgrade NICE DCV to version 2020.2-9662. +- Set default systemd runlevel to multi-user.target on all OSes during ParallelCluster official AMI creation. + The runlevel is set to graphical.target on head node only when DCV is enabled. This prevents the execution of + graphical services, such as x/gdm, when they are not required. +- Download Intel MPI and HPC packages from S3 rather than Intel yum repos. +- Change the default of instance types from the hardcoded `t2.micro` to the free tier instance type + (`t2.micro` or `t3.micro` dependent on region). In regions without free tier, the default is `t3.micro`. +- Enable support for p4d as head node instance type (p4d was already supported as compute node in 2.10.0). +- Pull Amazon Linux Docker images from public ECR when building docker image for `awsbatch` scheduler. +- Increase max retry attempts when registering Slurm nodes in Route53. + +**BUG FIXES** + +- Fix pcluster createami for Ubuntu 1804 by downloading SGE sources from Debian repository and not from the EOL + Ubuntu 19.10. +- Remove CloudFormation DescribeStacks API call from AWS Batch Docker entrypoint. This removes the risk of job + failures due to CloudFormation throttling. +- Mandate the presence of `vpc_settings`, `vpc_id`, `master_subnet_id` in the config file to avoid unhandled exceptions. +- Set the default EBS volume size to 500 GiB when volume type is `st1` or `sc1`. +- Fix installation of Intel PSXE package on CentOS 7 by using yum4. +- Fix routing issues with multiple Network Interfaces on Ubuntu 18.04. + 2.10.0 ------ @@ -8,7 +61,7 @@ CHANGELOG - Add support for CentOS 8 in all Commercial regions. - Add support for P4d instance type as compute node. -- Add the possibilty to enable NVIDIA GPUDirect RDMA support on EFA by using the new `enable_efa_gdr` configuration +- Add the possibility to enable NVIDIA GPUDirect RDMA support on EFA by using the new `enable_efa_gdr` configuration parameter. - Enable support for NICE DCV in GovCloud regions. - Enable support for AWS Batch scheduler in GovCloud regions. diff --git a/README.md b/README.md index f0d3dc4a27..30faa7c7cb 100644 --- a/README.md +++ b/README.md @@ -70,16 +70,16 @@ Automate VPC creation? (y/n) [n]: Enter ``n`` if you already have a VPC suitable for the cluster. Otherwise you can let ``pcluster configure`` create a VPC for you. The same choice is given for the subnet: you can select a valid subnet ID for -both the master and compute nodes, or you can let ``pcluster configure`` set up everything for you. +both the head node and compute nodes, or you can let ``pcluster configure`` set up everything for you. 
The same choice is given for the subnet configuration: you can select a valid subnet ID for both -the master and compute nodes, or you can let pcluster configure set up everything for you. +the head node and compute nodes, or you can let pcluster configure set up everything for you. In the latter case, just select the configuration you prefer. ``` Automate Subnet creation? (y/n) [y]: y Allowed values for Network Configuration: -1. Master in a public subnet and compute fleet in a private subnet -2. Master and compute fleet in the same public subnet +1. Head node in a public subnet and compute fleet in a private subnet +2. Head node and compute fleet in the same public subnet ``` diff --git a/amis.txt b/amis.txt index fb20f63162..1b557023fe 100644 --- a/amis.txt +++ b/amis.txt @@ -1,144 +1,163 @@ ## x86_64 # alinux -ap-east-1: ami-086d023ae9be29265 -ap-northeast-1: ami-071d1f9ecf81d5866 -ap-northeast-2: ami-0f13dd00403ae009c +af-south-1: ami-0f2e2135a05f814df +ap-east-1: ami-05ef7cb79d3a43092 +ap-northeast-1: ami-006b7f4c929aaf4a9 +ap-northeast-2: ami-06a49824cc501a981 ap-northeast-3: UNSUPPORTED -ap-south-1: ami-09952eb2d4dd3f2e2 -ap-southeast-1: ami-043d99842781aff9d -ap-southeast-2: ami-02656322bc3cae72c -ca-central-1: ami-0a853d47df00d434a -cn-north-1: ami-03a764b8e78792057 -cn-northwest-1: ami-077d4bb98d5ca5def -eu-central-1: ami-09f3ed2c18ba86996 -eu-north-1: ami-03d6705f566e99836 -eu-west-1: ami-02c2421d6cd745994 -eu-west-2: ami-0ef2ff4ce73c7208e -eu-west-3: ami-07e87c1e0dd3d3e75 -sa-east-1: ami-0d552ac237f838360 -us-east-1: ami-01a1cf6f36f2bd13b -us-east-2: ami-0a8d40acce6869be4 -us-gov-east-1: ami-05bb72e83f0973bdc -us-gov-west-1: ami-0318093bb66476048 -us-west-1: ami-0425c898b65b066f3 -us-west-2: ami-0ab937dbac92ae27e +ap-south-1: ami-0a47fd68cf7034c58 +ap-southeast-1: ami-0e6cfdde386164836 +ap-southeast-2: ami-0ba7f788162c4a4de +ca-central-1: ami-0808f2df200c7006c +cn-north-1: ami-050c2b7b0181fbfd5 +cn-northwest-1: ami-07d8dd2f175498353 +eu-central-1: ami-032358687770c43e1 +eu-north-1: ami-038781ae9b21b98ca +eu-south-1: ami-0a9a6c50dc32a934f +eu-west-1: ami-0dd799c2e1a68608e +eu-west-2: ami-0849a887182dd033a +eu-west-3: ami-060df914a5b5ad680 +me-south-1: ami-0d68c8e916ccf0418 +sa-east-1: ami-0d4a17532432d5aa2 +us-east-1: ami-0604e4a14869de93f +us-east-2: ami-00d4efc81188687a0 +us-gov-east-1: ami-01d57910cd71ea0c4 +us-gov-west-1: ami-0cfc4f4eb94c9f403 +us-west-1: ami-0c8decb747bfca25f +us-west-2: ami-018ccd7660ecade5e # alinux2 -ap-east-1: ami-060927fff43c77a88 -ap-northeast-1: ami-068b3b3104ae04d62 -ap-northeast-2: ami-07cd9137f04b28895 +af-south-1: ami-046f49b550ce90d8a +ap-east-1: ami-0ec0d099b8a276aec +ap-northeast-1: ami-0a13402dc88c19be2 +ap-northeast-2: ami-0bdfbd3521caa5dd2 ap-northeast-3: UNSUPPORTED -ap-south-1: ami-06cc0e0c03bd6abd9 -ap-southeast-1: ami-005fd4e1d2ccdd8a9 -ap-southeast-2: ami-0256ef02207960118 -ca-central-1: ami-08f2692bcab1f1660 -cn-north-1: ami-0b03a127af1f7956c -cn-northwest-1: ami-0beb9d63e7fe38381 -eu-central-1: ami-04d384114ab202c13 -eu-north-1: ami-01afd40fed001ed87 -eu-west-1: ami-04627ea002a11c93c -eu-west-2: ami-0953845dcd74d8d34 -eu-west-3: ami-01525b6212f1a191b -sa-east-1: ami-0cabfea2e0cf31af2 -us-east-1: ami-018cd948cda2d7384 -us-east-2: ami-08aa991eca6bf394a -us-gov-east-1: ami-0cbbce3284f341759 -us-gov-west-1: ami-01b605af54e3fdd9e -us-west-1: ami-04b508b1dae54310b -us-west-2: ami-0f085dccfc6937af7 +ap-south-1: ami-0059d599d21636768 +ap-southeast-1: ami-074f58cccc7ebb68f +ap-southeast-2: ami-04b4a20ee9f67608f +ca-central-1: 
ami-0523a9bc4151ee96e +cn-north-1: ami-0a12307d2d0ddc535 +cn-northwest-1: ami-0ef379c7fd5eb332e +eu-central-1: ami-07055c21834b0bc56 +eu-north-1: ami-0fe475ca307943eb1 +eu-south-1: ami-042fc69d71433a75c +eu-west-1: ami-063ac3df7f8595751 +eu-west-2: ami-086111e12527fa455 +eu-west-3: ami-0258a5a8320ccfa42 +me-south-1: ami-0c98692d98eb38c50 +sa-east-1: ami-0f463bcf6d86cad85 +us-east-1: ami-0b71488efbe422723 +us-east-2: ami-0075df3faa5b6e07e +us-gov-east-1: ami-057f7c2d5a1ca7b7d +us-gov-west-1: ami-01222b796bafd609f +us-west-1: ami-01c4b0b6d5597b80b +us-west-2: ami-079facc5ab3fdf701 # centos7 -ap-east-1: ami-0f7168e4940a70237 -ap-northeast-1: ami-07555b739b7d7f81c -ap-northeast-2: ami-0c0df7a4adaebab82 +af-south-1: ami-0e0fb5acd64f2be5e +ap-east-1: ami-0d6f16d7fceae84ee +ap-northeast-1: ami-03a451be7ebcc159e +ap-northeast-2: ami-0ba51cb6c4ceae756 ap-northeast-3: UNSUPPORTED -ap-south-1: ami-0d4e69b97911c88aa -ap-southeast-1: ami-0e715873ed4fdf15a -ap-southeast-2: ami-04ba6428ce77e728a -ca-central-1: ami-08f798212e2b9dcc9 +ap-south-1: ami-0919124b7770af8d9 +ap-southeast-1: ami-021e3e90b0458781f +ap-southeast-2: ami-0f5a161afeed62a35 +ca-central-1: ami-057fc92460a096dab cn-north-1: UNSUPPORTED cn-northwest-1: UNSUPPORTED -eu-central-1: ami-0b7dd5253f4218850 -eu-north-1: ami-01d49e3863d1f2d11 -eu-west-1: ami-009bfc0fb11ad5c6c -eu-west-2: ami-0100afbc14f4008ce -eu-west-3: ami-0b2a108cb09e900ac -sa-east-1: ami-0dde4970d46cc7abc -us-east-1: ami-0bbd714da0c7763e2 -us-east-2: ami-0b3d403d025f9ba61 +eu-central-1: ami-0a0f1b95d41e6a651 +eu-north-1: ami-0c191b20554866575 +eu-south-1: ami-03276d70dacf1a574 +eu-west-1: ami-000a3d84d3c77fdb3 +eu-west-2: ami-0a347ff9b26c5e34c +eu-west-3: ami-02f8c6e2622c3804f +me-south-1: ami-0a64f83dd01c08dab +sa-east-1: ami-082aafa914dc04479 +us-east-1: ami-0516dc2ba9f4fc177 +us-east-2: ami-07d1461ceceb4df43 us-gov-east-1: UNSUPPORTED us-gov-west-1: UNSUPPORTED -us-west-1: ami-0e589824d8d821ce0 -us-west-2: ami-0f386f4a170027bbb +us-west-1: ami-0a426e145ced105df +us-west-2: ami-0e92bdb4aee551791 # centos8 -ap-east-1: ami-081b80aa11a8a4c50 -ap-northeast-1: ami-09151d65b9344095c -ap-northeast-2: ami-09e91a17a5454daf8 +af-south-1: ami-0d881910a58319c15 +ap-east-1: ami-0ca1228e9ddcfa963 +ap-northeast-1: ami-075b3892ecd63214f +ap-northeast-2: ami-02287a8528bba818d ap-northeast-3: UNSUPPORTED -ap-south-1: ami-055bc2174e27b050e -ap-southeast-1: ami-0f221ad56e980433c -ap-southeast-2: ami-085265c078d86c75b -ca-central-1: ami-0fad54150dd6e4d1a +ap-south-1: ami-0602d9f62b83744f5 +ap-southeast-1: ami-0e789f414c14f6332 +ap-southeast-2: ami-0d1d7229f7b73a5de +ca-central-1: ami-03ea006f20d390940 cn-north-1: UNSUPPORTED cn-northwest-1: UNSUPPORTED -eu-central-1: ami-0d986e39b68bd9add -eu-north-1: ami-014f4cd90479d0c71 -eu-west-1: ami-09a3a5c16f6c84015 -eu-west-2: ami-09956ce8a4fda6388 -eu-west-3: ami-08454ae2840055567 -sa-east-1: ami-0f2a6ece6191f85e8 -us-east-1: ami-0615c0838767b8e00 -us-east-2: ami-0a07f43c650ad20e9 +eu-central-1: ami-02277a3e208351cc6 +eu-north-1: ami-00b01d23b71fff297 +eu-south-1: ami-0ffbb41ac00ef5dff +eu-west-1: ami-04a5c9c04faa5b9f7 +eu-west-2: ami-017fab72fc1db3851 +eu-west-3: ami-041e8760a7b80cfde +me-south-1: ami-0e30fd8da5a1ddc84 +sa-east-1: ami-005096f26ee208519 +us-east-1: ami-0f56d9873066cb6b9 +us-east-2: ami-073e63c94c971cc69 us-gov-east-1: UNSUPPORTED us-gov-west-1: UNSUPPORTED -us-west-1: ami-0e71f40a3e18cb184 -us-west-2: ami-0668454eee630c595 +us-west-1: ami-0f3085000b53339e0 +us-west-2: ami-029dc099ae4e121f1 # ubuntu1604 -ap-east-1: ami-0e4408eaab978d69d 
-ap-northeast-1: ami-021df455e65a94ba1 -ap-northeast-2: ami-02a8aa869263d3268 +af-south-1: ami-041e26e4bfed8cd7b +ap-east-1: ami-04539233794f181f7 +ap-northeast-1: ami-01fe6e75948fa65df +ap-northeast-2: ami-05dd0b07a0e8cf644 ap-northeast-3: UNSUPPORTED -ap-south-1: ami-0b939ee54f0c676de -ap-southeast-1: ami-04b44a45309e8f7b7 -ap-southeast-2: ami-06367ff848861ddfc -ca-central-1: ami-0438f2fca91841651 -cn-north-1: ami-0e4c187d835abb08b -cn-northwest-1: ami-0be4b9f412690daa2 -eu-central-1: ami-0f81af77a1e347a75 -eu-north-1: ami-0875c09d8a230cc3f -eu-west-1: ami-0da1dd9a8b40ee87a -eu-west-2: ami-0519e22cbfd281d2c -eu-west-3: ami-0bf33211929a48c9f -sa-east-1: ami-0c337a8051bd0ed6f -us-east-1: ami-04ce9ff46c759ffa8 -us-east-2: ami-0eb0a40959685a105 -us-gov-east-1: ami-09bdb46f2643fefbd -us-gov-west-1: ami-0b790f0b4c3856aa3 -us-west-1: ami-012699e63307886c5 -us-west-2: ami-08b09c217d19d5e1a +ap-south-1: ami-06aec0a1241e29730 +ap-southeast-1: ami-0f35154dc0071f71b +ap-southeast-2: ami-0da4c39e17cdfef89 +ca-central-1: ami-098c762477bc6b1fb +cn-north-1: ami-0a3f41e4d89bdff32 +cn-northwest-1: ami-0d304a0c5d04ac4e2 +eu-central-1: ami-0df794834b461ba22 +eu-north-1: ami-06975933696e0263a +eu-south-1: ami-0842531296e56778e +eu-west-1: ami-0d9c6bf221068c7c3 +eu-west-2: ami-0d2a3fa50134294e9 +eu-west-3: ami-010b0bc4570ec96a3 +me-south-1: ami-08d2926b1669d79d2 +sa-east-1: ami-0ae46391f64fa0b71 +us-east-1: ami-0b3dfe986b324a1bf +us-east-2: ami-04fd4dda7bb2fcaff +us-gov-east-1: ami-00150b953797bdaa4 +us-gov-west-1: ami-06bfdc6f4185351c5 +us-west-1: ami-00fbdde9fb06d3b09 +us-west-2: ami-008383c0ab2a2d425 # ubuntu1804 -ap-east-1: ami-05bd159bdb29e9aa9 -ap-northeast-1: ami-0f99a29d4392ac446 -ap-northeast-2: ami-0556b2de682ca759d +af-south-1: ami-04280c7f4bee35afe +ap-east-1: ami-0d16f6585c134b76d +ap-northeast-1: ami-096205fd8c1ea23b8 +ap-northeast-2: ami-04ef860d893888eee ap-northeast-3: UNSUPPORTED -ap-south-1: ami-059ddcf5c8e408012 -ap-southeast-1: ami-04878f0e013df9869 -ap-southeast-2: ami-02fa7e4f4b1073823 -ca-central-1: ami-0e6aa6758cd219754 -cn-north-1: ami-099431ae52fbcc1b9 -cn-northwest-1: ami-09277e9f7bb212c56 -eu-central-1: ami-0928231bd3b6a52b2 -eu-north-1: ami-0dfc3abcaeeafebd6 -eu-west-1: ami-06065b90e25ef853b -eu-west-2: ami-0a26ef861ba74b872 -eu-west-3: ami-0ee11f5712078f6e3 -sa-east-1: ami-0ff7ab2d586a7f0a6 -us-east-1: ami-05b80d924accf2dac -us-east-2: ami-05e89121e9222b5c6 -us-gov-east-1: ami-0b540f46f29ef9019 -us-gov-west-1: ami-00c69b11f502f4f08 -us-west-1: ami-0339ba2c62b77a99e -us-west-2: ami-036a032a9f6c44f84 +ap-south-1: ami-09fe484636519a7fd +ap-southeast-1: ami-0d2894ee85aac22c0 +ap-southeast-2: ami-07de67a2c91b2605c +ca-central-1: ami-0b8b3c3a561758ae3 +cn-north-1: ami-0abc7e40f18e6cda4 +cn-northwest-1: ami-0f52a155923e4de7f +eu-central-1: ami-07d1489352b517f39 +eu-north-1: ami-0c47a559ed268c649 +eu-south-1: ami-01cfe122f3044a34d +eu-west-1: ami-0cbef8b383ddeff80 +eu-west-2: ami-08d337feaf0f0a59f +eu-west-3: ami-074fae2e6420b8ac7 +me-south-1: ami-0d7bc19407d2b26a9 +sa-east-1: ami-0b7b49d35034bbc2f +us-east-1: ami-009fdaa0002906c5b +us-east-2: ami-0ec51c20170525d3f +us-gov-east-1: ami-04a13dedb7a0a1cfa +us-gov-west-1: ami-0be2fc1895e4b4d9f +us-west-1: ami-04ac69ccbff147270 +us-west-2: ami-01a7264e2e3bf272f ## arm64 # alinux +af-south-1: UNSUPPORTED ap-east-1: UNSUPPORTED ap-northeast-1: UNSUPPORTED ap-northeast-2: UNSUPPORTED @@ -151,9 +170,11 @@ cn-north-1: UNSUPPORTED cn-northwest-1: UNSUPPORTED eu-central-1: UNSUPPORTED eu-north-1: UNSUPPORTED +eu-south-1: UNSUPPORTED eu-west-1: 
UNSUPPORTED eu-west-2: UNSUPPORTED eu-west-3: UNSUPPORTED +me-south-1: UNSUPPORTED sa-east-1: UNSUPPORTED us-east-1: UNSUPPORTED us-east-2: UNSUPPORTED @@ -162,29 +183,33 @@ us-gov-west-1: UNSUPPORTED us-west-1: UNSUPPORTED us-west-2: UNSUPPORTED # alinux2 -ap-east-1: ami-0afa2c302be613354 -ap-northeast-1: ami-0d90445a8e0dd846e -ap-northeast-2: ami-012482cac933631dd +af-south-1: ami-00cd9a9915d5abf79 +ap-east-1: ami-0f73d5986d43564e2 +ap-northeast-1: ami-0802175acef9f342e +ap-northeast-2: ami-0b67c1511fe4c46e1 ap-northeast-3: UNSUPPORTED -ap-south-1: ami-0e66f1d2824238a00 -ap-southeast-1: ami-0944c0f8c608a1dbe -ap-southeast-2: ami-059405472e209bf90 -ca-central-1: ami-075d7c38d2c7f2347 +ap-south-1: ami-0e75152f8053094c3 +ap-southeast-1: ami-0da4635ceb6d846d9 +ap-southeast-2: ami-0d7a81adbdde9dce5 +ca-central-1: ami-070c24fa069a27265 cn-north-1: UNSUPPORTED cn-northwest-1: UNSUPPORTED -eu-central-1: ami-0d99e8ab1b493e9bc -eu-north-1: ami-064946adc4b641961 -eu-west-1: ami-0bb09e10d8b5cb747 -eu-west-2: ami-0f857639eb82d291e -eu-west-3: ami-0b606e6e56b851d66 -sa-east-1: ami-010d19c007ce5a2a4 -us-east-1: ami-0e96b1f22bc4ded89 -us-east-2: ami-065f40fae77a9fb41 +eu-central-1: ami-0331d559f079efd03 +eu-north-1: ami-03e4cc7f565c8efec +eu-south-1: ami-0392d65ba7c8af2b5 +eu-west-1: ami-0b86c4a8da59d6d11 +eu-west-2: ami-039c5ad24328545a9 +eu-west-3: ami-067a371d97ca6fcf2 +me-south-1: ami-0ea4909cdeeef4b03 +sa-east-1: ami-013861b8a2b1a63b5 +us-east-1: ami-0b1f998cf2b1498db +us-east-2: ami-059703a477566540c us-gov-east-1: UNSUPPORTED us-gov-west-1: UNSUPPORTED -us-west-1: ami-0715d8a272300c41f -us-west-2: ami-01c9e5995bc8ee16a +us-west-1: ami-08540a991b0cd29bd +us-west-2: ami-0257f455a26d9ed84 # centos7 +af-south-1: UNSUPPORTED ap-east-1: UNSUPPORTED ap-northeast-1: UNSUPPORTED ap-northeast-2: UNSUPPORTED @@ -197,9 +222,11 @@ cn-north-1: UNSUPPORTED cn-northwest-1: UNSUPPORTED eu-central-1: UNSUPPORTED eu-north-1: UNSUPPORTED +eu-south-1: UNSUPPORTED eu-west-1: UNSUPPORTED eu-west-2: UNSUPPORTED eu-west-3: UNSUPPORTED +me-south-1: UNSUPPORTED sa-east-1: UNSUPPORTED us-east-1: UNSUPPORTED us-east-2: UNSUPPORTED @@ -208,29 +235,33 @@ us-gov-west-1: UNSUPPORTED us-west-1: UNSUPPORTED us-west-2: UNSUPPORTED # centos8 -ap-east-1: ami-090b2a584a3f7f287 -ap-northeast-1: ami-082dd4b62e4501fd3 -ap-northeast-2: ami-0a1d5d4acc59b8c42 +af-south-1: ami-0b0a12580b8fcdfe5 +ap-east-1: ami-007ed99f82a962cc1 +ap-northeast-1: ami-0490e30625fc3466f +ap-northeast-2: ami-03222a04de4228c08 ap-northeast-3: UNSUPPORTED -ap-south-1: ami-07ce7adc39b9e28b2 -ap-southeast-1: ami-04a794d883bdaf470 -ap-southeast-2: ami-0a4a36662eaed020f -ca-central-1: ami-0f6f89bc747dc927c +ap-south-1: ami-0075369b2abf46b05 +ap-southeast-1: ami-086fd32165ac4e0a2 +ap-southeast-2: ami-07fb6c0c050a39292 +ca-central-1: ami-04be4e45fe60b19e1 cn-north-1: UNSUPPORTED cn-northwest-1: UNSUPPORTED -eu-central-1: ami-0ff2b3c8975a3db85 -eu-north-1: ami-0047a557a82efb0d0 -eu-west-1: ami-0163897289a397a7b -eu-west-2: ami-041508f55e7353f16 -eu-west-3: ami-08b703b2dce4d24db -sa-east-1: ami-00cb9fa66203e7004 -us-east-1: ami-02597a6058d9bb415 -us-east-2: ami-0b833d4b88b0df37f +eu-central-1: ami-02ea94742b76197a4 +eu-north-1: ami-052bb6243fc4b84a3 +eu-south-1: ami-015ae8f55912771fe +eu-west-1: ami-0773e67d6a7466681 +eu-west-2: ami-009ac53f9323b9865 +eu-west-3: ami-021787c0ad5f60f6b +me-south-1: ami-085892d9aec1ed9f9 +sa-east-1: ami-0d3e6fbc43aaa308c +us-east-1: ami-02839a5871b4ec582 +us-east-2: ami-0d4f0890ef069afc0 us-gov-east-1: UNSUPPORTED us-gov-west-1: UNSUPPORTED 
-us-west-1: ami-00aa1f310b54e3cd5 -us-west-2: ami-0e6ed2c534bab4d35 +us-west-1: ami-0aaeb3457c5f4a511 +us-west-2: ami-02a5f2d441ae4ea8e # ubuntu1604 +af-south-1: UNSUPPORTED ap-east-1: UNSUPPORTED ap-northeast-1: UNSUPPORTED ap-northeast-2: UNSUPPORTED @@ -243,9 +274,11 @@ cn-north-1: UNSUPPORTED cn-northwest-1: UNSUPPORTED eu-central-1: UNSUPPORTED eu-north-1: UNSUPPORTED +eu-south-1: UNSUPPORTED eu-west-1: UNSUPPORTED eu-west-2: UNSUPPORTED eu-west-3: UNSUPPORTED +me-south-1: UNSUPPORTED sa-east-1: UNSUPPORTED us-east-1: UNSUPPORTED us-east-2: UNSUPPORTED @@ -254,25 +287,28 @@ us-gov-west-1: UNSUPPORTED us-west-1: UNSUPPORTED us-west-2: UNSUPPORTED # ubuntu1804 -ap-east-1: ami-026aa8e70fb166ea9 -ap-northeast-1: ami-0bb8f711d35faf564 -ap-northeast-2: ami-001e5f95b7285db48 +af-south-1: ami-046d98477dd9fc7eb +ap-east-1: ami-0d2624d4b9e6970e7 +ap-northeast-1: ami-08efda47b57a7c048 +ap-northeast-2: ami-05ae60c35b06f34e5 ap-northeast-3: UNSUPPORTED -ap-south-1: ami-060347326df2488ce -ap-southeast-1: ami-0e8da59005bbebc4c -ap-southeast-2: ami-03e63532571847086 -ca-central-1: ami-0e28334f9f98bce1c +ap-south-1: ami-0ef07d3677758fee7 +ap-southeast-1: ami-052b40fbf4a7d852f +ap-southeast-2: ami-02a57f04d151d2ce2 +ca-central-1: ami-0bef62a8de524cc3d cn-north-1: UNSUPPORTED cn-northwest-1: UNSUPPORTED -eu-central-1: ami-0bdcdad522abee324 -eu-north-1: ami-0abb2f4b0fb7f9419 -eu-west-1: ami-07b73abb892002aba -eu-west-2: ami-0c9501e4fae3574e1 -eu-west-3: ami-0fe23436c8cd7c55d -sa-east-1: ami-0e2ac51ba079f6df3 -us-east-1: ami-0bb7443216d8ea706 -us-east-2: ami-02a92e06fd643c11b +eu-central-1: ami-0ab72b2539a8e4eaf +eu-north-1: ami-00315391fc87d6a4a +eu-south-1: ami-00d476744294f458a +eu-west-1: ami-0224e61822e0603ba +eu-west-2: ami-06496496998c0480a +eu-west-3: ami-0d16162157fa31ea4 +me-south-1: ami-0ce1e2728e4ba14c6 +sa-east-1: ami-079a46ad6559d1023 +us-east-1: ami-0a5c0725ce4d960f1 +us-east-2: ami-08776e764b05c8fa7 us-gov-east-1: UNSUPPORTED us-gov-west-1: UNSUPPORTED -us-west-1: ami-084f3531a939fa437 -us-west-2: ami-0a81e33be478706e1 +us-west-1: ami-0ca03ec3eca322ace +us-west-2: ami-0f67bacfcdf1f2374 diff --git a/cli/.flake8 b/cli/.flake8 index 052dc1fc72..fef48e04b6 100644 --- a/cli/.flake8 +++ b/cli/.flake8 @@ -7,7 +7,6 @@ ignore = D103, # Missing docstring in public function W503, # line break before binary operator => Conflicts with black style. 
D413, # Missing blank line after last section
-# D103 Missing docstring in public function
 # E402 module level import not at top of file
 # D101 Missing docstring in public class
 # D102 Missing docstring in public method
@@ -15,12 +14,9 @@ ignore =
 # D400 First line should end with a period
 # D401 First line should be in imperative mood
per-file-ignores =
-    pcluster/configure/easyconfig.py: E402
-    pcluster/utils.py: E402
-    tests/pcluster/pcluster-unittest.py: D101, D102
-    tests/pcluster/configure/test_*.py: D101, D102
-    tests/pcluster/*/test_*.py: D101, D102
-    tests/awsbatch/test_*.py: D101, D102
+    src/pcluster/configure/easyconfig.py: E402
+    src/pcluster/utils.py: E402
+    tests/*: D101, D102
     ../tests/integration-tests/*: D205, D400, D401
 exclude =
     .tox,
diff --git a/cli/MANIFEST.in b/cli/MANIFEST.in
index aa190a4faa..977b1cd061 100644
--- a/cli/MANIFEST.in
+++ b/cli/MANIFEST.in
@@ -1,4 +1,3 @@
-recursive-include pcluster/cloudformation *
-recursive-include pcluster/examples *
-recursive-include pcluster/resources *
+recursive-include src/pcluster/examples *
+recursive-include src/pcluster/resources *
 recursive-exclude tests *
diff --git a/cli/README b/cli/README
index 1a942019d7..8c412c530b 100644
--- a/cli/README
+++ b/cli/README
@@ -17,11 +17,11 @@ You can build higher level workflows, such as a Genomics portal that automates t
   update        Updates a running cluster.
   delete        Deletes a cluster.
   start         Starts the compute fleet that has been stopped.
-  stop          Stops the compute fleet, but leave the master server running for debugging/development.
+  stop          Stops the compute fleet, but leaves the head node running for debugging/development.
   status        Pulls the current status of the cluster.
   list          Displays a list of stacks associated with AWS ParallelCluster.
   instances     Displays a list of all instances in a cluster.
-  ssh           Connects to the master server using SSH.
+  ssh           Connects to the head node using SSH.
   configure     Starts the AWS ParallelCluster configuration.
   version       Displays version of AWS ParallelCluster.
   createami     (Linux/macOS) Creates a custom AMI to use with AWS ParallelCluster.
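+
+For example, the ssh command appends any extra arguments to the underlying ssh
+invocation, so an identity file can be passed through (the cluster name and key
+path below are placeholders):
+
+    pcluster ssh mycluster -i ~/.ssh/mykey.pem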
diff --git a/cli/setup.py b/cli/setup.py index c330554e95..33fc52695e 100644 --- a/cli/setup.py +++ b/cli/setup.py @@ -21,7 +21,7 @@ def readme(): return f.read() -VERSION = "2.10.0" +VERSION = "2.10.1" REQUIRES = [ "setuptools", "boto3>=1.16.14", @@ -46,8 +46,9 @@ def readme(): "and manage HPC clusters in the AWS cloud.", url="https://github.com/aws/aws-parallelcluster", license="Apache License 2.0", - packages=find_packages(), - python_requires=">=2.7", + package_dir={"": "src"}, + packages=find_packages("src"), + python_requires=">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*", install_requires=REQUIRES, entry_points={ "console_scripts": [ @@ -63,13 +64,27 @@ def readme(): }, include_package_data=True, zip_safe=False, - package_data={"": ["examples/config"]}, + package_data={"": ["src/examples/config"]}, long_description=readme(), classifiers=[ "Development Status :: 5 - Production/Stable", "Environment :: Console", "Programming Language :: Python", + "Programming Language :: Python :: 2", + "Programming Language :: Python :: 2.7", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.4", + "Programming Language :: Python :: 3.5", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", "Topic :: Scientific/Engineering", "License :: OSI Approved :: Apache Software License", ], + project_urls={ + "Changelog": "https://github.com/aws/aws-parallelcluster/blob/develop/CHANGELOG.md", + "Issue Tracker": "https://github.com/aws/aws-parallelcluster/issues", + "Documentation": "https://docs.aws.amazon.com/parallelcluster/", + }, ) diff --git a/cli/awsbatch/__init__.py b/cli/src/awsbatch/__init__.py similarity index 100% rename from cli/awsbatch/__init__.py rename to cli/src/awsbatch/__init__.py diff --git a/cli/awsbatch/awsbhosts.py b/cli/src/awsbatch/awsbhosts.py similarity index 100% rename from cli/awsbatch/awsbhosts.py rename to cli/src/awsbatch/awsbhosts.py diff --git a/cli/awsbatch/awsbkill.py b/cli/src/awsbatch/awsbkill.py similarity index 100% rename from cli/awsbatch/awsbkill.py rename to cli/src/awsbatch/awsbkill.py diff --git a/cli/awsbatch/awsbout.py b/cli/src/awsbatch/awsbout.py similarity index 100% rename from cli/awsbatch/awsbout.py rename to cli/src/awsbatch/awsbout.py diff --git a/cli/awsbatch/awsbqueues.py b/cli/src/awsbatch/awsbqueues.py similarity index 100% rename from cli/awsbatch/awsbqueues.py rename to cli/src/awsbatch/awsbqueues.py diff --git a/cli/awsbatch/awsbstat.py b/cli/src/awsbatch/awsbstat.py similarity index 100% rename from cli/awsbatch/awsbstat.py rename to cli/src/awsbatch/awsbstat.py diff --git a/cli/awsbatch/awsbsub.py b/cli/src/awsbatch/awsbsub.py similarity index 99% rename from cli/awsbatch/awsbsub.py rename to cli/src/awsbatch/awsbsub.py index ea67dfdafa..5c2f9f7620 100644 --- a/cli/awsbatch/awsbsub.py +++ b/cli/src/awsbatch/awsbsub.py @@ -566,7 +566,7 @@ def main(): retry_attempts=args.retry_attempts, timeout=args.timeout, env=[ - ("MASTER_IP", config.master_ip), # TODO remove + ("MASTER_IP", config.head_node_ip), # TODO remove ("PCLUSTER_JOB_S3_URL", "s3://{0}/{1}".format(config.s3_bucket, job_s3_folder)), ], ) diff --git a/cli/awsbatch/common.py b/cli/src/awsbatch/common.py similarity index 98% rename from cli/awsbatch/common.py rename to cli/src/awsbatch/common.py index f9bcb41a58..022ffdb3a8 100644 --- a/cli/awsbatch/common.py +++ b/cli/src/awsbatch/common.py @@ -178,7 +178,7 @@ def __verify_initialization(self, 
log): log.debug("compute_environment = %s", self.compute_environment) log.debug("job_queue = %s", self.job_queue) log.debug("job_definition = %s", self.job_definition) - log.debug("master_ip = %s", self.master_ip) + log.debug("master_ip = %s", self.head_node_ip) log.info(self) except AttributeError as e: fail( @@ -261,7 +261,7 @@ def __init_from_config(self, cli_config_file, cluster, log): # noqa: C901 FIXME self.job_definition_mnp = config.get(cluster_section, "job_definition_mnp") except NoOptionError: pass - self.master_ip = config.get(cluster_section, "master_ip") + self.head_node_ip = config.get(cluster_section, "master_ip") # get proxy self.proxy = config.get(cluster_section, "proxy") @@ -316,7 +316,7 @@ def __init_from_stack(self, cluster, log): # noqa: C901 FIXME elif output_key == "BatchJobDefinitionArn": self.job_definition = output_value elif output_key == "MasterPrivateIP": - self.master_ip = output_value + self.head_node_ip = output_value elif output_key == "BatchJobDefinitionMnpArn": self.job_definition_mnp = output_value diff --git a/cli/awsbatch/examples/awsbatch-cli.cfg b/cli/src/awsbatch/examples/awsbatch-cli.cfg similarity index 97% rename from cli/awsbatch/examples/awsbatch-cli.cfg rename to cli/src/awsbatch/examples/awsbatch-cli.cfg index 164fcc2e48..6b92bc381e 100644 --- a/cli/awsbatch/examples/awsbatch-cli.cfg +++ b/cli/src/awsbatch/examples/awsbatch-cli.cfg @@ -37,7 +37,7 @@ job_definition = arn:aws:batch:::job-definition/parallelclus job_definition_mnp = arn:aws:batch:::job-definition/parallelcluster--mnp:1 # HTTP(S) proxy server, typically http://x.x.x.x:8080, used for internal boto3 calls proxy = NONE -# Private Master IP, used internally in the job submission phase. +# Private head node IP, used internally in the job submission phase. master_ip = x.x.x.x # Environment blacklist variables # Comma separated list of environment variable names to not export when submitting a job with "--env all" parameter diff --git a/cli/awsbatch/utils.py b/cli/src/awsbatch/utils.py similarity index 100% rename from cli/awsbatch/utils.py rename to cli/src/awsbatch/utils.py diff --git a/cli/pcluster/__init__.py b/cli/src/pcluster/__init__.py similarity index 100% rename from cli/pcluster/__init__.py rename to cli/src/pcluster/__init__.py diff --git a/cli/pcluster/cli.py b/cli/src/pcluster/cli.py similarity index 98% rename from cli/pcluster/cli.py rename to cli/src/pcluster/cli.py index 22e7084d42..c2e8a85eb8 100644 --- a/cli/pcluster/cli.py +++ b/cli/src/pcluster/cli.py @@ -272,9 +272,9 @@ def _get_parser(): # stop command subparser pstop = subparsers.add_parser( "stop", - help="Stops the compute fleet, leaving the master server running.", + help="Stops the compute fleet, leaving the head node running.", epilog="This command sets the Auto Scaling Group parameters to min/max/desired = 0/0/0 and " - "terminates the compute fleet. The master will remain running. To terminate " + "terminates the compute fleet. The head node will remain running. To terminate " "all EC2 resources and avoid EC2 charges, consider deleting the cluster.", ) pstop.add_argument("cluster_name", help="Stops the compute fleet of the cluster name provided here.") @@ -331,7 +331,7 @@ def _get_parser(): ) pssh = subparsers.add_parser( "ssh", - help="Connects to the master instance using SSH.", + help="Connects to the head node instance using SSH.", description="Run ssh command with the cluster username and IP address pre-populated. " "Arbitrary arguments are appended to the end of the ssh command. 
" "This command can be customized in the aliases " @@ -429,7 +429,7 @@ def _get_parser(): dcv_subparsers.required = True dcv_subparsers.dest = "subcommand" pdcv_connect = dcv_subparsers.add_parser( - "connect", help="Permits to connect to the master node through an interactive session by using NICE DCV." + "connect", help="Permits to connect to the head node through an interactive session by using NICE DCV." ) _addarg_region(pdcv_connect) pdcv_connect.add_argument("cluster_name", help="Name of the cluster to connect to") diff --git a/cli/pcluster/cli_commands/__init__.py b/cli/src/pcluster/cli_commands/__init__.py similarity index 100% rename from cli/pcluster/cli_commands/__init__.py rename to cli/src/pcluster/cli_commands/__init__.py diff --git a/cli/pcluster/cli_commands/compute_fleet_status_manager.py b/cli/src/pcluster/cli_commands/compute_fleet_status_manager.py similarity index 100% rename from cli/pcluster/cli_commands/compute_fleet_status_manager.py rename to cli/src/pcluster/cli_commands/compute_fleet_status_manager.py diff --git a/cli/pcluster/cli_commands/delete.py b/cli/src/pcluster/cli_commands/delete.py similarity index 100% rename from cli/pcluster/cli_commands/delete.py rename to cli/src/pcluster/cli_commands/delete.py diff --git a/cli/pcluster/cli_commands/start.py b/cli/src/pcluster/cli_commands/start.py similarity index 100% rename from cli/pcluster/cli_commands/start.py rename to cli/src/pcluster/cli_commands/start.py diff --git a/cli/pcluster/cli_commands/stop.py b/cli/src/pcluster/cli_commands/stop.py similarity index 100% rename from cli/pcluster/cli_commands/stop.py rename to cli/src/pcluster/cli_commands/stop.py diff --git a/cli/pcluster/cli_commands/update.py b/cli/src/pcluster/cli_commands/update.py similarity index 100% rename from cli/pcluster/cli_commands/update.py rename to cli/src/pcluster/cli_commands/update.py diff --git a/cli/pcluster/cluster_model.py b/cli/src/pcluster/cluster_model.py similarity index 98% rename from cli/pcluster/cluster_model.py rename to cli/src/pcluster/cluster_model.py index e0e601cff4..e35bfbb735 100644 --- a/cli/pcluster/cluster_model.py +++ b/cli/src/pcluster/cluster_model.py @@ -133,11 +133,11 @@ def _get_latest_alinux_ami_id(self): def public_ips_in_compute_subnet(self, pcluster_config, network_interfaces_count): """Tell if public IPs will be used in compute subnet.""" vpc_section = pcluster_config.get_section("vpc") - master_subnet_id = vpc_section.get_param_value("master_subnet_id") + head_node_subnet_id = vpc_section.get_param_value("master_subnet_id") compute_subnet_id = vpc_section.get_param_value("compute_subnet_id") use_public_ips = vpc_section.get_param_value("use_public_ips") and ( - # For single NIC instances we check only if subnet is the same of master node - (not compute_subnet_id or compute_subnet_id == master_subnet_id) + # For single NIC instances we check only if subnet is the same of head node + (not compute_subnet_id or compute_subnet_id == head_node_subnet_id) # For multiple NICs instances we check also if subnet is different # to warn users about the current lack of support for public IPs or (network_interfaces_count > 1) diff --git a/cli/pcluster/commands.py b/cli/src/pcluster/commands.py similarity index 95% rename from cli/pcluster/commands.py rename to cli/src/pcluster/commands.py index 3a2dc105da..1501fb70e4 100644 --- a/cli/pcluster/commands.py +++ b/cli/src/pcluster/commands.py @@ -424,33 +424,33 @@ def list_stacks(args): sys.exit(0) -def _poll_master_server_state(stack_name): +def 
_poll_head_node_state(stack_name): ec2 = boto3.client("ec2") try: - instances = utils.describe_cluster_instances(stack_name, node_type=utils.NodeType.master) + instances = utils.describe_cluster_instances(stack_name, node_type=utils.NodeType.head_node) if not instances: - LOGGER.error("Cannot retrieve master node status. Exiting...") + LOGGER.error("Cannot retrieve head node status. Exiting...") sys.exit(1) - master_id = instances[0].get("InstanceId") + head_node_id = instances[0].get("InstanceId") state = instances[0].get("State").get("Name") sys.stdout.write("\rMasterServer: %s" % state.upper()) sys.stdout.flush() while state not in ["running", "stopped", "terminated", "shutting-down"]: time.sleep(5) state = ( - ec2.describe_instance_status(InstanceIds=[master_id]) + ec2.describe_instance_status(InstanceIds=[head_node_id]) .get("InstanceStatuses")[0] .get("InstanceState") .get("Name") ) - master_status = "\r\033[KMasterServer: %s" % state.upper() - sys.stdout.write(master_status) + head_node_status = "\r\033[KMasterServer: %s" % state.upper() + sys.stdout.write(head_node_status) sys.stdout.flush() if state in ["terminated", "shutting-down"]: LOGGER.info("State: %s is irrecoverable. Cluster needs to be re-created.", state) sys.exit(1) - master_status = "\rMasterServer: %s\n" % state.upper() - sys.stdout.write(master_status) + head_node_status = "\rMasterServer: %s\n" % state.upper() + sys.stdout.write(head_node_status) sys.stdout.flush() except ClientError as e: LOGGER.critical(e.response.get("Error").get("Message")) @@ -475,9 +475,9 @@ def instances(args): scheduler = utils.get_cfn_param(cfn_stack.get("Parameters"), "Scheduler") instances = [] - master_server = utils.describe_cluster_instances(stack_name, node_type=utils.NodeType.master) - if master_server: - instances.append(("MasterServer", master_server[0].get("InstanceId"))) + head_node_server = utils.describe_cluster_instances(stack_name, node_type=utils.NodeType.head_node) + if head_node_server: + instances.append(("MasterServer", head_node_server[0].get("InstanceId"))) if scheduler != "awsbatch": instances.extend(_get_compute_instances(stack_name)) @@ -491,7 +491,7 @@ def instances(args): def ssh(args, extra_args): # noqa: C901 FIXME!!! """ - Execute an SSH command to the master instance, according to the [aliases] section if there. + Execute an SSH command to the head node instance, according to the [aliases] section if there. :param args: pcluster CLI args :param extra_args: pcluster CLI extra_args @@ -504,7 +504,7 @@ def ssh(args, extra_args): # noqa: C901 FIXME!!! ssh_command = "ssh {CFN_USER}@{MASTER_IP} {ARGS}" try: - master_ip, username = utils.get_master_ip_and_username(args.cluster_name) + head_node_ip, username = utils.get_head_node_ip_and_username(args.cluster_name) try: from shlex import quote as cmd_quote except ImportError: @@ -512,7 +512,7 @@ def ssh(args, extra_args): # noqa: C901 FIXME!!! # build command cmd = ssh_command.format( - CFN_USER=username, MASTER_IP=master_ip, ARGS=" ".join(cmd_quote(str(arg)) for arg in extra_args) + CFN_USER=username, MASTER_IP=head_node_ip, ARGS=" ".join(cmd_quote(str(arg)) for arg in extra_args) ) # run command @@ -558,7 +558,7 @@ def status(args): # noqa: C901 FIXME!!! 
sys.stdout.write("\rStatus: %s\n" % stack.get("StackStatus")) sys.stdout.flush() if stack.get("StackStatus") in ["CREATE_COMPLETE", "UPDATE_COMPLETE", "UPDATE_ROLLBACK_COMPLETE"]: - state = _poll_master_server_state(stack_name) + state = _poll_head_node_state(stack_name) if state == "running": _print_stack_outputs(stack) _print_compute_fleet_status(args.cluster_name, stack) diff --git a/cli/pcluster/config/__init__.py b/cli/src/pcluster/config/__init__.py similarity index 100% rename from cli/pcluster/config/__init__.py rename to cli/src/pcluster/config/__init__.py diff --git a/cli/pcluster/config/cfn_param_types.py b/cli/src/pcluster/config/cfn_param_types.py similarity index 91% rename from cli/pcluster/config/cfn_param_types.py rename to cli/src/pcluster/config/cfn_param_types.py index bfaa9b588f..4239eb33ce 100644 --- a/cli/pcluster/config/cfn_param_types.py +++ b/cli/src/pcluster/config/cfn_param_types.py @@ -19,16 +19,15 @@ from pcluster.config.resource_map import ResourceMap from pcluster.constants import PCLUSTER_ISSUES_LINK from pcluster.utils import ( + InstanceTypeInfo, disable_ht_via_cpu_options, error, get_availability_zone_of_subnet, get_cfn_param, - get_default_threads_per_core, + get_default_instance_type, get_ebs_snapshot_info, get_efs_mount_target_id, get_file_section_name, - get_instance_network_interfaces, - get_instance_vcpus, get_supported_architectures_for_instance_type, ) @@ -583,7 +582,7 @@ def to_file(self, config_parser, write_defaults=False): pass -class MasterAvailabilityZoneCfnParam(AvailabilityZoneCfnParam): +class HeadNodeAvailabilityZoneCfnParam(AvailabilityZoneCfnParam): """ Class to manage master_availability_zone internal attribute. @@ -592,15 +591,15 @@ class MasterAvailabilityZoneCfnParam(AvailabilityZoneCfnParam): """ def from_file(self, config_parser): - """Initialize the Availability zone of the cluster by checking the Master Subnet.""" + """Initialize the Availability zone of the cluster by checking the head node Subnet.""" self._init_az(config_parser, "master_subnet_id") return self def from_cfn_params(self, cfn_params): - """Initialize the Availability zone by checking the Compute Subnet from cfn.""" - master_subnet_id = get_cfn_param(cfn_params, "MasterSubnetId") - self.value = get_availability_zone_of_subnet(master_subnet_id) + """Initialize the Availability zone by checking the head node subnet from cfn.""" + head_node_subnet_id = get_cfn_param(cfn_params, "MasterSubnetId") + self.value = get_availability_zone_of_subnet(head_node_subnet_id) return self @@ -655,12 +654,13 @@ def _get_cfn_params_for_instance_type(instance_type): HT is disabled (or "NONE" if it shouldn't be disabled). The second item is a boolean expressing if HT should be disabled via CPU Options for the given instance type. """ - default_threads_per_core = get_default_threads_per_core(instance_type) + instance_type_info = InstanceTypeInfo.init_from_instance_type(instance_type) + default_threads_per_core = instance_type_info.default_threads_per_core() if default_threads_per_core == 1: # no action is required to disable hyperthreading cores = "NONE" else: - cores = get_instance_vcpus(instance_type) // default_threads_per_core + cores = instance_type_info.vcpus_count() // default_threads_per_core return cores, disable_ht_via_cpu_options(instance_type, default_threads_per_core) @@ -668,14 +668,14 @@ def to_cfn(self): """ Define the Cores CFN parameter if disable_hyperthreading = true. 
-        :return: string (cores_master,cores_compute,master_supports_cpu_options,compute_supports_cpu_options)
+        :return: string (head_node_cores,compute_cores,head_node_supports_cpu_options,compute_supports_cpu_options)
         """
         cfn_params = {self.definition.get("cfn_param_mapping"): "NONE,NONE,NONE,NONE"}
         cluster_config = self.pcluster_config.get_section(self.section_key)
         if self.value:
-            master_instance_type = cluster_config.get_param_value("master_instance_type")
-            master_cores, disable_master_ht_via_cpu_options = self._get_cfn_params_for_instance_type(
-                master_instance_type
+            head_node_instance_type = cluster_config.get_param_value("master_instance_type")
+            head_node_cores, disable_head_node_ht_via_cpu_options = self._get_cfn_params_for_instance_type(
+                head_node_instance_type
             )

             if (
@@ -693,7 +693,7 @@ def to_cfn(self):
                 disable_compute_ht_via_cpu_options = False

             for node_label, cores, instance_type in [
-                ("master", master_cores, master_instance_type),
+                ("master", head_node_cores, head_node_instance_type),
                 ("compute", compute_cores, compute_instance_type),
             ]:
                 if isinstance(cores, int) and cores < 0:
@@ -704,9 +704,9 @@ def to_cfn(self):
             cfn_params.update(
                 {
                     self.definition.get("cfn_param_mapping"): "{0},{1},{2},{3}".format(
-                        master_cores,
+                        head_node_cores,
                         compute_cores,
-                        str(disable_master_ht_via_cpu_options).lower(),
+                        str(disable_head_node_ht_via_cpu_options).lower(),
                         str(disable_compute_ht_via_cpu_options).lower(),
                     )
                 }
@@ -799,25 +799,25 @@ class BaseOSCfnParam(CfnParam):

     @staticmethod
     def get_instance_type_architecture(instance_type):
-        """Compute cluster's 'Architecture' CFN parameter based on its master server instance type."""
+        """Compute cluster's 'Architecture' CFN parameter based on its head node instance type."""
         if not instance_type:
-            error("Cannot infer architecture without master instance type")
-        master_inst_supported_architectures = get_supported_architectures_for_instance_type(instance_type)
+            error("Cannot infer architecture without head node instance type")
+        head_node_supported_architectures = get_supported_architectures_for_instance_type(instance_type)

-        if not master_inst_supported_architectures:
+        if not head_node_supported_architectures:
             error("Unable to get architectures supported by instance type {0}.".format(instance_type))

         # If the instance type supports multiple architectures, choose the first one.
         # TODO: this is currently not an issue because none of the instance types we support more than one of the
         # architectures we support. If this were ever to change (e.g., we start supporting i386) then we would
-        # probably need to choose based on the subset of the architecutres supported by both the master and
+        # probably need to choose based on the subset of the architectures supported by both the head node and
         # compute instance types.
-        return master_inst_supported_architectures[0]
+        return head_node_supported_architectures[0]

     def refresh(self):
         """Initialize the private architecture param."""
         if self.value:
-            master_inst_type = self.owner_section.get_param_value("master_instance_type")
-            architecture = self.get_instance_type_architecture(master_inst_type)
+            head_node_instance_type = self.owner_section.get_param_value("master_instance_type")
+            architecture = self.get_instance_type_architecture(head_node_instance_type)
             self.owner_section.get_param("architecture").value = architecture

@@ -855,6 +855,34 @@ def to_cfn(self):
         return cfn_params


+class ComputeInstanceTypeCfnParam(CfnParam):
+    """
+    Class to manage the compute instance type parameter.
+ + We need this class in order to set the default instance type from a boto3 call. + """ + + def refresh(self): + """Get default value from a boto3 call for free tier instance type.""" + if not self.value: + scheduler = self.pcluster_config.get_section("cluster").get_param_value("scheduler") + if scheduler: + self.value = "optimal" if scheduler == "awsbatch" else get_default_instance_type() + + +class HeadNodeInstanceTypeCfnParam(CfnParam): + """ + Class to manage the head node instance type parameter. + + We need this class in order to set the default instance type from a boto3 call. + """ + + def refresh(self): + """Get default value from a boto3 call for free tier instance type.""" + if not self.value: + self.value = get_default_instance_type() + + class TagsParam(JsonCfnParam): """ Class to manage the tags json configuration parameter. @@ -1078,17 +1106,26 @@ class NetworkInterfacesCountCfnParam(CommaSeparatedCfnParam): Class to manage NetworkInterfacesCount Cfn param. The internal value is a list of two items, which respectively indicate the number of network interfaces to activate - on master and compute nodes. + on head node and compute nodes. """ def refresh(self): - """Compute the number of network interfaces for master and compute nodes.""" + """Compute the number of network interfaces for head node and compute nodes.""" cluster_section = self.pcluster_config.get_section("cluster") scheduler = cluster_section.get_param_value("scheduler") - self.value = [ - str(get_instance_network_interfaces(cluster_section.get_param_value("master_instance_type"))), - str(get_instance_network_interfaces(cluster_section.get_param_value("compute_instance_type"))) + compute_instance_type = ( + cluster_section.get_param_value("compute_instance_type") if self.pcluster_config.cluster_model.name == "SIT" and scheduler != "awsbatch" + else None + ) + self.value = [ + str( + InstanceTypeInfo.init_from_instance_type( + cluster_section.get_param_value("master_instance_type") + ).max_network_interface_count() + ), + str(InstanceTypeInfo.init_from_instance_type(compute_instance_type).max_network_interface_count()) + if compute_instance_type else "1", ] @@ -1208,28 +1245,28 @@ def to_storage(self, storage_params=None): cfn_items.append(param.get_cfn_value()) if cfn_items[0] == "NONE": - master_mt_valid = False + head_node_mt_valid = False compute_mt_valid = False - master_avail_zone = "fake_az1" + head_node_avail_zone = "fake_az1" compute_avail_zone = "fake_az2" # empty dict or first item is NONE --> set all values to NONE cfn_items = ["NONE"] * len(self.definition.get("params")) else: # add another CFN param that will identify if create or not a Mount Target for the given EFS FS Id - master_avail_zone = self.pcluster_config.get_master_availability_zone() - master_mount_target_id = get_efs_mount_target_id( - efs_fs_id=self.get_param_value("efs_fs_id"), avail_zone=master_avail_zone + head_node_avail_zone = self.pcluster_config.get_head_node_availability_zone() + head_node_mount_target_id = get_efs_mount_target_id( + efs_fs_id=self.get_param_value("efs_fs_id"), avail_zone=head_node_avail_zone ) compute_avail_zone = self.pcluster_config.get_compute_availability_zone() compute_mount_target_id = get_efs_mount_target_id( efs_fs_id=self.get_param_value("efs_fs_id"), avail_zone=compute_avail_zone ) - master_mt_valid = bool(master_mount_target_id) + head_node_mt_valid = bool(head_node_mount_target_id) compute_mt_valid = bool(compute_mount_target_id) - cfn_items.append("Valid" if master_mt_valid else "NONE") - # Do not 
create additional compute mount target if compute and master subnet in the same AZ
-        cfn_items.append("Valid" if compute_mt_valid or (master_avail_zone == compute_avail_zone) else "NONE")
+        cfn_items.append("Valid" if head_node_mt_valid else "NONE")
+        # Do not create additional compute mount target if compute and head node subnet in the same AZ
+        cfn_items.append("Valid" if compute_mt_valid or (head_node_avail_zone == compute_avail_zone) else "NONE")

         cfn_params[cfn_converter] = ",".join(cfn_items)
         return storage_params
@@ -1280,5 +1317,32 @@ def refresh(self):
             ebs_snapshot_id = section.get_param_value("ebs_snapshot_id")
             default_volume_size = get_ebs_snapshot_info(ebs_snapshot_id).get("VolumeSize")
         else:
-            default_volume_size = 20
+            default_volume_size = 500 if section.get_param_value("volume_type") in ["st1", "sc1"] else 20
         self.value = default_volume_size
+
+
+class VolumeIopsParam(IntCfnParam):
+    """Class to manage ebs volume_iops parameter in the EBS section."""
+
+    EBS_VOLUME_TYPE_IOPS_DEFAULT = {
+        "io1": 100,
+        "io2": 100,
+        "gp3": 3000,
+    }
+
+    def refresh(self):
+        """
+        Set a different default IOPS value depending on the EBS volume type.
+
+        If the user does not specify volume_iops, the volume is created with the
+        default for its volume type: 3000 IOPS for "gp3", 100 IOPS for "io1" and "io2".
+        """
+        section = self.pcluster_config.get_section(self.section_key, self.section_label)
+
+        if section and section.get_param_value("volume_iops") is None:
+            volume_type = section.get_param_value("volume_type")
+            if volume_type in VolumeIopsParam.EBS_VOLUME_TYPE_IOPS_DEFAULT:
+                default_iops = VolumeIopsParam.EBS_VOLUME_TYPE_IOPS_DEFAULT.get(volume_type)
+                self.value = default_iops
diff --git a/cli/pcluster/config/config_patch.py b/cli/src/pcluster/config/config_patch.py
similarity index 100%
rename from cli/pcluster/config/config_patch.py
rename to cli/src/pcluster/config/config_patch.py
diff --git a/cli/pcluster/config/hit_converter.py b/cli/src/pcluster/config/hit_converter.py
similarity index 100%
rename from cli/pcluster/config/hit_converter.py
rename to cli/src/pcluster/config/hit_converter.py
diff --git a/cli/pcluster/config/iam_policy_rules.py b/cli/src/pcluster/config/iam_policy_rules.py
similarity index 100%
rename from cli/pcluster/config/iam_policy_rules.py
rename to cli/src/pcluster/config/iam_policy_rules.py
diff --git a/cli/pcluster/config/json_param_types.py b/cli/src/pcluster/config/json_param_types.py
similarity index 94%
rename from cli/pcluster/config/json_param_types.py
rename to cli/src/pcluster/config/json_param_types.py
index c490f9a855..097d941183 100644
--- a/cli/pcluster/config/json_param_types.py
+++ b/cli/src/pcluster/config/json_param_types.py
@@ -288,35 +288,34 @@ def refresh_compute_resource(self, compute_resource_section):
         instance_type_param = compute_resource_section.get_param("instance_type")

         if instance_type_param.value:
-            instance_type = utils.get_instance_type(instance_type_param.value)
+            instance_type_info = utils.InstanceTypeInfo.init_from_instance_type(instance_type_param.value)

             # Set vcpus according to queue's disable_hyperthreading and instance features
             ht_disabled = self.get_param_value("disable_hyperthreading")
-            vcpus_info = instance_type.get("VCpuInfo")
-            default_threads_per_core = utils.get_default_threads_per_core(instance_type_param.value, instance_type)
+            default_threads_per_core = 
instance_type_info.default_threads_per_core() vcpus = ( - (vcpus_info.get("DefaultVCpus") // default_threads_per_core) + (instance_type_info.vcpus_count() // default_threads_per_core) if ht_disabled - else vcpus_info.get("DefaultVCpus") + else instance_type_info.vcpus_count() ) compute_resource_section.get_param("vcpus").value = vcpus # Set gpus according to instance features - gpus = utils.get_instance_gpus(instance_type_param.value, instance_type) + gpus = instance_type_info.gpu_count() compute_resource_section.get_param("gpus").value = gpus # Set enable_efa according to queues' enable_efa and instance features # Instance type must support EFA enable_efa = self.get_param_value("enable_efa") - compute_resource_section.get_param("enable_efa").value = enable_efa and instance_type.get( - "NetworkInfo" - ).get("EfaSupported") + compute_resource_section.get_param("enable_efa").value = ( + enable_efa and instance_type_info.is_efa_supported() + ) # Set enable_efa_gdr according to queues' enable_efa_gdr and instance features # Instance type must support EFA and have GPUs enable_efa_gdr = self.get_param_value("enable_efa_gdr") compute_resource_section.get_param("enable_efa_gdr").value = ( - enable_efa_gdr and instance_type.get("NetworkInfo").get("EfaSupported") and (gpus > 0) + enable_efa_gdr and instance_type_info.is_efa_supported() and (gpus > 0) ) # Set disable_hyperthreading according to queues' disable_hyperthreading and instance features @@ -331,7 +330,7 @@ def refresh_compute_resource(self, compute_resource_section): ).value = compute_resource_section.get_param( "disable_hyperthreading" ).value and utils.disable_ht_via_cpu_options( - instance_type_param.value, utils.get_default_threads_per_core(instance_type_param.value, instance_type) + instance_type_param.value, instance_type_info.default_threads_per_core() ) # Set initial_count to min_count if not manually set @@ -340,9 +339,9 @@ def refresh_compute_resource(self, compute_resource_section): initial_count_param.value = compute_resource_section.get_param_value("min_count") # Set number of network interfaces - compute_resource_section.get_param("network_interfaces").value = utils.get_instance_network_interfaces( - instance_type_param.value, instance_type - ) + compute_resource_section.get_param( + "network_interfaces" + ).value = instance_type_info.max_network_interface_count() # ---------------------- Common functions ---------------------- # diff --git a/cli/pcluster/config/mappings.py b/cli/src/pcluster/config/mappings.py similarity index 94% rename from cli/pcluster/config/mappings.py rename to cli/src/pcluster/config/mappings.py index 1fab9cdff8..3d9215760c 100644 --- a/cli/pcluster/config/mappings.py +++ b/cli/src/pcluster/config/mappings.py @@ -21,14 +21,16 @@ ClusterCfnSection, ClusterConfigMetadataCfnParam, ComputeAvailabilityZoneCfnParam, + ComputeInstanceTypeCfnParam, DisableHyperThreadingCfnParam, EBSSettingsCfnParam, EFSCfnSection, ExtraJsonCfnParam, FloatCfnParam, + HeadNodeAvailabilityZoneCfnParam, + HeadNodeInstanceTypeCfnParam, IntCfnParam, MaintainInitialSizeCfnParam, - MasterAvailabilityZoneCfnParam, NetworkInterfacesCountCfnParam, QueueSizeCfnParam, SettingsCfnParam, @@ -36,6 +38,7 @@ SpotBidPercentageCfnParam, SpotPriceCfnParam, TagsParam, + VolumeIopsParam, VolumeSizeParam, ) from pcluster.config.json_param_types import ( @@ -64,6 +67,7 @@ ebs_settings_validator, ebs_volume_iops_validator, ebs_volume_size_snapshot_validator, + ebs_volume_throughput_validator, ebs_volume_type_size_validator, ec2_ami_validator, 
ec2_iam_policies_validator, @@ -75,6 +79,7 @@ ec2_volume_validator, ec2_vpc_id_validator, efa_gdr_validator, + efa_os_arch_validator, efa_validator, efs_id_validator, efs_validator, @@ -95,7 +100,6 @@ maintain_initial_size_validator, queue_settings_validator, queue_validator, - raid_volume_iops_validator, s3_bucket_uri_validator, s3_bucket_validator, scheduler_validator, @@ -149,7 +153,7 @@ "snapshot_id": r"^snap-[0-9a-z]{8}$|^snap-[0-9a-z]{17}$", "subnet_id": r"^subnet-[0-9a-z]{8}$|^subnet-[0-9a-z]{17}$", "volume_id": r"^vol-[0-9a-z]{8}$|^vol-[0-9a-z]{17}$", - "volume_types": ["standard", "io1", "gp2", "st1", "sc1"], + "volume_types": ["standard", "io1", "io2", "gp2", "st1", "sc1", "gp3"], "vpc_id": r"^vpc-[0-9a-z]{8}$|^vpc-[0-9a-z]{17}$", "fsx_deployment_type": ["SCRATCH_1", "SCRATCH_2", "PERSISTENT_1"], "fsx_ssd_throughput": FSX_SSD_THROUGHPUT, @@ -239,68 +243,70 @@ "key": "vpc", "default_label": "default", "autocreate": True, - "params": { - "vpc_id": { + "params": OrderedDict([ + ("vpc_id", { "cfn_param_mapping": "VPCId", + "required": True, "allowed_values": ALLOWED_VALUES["vpc_id"], "validators": [ec2_vpc_id_validator], "update_policy": UpdatePolicy.UNSUPPORTED - }, - "master_subnet_id": { + }), + ("master_subnet_id", { "cfn_param_mapping": "MasterSubnetId", + "required": True, "allowed_values": ALLOWED_VALUES["subnet_id"], "validators": [ec2_subnet_id_validator], "update_policy": UpdatePolicy.UNSUPPORTED - }, - "ssh_from": { + }), + ("ssh_from", { "default": CIDR_ALL_IPS, "allowed_values": ALLOWED_VALUES["cidr"], "cfn_param_mapping": "AccessFrom", "update_policy": UpdatePolicy.SUPPORTED - }, - "additional_sg": { + }), + ("additional_sg", { "cfn_param_mapping": "AdditionalSG", "allowed_values": ALLOWED_VALUES["security_group_id"], "validators": [ec2_security_group_validator], "update_policy": UpdatePolicy.SUPPORTED - }, - "compute_subnet_id": { + }), + ("compute_subnet_id", { "cfn_param_mapping": "ComputeSubnetId", "allowed_values": ALLOWED_VALUES["subnet_id"], "validators": [ec2_subnet_id_validator], "update_policy": UpdatePolicy.COMPUTE_FLEET_STOP - }, - "compute_subnet_cidr": { + }), + ("compute_subnet_cidr", { "cfn_param_mapping": "ComputeSubnetCidr", "allowed_values": ALLOWED_VALUES["cidr"], "update_policy": UpdatePolicy.UNSUPPORTED - }, - "use_public_ips": { + }), + ("use_public_ips", { "type": BoolCfnParam, "default": True, "cfn_param_mapping": "UsePublicIps", "update_policy": UpdatePolicy.COMPUTE_FLEET_STOP - }, - "vpc_security_group_id": { + }), + ("vpc_security_group_id", { "cfn_param_mapping": "VPCSecurityGroupId", "allowed_values": ALLOWED_VALUES["security_group_id"], "validators": [ec2_security_group_validator], "update_policy": UpdatePolicy.SUPPORTED - }, - "master_availability_zone": { + }), + ("master_availability_zone", { # NOTE: this is not exposed as a configuration parameter - "type": MasterAvailabilityZoneCfnParam, + "type": HeadNodeAvailabilityZoneCfnParam, "cfn_param_mapping": "AvailabilityZone", "update_policy": UpdatePolicy.IGNORED, "visibility": Visibility.PRIVATE - }, - "compute_availability_zone": { + }), + ("compute_availability_zone", { # NOTE: this is not exposed as a configuration parameter "type": ComputeAvailabilityZoneCfnParam, "update_policy": UpdatePolicy.IGNORED, "visibility": Visibility.PRIVATE - } - }, + }) + ]), } EBS = { @@ -308,20 +314,22 @@ "key": "ebs", "default_label": "default", "max_resources": 5, - "validators": [ebs_volume_type_size_validator, ebs_volume_iops_validator, ebs_volume_size_snapshot_validator], - "params": { - 
"shared_dir": { + "validators": [ebs_volume_type_size_validator, ebs_volume_iops_validator, ebs_volume_size_snapshot_validator, + ebs_volume_throughput_validator], + "params": OrderedDict([ # Use OrderedDict because the in python 3.5 a dict is not ordered by default, need it in + # the test of hit converter + ("shared_dir", { "allowed_values": ALLOWED_VALUES["file_path"], "cfn_param_mapping": "SharedDir", "validators": [shared_dir_validator], "update_policy": UpdatePolicy.UNSUPPORTED - }, - "ebs_snapshot_id": { + }), + ("ebs_snapshot_id", { "allowed_values": ALLOWED_VALUES["snapshot_id"], "cfn_param_mapping": "EBSSnapshotId", "update_policy": UpdatePolicy.UNSUPPORTED - }, - "volume_type": { + }), + ("volume_type", { "default": "gp2", "allowed_values": ALLOWED_VALUES["volume_types"], "cfn_param_mapping": "VolumeType", @@ -329,8 +337,8 @@ UpdatePolicy.UNSUPPORTED, action_needed=UpdatePolicy.ACTIONS_NEEDED["ebs_volume_update"] ) - }, - "volume_size": { + }), + ("volume_size", { "type": VolumeSizeParam, "cfn_param_mapping": "VolumeSize", "update_policy": UpdatePolicy( @@ -338,31 +346,36 @@ fail_reason=UpdatePolicy.FAIL_REASONS["ebs_volume_resize"], action_needed=UpdatePolicy.ACTIONS_NEEDED["ebs_volume_update"] ) - }, - "volume_iops": { - "type": IntCfnParam, - "default": 100, + }), + ("volume_iops", { + "type": VolumeIopsParam, "cfn_param_mapping": "VolumeIOPS", "update_policy": UpdatePolicy.SUPPORTED - }, - "encrypted": { + }), + ("encrypted", { "type": BoolCfnParam, "cfn_param_mapping": "EBSEncryption", "default": False, "update_policy": UpdatePolicy.UNSUPPORTED - }, - "ebs_kms_key_id": { + }), + ("ebs_kms_key_id", { "cfn_param_mapping": "EBSKMSKeyId", "validators": [kms_key_validator], "update_policy": UpdatePolicy.UNSUPPORTED - }, - "ebs_volume_id": { + }), + ("ebs_volume_id", { "cfn_param_mapping": "EBSVolumeId", "allowed_values": ALLOWED_VALUES["volume_id"], "validators": [ec2_volume_validator], "update_policy": UpdatePolicy.UNSUPPORTED - }, - }, + }), + ("volume_throughput", { + "type": IntCfnParam, + "cfn_param_mapping": "VolumeThroughput", + "update_policy": UpdatePolicy.SUPPORTED, + "default": 125 + }) + ]), } EFS = { @@ -415,6 +428,7 @@ "type": CfnSection, "key": "raid", "default_label": "default", + "validators": [ebs_volume_type_size_validator, ebs_volume_iops_validator, ebs_volume_throughput_validator], "cfn_param_mapping": "RAIDOptions", # All the parameters in the section are converted into a single CFN parameter "params": OrderedDict( # Use OrderedDict because the parameters must respect the order in the CFN parameter [ @@ -446,9 +460,7 @@ "update_policy": UpdatePolicy.UNSUPPORTED }), ("volume_iops", { - "type": IntCfnParam, - "default": 100, - "validators": [raid_volume_iops_validator], + "type": VolumeIopsParam, "update_policy": UpdatePolicy.SUPPORTED }), ("encrypted", { @@ -460,6 +472,12 @@ "validators": [kms_key_validator], "update_policy": UpdatePolicy.UNSUPPORTED }), + ("volume_throughput", { + "type": IntCfnParam, + "default": 125, + "cfn_param_mapping": "VolumeThroughput", + "update_policy": UpdatePolicy.SUPPORTED + }), ] ) } @@ -715,6 +733,7 @@ }), ("enable_efa", { "type": BooleanJsonParam, + "validators": [efa_os_arch_validator], "update_policy": UpdatePolicy.COMPUTE_FLEET_STOP, }), ("enable_efa_gdr", { @@ -754,14 +773,6 @@ "validators": [ec2_key_pair_validator], "update_policy": UpdatePolicy.UNSUPPORTED }), - ("base_os", { - "type": BaseOSCfnParam, - "cfn_param_mapping": "BaseOS", - "allowed_values": ["alinux", "alinux2", "ubuntu1604", "ubuntu1804", "centos7", 
"centos8"], - "validators": [base_os_validator, architecture_os_validator], - "required": True, - "update_policy": UpdatePolicy.UNSUPPORTED - }), ("scheduler", { "cfn_param_mapping": "Scheduler", "allowed_values": ["awsbatch", "sge", "slurm", "torque"], @@ -769,9 +780,9 @@ "required": True, "update_policy": UpdatePolicy.UNSUPPORTED }), - # Master + # Head node ("master_instance_type", { - "default": "t2.micro", + "type": HeadNodeInstanceTypeCfnParam, "cfn_param_mapping": "MasterInstanceType", "validators": [head_node_instance_type_validator, ec2_instance_type_validator], "update_policy": UpdatePolicy.UNSUPPORTED, @@ -787,6 +798,14 @@ action_needed=UpdatePolicy.ACTIONS_NEEDED["ebs_volume_update"] ) }), + ("base_os", { + "type": BaseOSCfnParam, + "cfn_param_mapping": "BaseOS", + "allowed_values": ["alinux", "alinux2", "ubuntu1604", "ubuntu1804", "centos7", "centos8"], + "validators": [base_os_validator, architecture_os_validator], + "required": True, + "update_policy": UpdatePolicy.UNSUPPORTED + }), # Compute fleet ("compute_root_volume_size", { "type": IntCfnParam, @@ -830,7 +849,7 @@ ("enable_efa", { "allowed_values": ["compute"], "cfn_param_mapping": "EFA", - "validators": [efa_validator], + "validators": [efa_validator, efa_os_arch_validator], "update_policy": UpdatePolicy.UNSUPPORTED }), ("enable_efa_gdr", { @@ -921,6 +940,7 @@ }), ("vpc_settings", { "type": SettingsCfnParam, + "required": True, "referred_section": VPC, "update_policy": UpdatePolicy.UNSUPPORTED, }), @@ -1001,6 +1021,10 @@ "validators": [s3_bucket_validator], "update_policy": UpdatePolicy.READ_ONLY_RESOURCE_BUCKET, }), + ("iam_lambda_role", { + "cfn_param_mapping": "IAMLambdaRoleName", + "update_policy": UpdatePolicy.SUPPORTED, + }), ] @@ -1025,9 +1049,7 @@ }), # Compute fleet ("compute_instance_type", { - "default": - lambda section: - "optimal" if section and section.get_param_value("scheduler") == "awsbatch" else "t2.micro", + "type": ComputeInstanceTypeCfnParam, "cfn_param_mapping": "ComputeInstanceType", "validators": [compute_instance_type_validator, instances_architecture_compatibility_validator], "update_policy": UpdatePolicy.COMPUTE_FLEET_STOP diff --git a/cli/pcluster/config/param_types.py b/cli/src/pcluster/config/param_types.py similarity index 99% rename from cli/pcluster/config/param_types.py rename to cli/src/pcluster/config/param_types.py index ce1e425f34..f6db94ad2b 100644 --- a/cli/pcluster/config/param_types.py +++ b/cli/src/pcluster/config/param_types.py @@ -13,6 +13,7 @@ import re import sys from abc import abstractmethod +from collections import OrderedDict from enum import Enum from configparser import NoSectionError @@ -441,7 +442,7 @@ def __init__(self, section_definition, pcluster_config, section_label=None, pare self.parent_section = parent_section # initialize section parameters with default values - self.params = {} + self.params = OrderedDict({}) self._from_definition() @property diff --git a/cli/pcluster/config/pcluster_config.py b/cli/src/pcluster/config/pcluster_config.py similarity index 99% rename from cli/pcluster/config/pcluster_config.py rename to cli/src/pcluster/config/pcluster_config.py index c006f6482a..bc122dbf0a 100644 --- a/cli/pcluster/config/pcluster_config.py +++ b/cli/src/pcluster/config/pcluster_config.py @@ -494,8 +494,8 @@ def validate(self): # test provided configuration self.__test_configuration() - def get_master_availability_zone(self): - """Get the Availability zone of the Master Subnet.""" + def get_head_node_availability_zone(self): + """Get the Availability zone 
of the Head Node Subnet.""" return self.get_section("vpc").get_param_value("master_availability_zone") def get_compute_availability_zone(self): diff --git a/cli/pcluster/config/resource_map.py b/cli/src/pcluster/config/resource_map.py similarity index 99% rename from cli/pcluster/config/resource_map.py rename to cli/src/pcluster/config/resource_map.py index 4776ed47c5..4220869574 100644 --- a/cli/pcluster/config/resource_map.py +++ b/cli/src/pcluster/config/resource_map.py @@ -22,7 +22,7 @@ class ResourceArray(object): """ Represents a set of available resources for a single resource type. - For instance, this class can represent the available EBS volume resources that can be attached to a master node. + For instance, this class can represent the available EBS volume resources that can be attached to a head node. """ def __init__(self, resources): diff --git a/cli/pcluster/config/update_policy.py b/cli/src/pcluster/config/update_policy.py similarity index 97% rename from cli/pcluster/config/update_policy.py rename to cli/src/pcluster/config/update_policy.py index 379a4928f0..3c0195f39a 100644 --- a/cli/pcluster/config/update_policy.py +++ b/cli/src/pcluster/config/update_policy.py @@ -210,12 +210,12 @@ def _check_generated_bucket(change, patch): condition_checker=lambda change, patch: not utils.cluster_has_running_capacity(patch.stack_name), ) -# Update supported only with master node down -UpdatePolicy.MASTER_STOP = UpdatePolicy( +# Update supported only with head node down +UpdatePolicy.HEAD_NODE_STOP = UpdatePolicy( level=20, - fail_reason="To perform this update action, the master node must be in a stopped state", + fail_reason="To perform this update action, the head node must be in a stopped state", action_needed=UpdatePolicy.ACTIONS_NEEDED["pcluster_stop"], - condition_checker=lambda change, patch: utils.get_master_server_state(patch.stack_name) == "stopped", + condition_checker=lambda change, patch: utils.get_head_node_state(patch.stack_name) == "stopped", ) # Expected Behavior: diff --git a/cli/pcluster/config/validators.py b/cli/src/pcluster/config/validators.py similarity index 94% rename from cli/pcluster/config/validators.py rename to cli/src/pcluster/config/validators.py index d908f891a6..be7a235171 100644 --- a/cli/pcluster/config/validators.py +++ b/cli/src/pcluster/config/validators.py @@ -20,13 +20,12 @@ from pcluster.constants import CIDR_ALL_IPS, FSX_HDD_THROUGHPUT, FSX_SSD_THROUGHPUT from pcluster.dcv.utils import get_supported_dcv_os from pcluster.utils import ( + InstanceTypeInfo, ellipsize, get_base_additional_iam_policies, get_ebs_snapshot_info, get_efs_mount_target_id, get_file_section_name, - get_instance_network_interfaces, - get_instance_vcpus, get_partition, get_region, get_supported_architectures_for_instance_type, @@ -69,16 +68,28 @@ FSX_PARAM_WITH_DEFAULT = {"drive_cache_type": "NONE"} +EFA_UNSUPPORTED_ARCHITECTURES_OSES = { + "x86_64": [], + "arm64": ["centos8"], +} + EBS_VOLUME_TYPE_TO_VOLUME_SIZE_BOUNDS = { "standard": (1, 1024), "io1": (4, 16 * 1024), - "io2": (4, 16 * 1024), + "io2": (4, 64 * 1024), "gp2": (1, 16 * 1024), + "gp3": (1, 16 * 1024), "st1": (500, 16 * 1024), "sc1": (500, 16 * 1024), } -HEAD_NODE_UNSUPPORTED_INSTANCE_TYPES = ["p4d.24xlarge"] +EBS_VOLUME_IOPS_BOUNDS = { + "io1": (100, 64000), + "io2": (100, 256000), + "gp3": (3000, 16000), +} + +HEAD_NODE_UNSUPPORTED_INSTANCE_TYPES = [] HEAD_NODE_UNSUPPORTED_MESSAGE = "The instance type '{0}' is not supported as head node." 
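The new `EBS_VOLUME_IOPS_BOUNDS` table above maps each provisioned-IOPS volume type to an inclusive (min, max) IOPS range; the reworked `ebs_volume_iops_validator` later in this file looks the range up instead of hard-coding 100-64000. A minimal sketch of how such a bounds table is applied — the values mirror this patch, but the `check_volume_iops` helper is illustrative only, not code from the repository:

```python
# Illustrative sketch; the bounds values are copied from EBS_VOLUME_IOPS_BOUNDS in this patch.
EBS_VOLUME_IOPS_BOUNDS = {
    "io1": (100, 64000),
    "io2": (100, 256000),
    "gp3": (3000, 16000),
}


def check_volume_iops(volume_type, volume_iops):
    """Return an error message if volume_iops is out of range for volume_type, else None."""
    bounds = EBS_VOLUME_IOPS_BOUNDS.get(volume_type)
    if bounds and volume_iops is not None:
        min_iops, max_iops = bounds
        if not min_iops <= volume_iops <= max_iops:
            return "IOPS rate must be between {0} and {1} when provisioning {2} volumes.".format(
                min_iops, max_iops, volume_type
            )
    return None  # in range, or a volume type without IOPS bounds


assert check_volume_iops("gp3", 3000) is None       # gp3 minimum is exactly 3000
assert check_volume_iops("io1", 50) is not None     # below the io1 minimum of 100
assert check_volume_iops("standard", 9999) is None  # unbounded types are not checked
```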
# Constants for section labels @@ -119,15 +130,15 @@ def efs_id_validator(param_key, param_value, pcluster_config): errors = [] warnings = [] try: - # Get master availability zone - master_avail_zone = pcluster_config.get_master_availability_zone() - mount_target_id = get_efs_mount_target_id(efs_fs_id=param_value, avail_zone=master_avail_zone) + # Get head node availability zone + head_node_avail_zone = pcluster_config.get_head_node_availability_zone() + head_node_target_id = get_efs_mount_target_id(efs_fs_id=param_value, avail_zone=head_node_avail_zone) # If there is an existing mt in the az, need to check the inbound and outbound rules of the security groups - if mount_target_id: + if head_node_target_id: # Get list of security group IDs of the mount target sg_ids = ( boto3.client("efs") - .describe_mount_target_security_groups(MountTargetId=mount_target_id) + .describe_mount_target_security_groups(MountTargetId=head_node_target_id) .get("SecurityGroups") ) if not _check_in_out_access(sg_ids, port=2049): @@ -135,7 +146,7 @@ def efs_id_validator(param_key, param_value, pcluster_config): "There is an existing Mount Target {0} in the Availability Zone {1} for EFS {2}, " "but it does not have a security group that allows inbound and outbound rules to support NFS. " "Please modify the Mount Target's security group, to allow traffic on port 2049.".format( - mount_target_id, master_avail_zone, param_value + head_node_target_id, head_node_avail_zone, param_value ) ) except ClientError as e: @@ -384,12 +395,12 @@ def dcv_enabled_validator(param_key, param_value, pcluster_config): "Please double check the 'base_os' configuration parameter".format(allowed_oses) ) - master_instance_type = cluster_section.get_param_value("master_instance_type") - if re.search(r"(micro)|(nano)", master_instance_type): + head_node_instance_type = cluster_section.get_param_value("master_instance_type") + if re.search(r"(micro)|(nano)", head_node_instance_type): warnings.append( "The packages required for desktop virtualization in the selected instance type '{0}' " - "may cause instability of the master instance. If you want to use NICE DCV it is recommended " - "to use an instance type with at least 1.7 GB of memory.".format(master_instance_type) + "may cause instability of the head node instance. If you want to use NICE DCV it is recommended " + "to use an instance type with at least 1.7 GB of memory.".format(head_node_instance_type) ) if pcluster_config.get_section("dcv").get_param_value("access_from") == CIDR_ALL_IPS: @@ -626,7 +637,7 @@ def ec2_ami_validator(param_key, param_value, pcluster_config): if cluster_section.get_param_value("architecture") != ami_architecture: errors.append( "AMI {0}'s architecture ({1}) is incompatible with the architecture supported by the instance type " - "chosen for the master server ({2}). Use either a different AMI or a different instance type.".format( + "chosen for the head node ({2}). 
Use either a different AMI or a different instance type.".format( param_value, ami_architecture, cluster_section.get_param_value("architecture") ) ) @@ -895,18 +906,6 @@ def efs_validator(section_key, section_label, pcluster_config): return errors, warnings -def raid_volume_iops_validator(param_key, param_value, pcluster_config): - errors = [] - warnings = [] - - raid_iops = float(param_value) - raid_vol_size = float(pcluster_config.get_section("raid").get_param_value("volume_size")) - if raid_iops > raid_vol_size * 50: - errors.append("IOPS to volume size ratio of {0} is too high; maximum is 50.".format(raid_iops / raid_vol_size)) - - return errors, warnings - - def scheduler_validator(param_key, param_value, pcluster_config): errors = [] warnings = [] @@ -965,11 +964,11 @@ def cluster_validator(section_key, section_label, pcluster_config): def instances_architecture_compatibility_validator(param_key, param_value, pcluster_config): - """Verify that master and compute instance types imply compatible architectures.""" + """Verify that head node and compute instance types imply compatible architectures.""" errors = [] warnings = [] - master_architecture = pcluster_config.get_section("cluster").get_param_value("architecture") + head_node_architecture = pcluster_config.get_section("cluster").get_param_value("architecture") # When awsbatch is used as the scheduler, compute_instance_type can contain a CSV list. compute_instance_types = param_value.split(",") for compute_instance_type in compute_instance_types: @@ -983,11 +982,11 @@ def instances_architecture_compatibility_validator(param_key, param_value, pclus ) continue compute_architectures = get_supported_architectures_for_instance_type(compute_instance_type) - if master_architecture not in compute_architectures: + if head_node_architecture not in compute_architectures: errors.append( "The specified compute_instance_type ({0}) supports the architectures {1}, none of which are " "compatible with the architecture supported by the master_instance_type ({2}).".format( - compute_instance_type, compute_architectures, master_architecture + compute_instance_type, compute_architectures, head_node_architecture ) ) @@ -1019,7 +1018,7 @@ def compute_instance_type_validator(param_key, param_value, pcluster_config): if "," not in param_value and "." in param_value: # if the type is not a list, and contains dot (nor optimal, nor a family) # validate instance type against max_vcpus limit - vcpus = get_instance_vcpus(param_value) + vcpus = InstanceTypeInfo.init_from_instance_type(param_value).vcpus_count() if vcpus <= 0: warnings.append( "Unable to get the number of vcpus for the compute_instance_type '{0}'. " @@ -1034,17 +1033,6 @@ def compute_instance_type_validator(param_key, param_value, pcluster_config): else: errors, warnings = ec2_instance_type_validator(param_key, param_value, pcluster_config) - if scheduler != "slurm": - # Multiple NICs instance types are currently supported only with Slurm clusters - instance_nics = get_instance_network_interfaces(param_value) - if instance_nics > 1: - warnings.append( - "Some services needed to support clusters with instance type '{0}' with multiple " - "network interfaces and job scheduler '{1}' may not yet be generally available. 
" - "Please refer to https://docs.aws.amazon.com/autoscaling/ec2/userguide/create-launch-template.html " - "for more information.".format(param_value, scheduler) - ) - return errors, warnings @@ -1088,7 +1076,7 @@ def intel_hpc_architecture_validator(param_key, param_value, pcluster_config): architecture = pcluster_config.get_section("cluster").get_param_value("architecture") if param_value and architecture not in allowed_architectures: errors.append( - "When using enable_intel_hpc_platform = {0} it is required to use master and compute instance " + "When using enable_intel_hpc_platform = {0} it is required to use head node and compute instance " "types and an AMI that support these architectures: {1}".format(param_value, allowed_architectures) ) @@ -1344,7 +1332,7 @@ def ebs_volume_type_size_validator(section_key, section_label, pcluster_config): The default value of volume_size for EBS volumes is 20 GiB. The volume size of standard ranges from 1 GiB - 1 TiB(1024 GiB) - The volume size of gp2 ranges from 1 GiB - 16 TiB(16384 GiB) + The volume size of gp2 and gp3 ranges from 1 GiB - 16 TiB(16384 GiB) The volume size of io1 and io2 ranges from 4 GiB - 16 TiB(16384 GiB) The volume sizes of st1 and sc1 range from 500 GiB - 16 TiB(16384 GiB) """ @@ -1372,11 +1360,11 @@ def ebs_volume_iops_validator(section_key, section_label, pcluster_config): section = pcluster_config.get_section(section_key, section_label) volume_size = section.get_param_value("volume_size") volume_type = section.get_param_value("volume_type") - volume_type_to_iops_ratio = {"io1": 50, "io2": 500} + volume_type_to_iops_ratio = {"io1": 50, "io2": 1000, "gp3": 500} volume_iops = section.get_param_value("volume_iops") - min_iops = 100 - max_iops = 64000 - if volume_type in volume_type_to_iops_ratio: + + if volume_type in EBS_VOLUME_IOPS_BOUNDS: + min_iops, max_iops = EBS_VOLUME_IOPS_BOUNDS.get(volume_type) if volume_iops and (volume_iops < min_iops or volume_iops > max_iops): errors.append( "IOPS rate must be between {min_iops} and {max_iops} when provisioning {volume_type} volumes.".format( @@ -1563,3 +1551,46 @@ def duplicate_shared_dir_validator(section_key, section_label, pcluster_config): errors.append("'shared_dir' can not be specified in cluster section when using multiple EBS volumes") return errors, warnings + + +def efa_os_arch_validator(param_key, param_value, pcluster_config): + errors = [] + warnings = [] + + cluster_section = pcluster_config.get_section("cluster") + architecture = cluster_section.get_param_value("architecture") + base_os = cluster_section.get_param_value("base_os") + + if base_os in EFA_UNSUPPORTED_ARCHITECTURES_OSES.get(architecture): + errors.append("EFA currently not supported on {0} for {1} architecture".format(base_os, architecture)) + + return errors, warnings + + +def ebs_volume_throughput_validator(section_key, section_label, pcluster_config): + errors = [] + warnings = [] + + section = pcluster_config.get_section(section_key, section_label) + volume_type = section.get_param_value("volume_type") + volume_iops = section.get_param_value("volume_iops") + volume_throughput = section.get_param_value("volume_throughput") + volume_throughput_to_iops_ratio = 0.25 + + if volume_type == "gp3": + min_throughput, max_throughput = 125, 1000 + if volume_throughput < min_throughput or volume_throughput > max_throughput: + errors.append( + "Throughput must be between {min_throughput} MB/s and {max_throughput} MB/s when provisioning " + "{volume_type} volumes.".format( + min_throughput=min_throughput, 
max_throughput=max_throughput, volume_type=volume_type + ) + ) + if volume_throughput and volume_throughput > volume_iops * volume_throughput_to_iops_ratio: + errors.append( + "Throughput to IOPS ratio of {0} is too high; maximum is 0.25.".format( + float(volume_throughput) / float(volume_iops) + ) + ) + + return errors, warnings diff --git a/cli/pcluster/configure/__init__.py b/cli/src/pcluster/configure/__init__.py similarity index 100% rename from cli/pcluster/configure/__init__.py rename to cli/src/pcluster/configure/__init__.py diff --git a/cli/pcluster/configure/easyconfig.py b/cli/src/pcluster/configure/easyconfig.py similarity index 88% rename from cli/pcluster/configure/easyconfig.py rename to cli/src/pcluster/configure/easyconfig.py index e1318b0d00..18720ecd6f 100644 --- a/cli/pcluster/configure/easyconfig.py +++ b/cli/src/pcluster/configure/easyconfig.py @@ -11,11 +11,6 @@ # fmt: off from __future__ import absolute_import, print_function # isort:skip from future import standard_library # isort:skip - -from pcluster.cluster_model import ClusterModel -from pcluster.config.hit_converter import HitConverter -from pcluster.config.validators import HEAD_NODE_UNSUPPORTED_INSTANCE_TYPES, HEAD_NODE_UNSUPPORTED_MESSAGE - standard_library.install_aliases() # fmt: on @@ -26,7 +21,10 @@ import boto3 +from pcluster.cluster_model import ClusterModel +from pcluster.config.hit_converter import HitConverter from pcluster.config.pcluster_config import PclusterConfig +from pcluster.config.validators import HEAD_NODE_UNSUPPORTED_INSTANCE_TYPES, HEAD_NODE_UNSUPPORTED_MESSAGE from pcluster.configure.networking import ( NetworkConfiguration, PublicPrivateNetworkConfig, @@ -36,6 +34,7 @@ from pcluster.configure.utils import get_regions, get_resource_tag, handle_client_exception, prompt, prompt_iterable from pcluster.utils import ( error, + get_default_instance_type, get_region, get_supported_az_for_multi_instance_types, get_supported_az_for_one_instance_type, @@ -124,7 +123,7 @@ def configure(args): if pcluster_config.cluster_model == ClusterModel.HIT: error( "Configuration in file {0} cannot be overwritten. Please specify a different file path".format( - args.config_file + pcluster_config.config_file ) ) @@ -232,8 +231,8 @@ def _create_vpc_parameters(vpc_section, cluster_config): vpc_id = prompt_iterable("VPC ID", vpc_list, default_value=default_vpc) vpc_parameters["vpc_id"] = vpc_id subnet_list = vpc_and_subnets["vpc_subnets"][vpc_id] - qualified_master_subnets = _filter_subnets_offering_instance_type( - subnet_list, cluster_config.master_instance_type + qualified_head_node_subnets = _filter_subnets_offering_instance_type( + subnet_list, cluster_config.head_node_instance_type ) if cluster_config.scheduler != "awsbatch": qualified_compute_subnets = _filter_subnets_offering_instance_type( @@ -243,20 +242,20 @@ def _create_vpc_parameters(vpc_section, cluster_config): # Special case of awsbatch, where compute instance type is not specified qualified_compute_subnets = subnet_list if ( - not qualified_master_subnets + not qualified_head_node_subnets or not qualified_compute_subnets or (prompt("Automate Subnet creation? (y/n)", lambda x: x in ("y", "n"), default_value="y") == "y") ): # Start auto subnets creation in the absence of qualified subnets. # Otherwise, user selects between manual and automate subnets creation - if not qualified_master_subnets or not qualified_compute_subnets: + if not qualified_head_node_subnets or not qualified_compute_subnets: print("There are no qualified subnets. 
Starting automatic creation of subnets...") vpc_parameters.update( automate_subnet_creation(vpc_id, _choose_network_configuration(cluster_config), min_subnet_size) ) else: vpc_parameters.update( - _ask_for_subnets(subnet_list, vpc_section, qualified_master_subnets, qualified_compute_subnets) + _ask_for_subnets(subnet_list, vpc_section, qualified_head_node_subnets, qualified_compute_subnets) ) return vpc_parameters @@ -266,20 +265,20 @@ def _filter_subnets_offering_instance_type(subnet_list, instance_type): return [subnet_entry for subnet_entry in subnet_list if subnet_entry["availability_zone"] in qualified_azs] -def _ask_for_subnets(subnet_list, vpc_section, qualified_master_subnets, qualified_compute_subnets): - master_subnet_id = _prompt_for_subnet( - vpc_section.get_param_value("master_subnet_id"), subnet_list, qualified_master_subnets, "Master Subnet ID" +def _ask_for_subnets(subnet_list, vpc_section, qualified_head_node_subnets, qualified_compute_subnets): + head_node_subnet_id = _prompt_for_subnet( + vpc_section.get_param_value("master_subnet_id"), subnet_list, qualified_head_node_subnets, "head node Subnet ID" ) compute_subnet_id = _prompt_for_subnet( - vpc_section.get_param_value("compute_subnet_id") or master_subnet_id, + vpc_section.get_param_value("compute_subnet_id") or head_node_subnet_id, subnet_list, qualified_compute_subnets, - "Compute Subnet ID", + "compute Subnet ID", ) - vpc_parameters = {"master_subnet_id": master_subnet_id} + vpc_parameters = {"master_subnet_id": head_node_subnet_id} - if master_subnet_id != compute_subnet_id: + if head_node_subnet_id != compute_subnet_id: vpc_parameters["compute_subnet_id"] = compute_subnet_id return vpc_parameters @@ -288,18 +287,18 @@ def _ask_for_subnets(subnet_list, vpc_section, qualified_master_subnets, qualifi def _choose_network_configuration(cluster_config): if cluster_config.scheduler == "awsbatch": return PublicPrivateNetworkConfig() - azs_for_master_type = get_supported_az_for_one_instance_type(cluster_config.master_instance_type) + azs_for_head_node_type = get_supported_az_for_one_instance_type(cluster_config.head_node_instance_type) azs_for_compute_type = get_supported_az_for_one_instance_type(cluster_config.compute_instance_type) - common_availability_zones = set(azs_for_master_type) & set(azs_for_compute_type) + common_availability_zones = set(azs_for_head_node_type) & set(azs_for_compute_type) if not common_availability_zones: # Automate subnet creation only allows subnets to reside in a single az. # But user can bypass it by using manual subnets creation during configure or modify the config file directly. print( - "Error: There is no single availability zone offering master and compute in current region.\n" - "To create your cluster, make sure you have a subnet for master node in {0}" + "Error: There is no single availability zone offering head node and compute in current region.\n" + "To create your cluster, make sure you have a subnet for head node in {0}" ", and a subnet for compute nodes in {1}. 
Then run pcluster configure again" - "and avoid using Automate VPC/Subnet creation.".format(azs_for_master_type, azs_for_compute_type) + "and avoid using Automate VPC/Subnet creation.".format(azs_for_head_node_type, azs_for_compute_type) ) print("Exiting...") sys.exit(1) @@ -368,17 +367,23 @@ def prompt_os(self): ) def prompt_instance_types(self): - """Ask for master_instance_type and compute_instance_type (if necessary).""" - self.master_instance_type = prompt( - "Master instance type", + """Ask for head_node_instance_type and compute_instance_type (if necessary).""" + default_head_node_instance_type = self.cluster_section.get_param_value("master_instance_type") + if not default_head_node_instance_type: + default_head_node_instance_type = get_default_instance_type() + self.head_node_instance_type = prompt( + "Head node instance type", lambda x: _is_instance_type_supported_for_head_node(x) and x in get_supported_instance_types(), - default_value=self.cluster_section.get_param_value("master_instance_type"), + default_value=default_head_node_instance_type, ) if not self.is_aws_batch: + default_compute_instance_type = self.cluster_section.get_param_value("compute_instance_type") + if not default_compute_instance_type: + default_compute_instance_type = get_default_instance_type() self.compute_instance_type = prompt( "Compute instance type", lambda x: x in get_supported_compute_instance_types(self.scheduler), - default_value=self.cluster_section.get_param_value("compute_instance_type"), + default_value=default_compute_instance_type, ) # Cache availability zones offering the selected instance type(s) for later use self.cache_qualified_az() @@ -401,7 +406,7 @@ def get_scheduler_parameters(self): """Return a dict containing the scheduler dependent parameters.""" scheduler_parameters = { "base_os": self.base_os, - "master_instance_type": self.master_instance_type, + "master_instance_type": self.head_node_instance_type, "compute_instance_type": self.compute_instance_type, self.max_size_name: self.max_cluster_size, self.min_size_name: self.min_cluster_size, @@ -421,9 +426,9 @@ def get_parameters_to_reset(self): def cache_qualified_az(self): """ - Call API once for both master and compute instance type. + Call API once for both head node and compute instance type. Cache is done inside get get_supported_az_for_instance_types. 
""" if not self.is_aws_batch: - get_supported_az_for_multi_instance_types([self.master_instance_type, self.compute_instance_type]) + get_supported_az_for_multi_instance_types([self.head_node_instance_type, self.compute_instance_type]) diff --git a/cli/pcluster/configure/networking.py b/cli/src/pcluster/configure/networking.py similarity index 95% rename from cli/pcluster/configure/networking.py rename to cli/src/pcluster/configure/networking.py index 18aa484327..de21f770fe 100644 --- a/cli/pcluster/configure/networking.py +++ b/cli/src/pcluster/configure/networking.py @@ -33,7 +33,7 @@ DEFAULT_AWS_REGION_NAME = "us-east-1" LOGGER = logging.getLogger(__name__) TIMESTAMP = "-{:%Y%m%d%H%M%S}".format(datetime.datetime.utcnow()) -MASTER_SUBNET_IPS = 250 +HEAD_NODE_SUBNET_IPS = 250 if sys.version_info >= (3, 4): ABC = abc.ABC @@ -88,11 +88,11 @@ def _get_availability_zone(self): class PublicNetworkConfig(BaseNetworkConfig): - """The public configuration that creates one public subnet with master and compute fleet.""" + """The public configuration that creates one public subnet with head node and compute fleet.""" def __init__(self, availability_zones=None): super(PublicNetworkConfig, self).__init__( - config_type="Master and compute fleet in the same public subnet", + config_type="Head node and compute fleet in the same public subnet", template_name="public", stack_name_prefix="pub", availability_zones=availability_zones, @@ -106,7 +106,7 @@ def get_cfn_parameters(self, vpc_id, internet_gateway_id, public_cidr): def _create(self, vpc_id, vpc_cidr, subnet_cidrs, internet_gateway_id, compute_subnet_size): public_cidr = get_subnet_cidr( - vpc_cidr=vpc_cidr, occupied_cidr=subnet_cidrs, min_subnet_size=compute_subnet_size + MASTER_SUBNET_IPS + vpc_cidr=vpc_cidr, occupied_cidr=subnet_cidrs, min_subnet_size=compute_subnet_size + HEAD_NODE_SUBNET_IPS ) _validate_cidr(public_cidr) parameters = self.get_cfn_parameters(vpc_id, internet_gateway_id, public_cidr) @@ -115,11 +115,11 @@ def _create(self, vpc_id, vpc_cidr, subnet_cidrs, internet_gateway_id, compute_s class PublicPrivateNetworkConfig(BaseNetworkConfig): - """The publicprivate configuration that creates one public subnet for master and one private subnet for compute.""" + """The public private config that creates one public subnet for head node and one private subnet for compute.""" def __init__(self, availability_zones=None): super(PublicPrivateNetworkConfig, self).__init__( - config_type="Master in a public subnet and compute fleet in a private subnet", + config_type="Head node in a public subnet and compute fleet in a private subnet", template_name="public-private", stack_name_prefix="pubpriv", availability_zones=availability_zones, @@ -133,7 +133,7 @@ def get_cfn_parameters(self, vpc_id, internet_gateway_id, public_cidr, private_c return parameters def _create(self, vpc_id, vpc_cidr, subnet_cidrs, internet_gateway_id, compute_subnet_size): # noqa D102 - public_cidr = evaluate_cidr(vpc_cidr=vpc_cidr, occupied_cidrs=subnet_cidrs, target_size=MASTER_SUBNET_IPS) + public_cidr = evaluate_cidr(vpc_cidr=vpc_cidr, occupied_cidrs=subnet_cidrs, target_size=HEAD_NODE_SUBNET_IPS) _validate_cidr(public_cidr) subnet_cidrs.append(public_cidr) private_cidr = get_subnet_cidr( diff --git a/cli/pcluster/configure/subnet_computation.py b/cli/src/pcluster/configure/subnet_computation.py similarity index 100% rename from cli/pcluster/configure/subnet_computation.py rename to cli/src/pcluster/configure/subnet_computation.py diff --git 
a/cli/pcluster/configure/utils.py b/cli/src/pcluster/configure/utils.py similarity index 100% rename from cli/pcluster/configure/utils.py rename to cli/src/pcluster/configure/utils.py diff --git a/cli/pcluster/constants.py b/cli/src/pcluster/constants.py similarity index 100% rename from cli/pcluster/constants.py rename to cli/src/pcluster/constants.py diff --git a/cli/pcluster/createami.py b/cli/src/pcluster/createami.py similarity index 96% rename from cli/pcluster/createami.py rename to cli/src/pcluster/createami.py index 409bd50a02..b0e3e8cf18 100644 --- a/cli/pcluster/createami.py +++ b/cli/src/pcluster/createami.py @@ -244,9 +244,9 @@ def _get_default_createami_instance_type(ami_architecture): LOGGER.error("Base AMI used in createami has an unsupported architecture: {0}".format(ami_architecture)) sys.exit(1) - # Ensure instance type is avaiable in the selected region + # Ensure instance type is available in the selected region try: - utils.get_instance_types_info([instance_type], fail_on_error=True) + utils.InstanceTypeInfo.init_from_instance_type(instance_type) except SystemExit as system_exit: if "instance types do not exist" in str(system_exit): LOGGER.error( @@ -358,12 +358,3 @@ def create_ami(args): _print_create_ami_results(results) if "tmp_dir" in locals() and tmp_dir: rmtree(tmp_dir) - - -def _get_default_template_url(region): - return ( - "https://{REGION}-aws-parallelcluster.s3.{REGION}.amazonaws.com{SUFFIX}/templates/" - "aws-parallelcluster-{VERSION}.cfn.json".format( - REGION=region, SUFFIX=".cn" if region.startswith("cn") else "", VERSION=utils.get_installed_version() - ) - ) diff --git a/cli/pcluster/dcv/__init__.py b/cli/src/pcluster/dcv/__init__.py similarity index 100% rename from cli/pcluster/dcv/__init__.py rename to cli/src/pcluster/dcv/__init__.py diff --git a/cli/pcluster/dcv/connect.py b/cli/src/pcluster/dcv/connect.py similarity index 80% rename from cli/pcluster/dcv/connect.py rename to cli/src/pcluster/dcv/connect.py index ec999b8509..f1b1fc01e2 100644 --- a/cli/pcluster/dcv/connect.py +++ b/cli/src/pcluster/dcv/connect.py @@ -16,7 +16,7 @@ from pcluster.config.pcluster_config import PclusterConfig from pcluster.constants import PCLUSTER_ISSUES_LINK from pcluster.dcv.utils import DCV_CONNECT_SCRIPT -from pcluster.utils import error, get_cfn_param, get_master_ip_and_username, get_stack, get_stack_name, retry +from pcluster.utils import error, get_cfn_param, get_head_node_ip_and_username, get_stack, get_stack_name, retry LOGGER = logging.getLogger(__name__) @@ -40,26 +40,26 @@ def dcv_connect(args): # Parse configuration file to read the AWS section PclusterConfig.init_aws() # FIXME it always searches for the default configuration file - # Prepare ssh command to execute in the master instance + # Prepare ssh command to execute in the head node instance stack = get_stack(get_stack_name(args.cluster_name)) shared_dir = get_cfn_param(stack.get("Parameters"), "SharedDir") - master_ip, username = get_master_ip_and_username(args.cluster_name) - cmd = 'ssh {CFN_USER}@{MASTER_IP} {KEY} "{REMOTE_COMMAND} {DCV_SHARED_DIR}"'.format( + head_node_ip, username = get_head_node_ip_and_username(args.cluster_name) + cmd = 'ssh {CFN_USER}@{HEAD_NODE_IP} {KEY} "{REMOTE_COMMAND} {DCV_SHARED_DIR}"'.format( CFN_USER=username, - MASTER_IP=master_ip, + HEAD_NODE_IP=head_node_ip, KEY="-i {0}".format(args.key_path) if args.key_path else "", REMOTE_COMMAND=DCV_CONNECT_SCRIPT, DCV_SHARED_DIR=shared_dir, ) try: - url = retry(_retrieve_dcv_session_url, func_args=[cmd, args.cluster_name, 
master_ip], attempts=4) + url = retry(_retrieve_dcv_session_url, func_args=[cmd, args.cluster_name, head_node_ip], attempts=4) url_message = "Please use the following one-time URL in your browser within 30 seconds:\n{0}".format(url) except DCVConnectionError as e: error( "Something went wrong during DCV connection.\n{0}" "Please check the logs in the /var/log/parallelcluster/ folder " - "of the master instance and submit an issue {1}\n".format(e, PCLUSTER_ISSUES_LINK) + "of the head node and submit an issue {1}\n".format(e, PCLUSTER_ISSUES_LINK) ) if args.show_url: @@ -73,8 +73,8 @@ LOGGER.info("{0}\n{1}".format(e, url_message)) -def _retrieve_dcv_session_url(ssh_cmd, cluster_name, master_ip): - """Connect by ssh to the master instance, prepare DCV session and return the DCV session URL.""" +def _retrieve_dcv_session_url(ssh_cmd, cluster_name, head_node_ip): + """Connect by ssh to the head node instance, prepare DCV session and return the DCV session URL.""" try: LOGGER.debug("SSH command: {0}".format(ssh_cmd)) output = _check_command_output(ssh_cmd) @@ -93,7 +93,7 @@ def _retrieve_dcv_session_url(ssh_cmd, cluster_name, master_ip): error( "Something went wrong during DCV connection. Please manually execute the command:\n{0}\n" "If the problem persists, please check the logs in the /var/log/parallelcluster/ folder " - "of the master instance and submit an issue {1}".format(ssh_cmd, PCLUSTER_ISSUES_LINK) + "of the head node and submit an issue {1}".format(ssh_cmd, PCLUSTER_ISSUES_LINK) ) except sub.CalledProcessError as e: @@ -106,5 +106,5 @@ raise DCVConnectionError(e.output) return "https://{IP}:{PORT}?authToken={TOKEN}#{SESSION_ID}".format( - IP=master_ip, PORT=dcv_server_port, TOKEN=dcv_session_token, SESSION_ID=dcv_session_id + IP=head_node_ip, PORT=dcv_server_port, TOKEN=dcv_session_token, SESSION_ID=dcv_session_id ) diff --git a/cli/pcluster/dcv/utils.py b/cli/src/pcluster/dcv/utils.py similarity index 100% rename from cli/pcluster/dcv/utils.py rename to cli/src/pcluster/dcv/utils.py diff --git a/cli/pcluster/examples/config b/cli/src/pcluster/examples/config similarity index 93% rename from cli/pcluster/examples/config rename to cli/src/pcluster/examples/config index 201172f06d..914766f190 100644 --- a/cli/pcluster/examples/config +++ b/cli/src/pcluster/examples/config @@ -23,11 +23,11 @@ key_name = mykey # Override path to cloudformation in S3 # (defaults to https://s3.amazonaws.com/-aws-parallelcluster/templates/aws-parallelcluster-.cfn.json) #template_url = https://s3.amazonaws.com/-aws-parallelcluster/templates/aws-parallelcluster-.cfn.json -# EC2 instance type for master node -# (defaults to t2.micro) +# EC2 instance type for head node +# (defaults to the free tier instance type of the region. If the region does not have a free tier, the default is t3.micro) #master_instance_type = t2.micro # EC2 instance type for compute nodes -# (defaults to t2.micro , 'optimal' when scheduler is awsbatch) +# (defaults to the free tier instance type of the region, 'optimal' when scheduler is awsbatch) #compute_instance_type = t2.micro # Initial number of EC2 instances to launch as compute nodes in the cluster for schedulers other than awsbatch. # (defaults to 2) @@ -94,7 +94,7 @@ key_name = mykey # Encrypted ephemeral drives. In-memory keys, non-recoverable. # (defaults to false) #encrypted_ephemeral = false -# MasterServer root volume size in GB. 
(AMI must support growroot) # (defaults to 25) #master_root_volume_size = 25 # ComputeFleet root volume size in GB. (AMI must support growroot) @@ -109,6 +109,9 @@ key_name = mykey # Existing EC2 IAM policies to be associated with the EC2 instances # (defaults to NONE) #additional_iam_policies = NONE +# Existing IAM role to be associated with Lambda functions +# (defaults to NONE) +#iam_lambda_role = NONE # Disable Hyperthreading on all instances # (defaults to False) #disable_hyperthreading = false @@ -137,7 +140,7 @@ vpc_settings = public [vpc public] # ID of the VPC you want to provision cluster into. vpc_id = vpc-12345678 -# ID of the Subnet you want to provision the Master server into +# ID of the Subnet you want to provision the head node into master_subnet_id = subnet-12345678 # SSH from CIDR # This is only used when AWS ParallelCluster creates the security group @@ -153,7 +156,7 @@ master_subnet_id = subnet-12345678 #[vpc private-new] # ID of the VPC you want to provision cluster into. #vpc_id = vpc-12345678 -# ID of the Subnet you want to provision the Master server into +# ID of the Subnet you want to provision the head node into #master_subnet_id = subnet-12345678 # CIDR for new backend subnet i.e. 10.0.100.0/24 #compute_subnet_cidr = 10.0.100.0/24 @@ -161,7 +164,7 @@ master_subnet_id = subnet-12345678 #[vpc private-existing] # ID of the VPC you want to provision cluster into. #vpc_id = vpc-12345678 -# ID of the Subnet you want to provision the Master server into +# ID of the Subnet you want to provision the head node into #master_subnet_id = subnet-12345678 # CIDR for new backend subnet i.e. 10.0.100.0/24 #compute_subnet_id = subnet-23456789 @@ -184,7 +187,7 @@ master_subnet_id = subnet-12345678 # Use encrypted volume (should not be used with snapshots) # (defaults to false) #encrypted = false -# Existing EBS volume to be attached to the MasterServer +# Existing EBS volume to be attached to the head node # (defaults to NONE) #ebs_volume_id = NONE diff --git a/cli/pcluster/examples/job.sh b/cli/src/pcluster/examples/job.sh similarity index 100% rename from cli/pcluster/examples/job.sh rename to cli/src/pcluster/examples/job.sh diff --git a/cli/pcluster/models/__init__.py b/cli/src/pcluster/models/__init__.py similarity index 100% rename from cli/pcluster/models/__init__.py rename to cli/src/pcluster/models/__init__.py diff --git a/cli/pcluster/models/hit/__init__.py b/cli/src/pcluster/models/hit/__init__.py similarity index 100% rename from cli/pcluster/models/hit/__init__.py rename to cli/src/pcluster/models/hit/__init__.py diff --git a/cli/pcluster/models/hit/hit_cluster_model.py b/cli/src/pcluster/models/hit/hit_cluster_model.py similarity index 81% rename from cli/pcluster/models/hit/hit_cluster_model.py rename to cli/src/pcluster/models/hit/hit_cluster_model.py index 6ba716a8b5..3b60a7f350 100644 --- a/cli/pcluster/models/hit/hit_cluster_model.py +++ b/cli/src/pcluster/models/hit/hit_cluster_model.py @@ -12,7 +12,7 @@ from pcluster.cluster_model import ClusterModel from pcluster.config import mappings -from pcluster.utils import disable_ht_via_cpu_options, get_default_threads_per_core, get_instance_type +from pcluster.utils import InstanceTypeInfo, disable_ht_via_cpu_options class HITClusterModel(ClusterModel): @@ -42,56 +42,55 @@ def test_configuration(self, pcluster_config): cluster_section = pcluster_config.get_section("cluster") vpc_section = pcluster_config.get_section("vpc") - if not cluster_section or cluster_section.get_param_value("scheduler") == "awsbatch" or not 
vpc_section: + if cluster_section.get_param_value("scheduler") == "awsbatch": return - master_instance_type = cluster_section.get_param_value("master_instance_type") + head_node_instance_type = cluster_section.get_param_value("master_instance_type") # Retrieve network parameters compute_subnet = vpc_section.get_param_value("compute_subnet_id") - master_subnet = vpc_section.get_param_value("master_subnet_id") + head_node_subnet = vpc_section.get_param_value("master_subnet_id") vpc_security_group = vpc_section.get_param_value("vpc_security_group_id") if not compute_subnet: - compute_subnet = master_subnet + compute_subnet = head_node_subnet security_groups_ids = [] if vpc_security_group: security_groups_ids.append(vpc_security_group) # Initialize CpuOptions disable_hyperthreading = cluster_section.get_param_value("disable_hyperthreading") - master_instance_type_info = get_instance_type(master_instance_type) + head_node_instance_type_info = InstanceTypeInfo.init_from_instance_type(head_node_instance_type) # Set vcpus according to queue's disable_hyperthreading and instance features - vcpus_info = master_instance_type_info.get("VCpuInfo") - master_vcpus = vcpus_info.get("DefaultVCpus") - - master_cpu_options = {"CoreCount": master_vcpus // 2, "ThreadsPerCore": 1} if disable_hyperthreading else {} - master_threads_per_core = get_default_threads_per_core(master_instance_type) - master_cpu_options = ( - {"CoreCount": master_vcpus // master_threads_per_core, "ThreadsPerCore": 1} - if disable_hyperthreading and disable_ht_via_cpu_options(master_instance_type, master_threads_per_core) + head_node_vcpus = head_node_instance_type_info.vcpus_count() + + head_node_threads_per_core = head_node_instance_type_info.default_threads_per_core() + head_node_cpu_options = ( + {"CoreCount": head_node_vcpus // head_node_threads_per_core, "ThreadsPerCore": 1} + if disable_hyperthreading + and disable_ht_via_cpu_options(head_node_instance_type, head_node_threads_per_core) else {} ) try: latest_alinux_ami_id = self._get_latest_alinux_ami_id() - master_network_interfaces = self.build_launch_network_interfaces( + head_node_network_interfaces = self.build_launch_network_interfaces( network_interfaces_count=int(cluster_section.get_param_value("network_interfaces_count")[0]), - use_efa=False, # EFA is not supported on master node + use_efa=False, # EFA is not supported on head node security_group_ids=security_groups_ids, - subnet=master_subnet, + subnet=head_node_subnet, use_public_ips=vpc_section.get_param_value("use_public_ips"), ) - # Test Master Instance Configuration + # Test Head Node Instance Configuration self._ec2_run_instance( pcluster_config, - InstanceType=master_instance_type, + InstanceType=head_node_instance_type, MinCount=1, MaxCount=1, ImageId=latest_alinux_ami_id, - CpuOptions=master_cpu_options, - NetworkInterfaces=master_network_interfaces, + CpuOptions=head_node_cpu_options, + NetworkInterfaces=head_node_network_interfaces, DryRun=True, ) diff --git a/cli/pcluster/models/sit/__init__.py b/cli/src/pcluster/models/sit/__init__.py similarity index 100% rename from cli/pcluster/models/sit/__init__.py rename to cli/src/pcluster/models/sit/__init__.py diff --git a/cli/pcluster/models/sit/sit_cluster_model.py b/cli/src/pcluster/models/sit/sit_cluster_model.py similarity index 76% rename from cli/pcluster/models/sit/sit_cluster_model.py rename to cli/src/pcluster/models/sit/sit_cluster_model.py index 9f4a75c433..8bdc85f887 100644 --- a/cli/pcluster/models/sit/sit_cluster_model.py +++ 
b/cli/src/pcluster/models/sit/sit_cluster_model.py @@ -12,7 +12,7 @@ from pcluster.cluster_model import ClusterModel from pcluster.config import mappings -from pcluster.utils import disable_ht_via_cpu_options, get_default_threads_per_core, get_instance_vcpus +from pcluster.utils import InstanceTypeInfo, disable_ht_via_cpu_options class SITClusterModel(ClusterModel): @@ -60,35 +60,36 @@ def test_configuration(self, pcluster_config): vpc_section = pcluster_config.get_section("vpc") if ( - not cluster_section - or cluster_section.get_param_value("scheduler") == "awsbatch" + cluster_section.get_param_value("scheduler") == "awsbatch" or cluster_section.get_param_value("cluster_type") == "spot" - or not vpc_section ): return - master_instance_type = cluster_section.get_param_value("master_instance_type") + head_node_instance_type = cluster_section.get_param_value("master_instance_type") compute_instance_type = cluster_section.get_param_value("compute_instance_type") # Retrieve network parameters compute_subnet = vpc_section.get_param_value("compute_subnet_id") - master_subnet = vpc_section.get_param_value("master_subnet_id") + head_node_subnet = vpc_section.get_param_value("master_subnet_id") vpc_security_group = vpc_section.get_param_value("vpc_security_group_id") if not compute_subnet: - compute_subnet = master_subnet + compute_subnet = head_node_subnet security_groups_ids = [] if vpc_security_group: security_groups_ids.append(vpc_security_group) # Initialize CpuOptions disable_hyperthreading = cluster_section.get_param_value("disable_hyperthreading") - master_vcpus = get_instance_vcpus(master_instance_type) - master_threads_per_core = get_default_threads_per_core(master_instance_type) - compute_vcpus = get_instance_vcpus(compute_instance_type) - compute_threads_per_core = get_default_threads_per_core(compute_instance_type) - master_cpu_options = ( - {"CoreCount": master_vcpus // master_threads_per_core, "ThreadsPerCore": 1} - if disable_hyperthreading and disable_ht_via_cpu_options(master_instance_type, master_threads_per_core) + head_node_instance_type_info = InstanceTypeInfo.init_from_instance_type(head_node_instance_type) + head_node_vcpus = head_node_instance_type_info.vcpus_count() + head_node_threads_per_core = head_node_instance_type_info.default_threads_per_core() + compute_instance_type_info = InstanceTypeInfo.init_from_instance_type(compute_instance_type) + compute_vcpus = compute_instance_type_info.vcpus_count() + compute_threads_per_core = compute_instance_type_info.default_threads_per_core() + head_node_cpu_options = ( + {"CoreCount": head_node_vcpus // head_node_threads_per_core, "ThreadsPerCore": 1} + if disable_hyperthreading + and disable_ht_via_cpu_options(head_node_instance_type, head_node_threads_per_core) else {} ) compute_cpu_options = ( @@ -100,7 +101,7 @@ def test_configuration(self, pcluster_config): # Initialize Placement Group Logic placement_group = cluster_section.get_param_value("placement_group") placement = cluster_section.get_param_value("placement") - master_placement_group = ( + head_node_placement_group = ( {"GroupName": placement_group} if placement_group not in [None, "NONE", "DYNAMIC"] and placement == "cluster" else {} @@ -112,30 +113,30 @@ def test_configuration(self, pcluster_config): try: latest_alinux_ami_id = self._get_latest_alinux_ami_id() - master_network_interfaces = self.build_launch_network_interfaces( + head_node_network_interfaces = self.build_launch_network_interfaces( 
network_interfaces_count=int(cluster_section.get_param_value("network_interfaces_count")[0]), - use_efa=False, # EFA is not supported on master node + use_efa=False, # EFA is not supported on head node security_group_ids=security_groups_ids, - subnet=master_subnet, + subnet=head_node_subnet, use_public_ips=vpc_section.get_param_value("use_public_ips"), ) - # Test Master Instance Configuration + # Test head node configuration self._ec2_run_instance( pcluster_config, - InstanceType=master_instance_type, + InstanceType=head_node_instance_type, MinCount=1, MaxCount=1, ImageId=latest_alinux_ami_id, - CpuOptions=master_cpu_options, - NetworkInterfaces=master_network_interfaces, - Placement=master_placement_group, + CpuOptions=head_node_cpu_options, + NetworkInterfaces=head_node_network_interfaces, + Placement=head_node_placement_group, DryRun=True, ) compute_network_interfaces_count = int(cluster_section.get_param_value("network_interfaces_count")[1]) enable_efa = "compute" == cluster_section.get_param_value("enable_efa") - # TODO: check if master == compute subnet condition is to take into account + # TODO: check whether the head node == compute subnet condition should be taken into account use_public_ips = self.public_ips_in_compute_subnet(pcluster_config, compute_network_interfaces_count) network_interfaces = self.build_launch_network_interfaces( diff --git a/cli/pcluster/networking/__init__.py b/cli/src/pcluster/networking/__init__.py similarity index 100% rename from cli/pcluster/networking/__init__.py rename to cli/src/pcluster/networking/__init__.py diff --git a/cli/pcluster/networking/vpc_factory.py b/cli/src/pcluster/networking/vpc_factory.py similarity index 100% rename from cli/pcluster/networking/vpc_factory.py rename to cli/src/pcluster/networking/vpc_factory.py diff --git a/cli/pcluster/resources/batch/docker/alinux b/cli/src/pcluster/resources/batch/docker/alinux similarity index 100% rename from cli/pcluster/resources/batch/docker/alinux rename to cli/src/pcluster/resources/batch/docker/alinux diff --git a/cli/pcluster/resources/batch/docker/alinux2/Dockerfile b/cli/src/pcluster/resources/batch/docker/alinux2/Dockerfile similarity index 97% rename from cli/pcluster/resources/batch/docker/alinux2/Dockerfile rename to cli/src/pcluster/resources/batch/docker/alinux2/Dockerfile index 28ff1e5b6a..737e94dcae 100644 --- a/cli/pcluster/resources/batch/docker/alinux2/Dockerfile +++ b/cli/src/pcluster/resources/batch/docker/alinux2/Dockerfile @@ -1,4 +1,4 @@ -FROM amazonlinux:latest +FROM public.ecr.aws/amazonlinux/amazonlinux:2 ENV USER root diff --git a/cli/pcluster/resources/batch/docker/build-docker-images.sh b/cli/src/pcluster/resources/batch/docker/build-docker-images.sh similarity index 100% rename from cli/pcluster/resources/batch/docker/build-docker-images.sh rename to cli/src/pcluster/resources/batch/docker/build-docker-images.sh diff --git a/cli/pcluster/resources/batch/docker/buildspec.yml b/cli/src/pcluster/resources/batch/docker/buildspec.yml similarity index 91% rename from cli/pcluster/resources/batch/docker/buildspec.yml rename to cli/src/pcluster/resources/batch/docker/buildspec.yml index 2d9c5a6bb5..0f14e5f0fa 100644 --- a/cli/pcluster/resources/batch/docker/buildspec.yml +++ b/cli/src/pcluster/resources/batch/docker/buildspec.yml @@ -1,9 +1,6 @@ version: 0.2 phases: - install: - runtime-versions: - docker: 18 pre_build: commands: - echo Logging in to Amazon ECR... 
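The entrypoint.sh hunk just below is the AWS Batch fix called out in this release's changelog: containers no longer run `aws cloudformation describe-stacks` to discover the head node IP; they read the `PCLUSTER_MASTER_IP` environment variable that the job definition now injects, removing a per-job API call that CloudFormation could throttle. A rough Python sketch of the before/after lookup, assuming boto3; `resolve_head_node_ip` is a hypothetical helper, not code from this patch:

```python
import os

import boto3


def resolve_head_node_ip(stack_name, region):
    """Hypothetical helper contrasting the old and new head node IP lookups."""
    # New behavior: the Batch job definition exports the head node IP directly,
    # so each container start costs zero CloudFormation API calls.
    head_node_ip = os.environ.get("PCLUSTER_MASTER_IP")
    if head_node_ip:
        return head_node_ip

    # Old behavior (removed by this patch): one DescribeStacks call per container,
    # which can be throttled when a large array job starts many containers at once.
    outputs = (
        boto3.client("cloudformation", region_name=region)
        .describe_stacks(StackName=stack_name)["Stacks"][0]
        .get("Outputs", [])
    )
    return next(
        output["OutputValue"] for output in outputs if output["OutputKey"] == "MasterPrivateIP"
    )
```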
diff --git a/cli/pcluster/resources/batch/docker/scripts/entrypoint.sh b/cli/src/pcluster/resources/batch/docker/scripts/entrypoint.sh similarity index 68% rename from cli/pcluster/resources/batch/docker/scripts/entrypoint.sh rename to cli/src/pcluster/resources/batch/docker/scripts/entrypoint.sh index 110232148d..a372270253 100755 --- a/cli/pcluster/resources/batch/docker/scripts/entrypoint.sh +++ b/cli/src/pcluster/resources/batch/docker/scripts/entrypoint.sh @@ -9,16 +9,9 @@ echo "Starting ssh agents..." eval $(ssh-agent -s) && ssh-add ${SSHDIR}/id_rsa /usr/sbin/sshd -f /root/.ssh/sshd_config -h /root/.ssh/ssh_host_rsa_key -# get private Master IP -_master_ip="$(aws --region "${PCLUSTER_AWS_REGION}" cloudformation describe-stacks --stack-name "${PCLUSTER_STACK_NAME}" --query "Stacks[0].Outputs[?OutputKey=='MasterPrivateIP'].OutputValue" --output text)" -if [[ -z "${_master_ip}" ]]; then - echo "Error getting Master IP" - exit 1 -fi - # mount nfs echo "Mounting /home..." -/parallelcluster/bin/mount_nfs.sh "${_master_ip}" "/home" +/parallelcluster/bin/mount_nfs.sh "${PCLUSTER_MASTER_IP}" "/home" echo "Mounting shared file system..." ebs_shared_dirs=$(echo "${PCLUSTER_SHARED_DIRS}" | tr "," " ") @@ -27,7 +20,7 @@ for ebs_shared_dir in ${ebs_shared_dirs} do if [[ ${ebs_shared_dir} != "NONE" ]]; then # mount nfs - /parallelcluster/bin/mount_nfs.sh "${_master_ip}" "${ebs_shared_dir}" + /parallelcluster/bin/mount_nfs.sh "${PCLUSTER_MASTER_IP}" "${ebs_shared_dir}" fi done @@ -41,7 +34,7 @@ fi # mount RAID via nfs if [[ ${PCLUSTER_RAID_SHARED_DIR} != "NONE" ]]; then - /parallelcluster/bin/mount_nfs.sh "${_master_ip}" "${PCLUSTER_RAID_SHARED_DIR}" + /parallelcluster/bin/mount_nfs.sh "${PCLUSTER_MASTER_IP}" "${PCLUSTER_RAID_SHARED_DIR}" fi # create hostfile if mnp job diff --git a/cli/pcluster/resources/batch/docker/scripts/generate_hostfile.sh b/cli/src/pcluster/resources/batch/docker/scripts/generate_hostfile.sh similarity index 100% rename from cli/pcluster/resources/batch/docker/scripts/generate_hostfile.sh rename to cli/src/pcluster/resources/batch/docker/scripts/generate_hostfile.sh diff --git a/cli/pcluster/resources/batch/docker/scripts/mount_efs.sh b/cli/src/pcluster/resources/batch/docker/scripts/mount_efs.sh similarity index 100% rename from cli/pcluster/resources/batch/docker/scripts/mount_efs.sh rename to cli/src/pcluster/resources/batch/docker/scripts/mount_efs.sh diff --git a/cli/pcluster/resources/batch/docker/scripts/mount_nfs.sh b/cli/src/pcluster/resources/batch/docker/scripts/mount_nfs.sh similarity index 67% rename from cli/pcluster/resources/batch/docker/scripts/mount_nfs.sh rename to cli/src/pcluster/resources/batch/docker/scripts/mount_nfs.sh index 2f6f30e12d..4a350500ac 100755 --- a/cli/pcluster/resources/batch/docker/scripts/mount_nfs.sh +++ b/cli/src/pcluster/resources/batch/docker/scripts/mount_nfs.sh @@ -13,7 +13,7 @@ # ANY KIND, express or implied. See the License for the specific # language governing permissions and limitations under the License. -# Usage: mount_filesystem.sh master_ip shared_dir +# Usage: mount_filesystem.sh head_node_ip shared_dir error_exit_usage() { echo "Error executing script: $1" @@ -28,20 +28,20 @@ error_exit() { usage() { cat <<ENDUSAGE -mount_nfs <master_ip> <shared_dir> -master_ip: ip address of the main node -shared_dir: directory from master to be shared. If directory doesn't exist on compute, will be created +mount_nfs <head_node_ip> <shared_dir> +head_node_ip: ip address of the main node +shared_dir: directory from head node to be shared.
If directory doesn't exist on compute, will be created ENDUSAGE } # Check that the arguments are valid check_arguments_valid(){ - if [ -z "${master_ip}" ]; then - error_exit_usage "Master IP is a required argument" + if [ -z "${head_node_ip}" ]; then + error_exit_usage "Head Node IP is a required argument" fi if [ -z "${shared_dir}" ]; then @@ -58,22 +58,22 @@ mount_nfs() { fi mkdir -p ${shared_dir} - error_message=$(mount -t nfs -o hard,intr,noatime,_netdev "${master_ip}":"${shared_dir}" "${shared_dir}" 2>&1) + error_message=$(mount -t nfs -o hard,intr,noatime,_netdev "${head_node_ip}":"${shared_dir}" "${shared_dir}" 2>&1) if [[ $? -ne 0 ]]; then - error_exit "Failed to mount nfs volume from ${master_ip}:${shared_dir} with error_message: ${error_message}" + error_exit "Failed to mount nfs volume from ${head_node_ip}:${shared_dir} with error_message: ${error_message}" fi # Check that the filesystem is mounted as appropriate - mount_line=$(mount | grep "${master_ip}:${shared_dir}") + mount_line=$(mount | grep "${head_node_ip}:${shared_dir}") if [[ -z "${mount_line}" ]]; then - error_exit "mount succeeded but nfs volume from ${master_ip}:${shared_dir} was not mounted as expected" + error_exit "mount succeeded but nfs volume from ${head_node_ip}:${shared_dir} was not mounted as expected" fi } # main function main() { - master_ip=${1} + head_node_ip=${1} shared_dir=${2} if [[ "${shared_dir:0:1}" != '/' ]]; then shared_dir="/${shared_dir}" diff --git a/cli/pcluster/resources/batch/docker/upload-docker-images.sh b/cli/src/pcluster/resources/batch/docker/upload-docker-images.sh similarity index 100% rename from cli/pcluster/resources/batch/docker/upload-docker-images.sh rename to cli/src/pcluster/resources/batch/docker/upload-docker-images.sh diff --git a/cli/pcluster/resources/custom_resources/custom_resources_code/__init__.py b/cli/src/pcluster/resources/custom_resources/custom_resources_code/__init__.py similarity index 100% rename from cli/pcluster/resources/custom_resources/custom_resources_code/__init__.py rename to cli/src/pcluster/resources/custom_resources/custom_resources_code/__init__.py diff --git a/cli/pcluster/resources/custom_resources/custom_resources_code/cleanup_resources.py b/cli/src/pcluster/resources/custom_resources/custom_resources_code/cleanup_resources.py similarity index 100% rename from cli/pcluster/resources/custom_resources/custom_resources_code/cleanup_resources.py rename to cli/src/pcluster/resources/custom_resources/custom_resources_code/cleanup_resources.py diff --git a/cli/pcluster/resources/custom_resources/custom_resources_code/crhelper/LICENSE b/cli/src/pcluster/resources/custom_resources/custom_resources_code/crhelper/LICENSE similarity index 100% rename from cli/pcluster/resources/custom_resources/custom_resources_code/crhelper/LICENSE rename to cli/src/pcluster/resources/custom_resources/custom_resources_code/crhelper/LICENSE diff --git a/cli/pcluster/resources/custom_resources/custom_resources_code/crhelper/NOTICE b/cli/src/pcluster/resources/custom_resources/custom_resources_code/crhelper/NOTICE similarity index 100% rename from cli/pcluster/resources/custom_resources/custom_resources_code/crhelper/NOTICE rename to cli/src/pcluster/resources/custom_resources/custom_resources_code/crhelper/NOTICE diff --git a/cli/pcluster/resources/custom_resources/custom_resources_code/crhelper/__init__.py b/cli/src/pcluster/resources/custom_resources/custom_resources_code/crhelper/__init__.py similarity index 100% rename from 
cli/pcluster/resources/custom_resources/custom_resources_code/crhelper/__init__.py rename to cli/src/pcluster/resources/custom_resources/custom_resources_code/crhelper/__init__.py diff --git a/cli/pcluster/resources/custom_resources/custom_resources_code/crhelper/log_helper.py b/cli/src/pcluster/resources/custom_resources/custom_resources_code/crhelper/log_helper.py similarity index 100% rename from cli/pcluster/resources/custom_resources/custom_resources_code/crhelper/log_helper.py rename to cli/src/pcluster/resources/custom_resources/custom_resources_code/crhelper/log_helper.py diff --git a/cli/pcluster/resources/custom_resources/custom_resources_code/crhelper/resource_helper.py b/cli/src/pcluster/resources/custom_resources/custom_resources_code/crhelper/resource_helper.py similarity index 100% rename from cli/pcluster/resources/custom_resources/custom_resources_code/crhelper/resource_helper.py rename to cli/src/pcluster/resources/custom_resources/custom_resources_code/crhelper/resource_helper.py diff --git a/cli/pcluster/resources/custom_resources/custom_resources_code/crhelper/utils.py b/cli/src/pcluster/resources/custom_resources/custom_resources_code/crhelper/utils.py similarity index 100% rename from cli/pcluster/resources/custom_resources/custom_resources_code/crhelper/utils.py rename to cli/src/pcluster/resources/custom_resources/custom_resources_code/crhelper/utils.py diff --git a/cli/pcluster/resources/custom_resources/custom_resources_code/manage_docker_images.py b/cli/src/pcluster/resources/custom_resources/custom_resources_code/manage_docker_images.py similarity index 100% rename from cli/pcluster/resources/custom_resources/custom_resources_code/manage_docker_images.py rename to cli/src/pcluster/resources/custom_resources/custom_resources_code/manage_docker_images.py diff --git a/cli/pcluster/resources/custom_resources/custom_resources_code/send_build_notification.py b/cli/src/pcluster/resources/custom_resources/custom_resources_code/send_build_notification.py similarity index 100% rename from cli/pcluster/resources/custom_resources/custom_resources_code/send_build_notification.py rename to cli/src/pcluster/resources/custom_resources/custom_resources_code/send_build_notification.py diff --git a/cli/pcluster/resources/custom_resources/custom_resources_code/wait_for_update.py b/cli/src/pcluster/resources/custom_resources/custom_resources_code/wait_for_update.py similarity index 100% rename from cli/pcluster/resources/custom_resources/custom_resources_code/wait_for_update.py rename to cli/src/pcluster/resources/custom_resources/custom_resources_code/wait_for_update.py diff --git a/cli/pcluster/utils.py b/cli/src/pcluster/utils.py similarity index 86% rename from cli/pcluster/utils.py rename to cli/src/pcluster/utils.py index 9d317aec46..d7844d509f 100644 --- a/cli/pcluster/utils.py +++ b/cli/src/pcluster/utils.py @@ -10,6 +10,9 @@ # limitations under the License. 
# fmt: off from __future__ import absolute_import, print_function # isort:skip + +import functools + from future import standard_library # isort:skip standard_library.install_aliases() # fmt: on @@ -46,7 +49,7 @@ class NodeType(Enum): """Enum that identifies the cluster node type.""" - master = "Master" + head_node = "Master" compute = "Compute" def __str__(self): @@ -311,25 +314,6 @@ def upload_resources_artifacts(bucket_name, artifact_directory, root): bucket.upload_file(os.path.join(root, res), "%s/%s" % (artifact_directory, res)) -def get_instance_vcpus(instance_type, instance_info=None): - """ - Get number of vcpus for the given instance type. - - :param instance_type: the instance type to search for. - :return: the number of vcpus or -1 if the instance type cannot be found - """ - try: - if not instance_info: - instance_info = get_instance_type(instance_type) - - vcpus_info = instance_info.get("VCpuInfo") - vcpus = vcpus_info.get("DefaultVCpus") - except (ClientError): - vcpus = -1 - - return vcpus - - def get_supported_instance_types(): """Return the list of instance types available in the given region.""" ec2_client = boto3.client("ec2") @@ -856,28 +840,28 @@ def describe_cluster_instances(stack_name, node_type): return instances -def _get_master_server_ip(stack_name): +def _get_head_node_ip(stack_name): """ - Get the IP Address of the MasterServer. + Get the IP Address of the head node. :param stack_name: The name of the cloudformation stack :param config: Config object :return private/public ip address """ - instances = describe_cluster_instances(stack_name, node_type=NodeType.master) + instances = describe_cluster_instances(stack_name, node_type=NodeType.head_node) if not instances: - error("MasterServer not running. Can't SSH") - master_instance = instances[0] - ip_address = master_instance.get("PublicIpAddress") + error("Head node not running. 
Can't SSH") + head_node = instances[0] + ip_address = head_node.get("PublicIpAddress") if ip_address is None: - ip_address = master_instance.get("PrivateIpAddress") - state = master_instance.get("State").get("Name") + ip_address = head_node.get("PrivateIpAddress") + state = head_node.get("State").get("Name") if state != "running" or ip_address is None: error("MasterServer: {0}\nCannot get ip address.".format(state.upper())) return ip_address -def get_master_ip_and_username(cluster_name): +def get_head_node_ip_and_username(cluster_name): cfn = boto3.client("cloudformation") try: stack_name = get_stack_name(cluster_name) @@ -886,15 +870,15 @@ def get_master_ip_and_username(cluster_name): stack_status = stack_result.get("StackStatus") if stack_status in ["DELETE_COMPLETE", "DELETE_IN_PROGRESS"]: - error("Unable to retrieve master_ip and username for a stack in the status: {0}".format(stack_status)) + error("Unable to retrieve head node ip and username for a stack in the status: {0}".format(stack_status)) else: - master_ip = _get_master_server_ip(stack_name) + head_node_ip = _get_head_node_ip(stack_name) template = cfn.get_template(StackName=stack_name) mappings = template.get("TemplateBody").get("Mappings").get("OSFeatures") base_os = get_cfn_param(stack_result.get("Parameters"), "BaseOS") username = mappings.get(base_os).get("User") - if not master_ip: + if not head_node_ip: error("Failed to get cluster {0} ip.".format(cluster_name)) if not username: error("Failed to get cluster {0} username.".format(cluster_name)) @@ -902,19 +886,19 @@ def get_master_ip_and_username(cluster_name): except ClientError as e: error(e.response.get("Error").get("Message")) - return master_ip, username + return head_node_ip, username -def get_master_server_state(stack_name): +def get_head_node_state(stack_name): """ - Get the State of the MasterServer. + Get the State of the head node. :param stack_name: The name of the cloudformation stack - :return master server state name + :return head node state name """ instances = describe_cluster_instances(stack_name, "Master") if not instances: - error("MasterServer not running.") + error("Head node not running.") return instances[0].get("State").get("Name") @@ -939,20 +923,6 @@ def validate_pcluster_version_based_on_ami_name(ami_name): ) -def get_instance_types_info(instance_types, fail_on_error=True): - """Return InstanceTypes list returned by EC2's DescribeInstanceTypes API.""" - try: - ec2_client = boto3.client("ec2") - return ec2_client.describe_instance_types(InstanceTypes=instance_types).get("InstanceTypes") - except ClientError as e: - error( - "Error when calling DescribeInstanceTypes for instances {0}: {1}".format( - ", ".join(instance_types), e.response.get("Error").get("Message") - ), - fail_on_error, - ) - - def get_supported_architectures_for_instance_type(instance_type): """Get a list of architectures supported for the given instance type.""" # "optimal" compute instance type (when using batch) implies the use of instances from the @@ -961,8 +931,8 @@ def get_supported_architectures_for_instance_type(instance_type): if instance_type == "optimal": return ["x86_64"] - instance_info = get_instance_types_info([instance_type])[0] - supported_architectures = instance_info.get("ProcessorInfo").get("SupportedArchitectures") + instance_info = InstanceTypeInfo.init_from_instance_type(instance_type) + supported_architectures = instance_info.supported_architecture() # Some instance types support multiple architectures (x86_64 and i386). Filter unsupported ones. 
supported_architectures = list(set(supported_architectures) & set(SUPPORTED_ARCHITECTURES)) @@ -1021,7 +991,7 @@ def get_batch_ce(stack_name): """ Get name of the AWS Batch Compute Environment. - :param stack_name: name of the master stack + :param stack_name: name of the head node stack :param config: config :return: ce_name or exit if not found """ @@ -1094,35 +1064,10 @@ def cluster_has_running_capacity(stack_name): return cluster_has_running_capacity.cached_result -def get_instance_type(instance_type): - ec2_client = boto3.client("ec2") - try: - return ec2_client.describe_instance_types(InstanceTypes=[instance_type]).get("InstanceTypes")[0] - except Exception as e: - LOGGER.error("Failed when retrieving instance type data for instance type %s: %s", instance_type, e) - raise e - - -def get_default_threads_per_core(instance_type, instance_info=None): - """Return the default threads per core for the given instance type.""" - # NOTE: currently, .metal instances do not contain the DefaultThreadsPerCore - # attribute in their VCpuInfo section. This is a known issue with the - # ec2 DescribeInstanceTypes API. For these instance types an assumption - # is made that if the instance's supported architectures list includes - # x86_64 then the default is 2, otherwise it's 1. - if instance_info is None: - instance_info = get_instance_type(instance_type) - threads_per_core = instance_info.get("VCpuInfo", {}).get("DefaultThreadsPerCore") - if threads_per_core is None: - supported_architectures = instance_info.get("ProcessorInfo", {}).get("SupportedArchitectures", []) - threads_per_core = 2 if "x86_64" in supported_architectures else 1 - return threads_per_core - - def disable_ht_via_cpu_options(instance_type, default_threads_per_core=None): """Return a boolean describing whether hyperthreading should be disabled via CPU options for instance_type.""" if default_threads_per_core is None: - default_threads_per_core = get_default_threads_per_core(instance_type) + default_threads_per_core = InstanceTypeInfo.init_from_instance_type(instance_type).default_threads_per_core() res = all( [ # If default threads per core is 1, HT doesn't need to be disabled @@ -1225,25 +1170,154 @@ def get_ebs_snapshot_info(ebs_snapshot_id, raise_exceptions=False): ) -def get_instance_network_interfaces(instance_type, instance_info=None): - """Return the number of network interfaces to configure for the instance type.""" - if not instance_info: - instance_info = get_instance_type(instance_type) - - # Until maximumNetworkCards is not available, 1 is a safe value for all instance types - needed_interfaces = int(instance_info.get("NetworkInfo").get("MaximumNetworkCards", 1)) - - return needed_interfaces - +def get_default_instance_type(): + """If the current region supports free tier, return the free tier instance type.
Otherwise, return t3.micro .""" + if not hasattr(get_default_instance_type, "cache"): + get_default_instance_type.cache = {} + cache = get_default_instance_type.cache + region = os.environ.get("AWS_DEFAULT_REGION") + if region not in cache: + free_tier_instance_type = [] + for page in paginate_boto3( + boto3.client("ec2").describe_instance_types, + Filters=[ + {"Name": "free-tier-eligible", "Values": ["true"]}, + {"Name": "current-generation", "Values": ["true"]}, + ], + ): + free_tier_instance_type.append(page) + cache[region] = free_tier_instance_type[0]["InstanceType"] if free_tier_instance_type else "t3.micro" + return cache[region] + + +class Cache: + """Simple utility class providing a cache mechanism for expensive functions.""" + + _caches = [] + + @staticmethod + def is_enabled(): + """Tell if the cache is enabled.""" + return not os.environ.get("PCLUSTER_CACHE_DISABLED") + + @staticmethod + def clear_all(): + """Clear the content of all caches.""" + for cache in Cache._caches: + cache.clear() + + @staticmethod + def _make_key(args, kwargs): + key = args + if kwargs: + for item in kwargs.items(): + key += item + return hash(key) + + @staticmethod + def cached(function): + """ + Decorate a function to make it use a results cache based on passed arguments. + + Note: all arguments must be hashable for this function to work properly. + """ + cache = {} + Cache._caches.append(cache) + + @functools.wraps(function) + def wrapper(*args, **kwargs): + cache_key = Cache._make_key(args, kwargs) + + if Cache.is_enabled() and cache_key in cache: + return cache[cache_key] + else: + return_value = function(*args, **kwargs) + if Cache.is_enabled(): + cache[cache_key] = return_value + return return_value + + return wrapper + + +class InstanceTypeInfo: + """Data object wrapping the result of a describe_instance_types call.""" + + def __init__(self, instance_type_data): + self.instance_type_data = instance_type_data + + @staticmethod + @Cache.cached + def init_from_instance_type(instance_type, exit_on_error=True): + """ + Init InstanceTypeInfo by performing a describe_instance_types call. + + Multiple calls for the same instance_type are cached. + The function exits with error if exit_on_error is set to True. + """ + try: + ec2_client = boto3.client("ec2") + return InstanceTypeInfo( + ec2_client.describe_instance_types(InstanceTypes=[instance_type]).get("InstanceTypes")[0] + ) + except ClientError as e: + error( + "Failed when retrieving instance type data for instance {0}: {1}".format( + instance_type, e.response.get("Error").get("Message") + ), + exit_on_error, + ) -def get_instance_gpus(instance_type, instance_info=None): - """Return the number of GPUs provided by the instance type.""" - if not instance_info: - instance_info = get_instance_type(instance_type) + def gpu_count(self): + """Return the number of GPUs for the instance.""" + gpu_info = self.instance_type_data.get("GpuInfo", None) + + gpu_count = 0 + if gpu_info: + for gpus in gpu_info.get("Gpus", []): + gpu_manufacturer = gpus.get("Manufacturer", "") + if gpu_manufacturer.upper() == "NVIDIA": + gpu_count += gpus.get("Count", 0) + else: + warn( + "ParallelCluster currently does not offer native support for '{0}' GPUs. 
" + "Please make sure to use a custom AMI with the appropriate drivers in order to leverage " + "GPUs functionalities".format(gpu_manufacturer) + ) + + return gpu_count + + def max_network_interface_count(self): + """Max number of NICs for the instance.""" + needed_interfaces = int(self.instance_type_data.get("NetworkInfo").get("MaximumNetworkCards", 1)) + return needed_interfaces + + def default_threads_per_core(self): + """Return the default threads per core for the given instance type.""" + # NOTE: currently, .metal instances do not contain the DefaultThreadsPerCore + # attribute in their VCpuInfo section. This is a known issue with the + # ec2 DescribeInstanceTypes API. For these instance types an assumption + # is made that if the instance's supported architectures list includes + # x86_64 then the default is 2, otherwise it's 1. + threads_per_core = self.instance_type_data.get("VCpuInfo", {}).get("DefaultThreadsPerCore") + if threads_per_core is None: + supported_architectures = self.instance_type_data.get("ProcessorInfo", {}).get("SupportedArchitectures", []) + threads_per_core = 2 if "x86_64" in supported_architectures else 1 + return threads_per_core + + def vcpus_count(self): + """Get number of vcpus for the given instance type.""" + try: + vcpus_info = self.instance_type_data.get("VCpuInfo") + vcpus = vcpus_info.get("DefaultVCpus") + except ClientError: + vcpus = -1 - gpu_info = instance_info.get("GpuInfo", None) + return vcpus - # Currently adding up all gpus. To be reviewed if the case of heterogeneous GPUs arises. - gpus = sum([gpus.get("Count") for gpus in gpu_info.get("Gpus")]) if gpu_info else 0 + def supported_architecture(self): + """Return the list of supported architectures.""" + return self.instance_type_data.get("ProcessorInfo").get("SupportedArchitectures") - return gpus + def is_efa_supported(self): + """Check whether EFA is supported.""" + return self.instance_type_data.get("NetworkInfo").get("EfaSupported") diff --git a/cli/pcluster_config/__init__.py b/cli/src/pcluster_config/__init__.py similarity index 100% rename from cli/pcluster_config/__init__.py rename to cli/src/pcluster_config/__init__.py diff --git a/cli/pcluster_config/cli.py b/cli/src/pcluster_config/cli.py similarity index 97% rename from cli/pcluster_config/cli.py rename to cli/src/pcluster_config/cli.py index 6befd5e2ac..bc5f1a4cd0 100644 --- a/cli/pcluster_config/cli.py +++ b/cli/src/pcluster_config/cli.py @@ -12,10 +12,11 @@ # See the License for the specific language governing permissions and limitations under the License. # -import argparse import os import sys +import argparse + from pcluster.config.hit_converter import HitConverter from pcluster.config.pcluster_config import PclusterConfig, default_config_file_path @@ -31,9 +32,7 @@ def _parse_args(argv=None): default_config_file = default_config_file_path() parser = argparse.ArgumentParser( - description=( - "Updates the AWS ParallelCluster configuration file." - ), + description=("Updates the AWS ParallelCluster configuration file."), epilog='For command specific flags, please run: "pcluster-config [command] --help"', ) subparsers = parser.add_subparsers() diff --git a/cli/tests/conftest.py b/cli/tests/conftest.py index 76c854e1e1..0cad54ed55 100644 --- a/cli/tests/conftest.py +++ b/cli/tests/conftest.py @@ -19,6 +19,20 @@ def clear_env(): del os.environ["AWS_DEFAULT_REGION"] +@pytest.fixture(autouse=True) +def mock_default_instance(mocker, request): + """ + Mock get_default_instance_type for all tests. 
+ + To disable the mock for certain tests, add the marker `@pytest.mark.nomockdefaultinstance` to the tests. + To disable the mock for an entire file, declare the global variable `pytestmark = pytest.mark.nomockdefaultinstance` + """ + if "nomockdefaultinstance" in request.keywords: + # skip mocking + return + mocker.patch("pcluster.config.cfn_param_types.get_default_instance_type", return_value="t2.micro") + + @pytest.fixture def failed_with_message(capsys): """Assert that the command exited with a specific error message.""" @@ -55,7 +69,7 @@ def test_datadir(request, datadir): @pytest.fixture() def convert_to_date_mock(request, mocker): """Mock convert_to_date function by enforcing the timezone to UTC.""" - module_under_test = request.module.__name__.replace("test_", "") + module_under_test = request.node.fspath.purebasename.replace("test_", "") def _convert_to_date_utc(*args, **kwargs): from dateutil import tz @@ -147,7 +161,7 @@ def _boto3_stubber(service, mocked_requests): @pytest.fixture() def awsbatchcliconfig_mock(request, mocker): """Mock AWSBatchCliConfig object with a default mock.""" - module_under_test = request.module.__name__.replace("test_", "") + module_under_test = request.node.fspath.purebasename.replace("test_", "") mock = mocker.patch("awsbatch." + module_under_test + ".AWSBatchCliConfig", autospec=True) for key, value in DEFAULT_AWSBATCHCLICONFIG_MOCK_CONFIG.items(): setattr(mock.return_value, key, value) diff --git a/cli/tests/pcluster/config/defaults.py b/cli/tests/pcluster/config/defaults.py index 33e8bab722..2aa3ef4741 100644 --- a/cli/tests/pcluster/config/defaults.py +++ b/cli/tests/pcluster/config/defaults.py @@ -40,10 +40,11 @@ "ebs_snapshot_id": None, "volume_type": "gp2", "volume_size": None, - "volume_iops": 100, + "volume_iops": None, "encrypted": False, "ebs_kms_key_id": None, "ebs_volume_id": None, + "volume_throughput": 125, } DEFAULT_EFS_DICT = { @@ -62,9 +63,10 @@ "num_of_raid_volumes": 2, "volume_type": "gp2", "volume_size": 20, - "volume_iops": 100, + "volume_iops": None, "encrypted": False, "ebs_kms_key_id": None, + "volume_throughput": 125, } DEFAULT_FSX_DICT = { @@ -99,9 +101,9 @@ "shared_dir": "/shared", "placement_group": None, "placement": "compute", - "master_instance_type": "t2.micro", + "master_instance_type": None, "master_root_volume_size": 25, - "compute_instance_type": "t2.micro", + "compute_instance_type": None, "compute_root_volume_size": 25, "initial_queue_size": 0, "max_queue_size": 10, @@ -145,6 +147,7 @@ "architecture": "x86_64", "network_interfaces_count": ["1", "1"], "cluster_resource_bucket": None, + "iam_lambda_role": None, } DEFAULT_CLUSTER_HIT_DICT = { @@ -155,7 +158,7 @@ "base_os": None, # base_os does not have a default, but this is here to make testing easier "scheduler": None, # The cluster does not have a default, but this is here to make testing easier "shared_dir": "/shared", - "master_instance_type": "t2.micro", + "master_instance_type": None, "master_root_volume_size": 25, "compute_root_volume_size": 25, "proxy_server": None, @@ -194,6 +197,7 @@ "architecture": "x86_64", "network_interfaces_count": ["1", "1"], "cluster_resource_bucket": None, # cluster_resource_bucket has no default, but this is here to make testing easier + "iam_lambda_role": None, } DEFAULT_CW_LOG_DICT = {"enable": True, "retention_days": 14} @@ -226,8 +230,8 @@ class DefaultDict(Enum): # ------------------ Default CFN parameters ------------------ # # number of CFN parameters created by the PclusterConfig object.
-CFN_SIT_CONFIG_NUM_OF_PARAMS = 61 -CFN_HIT_CONFIG_NUM_OF_PARAMS = 52 +CFN_SIT_CONFIG_NUM_OF_PARAMS = 63 +CFN_HIT_CONFIG_NUM_OF_PARAMS = 53 # CFN parameters created by the pcluster CLI CFN_CLI_RESERVED_PARAMS = ["ArtifactS3RootDirectory", "RemoveBucketOnDeletion"] @@ -252,15 +256,16 @@ class DefaultDict(Enum): "EBSSnapshotId": "NONE,NONE,NONE,NONE,NONE", "VolumeType": "gp2,gp2,gp2,gp2,gp2", "VolumeSize": "NONE,NONE,NONE,NONE,NONE", - "VolumeIOPS": "100,100,100,100,100", + "VolumeIOPS": "NONE,NONE,NONE,NONE,NONE", "EBSEncryption": "false,false,false,false,false", "EBSKMSKeyId": "NONE,NONE,NONE,NONE,NONE", "EBSVolumeId": "NONE,NONE,NONE,NONE,NONE", + "VolumeThroughput": "125,125,125,125,125", } DEFAULT_EFS_CFN_PARAMS = {"EFSOptions": "NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE"} -DEFAULT_RAID_CFN_PARAMS = {"RAIDOptions": "NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE"} +DEFAULT_RAID_CFN_PARAMS = {"RAIDOptions": "NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE"} DEFAULT_FSX_CFN_PARAMS = { "FSXOptions": "NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE" @@ -325,14 +330,15 @@ class DefaultDict(Enum): "EBSSnapshotId": "NONE,NONE,NONE,NONE,NONE", "VolumeType": "gp2,gp2,gp2,gp2,gp2", "VolumeSize": "NONE,NONE,NONE,NONE,NONE", - "VolumeIOPS": "100,100,100,100,100", + "VolumeIOPS": "NONE,NONE,NONE,NONE,NONE", "EBSEncryption": "false,false,false,false,false", "EBSKMSKeyId": "NONE,NONE,NONE,NONE,NONE", "EBSVolumeId": "NONE,NONE,NONE,NONE,NONE", + "VolumeThroughput": "125,125,125,125,125", # efs "EFSOptions": "NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE", # raid - "RAIDOptions": "NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE", + "RAIDOptions": "NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE", # fsx "FSXOptions": "NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE", # dcv @@ -343,6 +349,7 @@ # architecture "Architecture": "x86_64", "NetworkInterfacesCount": "1,1", + "IAMLambdaRoleName": "NONE", } @@ -394,14 +401,15 @@ class DefaultDict(Enum): "EBSSnapshotId": "NONE,NONE,NONE,NONE,NONE", "VolumeType": "gp2,gp2,gp2,gp2,gp2", "VolumeSize": "NONE,NONE,NONE,NONE,NONE", - "VolumeIOPS": "100,100,100,100,100", + "VolumeIOPS": "NONE,NONE,NONE,NONE,NONE", "EBSEncryption": "false,false,false,false,false", "EBSKMSKeyId": "NONE,NONE,NONE,NONE,NONE", "EBSVolumeId": "NONE,NONE,NONE,NONE,NONE", + "VolumeThroughput": "125,125,125,125,125", # efs "EFSOptions": "NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE", # raid - "RAIDOptions": "NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE", + "RAIDOptions": "NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE", # fsx "FSXOptions": "NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE", # dcv @@ -412,6 +420,7 @@ # architecture "Architecture": "x86_64", "NetworkInterfacesCount": "1,1", + "IAMLambdaRoleName": "NONE", } diff --git a/cli/tests/pcluster/config/test_config_patch.py b/cli/tests/pcluster/config/test_config_patch.py index 3100dd83d8..fdefcfed74 100644 --- a/cli/tests/pcluster/config/test_config_patch.py +++ b/cli/tests/pcluster/config/test_config_patch.py @@ -16,6 +16,7 @@ from pcluster.config.config_patch import Change, ConfigPatch from pcluster.config.pcluster_config import PclusterConfig from pcluster.config.update_policy import UpdatePolicy +from pcluster.utils import InstanceTypeInfo from tests.pcluster.config.utils import duplicate_config_file default_cluster_params = { @@ -35,7 +36,16 @@ def _do_mocking_for_tests(mocker): mocker.patch(
"pcluster.config.cfn_param_types.get_supported_architectures_for_instance_type", return_value=["x86_64"] ) - mocker.patch("pcluster.config.cfn_param_types.get_instance_network_interfaces", return_value=1) + mocker.patch( + "pcluster.config.cfn_param_types.InstanceTypeInfo.init_from_instance_type", + return_value=InstanceTypeInfo( + { + "InstanceType": "g4dn.metal", + "VCpuInfo": {"DefaultVCpus": 96, "DefaultCores": 48, "DefaultThreadsPerCore": 2}, + "NetworkInfo": {"EfaSupported": True, "MaximumNetworkCards": 1}, + } + ), + ) def _check_patch(src_conf, dst_conf, expected_changes, expected_patch_policy): diff --git a/cli/tests/pcluster/config/test_hit_converter.py b/cli/tests/pcluster/config/test_hit_converter.py index f034553b06..971fbc7606 100644 --- a/cli/tests/pcluster/config/test_hit_converter.py +++ b/cli/tests/pcluster/config/test_hit_converter.py @@ -14,8 +14,7 @@ from pcluster.cluster_model import ClusterModel from pcluster.config.hit_converter import HitConverter -from pcluster.utils import is_hit_enabled_scheduler -from tests.common import MockedBoto3Request +from pcluster.utils import InstanceTypeInfo from tests.pcluster.config.utils import init_pcluster_config_from_configparser @@ -170,25 +169,16 @@ def test_hit_converter(mocker, boto3_stubber, src_config_dict, dst_config_dict): scheduler = src_config_dict["cluster default"]["scheduler"] instance_type = src_config_dict["cluster default"]["compute_instance_type"] - mocker.patch("pcluster.config.cfn_param_types.get_instance_network_interfaces", return_value=1) - - if is_hit_enabled_scheduler(scheduler): - mocked_requests = [ - MockedBoto3Request( - method="describe_instance_types", - response={ - "InstanceTypes": [ - { - "InstanceType": instance_type, - "VCpuInfo": {"DefaultVCpus": 96, "DefaultCores": 48, "DefaultThreadsPerCore": 2}, - "NetworkInfo": {"EfaSupported": True}, - } - ] - }, - expected_params={"InstanceTypes": [instance_type]}, - ) - ] - boto3_stubber("ec2", mocked_requests) + mocker.patch( + "pcluster.config.cfn_param_types.InstanceTypeInfo.init_from_instance_type", + return_value=InstanceTypeInfo( + { + "InstanceType": instance_type, + "VCpuInfo": {"DefaultVCpus": 96, "DefaultCores": 48, "DefaultThreadsPerCore": 2}, + "NetworkInfo": {"EfaSupported": True, "MaximumNetworkCards": 1}, + } + ), + ) config_parser = configparser.ConfigParser() diff --git a/cli/tests/pcluster/config/test_json_param_types.py b/cli/tests/pcluster/config/test_json_param_types.py index 705f5db600..3aa3057513 100644 --- a/cli/tests/pcluster/config/test_json_param_types.py +++ b/cli/tests/pcluster/config/test_json_param_types.py @@ -114,6 +114,13 @@ } +@pytest.fixture(autouse=True) +def clear_cache(): + from pcluster.utils import Cache + + Cache.clear_all() + + @pytest.fixture() def boto3_stubber_path(): return "pcluster.utils.boto3" @@ -140,7 +147,7 @@ def test_config_to_json(capsys, boto3_stubber, test_datadir, pcluster_config_rea expected_json_params = _prepare_json_config(queues, test_datadir) # Mock expected boto3 calls - _mock_boto3(boto3_stubber, expected_json_params, master_instance_type="c4.xlarge") + _mock_boto3(boto3_stubber, expected_json_params, head_node_instance_type="c4.xlarge") # Load config from created config file dst_config_file = pcluster_config_reader(dst_config_file, queue_settings=queue_settings) @@ -172,7 +179,7 @@ def mock_get_avail_zone(subnet_id): expected_json_params = _prepare_json_config(queues, test_datadir) # Mock expected boto3 calls - _mock_boto3(boto3_stubber, expected_json_params, 
master_instance_type="t2.micro") + _mock_boto3(boto3_stubber, expected_json_params, head_node_instance_type="t2.micro") pcluster_config = get_mocked_pcluster_config(mocker, auto_refresh=False) cluster_section = CfnSection(CLUSTER_HIT, pcluster_config, section_label="default") @@ -237,30 +244,27 @@ def _prepare_json_config(queues, test_datadir): return expected_json_params -def _mock_boto3(boto3_stubber, expected_json_params, master_instance_type=None): +def _mock_boto3(boto3_stubber, expected_json_params, head_node_instance_type=None): """Mock the boto3 client based on the expected json configuration.""" expected_json_queue_settings = expected_json_params["cluster"].get("queue_settings", {}) mocked_requests = [] + instance_types = [] + # One describe_instance_type for the Head node + if head_node_instance_type: + instance_types.append(head_node_instance_type) + + # One describe_instance_type per compute resource + for _, queue in expected_json_queue_settings.items(): + for _, compute_resource in queue.get("compute_resource_settings", {}).items(): + if compute_resource["instance_type"] not in instance_types: + instance_types.append(compute_resource["instance_type"]) - # One describe_instance_type for the Master node - if master_instance_type: + for instance_type in instance_types: mocked_requests.append( MockedBoto3Request( method="describe_instance_types", - response=DESCRIBE_INSTANCE_TYPES_RESPONSES[master_instance_type], - expected_params={"InstanceTypes": [master_instance_type]}, + response=DESCRIBE_INSTANCE_TYPES_RESPONSES[instance_type], + expected_params={"InstanceTypes": [instance_type]}, ) ) - - # One describe_instance_type per compute resource - for _, queue in expected_json_queue_settings.items(): - for _, compute_resource in queue.get("compute_resource_settings", {}).items(): - instance_type = compute_resource["instance_type"] - mocked_requests.append( - MockedBoto3Request( - method="describe_instance_types", - response=DESCRIBE_INSTANCE_TYPES_RESPONSES[instance_type], - expected_params={"InstanceTypes": [instance_type]}, - ) - ) boto3_stubber("ec2", mocked_requests) diff --git a/cli/tests/pcluster/config/test_runtime.py b/cli/tests/pcluster/config/test_runtime.py index e2f9450aa9..ad28cd606d 100644 --- a/cli/tests/pcluster/config/test_runtime.py +++ b/cli/tests/pcluster/config/test_runtime.py @@ -20,7 +20,11 @@ def test_update_sections(mocker, pcluster_config_reader): mocker.patch( "pcluster.config.cfn_param_types.get_supported_architectures_for_instance_type", return_value=["x86_64"] ) - mocker.patch("pcluster.config.cfn_param_types.get_instance_network_interfaces", return_value=1) + instance_type_info_mock = mocker.MagicMock() + mocker.patch( + "pcluster.config.cfn_param_types.InstanceTypeInfo.init_from_instance_type", return_value=instance_type_info_mock + ) + instance_type_info_mock.max_network_interface_count.return_value = 1 pcluster_config = PclusterConfig( cluster_label="default", config_file=pcluster_config_reader(), fail_on_file_absence=True, fail_on_error=True ) diff --git a/cli/tests/pcluster/config/test_section_cluster.py b/cli/tests/pcluster/config/test_section_cluster.py index 4ce1257ee7..910f690397 100644 --- a/cli/tests/pcluster/config/test_section_cluster.py +++ b/cli/tests/pcluster/config/test_section_cluster.py @@ -38,6 +38,8 @@ "base_os": "alinux2", "scheduler": "slurm", "cluster_config_metadata": {"sections": {"cluster": ["custom_cluster_label"]}}, + "master_instance_type": "t2.micro", + "compute_instance_type": "t2.micro", }, ), "custom_cluster_label", @@ 
-50,6 +52,8 @@ "additional_iam_policies": ["arn:aws:iam::aws:policy/CloudWatchAgentServerPolicy"], "base_os": "alinux2", "scheduler": "slurm", + "master_instance_type": "t2.micro", + "compute_instance_type": "t2.micro", }, ), "default", @@ -86,6 +90,8 @@ "arn:aws:iam::aws:policy/AWSBatchFullAccess", "arn:aws:iam::aws:policy/CloudWatchAgentServerPolicy", ], + "master_instance_type": "t2.micro", + "compute_instance_type": "t2.micro", }, ), "default", @@ -265,9 +271,8 @@ def test_hit_cluster_section_from_file(mocker, config_parser_dict, expected_dict ("placement", "wrong_value", None, "has an invalid value"), ("placement", "NONE", None, "has an invalid value"), ("placement", "cluster", "cluster", None), - # Master + # Head node # TODO add regex for master_instance_type - ("master_instance_type", None, "t2.micro", None), ("master_instance_type", "", "", None), ("master_instance_type", "test", "test", None), ("master_instance_type", "NONE", "NONE", None), @@ -281,7 +286,6 @@ def test_hit_cluster_section_from_file(mocker, config_parser_dict, expected_dict ("master_root_volume_size", "31", 31, None), # Compute fleet # TODO add regex for compute_instance_type - ("compute_instance_type", None, "t2.micro", None), ("compute_instance_type", "", "", None), ("compute_instance_type", "test", "test", None), ("compute_instance_type", "NONE", "NONE", None), @@ -541,9 +545,8 @@ def test_sit_cluster_param_from_file( ("shared_dir", "/test//test2", None, "has an invalid value"), ("shared_dir", "/test\\test2", None, "has an invalid value"), ("shared_dir", "NONE", "NONE", None), # NONE is evaluated as a valid path - # Master + # Head node # TODO add regex for master_instance_type - ("master_instance_type", None, "t2.micro", None), ("master_instance_type", "", "", None), ("master_instance_type", "test", "test", None), ("master_instance_type", "NONE", "NONE", None), @@ -801,11 +804,18 @@ def test_sit_cluster_section_to_file(mocker, section_dict, expected_config_parse def test_cluster_section_to_cfn( mocker, cluster_section_definition, section_dict, expected_cfn_params, default_threads_per_core ): + section_dict["master_instance_type"] = "t2.micro" + if cluster_section_definition == CLUSTER_SIT: + section_dict["compute_instance_type"] = "t2.micro" utils.set_default_values_for_required_cluster_section_params(section_dict) utils.mock_pcluster_config(mocker) mocker.patch("pcluster.config.cfn_param_types.get_efs_mount_target_id", return_value="valid_mount_target_id") - mocker.patch("pcluster.config.cfn_param_types.get_instance_vcpus", return_value=4) - mocker.patch("pcluster.config.cfn_param_types.get_default_threads_per_core", side_effect=default_threads_per_core) + instance_type_info_mock = mocker.MagicMock() + mocker.patch( + "pcluster.config.cfn_param_types.InstanceTypeInfo.init_from_instance_type", return_value=instance_type_info_mock + ) + instance_type_info_mock.vcpus_count.return_value = 4 + instance_type_info_mock.default_threads_per_core.side_effect = default_threads_per_core utils.assert_section_to_cfn(mocker, cluster_section_definition, section_dict, expected_cfn_params) @@ -1007,7 +1017,7 @@ def test_cluster_section_to_cfn( "SharedDir": "ebs1,NONE,NONE,NONE,NONE", "VolumeType": "io1,gp2,gp2,gp2,gp2", "VolumeSize": "40,NONE,NONE,NONE,NONE", - "VolumeIOPS": "200,100,100,100,100", + "VolumeIOPS": "200,NONE,NONE,NONE,NONE", "EBSEncryption": "true,false,false,false,false", "EBSKMSKeyId": "kms_key,NONE,NONE,NONE,NONE", "EBSVolumeId": "vol-12345678,NONE,NONE,NONE,NONE", @@ -1027,7 +1037,7 @@ def 
test_cluster_section_to_cfn( "SharedDir": "ebs1,ebs2,NONE,NONE,NONE", "VolumeType": "io1,standard,gp2,gp2,gp2", "VolumeSize": "40,30,NONE,NONE,NONE", - "VolumeIOPS": "200,300,100,100,100", + "VolumeIOPS": "200,300,NONE,NONE,NONE", "EBSEncryption": "true,false,false,false,false", "EBSKMSKeyId": "kms_key,NONE,NONE,NONE,NONE", "EBSVolumeId": "vol-12345678,NONE,NONE,NONE,NONE", @@ -1047,7 +1057,7 @@ def test_cluster_section_to_cfn( "SharedDir": "/shared", "VolumeType": "standard,gp2,gp2,gp2,gp2", "VolumeSize": "30,NONE,NONE,NONE,NONE", - "VolumeIOPS": "300,100,100,100,100", + "VolumeIOPS": "300,NONE,NONE,NONE,NONE", "EBSEncryption": "false,false,false,false,false", "EBSKMSKeyId": "NONE,NONE,NONE,NONE,NONE", "EBSVolumeId": "NONE,NONE,NONE,NONE,NONE", @@ -1067,7 +1077,7 @@ def test_cluster_section_to_cfn( "SharedDir": "/work", "VolumeType": "standard,gp2,gp2,gp2,gp2", "VolumeSize": "30,NONE,NONE,NONE,NONE", - "VolumeIOPS": "300,100,100,100,100", + "VolumeIOPS": "300,NONE,NONE,NONE,NONE", "EBSEncryption": "false,false,false,false,false", "EBSKMSKeyId": "NONE,NONE,NONE,NONE,NONE", "EBSVolumeId": "NONE,NONE,NONE,NONE,NONE", @@ -1087,7 +1097,7 @@ def test_cluster_section_to_cfn( "SharedDir": "ebs1,NONE,NONE,NONE,NONE", "VolumeType": "io1,gp2,gp2,gp2,gp2", "VolumeSize": "40,NONE,NONE,NONE,NONE", - "VolumeIOPS": "200,100,100,100,100", + "VolumeIOPS": "200,NONE,NONE,NONE,NONE", "EBSEncryption": "true,false,false,false,false", "EBSKMSKeyId": "kms_key,NONE,NONE,NONE,NONE", "EBSVolumeId": "vol-12345678,NONE,NONE,NONE,NONE", @@ -1115,14 +1125,14 @@ def test_cluster_section_to_cfn( "SharedDir": "ebs1,NONE,NONE,NONE,NONE", "VolumeType": "io1,gp2,gp2,gp2,gp2", "VolumeSize": "40,NONE,NONE,NONE,NONE", - "VolumeIOPS": "200,100,100,100,100", + "VolumeIOPS": "200,NONE,NONE,NONE,NONE", "EBSEncryption": "true,false,false,false,false", "EBSKMSKeyId": "kms_key,NONE,NONE,NONE,NONE", "EBSVolumeId": "vol-12345678,NONE,NONE,NONE,NONE", # efs "EFSOptions": "efs,NONE,generalPurpose,NONE,NONE,false,bursting,Valid,NONE", # raid - "RAIDOptions": "raid,NONE,2,gp2,20,100,false,NONE", + "RAIDOptions": "raid,NONE,2,gp2,20,NONE,false,NONE,125", # fsx "FSXOptions": "fsx,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE," "NONE,NONE", @@ -1181,14 +1191,14 @@ def test_cluster_section_to_cfn( "SharedDir": "ebs1,NONE,NONE,NONE,NONE", "VolumeType": "io1,gp2,gp2,gp2,gp2", "VolumeSize": "40,NONE,NONE,NONE,NONE", - "VolumeIOPS": "200,100,100,100,100", + "VolumeIOPS": "200,NONE,NONE,NONE,NONE", "EBSEncryption": "true,false,false,false,false", "EBSKMSKeyId": "kms_key,NONE,NONE,NONE,NONE", "EBSVolumeId": "vol-12345678,NONE,NONE,NONE,NONE", # efs "EFSOptions": "efs,NONE,generalPurpose,NONE,NONE,false,bursting,Valid,NONE", # raid - "RAIDOptions": "raid,NONE,2,gp2,20,100,false,NONE", + "RAIDOptions": "raid,NONE,2,gp2,20,NONE,false,NONE,125", # fsx "FSXOptions": "fsx,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE," "NONE,NONE", @@ -1210,7 +1220,7 @@ def test_sit_cluster_from_file_to_cfn(mocker, pcluster_config_reader, settings_l side_effect=lambda subnet: "mocked_avail_zone" if subnet == "subnet-12345678" else "some_other_az", ) - mocker.patch("pcluster.config.cfn_param_types.get_instance_vcpus", return_value=2) + mocker.patch("pcluster.config.cfn_param_types.InstanceTypeInfo.vcpus_count", return_value=2) utils.assert_section_params(mocker, pcluster_config_reader, settings_label, expected_cfn_params) diff --git a/cli/tests/pcluster/config/test_section_ebs.py b/cli/tests/pcluster/config/test_section_ebs.py 
index c6c4058dac..1977605290 100644 --- a/cli/tests/pcluster/config/test_section_ebs.py +++ b/cli/tests/pcluster/config/test_section_ebs.py @@ -69,6 +69,7 @@ def test_ebs_section_from_cfn(mocker, cfn_params_dict, expected_section_dict): ({"volume_type": "gp2"}, {"ebs default": {"volume_type": "gp2"}}, "No section"), # other values ({"volume_type": "io1"}, {"ebs default": {"volume_type": "io1"}}, None), + ({"volume_type": "io2"}, {"ebs default": {"volume_type": "io2"}}, None), ({"volume_size": 30}, {"ebs default": {"volume_size": "30"}}, None), ], ) @@ -86,10 +87,11 @@ def test_ebs_section_to_file(mocker, section_dict, expected_config_parser_dict, "EBSSnapshotId": "NONE", "VolumeType": "gp2", "VolumeSize": "NONE", - "VolumeIOPS": "100", + "VolumeIOPS": "NONE", "EBSEncryption": "false", "EBSKMSKeyId": "NONE", "EBSVolumeId": "NONE", + "VolumeThroughput": "125", }, ), ( @@ -102,6 +104,7 @@ test_ebs_section_to_file(mocker, section_dict, expected_config_parser_dict, "encrypted": True, "ebs_kms_key_id": "test", "ebs_volume_id": "test", + "volume_throughput": "125", }, { "SharedDir": "test", @@ -112,6 +115,7 @@ test_ebs_section_to_file(mocker, section_dict, expected_config_parser_dict, "EBSEncryption": "true", "EBSKMSKeyId": "test", "EBSVolumeId": "test", + "VolumeThroughput": "125", }, ), ], @@ -155,6 +159,7 @@ def test_ebs_section_to_cfn(mocker, section_dict, expected_cfn_params): ("volume_type", None, "gp2", None), ("volume_type", "wrong_value", None, "Allowed values are"), ("volume_type", "io1", "io1", None), + ("volume_type", "io2", "io2", None), ("volume_type", "standard", "standard", None), ("volume_type", "NONE", None, "Allowed values are"), ("volume_size", None, None, None), @@ -163,12 +168,17 @@ ("volume_size", "wrong_value", None, "must be an Integer"), ("volume_size", "10", 10, None), ("volume_size", "3", 3, None), - ("volume_iops", None, 100, None), + ("volume_iops", None, None, None), ("volume_iops", "", None, "must be an Integer"), ("volume_iops", "NONE", None, "must be an Integer"), ("volume_iops", "wrong_value", None, "must be an Integer"), ("volume_iops", "10", 10, None), ("volume_iops", "3", 3, None), + ("volume_throughput", None, 125, None), + ("volume_throughput", "", None, "must be an Integer"), + ("volume_throughput", "NONE", None, "must be an Integer"), + ("volume_throughput", "wrong_value", None, "must be an Integer"), + ("volume_throughput", "200", 200, None), ("encrypted", None, False, None), ("encrypted", "", None, "must be a Boolean"), ("encrypted", "NONE", None, "must be a Boolean"), @@ -205,10 +215,11 @@ def test_ebs_param_from_file(mocker, param_key, param_value, expected_value, exp "SharedDir": "ebs1,NONE,NONE,NONE,NONE", "VolumeType": "io1,gp2,gp2,gp2,gp2", "VolumeSize": "40,NONE,NONE,NONE,NONE", - "VolumeIOPS": "200,100,100,100,100", + "VolumeIOPS": "200,NONE,NONE,NONE,NONE", "EBSEncryption": "true,false,false,false,false", "EBSKMSKeyId": "kms_key,NONE,NONE,NONE,NONE", "EBSVolumeId": "vol-12345678,NONE,NONE,NONE,NONE", + "VolumeThroughput": "125,125,125,125,125", }, ), ), @@ -221,10 +232,27 @@ def test_ebs_param_from_file(mocker, param_key, param_value, expected_value, exp "SharedDir": "ebs2,NONE,NONE,NONE,NONE", "VolumeType": "standard,gp2,gp2,gp2,gp2", "VolumeSize": "30,NONE,NONE,NONE,NONE", - "VolumeIOPS": "300,100,100,100,100", + "VolumeIOPS": "300,NONE,NONE,NONE,NONE", + "EBSEncryption": "false,false,false,false,false", + "EBSKMSKeyId": "NONE,NONE,NONE,NONE,NONE", + "EBSVolumeId":
"NONE,NONE,NONE,NONE,NONE", + }, + ), + ), + ( + "ebs3", + utils.merge_dicts( + DefaultCfnParams["cluster_sit"].value, + { + "NumberOfEBSVol": "1", + "SharedDir": "ebs3,NONE,NONE,NONE,NONE", + "VolumeType": "gp3,gp2,gp2,gp2,gp2", + "VolumeSize": "30,NONE,NONE,NONE,NONE", + "VolumeIOPS": "3000,NONE,NONE,NONE,NONE", "EBSEncryption": "false,false,false,false,false", "EBSKMSKeyId": "NONE,NONE,NONE,NONE,NONE", "EBSVolumeId": "NONE,NONE,NONE,NONE,NONE", + "VolumeThroughput": "150,125,125,125,125", }, ), ), diff --git a/cli/tests/pcluster/config/test_section_ebs/test_ebs_from_file_to_cfn/pcluster.config.ini b/cli/tests/pcluster/config/test_section_ebs/test_ebs_from_file_to_cfn/pcluster.config.ini index 761f3ca45b..37217fe2f3 100644 --- a/cli/tests/pcluster/config/test_section_ebs/test_ebs_from_file_to_cfn/pcluster.config.ini +++ b/cli/tests/pcluster/config/test_section_ebs/test_ebs_from_file_to_cfn/pcluster.config.ini @@ -27,3 +27,9 @@ volume_size = 30 volume_iops = 300 encrypted = false +[ebs ebs3] +shared_dir = ebs3 +volume_type = gp3 +volume_size = 30 +volume_iops = 3000 +volume_throughput =150 diff --git a/cli/tests/pcluster/config/test_section_efs.py b/cli/tests/pcluster/config/test_section_efs.py index d90e71f42d..931e355ab8 100644 --- a/cli/tests/pcluster/config/test_section_efs.py +++ b/cli/tests/pcluster/config/test_section_efs.py @@ -164,7 +164,8 @@ def test_efs_param_from_file(mocker, param_key, param_value, expected_value, exp def test_efs_section_to_cfn(mocker, section_dict, expected_cfn_params): mocker.patch("pcluster.config.cfn_param_types.get_efs_mount_target_id", return_value="valid_mount_target_id") mocker.patch( - "pcluster.config.pcluster_config.PclusterConfig.get_master_availability_zone", return_value="mocked_avail_zone" + "pcluster.config.pcluster_config.PclusterConfig.get_head_node_availability_zone", + return_value="mocked_avail_zone", ) utils.assert_section_to_cfn(mocker, EFS, section_dict, expected_cfn_params) diff --git a/cli/tests/pcluster/config/test_section_efs/test_efs_from_file_to_cfn/pcluster.config.ini b/cli/tests/pcluster/config/test_section_efs/test_efs_from_file_to_cfn/pcluster.config.ini index dada4b6d7c..3ba70269b6 100644 --- a/cli/tests/pcluster/config/test_section_efs/test_efs_from_file_to_cfn/pcluster.config.ini +++ b/cli/tests/pcluster/config/test_section_efs/test_efs_from_file_to_cfn/pcluster.config.ini @@ -13,7 +13,7 @@ scheduler = slurm base_os = alinux2 [vpc default] -# EFS conversion requires master subnet id to check mount-target avail zone +# EFS conversion requires head node subnet id to check mount-target avail zone master_subnet_id = subnet-12345678 compute_subnet_id = subnet-23456789 diff --git a/cli/tests/pcluster/config/test_section_fsx/test_fsx_from_file_to_cfn/pcluster.config.ini b/cli/tests/pcluster/config/test_section_fsx/test_fsx_from_file_to_cfn/pcluster.config.ini index 89d8fdc419..3ac84626d3 100644 --- a/cli/tests/pcluster/config/test_section_fsx/test_fsx_from_file_to_cfn/pcluster.config.ini +++ b/cli/tests/pcluster/config/test_section_fsx/test_fsx_from_file_to_cfn/pcluster.config.ini @@ -13,7 +13,7 @@ scheduler = slurm base_os = alinux2 [vpc default] -# FSX conversion requires master subnet id to check mount-target +# FSX conversion requires head node subnet id to check mount-target master_subnet_id = subnet-12345678 [fsx test1] diff --git a/cli/tests/pcluster/config/test_section_raid.py b/cli/tests/pcluster/config/test_section_raid.py index 0f56accaa6..e483ec205f 100644 --- a/cli/tests/pcluster/config/test_section_raid.py +++ 
b/cli/tests/pcluster/config/test_section_raid.py @@ -20,19 +20,20 @@ [ (DefaultCfnParams["raid"].value, DefaultDict["raid"].value), ({}, DefaultDict["raid"].value), - ({"RAIDOptions": "NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE"}, DefaultDict["raid"].value), - ({"RAIDOptions": "NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE"}, DefaultDict["raid"].value), + ({"RAIDOptions": "NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE"}, DefaultDict["raid"].value), + ({"RAIDOptions": "NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE"}, DefaultDict["raid"].value), ( - {"RAIDOptions": "test,NONE,NONE,NONE,NONE,NONE,NONE,NONE"}, + {"RAIDOptions": "test,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE"}, { "shared_dir": "test", "raid_type": None, "num_of_raid_volumes": 2, "volume_type": "gp2", "volume_size": 20, - "volume_iops": 100, + "volume_iops": None, "encrypted": False, "ebs_kms_key_id": None, + "volume_throughput": 125, }, ), ( @@ -79,7 +80,7 @@ def test_raid_section_from_file(mocker, config_parser_dict, expected_dict_params # default ({}, {"raid default": {}}, None), # default values - ({"volume_iops": 100}, {"raid default": {"volume_iops": "100"}}, "No section.*"), + ({"volume_throughput": 125}, {"raid default": {"volume_throughput": "125"}}, "No section.*"), ({"encrypted": False}, {"raid default": {"encrypted": "false"}}, "No section.*"), # other values ({"volume_iops": 120}, {"raid default": {"volume_iops": "120"}}, None), @@ -144,7 +145,7 @@ def test_raid_section_to_cfn(mocker, section_dict, expected_cfn_params): ("volume_size", "wrong_value", None, "must be an Integer"), ("volume_size", "10", 10, None), ("volume_size", "3", 3, None), - ("volume_iops", None, 100, None), + ("volume_iops", None, None, None), ("volume_iops", "", None, "must be an Integer"), ("volume_iops", "NONE", None, "must be an Integer"), ("volume_iops", "wrong_value", None, "must be an Integer"), @@ -160,6 +161,9 @@ def test_raid_section_to_cfn(mocker, section_dict, expected_cfn_params): ("ebs_kms_key_id", "fake_value", "fake_value", None), ("ebs_kms_key_id", "test", "test", None), ("ebs_kms_key_id", "NONE", "NONE", None), # NONE is evaluated as a valid kms id + ("volume_throughput", "NONE", None, "must be an Integer"), + ("volume_throughput", "wrong_value", None, "must be an Integer"), + ("volume_throughput", "150", 150, None), ], ) def test_raid_param_from_file(mocker, param_key, param_value, expected_value, expected_message): diff --git a/cli/tests/pcluster/config/test_source_consistency.py b/cli/tests/pcluster/config/test_source_consistency.py index 131dec3aa9..b177fa22e8 100644 --- a/cli/tests/pcluster/config/test_source_consistency.py +++ b/cli/tests/pcluster/config/test_source_consistency.py @@ -16,7 +16,6 @@ import tests.pcluster.config.utils as utils from pcluster.config.mappings import ALIASES, AWS, CLUSTER_SIT, CW_LOG, DCV, EBS, EFS, FSX, GLOBAL, RAID, SCALING, VPC -from pcluster.config.pcluster_config import PclusterConfig from tests.pcluster.config.defaults import CFN_CLI_RESERVED_PARAMS, CFN_SIT_CONFIG_NUM_OF_PARAMS, DefaultCfnParams EXISTING_SECTIONS = [ALIASES, AWS, CLUSTER_SIT, CW_LOG, DCV, EBS, EFS, FSX, GLOBAL, RAID, SCALING, VPC] @@ -68,23 +67,6 @@ def test_mapping_consistency(): ).is_not_none() -def test_example_config_consistency(mocker): - """Validate example file and try to convert to CFN.""" - mocker.patch("pcluster.config.cfn_param_types.get_availability_zone_of_subnet", return_value="mocked_avail_zone") - mocker.patch( - "pcluster.config.cfn_param_types.get_supported_architectures_for_instance_type", 
return_value=["x86_64"] - ) - mocker.patch("pcluster.config.cfn_param_types.get_instance_network_interfaces", return_value=1) - pcluster_config = PclusterConfig(config_file=utils.get_pcluster_config_example(), fail_on_file_absence=True) - - cfn_params = pcluster_config.to_cfn() - - assert_that(len(cfn_params)).is_equal_to(utils.get_cfn_config_num_of_params(pcluster_config)) - - # for param_key, param_value in expected_cfn_params.items(): - # assert_that(cfn_params.get(param_key)).is_equal_to(expected_cfn_params.get(param_key)) - - def test_defaults_consistency(): """Verifies that the defaults values for the CFN parameters used in the tests are the same in the CFN template.""" template_num_of_params = _get_pcluster_cfn_num_of_params() @@ -106,6 +88,10 @@ def test_defaults_consistency(): # metadata is generated dynamically based on user's configuration. ignored_params += ["ClusterConfigMetadata"] + # ComputeInstanceType parameter is expected to differ from the default value in the CFN template because + # it is dynamically generated based on the AWS region + ignored_params += ["ComputeInstanceType"] + cfn_params = [section_cfn_params.value for section_cfn_params in DefaultCfnParams] default_cfn_values = utils.merge_dicts(*cfn_params) diff --git a/cli/tests/pcluster/config/test_utils.py b/cli/tests/pcluster/config/test_utils.py index 4fe7e0bf83..88ae99f255 100644 --- a/cli/tests/pcluster/config/test_utils.py +++ b/cli/tests/pcluster/config/test_utils.py @@ -8,10 +8,12 @@ # or in the "LICENSE.txt" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES # OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions and # limitations under the License. +import os + import pytest from assertpy import assert_that -from pcluster.utils import get_instance_vcpus +from pcluster.utils import get_default_instance_type from tests.common import MockedBoto3Request @@ -20,17 +22,38 @@ def boto3_stubber_path(): return "pcluster.utils.boto3" -@pytest.mark.parametrize("valid_instance_type, expected_vcpus", [(True, 96), (False, -1)]) -def test_get_instance_vcpus(boto3_stubber, valid_instance_type, expected_vcpus): - instance_type = "g4dn.metal" - mocked_requests = [ - MockedBoto3Request( - method="describe_instance_types", - response={"InstanceTypes": [{"InstanceType": "g4dn.metal", "VCpuInfo": {"DefaultVCpus": 96}}]}, - expected_params={"InstanceTypes": [instance_type]}, - generate_error=not valid_instance_type, - ) - ] +@pytest.mark.parametrize( + "region, free_tier_instance_type, default_instance_type, stub_boto3", + [ + ("us-east-1", "t2.micro", "t2.micro", True), + ("eu-north-1", "t3.micro", "t3.micro", True), + ("us-gov-east-1", None, "t3.micro", True), + # Retrieving free tier instance type again should use cache to reduce boto3 call + ("us-east-1", "t2.micro", "t2.micro", False), + ("eu-north-1", "t3.micro", "t3.micro", False), + ("us-gov-east-1", None, "t3.micro", False), + ], +) +@pytest.mark.nomockdefaultinstance +def test_get_default_instance(boto3_stubber, region, free_tier_instance_type, default_instance_type, stub_boto3): + os.environ["AWS_DEFAULT_REGION"] = region + if free_tier_instance_type: + response = {"InstanceTypes": [{"InstanceType": free_tier_instance_type}]} + else: + response = {"InstanceTypes": []} + if stub_boto3: + mocked_requests = [ + MockedBoto3Request( + method="describe_instance_types", + response=response, + expected_params={ + "Filters": [ + {"Name": "free-tier-eligible", "Values": 
["true"]}, + {"Name": "current-generation", "Values": ["true"]}, + ] + }, + ) + ] - boto3_stubber("ec2", mocked_requests) - assert_that(get_instance_vcpus(instance_type)).is_equal_to(expected_vcpus) + boto3_stubber("ec2", mocked_requests) + assert_that(get_default_instance_type()).is_equal_to(default_instance_type) diff --git a/cli/tests/pcluster/config/test_validators.py b/cli/tests/pcluster/config/test_validators.py index 68d133a6ab..1de759b4ef 100644 --- a/cli/tests/pcluster/config/test_validators.py +++ b/cli/tests/pcluster/config/test_validators.py @@ -29,6 +29,7 @@ compute_resource_validator, disable_hyperthreading_architecture_validator, efa_gdr_validator, + efa_os_arch_validator, fsx_ignored_parameters_validator, instances_architecture_compatibility_validator, intel_hpc_architecture_validator, @@ -87,9 +88,7 @@ def test_ec2_instance_type_validator(mocker, instance_type, expected_message): utils.assert_param_validator(mocker, config_parser_dict, expected_message) -@pytest.mark.parametrize( - "instance_type, expected_message", [("t2.micro", None), ("c4.xlarge", None), ("p4d.24xlarge", "is not supported")] -) +@pytest.mark.parametrize("instance_type, expected_message", [("t2.micro", None), ("c4.xlarge", None)]) def test_head_node_instance_type_validator(mocker, instance_type, expected_message): config_parser_dict = {"cluster default": {"master_instance_type": instance_type}} utils.assert_param_validator(mocker, config_parser_dict, expected_message) @@ -113,7 +112,9 @@ def test_head_node_instance_type_validator(mocker, instance_type, expected_messa def test_compute_instance_type_validator(mocker, scheduler, instance_type, expected_message, expected_warnings): config_parser_dict = {"cluster default": {"scheduler": scheduler, "compute_instance_type": instance_type}} extra_patches = { - "pcluster.config.validators.get_instance_network_interfaces": 4 if instance_type == "p4d.24xlarge" else 1, + "pcluster.config.validators.InstanceTypeInfo.max_network_interface_count": 4 + if instance_type == "p4d.24xlarge" + else 1, } utils.assert_param_validator( mocker, config_parser_dict, expected_message, expected_warnings, extra_patches=extra_patches @@ -145,12 +146,12 @@ def test_ec2_key_pair_validator(mocker, boto3_stubber): ( "arm64", None, - "incompatible with the architecture supported by the instance type chosen for the master server", + "incompatible with the architecture supported by the instance type chosen for the head node", ), ( "arm64", "Unable to get information for AMI", - "incompatible with the architecture supported by the instance type chosen for the master server", + "incompatible with the architecture supported by the instance type chosen for the head node", ), ], ) @@ -684,15 +685,42 @@ def test_efs_validator(mocker, section_dict, expected_message): @pytest.mark.parametrize( "section_dict, expected_message", [ - # Testing iops validator - ({"volume_iops": 1, "volume_size": 1}, None), - ({"volume_iops": 51, "volume_size": 1}, "IOPS to volume size ratio of .* is too hig"), - ({"volume_iops": 1, "volume_size": 20}, None), - ({"volume_iops": 1001, "volume_size": 20}, "IOPS to volume size ratio of .* is too hig"), - # Testing shared_dir validator - ({"shared_dir": "NONE"}, "NONE cannot be used as a shared directory"), - ({"shared_dir": "/NONE"}, "/NONE cannot be used as a shared directory"), - ({"shared_dir": "/raid"}, None), + ({"volume_type": "io1", "volume_size": 20, "volume_iops": 120}, None), + ( + {"volume_type": "io1", "volume_size": 20, "volume_iops": 90}, + "IOPS rate must be 
between 100 and 64000 when provisioning io1 volumes.", + ), + ( + {"volume_type": "io1", "volume_size": 20, "volume_iops": 64001}, + "IOPS rate must be between 100 and 64000 when provisioning io1 volumes.", + ), + ({"volume_type": "io1", "volume_size": 20, "volume_iops": 1001}, "IOPS to volume size ratio of .* is too high"), + ({"volume_type": "io2", "volume_size": 20, "volume_iops": 120}, None), + ( + {"volume_type": "io2", "volume_size": 20, "volume_iops": 90}, + "IOPS rate must be between 100 and 256000 when provisioning io2 volumes.", + ), + ( + {"volume_type": "io2", "volume_size": 20, "volume_iops": 256001}, + "IOPS rate must be between 100 and 256000 when provisioning io2 volumes.", + ), + ( + {"volume_type": "io2", "volume_size": 20, "volume_iops": 20001}, + "IOPS to volume size ratio of .* is too high", + ), + ({"volume_type": "gp3", "volume_size": 20, "volume_iops": 3000}, None), + ( + {"volume_type": "gp3", "volume_size": 20, "volume_iops": 2900}, + "IOPS rate must be between 3000 and 16000 when provisioning gp3 volumes.", + ), + ( + {"volume_type": "gp3", "volume_size": 20, "volume_iops": 16001}, + "IOPS rate must be between 3000 and 16000 when provisioning gp3 volumes.", + ), + ( + {"volume_type": "gp3", "volume_size": 20, "volume_iops": 10001}, + "IOPS to volume size ratio of .* is too high", + ), ], ) def test_raid_validators(mocker, section_dict, expected_message): @@ -2010,7 +2038,11 @@ def test_compute_resource_validator(mocker, section_dict, expected_message): mocker.patch( "pcluster.config.cfn_param_types.get_supported_architectures_for_instance_type", return_value=["x86_64"] ) - mocker.patch("pcluster.config.cfn_param_types.get_instance_network_interfaces", return_value=1) + instance_type_info_mock = mocker.MagicMock() + mocker.patch( + "pcluster.config.cfn_param_types.InstanceTypeInfo.init_from_instance_type", return_value=instance_type_info_mock + ) + instance_type_info_mock.max_network_interface_count.return_value = 1 mocker.patch("pcluster.config.validators.get_supported_architectures_for_instance_type", return_value=["x86_64"]) pcluster_config = utils.init_pcluster_config_from_configparser(config_parser, False) @@ -2194,7 +2226,7 @@ def test_disable_hyperthreading_architecture_validator(mocker, disable_hyperthre @pytest.mark.parametrize( - "master_architecture, compute_architecture, compute_instance_type, expected_message", + "head_node_architecture, compute_architecture, compute_instance_type, expected_message", [ # Single compute_instance_type ("x86_64", "x86_64", "c5.xlarge", []), @@ -2227,7 +2259,7 @@ def test_disable_hyperthreading_architecture_validator(mocker, disable_hyperthre ], ) def test_instances_architecture_compatibility_validator( - mocker, caplog, master_architecture, compute_architecture, compute_instance_type, expected_message + mocker, caplog, head_node_architecture, compute_architecture, compute_instance_type, expected_message ): def internal_is_instance_type(itype): return "." 
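
Aside: the io1/io2/gp3 cases in the RAID validator matrix above (and in the EBS matrix later in this file) determine both the per-type IOPS ranges and the IOPS-to-size ratio limits: io1 100-64000 at 50 IOPS/GiB, io2 100-256000 at 1000 IOPS/GiB, gp3 3000-16000 at 500 IOPS/GiB (e.g. 1001 IOPS on a 20 GiB io1 volume fails, 20001 on a 20 GiB io2 fails, 10001 on a 20 GiB gp3 fails). A sketch of a validator satisfying that matrix; the table-driven shape and the "ebs" section lookup are simplifications, since the same checks are exercised for both the raid and ebs sections.

    EBS_VOLUME_IOPS_BOUNDS = {"io1": (100, 64000), "io2": (100, 256000), "gp3": (3000, 16000)}
    EBS_VOLUME_TYPE_TO_IOPS_RATIO = {"io1": 50, "io2": 1000, "gp3": 500}

    def ebs_volume_iops_validator(param_key, param_value, pcluster_config):
        errors, warnings = [], []
        section = pcluster_config.get_section("ebs")
        volume_type = section.get_param_value("volume_type")
        volume_size = section.get_param_value("volume_size")
        volume_iops = section.get_param_value("volume_iops")
        if volume_type in EBS_VOLUME_IOPS_BOUNDS and volume_iops is not None:
            min_iops, max_iops = EBS_VOLUME_IOPS_BOUNDS[volume_type]
            if not min_iops <= volume_iops <= max_iops:
                errors.append(
                    "IOPS rate must be between {0} and {1} when provisioning {2} volumes.".format(
                        min_iops, max_iops, volume_type
                    )
                )
            elif volume_iops > volume_size * EBS_VOLUME_TYPE_TO_IOPS_RATIO[volume_type]:
                errors.append(
                    "IOPS to volume size ratio of {0} is too high".format(float(volume_iops) / volume_size)
                )
        return errors, warnings
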
in itype or itype == "optimal" @@ -2241,7 +2273,7 @@ def internal_is_instance_type(itype): logger_patch = mocker.patch.object(LOGFILE_LOGGER, "debug") run_architecture_validator_test( mocker, - {"cluster": {"architecture": master_architecture}}, + {"cluster": {"architecture": head_node_architecture}}, "cluster", "architecture", "compute_instance_type", @@ -2415,13 +2447,15 @@ def test_fsx_ignored_parameters_validator(mocker, section_dict, expected_error): ({"volume_type": "io1", "volume_size": 15}, None), ({"volume_type": "io1", "volume_size": 3}, "The size of io1 volumes must be at least 4 GiB"), ({"volume_type": "io1", "volume_size": 16385}, "The size of io1 volumes can not exceed 16384 GiB"), - # TODO Uncomment these lines after adding support for io2 volume types - # ({"volume_type": "io2", "volume_size": 15}, None), - # ({"volume_type": "io2", "volume_size": 3}, "The size of io2 volumes must be at least 4 GiB"), - # ({"volume_type": "io2", "volume_size": 16385}, "The size of io2 volumes must be at most 16384 GiB"), + ({"volume_type": "io2", "volume_size": 15}, None), + ({"volume_type": "io2", "volume_size": 3}, "The size of io2 volumes must be at least 4 GiB"), + ({"volume_type": "io2", "volume_size": 65537}, "The size of io2 volumes can not exceed 65536 GiB"), ({"volume_type": "gp2", "volume_size": 15}, None), ({"volume_type": "gp2", "volume_size": 0}, "The size of gp2 volumes must be at least 1 GiB"), ({"volume_type": "gp2", "volume_size": 16385}, "The size of gp2 volumes can not exceed 16384 GiB"), + ({"volume_type": "gp3", "volume_size": 15}, None), + ({"volume_type": "gp3", "volume_size": 0}, "The size of gp3 volumes must be at least 1 GiB"), + ({"volume_type": "gp3", "volume_size": 16385}, "The size of gp3 volumes can not exceed 16384 GiB"), ({"volume_type": "st1", "volume_size": 500}, None), ({"volume_type": "st1", "volume_size": 20}, "The size of st1 volumes must be at least 500 GiB"), ({"volume_type": "st1", "volume_size": 16385}, "The size of st1 volumes can not exceed 16384 GiB"), @@ -2455,19 +2489,33 @@ def test_ebs_allowed_values_all_have_volume_size_bounds(): {"volume_type": "io1", "volume_size": 20, "volume_iops": 64001}, "IOPS rate must be between 100 and 64000 when provisioning io1 volumes.", ), - ({"volume_type": "io1", "volume_size": 20, "volume_iops": 1001}, "IOPS to volume size ratio of .* is too hig"), - # TODO Uncomment these lines after adding support for io2 volume types - # ({"volume_type": "io2", "volume_size": 20, "volume_iops": 120}, None), - # ( - # {"volume_type": "io2", "volume_size": 20, "volume_iops": 90}, - # "IOPS rate must be between 100 and 64000 when provisioning io2 volumes.", - # ), - # ( - # {"volume_type": "io2", "volume_size": 20, "volume_iops": 64001}, - # "IOPS rate must be between 100 and 64000 when provisioning io2 volumes.", - # ), - # ({"volume_type": "io2", "volume_size": 20, "volume_iops": 10001}, - # "IOPS to volume size ratio of .* is too hig"), + ({"volume_type": "io1", "volume_size": 20, "volume_iops": 1001}, "IOPS to volume size ratio of .* is too high"), + ({"volume_type": "io2", "volume_size": 20, "volume_iops": 120}, None), + ( + {"volume_type": "io2", "volume_size": 20, "volume_iops": 90}, + "IOPS rate must be between 100 and 256000 when provisioning io2 volumes.", + ), + ( + {"volume_type": "io2", "volume_size": 20, "volume_iops": 256001}, + "IOPS rate must be between 100 and 256000 when provisioning io2 volumes.", + ), + ( + {"volume_type": "io2", "volume_size": 20, "volume_iops": 20001}, + "IOPS to volume size ratio of .* 
is too high", + ), + ({"volume_type": "gp3", "volume_size": 20, "volume_iops": 3000}, None), + ( + {"volume_type": "gp3", "volume_size": 20, "volume_iops": 2900}, + "IOPS rate must be between 3000 and 16000 when provisioning gp3 volumes.", + ), + ( + {"volume_type": "gp3", "volume_size": 20, "volume_iops": 16001}, + "IOPS rate must be between 3000 and 16000 when provisioning gp3 volumes.", + ), + ( + {"volume_type": "gp3", "volume_size": 20, "volume_iops": 10001}, + "IOPS to volume size ratio of .* is too high", + ), ], ) def test_ebs_volume_iops_validator(mocker, section_dict, expected_message): @@ -2663,3 +2711,63 @@ def test_duplicate_shared_dir_validator( def test_extra_json_validator(mocker, capsys, extra_json, expected_message): config_parser_dict = {"cluster default": extra_json} utils.assert_param_validator(mocker, config_parser_dict, capsys=capsys, expected_warning=expected_message) + + +@pytest.mark.parametrize( + "cluster_dict, architecture, expected_error", + [ + ({"base_os": "alinux2", "enable_efa": "compute"}, "x86_64", None), + ({"base_os": "alinux2", "enable_efa": "compute"}, "arm64", None), + ({"base_os": "centos8", "enable_efa": "compute"}, "x86_64", None), + ( + {"base_os": "centos8", "enable_efa": "compute"}, + "arm64", + "EFA currently not supported on centos8 for arm64 architecture", + ), + ({"base_os": "ubuntu1804", "enable_efa": "compute"}, "x86_64", None), + ({"base_os": "ubuntu1804", "enable_efa": "compute"}, "arm64", None), + ], +) +def test_efa_os_arch_validator(mocker, cluster_dict, architecture, expected_error): + mocker.patch( + "pcluster.config.cfn_param_types.BaseOSCfnParam.get_instance_type_architecture", return_value=architecture + ) + + config_parser_dict = {"cluster default": cluster_dict} + config_parser = configparser.ConfigParser() + config_parser.read_dict(config_parser_dict) + + pcluster_config = utils.init_pcluster_config_from_configparser(config_parser, False, auto_refresh=False) + pcluster_config.get_section("cluster").get_param("architecture").value = architecture + enable_efa_value = pcluster_config.get_section("cluster").get_param_value("enable_efa") + + errors, warnings = efa_os_arch_validator("enable_efa", enable_efa_value, pcluster_config) + if expected_error: + assert_that(errors[0]).matches(expected_error) + else: + assert_that(errors).is_empty() + + +@pytest.mark.parametrize( + "section_dict, expected_message", + [ + ({"volume_type": "gp3", "volume_throughput": 125}, None), + ( + {"volume_type": "gp3", "volume_throughput": 100}, + "Throughput must be between 125 MB/s and 1000 MB/s when provisioning gp3 volumes.", + ), + ( + {"volume_type": "gp3", "volume_throughput": 1001}, + "Throughput must be between 125 MB/s and 1000 MB/s when provisioning gp3 volumes.", + ), + ({"volume_type": "gp3", "volume_throughput": 125, "volume_iops": 3000}, None), + ( + {"volume_type": "gp3", "volume_throughput": 760, "volume_iops": 3000}, + "Throughput to IOPS ratio of .* is too high", + ), + ({"volume_type": "gp3", "volume_throughput": 760, "volume_iops": 10000}, None), + ], +) +def test_ebs_volume_throughput_validator(mocker, section_dict, expected_message): + config_parser_dict = {"cluster default": {"ebs_settings": "default"}, "ebs default": section_dict} + utils.assert_param_validator(mocker, config_parser_dict, expected_message) diff --git a/cli/tests/pcluster/config/utils.py b/cli/tests/pcluster/config/utils.py index 61835ba1f6..c10ebbd00f 100644 --- a/cli/tests/pcluster/config/utils.py +++ b/cli/tests/pcluster/config/utils.py @@ -12,6 +12,7 @@ 
import os import shutil import tempfile +from collections import OrderedDict import configparser import pytest @@ -22,6 +23,7 @@ from pcluster.config.cfn_param_types import CfnParam from pcluster.config.param_types import StorageData from pcluster.config.pcluster_config import PclusterConfig +from pcluster.utils import InstanceTypeInfo from tests.pcluster.config.defaults import CFN_HIT_CONFIG_NUM_OF_PARAMS, CFN_SIT_CONFIG_NUM_OF_PARAMS, DefaultDict # List of parameters ignored by default when comparing sections @@ -46,7 +48,7 @@ def merge_dicts(*args): def get_pcluster_config_example(): current_dir = os.path.dirname(os.path.abspath(__file__)) - return os.path.join(current_dir, "..", "..", "..", "pcluster", "examples", "config") + return os.path.join(current_dir, "..", "..", "..", "src", "pcluster", "examples", "config") def set_default_values_for_required_cluster_section_params(cluster_section_dict, only_if_not_present=False): @@ -102,16 +104,20 @@ def assert_param_from_file( def get_mock_pcluster_config_patches(scheduler, extra_patches=None): """Return mocks for a set of functions that should be mocked by default because they access the network.""" architectures = ["x86_64"] - master_instances = ["t2.micro", "t2.large", "c4.xlarge", "p4d.24xlarge"] - compute_instances = ["t2.micro", "t2.large", "t2", "optimal"] if scheduler == "awsbatch" else master_instances + head_node_instances = ["t2.micro", "t2.large", "c4.xlarge", "p4d.24xlarge"] + compute_instances = ["t2.micro", "t2.large", "t2", "optimal"] if scheduler == "awsbatch" else head_node_instances patches = { - "pcluster.config.validators.get_supported_instance_types": master_instances, + "pcluster.config.validators.get_supported_instance_types": head_node_instances, "pcluster.config.validators.get_supported_compute_instance_types": compute_instances, "pcluster.config.validators.get_supported_architectures_for_instance_type": architectures, "pcluster.config.cfn_param_types.get_availability_zone_of_subnet": "mocked_avail_zone", "pcluster.config.cfn_param_types.get_supported_architectures_for_instance_type": architectures, - "pcluster.config.validators.get_instance_vcpus": 1, - "pcluster.config.cfn_param_types.get_instance_network_interfaces": 1, + "pcluster.config.cfn_param_types.InstanceTypeInfo.init_from_instance_type": InstanceTypeInfo( + { + "VCpuInfo": {"DefaultVCpus": 96, "DefaultCores": 48, "DefaultThreadsPerCore": 2}, + "NetworkInfo": {"EfaSupported": True, "MaximumNetworkCards": 1}, + } + ), } if extra_patches: patches = merge_dicts(patches, extra_patches) @@ -126,34 +132,19 @@ def mock_pcluster_config(mocker, scheduler=None, extra_patches=None, patch_funcs mocker.patch.object(PclusterConfig, "_PclusterConfig__test_configuration") -def mock_get_instance_type(mocker, instance_type="t2.micro"): +def mock_instance_type_info(mocker, instance_type="t2.micro"): mocker.patch( - "pcluster.utils.get_instance_type", - return_value={ - "InstanceType": instance_type, - "VCpuInfo": {"DefaultVCpus": 4, "DefaultCores": 2}, - "NetworkInfo": {"EfaSupported": False}, - }, + "pcluster.utils.InstanceTypeInfo.init_from_instance_type", + return_value=InstanceTypeInfo( + { + "InstanceType": instance_type, + "VCpuInfo": {"DefaultVCpus": 4, "DefaultCores": 2}, + "NetworkInfo": {"EfaSupported": False}, + } + ), ) -def mock_ec2_key_pair(mocker, cluster_section_dict): - if cluster_section_dict.get("key_name") is None: - cluster_section_dict["key_name"] = "test_key" - - mocker.patch( - "pcluster.config.validators._describe_ec2_key_pair", - return_value={ - 
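
Aside: several hunks in this file replace the old get_instance_vcpus/get_instance_network_interfaces helpers with a single InstanceTypeInfo object built from a raw DescribeInstanceTypes dict. A plausible shape of that wrapper follows; only the constructor argument, init_from_instance_type, and max_network_interface_count actually appear in this diff (the p4d.24xlarge fixture returns 4 network cards), everything else is an assumption.

    import boto3

    class InstanceTypeInfo:
        """Sketch of the wrapper the tests construct directly and patch."""

        def __init__(self, instance_type_data):
            # Raw dict in the shape returned by EC2 DescribeInstanceTypes, as the
            # fixtures above show (VCpuInfo, NetworkInfo, ...).
            self.instance_type_data = instance_type_data

        @classmethod
        def init_from_instance_type(cls, instance_type):
            response = boto3.client("ec2").describe_instance_types(InstanceTypes=[instance_type])
            return cls(response["InstanceTypes"][0])

        def max_network_interface_count(self):
            # The fixtures carry NetworkInfo.MaximumNetworkCards; the default of 1 is an assumption.
            return int(self.instance_type_data.get("NetworkInfo", {}).get("MaximumNetworkCards", 1))
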
"KeyPairs": [ - { - "KeyFingerprint": "12:bf:7c:56:6c:dd:4f:8c:24:45:75:f1:1b:16:54:89:82:09:a4:26", - "KeyName": "test_key", - } - ] - }, - ) - - def assert_param_validator( mocker, config_parser_dict, @@ -161,7 +152,6 @@ def assert_param_validator( capsys=None, expected_warning=None, extra_patches=None, - use_mock_ec2_key_pair=True, ): config_parser = configparser.ConfigParser() @@ -170,11 +160,10 @@ def assert_param_validator( set_default_values_for_required_cluster_section_params( config_parser_dict.get("cluster default"), only_if_not_present=True ) - mock_ec2_key_pair(mocker, config_parser_dict.get("cluster default")) config_parser.read_dict(config_parser_dict) mock_pcluster_config(mocker, config_parser_dict.get("cluster default").get("scheduler"), extra_patches) - mock_get_instance_type(mocker) + mock_instance_type_info(mocker) if expected_error: with pytest.raises(SystemExit, match=expected_error): @@ -355,12 +344,14 @@ def assert_section_params(mocker, pcluster_config_reader, settings_label, expect "pcluster.config.cfn_param_types.get_supported_architectures_for_instance_type", return_value=["x86_64"] ) mocker.patch( - "pcluster.utils.get_instance_type", - return_value={ - "InstanceType": "t2.micro", - "VCpuInfo": {"DefaultVCpus": 1, "DefaultCores": 1, "DefaultThreadsPerCore": 1}, - "NetworkInfo": {"EfaSupported": False}, - }, + "pcluster.utils.InstanceTypeInfo.init_from_instance_type", + return_value=InstanceTypeInfo( + { + "InstanceType": "t2.micro", + "VCpuInfo": {"DefaultVCpus": 1, "DefaultCores": 1, "DefaultThreadsPerCore": 1}, + "NetworkInfo": {"EfaSupported": False}, + } + ), ) if isinstance(expected_cfn_params, SystemExit): with pytest.raises(SystemExit): @@ -402,10 +393,29 @@ def init_pcluster_config_from_configparser(config_parser, validate=True, auto_re config_file=config_file.name, cluster_label="default", fail_on_file_absence=True, auto_refresh=auto_refresh ) if validate: - pcluster_config.validate() + _validate_config(config_parser, pcluster_config) return pcluster_config +def _validate_config(config_parser, pcluster_config): + """Validate sections and params in config_parser by the order specified in the pcluster_config.""" + for section_key in pcluster_config.get_section_keys(): + for section_label in pcluster_config.get_sections(section_key).keys(): + section_name = section_key + " " + section_label if section_label else section_key + if section_name in config_parser.sections(): + pcluster_config_section = pcluster_config.get_section(section_key, section_label) + for validation_func in pcluster_config_section.definition.get("validators", []): + errors, warnings = validation_func(section_key, section_label, pcluster_config) + if errors: + pcluster_config.error(errors) + elif warnings: + pcluster_config.warn(warnings) + config_parser_section = OrderedDict(config_parser.items(section_name)) + for param_key in pcluster_config_section.params: + if param_key in config_parser_section: + pcluster_config_section.get_param(param_key).validate() + + def duplicate_config_file(dst_config_file, test_datadir): # Make a copy of the src template to the target file. # The two resulting PClusterConfig instances will be identical diff --git a/cli/tests/pcluster/configure/__init__.py b/cli/tests/pcluster/configure/__init__.py new file mode 100644 index 0000000000..492c81bc88 --- /dev/null +++ b/cli/tests/pcluster/configure/__init__.py @@ -0,0 +1,10 @@ +# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance +# with the License. A copy of the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "LICENSE.txt" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES +# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions and +# limitations under the License. diff --git a/cli/tests/pcluster/configure/test_pcluster_configure.py b/cli/tests/pcluster/configure/test_pcluster_configure.py index 3b2d26a977..63a9359ade 100644 --- a/cli/tests/pcluster/configure/test_pcluster_configure.py +++ b/cli/tests/pcluster/configure/test_pcluster_configure.py @@ -8,7 +8,7 @@ from pcluster.configure.easyconfig import configure from pcluster.configure.networking import NetworkConfiguration -from tests.pcluster.config.utils import mock_get_instance_type +from tests.pcluster.config.utils import mock_instance_type_info EASYCONFIG = "pcluster.configure.easyconfig." NETWORKING = "pcluster.configure.networking." @@ -303,7 +303,7 @@ def _mock_parallel_cluster_config(mocker): ) for instance_type in supported_instance_types: - mock_get_instance_type(mocker, instance_type) + mock_instance_type_info(mocker, instance_type) def _run_configuration(mocker, path, with_config=False, region=None): @@ -345,15 +345,15 @@ def __init__(self, aws_region_name, key, scheduler): self.input_list = [] if aws_region_name is None else [aws_region_name] self.input_list.extend([key, scheduler]) - def add_first_flow(self, op_sys, min_size, max_size, master_instance, compute_instance): + def add_first_flow(self, op_sys, min_size, max_size, head_node_instance, compute_instance): if self.is_not_aws_batch: self.input_list.append(op_sys) - self.input_list.extend([min_size, max_size, master_instance]) + self.input_list.extend([min_size, max_size, head_node_instance]) if self.is_not_aws_batch: self.input_list.append(compute_instance) - def add_no_automation_no_empty_vpc(self, vpc_id, master_id, compute_id): - self.input_list.extend(["n", vpc_id, "n", master_id, compute_id]) + def add_no_automation_no_empty_vpc(self, vpc_id, head_node_id, compute_id): + self.input_list.extend(["n", vpc_id, "n", head_node_id, compute_id]) def add_sub_automation(self, vpc_id, network_configuration, vpc_has_subnets=True): self.input_list.extend(["n", vpc_id]) @@ -382,6 +382,7 @@ def __init__(self, mocker, empty_region=False, partition="commercial", mock_avai _mock_list_vpcs_and_subnets(self.mocker, empty_region, partition) _mock_parallel_cluster_config(self.mocker) _mock_cache_availability_zones(self.mocker) + mocker.patch("pcluster.configure.easyconfig.get_default_instance_type", return_value="t2.micro") if mock_availability_zone: _mock_availability_zone(self.mocker) @@ -416,7 +417,7 @@ def _run_input_test_with_config( output, capsys, with_input=False, - master_instance="c5.xlarge", + head_node_instance="c5.xlarge", compute_instance="g3.8xlarge", ): if with_input: @@ -425,16 +426,16 @@ def _run_input_test_with_config( op_sys="ubuntu1604", min_size="7", max_size="18", - master_instance=master_instance, + head_node_instance=head_node_instance, compute_instance=compute_instance, ) input_composer.add_no_automation_no_empty_vpc( - vpc_id="vpc-34567891", master_id="subnet-34567891", compute_id="subnet-45678912" + vpc_id="vpc-34567891", head_node_id="subnet-34567891", compute_id="subnet-45678912" ) else: input_composer = 
ComposeInput(aws_region_name="", key="", scheduler="") - input_composer.add_first_flow(op_sys="", min_size="", max_size="", master_instance="", compute_instance="") - input_composer.add_no_automation_no_empty_vpc(vpc_id="", master_id="", compute_id="") + input_composer.add_first_flow(op_sys="", min_size="", max_size="", head_node_instance="", compute_instance="") + input_composer.add_no_automation_no_empty_vpc(vpc_id="", head_node_id="", compute_id="") input_composer.mock_input(mocker) @@ -447,10 +448,10 @@ def test_no_automation_no_awsbatch_no_errors(mocker, capsys, test_datadir): MockHandler(mocker) input_composer = ComposeInput(aws_region_name="eu-west-1", key="key1", scheduler="torque") input_composer.add_first_flow( - op_sys="alinux", min_size="13", max_size="14", master_instance="t2.nano", compute_instance="t2.micro" + op_sys="alinux", min_size="13", max_size="14", head_node_instance="t2.nano", compute_instance="t2.micro" ) input_composer.add_no_automation_no_empty_vpc( - vpc_id="vpc-12345678", master_id="subnet-12345678", compute_id="subnet-23456789" + vpc_id="vpc-12345678", head_node_id="subnet-12345678", compute_id="subnet-23456789" ) input_composer.mock_input(mocker) @@ -483,10 +484,10 @@ def test_with_region_arg_with_config_file(mocker, capsys, test_datadir): input_composer = ComposeInput(aws_region_name=None, key="key1", scheduler="torque") input_composer.add_first_flow( - op_sys="alinux", min_size="13", max_size="14", master_instance="t2.nano", compute_instance="t2.micro" + op_sys="alinux", min_size="13", max_size="14", head_node_instance="t2.nano", compute_instance="t2.micro" ) input_composer.add_no_automation_no_empty_vpc( - vpc_id="vpc-12345678", master_id="subnet-12345678", compute_id="subnet-23456789" + vpc_id="vpc-12345678", head_node_id="subnet-12345678", compute_id="subnet-23456789" ) input_composer.mock_input(mocker) os.environ["AWS_DEFAULT_REGION"] = "env_region_name_to_be_overwritten" @@ -524,7 +525,7 @@ def test_unexisting_instance_type(mocker, capsys, test_datadir): output, capsys, with_input=True, - master_instance="m6g.xlarge", + head_node_instance="m6g.xlarge", compute_instance="m6g.xlarge", ) @@ -533,7 +534,7 @@ def test_no_available_no_input_no_automation_no_errors_with_config_file(mocker, """ Testing easy config with user hitting return on all prompts. - Mocking the case where parameters: aws_region_name, key_name, vpc_id, compute_subnet_id, master_subnet_id. + Mocking the case where parameters: aws_region_name, key_name, vpc_id, compute_subnet_id, head_node_subnet_id. Are not found in available list under new partition/region/vpc configuration. 
After running easy config, the old original_config_file should be the same as pcluster.config.ini """ @@ -565,7 +566,7 @@ def test_with_input_no_automation_no_errors_with_config_file(mocker, capsys, tes output, capsys, with_input=True, - master_instance="m6g.xlarge", + head_node_instance="m6g.xlarge", compute_instance="m6g.xlarge", ) @@ -577,10 +578,10 @@ def test_no_automation_yes_awsbatch_no_errors(mocker, capsys, test_datadir): input_composer = ComposeInput(aws_region_name="eu-west-1", key="key1", scheduler="awsbatch") input_composer.add_first_flow( - op_sys=None, min_size="13", max_size="14", master_instance="t2.nano", compute_instance=None + op_sys=None, min_size="13", max_size="14", head_node_instance="t2.nano", compute_instance=None ) input_composer.add_no_automation_no_empty_vpc( - vpc_id="vpc-12345678", master_id="subnet-12345678", compute_id="subnet-23456789" + vpc_id="vpc-12345678", head_node_id="subnet-12345678", compute_id="subnet-23456789" ) input_composer.mock_input(mocker) @@ -595,7 +596,7 @@ def test_subnet_automation_no_awsbatch_no_errors_empty_vpc(mocker, capsys, test_ input_composer = ComposeInput(aws_region_name="eu-west-1", key="key1", scheduler="sge") input_composer.add_first_flow( - op_sys="centos7", min_size="13", max_size="14", master_instance="t2.nano", compute_instance="t2.micro" + op_sys="centos7", min_size="13", max_size="14", head_node_instance="t2.nano", compute_instance="t2.micro" ) input_composer.add_sub_automation( vpc_id="vpc-23456789", network_configuration=PUBLIC_PRIVATE_CONFIGURATION, vpc_has_subnets=False @@ -613,7 +614,7 @@ def test_subnet_automation_no_awsbatch_no_errors(mocker, capsys, test_datadir): input_composer = ComposeInput(aws_region_name="eu-west-1", key="key1", scheduler="sge") input_composer.add_first_flow( - op_sys="centos7", min_size="13", max_size="14", master_instance="t2.nano", compute_instance="t2.micro" + op_sys="centos7", min_size="13", max_size="14", head_node_instance="t2.nano", compute_instance="t2.micro" ) input_composer.add_sub_automation( vpc_id="vpc-12345678", network_configuration=PUBLIC_PRIVATE_CONFIGURATION, vpc_has_subnets=True @@ -632,7 +633,7 @@ def test_subnet_automation_no_awsbatch_no_errors_with_config_file(mocker, capsys input_composer = ComposeInput(aws_region_name="eu-west-1", key="key1", scheduler="sge") input_composer.add_first_flow( - op_sys="centos7", min_size="13", max_size="14", master_instance="t2.nano", compute_instance="t2.micro" + op_sys="centos7", min_size="13", max_size="14", head_node_instance="t2.nano", compute_instance="t2.micro" ) input_composer.add_sub_automation( vpc_id="vpc-12345678", network_configuration=PUBLIC_PRIVATE_CONFIGURATION, vpc_has_subnets=True @@ -650,7 +651,7 @@ def test_vpc_automation_no_awsbatch_no_errors(mocker, capsys, test_datadir): input_composer = ComposeInput(aws_region_name="eu-west-1", key="key1", scheduler="sge") input_composer.add_first_flow( - op_sys="centos7", min_size="13", max_size="14", master_instance="t2.nano", compute_instance="t2.micro" + op_sys="centos7", min_size="13", max_size="14", head_node_instance="t2.nano", compute_instance="t2.micro" ) input_composer.add_vpc_sub_automation(network_configuration=PUBLIC_PRIVATE_CONFIGURATION) input_composer.mock_input(mocker) @@ -666,7 +667,7 @@ def test_vpc_automation_yes_awsbatch_no_errors(mocker, capsys, test_datadir): input_composer = ComposeInput(aws_region_name="eu-west-1", key="key1", scheduler="awsbatch") input_composer.add_first_flow( - op_sys=None, min_size="13", max_size="14", master_instance="t2.nano", 
compute_instance=None + op_sys=None, min_size="13", max_size="14", head_node_instance="t2.nano", compute_instance=None ) input_composer.add_vpc_sub_automation(network_configuration=PUBLIC_PRIVATE_CONFIGURATION) input_composer.mock_input(mocker) @@ -685,7 +686,7 @@ def test_vpc_automation_invalid_vpc_block(mocker, capsys, test_datadir): input_composer = ComposeInput(aws_region_name="eu-west-1", key="key1", scheduler="awsbatch") input_composer.add_first_flow( - op_sys=None, min_size="13", max_size="14", master_instance="t2.nano", compute_instance=None + op_sys=None, min_size="13", max_size="14", head_node_instance="t2.nano", compute_instance=None ) input_composer.add_vpc_sub_automation(network_configuration=PUBLIC_PRIVATE_CONFIGURATION) input_composer.mock_input(mocker) @@ -702,7 +703,7 @@ def test_subnet_automation_yes_awsbatch_invalid_vpc(mocker, capsys, test_datadir input_composer = ComposeInput(aws_region_name="eu-west-1", key="key1", scheduler="awsbatch") input_composer.add_first_flow( - op_sys=None, min_size="13", max_size="14", master_instance="t2.nano", compute_instance=None + op_sys=None, min_size="13", max_size="14", head_node_instance="t2.nano", compute_instance=None ) input_composer.add_sub_automation(vpc_id="vpc-12345678", network_configuration=PUBLIC_PRIVATE_CONFIGURATION) input_composer.mock_input(mocker) @@ -718,7 +719,7 @@ def test_vpc_automation_no_vpc_in_region(mocker, capsys, test_datadir): input_composer = ComposeInput(aws_region_name="eu-west-1", key="key1", scheduler="slurm") input_composer.add_first_flow( - op_sys="centos7", min_size="13", max_size="14", master_instance="t2.nano", compute_instance="t2.micro" + op_sys="centos7", min_size="13", max_size="14", head_node_instance="t2.nano", compute_instance="t2.micro" ) input_composer.add_vpc_sub_automation_empty_region(network_configuration=PUBLIC_PRIVATE_CONFIGURATION) input_composer.mock_input(mocker) @@ -734,7 +735,7 @@ def test_vpc_automation_no_vpc_in_region_public(mocker, capsys, test_datadir): input_composer = ComposeInput(aws_region_name="eu-west-1", key="key1", scheduler="slurm") input_composer.add_first_flow( - op_sys="centos7", min_size="13", max_size="14", master_instance="t2.nano", compute_instance="t2.micro" + op_sys="centos7", min_size="13", max_size="14", head_node_instance="t2.nano", compute_instance="t2.micro" ) input_composer.add_vpc_sub_automation_empty_region(network_configuration="2") input_composer.mock_input(mocker) @@ -761,7 +762,7 @@ def test_bad_config_file(mocker, capsys, test_datadir): input_composer = ComposeInput(aws_region_name="eu-west-1", key="key1", scheduler="sge") input_composer.add_first_flow( - op_sys="centos7", min_size="13", max_size="14", master_instance="t2.nano", compute_instance="t2.micro" + op_sys="centos7", min_size="13", max_size="14", head_node_instance="t2.nano", compute_instance="t2.micro" ) input_composer.add_sub_automation( vpc_id="vpc-12345678", network_configuration=PUBLIC_PRIVATE_CONFIGURATION, vpc_has_subnets=True @@ -778,18 +779,18 @@ def general_wrapper_for_prompt_testing( op_sys="centos7", min_size="0", max_size="10", - master_instance="t2.nano", + head_node_instance="t2.nano", compute_instance="t2.micro", key="key1", vpc_id="vpc-12345678", - master_id="subnet-12345678", + head_node_id="subnet-12345678", compute_id="subnet-23456789", ): path = os.path.join(tempfile.gettempdir(), "test_pcluster_configure") MockHandler(mocker) input_composer = ComposeInput(aws_region_name=region, key=key, scheduler=scheduler) - input_composer.add_first_flow(op_sys, min_size, 
max_size, master_instance, compute_instance) - input_composer.add_no_automation_no_empty_vpc(vpc_id, master_id, compute_id) + input_composer.add_first_flow(op_sys, min_size, max_size, head_node_instance, compute_instance) + input_composer.add_no_automation_no_empty_vpc(vpc_id, head_node_id, compute_id) input_composer.mock_input(mocker) _run_configuration(mocker, path) @@ -808,7 +809,7 @@ def test_vpc_automation_with_no_single_qualified_az(mocker, capsys, test_datadir input_composer = ComposeInput(aws_region_name="eu-west-1", key="key1", scheduler="sge") input_composer.add_first_flow( - op_sys="centos7", min_size="13", max_size="14", master_instance="t2.nano", compute_instance="t2.micro" + op_sys="centos7", min_size="13", max_size="14", head_node_instance="t2.nano", compute_instance="t2.micro" ) input_composer.add_vpc_sub_automation(network_configuration=PUBLIC_PRIVATE_CONFIGURATION) input_composer.mock_input(mocker) @@ -878,30 +879,30 @@ def test_invalid_vpc(mocker, vpc_id): @pytest.mark.parametrize( - "vpc_id, master_id, compute_id", + "vpc_id, head_node_id, compute_id", [ ("vpc-12345678", "subnet-34567891", "subnet-45678912"), ("vpc-23456789", "subnet-34567891", "subnet-45678912"), ("vpc-34567891", "subnet-12345678", "subnet-23456789"), ], ) -def test_invalid_subnet(mocker, vpc_id, master_id, compute_id): +def test_invalid_subnet(mocker, vpc_id, head_node_id, compute_id): with pytest.raises(StopIteration): assert_that( - general_wrapper_for_prompt_testing(mocker, vpc_id=vpc_id, master_id=master_id, compute_id=compute_id) + general_wrapper_for_prompt_testing(mocker, vpc_id=vpc_id, head_node_id=head_node_id, compute_id=compute_id) ).is_true() @pytest.mark.parametrize( - "vpc_id, master_id, compute_id", + "vpc_id, head_node_id, compute_id", [("vpc-12345678", "subnet-12345678", "subnet-23456789"), ("vpc-34567891", "subnet-45678912", "subnet-45678912")], ) -def test_valid_subnet(mocker, vpc_id, master_id, compute_id): +def test_valid_subnet(mocker, vpc_id, head_node_id, compute_id): # valid subnets assert_that( - general_wrapper_for_prompt_testing(mocker, vpc_id=vpc_id, master_id=master_id, compute_id=compute_id) + general_wrapper_for_prompt_testing(mocker, vpc_id=vpc_id, head_node_id=head_node_id, compute_id=compute_id) ).is_true() @@ -913,12 +914,3 @@ def test_hit_config_file(mocker, capsys, test_datadir): # Expected sys exit with error with pytest.raises(SystemExit, match="ERROR: Configuration in file .* cannot be overwritten"): _run_configuration(mocker, old_config_file, with_config=True) - - -def test_invalid_p4d_head_node_type(mocker): - with pytest.raises(StopIteration): - assert_that(general_wrapper_for_prompt_testing(mocker, master_instance="p4d.24xlarge")).is_true() - - -def test_valid_p4d_compute_node_type(mocker): - assert_that(general_wrapper_for_prompt_testing(mocker, compute_instance="p4d.24xlarge")).is_true() diff --git a/cli/tests/pcluster/configure/test_pcluster_configure/test_bad_config_file/output.txt b/cli/tests/pcluster/configure/test_pcluster_configure/test_bad_config_file/output.txt index 5d09937dda..d7bef6e2fc 100644 --- a/cli/tests/pcluster/configure/test_pcluster_configure/test_bad_config_file/output.txt +++ b/cli/tests/pcluster/configure/test_pcluster_configure/test_bad_config_file/output.txt @@ -51,7 +51,7 @@ Allowed values for VPC ID: 3 vpc-34567891 default 3 4 vpc-45678912 ParallelClusterVPC-20190626095403 1 Allowed values for Network Configuration: -1. Master in a public subnet and compute fleet in a private subnet -2. 
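
Aside: the ComposeInput scaffolding above scripts every prompt answer up front, which is why test_invalid_subnet can assert StopIteration: an invalid value makes pcluster configure re-prompt, running past the end of the scripted list. A sketch of how mock_input likely feeds those answers; the patched target name is an assumption.

    def mock_input(self, mocker):
        # Hand out the scripted answers one prompt at a time; a re-prompt after an
        # invalid value exhausts the iterator and raises StopIteration.
        responses = iter(self.input_list)
        mocker.patch(
            "pcluster.configure.easyconfig.input", side_effect=lambda *args: next(responses)
        )
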
Master and compute fleet in the same public subnet +1. Head node in a public subnet and compute fleet in a private subnet +2. Head node and compute fleet in the same public subnet Configuration file written to {{ CONFIG_FILE }} You can edit your configuration file or simply run 'pcluster create -c {{ CONFIG_FILE }} cluster-name' to create your cluster diff --git a/cli/tests/pcluster/configure/test_pcluster_configure/test_bad_config_file/pcluster.config.ini b/cli/tests/pcluster/configure/test_pcluster_configure/test_bad_config_file/pcluster.config.ini index 584ef221cd..4bbcff5504 100644 --- a/cli/tests/pcluster/configure/test_pcluster_configure/test_bad_config_file/pcluster.config.ini +++ b/cli/tests/pcluster/configure/test_pcluster_configure/test_bad_config_file/pcluster.config.ini @@ -14,8 +14,7 @@ vpc_settings = default # Implied value scheduler = sge base_os = centos7 -# Implied value -# compute_instance_type = t2.micro +compute_instance_type = t2.micro master_instance_type = t2.nano #Invalid value type max_queue_size = 14 diff --git a/cli/tests/pcluster/configure/test_pcluster_configure/test_filtered_subnets_by_az/output.txt b/cli/tests/pcluster/configure/test_pcluster_configure/test_filtered_subnets_by_az/output.txt index cc832b0c57..c65d2bd4ad 100644 --- a/cli/tests/pcluster/configure/test_pcluster_configure/test_filtered_subnets_by_az/output.txt +++ b/cli/tests/pcluster/configure/test_pcluster_configure/test_filtered_subnets_by_az/output.txt @@ -46,12 +46,12 @@ Allowed values for VPC ID: 3 vpc-34567891 default 3 4 vpc-45678912 ParallelClusterVPC-20190626095403 1 Note: 2 subnet(s) is/are not listed, because the instance type is not in their availability zone(s) -Allowed values for Master Subnet ID: +Allowed values for head node Subnet ID: # id name size availability_zone --- --------------- ------ ------ ------------------- 1 subnet-45678912 4096 eu-west-1a Note: 2 subnet(s) is/are not listed, because the instance type is not in their availability zone(s) -Allowed values for Compute Subnet ID: +Allowed values for compute Subnet ID: # id name size availability_zone --- --------------- ------ ------ ------------------- 1 subnet-45678912 4096 eu-west-1a diff --git a/cli/tests/pcluster/configure/test_pcluster_configure/test_no_automation_no_awsbatch_no_errors/output.txt b/cli/tests/pcluster/configure/test_pcluster_configure/test_no_automation_no_awsbatch_no_errors/output.txt index d07d619a28..5bc8c99931 100644 --- a/cli/tests/pcluster/configure/test_pcluster_configure/test_no_automation_no_awsbatch_no_errors/output.txt +++ b/cli/tests/pcluster/configure/test_pcluster_configure/test_no_automation_no_awsbatch_no_errors/output.txt @@ -45,12 +45,12 @@ Allowed values for VPC ID: 2 vpc-23456789 ParallelClusterVPC-20190624105051 0 3 vpc-34567891 default 3 4 vpc-45678912 ParallelClusterVPC-20190626095403 1 -Allowed values for Master Subnet ID: +Allowed values for head node Subnet ID: # id name size availability_zone --- --------------- ---------------------------- ------ ------------------- 1 subnet-12345678 ParallelClusterPublicSubnet 256 eu-west-1b 2 subnet-23456789 ParallelClusterPrivateSubnet 4096 eu-west-1b -Allowed values for Compute Subnet ID: +Allowed values for compute Subnet ID: # id name size availability_zone --- --------------- ---------------------------- ------ ------------------- 1 subnet-12345678 ParallelClusterPublicSubnet 256 eu-west-1b diff --git a/cli/tests/pcluster/configure/test_pcluster_configure/test_no_automation_no_awsbatch_no_errors/pcluster.config.ini 
b/cli/tests/pcluster/configure/test_pcluster_configure/test_no_automation_no_awsbatch_no_errors/pcluster.config.ini index e2fe3749f1..1f4800a4ca 100644 --- a/cli/tests/pcluster/configure/test_pcluster_configure/test_no_automation_no_awsbatch_no_errors/pcluster.config.ini +++ b/cli/tests/pcluster/configure/test_pcluster_configure/test_no_automation_no_awsbatch_no_errors/pcluster.config.ini @@ -6,6 +6,7 @@ key_name = key1 vpc_settings = default scheduler = torque master_instance_type = t2.nano +compute_instance_type = t2.micro max_queue_size = 14 initial_queue_size = 13 maintain_initial_size = true diff --git a/cli/tests/pcluster/configure/test_pcluster_configure/test_no_automation_yes_awsbatch_no_errors/output.txt b/cli/tests/pcluster/configure/test_pcluster_configure/test_no_automation_yes_awsbatch_no_errors/output.txt index 372869e07e..c6ab2cbec7 100644 --- a/cli/tests/pcluster/configure/test_pcluster_configure/test_no_automation_yes_awsbatch_no_errors/output.txt +++ b/cli/tests/pcluster/configure/test_pcluster_configure/test_no_automation_yes_awsbatch_no_errors/output.txt @@ -38,12 +38,12 @@ Allowed values for VPC ID: 2 vpc-23456789 ParallelClusterVPC-20190624105051 0 3 vpc-34567891 default 3 4 vpc-45678912 ParallelClusterVPC-20190626095403 1 -Allowed values for Master Subnet ID: +Allowed values for head node Subnet ID: # id name size availability_zone --- --------------- ---------------------------- ------ ------------------- 1 subnet-12345678 ParallelClusterPublicSubnet 256 eu-west-1b 2 subnet-23456789 ParallelClusterPrivateSubnet 4096 eu-west-1b -Allowed values for Compute Subnet ID: +Allowed values for compute Subnet ID: # id name size availability_zone --- --------------- ---------------------------- ------ ------------------- 1 subnet-12345678 ParallelClusterPublicSubnet 256 eu-west-1b diff --git a/cli/tests/pcluster/configure/test_pcluster_configure/test_no_automation_yes_awsbatch_no_errors/pcluster.config.ini b/cli/tests/pcluster/configure/test_pcluster_configure/test_no_automation_yes_awsbatch_no_errors/pcluster.config.ini index 95356e94e9..8c696b839e 100644 --- a/cli/tests/pcluster/configure/test_pcluster_configure/test_no_automation_yes_awsbatch_no_errors/pcluster.config.ini +++ b/cli/tests/pcluster/configure/test_pcluster_configure/test_no_automation_yes_awsbatch_no_errors/pcluster.config.ini @@ -7,8 +7,7 @@ key_name = key1 base_os = alinux2 vpc_settings = default scheduler = awsbatch -# Implied value -# compute_instance_type = optimal +compute_instance_type = optimal master_instance_type = t2.nano max_vcpus = 14 min_vcpus = 13 diff --git a/cli/tests/pcluster/configure/test_pcluster_configure/test_no_available_no_input_no_automation_no_errors_with_config_file/output.txt b/cli/tests/pcluster/configure/test_pcluster_configure/test_no_available_no_input_no_automation_no_errors_with_config_file/output.txt index aa697307ab..fdedfe9131 100644 --- a/cli/tests/pcluster/configure/test_pcluster_configure/test_no_available_no_input_no_automation_no_errors_with_config_file/output.txt +++ b/cli/tests/pcluster/configure/test_pcluster_configure/test_no_available_no_input_no_automation_no_errors_with_config_file/output.txt @@ -28,12 +28,12 @@ Allowed values for VPC ID: 2 vpc-bcdefghi ParallelClusterVPC-20190624105051 0 3 vpc-cdefghij default 3 4 vpc-abdbabcb ParallelClusterVPC-20190626095403 1 -Allowed values for Master Subnet ID: +Allowed values for head node Subnet ID: # id name size availability_zone --- --------------- ---------------------------- ------ ------------------- 1 
subnet-77777777 ParallelClusterPublicSubnet 256 cn-north-1a 2 subnet-66666666 ParallelClusterPrivateSubnet 4096 cn-north-1a -Allowed values for Compute Subnet ID: +Allowed values for compute Subnet ID: # id name size availability_zone --- --------------- ---------------------------- ------ ------------------- 1 subnet-77777777 ParallelClusterPublicSubnet 256 cn-north-1a diff --git a/cli/tests/pcluster/configure/test_pcluster_configure/test_no_input_no_automation_no_errors_with_config_file/output.txt b/cli/tests/pcluster/configure/test_pcluster_configure/test_no_input_no_automation_no_errors_with_config_file/output.txt index 0c5dfb007b..26e9c4795b 100644 --- a/cli/tests/pcluster/configure/test_pcluster_configure/test_no_input_no_automation_no_errors_with_config_file/output.txt +++ b/cli/tests/pcluster/configure/test_pcluster_configure/test_no_input_no_automation_no_errors_with_config_file/output.txt @@ -45,12 +45,12 @@ Allowed values for VPC ID: 2 vpc-23456789 ParallelClusterVPC-20190624105051 0 3 vpc-34567891 default 3 4 vpc-45678912 ParallelClusterVPC-20190626095403 1 -Allowed values for Master Subnet ID: +Allowed values for head node Subnet ID: # id name size availability_zone --- --------------- ---------------------------- ------ ------------------- 1 subnet-12345678 ParallelClusterPublicSubnet 256 eu-west-1b 2 subnet-23456789 ParallelClusterPrivateSubnet 4096 eu-west-1b -Allowed values for Compute Subnet ID: +Allowed values for compute Subnet ID: # id name size availability_zone --- --------------- ---------------------------- ------ ------------------- 1 subnet-12345678 ParallelClusterPublicSubnet 256 eu-west-1b diff --git a/cli/tests/pcluster/configure/test_pcluster_configure/test_region_env_overwrite_region_config/output.txt b/cli/tests/pcluster/configure/test_pcluster_configure/test_region_env_overwrite_region_config/output.txt index 0c5dfb007b..26e9c4795b 100644 --- a/cli/tests/pcluster/configure/test_pcluster_configure/test_region_env_overwrite_region_config/output.txt +++ b/cli/tests/pcluster/configure/test_pcluster_configure/test_region_env_overwrite_region_config/output.txt @@ -45,12 +45,12 @@ Allowed values for VPC ID: 2 vpc-23456789 ParallelClusterVPC-20190624105051 0 3 vpc-34567891 default 3 4 vpc-45678912 ParallelClusterVPC-20190626095403 1 -Allowed values for Master Subnet ID: +Allowed values for head node Subnet ID: # id name size availability_zone --- --------------- ---------------------------- ------ ------------------- 1 subnet-12345678 ParallelClusterPublicSubnet 256 eu-west-1b 2 subnet-23456789 ParallelClusterPrivateSubnet 4096 eu-west-1b -Allowed values for Compute Subnet ID: +Allowed values for compute Subnet ID: # id name size availability_zone --- --------------- ---------------------------- ------ ------------------- 1 subnet-12345678 ParallelClusterPublicSubnet 256 eu-west-1b diff --git a/cli/tests/pcluster/configure/test_pcluster_configure/test_region_env_overwrite_region_config/pcluster.config.ini b/cli/tests/pcluster/configure/test_pcluster_configure/test_region_env_overwrite_region_config/pcluster.config.ini index 639f3e57a5..48e4bf5524 100644 --- a/cli/tests/pcluster/configure/test_pcluster_configure/test_region_env_overwrite_region_config/pcluster.config.ini +++ b/cli/tests/pcluster/configure/test_pcluster_configure/test_region_env_overwrite_region_config/pcluster.config.ini @@ -6,6 +6,8 @@ key_name = key3 vpc_settings = default scheduler = torque base_os = alinux2 +master_instance_type = t2.micro +compute_instance_type = t2.micro [vpc default] vpc_id 
= vpc-12345678 diff --git a/cli/tests/pcluster/configure/test_pcluster_configure/test_subnet_automation_no_awsbatch_no_errors/output.txt b/cli/tests/pcluster/configure/test_pcluster_configure/test_subnet_automation_no_awsbatch_no_errors/output.txt index bc9aff4e37..6598ea03cd 100644 --- a/cli/tests/pcluster/configure/test_pcluster_configure/test_subnet_automation_no_awsbatch_no_errors/output.txt +++ b/cli/tests/pcluster/configure/test_pcluster_configure/test_subnet_automation_no_awsbatch_no_errors/output.txt @@ -46,7 +46,7 @@ Allowed values for VPC ID: 3 vpc-34567891 default 3 4 vpc-45678912 ParallelClusterVPC-20190626095403 1 Allowed values for Network Configuration: -1. Master in a public subnet and compute fleet in a private subnet -2. Master and compute fleet in the same public subnet +1. Head node in a public subnet and compute fleet in a private subnet +2. Head node and compute fleet in the same public subnet Configuration file written to {{ CONFIG_FILE }} You can edit your configuration file or simply run 'pcluster create -c {{ CONFIG_FILE }} cluster-name' to create your cluster diff --git a/cli/tests/pcluster/configure/test_pcluster_configure/test_subnet_automation_no_awsbatch_no_errors/pcluster.config.ini b/cli/tests/pcluster/configure/test_pcluster_configure/test_subnet_automation_no_awsbatch_no_errors/pcluster.config.ini index d00170d6f6..f663bc15fc 100644 --- a/cli/tests/pcluster/configure/test_pcluster_configure/test_subnet_automation_no_awsbatch_no_errors/pcluster.config.ini +++ b/cli/tests/pcluster/configure/test_pcluster_configure/test_subnet_automation_no_awsbatch_no_errors/pcluster.config.ini @@ -7,8 +7,7 @@ vpc_settings = default # Implied value scheduler = sge base_os = centos7 -# Implied value -# compute_instance_type = t2.micro +compute_instance_type = t2.micro master_instance_type = t2.nano max_queue_size = 14 initial_queue_size = 13 diff --git a/cli/tests/pcluster/configure/test_pcluster_configure/test_subnet_automation_no_awsbatch_no_errors_empty_vpc/output.txt b/cli/tests/pcluster/configure/test_pcluster_configure/test_subnet_automation_no_awsbatch_no_errors_empty_vpc/output.txt index 17e549ead9..e21739c6aa 100644 --- a/cli/tests/pcluster/configure/test_pcluster_configure/test_subnet_automation_no_awsbatch_no_errors_empty_vpc/output.txt +++ b/cli/tests/pcluster/configure/test_pcluster_configure/test_subnet_automation_no_awsbatch_no_errors_empty_vpc/output.txt @@ -47,7 +47,7 @@ Allowed values for VPC ID: 4 vpc-45678912 ParallelClusterVPC-20190626095403 1 There are no qualified subnets. Starting automatic creation of subnets... Allowed values for Network Configuration: -1. Master in a public subnet and compute fleet in a private subnet -2. Master and compute fleet in the same public subnet +1. Head node in a public subnet and compute fleet in a private subnet +2. 
Head node and compute fleet in the same public subnet Configuration file written to {{ CONFIG_FILE }} You can edit your configuration file or simply run 'pcluster create -c {{ CONFIG_FILE }} cluster-name' to create your cluster diff --git a/cli/tests/pcluster/configure/test_pcluster_configure/test_subnet_automation_no_awsbatch_no_errors_empty_vpc/pcluster.config.ini b/cli/tests/pcluster/configure/test_pcluster_configure/test_subnet_automation_no_awsbatch_no_errors_empty_vpc/pcluster.config.ini index fc7f4ed2b3..bd2b285434 100644 --- a/cli/tests/pcluster/configure/test_pcluster_configure/test_subnet_automation_no_awsbatch_no_errors_empty_vpc/pcluster.config.ini +++ b/cli/tests/pcluster/configure/test_pcluster_configure/test_subnet_automation_no_awsbatch_no_errors_empty_vpc/pcluster.config.ini @@ -6,8 +6,7 @@ key_name = key1 vpc_settings = default scheduler = sge base_os = centos7 -# Implied value -# compute_instance_type = t2.micro +compute_instance_type = t2.micro master_instance_type = t2.nano max_queue_size = 14 initial_queue_size = 13 diff --git a/cli/tests/pcluster/configure/test_pcluster_configure/test_subnet_automation_no_awsbatch_no_errors_with_config_file/output.txt b/cli/tests/pcluster/configure/test_pcluster_configure/test_subnet_automation_no_awsbatch_no_errors_with_config_file/output.txt index 36cef0e6cb..a5e9fafbdf 100644 --- a/cli/tests/pcluster/configure/test_pcluster_configure/test_subnet_automation_no_awsbatch_no_errors_with_config_file/output.txt +++ b/cli/tests/pcluster/configure/test_pcluster_configure/test_subnet_automation_no_awsbatch_no_errors_with_config_file/output.txt @@ -46,7 +46,7 @@ Allowed values for VPC ID: 3 vpc-34567891 default 3 4 vpc-45678912 ParallelClusterVPC-20190626095403 1 Allowed values for Network Configuration: -1. Master in a public subnet and compute fleet in a private subnet -2. Master and compute fleet in the same public subnet +1. Head node in a public subnet and compute fleet in a private subnet +2. 
Head node and compute fleet in the same public subnet Configuration file written to {{ CONFIG_FILE }} You can edit your configuration file or simply run 'pcluster create -c {{ CONFIG_FILE }} cluster-name' to create your cluster diff --git a/cli/tests/pcluster/configure/test_pcluster_configure/test_subnet_automation_no_awsbatch_no_errors_with_config_file/pcluster.config.ini b/cli/tests/pcluster/configure/test_pcluster_configure/test_subnet_automation_no_awsbatch_no_errors_with_config_file/pcluster.config.ini index fff2041ee2..a401a06e00 100644 --- a/cli/tests/pcluster/configure/test_pcluster_configure/test_subnet_automation_no_awsbatch_no_errors_with_config_file/pcluster.config.ini +++ b/cli/tests/pcluster/configure/test_pcluster_configure/test_subnet_automation_no_awsbatch_no_errors_with_config_file/pcluster.config.ini @@ -7,8 +7,7 @@ vpc_settings = default # Implied value scheduler = sge base_os = centos7 -# Implied value -# compute_instance_type = t2.micro +compute_instance_type = t2.micro master_instance_type = t2.nano max_queue_size = 14 initial_queue_size = 13 diff --git a/cli/tests/pcluster/configure/test_pcluster_configure/test_subnet_automation_yes_awsbatch_invalid_vpc/pcluster.config.ini b/cli/tests/pcluster/configure/test_pcluster_configure/test_subnet_automation_yes_awsbatch_invalid_vpc/pcluster.config.ini index 59256eeabd..170494afc4 100644 --- a/cli/tests/pcluster/configure/test_pcluster_configure/test_subnet_automation_yes_awsbatch_invalid_vpc/pcluster.config.ini +++ b/cli/tests/pcluster/configure/test_pcluster_configure/test_subnet_automation_yes_awsbatch_invalid_vpc/pcluster.config.ini @@ -5,9 +5,8 @@ aws_region_name = eu-west-1 key_name = key1 vpc_settings = default scheduler = awsbatch -# Implied value base_os = alinux2 -# compute_instance_type = optimal +compute_instance_type = optimal master_instance_type = t2.nano max_vcpus = 14 min_vcpus = 13 diff --git a/cli/tests/pcluster/configure/test_pcluster_configure/test_unexisting_instance_type/output.txt b/cli/tests/pcluster/configure/test_pcluster_configure/test_unexisting_instance_type/output.txt index 74396f7b89..24e5885c62 100644 --- a/cli/tests/pcluster/configure/test_pcluster_configure/test_unexisting_instance_type/output.txt +++ b/cli/tests/pcluster/configure/test_pcluster_configure/test_unexisting_instance_type/output.txt @@ -45,13 +45,13 @@ Allowed values for VPC ID: 2 vpc-23456789 ParallelClusterVPC-20190624105051 0 3 vpc-34567891 default 3 4 vpc-45678912 ParallelClusterVPC-20190626095403 1 -Allowed values for Master Subnet ID: +Allowed values for head node Subnet ID: # id name size availability_zone --- --------------- ------ ------ ------------------- 1 subnet-34567891 4096 eu-west-1b 2 subnet-45678912 4096 eu-west-1a 3 subnet-56789123 4096 eu-west-1c -Allowed values for Compute Subnet ID: +Allowed values for compute Subnet ID: # id name size availability_zone --- --------------- ------ ------ ------------------- 1 subnet-34567891 4096 eu-west-1b diff --git a/cli/tests/pcluster/configure/test_pcluster_configure/test_vpc_automation_no_awsbatch_no_errors/output.txt b/cli/tests/pcluster/configure/test_pcluster_configure/test_vpc_automation_no_awsbatch_no_errors/output.txt index 913654dce4..10ef27030c 100644 --- a/cli/tests/pcluster/configure/test_pcluster_configure/test_vpc_automation_no_awsbatch_no_errors/output.txt +++ b/cli/tests/pcluster/configure/test_pcluster_configure/test_vpc_automation_no_awsbatch_no_errors/output.txt @@ -39,8 +39,8 @@ Allowed values for Operating System: 5. ubuntu1604 6. 
ubuntu1804 Allowed values for Network Configuration: -1. Master in a public subnet and compute fleet in a private subnet -2. Master and compute fleet in the same public subnet +1. Head node in a public subnet and compute fleet in a private subnet +2. Head node and compute fleet in the same public subnet Beginning VPC creation. Please do not leave the terminal until the creation is finalized Configuration file written to {{ CONFIG_FILE }} You can edit your configuration file or simply run 'pcluster create -c {{ CONFIG_FILE }} cluster-name' to create your cluster diff --git a/cli/tests/pcluster/configure/test_pcluster_configure/test_vpc_automation_no_awsbatch_no_errors/pcluster.config.ini b/cli/tests/pcluster/configure/test_pcluster_configure/test_vpc_automation_no_awsbatch_no_errors/pcluster.config.ini index d00170d6f6..f663bc15fc 100644 --- a/cli/tests/pcluster/configure/test_pcluster_configure/test_vpc_automation_no_awsbatch_no_errors/pcluster.config.ini +++ b/cli/tests/pcluster/configure/test_pcluster_configure/test_vpc_automation_no_awsbatch_no_errors/pcluster.config.ini @@ -7,8 +7,7 @@ vpc_settings = default # Implied value scheduler = sge base_os = centos7 -# Implied value -# compute_instance_type = t2.micro +compute_instance_type = t2.micro master_instance_type = t2.nano max_queue_size = 14 initial_queue_size = 13 diff --git a/cli/tests/pcluster/configure/test_pcluster_configure/test_vpc_automation_no_vpc_in_region/output.txt b/cli/tests/pcluster/configure/test_pcluster_configure/test_vpc_automation_no_vpc_in_region/output.txt index 2c506db741..642de60f6b 100644 --- a/cli/tests/pcluster/configure/test_pcluster_configure/test_vpc_automation_no_vpc_in_region/output.txt +++ b/cli/tests/pcluster/configure/test_pcluster_configure/test_vpc_automation_no_vpc_in_region/output.txt @@ -40,8 +40,8 @@ Allowed values for Operating System: 6. ubuntu1804 There are no VPC for the given region. Starting automatic creation of VPC and subnets... Allowed values for Network Configuration: -1. Master in a public subnet and compute fleet in a private subnet -2. Master and compute fleet in the same public subnet +1. Head node in a public subnet and compute fleet in a private subnet +2. Head node and compute fleet in the same public subnet Beginning VPC creation. Please do not leave the terminal until the creation is finalized Configuration file written to {{ CONFIG_FILE }} You can edit your configuration file or simply run 'pcluster create -c {{ CONFIG_FILE }} cluster-name' to create your cluster diff --git a/cli/tests/pcluster/configure/test_pcluster_configure/test_vpc_automation_no_vpc_in_region_public/output.txt b/cli/tests/pcluster/configure/test_pcluster_configure/test_vpc_automation_no_vpc_in_region_public/output.txt index 2c506db741..642de60f6b 100644 --- a/cli/tests/pcluster/configure/test_pcluster_configure/test_vpc_automation_no_vpc_in_region_public/output.txt +++ b/cli/tests/pcluster/configure/test_pcluster_configure/test_vpc_automation_no_vpc_in_region_public/output.txt @@ -40,8 +40,8 @@ Allowed values for Operating System: 6. ubuntu1804 There are no VPC for the given region. Starting automatic creation of VPC and subnets... Allowed values for Network Configuration: -1. Master in a public subnet and compute fleet in a private subnet -2. Master and compute fleet in the same public subnet +1. Head node in a public subnet and compute fleet in a private subnet +2. Head node and compute fleet in the same public subnet Beginning VPC creation. 
Please do not leave the terminal until the creation is finalized Configuration file written to {{ CONFIG_FILE }} You can edit your configuration file or simply run 'pcluster create -c {{ CONFIG_FILE }} cluster-name' to create your cluster diff --git a/cli/tests/pcluster/configure/test_pcluster_configure/test_vpc_automation_yes_awsbatch_no_errors/pcluster.config.ini b/cli/tests/pcluster/configure/test_pcluster_configure/test_vpc_automation_yes_awsbatch_no_errors/pcluster.config.ini index 59256eeabd..170494afc4 100644 --- a/cli/tests/pcluster/configure/test_pcluster_configure/test_vpc_automation_yes_awsbatch_no_errors/pcluster.config.ini +++ b/cli/tests/pcluster/configure/test_pcluster_configure/test_vpc_automation_yes_awsbatch_no_errors/pcluster.config.ini @@ -5,9 +5,8 @@ aws_region_name = eu-west-1 key_name = key1 vpc_settings = default scheduler = awsbatch -# Implied value base_os = alinux2 -# compute_instance_type = optimal +compute_instance_type = optimal master_instance_type = t2.nano max_vcpus = 14 min_vcpus = 13 diff --git a/cli/tests/pcluster/configure/test_pcluster_configure/test_with_input_no_automation_no_errors_with_config_file/output.txt b/cli/tests/pcluster/configure/test_pcluster_configure/test_with_input_no_automation_no_errors_with_config_file/output.txt index 74396f7b89..24e5885c62 100644 --- a/cli/tests/pcluster/configure/test_pcluster_configure/test_with_input_no_automation_no_errors_with_config_file/output.txt +++ b/cli/tests/pcluster/configure/test_pcluster_configure/test_with_input_no_automation_no_errors_with_config_file/output.txt @@ -45,13 +45,13 @@ Allowed values for VPC ID: 2 vpc-23456789 ParallelClusterVPC-20190624105051 0 3 vpc-34567891 default 3 4 vpc-45678912 ParallelClusterVPC-20190626095403 1 -Allowed values for Master Subnet ID: +Allowed values for head node Subnet ID: # id name size availability_zone --- --------------- ------ ------ ------------------- 1 subnet-34567891 4096 eu-west-1b 2 subnet-45678912 4096 eu-west-1a 3 subnet-56789123 4096 eu-west-1c -Allowed values for Compute Subnet ID: +Allowed values for compute Subnet ID: # id name size availability_zone --- --------------- ------ ------ ------------------- 1 subnet-34567891 4096 eu-west-1b diff --git a/cli/tests/pcluster/configure/test_pcluster_configure/test_with_region_arg_with_config_file/output.txt b/cli/tests/pcluster/configure/test_pcluster_configure/test_with_region_arg_with_config_file/output.txt index 8e1ad7ce2c..8579f9e437 100644 --- a/cli/tests/pcluster/configure/test_pcluster_configure/test_with_region_arg_with_config_file/output.txt +++ b/cli/tests/pcluster/configure/test_pcluster_configure/test_with_region_arg_with_config_file/output.txt @@ -28,12 +28,12 @@ Allowed values for VPC ID: 2 vpc-23456789 ParallelClusterVPC-20190624105051 0 3 vpc-34567891 default 3 4 vpc-45678912 ParallelClusterVPC-20190626095403 1 -Allowed values for Master Subnet ID: +Allowed values for head node Subnet ID: # id name size availability_zone --- --------------- ---------------------------- ------ ------------------- 1 subnet-12345678 ParallelClusterPublicSubnet 256 eu-west-1b 2 subnet-23456789 ParallelClusterPrivateSubnet 4096 eu-west-1b -Allowed values for Compute Subnet ID: +Allowed values for compute Subnet ID: # id name size availability_zone --- --------------- ---------------------------- ------ ------------------- 1 subnet-12345678 ParallelClusterPublicSubnet 256 eu-west-1b diff --git a/cli/tests/pcluster/configure/test_pcluster_configure/test_with_region_arg_with_config_file/pcluster.config.ini 
b/cli/tests/pcluster/configure/test_pcluster_configure/test_with_region_arg_with_config_file/pcluster.config.ini index d80f4a8a9f..9b38fa7ec4 100644 --- a/cli/tests/pcluster/configure/test_pcluster_configure/test_with_region_arg_with_config_file/pcluster.config.ini +++ b/cli/tests/pcluster/configure/test_pcluster_configure/test_with_region_arg_with_config_file/pcluster.config.ini @@ -14,6 +14,7 @@ key_name = key1 base_os = alinux scheduler = torque master_instance_type = t2.nano +compute_instance_type = t2.micro vpc_settings = default initial_queue_size = 13 max_queue_size = 14 diff --git a/cli/tests/pcluster/createami/__init__.py b/cli/tests/pcluster/createami/__init__.py new file mode 100644 index 0000000000..492c81bc88 --- /dev/null +++ b/cli/tests/pcluster/createami/__init__.py @@ -0,0 +1,10 @@ +# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance +# with the License. A copy of the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "LICENSE.txt" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES +# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions and +# limitations under the License. diff --git a/cli/tests/pcluster/createami/test_pcluster_createami.py b/cli/tests/pcluster/createami/test_pcluster_createami.py index 55bdbf9cac..2f83ea7443 100644 --- a/cli/tests/pcluster/createami/test_pcluster_createami.py +++ b/cli/tests/pcluster/createami/test_pcluster_createami.py @@ -30,7 +30,7 @@ def test_get_default_createami_instance_type( ): """Verify that the function to select default instance types for the createami command behaves as expected.""" instance_type_info_patch = mocker.patch( - "pcluster.createami.utils.get_instance_types_info", + "pcluster.createami.utils.InstanceTypeInfo.init_from_instance_type", side_effect=SystemExit(instance_info_err) if instance_info_err else None, ) logger_error_patch = mocker.patch("pcluster.createami.LOGGER.error") @@ -56,7 +56,7 @@ def test_get_default_createami_instance_type( with pytest.raises(SystemExit) as sysexit: createami._get_default_createami_instance_type(ami_architecture) assert_that(sysexit.value.code).is_not_equal_to(0) - instance_type_info_patch.assert_called_with([expected_default_instance_type], fail_on_error=True) + instance_type_info_patch.assert_called_with(expected_default_instance_type) if instance_unavailable_in_region: assert_that(logger_error_patch.call_count).is_equal_to(1) @@ -65,7 +65,7 @@ def test_get_default_createami_instance_type( assert_that(createami._get_default_createami_instance_type(ami_architecture)).is_equal_to( expected_default_instance_type ) - instance_type_info_patch.assert_called_with([expected_default_instance_type], fail_on_error=True) + instance_type_info_patch.assert_called_with(expected_default_instance_type) @pytest.mark.parametrize( diff --git a/cli/tests/pcluster/utils/test_pcluster_utils.py b/cli/tests/pcluster/test_utils.py similarity index 83% rename from cli/tests/pcluster/utils/test_pcluster_utils.py rename to cli/tests/pcluster/test_utils.py index d1d879c016..ab0081633a 100644 --- a/cli/tests/pcluster/utils/test_pcluster_utils.py +++ b/cli/tests/pcluster/test_utils.py @@ -11,7 +11,7 @@ from botocore.exceptions import ClientError, EndpointConnectionError import pcluster.utils as utils -from pcluster.utils import get_bucket_url +from 
pcluster.utils import Cache, get_bucket_url from tests.common import MockedBoto3Request FAKE_CLUSTER_NAME = "cluster_name" @@ -499,47 +499,6 @@ def test_get_info_for_amis(boto3_stubber, image_ids, response, error_message): assert_that(sysexit.value.code).is_not_equal_to(0) -@pytest.mark.parametrize( - "instance_types, error_message, fail_on_error", - [ - # Test when calling for single instance types - (["t2.micro"], None, None), - (["bad.instance.type"], "some error message", True), - (["bad.instance.type"], "some error message", False), - # Test when calling for multiple instance types - (["t2.micro", "t2.xlarge"], None, None), - (["a1.medium", "m6g.xlarge"], None, None), - (["bad.instance.type1", "bad.instance.type2"], "some error message", True), - (["bad.instance.type1", "bad.instance.type2"], "some error message", False), - ], -) -def test_get_instance_types_info(boto3_stubber, capsys, instance_types, error_message, fail_on_error): - """Verify that get_instance_types_info makes the expected API call.""" - response_dict = {"InstanceTypes": [{"InstanceType": instance_type} for instance_type in instance_types]} - mocked_requests = [ - MockedBoto3Request( - method="describe_instance_types", - response=response_dict if error_message is None else error_message, - expected_params={"InstanceTypes": instance_types}, - generate_error=error_message, - ) - ] - boto3_stubber("ec2", mocked_requests) - if error_message and fail_on_error: - full_error_message = "calling DescribeInstanceTypes for instances {0}: {1}".format( - ", ".join(instance_types), error_message - ) - with pytest.raises(SystemExit, match=full_error_message) as sysexit: - utils.get_instance_types_info(instance_types, fail_on_error) - assert_that(sysexit.value.code).is_not_equal_to(0) - elif error_message: - utils.get_instance_types_info(instance_types, fail_on_error) - assert_that(capsys.readouterr().out).matches(error_message) - else: - instance_types_info = utils.get_instance_types_info(instance_types, fail_on_error) - assert_that(instance_types_info).is_equal_to(response_dict.get("InstanceTypes")) - - @pytest.mark.parametrize( "instance_type, supported_architectures, error_message", [ @@ -552,8 +511,8 @@ def test_get_instance_types_info(boto3_stubber, capsys, instance_types, error_me def test_get_supported_architectures_for_instance_type(mocker, instance_type, supported_architectures, error_message): """Verify that get_supported_architectures_for_instance_type behaves as expected for various cases.""" get_instance_types_info_patch = mocker.patch( - "pcluster.utils.get_instance_types_info", - return_value=[{"ProcessorInfo": {"SupportedArchitectures": supported_architectures}}], + "pcluster.utils.InstanceTypeInfo.init_from_instance_type", + return_value=utils.InstanceTypeInfo({"ProcessorInfo": {"SupportedArchitectures": supported_architectures}}), ) observed_architectures = utils.get_supported_architectures_for_instance_type(instance_type) expected_architectures = list(set(supported_architectures) & set(["x86_64", "arm64"])) @@ -562,15 +521,15 @@ def test_get_supported_architectures_for_instance_type(mocker, instance_type, su if instance_type == "optimal": get_instance_types_info_patch.assert_not_called() else: - get_instance_types_info_patch.assert_called_with([instance_type]) + get_instance_types_info_patch.assert_called_with(instance_type) @pytest.mark.parametrize( "node_type, expected_fallback, expected_response, expected_instances", [ - (utils.NodeType.master, False, {"Reservations": [{"Groups": [], "Instances": [{}]}]}, 1), 
- (utils.NodeType.master, True, {"Reservations": [{"Groups": [], "Instances": [{}]}]}, 1), - (utils.NodeType.master, True, {"Reservations": []}, 0), + (utils.NodeType.head_node, False, {"Reservations": [{"Groups": [], "Instances": [{}]}]}, 1), + (utils.NodeType.head_node, True, {"Reservations": [{"Groups": [], "Instances": [{}]}]}, 1), + (utils.NodeType.head_node, True, {"Reservations": []}, 0), (utils.NodeType.compute, False, {"Reservations": [{"Groups": [], "Instances": [{}, {}, {}]}]}, 3), (utils.NodeType.compute, True, {"Reservations": [{"Groups": [], "Instances": [{}, {}]}]}, 2), (utils.NodeType.compute, True, {"Reservations": []}, 0), @@ -611,7 +570,7 @@ def test_describe_cluster_instances(boto3_stubber, node_type, expected_fallback, @pytest.mark.parametrize( - "master_instance, expected_ip, error", + "head_node_instance, expected_ip, error", [ ( { @@ -635,17 +594,17 @@ def test_describe_cluster_instances(boto3_stubber, node_type, expected_fallback, ], ids=["public_ip", "private_ip", "stopped"], ) -def test_get_master_server_ips(mocker, master_instance, expected_ip, error): +def test_get_head_node_ips(mocker, head_node_instance, expected_ip, error): describe_cluster_instances_mock = mocker.patch( - "pcluster.utils.describe_cluster_instances", return_value=[master_instance] + "pcluster.utils.describe_cluster_instances", return_value=[head_node_instance] ) if error: with pytest.raises(SystemExit, match=error): - utils._get_master_server_ip("stack-name") + utils._get_head_node_ip("stack-name") else: - assert_that(utils._get_master_server_ip("stack-name")).is_equal_to(expected_ip) - describe_cluster_instances_mock.assert_called_with("stack-name", node_type=utils.NodeType.master) + assert_that(utils._get_head_node_ip("stack-name")).is_equal_to(expected_ip) + describe_cluster_instances_mock.assert_called_with("stack-name", node_type=utils.NodeType.head_node) @pytest.mark.parametrize( @@ -1008,3 +967,166 @@ def test_get_ebs_snapshot_info(boto3_stubber, snapshot_id, raise_exceptions, err with pytest.raises(SystemExit, match=error_message) as sysexit: utils.get_ebs_snapshot_info(snapshot_id, raise_exceptions=raise_exceptions) assert_that(sysexit.value.code).is_not_equal_to(0) + + +@pytest.mark.cache +class TestCache: + invocations = [] + + @pytest.fixture(autouse=True) + def clear_cache(self): + utils.Cache.clear_all() + + @pytest.fixture(autouse=True) + def clear_invocations(self): + del self.invocations[:] + + @pytest.fixture + def disabled_cache(self): + os.environ["PCLUSTER_CACHE_DISABLED"] = "true" + yield + del os.environ["PCLUSTER_CACHE_DISABLED"] + + @staticmethod + @Cache.cached + def _cached_method_1(arg1, arg2): + TestCache.invocations.append((arg1, arg2)) + return arg1, arg2 + + @staticmethod + @Cache.cached + def _cached_method_2(arg1, arg2): + TestCache.invocations.append((arg1, arg2)) + return arg1, arg2 + + def test_cached_method(self): + for _ in range(0, 2): + assert_that(self._cached_method_1(1, 2)).is_equal_to((1, 2)) + assert_that(self._cached_method_2(1, 2)).is_equal_to((1, 2)) + assert_that(self._cached_method_1(2, 1)).is_equal_to((2, 1)) + assert_that(self._cached_method_1(1, arg2=2)).is_equal_to((1, 2)) + assert_that(self._cached_method_1(arg1=1, arg2=2)).is_equal_to((1, 2)) + + assert_that(self.invocations).is_length(5) + + def test_disabled_cache(self, disabled_cache): + assert_that(self._cached_method_1(1, 2)).is_equal_to((1, 2)) + assert_that(self._cached_method_1(1, 2)).is_equal_to((1, 2)) + + assert_that(self.invocations).is_length(2) + + def 
test_clear_all(self): + for _ in range(0, 2): + assert_that(self._cached_method_1(1, 2)).is_equal_to((1, 2)) + assert_that(self._cached_method_2(1, 2)).is_equal_to((1, 2)) + + Cache.clear_all() + + for _ in range(0, 2): + assert_that(self._cached_method_1(1, 2)).is_equal_to((1, 2)) + assert_that(self._cached_method_2(1, 2)).is_equal_to((1, 2)) + + assert_that(self.invocations).is_length(4) + + +class TestInstanceTypeInfo: + @pytest.fixture(autouse=True) + def clear_cache(self): + utils.Cache.clear_all() + + def test_init_from_instance_type(self, boto3_stubber, capsys): + mocked_requests = [ + MockedBoto3Request( + method="describe_instance_types", + response={ + "InstanceTypes": [ + { + "InstanceType": "c4.xlarge", + "VCpuInfo": {"DefaultVCpus": 4, "DefaultCores": 2, "DefaultThreadsPerCore": 2}, + "NetworkInfo": {"EfaSupported": False, "MaximumNetworkCards": 1}, + "ProcessorInfo": {"SupportedArchitectures": ["x86_64"]}, + } + ] + }, + expected_params={"InstanceTypes": ["c4.xlarge"]}, + ), + MockedBoto3Request( + method="describe_instance_types", + response={ + "InstanceTypes": [ + { + "InstanceType": "g4dn.metal", + "VCpuInfo": {"DefaultVCpus": 96}, + "GpuInfo": {"Gpus": [{"Name": "T4", "Manufacturer": "NVIDIA", "Count": 8}]}, + "NetworkInfo": {"EfaSupported": True, "MaximumNetworkCards": 4}, + "ProcessorInfo": {"SupportedArchitectures": ["x86_64"]}, + } + ] + }, + expected_params={"InstanceTypes": ["g4dn.metal"]}, + ), + MockedBoto3Request( + method="describe_instance_types", + response={ + "InstanceTypes": [ + { + "InstanceType": "g4ad.16xlarge", + "VCpuInfo": {"DefaultVCpus": 64}, + "GpuInfo": {"Gpus": [{"Name": "*", "Manufacturer": "AMD", "Count": 4}]}, + "NetworkInfo": {"EfaSupported": False, "MaximumNetworkCards": 1}, + "ProcessorInfo": {"SupportedArchitectures": ["x86_64"]}, + } + ] + }, + expected_params={"InstanceTypes": ["g4ad.16xlarge"]}, + ), + ] + boto3_stubber("ec2", mocked_requests) + + for _ in range(0, 2): + c4_instance_info = utils.InstanceTypeInfo.init_from_instance_type("c4.xlarge") + g4dn_instance_info = utils.InstanceTypeInfo.init_from_instance_type("g4dn.metal") + g4ad_instance_info = utils.InstanceTypeInfo.init_from_instance_type("g4ad.16xlarge") + + assert_that(c4_instance_info.gpu_count()).is_equal_to(0) + assert_that(capsys.readouterr().out).is_empty() + assert_that(c4_instance_info.max_network_interface_count()).is_equal_to(1) + assert_that(c4_instance_info.default_threads_per_core()).is_equal_to(2) + assert_that(c4_instance_info.vcpus_count()).is_equal_to(4) + assert_that(c4_instance_info.supported_architecture()).is_equal_to(["x86_64"]) + assert_that(c4_instance_info.is_efa_supported()).is_equal_to(False) + + assert_that(g4dn_instance_info.gpu_count()).is_equal_to(8) + assert_that(capsys.readouterr().out).is_empty() + assert_that(g4dn_instance_info.max_network_interface_count()).is_equal_to(4) + assert_that(g4dn_instance_info.default_threads_per_core()).is_equal_to(2) + assert_that(g4dn_instance_info.vcpus_count()).is_equal_to(96) + assert_that(g4dn_instance_info.supported_architecture()).is_equal_to(["x86_64"]) + assert_that(g4dn_instance_info.is_efa_supported()).is_equal_to(True) + + assert_that(g4ad_instance_info.gpu_count()).is_equal_to(0) + assert_that(capsys.readouterr().out).matches("not offer native support for 'AMD' GPUs.") + assert_that(g4ad_instance_info.max_network_interface_count()).is_equal_to(1) + assert_that(g4ad_instance_info.default_threads_per_core()).is_equal_to(2) + assert_that(g4ad_instance_info.vcpus_count()).is_equal_to(64) + 
assert_that(g4ad_instance_info.supported_architecture()).is_equal_to(["x86_64"]) + assert_that(g4ad_instance_info.is_efa_supported()).is_equal_to(False) + + def test_init_from_instance_type_failure(self, boto3_stubber): + boto3_stubber( + "ec2", + 2 + * [ + MockedBoto3Request( + method="describe_instance_types", + expected_params={"InstanceTypes": ["g4dn.metal"]}, + generate_error=True, + response="Error message", + ) + ], + ) + error_message = "Failed when retrieving instance type data for instance g4dn.metal: Error message" + with pytest.raises(SystemExit, match=error_message): + utils.InstanceTypeInfo.init_from_instance_type("g4dn.metal") + + utils.InstanceTypeInfo.init_from_instance_type("g4dn.metal", exit_on_error=False) diff --git a/cli/tests/pcluster_config/test_pcluster_config_convert.py b/cli/tests/pcluster_config/test_pcluster_config_convert.py index 1b8c92e8cb..0523aba74f 100644 --- a/cli/tests/pcluster_config/test_pcluster_config_convert.py +++ b/cli/tests/pcluster_config/test_pcluster_config_convert.py @@ -57,7 +57,7 @@ def _convert_and_assert_file_content( mocker.patch("pcluster.config.cfn_param_types.get_availability_zone_of_subnet") mocker.patch("pcluster.config.cfn_param_types.get_supported_architectures_for_instance_type") - mocker.patch("pcluster.config.json_param_types.utils.get_instance_type") + mocker.patch("pcluster.config.json_param_types.utils.InstanceTypeInfo.init_from_instance_type") original_default_region = os.environ.get("AWS_DEFAULT_REGION") if original_default_region: diff --git a/cli/tests/pcluster_config/test_pcluster_config_convert/test_slurm_sit_full/expected_output.ini b/cli/tests/pcluster_config/test_pcluster_config_convert/test_slurm_sit_full/expected_output.ini index 78222099bb..c3c7369bda 100644 --- a/cli/tests/pcluster_config/test_pcluster_config_convert/test_slurm_sit_full/expected_output.ini +++ b/cli/tests/pcluster_config/test_pcluster_config_convert/test_slurm_sit_full/expected_output.ini @@ -21,7 +21,9 @@ fsx_fs_id = fs-0aaae053900f84047 [ebs settings1] shared_dir = sharedebs -volume_size = 50 +volume_type = gp3 +volume_size = 20 +volume_iops = 3000 [ebs settings2] shared_dir = sharedebs2 @@ -49,10 +51,10 @@ vpc_security_group_id = sg-0fa8d8e11dc6e9491 [cluster slurm-sit-full] key_name = test-key -base_os = centos7 scheduler = slurm master_instance_type = t2.large master_root_volume_size = 30 +base_os = centos7 compute_root_volume_size = 30 proxy_server = proxy ec2_iam_role = role diff --git a/cli/tests/pcluster_config/test_pcluster_config_convert/test_slurm_sit_full/pcluster.config.ini b/cli/tests/pcluster_config/test_pcluster_config_convert/test_slurm_sit_full/pcluster.config.ini index 03342acd51..1e98f059ef 100644 --- a/cli/tests/pcluster_config/test_pcluster_config_convert/test_slurm_sit_full/pcluster.config.ini +++ b/cli/tests/pcluster_config/test_pcluster_config_convert/test_slurm_sit_full/pcluster.config.ini @@ -61,8 +61,7 @@ fsx_fs_id = fs-0aaae053900f84047 [ebs settings1] shared_dir = sharedebs -volume_type = gp2 -volume_size = 50 +volume_type = gp3 [ebs settings2] shared_dir = sharedebs2 diff --git a/cli/tests/pcluster_config/test_pcluster_config_convert/test_slurm_sit_simple/expected_output.ini b/cli/tests/pcluster_config/test_pcluster_config_convert/test_slurm_sit_simple/expected_output.ini index e27b63153c..909995b851 100644 --- a/cli/tests/pcluster_config/test_pcluster_config_convert/test_slurm_sit_simple/expected_output.ini +++ b/cli/tests/pcluster_config/test_pcluster_config_convert/test_slurm_sit_simple/expected_output.ini @@ 
-15,9 +15,9 @@ sanity_check = true [cluster slurm-sit-simple] key_name = test -base_os = centos7 scheduler = slurm master_instance_type = c5.2xlarge +base_os = centos7 shared_dir = /test vpc_settings = public additional_iam_policies = arn:aws:iam::aws:policy/CloudWatchFullAccess diff --git a/cli/tests/pcluster_config/test_pcluster_config_convert/test_slurm_unrelated_sections/expected_output.ini b/cli/tests/pcluster_config/test_pcluster_config_convert/test_slurm_unrelated_sections/expected_output.ini index 43d20fa990..a4e5b5a8fe 100644 --- a/cli/tests/pcluster_config/test_pcluster_config_convert/test_slurm_unrelated_sections/expected_output.ini +++ b/cli/tests/pcluster_config/test_pcluster_config_convert/test_slurm_unrelated_sections/expected_output.ini @@ -25,7 +25,9 @@ volume_size = 50 [ebs settings2] shared_dir = sharedebs2 +volume_type = gp3 volume_size = 10 +volume_iops = 3500 [raid settings1] shared_dir = /raid_dir @@ -49,10 +51,10 @@ vpc_security_group_id = sg-0fa8d8e11dc6e9491 [cluster slurm-sit-full] key_name = test-key -base_os = centos7 scheduler = slurm master_instance_type = t2.large master_root_volume_size = 30 +base_os = centos7 compute_root_volume_size = 30 proxy_server = proxy ec2_iam_role = role diff --git a/cli/tests/pcluster_config/test_pcluster_config_convert/test_slurm_unrelated_sections/pcluster.config.ini b/cli/tests/pcluster_config/test_pcluster_config_convert/test_slurm_unrelated_sections/pcluster.config.ini index c6c1e5b638..904cee0961 100644 --- a/cli/tests/pcluster_config/test_pcluster_config_convert/test_slurm_unrelated_sections/pcluster.config.ini +++ b/cli/tests/pcluster_config/test_pcluster_config_convert/test_slurm_unrelated_sections/pcluster.config.ini @@ -62,13 +62,13 @@ fsx_fs_id = fs-0aaae053900f84047 [ebs settings1] shared_dir = sharedebs -volume_type = gp2 volume_size = 50 [ebs settings2] shared_dir = sharedebs2 -volume_type = gp2 +volume_type = gp3 volume_size = 10 +volume_iops = 3500 [raid settings1] shared_dir = /raid_dir diff --git a/cli/tox.ini b/cli/tox.ini index e6ce3fcb09..1667cb02cd 100644 --- a/cli/tox.ini +++ b/cli/tox.ini @@ -1,29 +1,35 @@ [tox] toxworkdir=../.tox envlist = - py{27,34,35,36,37,38} + py{27,34,35,36,37,38,39}-{cov,nocov} code-linters cfn-{tests,lint,format-check} # Default testenv. Used to run tests on all python versions. [testenv] -passenv = CI TRAVIS_BUILD_ID TRAVIS TRAVIS_BRANCH TRAVIS_JOB_NUMBER TRAVIS_PULL_REQUEST TRAVIS_JOB_ID TRAVIS_REPO_SLUG TRAVIS_COMMIT +passenv = + CI GITHUB_* +usedevelop = + cov: true + nocov: false whitelist_externals = bash deps = -rtests/requirements.txt - py38: codecov + pytest-travis-fold + cov: codecov commands = bash ./tests/pcluster/test.sh - py{27,34,35,36,37,38}: py.test -l -v --basetemp={envtmpdir} --html=report.html --cov=awsbatch --cov=pcluster tests/ - py38: codecov -e TOXENV + nocov: pytest -l -v --basetemp={envtmpdir} --html=report.html --ignore=src tests/ + cov: python setup.py clean --all build_ext --force --inplace + cov: pytest -l -v --basetemp={envtmpdir} --html=report.html --cov=src tests/ + cov: codecov -e TOXENV # Section used to define common variables used by multiple testenvs. 
[vars] code_dirs = setup.py \ - awsbatch/ \ - pcluster/ \ + src/ \ tests/ \ ../cloudformation/ \ ../tests/ \ @@ -107,8 +113,7 @@ deps = commands = flake8 \ setup.py \ - awsbatch/ \ - pcluster/ \ + src/ \ tests/ \ ../cloudformation/ \ ../tests/integration-tests/ \ @@ -124,8 +129,7 @@ deps = commands = bandit -r \ setup.py \ - awsbatch/ \ - pcluster/ \ + src/ \ ../util/ \ {posargs} @@ -148,9 +152,7 @@ deps = commands = pylint \ setup.py \ - awsbatch/ \ - pcluster/ \ - pcluster/resources/batch/custom_resources_code \ + src/ \ ../util/ \ {posargs} @@ -162,9 +164,7 @@ commands = --disable=all \ --enable=no-value-for-parameter \ setup.py \ - awsbatch/ \ - pcluster/ \ - pcluster/resources/custom_resources/custom_resources_code/ \ + src \ ../util/ \ {posargs} @@ -177,8 +177,7 @@ deps = commands = vulture \ setup.py \ - awsbatch/ \ - pcluster/ \ + src/ \ ../util/ \ {posargs} diff --git a/cloudformation/aws-parallelcluster.cfn.json b/cloudformation/aws-parallelcluster.cfn.json index 5dafbc0235..4f9c98eba3 100644 --- a/cloudformation/aws-parallelcluster.cfn.json +++ b/cloudformation/aws-parallelcluster.cfn.json @@ -1,21 +1,20 @@ { "AWSTemplateFormatVersion": "2010-09-09", - "Description": "AWS ParallelCluster Template. Version: aws-parallelcluster-2.10.0", + "Description": "AWS ParallelCluster Template. Version: aws-parallelcluster-2.10.1", "Parameters": { "KeyName": { "Description": "Name of an existing EC2 KeyPair to enable SSH access to the instances using the default cluster user.", "Type": "AWS::EC2::KeyPair::KeyName" }, "MasterInstanceType": { - "Description": "MasterServer EC2 instance type", + "Description": "Head node EC2 instance type", "Type": "String", - "Default": "t2.micro", "ConstraintDescription": "Must be a valid EC2 instance type, with support for HVM." }, "ComputeInstanceType": { "Description": "ComputeFleet EC2 instance type", "Type": "String", - "Default": "t2.micro", + "Default": "NONE", "ConstraintDescription": "Must be a valid EC2 instance type, with support for HVM." }, "MinSize": { @@ -68,11 +67,11 @@ "Description": "Comma delimited list of type of volume to create either new or from snapshot", "Type": "String", "Default": "gp2,gp2,gp2,gp2,gp2", - "ConstraintDescription": "must be a supported volume type: standard, io1, gp2, st1, sc1", - "AllowedPattern": "^(NONE|standard|io1|gp2|st1|sc1)((,|, )(NONE|standard|io1|gp2|st1|sc1)){4}$" + "ConstraintDescription": "must be a supported volume type: standard, io1, io2, gp2, gp3, st1, sc1", + "AllowedPattern": "^(NONE|standard|io1|io2|gp2|gp3|st1|sc1)((,|, )(NONE|standard|io1|io2|gp2|gp3|st1|sc1)){4}$" }, "MasterSubnetId": { - "Description": "ID of the Subnet you want to provision the Master server into", + "Description": "ID of the Subnet you want to provision the head node into", "Type": "AWS::EC2::Subnet::Id" }, "AvailabilityZone": { @@ -122,9 +121,14 @@ "VolumeIOPS": { - "Description": "Comma delimited list of number of IOPS for volume type io1. Not used for other volume types.", + "Description": "Comma delimited list of number of IOPS for volume type io1, io2 and gp3. Not used for other volume types.", + "Type": "String", + "Default": "NONE,NONE,NONE,NONE,NONE" + }, + "VolumeThroughput": { + "Description": "Comma delimited list of Throughput values for volume type gp3. Not used for other volume types.", "Type": "String", - "Default": "100,100,100,100,100" + "Default": "125,125,125,125,125" }, "PreInstallScript": { "Description": "Preinstall script URL.
This is run before any host configuration.", @@ -265,7 +269,7 @@ "Default": "NONE,NONE,NONE,NONE,NONE" }, "MasterRootVolumeSize": { - "Description": "Size of MasterServer EBS root volume in GB", + "Description": "Size of head node EBS root volume in GB", "Type": "Number", "Default": "25", "MinValue": "25" }, @@ -286,6 +290,11 @@ "Type": "CommaDelimitedList", "Default": "NONE" }, + "IAMLambdaRoleName": { + "Description": "Existing IAM role name for Lambda functions", + "Type": "String", + "Default": "NONE" + }, "VPCSecurityGroupId": { "Description": "Existing VPC security group Id", "Type": "String", @@ -316,10 +325,10 @@ "Type": "String" }, "RAIDOptions": { - "Description": "Comma Separated List of RAID related options, 8 parameters in total, [shared_dir,raid_type,num_of_raid_volumes,volume_type,volume_size,volume_iops,encrypted,ebs_kms_key_id]", + "Description": "Comma Separated List of RAID related options, 9 parameters in total, [shared_dir,raid_type,num_of_raid_volumes,volume_type,volume_size,volume_iops,encrypted,ebs_kms_key_id,volume_throughput]", "Type": "String", - "Default": "NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE", - "AllowedPattern": "^(NONE|.+)(,|, )(NONE|\\d)(,|, )(NONE|\\d)(,|, )(NONE|standard|io1|gp2|st1|sc1)(,|, )(NONE|\\d+)(,|, )(NONE|\\d+)(,|, )(NONE|true|false)(,|, )(NONE|.+)$" + "Default": "NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE", + "AllowedPattern": "^(NONE|.+)(,|, )(NONE|\\d)(,|, )(NONE|\\d)(,|, )(NONE|standard|io1|io2|gp2|gp3|st1|sc1)(,|, )(NONE|\\d+)(,|, )(NONE|\\d+)(,|, )(NONE|true|false)(,|, )(NONE|.+)(,|, )(NONE|\\d+)$" }, "NumberOfEBSVol": { "Description": "Number of EBS Volumes the user requested, up to 5", @@ -332,12 +341,12 @@ "Default": "NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE" }, "EFSOptions": { - "Description": "Comma separated list of EFS related options, 9 parameters in total, [shared_dir,efs_fs_id,performance_mode,efs_kms_key_id,provisioned_throughput,encrypted,throughput_mode,exists_valid_master_mt,exists_valid_compute_mt]", + "Description": "Comma separated list of EFS related options, 9 parameters in total, [shared_dir,efs_fs_id,performance_mode,efs_kms_key_id,provisioned_throughput,encrypted,throughput_mode,exists_valid_head_node_mt,exists_valid_compute_mt]", "Type": "String", "Default": "NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE" }, "Cores": { - "Description": "Comma seperated string of [master cores], [compute cores], [master instance type supports disabling hyperthreading via CPU options], [compute instance type supports disabling hyperthreading via CPU options].", + "Description": "Comma separated string of [head node cores], [compute cores], [head node instance type supports disabling hyperthreading via CPU options], [compute instance type supports disabling hyperthreading via CPU options].", "Type": "CommaDelimitedList", "Default": "NONE,NONE,NONE,NONE" }, @@ -370,7 +379,7 @@ "Default": "true,14" }, "NetworkInterfacesCount": { - "Description": "Comma separated string of [master network interfaces], [compute network interfaces].", + "Description": "Comma separated string of [head node network interfaces], [compute network interfaces].", "Type": "CommaDelimitedList", "Default": "1,1" } @@ -661,6 +670,14 @@ "NONE" ] }, + "CreateIAMLambdaRole": { + "Fn::Equals": [ + { + "Ref": "IAMLambdaRoleName" + }, + "NONE" + ] + }, "AddHITIamPolicies": { "Fn::And": [ { @@ -797,29 +814,37 @@ }, "Mappings": { "AWSRegionOS2AMIarm64": { + "af-south-1": { + "alinux": "UNSUPPORTED", + "alinux2":
"ami-00cd9a9915d5abf79", + "centos7": "UNSUPPORTED", + "centos8": "ami-0b0a12580b8fcdfe5", + "ubuntu1604": "UNSUPPORTED", + "ubuntu1804": "ami-046d98477dd9fc7eb" + }, "ap-east-1": { "alinux": "UNSUPPORTED", - "alinux2": "ami-0afa2c302be613354", + "alinux2": "ami-0f73d5986d43564e2", "centos7": "UNSUPPORTED", - "centos8": "ami-090b2a584a3f7f287", + "centos8": "ami-007ed99f82a962cc1", "ubuntu1604": "UNSUPPORTED", - "ubuntu1804": "ami-026aa8e70fb166ea9" + "ubuntu1804": "ami-0d2624d4b9e6970e7" }, "ap-northeast-1": { "alinux": "UNSUPPORTED", - "alinux2": "ami-0d90445a8e0dd846e", + "alinux2": "ami-0802175acef9f342e", "centos7": "UNSUPPORTED", - "centos8": "ami-082dd4b62e4501fd3", + "centos8": "ami-0490e30625fc3466f", "ubuntu1604": "UNSUPPORTED", - "ubuntu1804": "ami-0bb8f711d35faf564" + "ubuntu1804": "ami-08efda47b57a7c048" }, "ap-northeast-2": { "alinux": "UNSUPPORTED", - "alinux2": "ami-012482cac933631dd", + "alinux2": "ami-0b67c1511fe4c46e1", "centos7": "UNSUPPORTED", - "centos8": "ami-0a1d5d4acc59b8c42", + "centos8": "ami-03222a04de4228c08", "ubuntu1604": "UNSUPPORTED", - "ubuntu1804": "ami-001e5f95b7285db48" + "ubuntu1804": "ami-05ae60c35b06f34e5" }, "ap-northeast-3": { "alinux": "UNSUPPORTED", @@ -831,35 +856,35 @@ }, "ap-south-1": { "alinux": "UNSUPPORTED", - "alinux2": "ami-0e66f1d2824238a00", + "alinux2": "ami-0e75152f8053094c3", "centos7": "UNSUPPORTED", - "centos8": "ami-07ce7adc39b9e28b2", + "centos8": "ami-0075369b2abf46b05", "ubuntu1604": "UNSUPPORTED", - "ubuntu1804": "ami-060347326df2488ce" + "ubuntu1804": "ami-0ef07d3677758fee7" }, "ap-southeast-1": { "alinux": "UNSUPPORTED", - "alinux2": "ami-0944c0f8c608a1dbe", + "alinux2": "ami-0da4635ceb6d846d9", "centos7": "UNSUPPORTED", - "centos8": "ami-04a794d883bdaf470", + "centos8": "ami-086fd32165ac4e0a2", "ubuntu1604": "UNSUPPORTED", - "ubuntu1804": "ami-0e8da59005bbebc4c" + "ubuntu1804": "ami-052b40fbf4a7d852f" }, "ap-southeast-2": { "alinux": "UNSUPPORTED", - "alinux2": "ami-059405472e209bf90", + "alinux2": "ami-0d7a81adbdde9dce5", "centos7": "UNSUPPORTED", - "centos8": "ami-0a4a36662eaed020f", + "centos8": "ami-07fb6c0c050a39292", "ubuntu1604": "UNSUPPORTED", - "ubuntu1804": "ami-03e63532571847086" + "ubuntu1804": "ami-02a57f04d151d2ce2" }, "ca-central-1": { "alinux": "UNSUPPORTED", - "alinux2": "ami-075d7c38d2c7f2347", + "alinux2": "ami-070c24fa069a27265", "centos7": "UNSUPPORTED", - "centos8": "ami-0f6f89bc747dc927c", + "centos8": "ami-04be4e45fe60b19e1", "ubuntu1604": "UNSUPPORTED", - "ubuntu1804": "ami-0e28334f9f98bce1c" + "ubuntu1804": "ami-0bef62a8de524cc3d" }, "cn-north-1": { "alinux": "UNSUPPORTED", @@ -879,67 +904,83 @@ }, "eu-central-1": { "alinux": "UNSUPPORTED", - "alinux2": "ami-0d99e8ab1b493e9bc", + "alinux2": "ami-0331d559f079efd03", "centos7": "UNSUPPORTED", - "centos8": "ami-0ff2b3c8975a3db85", + "centos8": "ami-02ea94742b76197a4", "ubuntu1604": "UNSUPPORTED", - "ubuntu1804": "ami-0bdcdad522abee324" + "ubuntu1804": "ami-0ab72b2539a8e4eaf" }, "eu-north-1": { "alinux": "UNSUPPORTED", - "alinux2": "ami-064946adc4b641961", + "alinux2": "ami-03e4cc7f565c8efec", + "centos7": "UNSUPPORTED", + "centos8": "ami-052bb6243fc4b84a3", + "ubuntu1604": "UNSUPPORTED", + "ubuntu1804": "ami-00315391fc87d6a4a" + }, + "eu-south-1": { + "alinux": "UNSUPPORTED", + "alinux2": "ami-0392d65ba7c8af2b5", "centos7": "UNSUPPORTED", - "centos8": "ami-0047a557a82efb0d0", + "centos8": "ami-015ae8f55912771fe", "ubuntu1604": "UNSUPPORTED", - "ubuntu1804": "ami-0abb2f4b0fb7f9419" + "ubuntu1804": "ami-00d476744294f458a" }, "eu-west-1": { "alinux": 
"UNSUPPORTED", - "alinux2": "ami-0bb09e10d8b5cb747", + "alinux2": "ami-0b86c4a8da59d6d11", "centos7": "UNSUPPORTED", - "centos8": "ami-0163897289a397a7b", + "centos8": "ami-0773e67d6a7466681", "ubuntu1604": "UNSUPPORTED", - "ubuntu1804": "ami-07b73abb892002aba" + "ubuntu1804": "ami-0224e61822e0603ba" }, "eu-west-2": { "alinux": "UNSUPPORTED", - "alinux2": "ami-0f857639eb82d291e", + "alinux2": "ami-039c5ad24328545a9", "centos7": "UNSUPPORTED", - "centos8": "ami-041508f55e7353f16", + "centos8": "ami-009ac53f9323b9865", "ubuntu1604": "UNSUPPORTED", - "ubuntu1804": "ami-0c9501e4fae3574e1" + "ubuntu1804": "ami-06496496998c0480a" }, "eu-west-3": { "alinux": "UNSUPPORTED", - "alinux2": "ami-0b606e6e56b851d66", + "alinux2": "ami-067a371d97ca6fcf2", + "centos7": "UNSUPPORTED", + "centos8": "ami-021787c0ad5f60f6b", + "ubuntu1604": "UNSUPPORTED", + "ubuntu1804": "ami-0d16162157fa31ea4" + }, + "me-south-1": { + "alinux": "UNSUPPORTED", + "alinux2": "ami-0ea4909cdeeef4b03", "centos7": "UNSUPPORTED", - "centos8": "ami-08b703b2dce4d24db", + "centos8": "ami-085892d9aec1ed9f9", "ubuntu1604": "UNSUPPORTED", - "ubuntu1804": "ami-0fe23436c8cd7c55d" + "ubuntu1804": "ami-0ce1e2728e4ba14c6" }, "sa-east-1": { "alinux": "UNSUPPORTED", - "alinux2": "ami-010d19c007ce5a2a4", + "alinux2": "ami-013861b8a2b1a63b5", "centos7": "UNSUPPORTED", - "centos8": "ami-00cb9fa66203e7004", + "centos8": "ami-0d3e6fbc43aaa308c", "ubuntu1604": "UNSUPPORTED", - "ubuntu1804": "ami-0e2ac51ba079f6df3" + "ubuntu1804": "ami-079a46ad6559d1023" }, "us-east-1": { "alinux": "UNSUPPORTED", - "alinux2": "ami-0e96b1f22bc4ded89", + "alinux2": "ami-0b1f998cf2b1498db", "centos7": "UNSUPPORTED", - "centos8": "ami-02597a6058d9bb415", + "centos8": "ami-02839a5871b4ec582", "ubuntu1604": "UNSUPPORTED", - "ubuntu1804": "ami-0bb7443216d8ea706" + "ubuntu1804": "ami-0a5c0725ce4d960f1" }, "us-east-2": { "alinux": "UNSUPPORTED", - "alinux2": "ami-065f40fae77a9fb41", + "alinux2": "ami-059703a477566540c", "centos7": "UNSUPPORTED", - "centos8": "ami-0b833d4b88b0df37f", + "centos8": "ami-0d4f0890ef069afc0", "ubuntu1604": "UNSUPPORTED", - "ubuntu1804": "ami-02a92e06fd643c11b" + "ubuntu1804": "ami-08776e764b05c8fa7" }, "us-gov-east-1": { "alinux": "UNSUPPORTED", @@ -959,45 +1000,53 @@ }, "us-west-1": { "alinux": "UNSUPPORTED", - "alinux2": "ami-0715d8a272300c41f", + "alinux2": "ami-08540a991b0cd29bd", "centos7": "UNSUPPORTED", - "centos8": "ami-00aa1f310b54e3cd5", + "centos8": "ami-0aaeb3457c5f4a511", "ubuntu1604": "UNSUPPORTED", - "ubuntu1804": "ami-084f3531a939fa437" + "ubuntu1804": "ami-0ca03ec3eca322ace" }, "us-west-2": { "alinux": "UNSUPPORTED", - "alinux2": "ami-01c9e5995bc8ee16a", + "alinux2": "ami-0257f455a26d9ed84", "centos7": "UNSUPPORTED", - "centos8": "ami-0e6ed2c534bab4d35", + "centos8": "ami-02a5f2d441ae4ea8e", "ubuntu1604": "UNSUPPORTED", - "ubuntu1804": "ami-0a81e33be478706e1" + "ubuntu1804": "ami-0f67bacfcdf1f2374" } }, "AWSRegionOS2AMIx86": { + "af-south-1": { + "alinux": "ami-0f2e2135a05f814df", + "alinux2": "ami-046f49b550ce90d8a", + "centos7": "ami-0e0fb5acd64f2be5e", + "centos8": "ami-0d881910a58319c15", + "ubuntu1604": "ami-041e26e4bfed8cd7b", + "ubuntu1804": "ami-04280c7f4bee35afe" + }, "ap-east-1": { - "alinux": "ami-086d023ae9be29265", - "alinux2": "ami-060927fff43c77a88", - "centos7": "ami-0f7168e4940a70237", - "centos8": "ami-081b80aa11a8a4c50", - "ubuntu1604": "ami-0e4408eaab978d69d", - "ubuntu1804": "ami-05bd159bdb29e9aa9" + "alinux": "ami-05ef7cb79d3a43092", + "alinux2": "ami-0ec0d099b8a276aec", + "centos7": "ami-0d6f16d7fceae84ee", + 
"centos8": "ami-0ca1228e9ddcfa963", + "ubuntu1604": "ami-04539233794f181f7", + "ubuntu1804": "ami-0d16f6585c134b76d" }, "ap-northeast-1": { - "alinux": "ami-071d1f9ecf81d5866", - "alinux2": "ami-068b3b3104ae04d62", - "centos7": "ami-07555b739b7d7f81c", - "centos8": "ami-09151d65b9344095c", - "ubuntu1604": "ami-021df455e65a94ba1", - "ubuntu1804": "ami-0f99a29d4392ac446" + "alinux": "ami-006b7f4c929aaf4a9", + "alinux2": "ami-0a13402dc88c19be2", + "centos7": "ami-03a451be7ebcc159e", + "centos8": "ami-075b3892ecd63214f", + "ubuntu1604": "ami-01fe6e75948fa65df", + "ubuntu1804": "ami-096205fd8c1ea23b8" }, "ap-northeast-2": { - "alinux": "ami-0f13dd00403ae009c", - "alinux2": "ami-07cd9137f04b28895", - "centos7": "ami-0c0df7a4adaebab82", - "centos8": "ami-09e91a17a5454daf8", - "ubuntu1604": "ami-02a8aa869263d3268", - "ubuntu1804": "ami-0556b2de682ca759d" + "alinux": "ami-06a49824cc501a981", + "alinux2": "ami-0bdfbd3521caa5dd2", + "centos7": "ami-0ba51cb6c4ceae756", + "centos8": "ami-02287a8528bba818d", + "ubuntu1604": "ami-05dd0b07a0e8cf644", + "ubuntu1804": "ami-04ef860d893888eee" }, "ap-northeast-3": { "alinux": "UNSUPPORTED", @@ -1008,148 +1057,164 @@ "ubuntu1804": "UNSUPPORTED" }, "ap-south-1": { - "alinux": "ami-09952eb2d4dd3f2e2", - "alinux2": "ami-06cc0e0c03bd6abd9", - "centos7": "ami-0d4e69b97911c88aa", - "centos8": "ami-055bc2174e27b050e", - "ubuntu1604": "ami-0b939ee54f0c676de", - "ubuntu1804": "ami-059ddcf5c8e408012" + "alinux": "ami-0a47fd68cf7034c58", + "alinux2": "ami-0059d599d21636768", + "centos7": "ami-0919124b7770af8d9", + "centos8": "ami-0602d9f62b83744f5", + "ubuntu1604": "ami-06aec0a1241e29730", + "ubuntu1804": "ami-09fe484636519a7fd" }, "ap-southeast-1": { - "alinux": "ami-043d99842781aff9d", - "alinux2": "ami-005fd4e1d2ccdd8a9", - "centos7": "ami-0e715873ed4fdf15a", - "centos8": "ami-0f221ad56e980433c", - "ubuntu1604": "ami-04b44a45309e8f7b7", - "ubuntu1804": "ami-04878f0e013df9869" + "alinux": "ami-0e6cfdde386164836", + "alinux2": "ami-074f58cccc7ebb68f", + "centos7": "ami-021e3e90b0458781f", + "centos8": "ami-0e789f414c14f6332", + "ubuntu1604": "ami-0f35154dc0071f71b", + "ubuntu1804": "ami-0d2894ee85aac22c0" }, "ap-southeast-2": { - "alinux": "ami-02656322bc3cae72c", - "alinux2": "ami-0256ef02207960118", - "centos7": "ami-04ba6428ce77e728a", - "centos8": "ami-085265c078d86c75b", - "ubuntu1604": "ami-06367ff848861ddfc", - "ubuntu1804": "ami-02fa7e4f4b1073823" + "alinux": "ami-0ba7f788162c4a4de", + "alinux2": "ami-04b4a20ee9f67608f", + "centos7": "ami-0f5a161afeed62a35", + "centos8": "ami-0d1d7229f7b73a5de", + "ubuntu1604": "ami-0da4c39e17cdfef89", + "ubuntu1804": "ami-07de67a2c91b2605c" }, "ca-central-1": { - "alinux": "ami-0a853d47df00d434a", - "alinux2": "ami-08f2692bcab1f1660", - "centos7": "ami-08f798212e2b9dcc9", - "centos8": "ami-0fad54150dd6e4d1a", - "ubuntu1604": "ami-0438f2fca91841651", - "ubuntu1804": "ami-0e6aa6758cd219754" + "alinux": "ami-0808f2df200c7006c", + "alinux2": "ami-0523a9bc4151ee96e", + "centos7": "ami-057fc92460a096dab", + "centos8": "ami-03ea006f20d390940", + "ubuntu1604": "ami-098c762477bc6b1fb", + "ubuntu1804": "ami-0b8b3c3a561758ae3" }, "cn-north-1": { - "alinux": "ami-03a764b8e78792057", - "alinux2": "ami-0b03a127af1f7956c", + "alinux": "ami-050c2b7b0181fbfd5", + "alinux2": "ami-0a12307d2d0ddc535", "centos7": "UNSUPPORTED", "centos8": "UNSUPPORTED", - "ubuntu1604": "ami-0e4c187d835abb08b", - "ubuntu1804": "ami-099431ae52fbcc1b9" + "ubuntu1604": "ami-0a3f41e4d89bdff32", + "ubuntu1804": "ami-0abc7e40f18e6cda4" }, "cn-northwest-1": { - "alinux": 
"ami-077d4bb98d5ca5def", - "alinux2": "ami-0beb9d63e7fe38381", + "alinux": "ami-07d8dd2f175498353", + "alinux2": "ami-0ef379c7fd5eb332e", "centos7": "UNSUPPORTED", "centos8": "UNSUPPORTED", - "ubuntu1604": "ami-0be4b9f412690daa2", - "ubuntu1804": "ami-09277e9f7bb212c56" + "ubuntu1604": "ami-0d304a0c5d04ac4e2", + "ubuntu1804": "ami-0f52a155923e4de7f" }, "eu-central-1": { - "alinux": "ami-09f3ed2c18ba86996", - "alinux2": "ami-04d384114ab202c13", - "centos7": "ami-0b7dd5253f4218850", - "centos8": "ami-0d986e39b68bd9add", - "ubuntu1604": "ami-0f81af77a1e347a75", - "ubuntu1804": "ami-0928231bd3b6a52b2" + "alinux": "ami-032358687770c43e1", + "alinux2": "ami-07055c21834b0bc56", + "centos7": "ami-0a0f1b95d41e6a651", + "centos8": "ami-02277a3e208351cc6", + "ubuntu1604": "ami-0df794834b461ba22", + "ubuntu1804": "ami-07d1489352b517f39" }, "eu-north-1": { - "alinux": "ami-03d6705f566e99836", - "alinux2": "ami-01afd40fed001ed87", - "centos7": "ami-01d49e3863d1f2d11", - "centos8": "ami-014f4cd90479d0c71", - "ubuntu1604": "ami-0875c09d8a230cc3f", - "ubuntu1804": "ami-0dfc3abcaeeafebd6" + "alinux": "ami-038781ae9b21b98ca", + "alinux2": "ami-0fe475ca307943eb1", + "centos7": "ami-0c191b20554866575", + "centos8": "ami-00b01d23b71fff297", + "ubuntu1604": "ami-06975933696e0263a", + "ubuntu1804": "ami-0c47a559ed268c649" + }, + "eu-south-1": { + "alinux": "ami-0a9a6c50dc32a934f", + "alinux2": "ami-042fc69d71433a75c", + "centos7": "ami-03276d70dacf1a574", + "centos8": "ami-0ffbb41ac00ef5dff", + "ubuntu1604": "ami-0842531296e56778e", + "ubuntu1804": "ami-01cfe122f3044a34d" }, "eu-west-1": { - "alinux": "ami-02c2421d6cd745994", - "alinux2": "ami-04627ea002a11c93c", - "centos7": "ami-009bfc0fb11ad5c6c", - "centos8": "ami-09a3a5c16f6c84015", - "ubuntu1604": "ami-0da1dd9a8b40ee87a", - "ubuntu1804": "ami-06065b90e25ef853b" + "alinux": "ami-0dd799c2e1a68608e", + "alinux2": "ami-063ac3df7f8595751", + "centos7": "ami-000a3d84d3c77fdb3", + "centos8": "ami-04a5c9c04faa5b9f7", + "ubuntu1604": "ami-0d9c6bf221068c7c3", + "ubuntu1804": "ami-0cbef8b383ddeff80" }, "eu-west-2": { - "alinux": "ami-0ef2ff4ce73c7208e", - "alinux2": "ami-0953845dcd74d8d34", - "centos7": "ami-0100afbc14f4008ce", - "centos8": "ami-09956ce8a4fda6388", - "ubuntu1604": "ami-0519e22cbfd281d2c", - "ubuntu1804": "ami-0a26ef861ba74b872" + "alinux": "ami-0849a887182dd033a", + "alinux2": "ami-086111e12527fa455", + "centos7": "ami-0a347ff9b26c5e34c", + "centos8": "ami-017fab72fc1db3851", + "ubuntu1604": "ami-0d2a3fa50134294e9", + "ubuntu1804": "ami-08d337feaf0f0a59f" }, "eu-west-3": { - "alinux": "ami-07e87c1e0dd3d3e75", - "alinux2": "ami-01525b6212f1a191b", - "centos7": "ami-0b2a108cb09e900ac", - "centos8": "ami-08454ae2840055567", - "ubuntu1604": "ami-0bf33211929a48c9f", - "ubuntu1804": "ami-0ee11f5712078f6e3" + "alinux": "ami-060df914a5b5ad680", + "alinux2": "ami-0258a5a8320ccfa42", + "centos7": "ami-02f8c6e2622c3804f", + "centos8": "ami-041e8760a7b80cfde", + "ubuntu1604": "ami-010b0bc4570ec96a3", + "ubuntu1804": "ami-074fae2e6420b8ac7" + }, + "me-south-1": { + "alinux": "ami-0d68c8e916ccf0418", + "alinux2": "ami-0c98692d98eb38c50", + "centos7": "ami-0a64f83dd01c08dab", + "centos8": "ami-0e30fd8da5a1ddc84", + "ubuntu1604": "ami-08d2926b1669d79d2", + "ubuntu1804": "ami-0d7bc19407d2b26a9" }, "sa-east-1": { - "alinux": "ami-0d552ac237f838360", - "alinux2": "ami-0cabfea2e0cf31af2", - "centos7": "ami-0dde4970d46cc7abc", - "centos8": "ami-0f2a6ece6191f85e8", - "ubuntu1604": "ami-0c337a8051bd0ed6f", - "ubuntu1804": "ami-0ff7ab2d586a7f0a6" + "alinux": 
"ami-0d4a17532432d5aa2", + "alinux2": "ami-0f463bcf6d86cad85", + "centos7": "ami-082aafa914dc04479", + "centos8": "ami-005096f26ee208519", + "ubuntu1604": "ami-0ae46391f64fa0b71", + "ubuntu1804": "ami-0b7b49d35034bbc2f" }, "us-east-1": { - "alinux": "ami-01a1cf6f36f2bd13b", - "alinux2": "ami-018cd948cda2d7384", - "centos7": "ami-0bbd714da0c7763e2", - "centos8": "ami-0615c0838767b8e00", - "ubuntu1604": "ami-04ce9ff46c759ffa8", - "ubuntu1804": "ami-05b80d924accf2dac" + "alinux": "ami-0604e4a14869de93f", + "alinux2": "ami-0b71488efbe422723", + "centos7": "ami-0516dc2ba9f4fc177", + "centos8": "ami-0f56d9873066cb6b9", + "ubuntu1604": "ami-0b3dfe986b324a1bf", + "ubuntu1804": "ami-009fdaa0002906c5b" }, "us-east-2": { - "alinux": "ami-0a8d40acce6869be4", - "alinux2": "ami-08aa991eca6bf394a", - "centos7": "ami-0b3d403d025f9ba61", - "centos8": "ami-0a07f43c650ad20e9", - "ubuntu1604": "ami-0eb0a40959685a105", - "ubuntu1804": "ami-05e89121e9222b5c6" + "alinux": "ami-00d4efc81188687a0", + "alinux2": "ami-0075df3faa5b6e07e", + "centos7": "ami-07d1461ceceb4df43", + "centos8": "ami-073e63c94c971cc69", + "ubuntu1604": "ami-04fd4dda7bb2fcaff", + "ubuntu1804": "ami-0ec51c20170525d3f" }, "us-gov-east-1": { - "alinux": "ami-05bb72e83f0973bdc", - "alinux2": "ami-0cbbce3284f341759", + "alinux": "ami-01d57910cd71ea0c4", + "alinux2": "ami-057f7c2d5a1ca7b7d", "centos7": "UNSUPPORTED", "centos8": "UNSUPPORTED", - "ubuntu1604": "ami-09bdb46f2643fefbd", - "ubuntu1804": "ami-0b540f46f29ef9019" + "ubuntu1604": "ami-00150b953797bdaa4", + "ubuntu1804": "ami-04a13dedb7a0a1cfa" }, "us-gov-west-1": { - "alinux": "ami-0318093bb66476048", - "alinux2": "ami-01b605af54e3fdd9e", + "alinux": "ami-0cfc4f4eb94c9f403", + "alinux2": "ami-01222b796bafd609f", "centos7": "UNSUPPORTED", "centos8": "UNSUPPORTED", - "ubuntu1604": "ami-0b790f0b4c3856aa3", - "ubuntu1804": "ami-00c69b11f502f4f08" + "ubuntu1604": "ami-06bfdc6f4185351c5", + "ubuntu1804": "ami-0be2fc1895e4b4d9f" }, "us-west-1": { - "alinux": "ami-0425c898b65b066f3", - "alinux2": "ami-04b508b1dae54310b", - "centos7": "ami-0e589824d8d821ce0", - "centos8": "ami-0e71f40a3e18cb184", - "ubuntu1604": "ami-012699e63307886c5", - "ubuntu1804": "ami-0339ba2c62b77a99e" + "alinux": "ami-0c8decb747bfca25f", + "alinux2": "ami-01c4b0b6d5597b80b", + "centos7": "ami-0a426e145ced105df", + "centos8": "ami-0f3085000b53339e0", + "ubuntu1604": "ami-00fbdde9fb06d3b09", + "ubuntu1804": "ami-04ac69ccbff147270" }, "us-west-2": { - "alinux": "ami-0ab937dbac92ae27e", - "alinux2": "ami-0f085dccfc6937af7", - "centos7": "ami-0f386f4a170027bbb", - "centos8": "ami-0668454eee630c595", - "ubuntu1604": "ami-08b09c217d19d5e1a", - "ubuntu1804": "ami-036a032a9f6c44f84" + "alinux": "ami-018ccd7660ecade5e", + "alinux2": "ami-079facc5ab3fdf701", + "centos7": "ami-0e92bdb4aee551791", + "centos8": "ami-029dc099ae4e121f1", + "ubuntu1604": "ami-008383c0ab2a2d425", + "ubuntu1804": "ami-01a7264e2e3bf272f" } }, "OSFeatures": { @@ -1180,8 +1245,8 @@ }, "PackagesVersions": { "default": { - "parallelcluster": "2.10.0", - "cookbook": "aws-parallelcluster-cookbook-2.10.0", + "parallelcluster": "2.10.1", + "cookbook": "aws-parallelcluster-cookbook-2.10.1", "chef": "15.11.8", "berkshelf": "7.0.10", "ami": "dev" @@ -1935,7 +2000,7 @@ "MasterSecurityGroup": { "Type": "AWS::EC2::SecurityGroup", "Properties": { - "GroupDescription": "Enable access to the Master host", + "GroupDescription": "Enable access to the head node", "VpcId": { "Ref": "VPCId" }, @@ -2084,7 +2149,7 @@ "MasterENI": { "Type": "AWS::EC2::NetworkInterface", "Properties": 
{ - "Description": "AWS ParallelCluster Master Server", + "Description": "AWS ParallelCluster head node interface", "SubnetId": { "Ref": "MasterSubnetId" }, @@ -2345,6 +2410,15 @@ }, "Architecture": { "Ref": "Architecture" + }, + "MasterPrivateIP": { + "Fn::GetAtt": [ + "MasterServerSubstack", + "Outputs.MasterPrivateIP" + ] + }, + "IAMLambdaRoleName": { + "Ref": "IAMLambdaRoleName" } }, "TemplateURL": { @@ -2505,7 +2579,8 @@ "PolicyName": "LambdaPolicy" } ] - } + }, + "Condition": "CreateIAMLambdaRole" }, "CleanupResourcesS3BucketCustomResource": { "Type": "AWS::CloudFormation::CustomResource", @@ -2582,9 +2657,17 @@ "Handler": "cleanup_resources.handler", "MemorySize": 128, "Role": { - "Fn::GetAtt": [ - "CleanupResourcesFunctionExecutionRole", - "Arn" + "Fn::If": [ + "CreateIAMLambdaRole", + { + "Fn::GetAtt": [ + "CleanupResourcesFunctionExecutionRole", + "Arn" + ] + }, + { + "Fn::Sub": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/${IAMLambdaRoleName}" + } ] }, "Runtime": "python3.8", @@ -2644,6 +2727,9 @@ "VolumeIOPS": { "Ref": "VolumeIOPS" }, + "VolumeThroughput": { + "Ref": "VolumeThroughput" + }, "EBSEncryption": { "Ref": "EBSEncryption" }, @@ -3958,6 +4044,9 @@ } ] }, + "IAMLambdaRoleName": { + "Ref": "IAMLambdaRoleName" + }, "ResourcesS3Bucket": { "Ref": "ResourcesS3Bucket" }, @@ -4087,7 +4176,7 @@ }, "Outputs": { "ClusterUser": { - "Description": "Username to login to Master host", + "Description": "Username to login to head node", "Value": { "Fn::FindInMap": [ "OSFeatures", @@ -4099,7 +4188,7 @@ } }, "MasterPrivateIP": { - "Description": "Private IP Address of the Master host", + "Description": "Private IP Address of the head node", "Value": { "Fn::GetAtt": [ "MasterServerSubstack", @@ -4108,7 +4197,7 @@ } }, "MasterPublicIP": { - "Description": "Public IP Address of the Master host", + "Description": "Public IP Address of the head node", "Value": { "Fn::GetAtt": [ "MasterServerSubstack", diff --git a/cloudformation/batch-substack.cfn.json b/cloudformation/batch-substack.cfn.json index a8a4ba1616..980fceb0fd 100644 --- a/cloudformation/batch-substack.cfn.json +++ b/cloudformation/batch-substack.cfn.json @@ -95,6 +95,14 @@ "x86_64", "arm64" ] + }, + "MasterPrivateIP": { + "Description": "Private IP of the head node", + "Type": "String" + }, + "IAMLambdaRoleName": { + "Description": "Existing IAM role name for Lambda functions", + "Type": "String" } }, "Conditions": { @@ -113,6 +121,14 @@ }, "arm64" ] + }, + "CreateIAMLambdaRole": { + "Fn::Equals": [ + { + "Ref": "IAMLambdaRoleName" + }, + "NONE" + ] } }, "Resources": { @@ -525,6 +541,12 @@ "Value": { "Ref": "RAIDSharedDir" } + }, + { + "Name": "PCLUSTER_MASTER_IP", + "Value": { + "Ref": "MasterPrivateIP" + } } ] } @@ -589,6 +611,12 @@ "Value": { "Ref": "RAIDSharedDir" } + }, + { + "Name": "PCLUSTER_MASTER_IP", + "Value": { + "Ref": "MasterPrivateIP" + } } ] } @@ -805,9 +833,17 @@ "Handler": "manage_docker_images.handler", "MemorySize": 128, "Role": { - "Fn::GetAtt": [ - "ManageDockerImagesFunctionExecutionRole", - "Arn" + "Fn::If": [ + "CreateIAMLambdaRole", + { + "Fn::GetAtt": [ + "ManageDockerImagesFunctionExecutionRole", + "Arn" + ] + }, + { + "Fn::Sub": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/${IAMLambdaRoleName}" + } ] }, "Runtime": "python3.6", @@ -881,7 +917,8 @@ "PolicyName": "LambdaPolicy" } ] - } + }, + "Condition": "CreateIAMLambdaRole" }, "DockerBuildWaitHandle": { "Type": "AWS::CloudFormation::WaitConditionHandle", @@ -936,7 +973,8 @@ "PolicyName": "LambdaPolicy" } ] - } + }, + "Condition": 
"CreateIAMLambdaRole" }, "SendBuildNotificationFunction": { "Type": "AWS::Lambda::Function", @@ -955,9 +993,17 @@ "Handler": "send_build_notification.handler", "MemorySize": 128, "Role": { - "Fn::GetAtt": [ - "SendBuildNotificationFunctionExecutionRole", - "Arn" + "Fn::If": [ + "CreateIAMLambdaRole", + { + "Fn::GetAtt": [ + "SendBuildNotificationFunctionExecutionRole", + "Arn" + ] + }, + { + "Fn::Sub": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/${IAMLambdaRoleName}" + } ] }, "Runtime": "python3.6", diff --git a/cloudformation/compute-fleet-hit-substack.cfn.yaml b/cloudformation/compute-fleet-hit-substack.cfn.yaml index 69c1df4e0d..3efd304eb1 100644 --- a/cloudformation/compute-fleet-hit-substack.cfn.yaml +++ b/cloudformation/compute-fleet-hit-substack.cfn.yaml @@ -85,6 +85,8 @@ Parameters: Type: AWS::EC2::VPC::Id RootRole: Type: String + IAMLambdaRoleName: + Type: String ResourcesS3Bucket: Type: String ArtifactS3RootDirectory: @@ -101,6 +103,9 @@ Conditions: UseAssociatePublicIpAddress: !Equals - !Ref 'AssociatePublicIpAddress' - true + CreateIAMLambdaRole: !Equals + - !Ref 'IAMLambdaRoleName' + - NONE Resources: {%- for queue, queue_config in queues.items() %} {%- for compute_resource in queue_config.compute_resource_settings.values() %} @@ -528,7 +533,10 @@ Resources: S3Key: !Sub '${ArtifactS3RootDirectory}/custom_resources_code/artifacts.zip' Handler: cleanup_resources.handler MemorySize: 128 - Role: !GetAtt 'CleanupRoute53FunctionExecutionRole.Arn' + Role: !If + - CreateIAMLambdaRole + - !GetAtt 'CleanupRoute53FunctionExecutionRole.Arn' + - !Sub 'arn:${AWS::Partition}:iam::${AWS::AccountId}:role/${IAMLambdaRoleName}' Runtime: python3.8 Timeout: 900 CleanupRoute53CustomResource: @@ -570,6 +578,7 @@ Resources: - ClusterHostedZone: !Ref 'ClusterHostedZone' Version: '2012-10-17' PolicyName: LambdaPolicy + Condition: CreateIAMLambdaRole {%- endif %} UpdateWaiterFunction: Type: AWS::Lambda::Function @@ -580,7 +589,10 @@ Resources: S3Key: !Sub '${ArtifactS3RootDirectory}/custom_resources_code/artifacts.zip' Handler: wait_for_update.handler MemorySize: 128 - Role: !GetAtt 'UpdateWaiterFunctionExecutionRole.Arn' + Role: !If + - CreateIAMLambdaRole + - !GetAtt 'UpdateWaiterFunctionExecutionRole.Arn' + - !Sub 'arn:${AWS::Partition}:iam::${AWS::AccountId}:role/${IAMLambdaRoleName}' Runtime: python3.8 Timeout: 900 UpdateWaiterFunctionExecutionRole: @@ -613,6 +625,7 @@ Resources: Resource: !Sub 'arn:${AWS::Partition}:dynamodb:${AWS::Region}:${AWS::AccountId}:table/${DynamoDBTable}' Version: '2012-10-17' PolicyName: LambdaPolicy + Condition: CreateIAMLambdaRole Metadata: RootRole: !Ref 'RootRole' VPCId: !Ref 'VPCId' diff --git a/cloudformation/cw-dashboard-substack.cfn.yaml b/cloudformation/cw-dashboard-substack.cfn.yaml index 155832ac48..fcaf288b50 100644 --- a/cloudformation/cw-dashboard-substack.cfn.yaml +++ b/cloudformation/cw-dashboard-substack.cfn.yaml @@ -53,10 +53,10 @@ Parameters: Type: String {#- Head Node parameters #} MasterInstanceId: - Description: ID of the Master instance + Description: ID of the head node instance Type: AWS::EC2::Instance::Id MasterPrivateIP: - Description: Private IP of the Master instance + Description: Private IP of the head node instance Type: String {#- EBS parameters #} EBSVolumesIds: @@ -144,8 +144,8 @@ Resources: {%- endfor %} {#- Conditional EBS metrics #} - {%- set ebs_metrics_conditions = [{'metric': 'VolumeConsumedReadWriteOps', 'supported_vol_types': ["io1"], 'extra_params': ['"title":"Consumed Read/Write Ops"']}, - {'metric': 
'VolumeThroughputPercentage', 'supported_vol_types': ["io1"], 'extra_params': ['"title":"Throughput Percentage"']}, + {%- set ebs_metrics_conditions = [{'metric': 'VolumeConsumedReadWriteOps', 'supported_vol_types': ["io1", "io2", "gp3"], 'extra_params': ['"title":"Consumed Read/Write Ops"']}, + {'metric': 'VolumeThroughputPercentage', 'supported_vol_types': ["io1", "io2", "gp3"], 'extra_params': ['"title":"Throughput Percentage"']}, {'metric': 'BurstBalance', 'supported_vol_types': ["gp2", "st1", "sc1"], 'extra_params': ['"title":"Burst Balance"']}] %} {%- for metric_condition_params in ebs_metrics_conditions %} @@ -207,8 +207,8 @@ Resources: {%- endfor %} {#- Conditional RAID metrics #} - {%- set raid_metrics_conditions_params = [{'metric': 'VolumeConsumedReadWriteOps', 'supported_vol_types': ["io1"], 'extra_params': ['"title":"Consumed Read/Write Ops"']}, - {'metric': 'VolumeThroughputPercentage', 'supported_vol_types': ["io1"], 'extra_params': ['"title":"Throughput Percentage"']}, + {%- set raid_metrics_conditions_params = [{'metric': 'VolumeConsumedReadWriteOps', 'supported_vol_types': ["io1" ,"io2"], 'extra_params': ['"title":"Consumed Read/Write Ops"']}, + {'metric': 'VolumeThroughputPercentage', 'supported_vol_types': ["io1", "io2"], 'extra_params': ['"title":"Throughput Percentage"']}, {'metric': 'BurstBalance', 'supported_vol_types': ["gp2", "st1", "sc1"], 'extra_params': ['"title":"Burst Balance"']}] %} {%- for metric_condition_params in raid_metrics_conditions_params %} diff --git a/cloudformation/ebs-substack.cfn.json b/cloudformation/ebs-substack.cfn.json index 58b66f04d5..e2f80ff92e 100644 --- a/cloudformation/ebs-substack.cfn.json +++ b/cloudformation/ebs-substack.cfn.json @@ -120,6 +120,49 @@ ] }, "Vol1_UseEBSPIOPS": { + "Fn::Or": [ + { + "Fn::Equals": [ + { + "Fn::Select": [ + "0", + { + "Ref": "VolumeType" + } + ] + }, + "gp3" + ] + }, + { + "Fn::Equals": [ + { + "Fn::Select": [ + "0", + { + "Ref": "VolumeType" + } + ] + }, + "io1" + ] + }, + { + "Fn::Equals": [ + { + "Fn::Select": [ + "0", + { + "Ref": "VolumeType" + } + ] + }, + "io2" + ] + } + ] + }, + "Vol1_UseEBSThroughput": { "Fn::Equals": [ { "Fn::Select": [ @@ -129,7 +172,7 @@ } ] }, - "io1" + "gp3" ] }, "Vol1_UseEBSSnapshot": { @@ -258,6 +301,49 @@ ] }, "Vol2_UseEBSPIOPS": { + "Fn::Or": [ + { + "Fn::Equals": [ + { + "Fn::Select": [ + "1", + { + "Ref": "VolumeType" + } + ] + }, + "gp3" + ] + }, + { + "Fn::Equals": [ + { + "Fn::Select": [ + "1", + { + "Ref": "VolumeType" + } + ] + }, + "io1" + ] + }, + { + "Fn::Equals": [ + { + "Fn::Select": [ + "1", + { + "Ref": "VolumeType" + } + ] + }, + "io2" + ] + } + ] + }, + "Vol2_UseEBSThroughput": { "Fn::Equals": [ { "Fn::Select": [ @@ -267,7 +353,7 @@ } ] }, - "io1" + "gp3" ] }, "Vol2_UseEBSSnapshot": { @@ -396,6 +482,49 @@ ] }, "Vol3_UseEBSPIOPS": { + "Fn::Or": [ + { + "Fn::Equals": [ + { + "Fn::Select": [ + "2", + { + "Ref": "VolumeType" + } + ] + }, + "gp3" + ] + }, + { + "Fn::Equals": [ + { + "Fn::Select": [ + "2", + { + "Ref": "VolumeType" + } + ] + }, + "io1" + ] + }, + { + "Fn::Equals": [ + { + "Fn::Select": [ + "2", + { + "Ref": "VolumeType" + } + ] + }, + "io2" + ] + } + ] + }, + "Vol3_UseEBSThroughput": { "Fn::Equals": [ { "Fn::Select": [ @@ -405,7 +534,7 @@ } ] }, - "io1" + "gp3" ] }, "Vol3_UseEBSSnapshot": { @@ -534,6 +663,49 @@ ] }, "Vol4_UseEBSPIOPS": { + "Fn::Or": [ + { + "Fn::Equals": [ + { + "Fn::Select": [ + "3", + { + "Ref": "VolumeType" + } + ] + }, + "gp3" + ] + }, + { + "Fn::Equals": [ + { + "Fn::Select": [ + "3", + { + "Ref": "VolumeType" + } + ] + 
}, + "io1" + ] + }, + { + "Fn::Equals": [ + { + "Fn::Select": [ + "3", + { + "Ref": "VolumeType" + } + ] + }, + "io2" + ] + } + ] + }, + "Vol4_UseEBSThroughput": { "Fn::Equals": [ { "Fn::Select": [ @@ -543,7 +715,7 @@ } ] }, - "io1" + "gp3" ] }, "Vol4_UseEBSSnapshot": { @@ -672,6 +844,49 @@ ] }, "Vol5_UseEBSPIOPS": { + "Fn::Or": [ + { + "Fn::Equals": [ + { + "Fn::Select": [ + "4", + { + "Ref": "VolumeType" + } + ] + }, + "gp3" + ] + }, + { + "Fn::Equals": [ + { + "Fn::Select": [ + "4", + { + "Ref": "VolumeType" + } + ] + }, + "io1" + ] + }, + { + "Fn::Equals": [ + { + "Fn::Select": [ + "4", + { + "Ref": "VolumeType" + } + ] + }, + "io2" + ] + } + ] + }, + "Vol5_UseEBSThroughput": { "Fn::Equals": [ { "Fn::Select": [ @@ -681,7 +896,7 @@ } ] }, - "io1" + "gp3" ] }, "Vol5_UseEBSSnapshot": { @@ -1072,7 +1287,11 @@ "Type": "Number" }, "VolumeIOPS": { - "Description": "Number of IOPS for volume type io1. Not used for other volume types.", + "Description": "Number of IOPS for volume type io1, io2 and gp3. Not used for other volume types.", + "Type": "CommaDelimitedList" + }, + "VolumeThroughput": { + "Description": "Throughput for volume type gp3. Not used for other volume types.", "Type": "CommaDelimitedList" }, "VolumeSize": { @@ -1123,6 +1342,22 @@ } ] }, + "Throughput": { + "Fn::If": [ + "Vol1_UseEBSThroughput", + { + "Fn::Select": [ + "0", + { + "Ref": "VolumeThroughput" + } + ] + }, + { + "Ref": "AWS::NoValue" + } + ] + }, "KmsKeyId": { "Fn::If": [ "Vol1_UseEBSKMSKey", @@ -1224,6 +1459,22 @@ } ] }, + "Throughput": { + "Fn::If": [ + "Vol2_UseEBSThroughput", + { + "Fn::Select": [ + "1", + { + "Ref": "VolumeThroughput" + } + ] + }, + { + "Ref": "AWS::NoValue" + } + ] + }, "KmsKeyId": { "Fn::If": [ "Vol2_UseEBSKMSKey", @@ -1325,6 +1576,22 @@ } ] }, + "Throughput": { + "Fn::If": [ + "Vol3_UseEBSThroughput", + { + "Fn::Select": [ + "2", + { + "Ref": "VolumeThroughput" + } + ] + }, + { + "Ref": "AWS::NoValue" + } + ] + }, "KmsKeyId": { "Fn::If": [ "Vol3_UseEBSKMSKey", @@ -1426,6 +1693,22 @@ } ] }, + "Throughput": { + "Fn::If": [ + "Vol4_UseEBSThroughput", + { + "Fn::Select": [ + "3", + { + "Ref": "VolumeThroughput" + } + ] + }, + { + "Ref": "AWS::NoValue" + } + ] + }, "KmsKeyId": { "Fn::If": [ "Vol4_UseEBSKMSKey", @@ -1527,6 +1810,22 @@ } ] }, + "Throughput": { + "Fn::If": [ + "Vol5_UseEBSThroughput", + { + "Fn::Select": [ + "4", + { + "Ref": "VolumeThroughput" + } + ] + }, + { + "Ref": "AWS::NoValue" + } + ] + }, "KmsKeyId": { "Fn::If": [ "Vol5_UseEBSKMSKey", diff --git a/cloudformation/efs-substack.cfn.json b/cloudformation/efs-substack.cfn.json index 57c8f0f695..8f00c106a5 100644 --- a/cloudformation/efs-substack.cfn.json +++ b/cloudformation/efs-substack.cfn.json @@ -247,7 +247,7 @@ "Type": "CommaDelimitedList" }, "MasterSubnetId": { - "Description": "Master subnet id for master mount target", + "Description": "Head node subnet id for head node mount target", "Type": "String" } }, diff --git a/cloudformation/master-server-substack.cfn.yaml b/cloudformation/master-server-substack.cfn.yaml index 3ef897b1b7..eaecadd4c7 100644 --- a/cloudformation/master-server-substack.cfn.yaml +++ b/cloudformation/master-server-substack.cfn.yaml @@ -696,7 +696,7 @@ Resources: command: chef-client --local-mode --config /etc/chef/client.rb --log_level info --logfile /var/log/chef-client.log --force-formatter --no-color --chef-zero-port 8889 --json-attributes /etc/chef/dna.json --override-runlist - aws-parallelcluster::update_master + aws-parallelcluster::update_head_node cwd: /etc/chef UpdateWaiterCustomResource: 
Type: AWS::CloudFormation::CustomResource @@ -707,17 +707,17 @@ Resources: Condition: HasUpdateWaiterFunction Outputs: MasterInstanceID: - Description: ID of the Master instance + Description: ID of the head node instance Value: !Ref 'MasterServer' MasterPrivateIP: - Description: Private IP Address of the Master host + Description: Private IP Address of the head node Value: !GetAtt 'MasterServer.PrivateIp' MasterPublicIP: - Description: Public IP Address of the Master host + Description: Public IP Address of the head node Value: !GetAtt 'MasterServer.PublicIp' Condition: HasMasterPublicIp MasterPrivateDnsName: - Description: Private DNS name of the Master host + Description: Private DNS name of the head node Value: !GetAtt 'MasterServer.PrivateDnsName' Metadata: DependsOnCustomResources: !Ref 'DependsOnCustomResources' diff --git a/cloudformation/raid-substack.cfn.json b/cloudformation/raid-substack.cfn.json index 006139de7f..148eca9629 100644 --- a/cloudformation/raid-substack.cfn.json +++ b/cloudformation/raid-substack.cfn.json @@ -151,6 +151,49 @@ ] }, "Vol1_UseEBSPIOPS": { + "Fn::Or": [ + { + "Fn::Equals": [ + { + "Fn::Select": [ + "3", + { + "Ref": "RAIDOptions" + } + ] + }, + "gp3" + ] + }, + { + "Fn::Equals": [ + { + "Fn::Select": [ + "3", + { + "Ref": "RAIDOptions" + } + ] + }, + "io1" + ] + }, + { + "Fn::Equals": [ + { + "Fn::Select": [ + "3", + { + "Ref": "RAIDOptions" + } + ] + }, + "io2" + ] + } + ] + }, + "Vol1_UseEBSThroughput": { "Fn::Equals": [ { "Fn::Select": [ @@ -160,7 +203,7 @@ } ] }, - "io1" + "gp3" ] }, "Vol1_UseVolumeSize": { @@ -235,6 +278,49 @@ ] }, "Vol2_UseEBSPIOPS": { + "Fn::Or": [ + { + "Fn::Equals": [ + { + "Fn::Select": [ + "3", + { + "Ref": "RAIDOptions" + } + ] + }, + "gp3" + ] + }, + { + "Fn::Equals": [ + { + "Fn::Select": [ + "3", + { + "Ref": "RAIDOptions" + } + ] + }, + "io1" + ] + }, + { + "Fn::Equals": [ + { + "Fn::Select": [ + "3", + { + "Ref": "RAIDOptions" + } + ] + }, + "io2" + ] + } + ] + }, + "Vol2_UseEBSThroughput": { "Fn::Equals": [ { "Fn::Select": [ @@ -244,7 +330,7 @@ } ] }, - "io1" + "gp3" ] }, "Vol2_UseVolumeSize": { @@ -319,6 +405,49 @@ ] }, "Vol3_UseEBSPIOPS": { + "Fn::Or": [ + { + "Fn::Equals": [ + { + "Fn::Select": [ + "3", + { + "Ref": "RAIDOptions" + } + ] + }, + "gp3" + ] + }, + { + "Fn::Equals": [ + { + "Fn::Select": [ + "3", + { + "Ref": "RAIDOptions" + } + ] + }, + "io1" + ] + }, + { + "Fn::Equals": [ + { + "Fn::Select": [ + "3", + { + "Ref": "RAIDOptions" + } + ] + }, + "io2" + ] + } + ] + }, + "Vol3_UseEBSThroughput": { "Fn::Equals": [ { "Fn::Select": [ @@ -328,7 +457,7 @@ } ] }, - "io1" + "gp3" ] }, "Vol3_UseVolumeSize": { @@ -403,6 +532,49 @@ ] }, "Vol4_UseEBSPIOPS": { + "Fn::Or": [ + { + "Fn::Equals": [ + { + "Fn::Select": [ + "3", + { + "Ref": "RAIDOptions" + } + ] + }, + "gp3" + ] + }, + { + "Fn::Equals": [ + { + "Fn::Select": [ + "3", + { + "Ref": "RAIDOptions" + } + ] + }, + "io1" + ] + }, + { + "Fn::Equals": [ + { + "Fn::Select": [ + "3", + { + "Ref": "RAIDOptions" + } + ] + }, + "io2" + ] + } + ] + }, + "Vol4_UseEBSThroughput": { "Fn::Equals": [ { "Fn::Select": [ @@ -412,7 +584,7 @@ } ] }, - "io1" + "gp3" ] }, "Vol4_UseVolumeSize": { @@ -487,6 +659,49 @@ ] }, "Vol5_UseEBSPIOPS": { + "Fn::Or": [ + { + "Fn::Equals": [ + { + "Fn::Select": [ + "3", + { + "Ref": "RAIDOptions" + } + ] + }, + "gp3" + ] + }, + { + "Fn::Equals": [ + { + "Fn::Select": [ + "3", + { + "Ref": "RAIDOptions" + } + ] + }, + "io1" + ] + }, + { + "Fn::Equals": [ + { + "Fn::Select": [ + "3", + { + "Ref": "RAIDOptions" + } + ] + }, + "io2" + ] + } + ] + 
}, + "Vol5_UseEBSThroughput": { "Fn::Equals": [ { "Fn::Select": [ @@ -496,7 +711,7 @@ } ] }, - "io1" + "gp3" ] }, "Vol5_UseVolumeSize": { @@ -644,7 +859,7 @@ "Type": "AWS::EC2::AvailabilityZone::Name" }, "RAIDOptions": { - "Description": "Comma separated list of RAID related options, 8 parameters in total, [0 shared_dir,1 raid_type,2 num_of_vols,3 vol_type,4 vol_size,5 vol_IOPS,6 encrypted, 7 ebs_kms_key]", + "Description": "Comma separated list of RAID related options, 9 parameters in total, [0 shared_dir,1 raid_type,2 num_of_vols,3 vol_type,4 vol_size,5 vol_IOPS,6 encrypted, 7 ebs_kms_key, 8 volume_throughput]", "Type": "CommaDelimitedList" } }, @@ -687,6 +902,22 @@ } ] }, + "Throughput": { + "Fn::If": [ + "Vol1_UseEBSThroughput", + { + "Fn::Select": [ + "8", + { + "Ref": "RAIDOptions" + } + ] + }, + { + "Ref": "AWS::NoValue" + } + ] + }, "KmsKeyId": { "Fn::If": [ "Vol1_UseEBSKMSKey", @@ -772,6 +1003,22 @@ } ] }, + "Throughput": { + "Fn::If": [ + "Vol2_UseEBSThroughput", + { + "Fn::Select": [ + "8", + { + "Ref": "RAIDOptions" + } + ] + }, + { + "Ref": "AWS::NoValue" + } + ] + }, "KmsKeyId": { "Fn::If": [ "Vol2_UseEBSKMSKey", @@ -857,6 +1104,22 @@ } ] }, + "Throughput": { + "Fn::If": [ + "Vol3_UseEBSThroughput", + { + "Fn::Select": [ + "8", + { + "Ref": "RAIDOptions" + } + ] + }, + { + "Ref": "AWS::NoValue" + } + ] + }, "KmsKeyId": { "Fn::If": [ "Vol3_UseEBSKMSKey", @@ -942,6 +1205,22 @@ } ] }, + "Throughput": { + "Fn::If": [ + "Vol4_UseEBSThroughput", + { + "Fn::Select": [ + "8", + { + "Ref": "RAIDOptions" + } + ] + }, + { + "Ref": "AWS::NoValue" + } + ] + }, "KmsKeyId": { "Fn::If": [ "Vol4_UseEBSKMSKey", @@ -1027,6 +1306,22 @@ } ] }, + "Throughput": { + "Fn::If": [ + "Vol5_UseEBSThroughput", + { + "Fn::Select": [ + "8", + { + "Ref": "RAIDOptions" + } + ] + }, + { + "Ref": "AWS::NoValue" + } + ] + }, "KmsKeyId": { "Fn::If": [ "Vol5_UseEBSKMSKey", diff --git a/tests/integration-tests/README.md b/tests/integration-tests/README.md index 7cc9b9a70c..ac78f2c275 100644 --- a/tests/integration-tests/README.md +++ b/tests/integration-tests/README.md @@ -39,7 +39,8 @@ python -m test_runner --help usage: test_runner.py [-h] --key-name KEY_NAME --key-path KEY_PATH [-n PARALLELISM] [--sequential] [--credential CREDENTIAL] [--retry-on-failures] [--tests-root-dir TESTS_ROOT_DIR] [-c TESTS_CONFIG] [-i [INSTANCES [INSTANCES ...]]] [-o [OSS [OSS ...]]] [-s [SCHEDULERS [SCHEDULERS ...]]] [-r [REGIONS [REGIONS ...]]] [-f FEATURES [FEATURES ...]] [--show-output] [--reports {html,junitxml,json,cw} [{html,junitxml,json,cw} ...]] [--cw-region CW_REGION] [--cw-namespace CW_NAMESPACE] [--cw-timestamp-day-start] [--output-dir OUTPUT_DIR] - [--custom-node-url CUSTOM_NODE_URL] [--custom-cookbook-url CUSTOM_COOKBOOK_URL] [--createami-custom-cookbook-url CREATEAMI_CUSTOM_COOKBOOK_URL] [--custom-template-url CUSTOM_TEMPLATE_URL] + [--custom-node-url CUSTOM_NODE_URL] [--custom-cookbook-url CUSTOM_COOKBOOK_URL] [--createami-custom-cookbook-url CREATEAMI_CUSTOM_COOKBOOK_URL] + [--createami-custom-node-url CREATEAMI_CUSTOM_NODE_URL] [--custom-template-url CUSTOM_TEMPLATE_URL] [--custom-hit-template-url CUSTOM_HIT_TEMPLATE_URL] [--custom-awsbatchcli-url CUSTOM_AWSBATCHCLI_URL] [--custom-ami CUSTOM_AMI] [--pre-install PRE_INSTALL] [--post-install POST_INSTALL] [--benchmarks] [--benchmarks-target-capacity BENCHMARKS_TARGET_CAPACITY] [--benchmarks-max-time BENCHMARKS_MAX_TIME] [--vpc-stack VPC_STACK] [--cluster CLUSTER] [--no-delete] [--keep-logs-on-cluster-failure] [--keep-logs-on-test-failure] [--stackname-suffix 
STACKNAME_SUFFIX] [--dry-run] @@ -98,6 +99,8 @@ Custom packages and templates: URL to a custom cookbook package. (default: None) --createami-custom-cookbook-url CREATEAMI_CUSTOM_COOKBOOK_URL URL to a custom cookbook package for the createami command. (default: None) + --createami-custom-node-url CREATEAMI_CUSTOM_NODE_URL + URL to a custom node package for the createami command. (default: None) --custom-template-url CUSTOM_TEMPLATE_URL URL to a custom cfn template. (default: None) --custom-hit-template-url CUSTOM_HIT_TEMPLATE_URL @@ -226,7 +229,6 @@ cloudwatch_logging suite defined above will produce the following parametrizatio ``` cloudwatch_logging/test_cloudwatch_logging.py::test_cloudwatch_logging[ap-east-1-c5.xlarge-alinux-slurm] cloudwatch_logging/test_cloudwatch_logging.py::test_cloudwatch_logging[ap-east-1-c5.xlarge-alinux2-slurm] -cloudwatch_logging/test_cloudwatch_logging.py::test_cloudwatch_logging[ap-east-1-c5.xlarge-centos6-slurm] cloudwatch_logging/test_cloudwatch_logging.py::test_cloudwatch_logging[ap-east-1-c5.xlarge-centos7-slurm] cloudwatch_logging/test_cloudwatch_logging.py::test_cloudwatch_logging[ap-east-1-c5.xlarge-ubuntu1604-slurm] cloudwatch_logging/test_cloudwatch_logging.py::test_cloudwatch_logging[ap-east-1-c5.xlarge-ubuntu1804-slurm] @@ -704,7 +706,7 @@ included the CloudFormation stack outputs. ### Execute Remote Commands -To execute remote commands or scripts on the Master instance of the cluster under test, the `RemoteCommandExecutor` +To execute remote commands or scripts on the head node of the cluster under test, the `RemoteCommandExecutor` class can be used. It simply requires a valid `Cluster` object to be initialized and it offers some utility methods to execute remote commands and scripts as shown in the example below: diff --git a/tests/integration-tests/cfn_stacks_factory.py b/tests/integration-tests/cfn_stacks_factory.py index f721212419..65a78b4499 100644 --- a/tests/integration-tests/cfn_stacks_factory.py +++ b/tests/integration-tests/cfn_stacks_factory.py @@ -64,30 +64,24 @@ def create_stack(self, stack): """ name = stack.name region = stack.region - try: - set_credentials(region, self.__credentials) - id = self.__get_stack_internal_id(name, region) - if id in self.__created_stacks: - raise ValueError("Stack {0} already exists in region {1}".format(name, region)) + id = self.__get_stack_internal_id(name, region) + if id in self.__created_stacks: + raise ValueError("Stack {0} already exists in region {1}".format(name, region)) - logging.info("Creating stack {0} in region {1}".format(name, region)) - self.__created_stacks[id] = stack - try: - cfn_client = boto3.client("cloudformation", region_name=region) - result = cfn_client.create_stack( - StackName=name, TemplateBody=stack.template, Parameters=stack.parameters - ) - stack.cfn_stack_id = result["StackId"] - final_status = self.__wait_for_stack_creation(stack.cfn_stack_id, cfn_client) - self.__assert_stack_status(final_status, "CREATE_COMPLETE") - except Exception as e: - logging.error("Creation of stack {0} in region {1} failed with exception: {2}".format(name, region, e)) - raise - - logging.info("Stack {0} created successfully in region {1}".format(name, region)) - finally: - unset_credentials() + logging.info("Creating stack {0} in region {1}".format(name, region)) + self.__created_stacks[id] = stack + try: + cfn_client = boto3.client("cloudformation", region_name=region) + result = cfn_client.create_stack(StackName=name, TemplateBody=stack.template, Parameters=stack.parameters) + 
stack.cfn_stack_id = result["StackId"] + final_status = self.__wait_for_stack_creation(stack.cfn_stack_id, cfn_client) + self.__assert_stack_status(final_status, "CREATE_COMPLETE") + except Exception as e: + logging.error("Creation of stack {0} in region {1} failed with exception: {2}".format(name, region, e)) + raise + + logging.info("Stack {0} created successfully in region {1}".format(name, region)) @retry( stop_max_attempt_number=10, diff --git a/tests/integration-tests/clusters_factory.py b/tests/integration-tests/clusters_factory.py index 35bc4aa8db..8643d43746 100644 --- a/tests/integration-tests/clusters_factory.py +++ b/tests/integration-tests/clusters_factory.py @@ -159,8 +159,8 @@ def region(self): return self.config.get("aws", "aws_region_name", fallback="us-east-1") @property - def master_ip(self): - """Return the public ip of the cluster master node.""" + def head_node_ip(self): + """Return the public ip of the cluster head node.""" if "MasterPublicIP" in self.cfn_outputs: return self.cfn_outputs["MasterPublicIP"] else: diff --git a/tests/integration-tests/configs/common.jinja2 b/tests/integration-tests/configs/common.jinja2 index e8d05bd703..d73d3ddec8 100644 --- a/tests/integration-tests/configs/common.jinja2 +++ b/tests/integration-tests/configs/common.jinja2 @@ -1,4 +1,4 @@ -{%- set REGIONS_COMMERCIAL = ["us-east-1", "us-east-2", "us-west-1", "us-west-2", "ca-central-1", "eu-central-1", "eu-west-1", "eu-west-2", "eu-west-3", "sa-east-1", "ap-east-1", "ap-northeast-1", "ap-northeast-2", "ap-south-1", "ap-southeast-1", "ap-southeast-2", "eu-north-1"] -%} +{%- set REGIONS_COMMERCIAL = ["us-east-1", "us-east-2", "us-west-1", "us-west-2", "ca-central-1", "eu-central-1", "eu-west-1", "eu-west-2", "eu-west-3", "sa-east-1", "ap-east-1", "ap-northeast-1", "ap-northeast-2", "ap-south-1", "ap-southeast-1", "ap-southeast-2", "eu-north-1", "me-south-1", "af-south-1", "eu-south-1"] -%} {%- set REGIONS_CHINA = ["cn-north-1", "cn-northwest-1"] -%} {%- set REGIONS_GOVCLOUD = ["us-gov-west-1", "us-gov-east-1"] -%} {%- set REGIONS_ALL = REGIONS_COMMERCIAL + REGIONS_CHINA + REGIONS_GOVCLOUD -%} diff --git a/tests/integration-tests/configs/common/common.yaml b/tests/integration-tests/configs/common/common.yaml index 258298bb11..46d251235a 100644 --- a/tests/integration-tests/configs/common/common.yaml +++ b/tests/integration-tests/configs/common/common.yaml @@ -1,7 +1,14 @@ +arm_pl: + test_arm_pl.py::test_arm_pl: + dimensions: + - regions: ["ap-northeast-1"] + instances: {{ common.INSTANCES_DEFAULT_ARM }} + oss: ["alinux2", "centos8", "ubuntu1804"] + schedulers: ["slurm"] cfn-init: test_cfn_init.py::test_replace_compute_on_failure: dimensions: - - regions: ["eu-central-1"] + - regions: ["af-south-1"] instances: {{ common.INSTANCES_DEFAULT_X86 }} oss: {{ common.OSS_ONE_PER_DISTRO }} schedulers: ["slurm", "sge"] @@ -69,6 +76,11 @@ configure: instances: {{ common.INSTANCES_DEFAULT_X86 }} oss: ["alinux2"] schedulers: ["slurm"] + test_pcluster_configure.py::test_region_without_t2micro: + dimensions: + - regions: ["eu-north-1"] # must be regions that do not have t2.micro + oss: ["centos7"] + schedulers: ["slurm"] create: test_create.py::test_create_wrong_os: dimensions: @@ -87,14 +99,10 @@ createami: dimensions: - regions: ["eu-west-3"] instances: {{ common.INSTANCES_DEFAULT_X86 }} - # Ubuntu18.04 disabled due to SGE package missing - # see https://github.com/aws/aws-parallelcluster-cookbook/commit/db8c63d900c7837157519ae7eef462ac3af627a5 - oss: ["alinux", "alinux2", "ubuntu1604"] # temporary 
disable FPGA AMI since there is not enough free space on root partition + oss: ["alinux", "alinux2", "ubuntu1604", "ubuntu1804"] # temporary disable FPGA AMI since there is not enough free space on root partition - regions: ["us-gov-east-1"] instances: {{ common.INSTANCES_DEFAULT_X86 }} - # Ubuntu18.04 disabled due to SGE package missing - # see https://github.com/aws/aws-parallelcluster-cookbook/commit/db8c63d900c7837157519ae7eef462ac3af627a5 - oss: ["ubuntu1604"] + oss: ["ubuntu1604", "ubuntu1804"] - regions: ["cn-northwest-1"] instances: {{ common.INSTANCES_DEFAULT_X86 }} oss: ["alinux2"] @@ -129,12 +137,12 @@ dcv: # DCV on GPU enabled instance - regions: ["eu-west-1"] instances: ["g3.8xlarge"] - oss: ["alinux2", "centos7", "ubuntu1804"] + oss: ["alinux2", "centos7", "centos8", "ubuntu1804"] schedulers: ["slurm"] # DCV on ARM - regions: ["eu-west-1"] instances: {{ common.INSTANCES_DEFAULT_ARM }} - oss: ["alinux2", "ubuntu1804"] + oss: ["alinux2", "centos8", "ubuntu1804"] schedulers: ["slurm"] # DCV in cn regions and non GPU enabled instance - regions: ["cn-northwest-1"] @@ -199,6 +207,10 @@ efa: instances: ["p4d.24xlarge"] oss: ["alinux2", "ubuntu1604", "centos8"] schedulers: ["slurm"] + - regions: ["us-west-2"] + instances: ["c6gn.16xlarge"] + oss: ["alinux2", "ubuntu1804"] + schedulers: ["slurm"] test_efa.py::test_sit_efa: dimensions: - regions: ["us-east-1"] @@ -207,13 +219,23 @@ efa: # Torque is not supported by OpenMPI distributed with EFA # Slurm test is to verify EFA works correctly when using the SIT model in the config file schedulers: ["sge", "slurm"] -iam_policies: - test_iam_policies.py::test_iam_policies: + - regions: ["us-west-2"] + instances: ["p4d.24xlarge"] + oss: ["alinux", "ubuntu1804", "centos7"] + schedulers: ["sge"] +iam: + test_iam.py::test_iam_policies: dimensions: - regions: ["eu-north-1"] instances: {{ common.INSTANCES_DEFAULT_X86 }} oss: ["alinux2"] schedulers: ["slurm", "awsbatch"] + test_iam.py::test_iam_roles: + dimensions: + - regions: ["us-east-2"] + schedulers: ["awsbatch", "slurm", "sge"] + oss: ["alinux2"] + instances: {{ common.INSTANCES_DEFAULT_X86 }} intel_hpc: test_intel_hpc.py::test_intel_hpc: dimensions: @@ -224,7 +246,7 @@ intel_hpc: networking: test_cluster_networking.py::test_cluster_in_private_subnet: dimensions: - - regions: ["us-west-2"] + - regions: ["me-south-1"] instances: {{ common.INSTANCES_DEFAULT_X86 }} oss: ["alinux2"] schedulers: ["slurm"] @@ -234,16 +256,36 @@ networking: schedulers: ["sge"] test_networking.py::test_public_network_topology: dimensions: - - regions: ["eu-central-1", "us-gov-east-1", "cn-northwest-1"] + - regions: ["af-south-1", "us-gov-east-1", "cn-northwest-1"] test_networking.py::test_public_private_network_topology: dimensions: - - regions: ["eu-central-1", "us-gov-east-1", "cn-northwest-1"] + - regions: ["af-south-1", "us-gov-east-1", "cn-northwest-1"] test_multi_cidr.py::test_multi_cidr: dimensions: - regions: ["ap-northeast-2"] instances: {{ common.INSTANCES_DEFAULT_X86 }} oss: ["alinux2"] schedulers: ["slurm", "awsbatch"] + test_security_groups.py::test_additional_sg_and_ssh_from: + dimensions: + - regions: ["eu-north-1"] + instances: {{ common.INSTANCES_DEFAULT_X86 }} + oss: ["centos7"] + schedulers: ["slurm"] + - regions: ["eu-north-1"] + instances: {{ common.INSTANCES_DEFAULT_X86 }} + oss: ["alinux2"] + schedulers: ["awsbatch"] + test_security_groups.py::test_overwrite_sg: + dimensions: + - regions: ["eu-north-1"] + instances: {{ common.INSTANCES_DEFAULT_X86 }} + oss: ["centos7"] + schedulers: ["slurm"] + 
- regions: ["eu-north-1"] + instances: {{ common.INSTANCES_DEFAULT_X86 }} + oss: ["alinux2"] + schedulers: ["awsbatch"] scaling: test_scaling.py::test_hit_scaling: dimensions: @@ -280,7 +322,7 @@ scaling: schedulers: test_sge.py::test_sge: dimensions: - - regions: ["eu-central-1"] + - regions: ["eu-south-1"] instances: {{ common.INSTANCES_DEFAULT_X86 }} oss: {{ common.OSS_COMMERCIAL_X86 }} schedulers: ["sge"] @@ -331,7 +373,7 @@ schedulers: spot: test_spot.py::test_spot_default: dimensions: - - regions: ["us-west-2"] + - regions: ["me-south-1"] instances: {{ common.INSTANCES_DEFAULT_X86 }} oss: ["centos7"] schedulers: ["sge", "slurm"] @@ -351,6 +393,10 @@ storage: instances: {{ common.INSTANCES_DEFAULT_X86 }} oss: ["alinux"] schedulers: ["slurm"] + - regions: ["us-gov-west-1"] + instances: {{ common.INSTANCES_DEFAULT_X86 }} + oss: ["alinux2"] + schedulers: ["slurm"] test_fsx_lustre.py::test_fsx_lustre_configuration_options: dimensions: - regions: ["us-east-2"] @@ -385,9 +431,19 @@ storage: oss: {{ common.OSS_CHINA_X86 }} schedulers: ["slurm"] - regions: ["ap-northeast-1", "cn-north-1"] + instances: {{ common.INSTANCES_DEFAULT_X86 }} + oss: {{ common.OSS_BATCH }} + schedulers: ["awsbatch"] + test_efs.py::test_existing_efs: + dimensions: + - regions: ["ap-northeast-2"] instances: {{ common.INSTANCES_DEFAULT_X86 }} oss: ["alinux2"] schedulers: ["awsbatch"] + - regions: ["ap-northeast-2"] + instances: {{ common.INSTANCES_DEFAULT_X86 }} + oss: ["centos8"] + schedulers: ["slurm"] test_raid.py::test_raid_fault_tolerance_mode: dimensions: - regions: ["cn-northwest-1"] @@ -406,7 +462,7 @@ storage: schedulers: ["sge"] - regions: ["ap-south-1"] instances: {{ common.INSTANCES_DEFAULT_X86 }} - oss: ["alinux"] + oss: {{ common.OSS_BATCH }} schedulers: ["awsbatch"] test_ebs.py::test_default_ebs: dimensions: @@ -448,6 +504,12 @@ storage: instances: {{ common.INSTANCES_DEFAULT_X86 }} oss: ["ubuntu1804"] schedulers: ["slurm"] + test_ebs.py::test_ebs_existing: + dimensions: + - regions: ["ap-northeast-2"] + instances: {{ common.INSTANCES_DEFAULT_X86 }} + oss: ["centos7"] + schedulers: ["sge"] tags: test_tag_propagation.py::test_tag_propagation: dimensions: @@ -458,7 +520,7 @@ tags: update: test_update.py::test_update_awsbatch: dimensions: - - regions: ["eu-west-1"] + - regions: ["eu-south-1"] instances: {{ common.INSTANCES_DEFAULT_X86 }} oss: ["alinux2"] schedulers: ["awsbatch"] @@ -503,4 +565,4 @@ resource_bucket: - regions: ["ap-southeast-1"] instances: {{ common.INSTANCES_DEFAULT_X86 }} oss: ["alinux2"] - schedulers: ["slurm", "awsbatch"] \ No newline at end of file + schedulers: ["slurm", "awsbatch"] diff --git a/tests/integration-tests/configs/develop.yaml b/tests/integration-tests/configs/develop.yaml index 0eb3fc82b1..77a9dd7f4e 100644 --- a/tests/integration-tests/configs/develop.yaml +++ b/tests/integration-tests/configs/develop.yaml @@ -26,6 +26,38 @@ test-suites: schedulers: ["{{ scheduler }}"] {%- endfor %} scaling: + test_scaling.py::test_hit_scaling: + dimensions: + - regions: ["us-west-1"] + instances: {{ common.INSTANCES_DEFAULT_X86 }} + oss: {{ common.OSS_ONE_PER_DISTRO }} + schedulers: ["slurm"] + test_scaling.py::test_nodewatcher_terminates_failing_node: + dimensions: + - regions: ["sa-east-1"] + instances: {{ common.INSTANCES_DEFAULT_X86 }} + oss: {{ common.OSS_ONE_PER_DISTRO }} + schedulers: ["sge", "torque"] + test_mpi.py::test_mpi: # TODO: move outside of the scaling dir + dimensions: + - regions: ["ap-east-1"] + instances: {{ common.INSTANCES_DEFAULT_X86 }} + oss: {{ 
common.OSS_COMMERCIAL_X86 }} + schedulers: ["slurm", "sge"] + - regions: ["us-east-1"] + instances: {{ common.INSTANCES_DEFAULT_ARM }} + oss: {{ common.OSS_COMMERCIAL_ARM }} + schedulers: ["slurm", "sge"] + test_mpi.py::test_mpi_ssh: + dimensions: + - regions: ["eu-north-1"] + instances: {{ common.INSTANCES_DEFAULT_X86 }} + oss: {{ common.OSS_COMMERCIAL_X86 }} + schedulers: ["slurm"] + - regions: ["ca-central-1"] + instances: {{ common.INSTANCES_DEFAULT_X86 }} + oss: {{ common.OSS_COMMERCIAL_X86 }} + schedulers: ["sge"] test_scaling.py::test_multiple_jobs_submission: dimensions: - regions: {{ common.REGIONS_COMMERCIAL }} diff --git a/tests/integration-tests/configs/new_region.yaml b/tests/integration-tests/configs/new_region.yaml new file mode 100644 index 0000000000..3f7b79e4df --- /dev/null +++ b/tests/integration-tests/configs/new_region.yaml @@ -0,0 +1,169 @@ +{%- import 'common.jinja2' as common -%} +{%- set NEW_REGION = ["##PLACEHOLDER##"] -%} +--- +test-suites: + scaling: + test_scaling.py::test_multiple_jobs_submission: + dimensions: + - regions: {{ NEW_REGION }} + instances: {{ common.INSTANCES_DEFAULT_X86 }} + oss: {{ common.OSS_COMMERCIAL_X86 }} + schedulers: {{ common.SCHEDULERS_TRAD }} + - regions: {{ NEW_REGION }} + instances: {{ common.INSTANCES_DEFAULT_ARM }} + oss: {{ common.OSS_COMMERCIAL_ARM }} + schedulers: {{ common.SCHEDULERS_TRAD }} + test_mpi.py::test_mpi: + dimensions: + - regions: {{ NEW_REGION }} + instances: {{ common.INSTANCES_DEFAULT_X86 }} + oss: {{ common.OSS_COMMERCIAL_X86 }} + schedulers: ["slurm", "sge"] + - regions: {{ NEW_REGION }} + instances: {{ common.INSTANCES_DEFAULT_ARM }} + oss: {{ common.OSS_COMMERCIAL_ARM }} + schedulers: ["slurm", "sge"] + schedulers: + test_awsbatch.py::test_awsbatch: + dimensions: + - regions: {{ NEW_REGION }} + instances: {{ common.INSTANCES_DEFAULT_X86 }} + oss: ["alinux2"] + schedulers: ["awsbatch"] + - regions: {{ NEW_REGION }} + instances: {{ common.INSTANCES_DEFAULT_ARM }} + oss: ["alinux2"] + schedulers: ["awsbatch"] + cloudwatch_logging: + test_cloudwatch_logging.py::test_cloudwatch_logging: + dimensions: + - regions: {{ NEW_REGION }} + instances: {{ common.INSTANCES_DEFAULT_X86 }} + oss: ["ubuntu1804"] + schedulers: ["slurm"] + configure: + test_pcluster_configure.py::test_pcluster_configure: + dimensions: + - regions: {{ NEW_REGION }} + instances: {{ common.INSTANCES_DEFAULT_X86 }} + oss: {{ common.OSS_ONE_PER_DISTRO }} + schedulers: {{ common.SCHEDULERS_TRAD }} + cli_commands: + test_cli_commands.py::test_hit_cli_commands: + dimensions: + - regions: {{ NEW_REGION }} + instances: {{ common.INSTANCES_DEFAULT_X86 }} + oss: ["ubuntu1604"] + schedulers: ["slurm"] + test_cli_commands.py::test_sit_cli_commands: + dimensions: + - regions: {{ NEW_REGION }} + instances: {{ common.INSTANCES_DEFAULT_X86 }} + oss: ["centos7"] + schedulers: ["sge"] + update: + test_update.py::test_update_hit: + dimensions: + - regions: {{ NEW_REGION }} + instances: {{ common.INSTANCES_DEFAULT_X86 }} + oss: ["alinux2"] + schedulers: ["slurm"] + createami: + test_createami.py::test_createami: + dimensions: + - regions: {{ NEW_REGION }} + instances: {{ common.INSTANCES_DEFAULT_X86 }} + oss: ["alinux", "alinux2", "ubuntu1604", "ubuntu1804"] # temporary disable FPGA AMI since there is not enough free space on root partition + dashboard: + test_dashboard.py::test_dashboard: + dimensions: + - regions: {{ NEW_REGION }} + instances: {{ common.INSTANCES_DEFAULT_X86 }} + oss: ["centos8"] + schedulers: ["slurm"] + dcv: + 
test_dcv.py::test_dcv_configuration: + dimensions: + # DCV on GPU enabled instance + - regions: {{ NEW_REGION }} + instances: ["g3.8xlarge"] + oss: ["ubuntu1804"] + schedulers: ["slurm"] + # DCV on non GPU enabled instance + - regions: {{ NEW_REGION }} + instances: {{ common.INSTANCES_DEFAULT_X86 }} + oss: ["alinux2"] + schedulers: ["slurm"] + dns: + test_dns.py::test_hit_no_cluster_dns_mpi: + dimensions: + - regions: {{ NEW_REGION }} + instances: {{ common.INSTANCES_DEFAULT_X86 }} + oss: ["alinux2"] + schedulers: ["slurm"] + efa: + test_efa.py::test_hit_efa: + dimensions: + - regions: {{ NEW_REGION }} + instances: ["c5n.18xlarge"] + oss: ["alinux2"] + schedulers: ["slurm"] + iam: + test_iam.py::test_iam_policies: + dimensions: + - regions: {{ NEW_REGION }} + instances: {{ common.INSTANCES_DEFAULT_X86 }} + oss: ["alinux2"] + schedulers: ["slurm"] + networking: + test_cluster_networking.py::test_cluster_in_private_subnet: + dimensions: + - regions: {{ NEW_REGION }} + instances: {{ common.INSTANCES_DEFAULT_X86 }} + oss: ["ubuntu1804"] + schedulers: ["slurm"] + test_networking.py::test_public_network_topology: + dimensions: + - regions: {{ NEW_REGION }} + test_networking.py::test_public_private_network_topology: + dimensions: + - regions: {{ NEW_REGION }} + test_multi_cidr.py::test_multi_cidr: + dimensions: + - regions: {{ NEW_REGION }} + instances: {{ common.INSTANCES_DEFAULT_X86 }} + oss: ["alinux2"] + schedulers: ["slurm"] + spot: + test_spot.py::test_spot_default: + dimensions: + - regions: {{ NEW_REGION }} + instances: {{ common.INSTANCES_DEFAULT_X86 }} + oss: ["centos8"] + schedulers: ["slurm"] + storage: + test_fsx_lustre.py::test_fsx_lustre: + dimensions: + - regions: {{ NEW_REGION }} + instances: {{ common.INSTANCES_DEFAULT_X86 }} + oss: ["alinux"] + schedulers: ["slurm"] + test_efs.py::test_efs_compute_az: + dimensions: + - regions: {{ NEW_REGION }} + instances: {{ common.INSTANCES_DEFAULT_X86 }} + oss: ["alinux2"] + schedulers: ["slurm"] + test_ebs.py::test_ebs_multiple: + dimensions: + - regions: {{ NEW_REGION }} + instances: {{ common.INSTANCES_DEFAULT_X86 }} + oss: ["alinux2"] + schedulers: ["slurm"] + resource_bucket: + test_resource_bucket.py::test_resource_bucket: + dimensions: + - regions: {{ NEW_REGION }} + instances: {{ common.INSTANCES_DEFAULT_X86 }} + oss: ["alinux2"] + schedulers: ["slurm"] diff --git a/tests/integration-tests/conftest.py b/tests/integration-tests/conftest.py index 9c550762bd..ae7beee5f0 100644 --- a/tests/integration-tests/conftest.py +++ b/tests/integration-tests/conftest.py @@ -18,11 +18,13 @@ import os import random import re +import time from shutil import copyfile from traceback import format_tb import boto3 import configparser +import pkg_resources import pytest from cfn_stacks_factory import CfnStack, CfnStacksFactory from clusters_factory import Cluster, ClustersFactory @@ -43,6 +45,7 @@ from utils import ( create_s3_bucket, delete_s3_bucket, + generate_stack_name, get_architecture_supported_by_instance_type, get_vpc_snakecase_value, random_alphanumeric, @@ -51,7 +54,7 @@ unset_credentials, ) -from tests.common.utils import retrieve_pcluster_ami_without_standard_naming +from tests.common.utils import get_sts_endpoint, retrieve_pcluster_ami_without_standard_naming def pytest_addoption(parser): @@ -70,6 +73,7 @@ def pytest_addoption(parser): parser.addoption( "--createami-custom-chef-cookbook", help="url to a custom cookbook package for the createami command" ) + parser.addoption("--createami-custom-node-package", help="url to a custom node 
package for the createami command") parser.addoption("--custom-awsbatch-template-url", help="url to a custom awsbatch template") parser.addoption("--template-url", help="url to a custom cfn template") parser.addoption("--hit-template-url", help="url to a custom HIT cfn template") @@ -291,6 +295,11 @@ def _cluster_factory(cluster_config, extra_args=None, raise_on_error=True): ) +@pytest.fixture(scope="class") +def cluster_model(scheduler): + return "HIT" if scheduler == "slurm" else "SIT" + + def _write_cluster_config_to_outdir(request, cluster_config): out_dir = request.config.getoption("output_dir") @@ -328,7 +337,7 @@ def test_datadir(request, datadir): @pytest.fixture() -def pcluster_config_reader(test_datadir, vpc_stacks, region, request): +def pcluster_config_reader(test_datadir, vpc_stack, region, request): """ Define a fixture to render pcluster config templates associated to the running test. @@ -348,7 +357,7 @@ def _config_renderer(config_file="pcluster.config.ini", **kwargs): config_file_path = test_datadir / config_file if not os.path.isfile(config_file_path): raise FileNotFoundError(f"Cluster config file not found in the expected dir {config_file_path}") - default_values = _get_default_template_values(vpc_stacks, region, request) + default_values = _get_default_template_values(vpc_stack, request) file_loader = FileSystemLoader(str(test_datadir)) env = Environment(loader=file_loader) rendered_template = env.get_template(config_file).render(**{**kwargs, **default_values}) @@ -440,9 +449,9 @@ def _enable_sanity_check_if_unset(cluster_config): config.write(f) -def _get_default_template_values(vpc_stacks, region, request): +def _get_default_template_values(vpc_stack, request): """Build a dictionary of default values to inject in the jinja templated cluster configs.""" - default_values = get_vpc_snakecase_value(region, vpc_stacks) + default_values = get_vpc_snakecase_value(vpc_stack) default_values.update({dimension: request.node.funcargs.get(dimension) for dimension in DIMENSIONS_MARKER_ARGS}) default_values["key_name"] = request.config.getoption("key_name") @@ -559,7 +568,7 @@ def vpc_stacks(cfn_stacks_factory, request): for region in regions: # Creating private_subnet_different_cidr in a different AZ for test_efs - # To-do: isolate this logic and create a compute subnet in different AZ than master in test_efs + # To-do: isolate this logic and create a compute subnet in different AZ than head node in test_efs # if region has a non-empty list in AVAILABILITY_ZONE_OVERRIDES, select a subset of those AZs credential = request.config.getoption("credential") @@ -616,7 +625,109 @@ def vpc_stacks(cfn_stacks_factory, request): return vpc_stacks -@pytest.fixture() +@pytest.fixture(scope="class") +def common_pcluster_policies(region): + """Create four policies to be attached to ec2_iam_role and iam_lambda_role for awsbatch or traditional schedulers.""" + policies = {} + policies["awsbatch_instance_policy"] = _create_iam_policies( + "integ-tests-ParallelClusterInstancePolicy-batch-" + random_alphanumeric(), region, "batch_instance_policy.json" + ) + policies["traditional_instance_policy"] = _create_iam_policies( + "integ-tests-ParallelClusterInstancePolicy-traditional-" + random_alphanumeric(), + region, + "traditional_instance_policy.json", + ) + policies["awsbatch_lambda_policy"] = _create_iam_policies( + "integ-tests-ParallelClusterLambdaPolicy-batch-" + random_alphanumeric(), + region, + "batch_lambda_function_policy.json", + ) + policies["traditional_lambda_policy"] = _create_iam_policies( 
"integ-tests-ParallelClusterLambdaPolicy-traditional-" + random_alphanumeric(), + region, + "traditional_lambda_function_policy.json", + ) + + yield policies + + iam_client = boto3.client("iam", region_name=region) + for policy in policies.values(): + iam_client.delete_policy(PolicyArn=policy) + + +@pytest.fixture(scope="class") +def role_factory(region): + roles = [] + iam_client = boto3.client("iam", region_name=region) + + def create_role(trusted_service, policies=()): + iam_role_name = f"integ-tests_{trusted_service}_{region}_{random_alphanumeric()}" + logging.info(f"Creating iam role {iam_role_name} for {trusted_service}") + + partition = _get_arn_partition(region) + domain_suffix = ".cn" if partition == "aws-cn" else "" + + trust_relationship_policy_ec2 = { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Principal": {"Service": f"{trusted_service}.amazonaws.com{domain_suffix}"}, + "Action": "sts:AssumeRole", + } + ], + } + iam_client.create_role( + RoleName=iam_role_name, + AssumeRolePolicyDocument=json.dumps(trust_relationship_policy_ec2), + Description="Role for create custom KMS key", + ) + + logging.info(f"Attaching iam policy to the role {iam_role_name}...") + for policy in policies: + iam_client.attach_role_policy(RoleName=iam_role_name, PolicyArn=policy) + + # Having time.sleep here because because it take a while for the the IAM role to become valid for use in the + # put_key_policy step for creating KMS key, read the following link for reference : + # https://stackoverflow.com/questions/20156043/how-long-should-i-wait-after-applying-an-aws-iam-policy-before-it-is-valid + time.sleep(60) + logging.info(f"Iam role is ready: {iam_role_name}") + roles.append({"role_name": iam_role_name, "policies": policies}) + return iam_role_name + + yield create_role + + for role in roles: + role_name = role["role_name"] + policies = role["policies"] + for policy in policies: + iam_client.detach_role_policy(RoleName=role_name, PolicyArn=policy) + logging.info(f"Deleting iam role {role_name}") + iam_client.delete_role(RoleName=role_name) + + +def _create_iam_policies(iam_policy_name, region, policy_filename): + logging.info("Creating iam policy {0}...".format(iam_policy_name)) + file_loader = FileSystemLoader(pkg_resources.resource_filename(__name__, "/resources")) + env = Environment(loader=file_loader, trim_blocks=True, lstrip_blocks=True) + partition = _get_arn_partition(region) + account_id = ( + boto3.client("sts", region_name=region, endpoint_url=get_sts_endpoint(region)) + .get_caller_identity() + .get("Account") + ) + parallel_cluster_instance_policy = env.get_template(policy_filename).render( + partition=partition, + region=region, + account_id=account_id, + cluster_bucket_name="parallelcluster-*", + ) + return boto3.client("iam", region_name=region).create_policy( + PolicyName=iam_policy_name, PolicyDocument=parallel_cluster_instance_policy + )["Policy"]["Arn"] + + +@pytest.fixture(scope="class") def vpc_stack(vpc_stacks, region): return vpc_stacks[region] @@ -629,20 +740,21 @@ def vpc_stack(vpc_stacks, region): retry_on_exception=lambda exception: not isinstance(exception, KeyboardInterrupt), ) def _create_vpc_stack(request, template, region, cfn_stacks_factory): - if request.config.getoption("vpc_stack"): - logging.info("Using stack {0} in region {1}".format(request.config.getoption("vpc_stack"), region)) - stack = CfnStack(name=request.config.getoption("vpc_stack"), region=region, template=template.to_json()) - else: - stack = CfnStack( - 
name="integ-tests-vpc-{0}{1}{2}".format( - random_alphanumeric(), - "-" if request.config.getoption("stackname_suffix") else "", - request.config.getoption("stackname_suffix"), - ), - region=region, - template=template.to_json(), - ) - cfn_stacks_factory.create_stack(stack) + try: + set_credentials(region, request.config.getoption("credential")) + if request.config.getoption("vpc_stack"): + logging.info("Using stack {0} in region {1}".format(request.config.getoption("vpc_stack"), region)) + stack = CfnStack(name=request.config.getoption("vpc_stack"), region=region, template=template.to_json()) + else: + stack = CfnStack( + name=generate_stack_name("integ-tests-vpc", request.config.getoption("stackname_suffix")), + region=region, + template=template.to_json(), + ) + cfn_stacks_factory.create_stack(stack) + + finally: + unset_credentials() return stack diff --git a/tests/integration-tests/remote_command_executor.py b/tests/integration-tests/remote_command_executor.py index 73cec42784..3088bbcf33 100644 --- a/tests/integration-tests/remote_command_executor.py +++ b/tests/integration-tests/remote_command_executor.py @@ -25,18 +25,18 @@ def __init__(self, result): class RemoteCommandExecutor: - """Execute remote commands on the cluster master node.""" + """Execute remote commands on the cluster head node.""" def __init__(self, cluster, username=None): if not username: username = get_username_for_os(cluster.os) self.__connection = Connection( - host=cluster.master_ip, + host=cluster.head_node_ip, user=username, forward_agent=False, connect_kwargs={"key_filename": [cluster.ssh_key]}, ) - self.__user_at_hostname = "{0}@{1}".format(username, cluster.master_ip) + self.__user_at_hostname = "{0}@{1}".format(username, cluster.head_node_ip) def __del__(self): try: @@ -57,7 +57,7 @@ def run_remote_command( timeout=None, ): """ - Execute remote command on the cluster master node. + Execute remote command on the cluster head node. :param command: command to execute. :param log_error: log errors. @@ -95,9 +95,9 @@ def run_remote_script( self, script_file, args=None, log_error=True, additional_files=None, hide=False, timeout=None, run_as_root=False ): """ - Execute a script remotely on the cluster master node. + Execute a script remotely on the cluster head node. - Script is copied to the master home dir before being executed. + Script is copied to the head node home dir before being executed. :param script_file: local path to the script to execute remotely. :param args: args to pass to the script when invoked. :param log_error: log errors. 
diff --git a/tests/integration-tests/resources/batch_instance_policy.json b/tests/integration-tests/resources/batch_instance_policy.json new file mode 100644 index 0000000000..9cc41a740b --- /dev/null +++ b/tests/integration-tests/resources/batch_instance_policy.json @@ -0,0 +1,126 @@ +{ + "Version": "2012-10-17", + "Statement": [ + { + "Action": [ + "batch:SubmitJob", + "cloudformation:DescribeStacks", + "ecs:ListContainerInstances", + "ecs:DescribeContainerInstances", + "logs:FilterLogEvents", + "s3:PutObject", + "s3:DeleteObject", + "iam:PassRole" + ], + "Resource": [ + "arn:{{ partition }}:batch:{{ region }}:{{ account_id }}:job-definition/*", + "arn:{{ partition }}:batch:{{ region }}:{{ account_id }}:job-queue/*", + "arn:{{ partition }}:cloudformation:{{ region }}:{{ account_id }}:stack/*", + "arn:{{ partition }}:s3:::{{ cluster_bucket_name }}/*", + "arn:{{ partition }}:iam::{{ account_id }}:role/*", + "arn:{{ partition }}:ecs:{{ region }}:{{ account_id }}:cluster/*", + "arn:{{ partition }}:ecs:{{ region }}:{{ account_id }}:container-instance/*", + "arn:{{ partition }}:logs:{{ region }}:{{ account_id }}:log-group:/aws/batch/job:log-stream:*" + ], + "Effect": "Allow" + }, + { + "Action": [ + "batch:RegisterJobDefinition", + "logs:GetLogEvents" + ], + "Resource": [ + "*" + ], + "Effect": "Allow" + }, + { + "Action": [ + "s3:Get*" + ], + "Resource": [ + "arn:{{ partition }}:s3:::{{ cluster_bucket_name }}/*" + ], + "Effect": "Allow" + }, + { + "Action": [ + "s3:List*" + ], + "Resource": [ + "arn:{{ partition }}:s3:::{{ cluster_bucket_name }}" + ], + "Effect": "Allow" + }, + { + "Action": [ + "batch:DescribeJobQueues", + "batch:TerminateJob", + "batch:DescribeJobs", + "batch:CancelJob", + "batch:DescribeJobDefinitions", + "batch:ListJobs", + "batch:DescribeComputeEnvironments" + ], + "Resource": [ + "*" + ], + "Effect": "Allow" + }, + { + "Action": [ + "ec2:DescribeInstances", + "ec2:AttachVolume", + "ec2:DescribeVolumes", + "ec2:DescribeInstanceAttribute" + ], + "Resource": [ + "*" + ], + "Effect": "Allow", + "Sid": "EC2" + }, + { + "Action": [ + "cloudformation:DescribeStackResource", + "cloudformation:SignalResource" + ], + "Resource": [ + "*" + ], + "Effect": "Allow", + "Sid": "CloudFormation" + }, + { + "Action": [ + "fsx:DescribeFileSystems" + ], + "Resource": [ + "*" + ], + "Effect": "Allow", + "Sid": "FSx" + }, + { + "Action": [ + "logs:CreateLogGroup", + "logs:CreateLogStream" + ], + "Resource": [ + "*" + ], + "Effect": "Allow", + "Sid": "CWLogs" + }, + { + "Action": [ + "s3:GetObject" + ], + "Resource": [ + "arn:{{ partition }}:s3:::aws-parallelcluster-jenkins-*" + ], + "Effect": "Allow", + "Sid": "Chronicle" + } + ] +} \ No newline at end of file diff --git a/tests/integration-tests/resources/batch_lambda_function_policy.json b/tests/integration-tests/resources/batch_lambda_function_policy.json new file mode 100644 index 0000000000..ed92d0fbf2 --- /dev/null +++ b/tests/integration-tests/resources/batch_lambda_function_policy.json @@ -0,0 +1,44 @@ +{ + "Version": "2012-10-17", + "Statement": [ + { + "Action": [ + "logs:CreateLogStream", + "logs:PutLogEvents" + ], + "Effect": "Allow", + "Resource": "arn:{{ partition }}:logs:*:*:*", + "Sid": "CloudWatchLogsPolicy" + }, + { + "Action": [ + "ecr:BatchDeleteImage", + "ecr:ListImages" + ], + "Effect": "Allow", + "Resource": "*", + "Sid": "ECRPolicy" + }, + { + "Action": [ + "codebuild:BatchGetBuilds", + "codebuild:StartBuild" + ], + "Effect": "Allow", 
"Resource": "*", + "Sid": "CodeBuildPolicy" + }, + { + "Action": [ + "s3:DeleteBucket", + "s3:DeleteObject", + "s3:DeleteObjectVersion", + "s3:ListBucket", + "s3:ListBucketVersions" + ], + "Effect": "Allow", + "Resource": "*", + "Sid": "S3BucketPolicy" + } + ] +} \ No newline at end of file diff --git a/tests/integration-tests/resources/key_policy.json b/tests/integration-tests/resources/key_policy.json new file mode 100644 index 0000000000..579f3bc0c8 --- /dev/null +++ b/tests/integration-tests/resources/key_policy.json @@ -0,0 +1,37 @@ +{ + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "Enable IAM User Permissions", + "Effect": "Allow", + "Principal": {"AWS": "arn:{{ partition }}:iam::{{ account_id }}:root"}, + "Action": "kms:*", + "Resource": "*" + }, + { + "Sid": "Allow use of the key", + "Effect": "Allow", + "Principal": { + "AWS": "arn:{{ partition }}:iam::{{ account_id }}:role/{{ iam_role_name }}" + }, + "Action": [ + "kms:Encrypt", + "kms:Decrypt", + "kms:ReEncrypt*", + "kms:GenerateDataKey*", + "kms:DescribeKey" + ], + "Resource": "*" + }, + { + "Sid": "Allow attachment of persistent resources", + "Effect": "Allow", + "Principal": { + "AWS": "arn:{{ partition }}:iam::{{ account_id }}:role/{{ iam_role_name }}" + }, + "Action": ["kms:CreateGrant", "kms:ListGrants", "kms:RevokeGrant"], + "Resource": "*", + "Condition": {"Bool": {"kms:GrantIsForAWSResource": "true"}} + } + ] +} \ No newline at end of file diff --git a/tests/integration-tests/resources/traditional_instance_policy.json b/tests/integration-tests/resources/traditional_instance_policy.json new file mode 100644 index 0000000000..710a918fa4 --- /dev/null +++ b/tests/integration-tests/resources/traditional_instance_policy.json @@ -0,0 +1,183 @@ +{ + "Version": "2012-10-17", + "Statement": [ + { + "Action": [ + "ec2:DescribeVolumes", + "ec2:AttachVolume", + "ec2:DescribeInstanceAttribute", + "ec2:DescribeInstanceStatus", + "ec2:DescribeInstances", + "ec2:DescribeInstanceTypes", + "ec2:DescribeRegions", + "ec2:RunInstances", + "ec2:TerminateInstances", + "ec2:DescribeLaunchTemplates", + "ec2:CreateTags" + ], + "Resource": [ + "*" + ], + "Effect": "Allow", + "Sid": "EC2" + }, + { + "Action": [ + "dynamodb:ListTables" + ], + "Resource": [ + "*" + ], + "Effect": "Allow", + "Sid": "DynamoDBList" + }, + { + "Action": [ + "sqs:SendMessage", + "sqs:ReceiveMessage", + "sqs:ChangeMessageVisibility", + "sqs:DeleteMessage", + "sqs:GetQueueUrl" + ], + "Resource": [ + "arn:{{ partition }}:sqs:{{ region }}:{{ account_id }}:parallelcluster-*" + ], + "Effect": "Allow", + "Sid": "SQSQueue" + }, + { + "Action": [ + "autoscaling:DescribeAutoScalingGroups", + "autoscaling:TerminateInstanceInAutoScalingGroup", + "autoscaling:SetDesiredCapacity", + "autoscaling:UpdateAutoScalingGroup", + "autoscaling:DescribeTags", + "autoscaling:SetInstanceHealth" + ], + "Resource": [ + "*" + ], + "Effect": "Allow", + "Sid": "Autoscaling" + }, + { + "Action": [ + "cloudformation:DescribeStacks", + "cloudformation:DescribeStackResource", + "cloudformation:SignalResource" + ], + "Resource": [ + "arn:{{ partition }}:cloudformation:{{ region }}:{{ account_id }}:stack/parallelcluster-*/*" + ], + "Effect": "Allow", + "Sid": "CloudFormation" + }, + { + "Action": [ + "dynamodb:PutItem", + "dynamodb:Query", + "dynamodb:GetItem", + "dynamodb:BatchWriteItem", + "dynamodb:DeleteItem", + "dynamodb:DescribeTable" + ], + "Resource": [ + "arn:{{ partition }}:dynamodb:{{ region }}:{{ account_id }}:table/parallelcluster-*" + ], + "Effect": "Allow", + "Sid": "DynamoDBTable" 
+ }, + { + "Action": [ + "s3:GetObject" + ], + "Resource": [ + "arn:{{ partition }}:s3:::{{ region }}-aws-parallelcluster/*" + ], + "Effect": "Allow", + "Sid": "S3GetObj" + }, + { + "Action": [ + "sqs:ListQueues" + ], + "Resource": [ + "*" + ], + "Effect": "Allow", + "Sid": "SQSList" + }, + { + "Action": [ + "iam:PassRole" + ], + "Resource": [ + "*" + ], + "Effect": "Allow", + "Sid": "IAMPassRole" + }, + { + "Action": [ + "s3:GetObject" + ], + "Resource": [ + "arn:{{ partition }}:s3:::dcv-license.{{ region }}/*" + ], + "Effect": "Allow", + "Sid": "DcvLicense" + }, + { + "Action": [ + "s3:GetObject", + "s3:GetObjectVersion" + ], + "Resource": [ + "arn:{{ partition }}:s3:::{{ cluster_bucket_name }}/*" + ], + "Effect": "Allow", + "Sid": "GetClusterConfig" + }, + { + "Action": [ + "fsx:DescribeFileSystems" + ], + "Resource": [ + "*" + ], + "Effect": "Allow", + "Sid": "FSx" + }, + { + "Action": [ + "logs:CreateLogStream", + "logs:PutLogEvents" + ], + "Resource": [ + "*" + ], + "Effect": "Allow", + "Sid": "CWLogs" + }, + { + "Action": [ + "route53:ChangeResourceRecordSets" + ], + "Resource": [ + "arn:{{ partition }}:route53:::hostedzone/*" + ], + "Effect": "Allow", + "Sid": "Route53" + }, + { + "Action": [ + "s3:GetObject" + ], + "Resource": [ + "arn:{{ partition }}:s3:::aws-parallelcluster-jenkins-*" + ], + "Effect": "Allow", + "Sid": "Chronicle" + } + ] +} \ No newline at end of file diff --git a/tests/integration-tests/resources/traditional_lambda_function_policy.json b/tests/integration-tests/resources/traditional_lambda_function_policy.json new file mode 100644 index 0000000000..a3545486ac --- /dev/null +++ b/tests/integration-tests/resources/traditional_lambda_function_policy.json @@ -0,0 +1,64 @@ +{ + "Version": "2012-10-17", + "Statement": [ + { + "Action": [ + "logs:CreateLogStream", + "logs:PutLogEvents" + ], + "Resource": "arn:{{ partition }}:logs:*:*:*", + "Effect": "Allow", + "Sid": "CloudWatchLogsPolicy" + }, + { + "Action": [ + "s3:DeleteBucket", + "s3:DeleteObject", + "s3:DeleteObjectVersion", + "s3:ListBucket", + "s3:ListBucketVersions" + ], + "Resource": [ + "arn:{{ partition }}:s3:::*" + ], + "Effect": "Allow", + "Sid": "S3BucketPolicy" + }, + { + "Action": [ + "ec2:DescribeInstances" + ], + "Resource": "*", + "Effect": "Allow", + "Sid": "DescribeInstances" + }, + { + "Action": [ + "ec2:TerminateInstances" + ], + "Resource": "*", + "Effect": "Allow", + "Sid": "FleetTerminatePolicy" + }, + { + "Action": [ + "dynamodb:GetItem", + "dynamodb:PutItem" + ], + "Resource": "arn:{{ partition }}:dynamodb:{{ region }}:{{ account_id }}:table/parallelcluster-*", + "Effect": "Allow", + "Sid": "DynamoDBTable" + }, + { + "Action": [ + "route53:ListResourceRecordSets", + "route53:ChangeResourceRecordSets" + ], + "Resource": [ + "arn:{{ partition }}:route53:::hostedzone/*" + ], + "Effect": "Allow", + "Sid": "Route53DeletePolicy" + } + ] +} \ No newline at end of file diff --git a/tests/integration-tests/test_runner.py b/tests/integration-tests/test_runner.py index 700476d3cf..b2e8fc7725 100644 --- a/tests/integration-tests/test_runner.py +++ b/tests/integration-tests/test_runner.py @@ -53,6 +53,7 @@ "custom_node_url": None, "custom_cookbook_url": None, "createami_custom_cookbook_url": None, + "createami_custom_node_url": None, "custom_template_url": None, "custom_awsbatchcli_url": None, "custom_hit_template_url": None, @@ -212,6 +213,12 @@ def _init_argparser(): default=TEST_DEFAULTS.get("createami_custom_cookbook_url"), type=_is_url, ) + custom_group.add_argument( + 
"--createami-custom-node-url", + help="URL to a custom node package for the createami command.", + default=TEST_DEFAULTS.get("createami_custom_node_url"), + type=_is_url, + ) custom_group.add_argument( "--custom-template-url", help="URL to a custom cfn template.", @@ -426,6 +433,9 @@ def _set_custom_packages_args(args, pytest_args): # noqa: C901 if args.createami_custom_cookbook_url: pytest_args.extend(["--createami-custom-chef-cookbook", args.createami_custom_cookbook_url]) + if args.createami_custom_node_url: + pytest_args.extend(["--createami-custom-node-package", args.createami_custom_node_url]) + if args.custom_template_url: pytest_args.extend(["--template-url", args.custom_template_url]) diff --git a/tests/integration-tests/tests/arm_pl/test_arm_pl.py b/tests/integration-tests/tests/arm_pl/test_arm_pl.py new file mode 100644 index 0000000000..9208c4448c --- /dev/null +++ b/tests/integration-tests/tests/arm_pl/test_arm_pl.py @@ -0,0 +1,70 @@ +# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). +# You may not use this file except in compliance with the License. +# A copy of the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "LICENSE.txt" file accompanying this file. +# This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied. +# See the License for the specific language governing permissions and limitations under the License. +import logging + +import pytest +from assertpy import assert_that +from remote_command_executor import RemoteCommandExecutor + + +@pytest.mark.regions(["ap-northeast-1"]) +@pytest.mark.instances(["m6g.xlarge"]) +@pytest.mark.oss(["ubuntu1804", "alinux2", "centos8"]) +@pytest.mark.schedulers(["slurm"]) +def test_arm_pl(region, scheduler, instance, os, pcluster_config_reader, clusters_factory, test_datadir): + """Test Arm Performance Library""" + cluster_config = pcluster_config_reader() + cluster = clusters_factory(cluster_config) + remote_command_executor = RemoteCommandExecutor(cluster) + + # arm performance library version and gcc version + armpl_version = "20.2.1" + gcc_version = "9.3" + + # loading module armpl/{armpl_version} will load module armpl/gcc-{gcc_version} + # and armpl/{armpl_version}_gcc-{gcc_vesion} sequentially + armpl_module_general_name = f"armpl/{armpl_version}" + armpl_module_name = f"armpl/{armpl_version}_gcc-{gcc_version}" + gcc_module_name = f"armpl/gcc-{gcc_version}" + _test_armpl_examples( + remote_command_executor, + armpl_module_general_name, + armpl_module_name, + gcc_module_name, + armpl_version, + gcc_version, + ) + + +def _test_armpl_examples( + remote_command_executor, armpl_module_general_name, armpl_module_name, gcc_module_name, armpl_version, gcc_version +): + # Test arm performance library examples to check arm performance library is available in cluster + logging.info("Test arm performance library examples") + + # Load armpl module and gcc-9.3 module and assert module loaded + module_result = remote_command_executor.run_remote_command( + f"module load {armpl_module_general_name} && module list" + ).stdout + for module in [armpl_module_general_name, armpl_module_name, gcc_module_name]: + assert_that(module_result).contains(module) + + # Assert pass the example tests + remote_command_executor.run_remote_command( + f"sudo chmod 777 /opt/arm/armpl/{armpl_version}/armpl_{armpl_version}_gcc-{gcc_version}/examples" + ) + test_result = 
remote_command_executor.run_remote_command( + f"module load {armpl_module_general_name} && " + f"cd /opt/arm/armpl/{armpl_version}/armpl_{armpl_version}_gcc-{gcc_version}/examples && make clean && make" + ).stdout.lower() + assert_that(test_result).contains("testing: no example difference files were generated") + assert_that(test_result).contains("test passed ok") diff --git a/tests/integration-tests/tests/arm_pl/test_arm_pl/test_arm_pl/pcluster.config.ini b/tests/integration-tests/tests/arm_pl/test_arm_pl/test_arm_pl/pcluster.config.ini new file mode 100644 index 0000000000..ce19644041 --- /dev/null +++ b/tests/integration-tests/tests/arm_pl/test_arm_pl/test_arm_pl/pcluster.config.ini @@ -0,0 +1,20 @@ +[global] +cluster_template = default + +[aws] +aws_region_name = {{ region }} + +[cluster default] +base_os = {{ os }} +key_name = {{ key_name }} +vpc_settings = parallelcluster-vpc +scheduler = {{ scheduler }} +master_instance_type = {{ instance }} +compute_instance_type = {{ instance }} + + +[vpc parallelcluster-vpc] +vpc_id = {{ vpc_id }} +master_subnet_id = {{ public_subnet_id }} +compute_subnet_id = {{ private_subnet_id }} +use_public_ips = false \ No newline at end of file diff --git a/tests/integration-tests/tests/cfn-init/test_cfn_init.py b/tests/integration-tests/tests/cfn-init/test_cfn_init.py index 0b04217583..408de4dffc 100644 --- a/tests/integration-tests/tests/cfn-init/test_cfn_init.py +++ b/tests/integration-tests/tests/cfn-init/test_cfn_init.py @@ -72,7 +72,7 @@ def test_install_args_quotes(region, pcluster_config_reader, clusters_factory, s init_config_file = pcluster_config_reader(bucket_name=bucket_name) cluster = clusters_factory(init_config_file) - # Check master and compute node status + # Check head node and compute node status _assert_server_status(cluster) diff --git a/tests/integration-tests/tests/cli_commands/test_cli_commands/test_hit_cli_commands/pcluster.config.ini b/tests/integration-tests/tests/cli_commands/test_cli_commands/test_hit_cli_commands/pcluster.config.ini index 5a5c2ed474..a6a795cfe1 100644 --- a/tests/integration-tests/tests/cli_commands/test_cli_commands/test_hit_cli_commands/pcluster.config.ini +++ b/tests/integration-tests/tests/cli_commands/test_cli_commands/test_hit_cli_commands/pcluster.config.ini @@ -20,14 +20,14 @@ compute_resource_settings = ondemand_i1,ondemand_i2 compute_resource_settings = ondemand_i3,ondemand_i4 [compute_resource ondemand_i1] -instance_type = c4.xlarge +instance_type = c5.large [compute_resource ondemand_i2] instance_type = {{ instance }} min_count = 1 [compute_resource ondemand_i3] -instance_type = c4.xlarge +instance_type = c5.large [compute_resource ondemand_i4] instance_type = {{ instance }} diff --git a/tests/integration-tests/tests/cloudwatch_logging/test_cloudwatch_logging.py b/tests/integration-tests/tests/cloudwatch_logging/test_cloudwatch_logging.py index 59cf9aed93..7c1b3cad04 100644 --- a/tests/integration-tests/tests/cloudwatch_logging/test_cloudwatch_logging.py +++ b/tests/integration-tests/tests/cloudwatch_logging/test_cloudwatch_logging.py @@ -29,9 +29,9 @@ DEFAULT_SHARED_DIR = "/shared" DEFAULT_RETENTION_DAYS = 14 NODE_CONFIG_PATH = "/etc/chef/dna.json" -MASTER_NODE_ROLE_NAME = "MasterServer" +HEAD_NODE_ROLE_NAME = "MasterServer" COMPUTE_NODE_ROLE_NAME = "ComputeFleet" -NODE_ROLE_NAMES = {MASTER_NODE_ROLE_NAME, COMPUTE_NODE_ROLE_NAME} +NODE_ROLE_NAMES = {HEAD_NODE_ROLE_NAME, COMPUTE_NODE_ROLE_NAME} def _get_log_group_name_for_cluster(cluster_name): @@ -95,8 +95,8 @@ def __init__(self, scheduler, os, 
cluster, feature_key=None, shared_dir=DEFAULT_ self.shared_dir = self._get_shared_dir(shared_dir) self.remote_command_executor = RemoteCommandExecutor(self.cluster) self.scheduler_commands = get_scheduler_commands(self.scheduler, self.remote_command_executor) - self._relevant_logs = {MASTER_NODE_ROLE_NAME: [], COMPUTE_NODE_ROLE_NAME: []} - self._cluster_log_state = {MASTER_NODE_ROLE_NAME: {}, COMPUTE_NODE_ROLE_NAME: {}} + self._relevant_logs = {HEAD_NODE_ROLE_NAME: [], COMPUTE_NODE_ROLE_NAME: []} + self._cluster_log_state = {HEAD_NODE_ROLE_NAME: {}, COMPUTE_NODE_ROLE_NAME: {}} self._set_cluster_log_state() @property @@ -115,14 +115,14 @@ def is_feature_specific(self): def get_logs_state(self): """Get the state of the log files applicable to each of the cluster's EC2 instances.""" desired_keys = ["hostname", "instance_id", "node_role", "agent_status", "logs"] - states = [{key: self._cluster_log_state.get(MASTER_NODE_ROLE_NAME).get(key) for key in desired_keys}] + states = [{key: self._cluster_log_state.get(HEAD_NODE_ROLE_NAME).get(key) for key in desired_keys}] states.extend( [ {key: host_dict[key] for key in desired_keys} for hostname, host_dict in self._cluster_log_state.get(COMPUTE_NODE_ROLE_NAME).items() ] ) - assert_that(states).is_length(self.compute_nodes_count + 1) # computes + master + assert_that(states).is_length(self.compute_nodes_count + 1) # computes + head node return states @staticmethod @@ -145,11 +145,11 @@ def _base_os_to_platform(base_os): no_digits = base_os.rstrip(string.digits) return translations.get(no_digits, no_digits) - def _set_master_instance(self, instance): - """Set the master instance field in self.cluster_log_state.""" - self._cluster_log_state.get(MASTER_NODE_ROLE_NAME).update( + def _set_head_node_instance(self, instance): + """Set the head node instance field in self.cluster_log_state.""" + self._cluster_log_state.get(HEAD_NODE_ROLE_NAME).update( { - "node_role": MASTER_NODE_ROLE_NAME, + "node_role": HEAD_NODE_ROLE_NAME, "hostname": instance.get("PrivateDnsName"), "instance_id": instance.get("InstanceId"), } @@ -157,7 +157,7 @@ def _set_master_instance(self, instance): def _add_compute_instance(self, instance): """Update the cluster's log state by adding a compute node.""" - compute_hostname = self._run_command_on_master( + compute_hostname = self._run_command_on_head_node( "ssh -o StrictHostKeyChecking=no -q {} hostname -f".format(instance.get("PrivateDnsName")) ) self._cluster_log_state[COMPUTE_NODE_ROLE_NAME][compute_hostname] = { @@ -173,24 +173,24 @@ def _get_initial_cluster_log_state(self): if tags.get("ClusterName", "") != self.cluster.name: continue elif tags.get("Name", "") == "Master": - self._set_master_instance(instance) + self._set_head_node_instance(instance) else: self._add_compute_instance(instance) LOGGER.debug("After getting initial cluster state:\n{0}".format(self._dump_cluster_log_state())) - def _read_log_configs_from_master(self): + def _read_log_configs_from_head_node(self): """Read the log configs file at /usr/local/etc/cloudwatch_log_files.json.""" read_cmd = "cat /usr/local/etc/cloudwatch_log_files.json" - config = json.loads(self._run_command_on_master(read_cmd)) + config = json.loads(self._run_command_on_head_node(read_cmd)) return config.get("log_configs") - def _read_master_node_config(self): - """Read the node configuration JSON file at NODE_CONFIG_PATH on the master node.""" + def _read_head_node_config(self): + """Read the node configuration JSON file at NODE_CONFIG_PATH on the head node.""" read_cmd = "cat 
{0}".format(NODE_CONFIG_PATH) - master_node_config = json.loads(self._run_command_on_master(read_cmd)).get("cfncluster", {}) - assert_that(master_node_config).is_not_empty() - LOGGER.info("DNA config read from master node: {0}".format(_dump_json(master_node_config))) - return master_node_config + head_node_config = json.loads(self._run_command_on_head_node(read_cmd)).get("cfncluster", {}) + assert_that(head_node_config).is_not_empty() + LOGGER.info("DNA config read from head node: {0}".format(_dump_json(head_node_config))) + return head_node_config def _read_compute_node_config(self): """Read the node configuration JSON file at NODE_CONFIG_PATH on a compute node.""" @@ -209,7 +209,7 @@ def _read_compute_node_config(self): def _read_node_configs(self): """Return a dict mapping node role names to the config at NODE_CONFIG_PATH.""" return { - MASTER_NODE_ROLE_NAME: self._read_master_node_config(), + HEAD_NODE_ROLE_NAME: self._read_head_node_config(), COMPUTE_NODE_ROLE_NAME: self._read_compute_node_config(), } @@ -273,7 +273,7 @@ def _populate_relevant_logs_for_node_roles(self, logs): """Populate self._relevant_logs with the entries of logs.""" # When the scheduler is AWS Batch, only keep log that whose config's node_role value is MasterServer, since # Batch doesn't have compute nodes in the traditional sense. - desired_node_roles = {MASTER_NODE_ROLE_NAME} if self.scheduler == "awsbatch" else NODE_ROLE_NAMES + desired_node_roles = {HEAD_NODE_ROLE_NAME} if self.scheduler == "awsbatch" else NODE_ROLE_NAMES for log in logs: for node_role in set(log.get("node_roles")) & desired_node_roles: self._relevant_logs[node_role].append(self._clean_log_config(log)) @@ -286,8 +286,8 @@ def _filter_logs(self, logs): def _create_log_entries_for_nodes(self): """Create an entry for each relevant log in self._cluster_log_state.""" - self._cluster_log_state[MASTER_NODE_ROLE_NAME]["logs"] = { - log.get("file_path"): log for log in self._relevant_logs.get(MASTER_NODE_ROLE_NAME) + self._cluster_log_state[HEAD_NODE_ROLE_NAME]["logs"] = { + log.get("file_path"): log for log in self._relevant_logs.get(HEAD_NODE_ROLE_NAME) } for _hostname, compute_instance_dict in self._cluster_log_state.get(COMPUTE_NODE_ROLE_NAME).items(): compute_instance_dict["logs"] = { @@ -296,19 +296,19 @@ def _create_log_entries_for_nodes(self): def _get_relevant_logs(self): """Get subset of all log configs that apply to this cluster's scheduler/os combo.""" - logs = self._read_log_configs_from_master() + logs = self._read_log_configs_from_head_node() self._filter_logs(logs) self._create_log_entries_for_nodes() LOGGER.debug("After populating relevant logs:\n{0}".format(self._dump_cluster_log_state())) - def _run_command_on_master(self, cmd): - """Run cmd on cluster's MasterServer.""" + def _run_command_on_head_node(self, cmd): + """Run cmd on cluster's head node.""" return self.remote_command_executor.run_remote_command(cmd, timeout=60).stdout.strip() def _run_command_on_computes(self, cmd, assert_success=True): """Run cmd on all computes in the cluster.""" # Create directory in self.shared_dir to direct outputs to - out_dir = Path(self._run_command_on_master("mktemp -d -p {shared_dir}".format(shared_dir=self.shared_dir))) + out_dir = Path(self._run_command_on_head_node("mktemp -d -p {shared_dir}".format(shared_dir=self.shared_dir))) redirect = " > {out_dir}/$(hostname -f) ".format(out_dir=out_dir) remote_cmd = cmd.format(redirect=redirect) @@ -321,17 +321,17 @@ def _run_command_on_computes(self, cmd, assert_success=True): # Read the output and 
map it to the hostname outputs = {} - result_files = self._run_command_on_master("ls {0}".format(out_dir)) + result_files = self._run_command_on_head_node("ls {0}".format(out_dir)) for hostname in result_files.split(): - outputs[hostname] = self._run_command_on_master("sudo cat {0}".format(out_dir / hostname)) - self._run_command_on_master("rm -rf {0}".format(out_dir)) + outputs[hostname] = self._run_command_on_head_node("sudo cat {0}".format(out_dir / hostname)) + self._run_command_on_head_node("rm -rf {0}".format(out_dir)) return outputs - def _populate_master_log_existence(self): - """Figure out which of the relevant logs for the MasterServer don't exist.""" - for log_path, log_dict in self._cluster_log_state.get(MASTER_NODE_ROLE_NAME).get("logs").items(): + def _populate_head_node_log_existence(self): + """Figure out which of the relevant logs for the head node don't exist.""" + for log_path, log_dict in self._cluster_log_state.get(HEAD_NODE_ROLE_NAME).get("logs").items(): cmd = "[ -f {path} ] && echo exists || echo does not exist".format(path=log_path) - output = self._run_command_on_master(cmd) + output = self._run_command_on_head_node(cmd) log_dict["exists"] = output == "exists" def _populate_compute_log_existence(self): @@ -354,16 +354,16 @@ def _populate_compute_log_existence(self): def _populate_log_existence(self): """Figure out which of the relevant logs for each node type don't exist.""" - self._populate_master_log_existence() + self._populate_head_node_log_existence() self._populate_compute_log_existence() LOGGER.debug("After populating log existence:\n{0}".format(self._dump_cluster_log_state())) - def _populate_master_log_emptiness_and_tail(self): - """Figure out which of the relevant logs for the MasterServer are empty.""" - for log_path, log_dict in self._cluster_log_state.get(MASTER_NODE_ROLE_NAME).get("logs").items(): + def _populate_head_node_log_emptiness_and_tail(self): + """Figure out which of the relevant logs for the head node are empty.""" + for log_path, log_dict in self._cluster_log_state.get(HEAD_NODE_ROLE_NAME).get("logs").items(): if not log_dict.get("exists"): continue - output = self._run_command_on_master("sudo tail -n 1 {path}".format(path=log_path)) + output = self._run_command_on_head_node("sudo tail -n 1 {path}".format(path=log_path)) log_dict["is_empty"] = output == "" log_dict["tail"] = output @@ -392,15 +392,15 @@ def _populate_compute_log_emptiness_and_tail(self): def _populate_log_emptiness_and_tail(self): """Figure out which of the relevant logs for each node type are empty.""" - self._populate_master_log_emptiness_and_tail() + self._populate_head_node_log_emptiness_and_tail() self._populate_compute_log_emptiness_and_tail() LOGGER.debug("After populating log emptiness and tails:\n{0}".format(self._dump_cluster_log_state())) - def _populate_master_agent_status(self): - """Get the cloudwatch agent's status for the MasterServer.""" + def _populate_head_node_agent_status(self): + """Get the cloudwatch agent's status for the head node.""" status_cmd = "/opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -a status" - status = json.loads(self._run_command_on_master(status_cmd)) - self._cluster_log_state[MASTER_NODE_ROLE_NAME]["agent_status"] = status.get("status") + status = json.loads(self._run_command_on_head_node(status_cmd)) + self._cluster_log_state[HEAD_NODE_ROLE_NAME]["agent_status"] = status.get("status") def _populate_compute_agent_status(self): """Get the cloudwatch agent's status for all the compute nodes in the cluster.""" @@ 
-414,7 +414,7 @@ def _populate_compute_agent_status(self):

     def _populate_agent_status(self):
         """Get the cloudwatch agent's status for all the nodes in the cluster."""
-        self._populate_master_agent_status()
+        self._populate_head_node_agent_status()
         self._populate_compute_agent_status()
         LOGGER.debug("After populating agent statuses:\n{0}".format(self._dump_cluster_log_state()))

@@ -424,7 +424,7 @@ def _set_cluster_log_state(self):

         In particular:
         * Identify which EC2 instances belong to this cluster
-        * Identify which logs are relevant to the MasterServer and ComputeFleet nodes
+        * Identify which logs are relevant to the head node and compute fleet nodes
         * Identify whether each of a node's relevant logs contain data or not. If they do contain data,
           save the last line of the file.
         * Get the CloudWatch agent's status for each node
@@ -511,7 +511,7 @@ def verify_log_group_retention_days(self, log_groups, cluster_has_been_deleted):
         )

     def verify_agent_status(self, logs_state):
-        """Verify CloudWatch agent is running on the MasterServer (or not if not enabled)."""
+        """Verify CloudWatch agent is running on the head node (or not if not enabled)."""
         expected_status = "running" if self.enabled else "stopped"
         assert_that(logs_state).extracting("agent_status").contains_only(expected_status)
diff --git a/tests/integration-tests/tests/common/schedulers_common.py b/tests/integration-tests/tests/common/schedulers_common.py
index 46373a588c..620665aeff 100644
--- a/tests/integration-tests/tests/common/schedulers_common.py
+++ b/tests/integration-tests/tests/common/schedulers_common.py
@@ -443,7 +443,7 @@ def cancel_job(self, job_id):
         return self._remote_command_executor.run_remote_command("scancel {}".format(job_id))

     def set_nodes_state(self, compute_nodes, state):
-        """Put nodes into down state."""
+        """Put nodes into the given state."""
         self._remote_command_executor.run_remote_command(
             "sudo /opt/slurm/bin/scontrol update NodeName={} state={} reason=testing".format(
                 ",".join(compute_nodes), state
@@ -541,7 +541,7 @@ def get_compute_nodes(self):  # noqa: D102

     @retry(retry_on_result=lambda result: "offline" not in result, wait_fixed=seconds(5), stop_max_delay=minutes(5))
     def wait_for_locked_node(self):  # noqa: D102
-        # discard the first node since that is the master server
+        # discard the first node since that is the head node
         return self._remote_command_executor.run_remote_command(r'pbsnodes | grep -e "\sstate = " | tail -n +2').stdout

     def get_node_cores(self):
diff --git a/tests/integration-tests/tests/common/utils.py b/tests/integration-tests/tests/common/utils.py
index 3eccc4a09d..53bb8172c6 100644
--- a/tests/integration-tests/tests/common/utils.py
+++ b/tests/integration-tests/tests/common/utils.py
@@ -52,8 +52,8 @@
 OS_TO_PCLUSTER_AMI_NAME_OWNER_MAP = {
     "alinux": {"name": "amzn-hvm-x86_64-*", "owners": ["amazon"]},
     "alinux2": {"name": "amzn2-hvm-*-*", "owners": ["amazon"]},
-    "centos6": {"name": "centos6-hvm-x86_64-*", "owners": ["amazon"]},
     "centos7": {"name": "centos7-hvm-x86_64-*", "owners": ["amazon"]},
+    "centos8": {"name": "centos8-hvm-x86_64-*", "owners": ["amazon"]},
     "ubuntu1604": {"name": "ubuntu-1604-lts-hvm-x86_64-*", "owners": ["amazon"]},
     "ubuntu1804": {"name": "ubuntu-1804-lts-hvm-*-*", "owners": ["amazon"]},
 }
@@ -144,3 +144,8 @@ def _assert_ami_is_available(region, ami_id):
 def get_installed_parallelcluster_version():
     """Get the version of the installed aws-parallelcluster package."""
     return pkg_resources.get_distribution("aws-parallelcluster").version
+
+
+def get_sts_endpoint(region):
+    """Get regionalized STS endpoint."""
+    return "https://sts.{0}.{1}".format(region, "amazonaws.com.cn" if region.startswith("cn-") else "amazonaws.com")
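A quick sketch of how the new helper resolves partitions, assuming it is imported the same way the integration tests import the utils module (the endpoints shown follow directly from the string template above):

from utils import get_sts_endpoint

# The partition suffix is derived purely from the region prefix.
assert get_sts_endpoint("eu-south-1") == "https://sts.eu-south-1.amazonaws.com"
assert get_sts_endpoint("cn-north-1") == "https://sts.cn-north-1.amazonaws.com.cn"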
diff --git a/tests/integration-tests/tests/configure/test_pcluster_configure.py b/tests/integration-tests/tests/configure/test_pcluster_configure.py
index 6eef0a28eb..bc00100247 100644
--- a/tests/integration-tests/tests/configure/test_pcluster_configure.py
+++ b/tests/integration-tests/tests/configure/test_pcluster_configure.py
@@ -41,7 +41,6 @@ def test_pcluster_configure(
         vpc_stack.cfn_outputs["VpcId"],
         vpc_stack.cfn_outputs["PublicSubnetId"],
         vpc_stack.cfn_outputs["PrivateSubnetId"],
-        vpc_stack,
     )
     assert_configure_workflow(region, stages, config_path)
     assert_config_contains_expected_values(
@@ -92,7 +91,6 @@ def test_pcluster_configure_avoid_bad_subnets(
         # and use the first subnet in the remaining list of subnets
         "",
         "",
-        vpc_stack,
         omitted_subnets_num=1,
     )
     assert_configure_workflow(region, stages, config_path)
@@ -109,6 +107,44 @@ def test_pcluster_configure_avoid_bad_subnets(
     )


+def test_region_without_t2micro(
+    vpc_stack,
+    pcluster_config_reader,
+    key_name,
+    region,
+    os,
+    scheduler,
+    test_datadir,
+):
+    """
+    Verify that the default instance type (free tier) is retrieved dynamically according to the region.
+    In other words, t3.micro is used as the default when the region does not offer t2.micro.
+    """
+    config_path = test_datadir / "config.ini"
+    stages = orchestrate_pcluster_configure_stages(
+        region,
+        key_name,
+        scheduler,
+        os,
+        "",
+        vpc_stack.cfn_outputs["VpcId"],
+        vpc_stack.cfn_outputs["PublicSubnetId"],
+        vpc_stack.cfn_outputs["PrivateSubnetId"],
+    )
+    assert_configure_workflow(region, stages, config_path)
+    assert_config_contains_expected_values(
+        region,
+        key_name,
+        scheduler,
+        os,
+        "",
+        vpc_stack.cfn_outputs["VpcId"],
+        vpc_stack.cfn_outputs["PublicSubnetId"],
+        vpc_stack.cfn_outputs["PrivateSubnetId"],
+        config_path,
+    )
+
+
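The test above relies on pcluster configure resolving the free-tier default dynamically. A minimal sketch of an equivalent check using only boto3 (an illustration under stated assumptions, not pcluster's internal implementation; the helper name is hypothetical):

import boto3

def free_tier_default_instance_type(region):
    """Return t2.micro when the region offers it, else fall back to t3.micro (hypothetical helper)."""
    ec2 = boto3.client("ec2", region_name=region)
    # Ask EC2 whether t2.micro is offered anywhere in this region.
    offerings = ec2.describe_instance_type_offerings(
        LocationType="region",
        Filters=[{"Name": "instance-type", "Values": ["t2.micro"]}],
    )["InstanceTypeOfferings"]
    return "t2.micro" if offerings else "t3.micro"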
\(y/n\) \[y\]: ", "response": "n"}, { - "prompt": fr"{omitted_note}Master Subnet ID \[subnet-.+\]: ", + "prompt": fr"{omitted_note}head node Subnet ID \[subnet-.+\]: ", "response": headnode_subnet_id, }, { - "prompt": fr"{omitted_note}Compute Subnet ID \[{default_compute_subnet}\]: ", + "prompt": fr"{omitted_note}compute Subnet ID \[{default_compute_subnet}\]: ", "response": compute_subnet_id, }, ] diff --git a/tests/integration-tests/tests/create/test_create.py b/tests/integration-tests/tests/create/test_create.py index 0e5079bfc1..d80929462c 100644 --- a/tests/integration-tests/tests/create/test_create.py +++ b/tests/integration-tests/tests/create/test_create.py @@ -76,7 +76,7 @@ def _assert_head_node_is_running(region, cluster): logging.info("Asserting the head node is running") head_node_state = ( boto3.client("ec2", region_name=region) - .describe_instances(Filters=[{"Name": "ip-address", "Values": [cluster.master_ip]}]) + .describe_instances(Filters=[{"Name": "ip-address", "Values": [cluster.head_node_ip]}]) .get("Reservations")[0] .get("Instances")[0] .get("State") diff --git a/tests/integration-tests/tests/createami/test_createami.py b/tests/integration-tests/tests/createami/test_createami.py index e1839a7c95..36178048fa 100644 --- a/tests/integration-tests/tests/createami/test_createami.py +++ b/tests/integration-tests/tests/createami/test_createami.py @@ -11,6 +11,7 @@ # See the License for the specific language governing permissions and limitations under the License. import logging +from os import environ import pytest from assertpy import assert_that @@ -45,6 +46,14 @@ def test_createami(region, os, instance, request, pcluster_config_reader, vpc_st custom_cookbook = request.config.getoption("createami_custom_chef_cookbook") custom_cookbook_args = [] if not custom_cookbook else ["-cc", custom_cookbook] + # Custom Node + # inject PARALLELCLUSTER_NODE_URL into packer environment + custom_node = request.config.getoption("createami_custom_node_package") + env = None + if custom_node: + env = environ.copy() + env["PARALLELCLUSTER_NODE_URL"] = custom_node + # Instance type pcluster_version_result = run_command(["pcluster", "version"]) instance_args = ( @@ -55,7 +64,8 @@ def test_createami(region, os, instance, request, pcluster_config_reader, vpc_st ["pcluster", "createami", "-ai", base_ami, "-os", os, "-r", region, "-c", cluster_config.as_posix()] + custom_cookbook_args + instance_args - + networking_args + + networking_args, + env=env, ) stdout_lower = pcluster_createami_result.stdout.lower() diff --git a/tests/integration-tests/tests/dcv/test_dcv.py b/tests/integration-tests/tests/dcv/test_dcv.py index 511a2fff00..3b25b1e1a3 100644 --- a/tests/integration-tests/tests/dcv/test_dcv.py +++ b/tests/integration-tests/tests/dcv/test_dcv.py @@ -12,11 +12,16 @@ import os as operating_system import re -import boto3 import pytest from assertpy import assert_that from remote_command_executor import RemoteCommandExecutor -from utils import add_keys_to_known_hosts, get_username_for_os, remove_keys_from_known_hosts, run_command +from utils import ( + add_keys_to_known_hosts, + check_headnode_security_group, + get_username_for_os, + remove_keys_from_known_hosts, + run_command, +) from tests.cloudwatch_logging.test_cloudwatch_logging import FeatureSpecificCloudWatchLoggingTestRunner @@ -106,7 +111,7 @@ def _test_dcv_configuration( remote_command_executor = RemoteCommandExecutor(cluster) # check configuration parameters - _check_security_group(region, cluster, dcv_port, expected_cidr=access_from) + 
check_headnode_security_group(region, cluster, dcv_port, expected_cidr=access_from) # dcv connect show url env = operating_system.environ.copy() @@ -114,13 +119,13 @@ def _test_dcv_configuration( # add ssh key to jenkins user known hosts file to avoid ssh keychecking prompt host_keys_file = operating_system.path.expanduser("~/.ssh/known_hosts") - add_keys_to_known_hosts(cluster.master_ip, host_keys_file) + add_keys_to_known_hosts(cluster.head_node_ip, host_keys_file) try: result = run_command(["pcluster", "dcv", "connect", cluster.name, "--show-url"], env=env) finally: # remove ssh key from jenkins user known hosts file - remove_keys_from_known_hosts(cluster.master_ip, host_keys_file, env=env) + remove_keys_from_known_hosts(cluster.head_node_ip, host_keys_file, env=env) assert_that(result.stdout).matches( r"Please use the following one-time URL in your browser within 30 seconds:\n" @@ -198,15 +203,6 @@ def _check_auth_ok(remote_command_executor, external_authenticator_port, session ).is_equal_to('{0}'.format(username)) -def _check_security_group(region, cluster, port, expected_cidr): - security_group_id = cluster.cfn_resources.get("MasterSecurityGroup") - response = boto3.client("ec2", region_name=region).describe_security_groups(GroupIds=[security_group_id]) - - ips = response["SecurityGroups"][0]["IpPermissions"] - target = next(filter(lambda x: x.get("FromPort", -1) == port, ips), {}) - assert_that(target["IpRanges"][0]["CidrIp"]).is_equal_to(expected_cidr) - - def _check_no_crashes(remote_command_executor, test_datadir): """Verify no core files in /var/crash, which on ubuntu18 causes a popup when logging into the 1st session.""" remote_command_executor.run_remote_script(str(test_datadir / "verify_no_core_files.sh")) diff --git a/tests/integration-tests/tests/disable_hyperthreading/test_disable_hyperthreading.py b/tests/integration-tests/tests/disable_hyperthreading/test_disable_hyperthreading.py index 0b001fba0d..4f34f62640 100644 --- a/tests/integration-tests/tests/disable_hyperthreading/test_disable_hyperthreading.py +++ b/tests/integration-tests/tests/disable_hyperthreading/test_disable_hyperthreading.py @@ -85,12 +85,12 @@ def _test_disable_hyperthreading_settings( expected_cpus_per_instance = slots_per_instance // 2 if hyperthreading_disabled else slots_per_instance expected_threads_per_core = 1 if hyperthreading_disabled else 2 - # Test disable hyperthreading on Master - logging.info("Test Disable Hyperthreading on Master") + # Test disable hyperthreading on head node + logging.info("Test Disable Hyperthreading on head node") result = remote_command_executor.run_remote_command("lscpu") if partition: # If partition is supplied, assume this is HIT setting where ht settings are at the queue level - # In this case, ht is not disabled on master + # In this case, ht is not disabled on head node assert_that(result.stdout).matches(r"Thread\(s\) per core:\s+{0}".format(2)) _assert_active_cpus(result.stdout, slots_per_instance) else: diff --git a/tests/integration-tests/tests/efa/test_efa.py b/tests/integration-tests/tests/efa/test_efa.py index 5c12bb75c7..32ecec533d 100644 --- a/tests/integration-tests/tests/efa/test_efa.py +++ b/tests/integration-tests/tests/efa/test_efa.py @@ -29,7 +29,7 @@ # Slurm test is to verify EFA works correctly when using the SIT model in the config file @pytest.mark.schedulers(["sge", "slurm"]) @pytest.mark.usefixtures("os") -def test_sit_efa(region, scheduler, instance, pcluster_config_reader, clusters_factory, test_datadir): +def test_sit_efa(region, 
scheduler, instance, pcluster_config_reader, clusters_factory, test_datadir, architecture): """ Test all EFA Features. @@ -46,7 +46,8 @@ def test_sit_efa(region, scheduler, instance, pcluster_config_reader, clusters_f _test_mpi(remote_command_executor, slots_per_instance, scheduler) logging.info("Running on Instances: {0}".format(get_compute_nodes_instance_ids(cluster.cfn_name, region))) _test_osu_benchmarks("openmpi", remote_command_executor, scheduler_commands, test_datadir, slots_per_instance) - _test_osu_benchmarks("intelmpi", remote_command_executor, scheduler_commands, test_datadir, slots_per_instance) + if architecture == "x86_64": + _test_osu_benchmarks("intelmpi", remote_command_executor, scheduler_commands, test_datadir, slots_per_instance) _test_shm_transfer_is_enabled(scheduler_commands, remote_command_executor) assert_no_errors_in_logs(remote_command_executor, scheduler) @@ -57,7 +58,7 @@ def test_sit_efa(region, scheduler, instance, pcluster_config_reader, clusters_f @pytest.mark.oss(["alinux2"]) @pytest.mark.schedulers(["slurm"]) @pytest.mark.usefixtures("os") -def test_hit_efa(region, scheduler, instance, pcluster_config_reader, clusters_factory, test_datadir): +def test_hit_efa(region, scheduler, instance, pcluster_config_reader, clusters_factory, test_datadir, architecture): """ Test all EFA Features. @@ -82,14 +83,15 @@ def test_hit_efa(region, scheduler, instance, pcluster_config_reader, clusters_f slots_per_instance, partition="efa-enabled", ) - _test_osu_benchmarks( - "intelmpi", - remote_command_executor, - scheduler_commands, - test_datadir, - slots_per_instance, - partition="efa-enabled", - ) + if architecture == "x86_64": + _test_osu_benchmarks( + "intelmpi", + remote_command_executor, + scheduler_commands, + test_datadir, + slots_per_instance, + partition="efa-enabled", + ) _test_shm_transfer_is_enabled(scheduler_commands, remote_command_executor, partition="efa-enabled") assert_no_errors_in_logs(remote_command_executor, scheduler) @@ -111,13 +113,13 @@ def _test_efa_installation(scheduler_commands, remote_command_executor, efa_inst # Check if EFA interface is on compute node result = remote_command_executor.run_remote_command("cat /shared/lspci.out") if efa_installed: - assert_that(result.stdout).contains("1d0f:efa0") + assert_that(result.stdout).contains("1d0f:efa") else: - assert_that(result.stdout).does_not_contain("1d0f:efa0") + assert_that(result.stdout).does_not_contain("1d0f:efa") - # Check EFA interface not present on master + # Check EFA interface not present on head node result = remote_command_executor.run_remote_command("lspci -n") - assert_that(result.stdout).does_not_contain("1d0f:efa0") + assert_that(result.stdout).does_not_contain("1d0f:efa") def _test_osu_benchmarks( diff --git a/tests/integration-tests/tests/efa/test_efa/test_hit_efa/pcluster.config.ini b/tests/integration-tests/tests/efa/test_efa/test_hit_efa/pcluster.config.ini index e856c358cd..fe9a7803e4 100644 --- a/tests/integration-tests/tests/efa/test_efa/test_hit_efa/pcluster.config.ini +++ b/tests/integration-tests/tests/efa/test_efa/test_hit_efa/pcluster.config.ini @@ -9,7 +9,7 @@ base_os = {{ os }} key_name = {{ key_name }} vpc_settings = parallelcluster-vpc scheduler = {{ scheduler }} -master_instance_type = c5.xlarge +master_instance_type = {{ instance }} queue_settings = efa-enabled,efa-disabled [queue efa-enabled] @@ -39,4 +39,4 @@ max_count = {{ max_queue_size }} vpc_id = {{ vpc_id }} master_subnet_id = {{ public_subnet_id }} compute_subnet_id = {{ private_subnet_id }} 
-use_public_ips = false +use_public_ips = true diff --git a/tests/integration-tests/tests/efa/test_efa/test_sit_efa/pcluster.config.ini b/tests/integration-tests/tests/efa/test_efa/test_sit_efa/pcluster.config.ini index 865603075f..4ef406cf1d 100644 --- a/tests/integration-tests/tests/efa/test_efa/test_sit_efa/pcluster.config.ini +++ b/tests/integration-tests/tests/efa/test_efa/test_sit_efa/pcluster.config.ini @@ -9,7 +9,7 @@ base_os = {{ os }} key_name = {{ key_name }} vpc_settings = parallelcluster-vpc scheduler = {{ scheduler }} -master_instance_type = c5.xlarge +master_instance_type = {{ instance }} compute_instance_type = {{ instance }} initial_queue_size = 2 maintain_initial_size = true @@ -24,4 +24,4 @@ placement_group = DYNAMIC vpc_id = {{ vpc_id }} master_subnet_id = {{ public_subnet_id }} compute_subnet_id = {{ private_subnet_id }} -use_public_ips = false +use_public_ips = true diff --git a/tests/integration-tests/tests/iam/test_iam.py b/tests/integration-tests/tests/iam/test_iam.py new file mode 100644 index 0000000000..632071f269 --- /dev/null +++ b/tests/integration-tests/tests/iam/test_iam.py @@ -0,0 +1,133 @@ +# Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). +# You may not use this file except in compliance with the License. +# A copy of the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "LICENSE.txt" file accompanying this file. +# This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied. +# See the License for the specific language governing permissions and limitations under the License. +import logging +import os +from shutil import copyfile + +import boto3 +import pytest +from assertpy import assert_that +from remote_command_executor import RemoteCommandExecutor + +from tests.common.assertions import assert_no_errors_in_logs + + +@pytest.mark.usefixtures("os", "instance") +def test_iam_roles( + region, + scheduler, + common_pcluster_policies, + role_factory, + pcluster_config_reader, + clusters_factory, + cluster_model, + test_datadir, +): + is_awsbatch = scheduler == "awsbatch" + if is_awsbatch: + instance_policies = common_pcluster_policies["awsbatch_instance_policy"] + lambda_policies = common_pcluster_policies["awsbatch_lambda_policy"] + else: + instance_policies = common_pcluster_policies["traditional_instance_policy"] + lambda_policies = common_pcluster_policies["traditional_lambda_policy"] + cluster_role_name = role_factory("ec2", [instance_policies]) + lambda_role_name = role_factory("lambda", [lambda_policies]) + + # Copy the config file template for reuse in update. + config_file_name = cluster_model + ".ini" + config_file_path = os.path.join(str(test_datadir), config_file_name) + updated_config_file_name = cluster_model + ".update.ini" + updated_config_file_path = os.path.join(str(test_datadir), updated_config_file_name) + copyfile(config_file_path, updated_config_file_path) + + cluster_config = pcluster_config_reader( + config_file=config_file_name, ec2_iam_role=cluster_role_name, iam_lambda_role=lambda_role_name + ) + cluster = clusters_factory(cluster_config) + + main_stack_name = "parallelcluster-" + cluster.name + cfn_client = boto3.client("cloudformation", region_name=region) + lambda_client = boto3.client("lambda", region_name=region) + + # Check all CloudFormation stacks after creation + # If scheduler is awsbatch, there will still be IAM roles created. 
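+    # Note: check_no_role_is_created is "not is_awsbatch" because, with both ec2_iam_role and iam_lambda_role
+    # supplied, a traditional-scheduler stack is expected to contain no AWS::IAM::Role resources at all,
+    # while awsbatch clusters still create roles of their own.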
+ _check_lambda_role(cfn_client, lambda_client, main_stack_name, lambda_role_name, not is_awsbatch) + + # Test updating the iam_lambda_role + updated_lambda_role_name = role_factory("lambda", [lambda_policies]) + assert_that(updated_lambda_role_name == lambda_role_name).is_false() + cluster.config_file = str( + pcluster_config_reader( + config_file=updated_config_file_name, + ec2_iam_role=cluster_role_name, + iam_lambda_role=updated_lambda_role_name, + ) + ) + cluster.update() + + # Check all CloudFormation stacks after update + _check_lambda_role(cfn_client, lambda_client, main_stack_name, updated_lambda_role_name, not is_awsbatch) + + +def _check_lambda_role(cfn_client, lambda_client, stack_name, lambda_role_name, check_no_role_is_created): + """Test lambda role is attached to all Lambda functions in the stack and its substack.""" + resources = cfn_client.describe_stack_resources(StackName=stack_name)["StackResources"] + for resource in resources: + resource_type = resource["ResourceType"] + if check_no_role_is_created: + # If check_no_role_is_created, check that there is no role created in the stack and its substack. + assert_that(resource_type).is_not_equal_to("AWS::IAM::Role") + if resource_type == "AWS::CloudFormation::Stack": + # Recursively check substacks + _check_lambda_role( + cfn_client, lambda_client, resource["PhysicalResourceId"], lambda_role_name, check_no_role_is_created + ) + if resource_type == "AWS::Lambda::Function": + # Check the role is attached to the Lambda function + lambda_function = lambda_client.get_function(FunctionName=resource["PhysicalResourceId"])["Configuration"] + assert_that(lambda_role_name in lambda_function["Role"]).is_true() + + +@pytest.mark.regions(["ap-northeast-2"]) +@pytest.mark.schedulers(["slurm", "awsbatch"]) +@pytest.mark.oss(["alinux2"]) +@pytest.mark.usefixtures("os", "instance") +def test_iam_policies(region, scheduler, pcluster_config_reader, clusters_factory): + """Test IAM Policies""" + cluster_config = pcluster_config_reader( + iam_policies="arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess, arn:aws:iam::aws:policy/AWSBatchFullAccess" + ) + cluster = clusters_factory(cluster_config) + remote_command_executor = RemoteCommandExecutor(cluster) + + _test_s3_access(remote_command_executor, region) + + if scheduler == "awsbatch": + _test_batch_access(remote_command_executor, region) + + assert_no_errors_in_logs(remote_command_executor, scheduler) + + +def _test_s3_access(remote_command_executor, region): + logging.info("Testing S3 Access") + result = remote_command_executor.run_remote_command(f"AWS_DEFAULT_REGION={region} aws s3 ls").stdout + # An error occurred (AccessDenied) when calling the ListBuckets operation: Access Denied + assert_that(result).does_not_contain("AccessDenied") + + +def _test_batch_access(remote_command_executor, region): + logging.info("Testing AWS Batch Access") + result = remote_command_executor.run_remote_command( + f"AWS_DEFAULT_REGION={region} aws batch describe-compute-environments" + ).stdout + # An error occurred (AccessDeniedException) when calling the DescribeComputeEnvironments operation: ... 
+ assert_that(result).does_not_contain("AccessDeniedException") diff --git a/tests/integration-tests/tests/iam_policies/test_iam_policies/test_iam_policies/pcluster.config.ini b/tests/integration-tests/tests/iam/test_iam/test_iam_policies/pcluster.config.ini similarity index 100% rename from tests/integration-tests/tests/iam_policies/test_iam_policies/test_iam_policies/pcluster.config.ini rename to tests/integration-tests/tests/iam/test_iam/test_iam_policies/pcluster.config.ini diff --git a/tests/integration-tests/tests/iam/test_iam/test_iam_roles/HIT.ini b/tests/integration-tests/tests/iam/test_iam/test_iam_roles/HIT.ini new file mode 100644 index 0000000000..da7e4e06c6 --- /dev/null +++ b/tests/integration-tests/tests/iam/test_iam/test_iam_roles/HIT.ini @@ -0,0 +1,29 @@ +[global] +cluster_template = default + +[aws] +aws_region_name = {{ region }} + +[cluster default] +key_name = {{ key_name }} +vpc_settings = parallelcluster-vpc +scheduler = {{ scheduler }} +master_instance_type = {{ instance }} +base_os = {{ os }} +queue_settings = compute +ec2_iam_role = {{ ec2_iam_role }} +iam_lambda_role = {{ iam_lambda_role }} + +[vpc parallelcluster-vpc] +vpc_id = {{ vpc_id }} +master_subnet_id = {{ public_subnet_id }} +compute_subnet_id = {{ private_subnet_id }} +use_public_ips = false + +[queue compute] +enable_efa = false +enable_efa_gdr = false +compute_resource_settings = default + +[compute_resource default] +instance_type = {{ instance }} diff --git a/tests/integration-tests/tests/iam/test_iam/test_iam_roles/SIT.ini b/tests/integration-tests/tests/iam/test_iam/test_iam_roles/SIT.ini new file mode 100644 index 0000000000..79707cc3a7 --- /dev/null +++ b/tests/integration-tests/tests/iam/test_iam/test_iam_roles/SIT.ini @@ -0,0 +1,28 @@ +[global] +cluster_template = default + +[aws] +aws_region_name = {{ region }} + +[cluster default] +key_name = {{ key_name }} +vpc_settings = parallelcluster-vpc +scheduler = {{ scheduler }} +master_instance_type = {{ instance }} +compute_instance_type = {{ instance }} +base_os = {{ os }} +ec2_iam_role = {{ ec2_iam_role }} +iam_lambda_role = {{ iam_lambda_role }} +{% if scheduler == "awsbatch" %} +min_vcpus = 1 +desired_vcpus = 1 +{% else %} +initial_queue_size = 1 +maintain_initial_size = true +{% endif %} + +[vpc parallelcluster-vpc] +vpc_id = {{ vpc_id }} +master_subnet_id = {{ public_subnet_id }} +compute_subnet_id = {{ private_subnet_id }} +use_public_ips = false diff --git a/tests/integration-tests/tests/iam_policies/test_iam_policies.py b/tests/integration-tests/tests/iam_policies/test_iam_policies.py deleted file mode 100644 index 038b6c3263..0000000000 --- a/tests/integration-tests/tests/iam_policies/test_iam_policies.py +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). -# You may not use this file except in compliance with the License. -# A copy of the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "LICENSE.txt" file accompanying this file. -# This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied. -# See the License for the specific language governing permissions and limitations under the License. 
-import logging - -import pytest -from assertpy import assert_that -from remote_command_executor import RemoteCommandExecutor - -from tests.common.assertions import assert_no_errors_in_logs - - -@pytest.mark.regions(["ap-northeast-2"]) -@pytest.mark.schedulers(["slurm", "awsbatch"]) -@pytest.mark.oss(["alinux2"]) -@pytest.mark.usefixtures("os", "instance") -def test_iam_policies(region, scheduler, pcluster_config_reader, clusters_factory): - """Test IAM Policies""" - cluster_config = pcluster_config_reader( - iam_policies="arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess, arn:aws:iam::aws:policy/AWSBatchFullAccess" - ) - cluster = clusters_factory(cluster_config) - remote_command_executor = RemoteCommandExecutor(cluster) - - _test_s3_access(remote_command_executor, region) - _test_batch_access(remote_command_executor, region) - - assert_no_errors_in_logs(remote_command_executor, scheduler) - - -def _test_s3_access(remote_command_executor, region): - logging.info("Testing S3 Access") - result = remote_command_executor.run_remote_command("AWS_DEFAULT_REGION={0} aws s3 ls".format(region)).stdout - # An error occurred (AccessDenied) when calling the ListBuckets operation: Access Denied - assert_that(result).does_not_contain("AccessDenied") - - -def _test_batch_access(remote_command_executor, region): - logging.info("Testing AWS Batch Access") - result = remote_command_executor.run_remote_command( - "AWS_DEFAULT_REGION={0} aws batch describe-compute-environments".format(region) - ).stdout - # An error occurred (AccessDeniedException) when calling the DescribeComputeEnvironments operation: ... - assert_that(result).does_not_contain("AccessDeniedException") diff --git a/tests/integration-tests/tests/intel_hpc/test_intel_hpc.py b/tests/integration-tests/tests/intel_hpc/test_intel_hpc.py index 3651c80c2f..44a7471253 100644 --- a/tests/integration-tests/tests/intel_hpc/test_intel_hpc.py +++ b/tests/integration-tests/tests/intel_hpc/test_intel_hpc.py @@ -35,7 +35,7 @@ def test_intel_hpc(region, scheduler, instance, os, pcluster_config_reader, clus def _test_intel_clck(remote_command_executor, scheduler_commands, test_datadir, os): - # Install Intel Cluster Checker CLCK Master + # Install Intel Cluster Checker CLCK on head node logging.info("Installing Intel Cluster Checker") remote_command_executor.run_remote_script(str(test_datadir / "install_clck.sh"), hide=False) diff --git a/tests/integration-tests/tests/multiple_nics/test_multiple_nics.py b/tests/integration-tests/tests/multiple_nics/test_multiple_nics.py index a865bec94b..f68bfc951b 100644 --- a/tests/integration-tests/tests/multiple_nics/test_multiple_nics.py +++ b/tests/integration-tests/tests/multiple_nics/test_multiple_nics.py @@ -27,7 +27,7 @@ def test_multiple_nics(scheduler, region, pcluster_config_reader, clusters_facto remote_command_executor = RemoteCommandExecutor(cluster) scheduler_commands = get_scheduler_commands(scheduler, remote_command_executor) - _test_master_node_nics(remote_command_executor, region) + _test_head_node_nics(remote_command_executor, region) _test_compute_node_nics(cluster, region, remote_command_executor, scheduler_commands) @@ -40,16 +40,16 @@ def _get_private_ip_addresses(instance_id, region, remote_command_executor): return result.stdout.strip().split("\n") -def _test_master_node_nics(remote_command_executor, region): - # On the master node we just check that all the private IPs have been assigned to NICs - master_instance_id = remote_command_executor.run_remote_command( +def 
_test_head_node_nics(remote_command_executor, region): + # On the head node we just check that all the private IPs have been assigned to NICs + head_node_instance_id = remote_command_executor.run_remote_command( "curl http://169.254.169.254/latest/meta-data/instance-id" ).stdout - master_ip_addresses = _get_private_ip_addresses(master_instance_id, region, remote_command_executor) + head_node_ip_addresses = _get_private_ip_addresses(head_node_instance_id, region, remote_command_executor) ip_a_result = remote_command_executor.run_remote_command("ip a").stdout - for ip_address in master_ip_addresses: + for ip_address in head_node_ip_addresses: assert_that(ip_a_result).matches(".* inet {0}.*".format(ip_address)) diff --git a/tests/integration-tests/tests/multiple_nics/test_multiple_nics/test_multiple_nics/pcluster.config.ini b/tests/integration-tests/tests/multiple_nics/test_multiple_nics/test_multiple_nics/pcluster.config.ini index 71e62ed1cf..ac6fc00483 100644 --- a/tests/integration-tests/tests/multiple_nics/test_multiple_nics/test_multiple_nics/pcluster.config.ini +++ b/tests/integration-tests/tests/multiple_nics/test_multiple_nics/test_multiple_nics/pcluster.config.ini @@ -8,7 +8,7 @@ cluster_template = default base_os = {{ os }} key_name = {{ key_name }} scheduler = {{ scheduler }} -master_instance_type = c5.xlarge +master_instance_type = {{ instance }} compute_instance_type = {{ instance }} initial_queue_size = 1 maintain_initial_size = true diff --git a/tests/integration-tests/tests/networking/test_networking.py b/tests/integration-tests/tests/networking/test_networking.py index 5efb20c802..144b0fcee5 100644 --- a/tests/integration-tests/tests/networking/test_networking.py +++ b/tests/integration-tests/tests/networking/test_networking.py @@ -15,10 +15,11 @@ import pytest from assertpy import assert_that from cfn_stacks_factory import CfnStack, CfnStacksFactory -from utils import random_alphanumeric +from utils import generate_stack_name @pytest.fixture() +@pytest.mark.usefixtures("setup_sts_credentials") def networking_stack_factory(request): """Define a fixture to manage the creation and destruction of CloudFormation stacks.""" factory = CfnStacksFactory(request.config.getoption("credential")) @@ -26,11 +27,7 @@ def networking_stack_factory(request): def _create_network(region, template_path, parameters): file_content = extract_template(template_path) stack = CfnStack( - name="integ-tests-networking-{0}{1}{2}".format( - random_alphanumeric(), - "-" if request.config.getoption("stackname_suffix") else "", - request.config.getoption("stackname_suffix"), - ), + name=generate_stack_name("integ-tests-networking", request.config.getoption("stackname_suffix")), region=region, template=file_content, parameters=parameters, diff --git a/tests/integration-tests/tests/networking/test_security_groups.py b/tests/integration-tests/tests/networking/test_security_groups.py new file mode 100644 index 0000000000..3bc8c9d2d3 --- /dev/null +++ b/tests/integration-tests/tests/networking/test_security_groups.py @@ -0,0 +1,148 @@ +# Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). +# You may not use this file except in compliance with the License. +# A copy of the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "LICENSE.txt" file accompanying this file. +# This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied. 
+# See the License for the specific language governing permissions and limitations under the License.
+import logging
+
+import boto3
+import pytest
+from assertpy import assert_that
+from cfn_stacks_factory import CfnStack
+from troposphere import Ref, Template
+from troposphere.ec2 import SecurityGroup, SecurityGroupIngress
+from utils import check_headnode_security_group, generate_stack_name
+
+
+@pytest.mark.usefixtures("os", "scheduler", "instance")
+def test_additional_sg_and_ssh_from(region, custom_security_group, pcluster_config_reader, clusters_factory):
+    """
+    Test when additional_sg and ssh_from are provided in the config file.
+
+    The additional security group should be added to the head and compute nodes; the ssh_from CIDR should be
+    reflected in the SSH rule of the head node security group created by pcluster.
+    """
+    custom_security_group_id = custom_security_group.cfn_resources["SecurityGroupResource"]
+    ssh_from = "10.11.12.0/32"
+    cluster_config = pcluster_config_reader(additional_sg=custom_security_group_id, ssh_from=ssh_from)
+    cluster = clusters_factory(cluster_config)
+    ec2_client = boto3.client("ec2", region_name=region)
+    instances = _get_instances_by_security_group(ec2_client, custom_security_group_id)
+    logging.info("Asserting that the head node and compute node have the additional security group")
+    assert_that(instances).is_length(2)
+    logging.info("Asserting that the security group of pcluster is not overwritten by the additional security group")
+    for instance in instances:
+        assert_that(
+            any(
+                security_group["GroupName"].startswith("parallelcluster")
+                for security_group in instance["SecurityGroups"]
+            )
+        ).is_true()
+    logging.info("Asserting that the security group of pcluster on the head node is aligned with ssh_from")
+    check_headnode_security_group(region, cluster, 22, ssh_from)
+
+
+@pytest.mark.usefixtures("os", "scheduler", "instance")
+def test_overwrite_sg(region, custom_security_group, pcluster_config_reader, clusters_factory):
+    """Test that vpc_security_group_id overwrites the pcluster default sg on head and compute nodes, EFS and FSx."""
+    custom_security_group_id = custom_security_group.cfn_resources["SecurityGroupResource"]
+    cluster_config = pcluster_config_reader(vpc_security_group_id=custom_security_group_id)
+    cluster = clusters_factory(cluster_config)
+    ec2_client = boto3.client("ec2", region_name=region)
+    instances = _get_instances_by_security_group(ec2_client, custom_security_group_id)
+    logging.info("Asserting that the head node and compute node have only the custom security group")
+    assert_that(instances).is_length(2)
+    for instance in instances:
+        assert_that(instance["SecurityGroups"]).is_length(1)
+
+    cfn_client = boto3.client("cloudformation", region_name=region)
+
+    logging.info("Collecting security groups of the FSx")
+    fsx_id = cfn_client.describe_stack_resource(
+        StackName=cluster.cfn_resources["FSXSubstack"], LogicalResourceId="FileSystem"
+    )["StackResourceDetail"]["PhysicalResourceId"]
+    fsx_client = boto3.client("fsx", region_name=region)
+    network_interface_id = fsx_client.describe_file_systems(FileSystemIds=[fsx_id])["FileSystems"][0][
+        "NetworkInterfaceIds"
+    ][0]
+    fsx_security_groups = ec2_client.describe_network_interfaces(NetworkInterfaceIds=[network_interface_id])[
+        "NetworkInterfaces"
+    ][0]["Groups"]
+    logging.info("Asserting that the network interface of FSx has only the custom security group")
+    assert_that(fsx_security_groups[0]["GroupId"]).is_equal_to(custom_security_group_id)
+    assert_that(fsx_security_groups).is_length(1)
+
+    logging.info("Collecting security groups of the EFS")
+    efs_id = cfn_client.describe_stack_resource(
+        StackName=cluster.cfn_resources["EFSSubstack"], LogicalResourceId="EFSFS"
+    )["StackResourceDetail"]["PhysicalResourceId"]
+    efs_client = boto3.client("efs", region_name=region)
+    mount_target_ids = [
+        mount_target["MountTargetId"]
+        for mount_target in efs_client.describe_mount_targets(FileSystemId=efs_id)["MountTargets"]
+    ]
+    logging.info("Asserting that the mount targets of EFS have only the custom security group")
+    for mount_target_id in mount_target_ids:
+        mount_target_security_groups = efs_client.describe_mount_target_security_groups(MountTargetId=mount_target_id)[
+            "SecurityGroups"
+        ]
+        assert_that(mount_target_security_groups[0]).is_equal_to(custom_security_group_id)
+        assert_that(mount_target_security_groups).is_length(1)
+
+
+@pytest.fixture(scope="class")
+def custom_security_group(vpc_stack, region, request, cfn_stacks_factory):
+    template = Template()
+    template.set_version("2010-09-09")
+    template.set_description("custom security group stack for testing additional_sg and vpc_security_group_id")
+    security_group = template.add_resource(
+        SecurityGroup(
+            "SecurityGroupResource",
+            GroupDescription="custom security group for testing additional_sg and vpc_security_group_id",
+            VpcId=vpc_stack.cfn_outputs["VpcId"],
+        )
+    )
+    template.add_resource(
+        SecurityGroupIngress(
+            "SecurityGroupIngressResource",
+            IpProtocol="-1",
+            FromPort=0,
+            ToPort=65535,
+            SourceSecurityGroupId=Ref(security_group),
+            GroupId=Ref(security_group),
+        )
+    )
+    stack = CfnStack(
+        name=generate_stack_name("integ-tests-custom-sg", request.config.getoption("stackname_suffix")),
+        region=region,
+        template=template.to_json(),
+    )
+    cfn_stacks_factory.create_stack(stack)
+
+    yield stack
+
+    if not request.config.getoption("no_delete"):
+        cfn_stacks_factory.delete_stack(stack.name, region)
+
+
+def _get_instances_by_security_group(ec2_client, security_group_id):
+    logging.info("Collecting security groups of the head node and compute node")
+    paginator = ec2_client.get_paginator("describe_instances")
+    page_iterator = paginator.paginate(
+        Filters=[
+            {
+                "Name": "network-interface.group-id",
+                "Values": [security_group_id],
+            }
+        ]
+    )
+    instances = []
+    for page in page_iterator:
+        for reservation in page["Reservations"]:
+            instances.extend(reservation["Instances"])
+    return instances
diff --git a/tests/integration-tests/tests/networking/test_security_groups/test_additional_sg_and_ssh_from/pcluster.config.ini b/tests/integration-tests/tests/networking/test_security_groups/test_additional_sg_and_ssh_from/pcluster.config.ini
new file mode 100644
index 0000000000..fa0ff66786
--- /dev/null
+++ b/tests/integration-tests/tests/networking/test_security_groups/test_additional_sg_and_ssh_from/pcluster.config.ini
@@ -0,0 +1,28 @@
+[global]
+cluster_template = default
+
+[aws]
+aws_region_name = {{ region }}
+
+[cluster default]
+base_os = {{ os }}
+key_name = {{ key_name }}
+vpc_settings = parallelcluster-vpc
+scheduler = {{ scheduler }}
+master_instance_type = {{ instance }}
+compute_instance_type = {{ instance }}
+{% if scheduler == "awsbatch" %}
+min_vcpus = 1
+desired_vcpus = 1
+{% else %}
+initial_queue_size = 1
+maintain_initial_size = true
+{% endif %}
+
+[vpc parallelcluster-vpc]
+vpc_id = {{ vpc_id }}
+master_subnet_id = {{ public_subnet_id }}
+compute_subnet_id = {{ private_additional_cidr_subnet_id }}
+additional_sg = {{ additional_sg }}
+use_public_ips = false
+ssh_from = {{ ssh_from }}
\ No newline at end of file
diff --git a/tests/integration-tests/tests/networking/test_security_groups/test_overwrite_sg/pcluster.config.ini b/tests/integration-tests/tests/networking/test_security_groups/test_overwrite_sg/pcluster.config.ini
new file mode 100644
index 0000000000..254d4f10f4
--- /dev/null
+++ b/tests/integration-tests/tests/networking/test_security_groups/test_overwrite_sg/pcluster.config.ini
@@ -0,0 +1,37 @@
+[global]
+cluster_template = default
+
+[aws]
+aws_region_name = {{ region }}
+
+[cluster default]
+base_os = {{ os }}
+key_name = {{ key_name }}
+vpc_settings = parallelcluster-vpc
+scheduler = {{ scheduler }}
+master_instance_type = {{ instance }}
+compute_instance_type = {{ instance }}
+{% if scheduler == "awsbatch" %}
+min_vcpus = 1
+desired_vcpus = 1
+{% else %}
+initial_queue_size = 1
+maintain_initial_size = true
+{% endif %}
+efs_settings = parallelcluster-efs
+fsx_settings = parallelcluster-fsx
+
+[vpc parallelcluster-vpc]
+vpc_id = {{ vpc_id }}
+master_subnet_id = {{ public_subnet_id }}
+compute_subnet_id = {{ private_additional_cidr_subnet_id }}
+vpc_security_group_id = {{ vpc_security_group_id }}
+use_public_ips = false
+
+[efs parallelcluster-efs]
+shared_dir = efs
+
+[fsx parallelcluster-fsx]
+shared_dir = fsx
+storage_capacity = 1200
+deployment_type = SCRATCH_2
diff --git a/tests/integration-tests/tests/runtime_bake/test_runtime_bake.py b/tests/integration-tests/tests/runtime_bake/test_runtime_bake.py
index de0668ae14..9daa739a4a 100644
--- a/tests/integration-tests/tests/runtime_bake/test_runtime_bake.py
+++ b/tests/integration-tests/tests/runtime_bake/test_runtime_bake.py
@@ -40,7 +40,7 @@ def test_runtime_bake(scheduler, os, region, pcluster_config_reader, clusters_fa
     remote_command_executor = RemoteCommandExecutor(cluster)

     # Verify no chef.io endpoint is called in cloud-init-output log to download chef installer or chef packages
-    # on master
+    # on head node
     remote_command_executor.run_remote_script(str(test_datadir / "verify_chef_download.sh"))
     # on compute
     scheduler_commands = get_scheduler_commands(scheduler, remote_command_executor)
diff --git a/tests/integration-tests/tests/scaling/test_scaling.py b/tests/integration-tests/tests/scaling/test_scaling.py
index 8308a6dada..7298f2d3a9 100644
--- a/tests/integration-tests/tests/scaling/test_scaling.py
+++ b/tests/integration-tests/tests/scaling/test_scaling.py
@@ -23,6 +23,7 @@
 from utils import get_compute_nodes_instance_ids, get_instance_ids_compute_hostnames_conversion_dict

 from tests.common.assertions import (
+    assert_errors_in_logs,
     assert_instance_replaced_or_terminating,
     assert_no_errors_in_logs,
     assert_num_instances_constant,
@@ -126,16 +127,16 @@ def test_nodewatcher_terminates_failing_node(scheduler, region, pcluster_config_
 @pytest.mark.instances(["c5.xlarge"])
 @pytest.mark.schedulers(["slurm"])
 @pytest.mark.oss(["alinux2", "centos7", "centos8", "ubuntu1804"])
-@pytest.mark.usefixtures("region", "os", "instance")
+@pytest.mark.usefixtures("region", "os")
 @pytest.mark.hit_scaling
-def test_hit_scaling(scheduler, region, pcluster_config_reader, clusters_factory, test_datadir):
+def test_hit_scaling(scheduler, region, instance, pcluster_config_reader, clusters_factory, test_datadir):
     """Test that slurm-specific scaling logic is resistant to manual actions and failures."""
     cluster_config = pcluster_config_reader(scaledown_idletime=3)
     cluster = clusters_factory(cluster_config)
     remote_command_executor = RemoteCommandExecutor(cluster)
     scheduler_commands = get_scheduler_commands(scheduler, remote_command_executor)

-    
_assert_cluster_initial_conditions(scheduler_commands) + _assert_cluster_initial_conditions(scheduler_commands, instance) _test_partition_states( scheduler_commands, cluster.cfn_name, @@ -144,7 +145,7 @@ def test_hit_scaling(scheduler, region, pcluster_config_reader, clusters_factory inactive_partition="ondemand2", num_static_nodes=2, num_dynamic_nodes=3, - dynamic_instance_type="c5.xlarge", + dynamic_instance_type=instance, ) _test_reset_terminated_nodes( scheduler_commands, @@ -153,7 +154,7 @@ def test_hit_scaling(scheduler, region, pcluster_config_reader, clusters_factory partition="ondemand1", num_static_nodes=2, num_dynamic_nodes=3, - dynamic_instance_type="c5.xlarge", + dynamic_instance_type=instance, ) _test_replace_down_nodes( remote_command_executor, @@ -164,7 +165,7 @@ def test_hit_scaling(scheduler, region, pcluster_config_reader, clusters_factory partition="ondemand1", num_static_nodes=2, num_dynamic_nodes=3, - dynamic_instance_type="c5.xlarge", + dynamic_instance_type=instance, ) _test_keep_or_replace_suspended_nodes( scheduler_commands, @@ -173,9 +174,11 @@ def test_hit_scaling(scheduler, region, pcluster_config_reader, clusters_factory partition="ondemand1", num_static_nodes=2, num_dynamic_nodes=3, - dynamic_instance_type="c5.xlarge", + dynamic_instance_type=instance, ) - _test_computemgtd_logic( + # Next test will introduce error in logs, assert no error now + assert_no_errors_in_logs(remote_command_executor, scheduler) + _test_clustermgtd_down_logic( remote_command_executor, scheduler_commands, cluster.cfn_name, @@ -184,29 +187,28 @@ def test_hit_scaling(scheduler, region, pcluster_config_reader, clusters_factory partition="ondemand1", num_static_nodes=2, num_dynamic_nodes=3, - dynamic_instance_type="c5.xlarge", + dynamic_instance_type=instance, ) - assert_no_errors_in_logs(remote_command_executor, scheduler) - -def _assert_cluster_initial_conditions(scheduler_commands): +def _assert_cluster_initial_conditions(scheduler_commands, instance): """Assert that expected nodes are in cluster.""" cluster_node_states = scheduler_commands.get_nodes_status() - c4_nodes, c5_nodes, static_nodes, dynamic_nodes = [], [], [], [] + c5l_nodes, instance_nodes, static_nodes, dynamic_nodes = [], [], [], [] logging.info(cluster_node_states) for nodename, node_states in cluster_node_states.items(): - if "c4" in nodename: - c4_nodes.append(nodename) - if "c5" in nodename: - c5_nodes.append(nodename) + if "c5l" in nodename: + c5l_nodes.append(nodename) + # "c5.xlarge"[: "c5.xlarge".index(".")+2].replace(".", "") = c5x + if instance[: instance.index(".") + 2].replace(".", "") in nodename: + instance_nodes.append(nodename) if node_states == "idle": if "-st-" in nodename: static_nodes.append(nodename) if "-dy-" in nodename: dynamic_nodes.append(nodename) - assert_that(len(c4_nodes)).is_equal_to(20) - assert_that(len(c5_nodes)).is_equal_to(20) + assert_that(len(c5l_nodes)).is_equal_to(20) + assert_that(len(instance_nodes)).is_equal_to(20) assert_that(len(static_nodes)).is_equal_to(4) assert_that(len(dynamic_nodes)).is_equal_to(1) @@ -345,7 +347,7 @@ def _test_keep_or_replace_suspended_nodes( assert_num_instances_in_cluster(cluster_name, region, len(static_nodes)) -def _test_computemgtd_logic( +def _test_clustermgtd_down_logic( remote_command_executor, scheduler_commands, cluster_name, @@ -357,7 +359,7 @@ def _test_computemgtd_logic( dynamic_instance_type, ): """Test that computemgtd is able to shut nodes down when clustermgtd and slurmctld are offline.""" - logging.info("Testing that nodes are shut 
down when clustermgtd and slurmctld are offline") + logging.info("Testing cluster protection logic when clustermgtd is down.") submit_initial_job( scheduler_commands, "sleep infinity", @@ -366,8 +368,10 @@ num_dynamic_nodes, other_options="--no-requeue", ) - assert_initial_conditions(scheduler_commands, num_static_nodes, num_dynamic_nodes, partition) - logging.info("Killing clustermgtd and rewriting timestamp file") + static_nodes, dynamic_nodes = assert_initial_conditions( + scheduler_commands, num_static_nodes, num_dynamic_nodes, partition + ) + logging.info("Killing clustermgtd and rewriting timestamp file to trigger timeout.") remote_command_executor.run_remote_script(str(test_datadir / "slurm_kill_clustermgtd.sh"), run_as_root=True) # Overwrite clustermgtd heartbeat to trigger timeout path timestamp_format = "%Y-%m-%d %H:%M:%S.%f%z" @@ -375,13 +379,37 @@ remote_command_executor.run_remote_command( f"echo -n '{overwrite_time_str}' | sudo tee /opt/slurm/etc/pcluster/.slurm_plugin/clustermgtd_heartbeat" ) + # Test that computemgtd will terminate compute nodes that are down or in power_save + # Put first static node and first dynamic node into DOWN + # Put rest of dynamic nodes into POWER_DOWN + logging.info("Asserting that computemgtd will terminate nodes in DOWN or POWER_SAVE") + _set_nodes_to_down_manually(scheduler_commands, static_nodes[:1] + dynamic_nodes[:1]) + _set_nodes_to_power_down_manually(scheduler_commands, dynamic_nodes[1:]) + wait_for_num_instances_in_cluster(cluster_name, region, num_static_nodes - 1) + + logging.info("Testing that ResumeProgram launches no instance when clustermgtd is down") + submit_initial_job( + scheduler_commands, + "sleep infinity", + partition, + dynamic_instance_type, + num_dynamic_nodes, + ) + logging.info("Asserting that computemgtd is not self-terminating when slurmctld is up") - assert_num_instances_constant(cluster_name, region, desired=num_static_nodes + num_dynamic_nodes, timeout=2) + assert_num_instances_constant(cluster_name, region, desired=num_static_nodes - 1, timeout=2) + logging.info("Killing slurmctld") remote_command_executor.run_remote_script(str(test_datadir / "slurm_kill_slurmctld.sh"), run_as_root=True) logging.info("Waiting for computemgtd to self-terminate all instances") wait_for_num_instances_in_cluster(cluster_name, region, 0) + assert_errors_in_logs( + remote_command_executor, + ["/var/log/parallelcluster/slurm_resume.log"], + ["No valid clustermgtd heartbeat detected"], + ) + @retry(wait_fixed=seconds(30), stop_max_delay=minutes(15)) def _assert_failing_nodes_terminated(nodes_to_remove, hostname_to_instance_id, region): @@ -446,6 +474,13 @@ def _set_nodes_to_down_manually(scheduler_commands, compute_nodes): _assert_compute_node_states(scheduler_commands, compute_nodes, expected_states=["down"]) +def _set_nodes_to_power_down_manually(scheduler_commands, compute_nodes): + scheduler_commands.set_nodes_state(compute_nodes, state="power_down") + time.sleep(5) + scheduler_commands.set_nodes_state(compute_nodes, state="resume") + _assert_compute_node_states(scheduler_commands, compute_nodes, expected_states=["idle~"]) + + def _assert_compute_node_states(scheduler_commands, compute_nodes, expected_states): node_states = scheduler_commands.get_nodes_status(compute_nodes) for node in compute_nodes: diff --git a/tests/integration-tests/tests/scaling/test_scaling/test_hit_scaling/pcluster.config.ini
b/tests/integration-tests/tests/scaling/test_scaling/test_hit_scaling/pcluster.config.ini index 1d2563852a..6635fa44c8 100644 --- a/tests/integration-tests/tests/scaling/test_scaling/test_hit_scaling/pcluster.config.ini +++ b/tests/integration-tests/tests/scaling/test_scaling/test_hit_scaling/pcluster.config.ini @@ -20,14 +20,14 @@ compute_resource_settings = ondemand_i1,ondemand_i2 compute_resource_settings = ondemand_i3,ondemand_i4 [compute_resource ondemand_i1] -instance_type = c4.xlarge +instance_type = c5.large [compute_resource ondemand_i2] instance_type = {{ instance }} min_count = 2 [compute_resource ondemand_i3] -instance_type = c4.xlarge +instance_type = c5.large [compute_resource ondemand_i4] instance_type = {{ instance }} diff --git a/tests/integration-tests/tests/scaling/test_scaling/test_multiple_jobs_submission/cluster-check.sh b/tests/integration-tests/tests/scaling/test_scaling/test_multiple_jobs_submission/cluster-check.sh index 58c4b97108..4ace1ae7b2 100755 --- a/tests/integration-tests/tests/scaling/test_scaling/test_multiple_jobs_submission/cluster-check.sh +++ b/tests/integration-tests/tests/scaling/test_scaling/test_multiple_jobs_submission/cluster-check.sh @@ -19,7 +19,7 @@ # minutes, one of which (hopefully) requires scaling (note that # scaling is currently not tested on Torque, because it's too big of a # pain to determine how many slots per node are on a Torque compute -# node from the master node). +# node from the head node). # # Usage: diff --git a/tests/integration-tests/tests/schedulers/test_torque.py b/tests/integration-tests/tests/schedulers/test_torque.py index 12434bd5ba..677a9107fb 100644 --- a/tests/integration-tests/tests/schedulers/test_torque.py +++ b/tests/integration-tests/tests/schedulers/test_torque.py @@ -192,7 +192,7 @@ def _test_dynamic_cluster_limits(remote_command_executor, max_queue_size, max_sl # Make sure cluster is scaled to 0 when this test starts assert_that(torque_commands.compute_nodes_count()).is_equal_to(0) - # sleeping for 1 second to give time to sqswatcher to reconfigure the master with np = max_nodes * node_slots + # sleeping for 1 second to give time to sqswatcher to reconfigure the head node with np = max_nodes * node_slots # operation that is performed right after sqswatcher removes the compute nodes from the scheduler time.sleep(1) _assert_scheduler_configuration(remote_command_executor, torque_commands, max_slots, max_queue_size) diff --git a/tests/integration-tests/tests/storage/kms_key_factory.py b/tests/integration-tests/tests/storage/kms_key_factory.py new file mode 100644 index 0000000000..7d81752a49 --- /dev/null +++ b/tests/integration-tests/tests/storage/kms_key_factory.py @@ -0,0 +1,214 @@ +import json +import logging +import random +import string +import time + +import boto3 +import pkg_resources +from jinja2 import Environment, FileSystemLoader + + +class KMSKeyFactory: + """Manage creation of KMS keys.""" + + def __init__(self): + self.iam_client = None + self.kms_client = None + self.kms_key_id = None + self.account_id = None + self.region = None + self.partition = None + self.iam_role = None + self.iam_policy_arn_batch = None + self.iam_policy_arn_traditional = None + + def create_kms_key(self, region): + """ + Create a KMS key in the given region. + :param region: different regions need different keys + """ + self.region = region + self.account_id = ( + boto3.client("sts", endpoint_url=_get_sts_endpoint(region), region_name=region) + .get_caller_identity() + .get("Account") + ) + + if self.kms_key_id: + return self.kms_key_id + + self.iam_role = self._create_role(region) + self.kms_key_id = self._create_kms_key(region) + return self.kms_key_id + + def _create_role(self, region): + """ + Create an IAM role in the given region. + :param region: create a different role per region, since different policies need to be attached + """ + random_string = "".join(random.choice(string.ascii_lowercase + string.digits) for _ in range(8)) + iam_role_name = "Integ_test_InstanceRole_{0}_{1}".format(self.region, random_string) + + iam_policy_name_batch = "Integ_test_InstancePolicy_batch" + random_string + logging.info("iam policy for awsbatch is {0}".format(iam_policy_name_batch)) + iam_policy_name_traditional = "Integ_test_InstancePolicy" + random_string + logging.info("iam_policy for traditional scheduler is {0}".format(iam_policy_name_traditional)) + + self.iam_client = boto3.client("iam", region_name=region) + + # Create the iam role + logging.info("Creating iam role {0} for KMS key creation...".format(iam_role_name)) + + self.partition = next( + ("aws-" + partition for partition in ["us-gov", "cn"] if self.region.startswith(partition)), "aws" + ) + domain_suffix = ".cn" if self.partition == "aws-cn" else "" + + # Add EC2 as trust entity of the IAM role + trust_relationship_policy_ec2 = { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Principal": {"Service": "ec2.amazonaws.com{0}".format(domain_suffix)}, + "Action": "sts:AssumeRole", + } + ], + } + self.iam_client.create_role( + RoleName=iam_role_name, + AssumeRolePolicyDocument=json.dumps(trust_relationship_policy_ec2), + Description="Role for creating custom KMS key", + ) + # Sleep here because it takes a while for the IAM role to become valid for use in the + # put_key_policy step when creating the KMS key; see the following link for reference: + # https://stackoverflow.com/questions/20156043/how-long-should-i-wait-after-applying-an-aws-iam-policy-before-it-is-valid + time.sleep(15) + + # create instance policies for awsbatch and traditional schedulers + self.iam_policy_arn_batch = self._create_iam_policies(iam_policy_name_batch, "awsbatch") + self.iam_policy_arn_traditional = self._create_iam_policies(iam_policy_name_traditional, "traditional") + + # attach the Instance policies to the role + logging.info("Attaching iam policy to the role {0}...".format(iam_role_name)) + + # attach the Instance policy for awsBatch + self.iam_client.attach_role_policy(RoleName=iam_role_name, PolicyArn=self.iam_policy_arn_batch) + + # attach the Instance policy for traditional scheduler + self.iam_client.attach_role_policy(RoleName=iam_role_name, PolicyArn=self.iam_policy_arn_traditional) + + logging.info("Iam role is ready: {0}".format(iam_role_name)) + return iam_role_name + + def _create_iam_policies(self, iam_policy_name, scheduler): + # the param "scheduler" here can have the value "awsbatch" or "traditional" + + # create the iam policy + # for different schedulers, attach different instance policies + logging.info("Creating iam policy {0} for iam role...".format(iam_policy_name)) + file_loader = FileSystemLoader(pkg_resources.resource_filename(__name__, "/../../resources")) + env = Environment(loader=file_loader, trim_blocks=True,
lstrip_blocks=True) + policy_filename = ( + "batch_instance_policy.json" if scheduler == "awsbatch" else "traditional_instance_policy.json" + ) + parallel_cluster_instance_policy = env.get_template(policy_filename).render( + partition=self.partition, + region=self.region, + account_id=self.account_id, + cluster_bucket_name="parallelcluster-*", + ) + + policy_res = self.iam_client.create_policy( + PolicyName=iam_policy_name, PolicyDocument=parallel_cluster_instance_policy + ) + policy_arn = policy_res["Policy"]["Arn"] + return policy_arn + + def _create_kms_key(self, region): + # create KMS key + self.kms_client = boto3.client("kms", region_name=region) + random_string = "".join(random.choice(string.ascii_lowercase + string.digits) for _ in range(8)) + key_alias = "alias/Integ_test_KMS_{0}_{1}".format(self.region, random_string) + + # If the key already exists, use the existing key + for alias in self.kms_client.list_aliases().get("Aliases"): + if alias.get("AliasName") == key_alias: + kms_key_id = alias.get("TargetKeyId") + logging.info("Using existing KMS key {0}".format(kms_key_id)) + return kms_key_id + + # if the key doesn't exist in the account, create a new key + logging.info("Creating KMS key...") + response = self.kms_client.create_key( + Description="create kms key", + KeyUsage="ENCRYPT_DECRYPT", + Origin="AWS_KMS", + BypassPolicyLockoutSafetyCheck=False, + ) + kms_key_id = response["KeyMetadata"]["KeyId"] + + # create KMS key policy + logging.info("Attaching key policy...") + file_loader = FileSystemLoader(pkg_resources.resource_filename(__name__, "/../../resources")) + env = Environment(loader=file_loader, trim_blocks=True, lstrip_blocks=True) + key_policy = env.get_template("key_policy.json").render( + partition=self.partition, account_id=self.account_id, iam_role_name=self.iam_role + ) + + # attach key policy to the key + logging.info("Kms key {0} is created".format(kms_key_id)) + # poll_on_key_creation(kms_key_id, self.kms_client) + self.kms_client.put_key_policy( + KeyId=kms_key_id, + Policy=key_policy, + PolicyName="default", + ) + + # create alias for the key + self.kms_client.create_alias( + AliasName=key_alias, + TargetKeyId=kms_key_id, + ) + logging.info("Kms key {0} is ready".format(kms_key_id)) + return kms_key_id + + def release_all(self): + """Release all resources.""" + self._release_iam_policy() + self._release_iam_role() + self._release_kms_key() + + def _release_iam_policy(self): + if self.iam_policy_arn_batch or self.iam_policy_arn_traditional: + logging.info("Deleting iam policy for awsbatch %s" % self.iam_policy_arn_batch) + # detach iam policy for awsbatch from iam role + self.iam_client.detach_role_policy(RoleName=self.iam_role, PolicyArn=self.iam_policy_arn_batch) + # delete the awsbatch policy + self.iam_client.delete_policy(PolicyArn=self.iam_policy_arn_batch) + logging.info("Deleting iam policy for traditional scheduler %s" % self.iam_policy_arn_traditional) + # detach iam policy for traditional scheduler from iam role + self.iam_client.detach_role_policy(RoleName=self.iam_role, PolicyArn=self.iam_policy_arn_traditional) + # delete the traditional scheduler policy + self.iam_client.delete_policy(PolicyArn=self.iam_policy_arn_traditional) + + def _release_iam_role(self): + logging.info("Deleting iam role %s" % self.iam_role) + self.iam_client.delete_role( + RoleName=self.iam_role, + ) + + def _release_kms_key(self): + logging.info("Scheduling deletion of Kms key %s" % self.kms_key_id) + self.kms_client.schedule_key_deletion( + KeyId=self.kms_key_id, + # The waiting period, specified in number of days. After the waiting period ends, AWS KMS deletes the CMK. + # The waiting period is at least 7 days. + PendingWindowInDays=7, + ) + + +def _get_sts_endpoint(region): + """Get regionalized STS endpoint.""" + return "https://sts.{0}.{1}".format(region, "amazonaws.com.cn" if region.startswith("cn-") else "amazonaws.com") diff --git a/tests/integration-tests/tests/storage/snapshots_factory.py b/tests/integration-tests/tests/storage/snapshots_factory.py index 3d43c6524d..be208ee747 100644 --- a/tests/integration-tests/tests/storage/snapshots_factory.py +++ b/tests/integration-tests/tests/storage/snapshots_factory.py @@ -18,7 +18,7 @@ from retrying import retry from utils import random_alphanumeric -SnapshotConfig = namedtuple("ClusterConfig", ["ssh_key", "key_name", "vpc_id", "master_subnet_id"]) +SnapshotConfig = namedtuple("ClusterConfig", ["ssh_key", "key_name", "vpc_id", "head_node_subnet_id"]) class EBSSnapshotsFactory: @@ -56,13 +56,35 @@ def create_snapshot(self, request, subnet_id, region): self.snapshot = self._create_snapshot(region, snapshot_config) return self.snapshot.id - def _create_snapshot(self, region, snapshot_config): + def create_existing_volume(self, request, subnet_id, region): + """ + Create a volume in a given region. + :param request: The current request + :param subnet_id: The subnet id where to create the volume + :param region: The region where to create the volume + """ + # Only one volume creation per factory allowed + if self.volume: + raise Exception("Volume already created") + + self.ec2 = boto3.resource("ec2", region_name=region) + self.boto_client = boto3.client("ec2", region_name=region) + volume_config = SnapshotConfig( + request.config.getoption("key_path"), + request.config.getoption("key_name"), + self.ec2.Subnet(subnet_id).vpc_id, + subnet_id, + ) + self._create_volume_process(region, volume_config) + return self.volume.id + + def _create_volume_process(self, region, snapshot_config): self.config = snapshot_config ami_id = self._get_amazonlinux_ami() self.security_group_id = self._get_security_group_id() - subnet = self.ec2.Subnet(self.config.master_subnet_id) + subnet = self.ec2.Subnet(self.config.head_node_subnet_id) # Create a new volume and attach to the instance self.volume = self._create_volume(subnet) @@ -77,6 +99,8 @@ def _create_snapshot(self, region, snapshot_config): # Stops the instance before taking the snapshot self._release_instance() + def _create_snapshot(self, region, snapshot_config): + self._create_volume_process(region, snapshot_config) self.snapshot = self._create_volume_snapshot() return self.snapshot diff --git a/tests/integration-tests/tests/storage/storage_common.py b/tests/integration-tests/tests/storage/storage_common.py index 7528ff180d..14353a4680 100644 --- a/tests/integration-tests/tests/storage/storage_common.py +++ b/tests/integration-tests/tests/storage/storage_common.py @@ -13,13 +13,13 @@ def verify_directory_correctly_shared(remote_command_executor, mount_dir, scheduler_commands): - master_file = random_alphanumeric() + head_node_file = random_alphanumeric() compute_file = random_alphanumeric() remote_command_executor.run_remote_command( - "touch {mount_dir}/{master_file}".format(mount_dir=mount_dir, master_file=master_file) + "touch {mount_dir}/{head_node_file}".format(mount_dir=mount_dir, head_node_file=head_node_file) ) - job_command = "cat {mount_dir}/{master_file} && touch {mount_dir}/{compute_file}".format( - mount_dir=mount_dir, master_file=master_file,
compute_file=compute_file + job_command = "cat {mount_dir}/{head_node_file} && touch {mount_dir}/{compute_file}".format( + mount_dir=mount_dir, head_node_file=head_node_file, compute_file=compute_file ) result = scheduler_commands.submit_command(job_command) diff --git a/tests/integration-tests/tests/storage/test_ebs.py b/tests/integration-tests/tests/storage/test_ebs.py index f3ee4003d7..1d26eaa5af 100644 --- a/tests/integration-tests/tests/storage/test_ebs.py +++ b/tests/integration-tests/tests/storage/test_ebs.py @@ -11,11 +11,14 @@ # See the License for the specific language governing permissions and limitations under the License. import logging +import boto3 import pytest +import utils from assertpy import assert_that from remote_command_executor import RemoteCommandExecutor from tests.common.schedulers_common import get_scheduler_commands +from tests.storage.kms_key_factory import KMSKeyFactory from tests.storage.snapshots_factory import EBSSnapshotsFactory from tests.storage.storage_common import verify_directory_correctly_shared @@ -24,16 +27,22 @@ @pytest.mark.instances(["c4.xlarge", "c5.xlarge"]) @pytest.mark.schedulers(["sge"]) @pytest.mark.usefixtures("region", "os", "instance") -def test_ebs_single(scheduler, pcluster_config_reader, clusters_factory): +def test_ebs_single(scheduler, pcluster_config_reader, clusters_factory, kms_key_factory, region): mount_dir = "ebs_mount_dir" - cluster_config = pcluster_config_reader(mount_dir=mount_dir) + kms_key_id = kms_key_factory.create_kms_key(region) + cluster_config = pcluster_config_reader( + mount_dir=mount_dir, ec2_iam_role=kms_key_factory.iam_role, ebs_kms_key_id=kms_key_id + ) cluster = clusters_factory(cluster_config) remote_command_executor = RemoteCommandExecutor(cluster) mount_dir = "/" + mount_dir scheduler_commands = get_scheduler_commands(scheduler, remote_command_executor) + volume_id = get_ebs_volume_ids(cluster, region) + _test_ebs_correctly_mounted(remote_command_executor, mount_dir, volume_size=20) _test_ebs_correctly_shared(remote_command_executor, mount_dir, scheduler_commands) + _test_ebs_encrypted_with_kms(volume_id, region, kms_key_id) @pytest.mark.dimensions("ap-northeast-2", "c5.xlarge", "alinux2", "sge") @@ -73,19 +82,43 @@ def test_ebs_snapshot( @pytest.mark.dimensions("ca-central-1", "c5.xlarge", "alinux2", "awsbatch") @pytest.mark.dimensions("ca-central-1", "c5.xlarge", "ubuntu1804", "slurm") @pytest.mark.dimensions("eu-west-2", "c5.xlarge", "centos8", "slurm") -@pytest.mark.usefixtures("region", "os", "instance") -def test_ebs_multiple(scheduler, pcluster_config_reader, clusters_factory): +@pytest.mark.usefixtures("os", "instance") +def test_ebs_multiple(scheduler, pcluster_config_reader, clusters_factory, region): mount_dirs = ["/ebs_mount_dir_{0}".format(i) for i in range(0, 5)] volume_sizes = [15 + 5 * i for i in range(0, 5)] + + # for volume types sc1 and st1, the minimum volume size is 500G + volume_sizes[3] = 500 + volume_sizes[4] = 500 cluster_config = pcluster_config_reader(mount_dirs=mount_dirs, volume_sizes=volume_sizes) cluster = clusters_factory(cluster_config) remote_command_executor = RemoteCommandExecutor(cluster) scheduler_commands = get_scheduler_commands(scheduler, remote_command_executor) for mount_dir, volume_size in zip(mount_dirs, volume_sizes): - _test_ebs_correctly_mounted(remote_command_executor, mount_dir, volume_size) + # for a volume size of 500G, the filesystem size is only about 492G. + # This is because the file system uses some of the total space available on a device for storing internal + # structures and data (the file system's metadata). The overhead of the XFS filesystem is around 0.5%. + # If we tested with a small volume size (e.g. 40G), the number would not be large enough to show the gap + # between the partition size and the filesystem size. For sc1 and st1, the minimum size is 500G, so there + # will be a size difference. + _test_ebs_correctly_mounted( + remote_command_executor, mount_dir, volume_size if volume_size != 500 else "49[0-9]" + ) _test_ebs_correctly_shared(remote_command_executor, mount_dir, scheduler_commands) + volume_ids = get_ebs_volume_ids(cluster, region) + for i in range(len(volume_ids)): + # test different volume types + volume_type = cluster.config.get("ebs ebs{0}".format(i + 1), "volume_type") + volume = describe_volume(volume_ids[i], region) + assert_that(volume[0]).is_equal_to(volume_type) + # test different iops + # only the iops of io1, io2 and gp3 volumes can be configured by us + if volume_type in ["io1", "io2", "gp3"]: + volume_iops = cluster.config.get("ebs ebs{0}".format(i + 1), "volume_iops") + assert_that(volume[1]).is_equal_to(int(volume_iops)) + @pytest.mark.dimensions("cn-northwest-1", "c4.xlarge", "alinux", "slurm") @pytest.mark.usefixtures("region", "os", "instance") @@ -113,6 +146,42 @@ def test_ebs_single_empty(scheduler, pcluster_config_reader, clusters_factory): _test_ebs_correctly_shared(remote_command_executor, mount_dir, scheduler_commands) +@pytest.mark.dimensions("ap-northeast-2", "c5.xlarge", "centos7", "sge") +@pytest.mark.usefixtures("os", "instance") +def test_ebs_existing( + request, vpc_stacks, region, scheduler, pcluster_config_reader, clusters_factory, snapshots_factory +): + logging.info("Testing ebs existing") + existing_mount_dir = "existing_mount_dir" + + logging.info("Creating volume") + + volume_id = snapshots_factory.create_existing_volume( + request, vpc_stacks[region].cfn_outputs["PublicSubnetId"], region + ) + + logging.info("Existing Volume id: %s" % volume_id) + cluster_config = pcluster_config_reader( + volume_id=volume_id, + existing_mount_dir=existing_mount_dir, + ) + + cluster = clusters_factory(cluster_config) + remote_command_executor = RemoteCommandExecutor(cluster) + scheduler_commands = get_scheduler_commands(scheduler, remote_command_executor) + existing_mount_dir = "/" + existing_mount_dir + _test_ebs_correctly_mounted(remote_command_executor, existing_mount_dir, volume_size="9.8") + _test_ebs_correctly_shared(remote_command_executor, existing_mount_dir, scheduler_commands) + # Checks for test data + result = remote_command_executor.run_remote_command("cat {}/test.txt".format(existing_mount_dir)) + assert_that(result.stdout.strip()).is_equal_to("hello world") + + # delete the cluster before detaching the EBS volume + cluster.delete() + # check the volume still exists after deleting the cluster + _assert_volume_exist(volume_id, region) + + def _test_ebs_correctly_mounted(remote_command_executor, mount_dir, volume_size): logging.info("Testing ebs {0} is correctly mounted".format(mount_dir)) result = remote_command_executor.run_remote_command( @@ -175,8 +244,43 @@ def _test_ebs_resize(remote_command_executor, mount_dir, volume_size): assert_that(result.stdout).matches(r"{size}G".format(size=volume_size)) +def get_ebs_volume_ids(cluster, region): + # get the list of configured ebs volume ids + # example output: ['vol-000', 'vol-001', 'vol-002'] + ebs_stack = utils.get_substacks(cluster.cfn_name, region=region, sub_stack_name="EBSCfnStack")[0] + return utils.retrieve_cfn_outputs(ebs_stack,
region).get("Volumeids").split(",") + + +def describe_volume(volume_id, region): + volume = boto3.client("ec2", region_name=region).describe_volumes(VolumeIds=[volume_id]).get("Volumes")[0] + volume_type = volume.get("VolumeType") + volume_iops = volume.get("Iops") + return volume_type, volume_iops + + +def _assert_volume_exist(volume_id, region): + volume_status = ( + boto3.client("ec2", region_name=region).describe_volumes(VolumeIds=[volume_id]).get("Volumes")[0].get("State") + ) + assert_that(volume_status).is_equal_to("available") + + +def _test_ebs_encrypted_with_kms(volume_id, region, kms_key_id): + logging.info("Getting Encrypted information from DescribeVolumes API.") + volume_info = boto3.client("ec2", region_name=region).describe_volumes(VolumeIds=volume_id).get("Volumes")[0] + assert_that(volume_info.get("Encrypted")).is_true() + assert_that(volume_info.get("KmsKeyId")).matches(kms_key_id) + + @pytest.fixture() def snapshots_factory(): factory = EBSSnapshotsFactory() yield factory factory.release_all() + + +@pytest.fixture(scope="module") +def kms_key_factory(): + factory = KMSKeyFactory() + yield factory + factory.release_all() diff --git a/tests/integration-tests/tests/storage/test_ebs/test_ebs_existing/pcluster.config.ini b/tests/integration-tests/tests/storage/test_ebs/test_ebs_existing/pcluster.config.ini new file mode 100644 index 0000000000..c579ccfcba --- /dev/null +++ b/tests/integration-tests/tests/storage/test_ebs/test_ebs_existing/pcluster.config.ini @@ -0,0 +1,32 @@ +[global] +cluster_template = default + +[aws] +aws_region_name = {{ region }} + +[cluster default] +base_os = {{ os }} +key_name = {{ key_name }} +vpc_settings = parallelcluster-vpc +scheduler = {{ scheduler }} +master_instance_type = {{ instance }} +compute_instance_type = {{ instance }} +{% if scheduler == "awsbatch" %} +min_vcpus = 1 +desired_vcpus = 1 +{% else %} +initial_queue_size = 1 +maintain_initial_size = false +{% endif %} +ebs_settings = ebs + +[vpc parallelcluster-vpc] +vpc_id = {{ vpc_id }} +master_subnet_id = {{ public_subnet_id }} +compute_subnet_id = {{ private_subnet_id }} +use_public_ips = false + +[ebs ebs] +ebs_volume_id = {{ volume_id }} +shared_dir = {{ existing_mount_dir }} +volume_type = gp2 diff --git a/tests/integration-tests/tests/storage/test_ebs/test_ebs_multiple/pcluster.config.ini b/tests/integration-tests/tests/storage/test_ebs/test_ebs_multiple/pcluster.config.ini index 8abf07f99a..cd6ac59241 100644 --- a/tests/integration-tests/tests/storage/test_ebs/test_ebs_multiple/pcluster.config.ini +++ b/tests/integration-tests/tests/storage/test_ebs/test_ebs_multiple/pcluster.config.ini @@ -28,29 +28,31 @@ use_public_ips = false [ebs ebs1] shared_dir = {{ mount_dirs[0] }} -volume_type = io1 +volume_type = gp3 volume_size = {{ volume_sizes[0] }} -volume_iops = 100 +volume_iops = 3200 encrypted = true +volume_throughput = 130 [ebs ebs2] shared_dir = {{ mount_dirs[1] }} volume_size = {{ volume_sizes[1] }} -volume_iops = 125 +volume_type = gp2 encrypted = false [ebs ebs3] shared_dir = {{ mount_dirs[2] }} volume_size = {{ volume_sizes[2] }} volume_iops = 150 +volume_type = io2 [ebs ebs4] shared_dir = {{ mount_dirs[3] }} volume_size = {{ volume_sizes[3] }} +volume_type = sc1 [ebs ebs5] shared_dir = {{ mount_dirs[4] }} volume_size = {{ volume_sizes[4] }} -volume_type = io1 -volume_iops = 200 +volume_type = st1 encrypted = false diff --git a/tests/integration-tests/tests/storage/test_ebs/test_ebs_single/pcluster.config.ini 
b/tests/integration-tests/tests/storage/test_ebs/test_ebs_single/pcluster.config.ini index 0e5b8c2fec..83b6cc2b0f 100644 --- a/tests/integration-tests/tests/storage/test_ebs/test_ebs_single/pcluster.config.ini +++ b/tests/integration-tests/tests/storage/test_ebs/test_ebs_single/pcluster.config.ini @@ -19,6 +19,7 @@ initial_queue_size = 1 maintain_initial_size = true {% endif %} ebs_settings = ebs +ec2_iam_role = {{ ec2_iam_role }} [vpc parallelcluster-vpc] vpc_id = {{ vpc_id }} @@ -30,3 +31,5 @@ use_public_ips = false shared_dir = {{ mount_dir }} volume_type = io1 volume_iops = 210 +encrypted = true +ebs_kms_key_id = {{ ebs_kms_key_id }} \ No newline at end of file diff --git a/tests/integration-tests/tests/storage/test_efs.py b/tests/integration-tests/tests/storage/test_efs.py index 57fd9e7be2..b718b65099 100644 --- a/tests/integration-tests/tests/storage/test_efs.py +++ b/tests/integration-tests/tests/storage/test_efs.py @@ -14,10 +14,15 @@ import boto3 import pytest from assertpy import assert_that +from cfn_stacks_factory import CfnStack from remote_command_executor import RemoteCommandExecutor -from utils import get_vpc_snakecase_value +from troposphere import Base64, Sub, Template +from troposphere.ec2 import Instance +from troposphere.efs import FileSystem, MountTarget +from utils import generate_stack_name, get_vpc_snakecase_value, random_alphanumeric from tests.common.schedulers_common import get_scheduler_commands +from tests.common.utils import retrieve_latest_ami from tests.storage.storage_common import verify_directory_correctly_shared @@ -28,13 +33,13 @@ @pytest.mark.schedulers(["slurm", "awsbatch"]) @pytest.mark.oss(["alinux2"]) @pytest.mark.usefixtures("region", "os", "instance") -def test_efs_compute_az(region, scheduler, pcluster_config_reader, clusters_factory, vpc_stacks): +def test_efs_compute_az(region, scheduler, pcluster_config_reader, clusters_factory, vpc_stack): """ - Test when compute subnet is in a different AZ from master subnet. + Test when compute subnet is in a different AZ from head node subnet. A compute mount target should be created and the efs correctly mounted on compute. """ - _assert_subnet_az_relations(region, vpc_stacks, expected_in_same_az=False) + _assert_subnet_az_relations(region, vpc_stack, expected_in_same_az=False) mount_dir = "efs_mount_dir" cluster_config = pcluster_config_reader(mount_dir=mount_dir) cluster = clusters_factory(cluster_config) @@ -50,13 +55,13 @@ def test_efs_compute_az(region, scheduler, pcluster_config_reader, clusters_fact @pytest.mark.instances(["c4.xlarge", "c5.xlarge"]) @pytest.mark.schedulers(["slurm", "awsbatch"]) @pytest.mark.usefixtures("region", "os", "instance") -def test_efs_same_az(region, scheduler, pcluster_config_reader, clusters_factory, vpc_stacks): +def test_efs_same_az(region, scheduler, pcluster_config_reader, clusters_factory, vpc_stack): """ - Test when compute subnet is in the same AZ as master subnet. + Test when compute subnet is in the same AZ as head node subnet. No compute mount point needed and the efs correctly mounted on compute. 
""" - _assert_subnet_az_relations(region, vpc_stacks, expected_in_same_az=True) + _assert_subnet_az_relations(region, vpc_stack, expected_in_same_az=True) mount_dir = "efs_mount_dir" cluster_config = pcluster_config_reader(mount_dir=mount_dir) cluster = clusters_factory(cluster_config) @@ -68,6 +73,135 @@ def test_efs_same_az(region, scheduler, pcluster_config_reader, clusters_factory _test_efs_correctly_shared(remote_command_executor, mount_dir, scheduler_commands) +@pytest.mark.usefixtures("os", "instance") +def test_existing_efs( + region, + scheduler, + efs_stack, + pcluster_config_reader, + clusters_factory, + vpc_stack, + request, + key_name, + cfn_stacks_factory, +): + """ + Test when efs_fs_id is provided in the config file, the existing efs can be correctly mounted. + + To verify the efs is the existing efs, the test expects a file with random ran inside the efs mounted + """ + file_name = _write_file_into_efs(region, vpc_stack, efs_stack, request, key_name, cfn_stacks_factory) + + _assert_subnet_az_relations(region, vpc_stack, expected_in_same_az=False) + mount_dir = "/efs_mount_dir" + cluster_config = pcluster_config_reader( + mount_dir=mount_dir, efs_fs_id=efs_stack.cfn_resources["FileSystemResource"] + ) + cluster = clusters_factory(cluster_config) + remote_command_executor = RemoteCommandExecutor(cluster) + + # test file in efs exist + logging.info("Testing efs {0} is correctly mounted".format(mount_dir)) + result = remote_command_executor.run_remote_command("df | grep '{0}'".format(mount_dir)) + assert_that(result.stdout).contains(mount_dir) + + remote_command_executor.run_remote_command(f"cat {mount_dir}/{file_name}") + scheduler_commands = get_scheduler_commands(scheduler, remote_command_executor) + _test_efs_correctly_mounted(remote_command_executor, mount_dir) + _test_efs_correctly_shared(remote_command_executor, mount_dir, scheduler_commands) + remote_command_executor.run_remote_command(f"cat {mount_dir}/{file_name}") + + +@pytest.fixture(scope="class") +def efs_stack(cfn_stacks_factory, request, region): + """EFS stack contains a single efs resource.""" + efs_template = Template() + efs_template.set_version("2010-09-09") + efs_template.set_description("EFS stack created for testing existing EFS") + efs_template.add_resource(FileSystem("FileSystemResource")) + stack = CfnStack( + name=generate_stack_name("integ-tests-efs", request.config.getoption("stackname_suffix")), + region=region, + template=efs_template.to_json(), + ) + cfn_stacks_factory.create_stack(stack) + + yield stack + + if not request.config.getoption("no_delete"): + cfn_stacks_factory.delete_stack(stack.name, region) + + +def _write_file_into_efs(region, vpc_stack, efs_stack, request, key_name, cfn_stacks_factory): + """Write file stack contains a mount target and a instance to write a empty file with random name into the efs.""" + write_file_template = Template() + write_file_template.set_version("2010-09-09") + write_file_template.set_description("Stack to write a file to the existing EFS") + default_security_group_id = ( + boto3.client("ec2", region_name=region) + .describe_security_groups( + Filters=[ + {"Name": "vpc-id", "Values": [vpc_stack.cfn_outputs["VpcId"]]}, + {"Name": "group-name", "Values": ["default"]}, + ] + ) + .get("SecurityGroups")[0] + .get("GroupId") + ) + write_file_template.add_resource( + MountTarget( + "MountTargetResource", + FileSystemId=efs_stack.cfn_resources["FileSystemResource"], + SubnetId=vpc_stack.cfn_outputs["PublicSubnetId"], + 
SecurityGroups=[default_security_group_id], + ) + ) + random_file_name = random_alphanumeric() + user_data = ( + """ + #cloud-config + package_update: true + package_upgrade: true + runcmd: + - yum install -y amazon-efs-utils + - yum install -y nfs-utils + - file_system_id_1=""" + + efs_stack.cfn_resources["FileSystemResource"] + + """ + - efs_mount_point_1=/mnt/efs/fs1 + - mkdir -p "${!efs_mount_point_1}" + - mount -t efs ${!file_system_id_1}:/ ${!efs_mount_point_1} + - touch ${!efs_mount_point_1}/""" + + random_file_name + + """ + - umount ${!efs_mount_point_1} + - /opt/aws/bin/cfn-signal -e $? --stack ${AWS::StackName} --resource InstanceToWriteEFS --region ${AWS::Region} + """ + ) + write_file_template.add_resource( + Instance( + "InstanceToWriteEFS", + CreationPolicy={"ResourceSignal": {"Timeout": "PT10M"}}, + ImageId=retrieve_latest_ami(region, "alinux2"), + InstanceType="c5.xlarge", + SubnetId=vpc_stack.cfn_outputs["PublicSubnetId"], + UserData=Base64(Sub(user_data)), + KeyName=key_name, + DependsOn=["MountTargetResource"], + ) + ) + write_file_stack = CfnStack( + name=generate_stack_name("integ-tests-efs-write-file", request.config.getoption("stackname_suffix")), + region=region, + template=write_file_template.to_json(), + ) + cfn_stacks_factory.create_stack(write_file_stack) + + cfn_stacks_factory.delete_stack(write_file_stack.name, region) + + return random_file_name + + def _test_efs_correctly_shared(remote_command_executor, mount_dir, scheduler_commands): logging.info("Testing efs correctly mounted on compute nodes") verify_directory_correctly_shared(remote_command_executor, mount_dir, scheduler_commands) @@ -87,13 +221,13 @@ def _test_efs_correctly_mounted(remote_command_executor, mount_dir): ) -def _assert_subnet_az_relations(region, vpc_stacks, expected_in_same_az): - vpc = get_vpc_snakecase_value(region, vpc_stacks) - master_subnet_id = vpc["public_subnet_id"] +def _assert_subnet_az_relations(region, vpc_stack, expected_in_same_az): + vpc = get_vpc_snakecase_value(vpc_stack) + head_node_subnet_id = vpc["public_subnet_id"] compute_subnet_id = vpc["private_subnet_id"] if expected_in_same_az else vpc["private_additional_cidr_subnet_id"] - master_subnet_az = boto3.resource("ec2", region_name=region).Subnet(master_subnet_id).availability_zone + head_node_subnet_az = boto3.resource("ec2", region_name=region).Subnet(head_node_subnet_id).availability_zone compute_subnet_az = boto3.resource("ec2", region_name=region).Subnet(compute_subnet_id).availability_zone if expected_in_same_az: - assert_that(master_subnet_az).is_equal_to(compute_subnet_az) + assert_that(head_node_subnet_az).is_equal_to(compute_subnet_az) else: - assert_that(master_subnet_az).is_not_equal_to(compute_subnet_az) + assert_that(head_node_subnet_az).is_not_equal_to(compute_subnet_az) diff --git a/tests/integration-tests/tests/storage/test_efs/test_efs_compute_az/pcluster.config.ini b/tests/integration-tests/tests/storage/test_efs/test_efs_compute_az/pcluster.config.ini index 2c916fbb2d..9fda919812 100644 --- a/tests/integration-tests/tests/storage/test_efs/test_efs_compute_az/pcluster.config.ini +++ b/tests/integration-tests/tests/storage/test_efs/test_efs_compute_az/pcluster.config.ini @@ -23,7 +23,7 @@ efs_settings = efs [vpc parallelcluster-vpc] vpc_id = {{ vpc_id }} master_subnet_id = {{ public_subnet_id }} -# This compute subnet would be in a different AZ than master for regions defined in AVAILABILITY_ZONE_OVERRIDES +# This compute subnet would be in a different AZ than head node for regions defined in
AVAILABILITY_ZONE_OVERRIDES # See conftest for details compute_subnet_id = {{ private_additional_cidr_subnet_id }} diff --git a/tests/integration-tests/tests/storage/test_efs/test_existing_efs/pcluster.config.ini b/tests/integration-tests/tests/storage/test_efs/test_existing_efs/pcluster.config.ini new file mode 100644 index 0000000000..881335a66e --- /dev/null +++ b/tests/integration-tests/tests/storage/test_efs/test_existing_efs/pcluster.config.ini @@ -0,0 +1,33 @@ +[global] +cluster_template = default + +[aws] +aws_region_name = {{ region }} + +[cluster default] +base_os = {{ os }} +key_name = {{ key_name }} +vpc_settings = parallelcluster-vpc +scheduler = {{ scheduler }} +master_instance_type = {{ instance }} +compute_instance_type = {{ instance }} +{% if scheduler == "awsbatch" %} +min_vcpus = 4 +desired_vcpus = 4 +{% else %} +initial_queue_size = 1 +maintain_initial_size = true +{% endif %} +efs_settings = efs + +[vpc parallelcluster-vpc] +vpc_id = {{ vpc_id }} +master_subnet_id = {{ public_subnet_id }} +# This compute subnet would be in a different AZ than head node for regions defined in AVAILABILITY_ZONE_OVERRIDES +# See conftest for details +compute_subnet_id = {{ private_additional_cidr_subnet_id }} +use_public_ips = false + +[efs efs] +efs_fs_id = {{ efs_fs_id }} +shared_dir = {{ mount_dir }} diff --git a/tests/integration-tests/tests/storage/test_fsx_lustre.py b/tests/integration-tests/tests/storage/test_fsx_lustre.py index c832f3e68d..79bb16254f 100644 --- a/tests/integration-tests/tests/storage/test_fsx_lustre.py +++ b/tests/integration-tests/tests/storage/test_fsx_lustre.py @@ -54,8 +54,6 @@ @pytest.mark.instances(["c5.xlarge"]) @pytest.mark.schedulers(["slurm"]) @pytest.mark.usefixtures("instance") -# FSx is not supported on CentOS 6 -@pytest.mark.skip_oss(["centos6"]) def test_fsx_lustre_configuration_options( deployment_type, per_unit_storage_throughput, @@ -86,14 +84,16 @@ storage_capacity=storage_capacity, ) cluster = clusters_factory(cluster_config) - _test_fsx_lustre(cluster, region, scheduler, os, mount_dir, bucket_name, storage_type, auto_import_policy) + _test_fsx_lustre( + cluster, region, scheduler, os, mount_dir, bucket_name, storage_type, auto_import_policy, deployment_type + ) @pytest.mark.regions(["eu-west-1"]) @pytest.mark.instances(["c5.xlarge", "m6g.xlarge"]) @pytest.mark.schedulers(["slurm"]) @pytest.mark.usefixtures("instance") -# FSx is only supported on ARM instances for Ubuntu 18.04 and Amazon Linux 2 +# FSx is only supported on ARM instances for Ubuntu 18.04, Amazon Linux 2 and CentOS 8 @pytest.mark.skip_dimensions("*", "m6g.xlarge", "alinux", "*") @pytest.mark.skip_dimensions("*", "m6g.xlarge", "centos7", "*") @pytest.mark.skip_dimensions("*", "m6g.xlarge", "ubuntu1604", "*") @@ -121,10 +121,22 @@ def test_fsx_lustre( storage_capacity=1200, ) cluster = clusters_factory(cluster_config) - _test_fsx_lustre(cluster, region, scheduler, os, mount_dir, bucket_name, storage_type=None, auto_import_policy=None) + _test_fsx_lustre( + cluster, + region, + scheduler, + os, + mount_dir, + bucket_name, + storage_type=None, + auto_import_policy=None, + deployment_type=None, + ) -def _test_fsx_lustre(cluster, region, scheduler, os, mount_dir, bucket_name, storage_type, auto_import_policy): +def _test_fsx_lustre( + cluster, region, scheduler, os, mount_dir, bucket_name, storage_type, auto_import_policy, deployment_type +): remote_command_executor = RemoteCommandExecutor(cluster) scheduler_commands =
get_scheduler_commands(scheduler, remote_command_executor) fsx_fs_id = get_fsx_fs_id(cluster, region) @@ -133,6 +145,7 @@ def _test_fsx_lustre(cluster, region, scheduler, os, mount_dir, bucket_name, sto _test_import_path(remote_command_executor, mount_dir) _test_fsx_lustre_correctly_shared(scheduler_commands, remote_command_executor, mount_dir) _test_storage_type(storage_type, fsx_fs_id, region) + _test_deployment_type(deployment_type, fsx_fs_id, region) _test_export_path(remote_command_executor, mount_dir, bucket_name, region) _test_auto_import(auto_import_policy, remote_command_executor, mount_dir, bucket_name, region) _test_data_repository_task(remote_command_executor, mount_dir, bucket_name, fsx_fs_id, region) @@ -142,7 +155,7 @@ @pytest.mark.instances(["c5.xlarge", "m6g.xlarge"]) @pytest.mark.schedulers(["sge"]) @pytest.mark.usefixtures("instance") -# FSx is only supported on ARM instances for Ubuntu 18.04 and Amazon Linux 2 +# FSx is only supported on ARM instances for Ubuntu 18.04, Amazon Linux 2 and CentOS 8 @pytest.mark.skip_dimensions("*", "m6g.xlarge", "alinux", "*") @pytest.mark.skip_dimensions("*", "m6g.xlarge", "centos7", "*") @pytest.mark.skip_dimensions("*", "m6g.xlarge", "ubuntu1604", "*") @@ -266,6 +279,23 @@ def _test_storage_type(storage_type, fsx_fs_id, region): assert_that(get_storage_type(fsx_fs_id, region)).is_equal_to("SSD") +def _get_deployment_type(fsx_fs_id, region): + deployment_type = ( + boto3.client("fsx", region_name=region) + .describe_file_systems(FileSystemIds=[fsx_fs_id]) + .get("FileSystems")[0] + .get("LustreConfiguration") + .get("DeploymentType") + ) + logging.info(f"Getting DeploymentType {deployment_type} from the DescribeFileSystems API.") + return deployment_type + + +def _test_deployment_type(deployment_type, fsx_fs_id, region): + if deployment_type: + assert_that(_get_deployment_type(fsx_fs_id, region)).is_equal_to(deployment_type) + + def _test_import_path(remote_command_executor, mount_dir): logging.info("Testing fsx lustre import path") result = remote_command_executor.run_remote_command("cat {mount_dir}/s3_test_file".format(mount_dir=mount_dir)) diff --git a/tests/integration-tests/tests/storage/test_fsx_lustre/test_fsx_lustre/pcluster.config.ini b/tests/integration-tests/tests/storage/test_fsx_lustre/test_fsx_lustre/pcluster.config.ini index 04a3a17142..c92c5dd9af 100644 --- a/tests/integration-tests/tests/storage/test_fsx_lustre/test_fsx_lustre/pcluster.config.ini +++ b/tests/integration-tests/tests/storage/test_fsx_lustre/test_fsx_lustre/pcluster.config.ini @@ -38,8 +38,8 @@ shared_dir = {{ mount_dir }} storage_capacity = {{ storage_capacity }} import_path = s3://{{ bucket_name }} export_path = s3://{{ bucket_name }}/export_dir -{% if region.startswith("cn-") %} -# the only deployment_type supported in China regions is PERSISTENT_1 +{% if region.startswith(("cn-", "us-gov-")) %} +# SCRATCH_1 is not available in China/GovCloud regions; the only supported deployment_type there is PERSISTENT_1 deployment_type = PERSISTENT_1 per_unit_storage_throughput = 200 {% endif %} diff --git a/tests/integration-tests/tests/tags/test_tag_propagation.py b/tests/integration-tests/tests/tags/test_tag_propagation.py index 5540410ff2..6b5e75c04c 100644 --- a/tests/integration-tests/tests/tags/test_tag_propagation.py +++ b/tests/integration-tests/tests/tags/test_tag_propagation.py @@ -190,8 +190,8 @@ def get_root_volume_id(instance_id, region, os): logging.info("Getting root volume for instance %s", instance_id) os_to_root_volume_device = { #
These are taken from the main CFN template - "centos6": "/dev/sda1", "centos7": "/dev/sda1", + "centos8": "/dev/sda1", "alinux": "/dev/xvda", "alinux2": "/dev/xvda", "ubuntu1604": "/dev/sda1", diff --git a/tests/integration-tests/utils.py b/tests/integration-tests/utils.py index c47061e4bf..cfce9d9c0b 100644 --- a/tests/integration-tests/utils.py +++ b/tests/integration-tests/utils.py @@ -56,6 +56,15 @@ def run_command(command, capture_output=True, log_error=True, env=None, timeout= return result +def generate_stack_name(prefix, suffix): + """Generate a stack name with prefix, suffix, and a random string in the middle""" + return prefix + "-{0}{1}{2}".format( + random_alphanumeric(), + "-" if suffix else "", + suffix, + ) + + def random_alphanumeric(size=16): """Generate a random alphanumeric string.""" return "".join(random.choice(string.ascii_lowercase + string.digits) for _ in range(size)) @@ -223,6 +232,10 @@ def set_credentials(region, credential_arg): :param region: region of the bucket :param credential_arg: credential list """ + if os.environ.get("AWS_CREDENTIALS_FOR_REGION", "no_region") == region: + logging.info(f"AWS credentials are already set for region: {region}") + return + if credential_arg: # credentials = dict { region1: (endpoint1, arn1, external_id1), # region2: (endpoint2, arn2, external_id2), @@ -243,6 +256,8 @@ def set_credentials(region, credential_arg): credential_endpoint, credential_arn, credential_external_id, region ) + logging.info(f"Setting AWS credentials for region: {region}") + # Set credential for all boto3 client boto3.setup_default_session( aws_access_key_id=aws_credentials["AccessKeyId"], @@ -254,6 +269,7 @@ def set_credentials(region, credential_arg): os.environ["AWS_ACCESS_KEY_ID"] = aws_credentials["AccessKeyId"] os.environ["AWS_SECRET_ACCESS_KEY"] = aws_credentials["SecretAccessKey"] os.environ["AWS_SESSION_TOKEN"] = aws_credentials["SessionToken"] + os.environ["AWS_CREDENTIALS_FOR_REGION"] = region def _retrieve_sts_credential(credential_endpoint, credential_arn, credential_external_id, region): @@ -274,6 +290,7 @@ def _retrieve_sts_credential(credential_endpoint, credential_arn, credential_ext def unset_credentials(): """Unset credentials""" # Unset credential for all boto3 client + logging.info("Unsetting AWS credentials") boto3.setup_default_session() # Unset credential for cli command e.g. 
pcluster create if "AWS_ACCESS_KEY_ID" in os.environ: @@ -282,6 +299,8 @@ def unset_credentials(): del os.environ["AWS_SECRET_ACCESS_KEY"] if "AWS_SESSION_TOKEN" in os.environ: del os.environ["AWS_SESSION_TOKEN"] + if "AWS_CREDENTIALS_FOR_REGION" in os.environ: + del os.environ["AWS_CREDENTIALS_FOR_REGION"] def set_logger_formatter(formatter): @@ -304,11 +323,10 @@ def paginate_boto3(method, **kwargs): yield result -def get_vpc_snakecase_value(region, vpc_stacks): +def get_vpc_snakecase_value(vpc_stack): """Return dict containing snakecase vpc variables.""" vpc_output_dict = {} - vpc = vpc_stacks[region] - for key, value in vpc.cfn_outputs.items(): + for key, value in vpc_stack.cfn_outputs.items(): vpc_output_dict[to_snake_case(key)] = value return vpc_output_dict @@ -362,3 +380,13 @@ def get_architecture_supported_by_instance_type(instance_type, region_name=None) assert_that(len(instance_architectures)).is_equal_to(1) return instance_architectures[0] + + +def check_headnode_security_group(region, cluster, port, expected_cidr): + """Check CIDR restriction for a port is in the security group of the head node of the cluster""" + security_group_id = cluster.cfn_resources.get("MasterSecurityGroup") + response = boto3.client("ec2", region_name=region).describe_security_groups(GroupIds=[security_group_id]) + + ips = response["SecurityGroups"][0]["IpPermissions"] + target = next(filter(lambda x: x.get("FromPort", -1) == port, ips), {}) + assert_that(target["IpRanges"][0]["CidrIp"]).is_equal_to(expected_cidr) diff --git a/util/cfn-stacks-generators/generate-efs-substack.py b/util/cfn-stacks-generators/generate-efs-substack.py index 22a5ebaa9d..3e2548b511 100644 --- a/util/cfn-stacks-generators/generate-efs-substack.py +++ b/util/cfn-stacks-generators/generate-efs-substack.py @@ -7,7 +7,7 @@ def main(args): t = Template() # [0 shared_dir, 1 efs_fs_id, 2 performance_mode, 3 efs_kms_key_id, - # 4 provisioned_throughput, 5 encrypted, 6 throughput_mode, 7 exists_valid_master_mt, 8 exists_valid_compute_mt] + # 4 provisioned_throughput, 5 encrypted, 6 throughput_mode, 7 exists_valid_head_node_mt, 8 exists_valid_compute_mt] efs_options = t.add_parameter( Parameter( "EFSOptions", @@ -18,8 +18,8 @@ def main(args): compute_security_group = t.add_parameter( Parameter("ComputeSecurityGroup", Type="String", Description="Security Group for Mount Target") ) - master_subnet_id = t.add_parameter( - Parameter("MasterSubnetId", Type="String", Description="Master subnet id for master mount target") + head_node_subnet_id = t.add_parameter( + Parameter("MasterSubnetId", Type="String", Description="Head node subnet id for head node mount target") ) compute_subnet_id = t.add_parameter( Parameter( @@ -33,7 +33,7 @@ def main(args): "CreateEFS", And(Not(Equals(Select(str(0), Ref(efs_options)), "NONE")), Equals(Select(str(1), Ref(efs_options)), "NONE")), ) - create_master_mt = t.add_condition( + create_head_node_mt = t.add_condition( "CreateMasterMT", And(Not(Equals(Select(str(0), Ref(efs_options)), "NONE")), Equals(Select(str(7), Ref(efs_options)), "NONE")), ) @@ -43,10 +43,10 @@ def main(args): ) # Need to create compute mount target if: # user is providing a compute subnet and - # there is no existing MT in compute subnet's AZ(includes case where master AZ == compute AZ). + # there is no existing MT in compute subnet's AZ(includes case where head node AZ == compute AZ). 
# - # If user is not providing a compute subnet, either we are using the master subnet as compute subnet, - # or we will be creating a compute subnet that is in the same AZ as master subnet, + # If user is not providing a compute subnet, either we are using the head node subnet as compute subnet, + # or we will be creating a compute subnet that is in the same AZ as head node subnet, # see ComputeSubnet resource in the main stack. # In both cases no compute MT is needed. create_compute_mt = t.add_condition( @@ -82,8 +82,8 @@ def main(args): "MasterSubnetEFSMT", FileSystemId=If(create_efs, Ref(fs), Select(str(1), Ref(efs_options))), SecurityGroups=[Ref(compute_security_group)], - SubnetId=Ref(master_subnet_id), - Condition=create_master_mt, + SubnetId=Ref(head_node_subnet_id), + Condition=create_head_node_mt, ) ) diff --git a/util/common.py b/util/common.py index 87e727c79a..b2bab38fe5 100644 --- a/util/common.py +++ b/util/common.py @@ -73,10 +73,13 @@ def generate_rollback_data(regions, dest_bucket, files, sts_credentials): rollback_data[bucket_name] = {"region": region, "files": {}} doc_manager = S3DocumentManager(region, sts_credentials.get(region)) for file_type in files: + s3_path = FILE_TO_S3_PATH.get(file_type, file_type) version = doc_manager.get_current_version( - dest_bucket.format(region=region), FILE_TO_S3_PATH[file_type], raise_on_object_not_found=False + dest_bucket.format(region=region), + s3_path, + raise_on_object_not_found=False, ) - rollback_data[bucket_name]["files"][FILE_TO_S3_PATH[file_type]] = version + rollback_data[bucket_name]["files"][s3_path] = version logging.info("Rollback data:\n%s", json.dumps(rollback_data, indent=2)) rollback_file_name = "rollback-data.json"
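The node-name matching added in _assert_cluster_initial_conditions above keys off a short token derived from the parametrized instance type. A minimal standalone sketch of that slicing, with an illustrative instance type value:

# "c5.xlarge".index(".") == 2, so the slice keeps "c5.x"; dropping the dot yields "c5x",
# the instance-type token that appears in the Slurm node names checked by the test.
instance = "c5.xlarge"
token = instance[: instance.index(".") + 2].replace(".", "")
assert token == "c5x"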
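KMSKeyFactory._create_role above resolves the AWS partition from the region prefix using a generator expression with a default. A minimal sketch of the same logic in isolation (the function name and region values are illustrative):

def detect_partition(region):
    # "us-gov-*" maps to "aws-us-gov", "cn-*" maps to "aws-cn",
    # and anything else falls back to the commercial "aws" partition.
    return next(("aws-" + partition for partition in ["us-gov", "cn"] if region.startswith(partition)), "aws")

assert detect_partition("us-gov-west-1") == "aws-us-gov"
assert detect_partition("cn-north-1") == "aws-cn"
assert detect_partition("eu-west-1") == "aws"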
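The generate_stack_name helper added to tests/integration-tests/utils.py composes a stack name from a prefix, a random token, and an optional suffix, dropping the separator when the suffix is empty. A short usage sketch (the suffix value is illustrative):

import random
import string

def random_alphanumeric(size=16):
    # Random lowercase-alphanumeric token, as defined in tests/integration-tests/utils.py.
    return "".join(random.choice(string.ascii_lowercase + string.digits) for _ in range(size))

def generate_stack_name(prefix, suffix):
    # Same formatting as the helper above: the "-" separator is omitted when suffix is "".
    return prefix + "-{0}{1}{2}".format(random_alphanumeric(), "-" if suffix else "", suffix)

print(generate_stack_name("integ-tests-efs", "mysuffix"))  # e.g. integ-tests-efs-a1b2c3d4e5f6g7h8-mysuffix
print(generate_stack_name("integ-tests-efs", ""))  # e.g. integ-tests-efs-a1b2c3d4e5f6g7h8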
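The AWS_CREDENTIALS_FOR_REGION environment variable added to set_credentials/unset_credentials acts as an idempotency guard around the STS assume-role call, so repeated calls for the same region skip the extra round trip. A minimal sketch of the pattern, where assume_role_and_export is a hypothetical stand-in for the credential-export logic in utils.py:

import logging
import os

def set_credentials_once(region, assume_role_and_export):
    # Skip the STS call when credentials for this region are already exported.
    if os.environ.get("AWS_CREDENTIALS_FOR_REGION", "no_region") == region:
        logging.info("AWS credentials are already set for region: %s", region)
        return
    assume_role_and_export(region)  # hypothetical: assumes the role and exports the AWS_* variables
    os.environ["AWS_CREDENTIALS_FOR_REGION"] = region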