diff --git a/.github/workflows/ansible-lint.yml b/.github/workflows/ansible-lint.yml new file mode 100644 index 000000000..31628259d --- /dev/null +++ b/.github/workflows/ansible-lint.yml @@ -0,0 +1,66 @@ +name: Ansible Lint # feel free to pick your own name + +on: + pull_request: + branches: + - release + - devel + +jobs: + ansible-lint: + + runs-on: ubuntu-latest + + steps: + # Important: This sets up your GITHUB_WORKSPACE environment variable + - uses: actions/checkout@v2 + + - name: ansible-lint + # replace "master" with any valid ref + uses: ansible/ansible-lint-action@master + with: + # [required] + # Paths to ansible files (i.e., playbooks, tasks, handlers etc..) + # or valid Ansible directories according to the Ansible role + # directory structure. + # If you want to lint multiple ansible files, use the following syntax + # targets: | + # playbook_1.yml + # playbook_2.yml + targets: | + /github/workspace/omnia.yml + /github/workspace/omnia_config.yml + /github/workspace/platforms/jupyterhub.yml + /github/workspace/platforms/kubeflow.yml + /github/workspace/tools/install_tools.yml + /github/workspace/tools/intel_tools.yml + /github/workspace/tools/olm.yml + # [optional] + # Arguments to override a package and its version to be set explicitly. + # Must follow the example syntax. + #override-deps: | + # ansible==2.9 + # ansible-lint==4.2.0 + # [optional] + # Arguments to be passed to the ansible-lint + + # Options: + # -q quieter, although not silent output + # -p parseable output in the format of pep8 + # --parseable-severity parseable output including severity of rule + # -r RULESDIR specify one or more rules directories using one or + # more -r arguments. Any -r flags override the default + # rules in ansiblelint/rules, unless -R is also used. + # -R Use default rules in ansiblelint/rules in addition to + # any extra + # rules directories specified with -r. There is no need + # to specify this if no -r flags are used + # -t TAGS only check rules whose id/tags match these values + # -x SKIP_LIST only check rules whose id/tags do not match these + # values + # --nocolor disable colored output + # --exclude=EXCLUDE_PATHS + # path to directories or files to skip. This option is + # repeatable. + # -c C Specify configuration file to use. Defaults to ".ansible-lint" + args: "-x 305" diff --git a/.metadata/omnia_version b/.metadata/omnia_version new file mode 100644 index 000000000..29c4a6e99 --- /dev/null +++ b/.metadata/omnia_version @@ -0,0 +1 @@ +Omnia version 1.0.0 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 71c86f47f..c0c1df7fb 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -32,7 +32,7 @@ Contributions to Omnia are made through [Pull Requests (PRs)](https://help.githu 6. **Create a pull request:** [Create a pull request](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-a-pull-request) with a title following this format Issue ###: Description (_i.e., Issue 1023: Reformat testutils_). It is important that you do a good job with the description to make the job of the code reviewer easier. A good description not only reduces review time, but also reduces the probability of a misunderstanding with the pull request. * **Important:** When preparing a pull request it is important to stay up-to-date with the project repository. We recommend that you rebase against the upstream repo _frequently_. 
To do this, use the following commands: ``` - git pull --rebase upstream master #upstream is dellhpc/omnia + git pull --rebase upstream devel #upstream is dellhpc/omnia git push --force origin #origin is your fork of the repository (e.g., /omnia.git) ``` * **PR Description:** Be sure to fully describe the pull request. Ideally, your PR description will contain: @@ -42,7 +42,7 @@ Contributions to Omnia are made through [Pull Requests (PRs)](https://help.githu 4. How to verify that the changes work correctly. ## Omnia Branches and Contribution Flow -The diagram below describes the contribution flow. Omnia has two lifetime branches: `devel` and `master`. The `master` branch is reserved for releases and their associated tags. The `devel` branch is where all development work occurs. The `devel` branch is also the default branch for the project. +The diagram below describes the contribution flow. Omnia has two lifetime branches: `devel` and `release`. The `release` branch is reserved for releases and their associated tags. The `devel` branch is where all development work occurs. The `devel` branch is also the default branch for the project. ![Omnia Branch Flowchart](docs/images/omnia-branch-structure.png "Flowchart of Omnia branches") diff --git a/README.md b/README.md index e25e64656..8fb23db8b 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,8 @@ -![GitHub](https://img.shields.io/github/license/dellhpc/omnia) ![GitHub issues](https://img.shields.io/github/issues-raw/dellhpc/omnia) ![GitHub release (latest by date including pre-releases)](https://img.shields.io/github/v/release/dellhpc/omnia?include_prereleases) ![GitHub last commit (branch)](https://img.shields.io/github/last-commit/dellhpc/omnia/devel) ![GitHub commits since tagged version](https://img.shields.io/github/commits-since/dellhpc/omnia/omnia-v0.2/devel) +![GitHub](https://img.shields.io/github/license/dellhpc/omnia) ![GitHub issues](https://img.shields.io/github/issues-raw/dellhpc/omnia) ![GitHub release (latest by date including pre-releases)](https://img.shields.io/github/v/release/dellhpc/omnia?include_prereleases) ![GitHub last commit (branch)](https://img.shields.io/github/last-commit/dellhpc/omnia/devel) ![GitHub commits since tagged version](https://img.shields.io/github/commits-since/dellhpc/omnia/v1.0.0/devel) + +![GitHub contributors](https://img.shields.io/github/contributors-anon/dellhpc/omnia) ![GitHub forks](https://img.shields.io/github/forks/dellhpc/omnia) ![GitHub Repo stars](https://img.shields.io/github/stars/dellhpc/omnia) ![GitHub all releases](https://img.shields.io/github/downloads/dellhpc/omnia/total) #### Ansible playbook-based deployment of Slurm and Kubernetes on Dell EMC PowerEdge servers running an RPM-based Linux OS diff --git a/kubernetes/jupyterhub.yaml b/appliance/appliance.yml similarity index 81% rename from kubernetes/jupyterhub.yaml rename to appliance/appliance.yml index 161bf20cc..7eb2c500f 100644 --- a/kubernetes/jupyterhub.yaml +++ b/appliance/appliance.yml @@ -1,4 +1,4 @@ -# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,12 +11,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-
 ---
-#Playbook for installing JupyterHub v1.1.0 in Omnia
-
-# Start K8s worker servers
-- hosts: master
-  gather_facts: false
+
+- name: Executing omnia roles
+  hosts: localhost
+  connection: local
   roles:
-    - jupyterhub
+    - common
+    - provision
+    - web_ui
\ No newline at end of file
diff --git a/appliance/appliance_config.yml b/appliance/appliance_config.yml
new file mode 100644
index 000000000..9407495c5
--- /dev/null
+++ b/appliance/appliance_config.yml
@@ -0,0 +1,49 @@
+# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+
+# Password used while deploying the OS on bare metal servers and for the Cobbler UI.
+# The length of the password should be at least 8 characters.
+# The password must not contain -, \, ', "
+provision_password: ""
+
+# Password used for the AWX UI.
+# The length of the password should be at least 8 characters.
+# The password must not contain -, \, ', "
+awx_password: ""
+
+# The nic/ethernet card that needs to be connected to the HPC switch.
+# This nic will be configured by Omnia for the DHCP server.
+# The default value of the nic is em1.
+hpc_nic: "em1"
+
+# The nic/ethernet card that will be connected to the public internet.
+# The default value of the nic is em2.
+public_nic: "em2"
+
+# The path where the user has kept the ISO image to be provisioned on the target nodes.
+# The ISO file should be the CentOS7-2009-minimal edition.
+# Other ISO files are not supported.
+iso_file_path: ""
+
+# The mapping file consists of the MAC address and its respective IP address and hostname.
+# The mapping file must be a CSV file in the format MAC,hostname,IP.
+# A template mapping file exists in omnia/examples and is named mapping_file.csv.
+# The path where the user has kept the mapping file for DHCP configuration.
+mapping_file_path: ""
+
+# The DHCP range for assigning IPv4 addresses to the bare metal nodes.
+# Example: 10.1.23.1
+dhcp_start_ip_range: ""
+dhcp_end_ip_range: ""
diff --git a/appliance/inventory.yml b/appliance/inventory.yml
new file mode 100644
index 000000000..0a08072ed
--- /dev/null
+++ b/appliance/inventory.yml
@@ -0,0 +1,20 @@
+# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
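For reference, a mapping file in the MAC,hostname,IP format described in appliance_config.yml above could look like the following (the MAC addresses, hostnames, and IPs here are purely illustrative):

```
08:9e:01:50:fa:11,compute001,172.17.0.10
08:9e:01:50:fa:12,compute002,172.17.0.11
```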
+--- +- name: Dynamic Inventory + hosts: localhost + connection: local + gather_facts: no + roles: + - inventory diff --git a/appliance/roles/common/files/daemon.json b/appliance/roles/common/files/daemon.json new file mode 100644 index 000000000..c9504885d --- /dev/null +++ b/appliance/roles/common/files/daemon.json @@ -0,0 +1,3 @@ +{ + "bip": "172.18.0.1/16" +} diff --git a/appliance/roles/common/tasks/docker_installation.yml b/appliance/roles/common/tasks/docker_installation.yml new file mode 100644 index 000000000..4568a3a03 --- /dev/null +++ b/appliance/roles/common/tasks/docker_installation.yml @@ -0,0 +1,86 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +- name: Add docker repo + get_url: + url: "{{ docker_repo_url }}" + dest: "{{ docker_repo_dest }}" + tags: install + +- name: Enable docker edge and test repo + ini_file: + dest: "{{ docker_repo_dest }}" + section: "{{ item }}" + option: enabled + value: "{{ success }}" + with_items: ['docker-ce-test', 'docker-ce-edge'] + tags: install + +- name: Install docker + package: + name: "{{ container_repo_install }}" + state: present + become: yes + tags: install + +- name: Start services + service: + name: "{{ container_type }}" + state: started + enabled: yes + become: yes + tags: install + +- name: Uninstall docker-py using pip + pip: + name: ['docker-py','docker'] + state: absent + tags: install + +- name: Install docker using pip + pip: + name: docker + state: present + tags: install + +- name: Update pip + command: pip3 install --upgrade pip + changed_when: false + +- name: Installation using python3 + pip: + name: "{{ docker_compose }}" + executable: pip3 + tags: install + +- name: Versionlock docker + command: "yum versionlock '{{ item }}'" + args: + warn: false + with_items: + - "{{ container_repo_install }}" + changed_when: true + tags: install + +- name: Configure docker + copy: + src: daemon.json + dest: "{{ daemon_dest }}" + tags: install + +- name: Restart docker + service: + name: docker + state: restarted diff --git a/appliance/roles/common/tasks/docker_volume.yml b/appliance/roles/common/tasks/docker_volume.yml new file mode 100644 index 000000000..157881c99 --- /dev/null +++ b/appliance/roles/common/tasks/docker_volume.yml @@ -0,0 +1,19 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
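A note on the daemon.json shipped above: it pins the Docker bridge to 172.18.0.1/16, presumably so the bridge cannot collide with the 172.17.x.x addresses used elsewhere in this changeset. One quick way to confirm the setting took effect after the docker restart (a manual check, not part of the playbooks):

```
ip addr show docker0 | grep 'inet '
# expected to contain: inet 172.18.0.1/16
```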
+--- + +- name: Create a docker volume + docker_volume: + name: "{{ docker_volume_name }}" + diff --git a/appliance/roles/common/tasks/internet_validation.yml b/appliance/roles/common/tasks/internet_validation.yml new file mode 100644 index 000000000..6486d3815 --- /dev/null +++ b/appliance/roles/common/tasks/internet_validation.yml @@ -0,0 +1,25 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +- name: Internet connectivity status + wait_for: + host: "{{ hostname }}" + port: "{{ port_no }}" + state: started + delay: "{{ internet_delay }}" + timeout: "{{ internet_timeout }}" + msg: "{{ internet_status }}" + register: internet_value + tags: install \ No newline at end of file diff --git a/appliance/roles/common/tasks/main.yml b/appliance/roles/common/tasks/main.yml new file mode 100644 index 000000000..141b2090e --- /dev/null +++ b/appliance/roles/common/tasks/main.yml @@ -0,0 +1,35 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- +- name: Mount Path + set_fact: + mount_path: "{{ role_path + '/../../..' }}" + +- name: Pre-requisite validation + import_tasks: pre_requisite.yml + +- name: Internet validation + import_tasks: internet_validation.yml + +- name: Common packages installation + import_tasks: package_installation.yml + +- name: Basic Configuration + import_tasks: password_config.yml + +- name: Docker installation and configuration + import_tasks: docker_installation.yml + +- name: Docker volume creation + import_tasks: docker_volume.yml \ No newline at end of file diff --git a/slurm/roles/start-slurm-workers/tasks/main.yml b/appliance/roles/common/tasks/package_installation.yml similarity index 73% rename from slurm/roles/start-slurm-workers/tasks/main.yml rename to appliance/roles/common/tasks/package_installation.yml index 0e929178c..34cb1d01d 100644 --- a/slurm/roles/start-slurm-workers/tasks/main.yml +++ b/appliance/roles/common/tasks/package_installation.yml @@ -1,4 +1,4 @@ -# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,13 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 ---
-- name: Install SLURM RPMs on compute
-  yum:
-    name: "{{ item }}"
-    #name: "{{ query('fileglob', ['/home/rpms/slurm*20*.rpm']) }}" <-- how it should work to avoid loop
-  with_fileglob:
-    - /home/rpms/slurm*20*.rpm
-  tags: install
+- name: Install packages
+  package:
+    name: "{{ common_packages }}"
+    state: present
+  tags: install
diff --git a/appliance/roles/common/tasks/password_config.yml b/appliance/roles/common/tasks/password_config.yml
new file mode 100644
index 000000000..68658a8d7
--- /dev/null
+++ b/appliance/roles/common/tasks/password_config.yml
@@ -0,0 +1,337 @@
+# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+
+- name: Check if input config file is encrypted
+  command: cat {{ input_config_filename }}
+  changed_when: false
+  register: config_content
+
+- name: Decrypt appliance_config.yml
+  command: >-
+    ansible-vault decrypt {{ input_config_filename }}
+    --vault-password-file {{ vault_filename }}
+  changed_when: false
+  when: "'$ANSIBLE_VAULT;' in config_content.stdout"
+
+- name: Include variable file appliance_config.yml
+  include_vars: "{{ input_config_filename }}"
+  no_log: true
+
+- name: Validate input parameters are not empty
+  fail:
+    msg: "{{ input_config_failure_msg }}"
+  register: input_config_check
+  when:
+    - provision_password | length < 1 or
+      awx_password | length < 1 or
+      hpc_nic | length < 1 or
+      public_nic | length < 1 or
+      iso_file_path | length < 1 or
+      dhcp_start_ip_range | length < 1 or
+      dhcp_end_ip_range | length < 1
+
+- name: Save input variables from file
+  set_fact:
+    cobbler_password: "{{ provision_password }}"
+    admin_password: "{{ awx_password }}"
+    nic: "{{ hpc_nic }}"
+    internet_nic: "{{ public_nic }}"
+    path_for_iso_file: "{{ iso_file_path }}"
+    dhcp_start_ip: "{{ dhcp_start_ip_range | ipv4 }}"
+    dhcp_end_ip: "{{ dhcp_end_ip_range | ipv4 }}"
+    mapping_file: false
+    path_for_mapping_file: "{{ mapping_file_path }}"
+  no_log: true
+
+- name: Get the system hpc ip
+  shell: "ifconfig {{ hpc_nic }} | grep 'inet' |cut -d: -f2 | awk '{ print $2}'"
+  register: ip
+  changed_when: false
+
+- name: Get the system public ip
+  shell: "ifconfig {{ internet_nic }} | grep 'inet' |cut -d: -f2 | awk '{ print $2}'"
+  register: internet_ip
+  changed_when: false
+
+- name: Get the system netmask
+  shell: "ifconfig {{ hpc_nic }} | grep 'inet' |cut -d: -f2 | awk '{ print $4}'"
+  register: net
+  changed_when: false
+
+- name: HPC nic IP
+  set_fact:
+    hpc_ip: "{{ ip.stdout }}"
+    public_ip: "{{ internet_ip.stdout }}"
+
+- name: Netmask
+  set_fact:
+    netmask: "{{ net.stdout }}"
+
+- name: Calculate the hpc subnet
+  shell: |
+    IFS=. read -r i1 i2 i3 i4 <<< "{{ hpc_ip }}"
+    IFS=. read -r m1 m2 m3 m4 <<< "{{ netmask }}"
+    printf "%d.%d.%d.%d\n" "$((i1 & m1))" "$((i2 & m2))" "$((i3 & m3))" "$((i4 & m4))"
+  register: sub_result
+  changed_when: false
+
+- name: Subnet
+  set_fact:
+    subnet: "{{ sub_result.stdout }}"
+
+- name: Assert provision_password
+  assert:
+    that:
+      - cobbler_password | length > min_length | int - 1
+      - cobbler_password | length < max_length | int + 1
+      - '"-" not in cobbler_password '
+      - '"\\" not in cobbler_password '
+      - '"\"" not in cobbler_password '
+      - " \"'\" not in cobbler_password "
+    success_msg: "{{ success_msg_provision_password }}"
+    fail_msg: "{{ fail_msg_provision_password }}"
+  no_log: true
+  register: cobbler_password_check
+
+- name: Assert awx_password
+  assert:
+    that:
+      - admin_password | length > min_length | int - 1
+      - admin_password | length < max_length | int + 1
+      - '"-" not in admin_password '
+      - '"\\" not in admin_password '
+      - '"\"" not in admin_password '
+      - " \"'\" not in admin_password "
+    success_msg: "{{ success_msg_awx_password }}"
+    fail_msg: "{{ fail_msg_awx_password }}"
+  no_log: true
+  register: awx_password_check
+
+- name: Assert hpc_ip
+  assert:
+    that:
+      - hpc_ip | length > 7
+    success_msg: "{{ success_hpc_ip }}"
+    fail_msg: "{{ fail_hpc_ip }}"
+  register: hpc_ip_check
+
+- name: Assert public_ip
+  assert:
+    that:
+      - public_ip | length > 7
+    success_msg: "{{ success_hpc_ip }}"
+    fail_msg: "{{ fail_hpc_ip }}"
+  register: public_ip_check
+
+- name: Assert hpc_nic
+  assert:
+    that:
+      - nic | length > nic_min_length | int - 1
+      - nic != internet_nic
+    success_msg: "{{ success_msg_hpc_nic }}"
+    fail_msg: "{{ fail_msg_hpc_nic }}"
+  register: hpc_nic_check
+
+- name: Assert public_nic
+  assert:
+    that:
+      - internet_nic | length > nic_min_length | int - 1
+      - nic != internet_nic
+    success_msg: "{{ success_msg_public_nic }}"
+    fail_msg: "{{ fail_msg_public_nic }}"
+  register: public_nic_check
+
+- name: Assert mapping_file_exists
+  assert:
+    that:
+      - "( mapping_file == true ) or ( mapping_file == false )"
+    success_msg: "{{ success_mapping_file }}"
+    fail_msg: "{{ fail_mapping_file }}"
+
+- name: Set the mapping file value
+  set_fact:
+    mapping_file: true
+  when: path_for_mapping_file != ""
+
+- name: Assert valid mapping_file_path
+  stat:
+    path: "{{ path_for_mapping_file }}"
+  when: mapping_file == true
+  register: result_path_mapping_file
+
+- name: Fail if mapping_file_path is invalid
+  fail:
+    msg: "{{ invalid_mapping_file_path }}"
+  when: ( mapping_file == true ) and ( result_path_mapping_file.stat.exists == false )
+
+- name: Assert valid iso_file_path
+  stat:
+    path: "{{ path_for_iso_file }}"
+  register: result_path_iso_file
+
+- name: Fail if iso_file_path is incorrect
+  fail:
+    msg: "{{ invalid_iso_file_path }}"
+  when: ( result_path_iso_file.stat.exists == false ) and ( ".iso" not in path_for_iso_file )
+
+- name: Fail when iso path valid but image not right
+  fail:
+    msg: "{{ invalid_iso_file_path }}"
+  when: ( result_path_iso_file.stat.exists == true ) and ( ".iso" not in path_for_iso_file )
+
+- name: Check the subnet of dhcp start range
+  shell: |
+    IFS=. read -r i1 i2 i3 i4 <<< "{{ dhcp_start_ip }}"
+    IFS=. read -r m1 m2 m3 m4 <<< "{{ netmask }}"
+    printf "%d.%d.%d.%d\n" "$((i1 & m1))" "$((i2 & m2))" "$((i3 & m3))" "$((i4 & m4))"
+  args:
+    warn: no
+  register: dhcp_start_sub_result
+  changed_when: false
+  when: dhcp_start_ip != "false"
+
+- name: Set the start dhcp subnet
+  set_fact:
+    dhcp_start_sub: "{{ dhcp_start_sub_result.stdout }}"
+  when: dhcp_start_ip != "false"
+
+- name: Check the subnet of dhcp end range
+  shell: |
+    IFS=. read -r i1 i2 i3 i4 <<< "{{ dhcp_end_ip }}"
+    IFS=. read -r m1 m2 m3 m4 <<< "{{ netmask }}"
+    printf "%d.%d.%d.%d\n" "$((i1 & m1))" "$((i2 & m2))" "$((i3 & m3))" "$((i4 & m4))"
+  register: dhcp_end_sub_result
+  when: dhcp_end_ip != "false"
+  changed_when: false
+
+- name: Set the end dhcp subnet
+  set_fact:
+    dhcp_end_sub: "{{ dhcp_end_sub_result.stdout }}"
+  when: dhcp_end_ip != "false"
+
+- name: Assert dhcp_start_ip_range
+  assert:
+    that:
+      - dhcp_start_ip != "false"
+      - dhcp_start_ip != dhcp_end_ip
+      - dhcp_start_sub == subnet
+      - dhcp_start_sub == dhcp_end_sub
+    success_msg: "{{ success_dhcp_range }}"
+    fail_msg: "{{ fail_dhcp_range }}"
+  register: dhcp_start_ip_check
+
+- name: Assert dhcp_end_ip_range
+  assert:
+    that:
+      - dhcp_end_ip != "false"
+      - dhcp_start_ip != dhcp_end_ip
+      - dhcp_end_sub == subnet
+      - dhcp_start_sub == dhcp_end_sub
+    success_msg: "{{ success_dhcp_range }}"
+    fail_msg: "{{ fail_dhcp_range }}"
+  register: dhcp_end_ip_check
+
+- name: Create ansible vault key
+  set_fact:
+    vault_key: "{{ lookup('password', '/dev/null chars=ascii_letters') }}"
+  when: "'$ANSIBLE_VAULT;' not in config_content.stdout"
+
+- name: Save vault key
+  copy:
+    dest: "{{ vault_filename }}"
+    content: |
+      {{ vault_key }}
+    owner: root
+    force: yes
+  when: "'$ANSIBLE_VAULT;' not in config_content.stdout"
+
+- name: Encrypt input config file
+  command: >-
+    ansible-vault encrypt {{ input_config_filename }}
+    --vault-password-file {{ vault_filename }}
+  changed_when: false
+
+- name: Check if omnia_vault_key exists
+  stat:
+    path: "{{ role_path }}/../../../{{ config_vaultname }}"
+  register: vault_key_result
+
+- name: Create ansible vault key if it does not exist
+  set_fact:
+    vault_key: "{{ lookup('password', '/dev/null chars=ascii_letters') }}"
+  when: not vault_key_result.stat.exists
+
+- name: Save vault key
+  copy:
+    dest: "{{ role_path }}/../../../{{ config_vaultname }}"
+    content: |
+      {{ vault_key }}
+    owner: root
+    force: yes
+  when: not vault_key_result.stat.exists
+
+- name: Check if omnia config file is encrypted
+  command: cat {{ role_path }}/../../../{{ config_filename }}
+  changed_when: false
+  register: config_content
+  no_log: True
+
+- name: Decrypt omnia_config.yml
+  command: >-
+    ansible-vault decrypt {{ role_path }}/../../../{{ config_filename }}
+    --vault-password-file {{ role_path }}/../../../{{ config_vaultname }}
+  when: "'$ANSIBLE_VAULT;' in config_content.stdout"
+
+- name: Include variable file omnia_config.yml
+  include_vars: "{{ role_path }}/../../../{{ config_filename }}"
+  no_log: True
+
+- name: Validate input parameters are not empty
+  fail:
+    msg: "{{ input_config_failure_msg }}"
+  register: input_config_check
+  when:
+    - mariadb_password | length < 1 or
+      k8s_cni | length < 1
+
+- name: Assert mariadb_password
+  assert:
+    that:
+      - mariadb_password | length > min_length | int - 1
+      - mariadb_password | length < max_length | int + 1
+      - '"-" not in mariadb_password '
+      - '"\\" not in mariadb_password '
+      - '"\"" not in mariadb_password '
+      - " \"'\" not in mariadb_password "
+    success_msg: "{{ success_msg_mariadb_password }}"
+    fail_msg: "{{ fail_msg_mariadb_password }}"
+
+- name: Assert kubernetes cni
+  assert:
+    that: "('calico' in k8s_cni) or ('flannel' in k8s_cni)"
+    success_msg: "{{ success_msg_k8s_cni }}"
+    fail_msg: "{{ fail_msg_k8s_cni }}"
+
+- name: Save input variables from file
+  set_fact:
+    db_password: "{{ mariadb_password }}"
+    k8s_cni: "{{ k8s_cni }}"
+  no_log: True
+
+- name: Encrypt input config file
+  command: >-
+    ansible-vault encrypt {{ role_path }}/../../../{{ config_filename }}
+    --vault-password-file {{ role_path }}/../../../{{ config_vaultname }}
+  changed_when: false
diff --git a/appliance/roles/common/tasks/pre_requisite.yml b/appliance/roles/common/tasks/pre_requisite.yml
new file mode 100644
index 000000000..f036018ad
--- /dev/null
+++ b/appliance/roles/common/tasks/pre_requisite.yml
@@ -0,0 +1,46 @@
+# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+
+- name: Set omnia.log file
+  replace:
+    path: /etc/ansible/ansible.cfg
+    regexp: '#log_path = /var/log/ansible.log'
+    replace: 'log_path = /var/log/omnia.log'
+  tags: install
+
+- name: Check OS support
+  fail:
+    msg: "{{ os_status }}"
+  when: not(ansible_distribution == os_name and ansible_distribution_version >= os_version)
+  register: os_value
+  tags: install
+
+- name: Disable SElinux
+  selinux:
+    state: disabled
+  tags: install
+
+- name: Status of SElinux
+  fail:
+    msg: "{{ selinux_status }}"
+  when: ansible_selinux.status != 'disabled'
+  register: selinux_value
+  tags: install
+
+- name: State of firewall
+  service:
+    name: firewalld
+    state: started
+    enabled: yes
diff --git a/appliance/roles/common/vars/main.yml b/appliance/roles/common/vars/main.yml
new file mode 100644
index 000000000..8db490505
--- /dev/null
+++ b/appliance/roles/common/vars/main.yml
@@ -0,0 +1,94 @@
+# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+
+# vars file for common
+
+# Usage: package_installation.yml
+common_packages:
+  - epel-release
+  - yum-utils
+  - git
+  - gcc
+  - gcc-c++
+  - nodejs
+  - device-mapper-persistent-data
+  - bzip2
+  - python2-pip
+  - python3-pip
+  - nano
+  - lvm2
+  - gettext
+  - python-docker
+  - net-tools
+  - python-netaddr
+  - yum-plugin-versionlock
+
+# Usage: pre_requisite.yml
+internet_delay: 0
+internet_timeout: 10
+hostname: github.com
+port_no: 22
+os_name: CentOS
+os_version: '7.9'
+internet_status: "Failed. No Internet connection. Make sure network is up."
+os_status: "Unsupported OS or OS version. OS should be {{ os_name }} and version should be {{ os_version }} or later"
+selinux_status: "SElinux is not disabled. Disable it in /etc/sysconfig/selinux and reboot the system"
+iso_name: CentOS-7-x86_64-Minimal-2009.iso
+iso_fail: "ISO file not found. Download and copy the ISO file to omnia/appliance/roles/provision/files"
+
+# Usage: docker_installation.yml
+docker_repo_url: https://download.docker.com/linux/centos/docker-ce.repo
+docker_repo_dest: /etc/yum.repos.d/docker-ce.repo
+success: '0'
+container_type: docker
+container_repo_install:
+  - docker-ce-cli-20.10.2
+  - docker-ce-20.10.2
+docker_compose: docker-compose
+daemon_dest: /etc/docker/
+
+# Usage: docker_volume.yml
+docker_volume_name: omnia-storage
+
+# Usage: password_config.yml
+input_config_filename: "appliance_config.yml"
+fail_msg_provision_password: "Failed. Incorrect provision_password format provided in appliance_config.yml file"
+success_msg_provision_password: "provision_password validated"
+fail_msg_awx_password: "Failed. Incorrect awx_password format provided in appliance_config.yml file"
+success_msg_awx_password: "awx_password validated"
+fail_msg_hpc_nic: "Failed. Incorrect hpc_nic format provided in appliance_config.yml file"
+success_msg_hpc_nic: "hpc_nic validated"
+fail_msg_public_nic: "Failed. Incorrect public_nic format provided in appliance_config.yml file"
+success_msg_public_nic: "public_nic validated"
+success_mapping_file: "mapping_file_exists validated"
+fail_mapping_file: "Failed. Incorrect mapping_file_exists value in appliance_config.yml. It should be either true or false"
+input_config_failure_msg: "Please provide all the required parameters in appliance_config.yml"
+success_dhcp_range: "DHCP range validated"
+fail_dhcp_range: "Failed. Incorrect range assigned for DHCP"
+success_hpc_ip: "IP validated"
+fail_hpc_ip: "Failed. NIC should be configured"
+fail_mapping_file_path: "Failed. mapping_file_path input is empty in appliance_config.yml. Either set mapping_file_exists to false or provide a path for a valid mapping file."
+invalid_mapping_file_path: "Incorrect mapping_file_path provided in appliance_config.yml"
+invalid_iso_file_path: "Incorrect iso_file_path provided in appliance_config.yml."
+min_length: 8
+max_length: 30
+nic_min_length: 3
+vault_filename: .vault_key
+config_filename: "omnia_config.yml"
+config_vaultname: .omnia_vault_key
+fail_msg_mariadb_password: "Failed. Incorrect mariadb_password format provided in omnia_config.yml file"
+success_msg_mariadb_password: "mariadb_password validated"
+success_msg_k8s_cni: "Kubernetes CNI validated"
+fail_msg_k8s_cni: "Failed. Kubernetes CNI is incorrect in omnia_config.yml"
diff --git a/appliance/roles/inventory/files/add_host.yml b/appliance/roles/inventory/files/add_host.yml
new file mode 100644
index 000000000..b58f1b289
--- /dev/null
+++ b/appliance/roles/inventory/files/add_host.yml
@@ -0,0 +1,47 @@
+# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. +--- + +- name: Check if host already exists + command: awk "{{ '/'+ item + '/' }}" /root/inventory + register: check_host + changed_when: no + +- name: Initialise host description + set_fact: + host_description: "Description Unavailable" + +- name: Fetch description + set_fact: + host_description: "CPU:{{ hostvars[item]['ansible_processor_count'] }} + Cores:{{ hostvars[item]['ansible_processor_cores'] }} + Memory:{{ hostvars[item]['ansible_memtotal_mb'] }}MB + BIOS:{{ hostvars[item]['ansible_bios_version'] }}" + when: not check_host.stdout | regex_search(item) + ignore_errors: yes + +- name: Add host + lineinfile: + path: "/root/inventory" + line: " {{ item }}:\n _awx_description: {{ host_description }}" + when: + - not check_host.stdout | regex_search(item) + - host_description != "Description Unavailable" + +- name: Host added msg + debug: + msg: "{{ host_added_msg + item }}" + when: + - not check_host.stdout | regex_search(item) + - host_description != "Description Unavailable" diff --git a/appliance/roles/inventory/files/create_inventory.yml b/appliance/roles/inventory/files/create_inventory.yml new file mode 100644 index 000000000..4137ae6fb --- /dev/null +++ b/appliance/roles/inventory/files/create_inventory.yml @@ -0,0 +1,148 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
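To illustrate what add_host.yml above produces: starting from the empty /root/inventory skeleton created by the inventory role (see its tasks further below), a successfully registered node would leave the file looking roughly like this (the IP and hardware details are hypothetical):

```
---
all:
  hosts:
    172.17.0.10:
      _awx_description: CPU:2 Cores:16 Memory:128000MB BIOS:2.8.2
```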
+---
+
+- name: Find reachable hosts
+  hosts: all
+  gather_facts: false
+  ignore_unreachable: true
+  ignore_errors: true
+  tasks:
+    - name: Check for reachable nodes
+      command: ping -c1 {{ inventory_hostname }}
+      delegate_to: localhost
+      register: ping_result
+      ignore_errors: yes
+      changed_when: false
+
+    - name: Refresh ssh keys
+      command: ssh-keygen -R {{ inventory_hostname }}
+      delegate_to: localhost
+      changed_when: false
+
+    - name: Group reachable hosts
+      group_by:
+        key: "reachable"
+      when: "'100% packet loss' not in ping_result.stdout"
+
+- name: Get provision password
+  hosts: localhost
+  connection: local
+  gather_facts: false
+  tasks:
+    - name: Include vars file of inventory role
+      include_vars: ../vars/main.yml
+
+- name: Set hostname on reachable nodes and gather facts
+  hosts: reachable
+  gather_facts: False
+  ignore_unreachable: true
+  remote_user: "{{ cobbler_username }}"
+  vars:
+    ansible_password: "{{ cobbler_password }}"
+    ansible_become_pass: "{{ cobbler_password }}"
+    ansible_ssh_common_args: '-o StrictHostKeyChecking=no'
+    mapping_file_present: ""
+  tasks:
+    - name: Setup
+      setup:
+        filter: ansible_*
+
+    - name: Check hostname of server
+      command: hostname
+      register: hostname_check
+      changed_when: false
+      ignore_errors: true
+
+    - name: Check if IP present in mapping file
+      command: grep "{{ inventory_hostname }}" ../../provision/files/new_mapping_file.csv
+      delegate_to: localhost
+      register: file_present
+      when: mapping_file | bool == true
+      ignore_errors: true
+
+    - name: Set fact if mapping file present
+      set_fact:
+        mapping_file_present: "{{ file_present.stdout }}"
+      when: mapping_file | bool == true
+      ignore_errors: true
+
+    - name: Get the static hostname from mapping file
+      shell: awk -F',' '$3 == "{{ inventory_hostname }}" { print $2 }' ../../provision/files/new_mapping_file.csv
+      delegate_to: localhost
+      when: ('localhost' in hostname_check.stdout) and (mapping_file_present != "" ) and ( mapping_file | bool == true )
+      register: host_name
+      ignore_errors: true
+
+    - name: Set the hostname from mapping file
+      hostname:
+        name: "{{ host_name.stdout }}"
+      when: ('localhost' in hostname_check.stdout) and (mapping_file_present != "" ) and (mapping_file | bool == true )
+      ignore_errors: true
+
+    - name: Set the hostname if hostname not present in mapping file
+      hostname:
+        name: "compute{{ inventory_hostname.split('.')[-2] + '-' + inventory_hostname.split('.')[-1] }}"
+      when: ('localhost' in hostname_check.stdout) and (file_present.rc != 0) and (mapping_file | bool == true )
+      ignore_errors: true
+
+    - name: Set the system hostname
+      hostname:
+        name: "compute{{ inventory_hostname.split('.')[-2] + '-' + inventory_hostname.split('.')[-1] }}"
+      when: ('localhost' in hostname_check.stdout) and (mapping_file | bool == false)
+      ignore_errors: true
+
+    - name: Add new hostname to /etc/hosts from mapping file
+      lineinfile:
+        dest: /etc/hosts
+        regexp: '^127\.0\.0\.1[ \t]+localhost'
+        line: "127.0.0.1 localhost {{ host_name.stdout }}"
+        state: present
+      when: ('localhost' in hostname_check.stdout) and ( mapping_file_present != "" ) and ( mapping_file | bool == true )
+      ignore_errors: true
+
+    - name: Add new hostname to /etc/hosts if hostname not present in mapping file
+      lineinfile:
+        dest: /etc/hosts
+        regexp: '^127\.0\.0\.1[ \t]+localhost'
+        line: "127.0.0.1 localhost compute{{ inventory_hostname.split('.')[-2] + '-' + inventory_hostname.split('.')[-1] }}"
+        state: present
+      when: ('localhost' in hostname_check.stdout) and ( file_present.rc != 0 ) and ( mapping_file | bool == true )
+      ignore_errors: true
+
+    - name: Add new hostname to /etc/hosts
+      lineinfile:
+        dest: /etc/hosts
+        regexp: '^127\.0\.0\.1[ \t]+localhost'
+        line: "127.0.0.1 localhost compute{{ inventory_hostname.split('.')[-2] + '-' + inventory_hostname.split('.')[-1] }}"
+        state: present
+      when: ('localhost' in hostname_check.stdout) and (mapping_file | bool == false )
+      ignore_errors: true
+
+- name: Update inventory
+  hosts: localhost
+  connection: local
+  gather_facts: false
+  tasks:
+    - name: Update inventory file
+      block:
+        - name: Fetch facts and add new hosts
+          include_tasks: add_host.yml
+          with_items: "{{ groups['reachable'] }}"
+      when: "'reachable' in groups"
+
+    - name: Show unreachable hosts
+      debug:
+        msg: "{{ host_unreachable_msg }} + {{ groups['ungrouped'] }}"
+      when: "'ungrouped' in groups"
diff --git a/appliance/roles/inventory/tasks/main.yml b/appliance/roles/inventory/tasks/main.yml
new file mode 100644
index 000000000..76d46adf6
--- /dev/null
+++ b/appliance/roles/inventory/tasks/main.yml
@@ -0,0 +1,100 @@
+# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+- name: Set Facts
+  set_fact:
+    ansible_ssh_common_args: '-o StrictHostKeyChecking=no'
+
+- name: Check if provisioned host file exists
+  stat:
+    path: "{{ role_path }}/files/provisioned_hosts.yml"
+  register: provisioned_file_result
+
+- name: Include vars file of common role
+  include_vars: "{{ role_path }}/../common/vars/main.yml"
+  no_log: True
+
+- name: Include vars file of web_ui role
+  include_vars: "{{ role_path }}/../web_ui/vars/main.yml"
+  no_log: True
+
+- name: Update inventory file
+  block:
+    - name: Check if input config file is encrypted
+      command: cat {{ input_config_filename }}
+      changed_when: false
+      register: config_content
+
+    - name: Decrypt appliance_config.yml
+      command: >-
+        ansible-vault decrypt {{ input_config_filename }}
+        --vault-password-file {{ vault_filename }}
+      when: "'$ANSIBLE_VAULT;' in config_content.stdout"
+
+    - name: Include variable file appliance_config.yml
+      include_vars: "{{ input_config_filename }}"
+      no_log: True
+
+    - name: Save input variables from file
+      set_fact:
+        cobbler_password: "{{ provision_password }}"
+        mapping_file: false
+        path_mapping_file: "{{ mapping_file_path }}"
+      no_log: True
+
+    - name: Check the status for mapping file
+      set_fact:
+        mapping_file: true
+      when: path_mapping_file != ""
+
+    - name: Encrypt input config file
+      command: >-
+        ansible-vault encrypt {{ input_config_filename }}
+        --vault-password-file {{ vault_filename }}
+      changed_when: false
+
+    - name: Check if inventory file already exists
+      file:
+        path: "/root/inventory"
+        state: absent
+
+    - name: Create empty inventory file
+      copy:
+        dest: "/root/inventory"
+        content: |
+          ---
+          all:
+            hosts:
+        owner: root
+        mode: 0775
+
+    - name: Add inventory playbook
+      block:
+        - name: Add hosts with description to inventory file
+          command: >-
+            ansible-playbook -i {{ role_path }}/files/provisioned_hosts.yml
+            {{ role_path
}}/files/create_inventory.yml + --extra-vars "cobbler_username={{ cobbler_username }} cobbler_password={{ cobbler_password }} mapping_file={{ mapping_file | bool }}" + no_log: True + register: register_error + rescue: + - name: Fail if host addition was not successful + fail: + msg: "{{ register_error.stderr + register_error.stdout | regex_replace(cobbler_username) | regex_replace(cobbler_password) }}" + + when: provisioned_file_result.stat.exists + +- name: push inventory to AWX + command: awx-manage inventory_import --inventory-name {{ omnia_inventory_name }} --source /root/inventory + when: provisioned_file_result.stat.exists diff --git a/appliance/roles/inventory/vars/main.yml b/appliance/roles/inventory/vars/main.yml new file mode 100644 index 000000000..fd8552f7b --- /dev/null +++ b/appliance/roles/inventory/vars/main.yml @@ -0,0 +1,16 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- +host_added_msg: "Added host to inventory: " +host_unreachable_msg: "Following hosts are unreachable: " \ No newline at end of file diff --git a/appliance/roles/provision/files/Dockerfile b/appliance/roles/provision/files/Dockerfile new file mode 100644 index 000000000..5151c25ea --- /dev/null +++ b/appliance/roles/provision/files/Dockerfile @@ -0,0 +1,51 @@ +FROM centos:7 + +# RPM REPOs +RUN yum install -y \ + epel-release \ + && yum clean all \ + && rm -rf /var/cache/yum + +RUN yum update -y \ + && yum clean all \ + && rm -rf /var/cache/yum + +RUN yum install -y \ + cobbler \ + cobbler-web \ + ansible \ + pykickstart \ + cronie \ + debmirror \ + curl \ + rsync \ + httpd\ + dhcp \ + xinetd \ + net-tools \ + memtest86+ \ + && yum clean all \ + && rm -rf /var/cache/yum + +RUN mkdir /root/omnia + +#Copy Configuration files +COPY settings /etc/cobbler/settings +COPY dhcp.template /etc/cobbler/dhcp.template +COPY modules.conf /etc/cobbler/modules.conf +COPY tftp /etc/xinetd.d/tftp +COPY .users.digest /etc/cobbler/users.digest +COPY kickstart.yml /root +COPY tftp.yml /root +COPY inventory_creation.yml /root +COPY centos7.ks /var/lib/cobbler/kickstarts + +EXPOSE 69 80 443 25151 + +VOLUME [ "/var/www/cobbler", "/var/lib/cobbler/backup", "/mnt" ] + +RUN systemctl enable cobblerd +RUN systemctl enable httpd +RUN systemctl enable rsyncd + +CMD ["sbin/init"] \ No newline at end of file diff --git a/appliance/roles/provision/files/cobbler_settings b/appliance/roles/provision/files/cobbler_settings new file mode 100644 index 000000000..7f38d53d1 --- /dev/null +++ b/appliance/roles/provision/files/cobbler_settings @@ -0,0 +1,469 @@ +--- +# cobbler settings file +# restart cobblerd and run "cobbler sync" after making changes +# This config file is in YAML 1.0 format +# see http://yaml.org +# ========================================================== +# if 1, cobbler will allow insertions of system records that duplicate +# the --dns-name information of other system records. In general, +# this is undesirable and should be left 0. 
+allow_duplicate_hostnames: 0 + +# if 1, cobbler will allow insertions of system records that duplicate +# the ip address information of other system records. In general, +# this is undesirable and should be left 0. +allow_duplicate_ips: 0 + +# if 1, cobbler will allow insertions of system records that duplicate +# the mac address information of other system records. In general, +# this is undesirable. +allow_duplicate_macs: 0 + +# if 1, cobbler will allow settings to be changed dynamically without +# a restart of the cobblerd daemon. You can only change this variable +# by manually editing the settings file, and you MUST restart cobblerd +# after changing it. +allow_dynamic_settings: 0 + +# by default, installs are *not* set to send installation logs to the cobbler +# # # server. With 'anamon_enabled', kickstart templates may use the pre_anamon +# # # snippet to allow remote live monitoring of their installations from the +# # # cobbler server. Installation logs will be stored under +# # # /var/log/cobbler/anamon/. NOTE: This does allow an xmlrpc call to send logs +# # # to this directory, without authentication, so enable only if you are +# # # ok with this limitation. +anamon_enabled: 0 + +# If using authn_pam in the modules.conf, this can be configured +# to change the PAM service authentication will be tested against. +# The default value is "login". +authn_pam_service: "login" + +# How long the authentication token is valid for, in seconds +auth_token_expiration: 3600 + +# Email out a report when cobbler finishes installing a system. +# enabled: set to 1 to turn this feature on +# sender: optional +# email: which addresses to email +# smtp_server: used to specify another server for an MTA +# subject: use the default subject unless overridden +build_reporting_enabled: 0 +build_reporting_sender: "" +build_reporting_email: [ 'root@localhost' ] +build_reporting_smtp_server: "localhost" +build_reporting_subject: "" +build_reporting_ignorelist: [ "" ] + +# Cheetah-language kickstart templates can import Python modules. +# while this is a useful feature, it is not safe to allow them to +# import anything they want. This whitelists which modules can be +# imported through Cheetah. Users can expand this as needed but +# should never allow modules such as subprocess or those that +# allow access to the filesystem as Cheetah templates are evaluated +# by cobblerd as code. +cheetah_import_whitelist: + - "random" + - "re" + - "time" + +# Default createrepo_flags to use for new repositories. If you have +# createrepo >= 0.4.10, consider "-c cache --update -C", which can +# dramatically improve your "cobbler reposync" time. "-s sha" +# enables working with Fedora repos from F11/F12 from EL-4 or +# EL-5 without python-hashlib installed (which is not available +# on EL-4) +createrepo_flags: "-c cache -s sha" + +# if no kickstart is specified to profile add, use this template +default_kickstart: /var/lib/cobbler/kickstarts/default.ks + +# configure all installed systems to use these nameservers by default +# unless defined differently in the profile. For DHCP configurations +# you probably do /not/ want to supply this. +default_name_servers: [] + +# if using the authz_ownership module (see the Wiki), objects +# created without specifying an owner are assigned to this +# owner and/or group. Can be a comma seperated list. +default_ownership: + - "admin" + +# cobbler has various sample kickstart templates stored +# in /var/lib/cobbler/kickstarts/. 
This controls +# what install (root) password is set up for those +# systems that reference this variable. The factory +# default is "cobbler" and cobbler check will warn if +# this is not changed. +# The simplest way to change the password is to run +# openssl passwd -1 +# and put the output between the "" below. +default_password_crypted: "password" + +# the default template type to use in the absence of any +# other detected template. If you do not specify the template +# with '#template=' on the first line of your +# templates/snippets, cobbler will assume try to use the +# following template engine to parse the templates. +# +# Current valid values are: cheetah, jinja2 +default_template_type: "cheetah" + +# for libvirt based installs in koan, if no virt bridge +# is specified, which bridge do we try? For EL 4/5 hosts +# this should be xenbr0, for all versions of Fedora, try +# "virbr0". This can be overriden on a per-profile +# basis or at the koan command line though this saves +# typing to just set it here to the most common option. +default_virt_bridge: xenbr0 + +# use this as the default disk size for virt guests (GB) +default_virt_file_size: 5 + +# use this as the default memory size for virt guests (MB) +default_virt_ram: 512 + +# if koan is invoked without --virt-type and no virt-type +# is set on the profile/system, what virtualization type +# should be assumed? Values: xenpv, xenfv, qemu, vmware +# (NOTE: this does not change what virt_type is chosen by import) +default_virt_type: xenpv + +# enable gPXE booting? Enabling this option will cause cobbler +# to copy the undionly.kpxe file to the tftp root directory, +# and if a profile/system is configured to boot via gpxe it will +# chain load off pxelinux.0. +# Default: 0 +enable_gpxe: 0 + +# controls whether cobbler will add each new profile entry to the default +# PXE boot menu. This can be over-ridden on a per-profile +# basis when adding/editing profiles with --enable-menu=0/1. Users +# should ordinarily leave this setting enabled unless they are concerned +# with accidental reinstalls from users who select an entry at the PXE +# boot menu. Adding a password to the boot menus templates +# may also be a good solution to prevent unwanted reinstallations +enable_menu: 1 + +# enable Func-integration? This makes sure each installed machine is set up +# to use func out of the box, which is a powerful way to script and control +# remote machines. +# Func lives at http://fedorahosted.org/func +# read more at https://github.com/cobbler/cobbler/wiki/Func-integration +# you will need to mirror Fedora/EPEL packages for this feature, so see +# https://github.com/cobbler/cobbler/wiki/Manage-yum-repos if you want cobbler +# to help you with this +func_auto_setup: 0 +func_master: overlord.example.org + +# change this port if Apache is not running plaintext on port +# 80. Most people can leave this alone. +http_port: 80 + +# kernel options that should be present in every cobbler installation. +# kernel options can also be applied at the distro/profile/system +# level. +kernel_options: + ksdevice: link + lang: 'en_US ' + text: ~ + +# s390 systems require additional kernel options in addition to the +# above defaults +kernel_options_s390x: + RUNKS: 1 + ramdisk_size: 40000 + root: /dev/ram0 + ro: ~ + ip: off + vnc: ~ + +# configuration options if using the authn_ldap module. See the +# the Wiki for details. This can be ignored if you are not using +# LDAP for WebUI/XMLRPC authentication. 
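As the comment above notes, a replacement value for default_password_crypted can be generated with openssl and pasted between the quotes (the password below is a placeholder):

```
openssl passwd -1 'MyS3cretPassw0rd'
# prints an MD5-crypt hash such as $1$<salt>$<hash>; output varies per run due to the random salt
```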
+ldap_server: "ldap.example.com" +ldap_base_dn: "DC=example,DC=com" +ldap_port: 389 +ldap_tls: 1 +ldap_anonymous_bind: 1 +ldap_search_bind_dn: '' +ldap_search_passwd: '' +ldap_search_prefix: 'uid=' +ldap_tls_cacertfile: '' +ldap_tls_keyfile: '' +ldap_tls_certfile: '' + +# cobbler has a feature that allows for integration with config management +# systems such as Puppet. The following parameters work in conjunction with +# --mgmt-classes and are described in furhter detail at: +# https://github.com/cobbler/cobbler/wiki/Using-cobbler-with-a-configuration-management-system +mgmt_classes: [] +mgmt_parameters: + from_cobbler: 1 + +# if enabled, this setting ensures that puppet is installed during +# machine provision, a client certificate is generated and a +# certificate signing request is made with the puppet master server +puppet_auto_setup: 0 + +# when puppet starts on a system after installation it needs to have +# its certificate signed by the puppet master server. Enabling the +# following feature will ensure that the puppet server signs the +# certificate after installation if the puppet master server is +# running on the same machine as cobbler. This requires +# puppet_auto_setup above to be enabled +sign_puppet_certs_automatically: 0 + +# location of the puppet executable, used for revoking certificates +puppetca_path: "/usr/bin/puppet" + +# when a puppet managed machine is reinstalled it is necessary to +# remove the puppet certificate from the puppet master server before a +# new certificate is signed (see above). Enabling the following +# feature will ensure that the certificate for the machine to be +# installed is removed from the puppet master server if the puppet +# master server is running on the same machine as cobbler. This +# requires puppet_auto_setup above to be enabled +remove_old_puppet_certs_automatically: 0 + +# choose a --server argument when running puppetd/puppet agent during kickstart +#puppet_server: 'puppet' + +# let cobbler know that you're using a newer version of puppet +# choose version 3 to use: 'puppet agent'; version 2 uses status quo: 'puppetd' +#puppet_version: 2 + +# choose whether to enable puppet parameterized classes or not. +# puppet versions prior to 2.6.5 do not support parameters +#puppet_parameterized_classes: 1 + +# set to 1 to enable Cobbler's DHCP management features. +# the choice of DHCP management engine is in /etc/cobbler/modules.conf +manage_dhcp: 1 + +# set to 1 to enable Cobbler's DNS management features. +# the choice of DNS mangement engine is in /etc/cobbler/modules.conf +manage_dns: 0 + +# set to path of bind chroot to create bind-chroot compatible bind +# configuration files. This should be automatically detected. +bind_chroot_path: "" + +# set to the ip address of the master bind DNS server for creating secondary +# bind configuration files +bind_master: 127.0.0.1 + +# manage_genders - Bool to enable/disable managing an /etc/genders file for use with pdsh and others. +manage_genders: 0 + +# bind_manage_ipmi - used to let bind manage IPMI addresses if the power management address is an IP and if manage_bind is set. +bind_manage_ipmi: 0 + +# set to 1 to enable Cobbler's TFTP management features. +# the choice of TFTP mangement engine is in /etc/cobbler/modules.conf +manage_tftpd: 1 + +# set to 1 to enable Cobbler's RSYNC management features. 
+# set to 1 to enable Cobbler's RSYNC management features.
+manage_rsync: 0
+
+# if using BIND (named) for DNS management in /etc/cobbler/modules.conf
+# and manage_dns is enabled (above), this lists which zones are managed
+# See the Wiki (https://github.com/cobbler/cobbler/wiki/Dns-management) for more info
+manage_forward_zones: []
+manage_reverse_zones: ['172.17']
+
+# if using cobbler with manage_dhcp, put the IP address
+# of the cobbler server here so that PXE booting guests can find it
+# if you do not set this correctly, this will be manifested in TFTP open timeouts.
+next_server: ip
+
+# settings for power management features. Optional.
+# see https://github.com/cobbler/cobbler/wiki/Power-management to learn more
+# choices (refer to codes.py):
+#    apc_snmp bladecenter bullpap drac ether_wake ilo integrity
+#    ipmilan ipmitool lpar rsa virsh wti
+power_management_default_type: 'ipmitool'
+
+# the commands used by the power management module are sourced
+# from what directory?
+power_template_dir: "/etc/cobbler/power"
+
+# if this setting is set to 1, cobbler systems that pxe boot
+# will request at the end of their installation to toggle the
+# --netboot-enabled record in the cobbler system record. This eliminates
+# the potential for a PXE boot loop if the system is set to PXE
+# first in its BIOS order. Enable this if PXE is first in your BIOS
+# boot order, otherwise leave this disabled. See the manpage
+# for --netboot-enabled.
+pxe_just_once: 1
+
+# the templates used for PXE config generation are sourced
+# from what directory?
+pxe_template_dir: "/etc/cobbler/pxe"
+
+# Path to where system consoles are
+consoles: "/var/consoles"
+
+# Are you using a Red Hat management platform in addition to Cobbler?
+# Cobbler can help you register to it. Choose one of the following:
+#    "off"    : I'm not using Red Hat Network, Satellite, or Spacewalk
+#    "hosted" : I'm using Red Hat Network
+#    "site"   : I'm using Red Hat Satellite Server or Spacewalk
+# You will also want to read: https://github.com/cobbler/cobbler/wiki/Tips-for-RHN
+redhat_management_type: "off"
+
+# if redhat_management_type is enabled, choose your server
+#    "management.example.org" : For Satellite or Spacewalk
+#    "xmlrpc.rhn.redhat.com"  : For Red Hat Network
+# This setting is also used by the code that supports using Spacewalk/Satellite users/passwords
+# within Cobbler Web and Cobbler XMLRPC. Using RHN Hosted for this is not supported.
+# This feature can be used even if redhat_management_type is off; you just have
+# to have authn_spacewalk selected in modules.conf
+redhat_management_server: "xmlrpc.rhn.redhat.com"
+
+# specify the default Red Hat authorization key to use to register
+# system. If left blank, no registration will be attempted. Similarly
+# you can set the --redhat-management-key to blank on any system to
+# keep it from trying to register.
+redhat_management_key: ""
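Since `power_management_default_type` is set to ipmitool above, a node's BMC can be attached to its system record and driven through cobbler itself. A hedged sketch with illustrative names, addresses, and credentials (flag spellings per the cobbler 2.x CLI; verify against your installed version):

```
# Attach BMC credentials to a system record, then power the node on
cobbler system edit --name=node001 --power-type=ipmitool \
  --power-address=10.0.0.101 --power-user=root --power-pass=calvin
cobbler system poweron --name=node001
```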
+# if using authn_spacewalk in modules.conf to let cobbler authenticate
+# against Satellite/Spacewalk's auth system, by default it will not allow per user
+# access into Cobbler Web and Cobbler XMLRPC.
+# in order to permit this, the following setting must be enabled HOWEVER
+# doing so will permit all Spacewalk/Satellite users of certain types to edit all
+# of cobbler's configuration.
+# these roles are: config_admin and org_admin
+# users should turn this on only if they want this behavior and
+# do not have a cross-multi-org separation concern. If you have
+# a single org in your satellite, it's probably safe to turn this
+# on and then you can use CobblerWeb alongside a Satellite install.
+redhat_management_permissive: 0
+
+# if set to 1, allows /usr/bin/cobbler-register (part of the koan package)
+# to be used to remotely add new cobbler system records to cobbler.
+# this effectively allows for registration of new hardware from system
+# records.
+register_new_installs: 0
+
+# Flags to use for yum's reposync. If your version of yum reposync
+# does not support -l, you may need to remove that option.
+reposync_flags: "-l -n -d"
+
+# when DHCP and DNS management are enabled, cobbler sync can automatically
+# restart those services to apply changes. The exception for this is
+# if using ISC for DHCP, then omapi eliminates the need for a restart.
+# omapi, however, is experimental and not recommended for most configurations.
+# If DHCP and DNS are going to be managed, but hosted on a box that
+# is not on this server, disable restarts here and write some other
+# script to ensure that the config files get copied/rsynced to the destination
+# box. This can be done by modifying the restart services trigger.
+# Note that if manage_dhcp and manage_dns are disabled, the respective
+# parameter will have no effect. Most users should not need to change
+# this.
+restart_dns: 1
+restart_dhcp: 1
+
+# install triggers are scripts in /var/lib/cobbler/triggers/install
+# that are triggered in kickstart pre and post sections. Any
+# executable script in those directories is run. They can be used
+# to send email or perform other actions. They are currently
+# run as root so if you do not need this functionality you can
+# disable it, though this will also disable "cobbler status" which
+# uses a logging trigger to audit install progress.
+run_install_triggers: 1
+
+# enables a trigger which version controls all changes to /var/lib/cobbler
+# when add, edit, or sync events are performed. This can be used
+# to revert to previous database versions, generate RSS feeds, or for
+# other auditing or backup purposes. "git" and "hg" are currently supported,
+# but git is the recommended SCM for use with this feature.
+scm_track_enabled: 0
+scm_track_mode: "git"
+
+# this is the address of the cobbler server -- as it is used
+# by systems during the install process, it must be the address
+# or hostname of the system as those systems can see the server.
+# if you have a server that appears differently to different subnets
+# (dual homed, etc), you need to read the --server-override section
+# of the manpage for how that works.
+server: ip
+
+# If set to 1, all commands will be forced to use the localhost address
+# instead of using the above value which can force commands like
+# cobbler sync to open a connection to a remote address if one is in the
+# configuration and would traceback.
+client_use_localhost: 0
+
+# If set to 1, all commands to the API (not directly to the XMLRPC
+# server) will go over HTTPS instead of plaintext. Be sure to change
+# the http_port setting to the correct value for the web server
+client_use_https: 0
+
+# this is a directory of files that cobbler uses to make
+# templating easier. See the Wiki for more information. Changing
+# this directory should not be required.
+snippetsdir: /var/lib/cobbler/snippets
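If `scm_track_enabled` above is ever flipped to 1 with the default git mode, /var/lib/cobbler becomes a version-controlled tree, which makes configuration drift easy to audit after the fact:

```
# Inspect the change history that cobbler's scm_track trigger records
git -C /var/lib/cobbler log --oneline -5
```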
+# Normally if a kickstart is specified at a remote location, this
+# URL will be passed directly to the kickstarting system, thus bypassing
+# the usual snippet templating Cobbler does for local kickstart files. If
+# this option is enabled, Cobbler will fetch the file contents internally
+# and serve a templated version of the file to the client.
+template_remote_kickstarts: 0
+
+# should new profiles for virtual machines default to auto booting with the physical host when the physical host reboots?
+# this can be overridden on each profile or system object.
+virt_auto_boot: 1
+
+# cobbler's web directory. Don't change this setting -- see the
+# Wiki on "relocating your cobbler install" if your /var partition
+# is not large enough.
+webdir: /var/www/cobbler
+
+# cobbler's public XMLRPC listens on this port. Change this only
+# if absolutely needed, as you'll have to start supplying a new
+# port option to koan if it is not the default.
+xmlrpc_port: 25151
+
+# "cobbler repo add" commands set cobbler up with repository
+# information that can be used during kickstart and is automatically
+# set up in the cobbler kickstart templates. By default, these
+# are only available at install time. To make these repositories
+# usable on installed systems (since cobbler makes a very
+# convenient mirror), set this to 1. Most users can safely set this to 1. Users
+# who have a dual homed cobbler server, or are installing laptops that
+# will not always have access to the cobbler server may wish to leave
+# this as 0. In that case, the cobbler mirrored yum repos are still
+# accessible at http://cobbler.example.org/cblr/repo_mirror and yum
+# configuration can still be done manually. This is just a shortcut.
+yum_post_install_mirror: 1
+
+# the default yum priority for all the distros. This is only used
+# if yum-priorities plugin is used. 1=maximum. Tweak with caution.
+yum_distro_priority: 1
+
+# Flags to use for yumdownloader. Not all versions may support
+# --resolve.
+yumdownloader_flags: "--resolve"
+
+# sort and indent JSON output to make it more human-readable
+serializer_pretty_json: 0
+
+# replication rsync options for distros, kickstarts, snippets set to override default value of "-avzH"
+replicate_rsync_options: "-avzH"
+
+# replication rsync options for repos set to override default value of "-avzH"
+replicate_repo_rsync_options: "-avzH"
+
+# always write DHCP entries, regardless if netboot is enabled
+always_write_dhcp_entries: 0
+
+# external proxy - used by: get-loaders, reposync, signature update
+# eg: proxy_url_ext: "http://192.168.1.1:8080"
+proxy_url_ext: ""
+
+# internal proxy - used by systems to reach cobbler for kickstarts
+# eg: proxy_url_int: "http://10.0.0.1:8080"
+proxy_url_int: ""
+
diff --git a/appliance/roles/provision/files/inventory_creation.yml b/appliance/roles/provision/files/inventory_creation.yml
new file mode 100644
index 000000000..a5f9f0dfa
--- /dev/null
+++ b/appliance/roles/provision/files/inventory_creation.yml
@@ -0,0 +1,43 @@
+# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
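The playbook that follows boils down to scraping IPv4 addresses out of the dhcpd lease database and merging them with the statically mapped hosts. The core extraction, as a one-liner for manual testing (assumes the same lease-file path the playbook uses):

```
# Unique IPv4 addresses currently present in the lease file
grep -Eo '([0-9]{1,3}\.){3}[0-9]{1,3}' /var/lib/dhcpd/dhcpd.leases | sort -u
```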
+---
+
+- hosts: localhost
+  connection: local
+  gather_facts: false
+  tasks:
+    - name: Read dhcp file
+      set_fact:
+        var: "{{ lookup('file', '/var/lib/dhcpd/dhcpd.leases').split()| unique | select| list }}"
+
+    - name: Filter the ip
+      set_fact:
+        vars_new: "{{ var| ipv4('address')| to_nice_yaml}}"
+
+    - name: Create the static ip
+      shell: awk -F',' 'NR >1{print $3}' omnia/appliance/roles/provision/files/new_mapping_file.csv > static_hosts.yml
+      changed_when: false
+      ignore_errors: true
+
+    - name: Create the dynamic inventory
+      shell: |
+        echo "[all]" > omnia/appliance/roles/inventory/files/provisioned_hosts.yml
+        echo "{{ vars_new }}" > temp.txt
+        # first octet must allow 0-9 so 10.x/20x.x networks match too
+        egrep -o '[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}' temp.txt >> dynamic_hosts.yml
+      changed_when: false
+      ignore_errors: true
+
+    - name: Final inventory
+      shell: cat dynamic_hosts.yml static_hosts.yml| sort -ur >> omnia/appliance/roles/inventory/files/provisioned_hosts.yml
+      changed_when: false
diff --git a/appliance/roles/provision/files/kickstart.yml b/appliance/roles/provision/files/kickstart.yml
new file mode 100644
index 000000000..cb92a2f72
--- /dev/null
+++ b/appliance/roles/provision/files/kickstart.yml
@@ -0,0 +1,121 @@
+# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+- name: Initial cobbler setup
+  hosts: localhost
+  connection: local
+  gather_facts: false
+  vars:
+    name_iso: CentOS7
+    distro_name: CentOS7-x86_64
+  tasks:
+    - name: Inside cobbler container
+      debug:
+        msg: "Hiii!
I am cobbler"
+
+    - name: Start required services
+      service:
+        name: "{{ item }}"
+        state: started
+      loop:
+        - cobblerd
+        - xinetd
+        - rsyncd
+        - tftp
+        - httpd
+
+    - name: Cobbler get-loaders
+      command: cobbler get-loaders
+      changed_when: false
+
+    - name: Get fence agents
+      package:
+        name: fence-agents
+        state: present
+
+    - name: Comment out dists in /etc/debmirror.conf
+      replace:
+        path: "/etc/debmirror.conf"
+        regexp: "^@dists=\"sid\";"
+        replace: "#@dists=\"sid\";"
+
+    - name: Comment out arches in /etc/debmirror.conf
+      replace:
+        path: "/etc/debmirror.conf"
+        regexp: "^@arches=\"i386\";"
+        replace: "#@arches=\"i386\";"
+
+    - name: Add curl to PATH
+      shell: export PATH="/usr/bin/curl:$PATH"
+
+    - name: Run import command
+      command: cobbler import --arch=x86_64 --path=/mnt --name="{{ name_iso }}"
+      changed_when: false
+
+    - name: Distro list
+      command: cobbler distro edit --name="{{ distro_name }}" --kernel=/var/www/cobbler/ks_mirror/CentOS7-x86_64/isolinux/vmlinuz --initrd=/var/www/cobbler/ks_mirror/CentOS7-x86_64/isolinux/initrd.img
+      changed_when: false
+
+    - name: Kickstart profile
+      command: cobbler profile edit --name="{{ distro_name }}" --kickstart=/var/lib/cobbler/kickstarts/centos7.ks
+      changed_when: false
+
+    - name: Syncing of cobbler
+      command: cobbler sync
+      changed_when: false
+
+    - name: Disable default apache webpage
+      blockinfile:
+        state: present
+        insertafter: '^#insert the content here for disabling the default apache webpage'
+        dest: /etc/httpd/conf/httpd.conf
+        block: |
+          <Directory /var/www/html>
+            Order Deny,Allow
+            Deny from all
+            Options None
+            AllowOverride None
+          </Directory>
+
+    - name: Restart cobbler
+      service:
+        name: cobblerd
+        state: restarted
+
+    - name: Restart httpd
+      service:
+        name: httpd
+        state: restarted
+
+    - name: Restart xinetd
+      service:
+        name: xinetd
+        state: restarted
+
+    - name: Restart dhcpd
+      service:
+        name: dhcpd
+        state: restarted
+
+    - name: Add tftp cron job
+      cron:
+        name: Start tftp service
+        minute: "*"
+        job: "ansible-playbook /root/tftp.yml"
+
+    - name: Add inventory cron job
+      cron:
+        name: Create inventory
+        minute: "*/5"
+        job: "ansible-playbook /root/inventory_creation.yml"
diff --git a/appliance/roles/provision/files/modules.conf b/appliance/roles/provision/files/modules.conf
new file mode 100644
index 000000000..2b435ffa6
--- /dev/null
+++ b/appliance/roles/provision/files/modules.conf
@@ -0,0 +1,84 @@
+# cobbler module configuration file
+# =================================
+
+# authentication:
+# what users can log into the WebUI and Read-Write XMLRPC?
+# choices:
+#    authn_denyall    -- no one (default)
+#    authn_configfile -- use /etc/cobbler/users.digest (for basic setups)
+#    authn_passthru   -- ask Apache to handle it (used for kerberos)
+#    authn_ldap       -- authenticate against LDAP
+#    authn_spacewalk  -- ask Spacewalk/Satellite (experimental)
+#    authn_pam        -- use PAM facilities
+#    authn_testing    -- username/password is always testing/testing (debug)
+#    (user supplied)  -- you may write your own module
+# WARNING: this is a security setting, do not choose an option blindly.
+# for more information:
+# https://github.com/cobbler/cobbler/wiki/Cobbler-web-interface
+# https://github.com/cobbler/cobbler/wiki/Security-overview
+# https://github.com/cobbler/cobbler/wiki/Kerberos
+# https://github.com/cobbler/cobbler/wiki/Ldap
+
+[authentication]
+module = authn_configfile
+
+# authorization:
+# once a user has been cleared by the WebUI/XMLRPC, what can they do?
+# choices:
+#    authz_allowall   -- full access for all authenticated users (default)
+#    authz_ownership  -- use users.conf, but add object ownership semantics
+#    (user supplied)  -- you may write your own module
+# WARNING: this is a security setting, do not choose an option blindly.
+# If you want to further restrict cobbler with ACLs for various groups,
+# pick authz_ownership. authz_allowall does not support ACLs. configfile
+# does, but it does not support object ownership, which is useful as an
+# additional layer of control.
+
+# for more information:
+# https://github.com/cobbler/cobbler/wiki/Cobbler-web-interface
+# https://github.com/cobbler/cobbler/wiki/Security-overview
+# https://github.com/cobbler/cobbler/wiki/Web-authorization
+
+[authorization]
+module = authz_allowall
+
+# dns:
+# chooses the DNS management engine if manage_dns is enabled
+# in /etc/cobbler/settings, which is off by default.
+# choices:
+#    manage_bind    -- default, uses BIND/named
+#    manage_dnsmasq -- uses dnsmasq, also must select dnsmasq for dhcp below
+# NOTE: more configuration is still required in /etc/cobbler
+# for more information:
+# https://github.com/cobbler/cobbler/wiki/Dns-management
+
+[dns]
+module = manage_dnsmasq
+
+# dhcp:
+# chooses the DHCP management engine if manage_dhcp is enabled
+# in /etc/cobbler/settings, which is off by default.
+# choices:
+#    manage_isc     -- default, uses ISC dhcpd
+#    manage_dnsmasq -- uses dnsmasq, also must select dnsmasq for dns above
+# NOTE: more configuration is still required in /etc/cobbler
+# for more information:
+# https://github.com/cobbler/cobbler/wiki/Dhcp-management
+
+[dhcp]
+module = manage_isc
+
+# tftpd:
+# chooses the TFTP management engine if manage_tftp is enabled
+# in /etc/cobbler/settings, which is ON by default.
+#
+# choices:
+#    manage_in_tftpd -- default, uses the system's tftp server
+#    manage_tftpd_py -- uses cobbler's tftp server
+#
+
+[tftpd]
+module = manage_in_tftpd
+
+#--------------------------------------------------
+
diff --git a/slurm/slurm.yml b/appliance/roles/provision/files/start_cobbler.yml
similarity index 62%
rename from slurm/slurm.yml
rename to appliance/roles/provision/files/start_cobbler.yml
index a0ad9456f..0d045c3b0 100644
--- a/slurm/slurm.yml
+++ b/appliance/roles/provision/files/start_cobbler.yml
@@ -1,4 +1,4 @@
-# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,25 +12,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
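The rename hunk below turns the old slurm.yml into a small boot-time helper. Its manual equivalent, for reference (the two-minute sleep mirrors the task's pause while the container runtime comes back up after a reboot):

```
# What start_cobbler.yml does when run from cron at reboot
sleep 120
docker exec cobbler cobbler sync
```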
--- -#Playbook for installing Slurm on a cluster -#collect info from everything -- hosts: all - -# Apply Common Installation and Config -- hosts: cluster - gather_facts: false - roles: - - slurm-common - -# Apply Master Config, start services -- hosts: master +- name: Start cobbler on reboot + hosts: localhost + connection: local gather_facts: false - roles: - - slurm-master + tasks: + - name: Wait for 2 minutes + pause: + minutes: 2 -# Start SLURM workers -- hosts: compute - gather_facts: false - roles: - - start-slurm-workers + - name: Execute cobbler sync in cobbler container + command: docker exec cobbler cobbler sync + changed_when: true \ No newline at end of file diff --git a/appliance/roles/provision/files/temp_centos7.ks b/appliance/roles/provision/files/temp_centos7.ks new file mode 100644 index 000000000..eb78a5240 --- /dev/null +++ b/appliance/roles/provision/files/temp_centos7.ks @@ -0,0 +1,64 @@ +#version=DEVEL + +# Use network installation +url --url http://ip/cblr/links/CentOS7-x86_64/ + +# Install OS instead of upgrade +install + +# Use text install +text + +# SELinux configuration +selinux --disabled + +# Firewall configuration +firewall --disabled + +# Do not configure the X Window System +skipx + +# Run the Setup Agent on first boot +#firstboot --enable +ignoredisk --only-use=sda + +# Keyboard layouts +keyboard us + +# System language +lang en_US + +# Network information +network --bootproto=dhcp --device=nic --onboot=on + +# Root password +rootpw --iscrypted password + +# System services +services --enabled="chronyd" + +# System timezone +timezone Asia/Kolkata --isUtc + +# System bootloader configuration +bootloader --location=mbr --boot-drive=sda + +# Partition clearing information +clearpart --all --initlabel --drives=sda + +# Clear the Master Boot Record +zerombr + +# Disk Partitioning +partition /boot/efi --asprimary --fstype=vfat --label EFI --size=200 +partition /boot --asprimary --fstype=ext4 --label BOOT --size=500 +partition / --asprimary --fstype=ext4 --label ROOT --size=4096 --grow + +# Reboot after installation +reboot + +%packages +@core +net-tools +%end + diff --git a/appliance/roles/provision/files/temp_dhcp.template b/appliance/roles/provision/files/temp_dhcp.template new file mode 100644 index 000000000..bbf7291c7 --- /dev/null +++ b/appliance/roles/provision/files/temp_dhcp.template @@ -0,0 +1,93 @@ +# ****************************************************************** +# Cobbler managed dhcpd.conf file +# +# generated from cobbler dhcp.conf template ($date) +# Do NOT make changes to /etc/dhcpd.conf. Instead, make your changes +# in /etc/cobbler/dhcp.template, as /etc/dhcpd.conf will be +# overwritten. 
+# +# ****************************************************************** + +ddns-update-style interim; + +allow booting; +allow bootp; + +ignore client-updates; +set vendorclass = option vendor-class-identifier; + +option pxe-system-type code 93 = unsigned integer 16; + +subnet subnet_mask netmask net_mask { +option subnet-mask net_mask; +range dynamic-bootp start end; +default-lease-time 21600; +max-lease-time 43200; +next-server $next_server; +#insert the static DHCP leases for configuration here + + + class "pxeclients" { + match if substring (option vendor-class-identifier, 0, 9) = "PXEClient"; + if option pxe-system-type = 00:02 { + filename "ia64/elilo.efi"; + } else if option pxe-system-type = 00:06 { + filename "grub/grub-x86.efi"; + } else if option pxe-system-type = 00:07 { + filename "grub/grub-x86_64.efi"; + } else if option pxe-system-type = 00:09 { + filename "grub/grub-x86_64.efi"; + } else { + filename "pxelinux.0"; + } + } + +} + +#for dhcp_tag in $dhcp_tags.keys(): + ## group could be subnet if your dhcp tags line up with your subnets + ## or really any valid dhcpd.conf construct ... if you only use the + ## default dhcp tag in cobbler, the group block can be deleted for a + ## flat configuration +# group for Cobbler DHCP tag: $dhcp_tag +group { + #for mac in $dhcp_tags[$dhcp_tag].keys(): + #set iface = $dhcp_tags[$dhcp_tag][$mac] + host $iface.name { + #if $iface.interface_type == "infiniband": + option dhcp-client-identifier = $mac; + #else + hardware ethernet $mac; + #end if + #if $iface.ip_address: + fixed-address $iface.ip_address; + #end if + #if $iface.hostname: + option host-name "$iface.hostname"; + #end if + #if $iface.netmask: + option subnet-mask $iface.netmask; + #end if + #if $iface.gateway: + option routers $iface.gateway; + #end if + #if $iface.enable_gpxe: + if exists user-class and option user-class = "gPXE" { + filename "http://$cobbler_server/cblr/svc/op/gpxe/system/$iface.owner"; + } else if exists user-class and option user-class = "iPXE" { + filename "http://$cobbler_server/cblr/svc/op/gpxe/system/$iface.owner"; + } else { + filename "undionly.kpxe"; + } + #else + filename "$iface.filename"; + #end if + ## Cobbler defaults to $next_server, but some users + ## may like to use $iface.system.server for proxied setups + next-server $next_server; + ## next-server $iface.next_server; + } + #end for +} +#end for + diff --git a/appliance/roles/provision/files/tftp b/appliance/roles/provision/files/tftp new file mode 100644 index 000000000..35d3251ba --- /dev/null +++ b/appliance/roles/provision/files/tftp @@ -0,0 +1,19 @@ +# default: off +# description: The tftp server serves files using the trivial file transfer \ +# protocol. The tftp protocol is often used to boot diskless \ +# workstations, download configuration files to network-aware printers, \ +# and to start the installation process for some operating systems. +service tftp +{ + socket_type = dgram + protocol = udp + wait = yes + user = root + server = /usr/sbin/in.tftpd + server_args = -s /var/lib/tftpboot + disable = no + per_source = 11 + cps = 100 2 + flags = IPv4 +} + diff --git a/appliance/roles/provision/files/tftp.yml b/appliance/roles/provision/files/tftp.yml new file mode 100644 index 000000000..e9a524a91 --- /dev/null +++ b/appliance/roles/provision/files/tftp.yml @@ -0,0 +1,46 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +- name: Start tftp and dhcp + hosts: localhost + connection: local + tasks: + - name: Fetch tftp status + command: systemctl is-active tftp + args: + warn: no + register: tftp_status + ignore_errors: yes + changed_when: false + + - name: Start tftp if inactive state + command: systemctl start tftp.service + args: + warn: no + when: "('inactive' in tftp_status.stdout) or ('unknown' in tftp_status.stdout)" + + - name: Fetch dhcp status + command: systemctl is-active dhcpd + args: + warn: no + register: dhcp_status + ignore_errors: yes + changed_when: false + + - name: Start dhcp if inactive state + command: systemctl start dhcpd.service + args: + warn: no + when: "('inactive' in dhcp_status.stdout) or ('unknown' in dhcp_status.stdout)" diff --git a/appliance/roles/provision/tasks/check_prerequisites.yml b/appliance/roles/provision/tasks/check_prerequisites.yml new file mode 100644 index 000000000..5f6e7299d --- /dev/null +++ b/appliance/roles/provision/tasks/check_prerequisites.yml @@ -0,0 +1,87 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
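The tftp.yml watchdog above reduces to roughly two idempotent shell checks, which is handy when debugging the container by hand:

```
# Start tftp/dhcpd only when they are not already active
systemctl is-active --quiet tftp  || systemctl start tftp.service
systemctl is-active --quiet dhcpd || systemctl start dhcpd.service
```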
+--- + +- name: Initialize variables + set_fact: + cobbler_container_status: false + cobbler_image_status: false + cobbler_config_status: false + backup_map_status: false + new_node_status: false + tags: install + +- name: Check if any backup file exists + block: + - name: Check status of backup file + stat: + path: "{{ role_path }}/files/backup_mapping_file.csv" + register: backup_map + + - name: Set status for backup file + set_fact: + backup_map_status: true + when: backup_map.stat.exists == true + rescue: + - name: Message + debug: + msg: "All nodes are new" + verbosity: 2 + +- name: Inspect the cobbler image + docker_image_info: + name: cobbler + register: cobbler_image_result + tags: install + +- name: Check cobbler status on the machine + docker_container_info: + name: cobbler + register: cobbler_result + tags: install + +- name: Update cobbler image status + set_fact: + cobbler_image_status: true + when: cobbler_image_result.images| length==1 + tags: install + +- name: Update cobbler container status + set_fact: + cobbler_container_status: true + when: cobbler_result.exists + tags: install + +- name: Fetch cobbler profile list + command: docker exec cobbler cobbler profile list + changed_when: false + register: cobbler_profile_list + ignore_errors: true + when: cobbler_container_status == true + +- name: Check crontab list + command: docker exec cobbler crontab -l + changed_when: false + register: crontab_list + ignore_errors: true + when: cobbler_container_status == true + +- name: Update cobbler container status + set_fact: + cobbler_config_status: true + when: + - cobbler_container_status == true + - "'CentOS' in cobbler_profile_list.stdout" + - "'* * * * * ansible-playbook /root/tftp.yml' in crontab_list.stdout" + - "'5 * * * * ansible-playbook /root/inventory_creation.yml' in crontab_list.stdout" diff --git a/appliance/roles/provision/tasks/cobbler_image.yml b/appliance/roles/provision/tasks/cobbler_image.yml new file mode 100644 index 000000000..da74e4199 --- /dev/null +++ b/appliance/roles/provision/tasks/cobbler_image.yml @@ -0,0 +1,30 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +- name: Image creation (It may take 5-10 mins) + docker_image: + name: "{{ docker_image_name }}" + tag: "{{ docker_image_tag }}" + source: build + build: + path: "{{ role_path }}/files/" + network: host + state: present + tags: install + +- name: Run cobbler container + command: "{{ cobbler_run_command }}" + changed_when: false + tags: install diff --git a/appliance/roles/provision/tasks/configure_cobbler.yml b/appliance/roles/provision/tasks/configure_cobbler.yml new file mode 100644 index 000000000..d4eabc94f --- /dev/null +++ b/appliance/roles/provision/tasks/configure_cobbler.yml @@ -0,0 +1,56 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+- name: Delete the cobbler container if it exists
+  docker_container:
+    name: cobbler
+    state: absent
+  tags: install
+  when: cobbler_container_status == true and cobbler_config_status == false
+
+- name: Run cobbler container
+  command: "{{ cobbler_run_command }}"
+  changed_when: false
+  tags: install
+  when: cobbler_container_status == true and cobbler_config_status == false
+
+- name: Configuring cobbler inside container (It may take 5-10 mins)
+  command: docker exec cobbler ansible-playbook /root/kickstart.yml
+  changed_when: false
+  tags: install
+  when: cobbler_config_status == false
+
+- name: Schedule task
+  cron:
+    name: "start cobbler on reboot"
+    special_time: reboot
+    job: "ansible-playbook {{ role_path }}/files/start_cobbler.yml"
+  tags: install
+  when: cobbler_config_status == false
+
+- name: Execute cobbler sync in cobbler container
+  command: docker exec cobbler cobbler sync
+  changed_when: true
+  when: cobbler_config_status == true
+
+- name: Remove the files
+  file:
+    path: "{{ item }}"
+    state: absent
+  with_items:
+    - "{{ role_path }}/files/.users.digest"
+    - "{{ role_path }}/files/dhcp.template"
+    - "{{ role_path }}/files/settings"
+    - "{{ role_path }}/files/centos7.ks"
+    - "{{ role_path }}/files/new_mapping_file.csv.bak"
diff --git a/appliance/roles/provision/tasks/dhcp_configure.yml b/appliance/roles/provision/tasks/dhcp_configure.yml
new file mode 100644
index 000000000..6432435ce
--- /dev/null
+++ b/appliance/roles/provision/tasks/dhcp_configure.yml
@@ -0,0 +1,60 @@
+# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+--- + +- name: Create the dhcp template + copy: + src: "{{ role_path }}/files/temp_dhcp.template" + dest: "{{ role_path }}/files/dhcp.template" + mode: 0775 + tags: install + +- name: Assign subnet and netmask + replace: + path: "{{ role_path }}/files/dhcp.template" + regexp: '^subnet subnet_mask netmask net_mask {' + replace: 'subnet {{ subnet }} netmask {{ netmask }} {' + tags: install + +- name: Assign netmask + replace: + path: "{{ role_path }}/files/dhcp.template" + regexp: '^option subnet-mask net_mask;' + replace: 'option subnet-mask {{ netmask }};' + +- name: Assign DHCP range + replace: + path: "{{ role_path }}/files/dhcp.template" + regexp: '^range dynamic-bootp start end;' + replace: 'range dynamic-bootp {{ dhcp_start_ip }} {{ dhcp_end_ip }};' + +- name: Create the cobbler settings file + copy: + src: "{{ role_path }}/files/cobbler_settings" + dest: "{{ role_path }}/files/settings" + mode: 0775 + tags: install + +- name: Assign server ip + replace: + path: "{{ role_path }}/files/settings" + regexp: '^server: ip' + replace: 'server: {{ hpc_ip }}' + +- name: Assign next server ip + replace: + path: "{{ role_path }}/files/settings" + regexp: '^next_server: ip' + replace: 'next_server: {{ hpc_ip }}' + diff --git a/appliance/roles/provision/tasks/firewall_settings.yml b/appliance/roles/provision/tasks/firewall_settings.yml new file mode 100644 index 000000000..5bc0a8652 --- /dev/null +++ b/appliance/roles/provision/tasks/firewall_settings.yml @@ -0,0 +1,64 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +#Tasks for modifying firewall configurations for Cobbler + +- name: Permit traffic in default zone on port 80/tcp + firewalld: + port: 80/tcp + permanent: yes + state: enabled + tags: install + +- name: Permit traffic in default zone on port 443/tcp + firewalld: + port: 443/tcp + permanent: yes + state: enabled + tags: install + +- name: Permit traffic in default zone for dhcp service + firewalld: + service: dhcp + permanent: yes + state: enabled + tags: install + +- name: Permit traffic in default zone on port 69/tcp + firewalld: + port: 69/tcp + permanent: yes + state: enabled + tags: install + +- name: Permit traffic in default zone on port 69/udp + firewalld: + port: 69/udp + permanent: yes + state: enabled + tags: install + +- name: Permit traffic in default zone on port 4011/udp + firewalld: + port: 4011/udp + permanent: yes + state: enabled + tags: install + +- name: Reboot firewalld + systemd: + name: firewalld + state: reloaded + tags: install diff --git a/appliance/roles/provision/tasks/main.yml b/appliance/roles/provision/tasks/main.yml new file mode 100644 index 000000000..2c2d2d6f8 --- /dev/null +++ b/appliance/roles/provision/tasks/main.yml @@ -0,0 +1,66 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +#Tasks for Deploying cobbler on the system + +- name: Check cobbler status on machine + include_tasks: check_prerequisites.yml + +- name: Mount iso image + import_tasks: mount_iso.yml + when: not cobbler_image_status + +- name: Modify firewall settings for Cobbler + import_tasks: firewall_settings.yml + when: not cobbler_container_status + +- name: Include common variables + include_vars: ../../common/vars/main.yml + when: not cobbler_container_status + +- name: Internet validation + include_tasks: ../../common/tasks/internet_validation.yml + when: not cobbler_container_status + +- name: Provision password validation + import_tasks: provision_password.yml + when: not cobbler_image_status + +- name: Dhcp Configuration + import_tasks: dhcp_configure.yml + when: (not cobbler_image_status) or ( backup_map_status == true) + +- name: Mapping file validation + import_tasks: mapping_file.yml + when: (not cobbler_image_status) and (mapping_file == true) or ( backup_map_status == true) + +- name: Cobbler image creation + import_tasks: cobbler_image.yml + when: not cobbler_container_status + +- name: Cobbler configuration + import_tasks: configure_cobbler.yml + +- name: Cobbler container status message + block: + - debug: + msg: "{{ message_skipped }}" + verbosity: 2 + when: cobbler_container_status + - debug: + msg: "{{ message_installed }}" + verbosity: 2 + when: not cobbler_container_status + tags: install diff --git a/appliance/roles/provision/tasks/mapping_file.yml b/appliance/roles/provision/tasks/mapping_file.yml new file mode 100644 index 000000000..91dc8ea7c --- /dev/null +++ b/appliance/roles/provision/tasks/mapping_file.yml @@ -0,0 +1,166 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# limitations under the License. 
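For orientation, the validations that follow accept a file shaped like the sketch below: a MAC,Hostname,IP header, exactly two commas per record, no duplicates in any column, and hostnames free of underscores, dots, and spaces (all values here are illustrative):

```
cat > mapping_file.csv <<'EOF'
MAC,Hostname,IP
e4:43:4b:aa:bb:01,compute001,172.17.0.10
e4:43:4b:aa:bb:02,compute002,172.17.0.11
EOF
```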
+---
+
+- name: Check if file is comma separated
+  shell: awk -F\, '{print NF-1}' {{ path_for_mapping_file }}
+  register: comma_seperated
+  changed_when: false
+  tags: install
+
+- name: Fail if not comma separated
+  fail:
+    msg: "{{ not_comma_seperated }}"
+  when: item != "2"
+  with_items: "{{ comma_seperated.stdout_lines }}"
+  tags: install
+
+- name: Remove blank lines
+  shell: awk -F, 'length>NF+1' {{ path_for_mapping_file }} > {{ role_path }}/files/new_mapping_file.csv
+  changed_when: false
+  tags: install
+
+- name: Remove blank spaces
+  shell: sed -i.bak -E 's/(^|,)[[:blank:]]+/\1/g; s/[[:blank:]]+(,|$)/\1/g'  {{ role_path }}/files/new_mapping_file.csv
+  args:
+    warn: no
+  changed_when: false
+  tags: install
+
+- name: Check if header present
+  shell: awk 'NR==1 { print $1}' {{ role_path }}/files/new_mapping_file.csv
+  register: header
+  changed_when: false
+  tags: install
+
+- name: Fail if header not present
+  fail:
+    msg: "{{ header_fail }}"
+  when: header.stdout != valid_header
+
+- name: Count the hostname
+  shell: awk -F',' '{print $2}' {{ role_path }}/files/new_mapping_file.csv | wc -l
+  register: total_hostname
+  changed_when: false
+  tags: install
+
+- name: Count the ip
+  shell: awk -F',' '{print $3}' {{ role_path }}/files/new_mapping_file.csv | wc -l
+  register: total_ip
+  changed_when: false
+  tags: install
+
+- name: Count the macs
+  shell: awk -F',' '{print $1}' {{ role_path }}/files/new_mapping_file.csv | wc -l
+  register: total_mac
+  changed_when: false
+  tags: install
+
+- name: Check for duplicate hostname
+  shell: awk -F',' '{print $2}' {{ role_path }}/files/new_mapping_file.csv | uniq | wc -l
+  register: uniq_hostname
+  changed_when: false
+  tags: install
+
+- name: Check for duplicate ip
+  shell: awk -F',' '{print $3}' {{ role_path }}/files/new_mapping_file.csv | uniq | wc -l
+  register: uniq_ip
+  changed_when: false
+  tags: install
+
+- name: Check for duplicate mac
+  shell: awk -F',' '{print $1}' {{ role_path }}/files/new_mapping_file.csv | uniq | wc -l
+  register: uniq_mac
+  changed_when: false
+  tags: install
+
+- name: Fail if duplicate hosts exist
+  fail:
+    msg: "{{ fail_hostname_duplicate }}"
+  when: total_hostname.stdout > uniq_hostname.stdout
+  tags: install
+
+- name: Fail if duplicate ips exist
+  fail:
+    msg: "{{ fail_ip_duplicate }}"
+  when: total_ip.stdout > uniq_ip.stdout
+  tags: install
+
+- name: Fail if duplicate mac exist
+  fail:
+    msg: "{{ fail_mac_duplicate }}"
+  when: total_mac.stdout > uniq_mac.stdout
+  tags: install
+
+- name: Check if _ or . or space present in hostname
+  shell: awk -F',' '{print $2}' {{ role_path }}/files/new_mapping_file.csv |grep -E -- '_|\.| '
+  register: hostname_result
+  ignore_errors: true
+  changed_when: false
+  tags: install
+
+- name: Fail if _ or . or space present in hostname
+  fail:
+    msg: "{{ hostname_result.stdout + ' :Hostname should not contain _ or .
as it will cause error with slurm and K8s'}}" + when: hostname_result.stdout != "" + tags: install + +- name: Compare the file for new nodes + block: + - name: difference + shell: diff {{ role_path }}/files/new_mapping_file.csv {{role_path}}/files/backup_mapping_file.csv| tr -d \>|tr -d \<| grep -E -- ', & :| ' + register: diff_output + when: backup_map_status == true + + - name: status of new nodes + set_fact: + new_node_status: true + when: diff_output.stdout!= "" + rescue: + - name: No new nodes + debug: + msg: "No new nodes to add" + verbosity: 2 + +- name: Fetch input + blockinfile: + path: "{{ role_path }}/files/dhcp.template" + insertafter: '^#insert the static DHCP leases for configuration here' + block: | + host {{ item.split(',')[1] }} { + hardware ethernet {{ item.split(',')[0] }}; + fixed-address {{ item.split(',')[2] }}; + } + marker: "# {mark} DHCP BLOCK OF {{ item.split(',')[0] }}" + with_lines: "{{ remove_header }}" + ignore_errors: true + when: (not cobbler_image_status) or (new_node_status == true) + tags: install + +- name: Create a backup file + copy: + src: "{{ role_path }}/files/new_mapping_file.csv" + dest: "{{ role_path }}/files/backup_mapping_file.csv" + +- name: Copy the dhcp.template inside container + command: docker exec cobbler cp /root/omnia/appliance/roles/provision/files/dhcp.template /etc/cobbler/dhcp.template + when: ( cobbler_container_status == true ) and ( new_node_status == true ) + +- name: Cobbler sync for adding new nodes + command: docker exec cobbler cobbler sync + when: ( cobbler_container_status == true ) and ( new_node_status == true ) + +- name: Restart dhcpd + command: docker exec cobbler systemctl restart dhcpd + when: ( cobbler_container_status == true ) and ( new_node_status == true ) + diff --git a/appliance/roles/provision/tasks/mount_iso.yml b/appliance/roles/provision/tasks/mount_iso.yml new file mode 100644 index 000000000..45806c9e5 --- /dev/null +++ b/appliance/roles/provision/tasks/mount_iso.yml @@ -0,0 +1,44 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- +- name: Initialize vars + set_fact: + mount_check: true + tags: install + +- name: Create iso directory + file: + path: "/mnt/{{ iso_path }}" + state: directory + tags: install + +- name: Check mountpoint + command: mountpoint /mnt/{{ iso_path }} + changed_when: false + register: result + ignore_errors: yes + tags: install + +- name: Update mount status + set_fact: + mount_check: "{{ result.failed }}" + tags: install + +- name: Mount the iso file + command: mount -o loop {{ path_for_iso_file }} /mnt/{{ iso_path }} + changed_when: false + args: + warn: no + when: mount_check == true + tags: install diff --git a/appliance/roles/provision/tasks/provision_password.yml b/appliance/roles/provision/tasks/provision_password.yml new file mode 100644 index 000000000..5bbec9ccc --- /dev/null +++ b/appliance/roles/provision/tasks/provision_password.yml @@ -0,0 +1,89 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. 
All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +- name: Remove old user + file: + path: "{{ role_path }}/files/.users.digest" + state: absent + tags: install + +- name: Create a new user + file: + path: "{{ role_path }}/files/.users.digest" + state: touch + mode: 0644 + tags: install + +- name: Encrypt cobbler password + shell: printf "%s:%s:%s" {{ username }} "Cobbler" "{{ cobbler_password }}" | md5sum | awk '{print $1}' + changed_when: false + register: encrypt_password + no_log: true + tags: install + +- name: Copy cobbler password to cobbler config file + shell: printf "%s:%s:%s\n" "{{ username }}" "Cobbler" "{{ encrypt_password.stdout }}" > "{{ role_path }}/files/.users.digest" + changed_when: false + no_log: true + tags: install + +- name: Create the kickstart file + copy: + src: "{{ role_path }}/files/temp_centos7.ks" + dest: "{{ role_path }}/files/centos7.ks" + mode: 0775 + tags: install + +- name: Configure kickstart file- IP + replace: + path: "{{ role_path }}/files/centos7.ks" + regexp: '^url --url http://ip/cblr/links/CentOS7-x86_64/' + replace: url --url http://{{ hpc_ip }}/cblr/links/CentOS7-x86_64/ + tags: install + +- name: Random phrase generation + command: openssl rand -base64 12 + changed_when: false + register: prompt_random_phrase + tags: install + no_log: true + +- name: Set random phrase + set_fact: + random_phrase: "{{ prompt_random_phrase.stdout }}" + tags: install + no_log: true + +- name: Login password + command: openssl passwd -1 -salt {{ random_phrase }} {{ cobbler_password }} + no_log: true + changed_when: false + register: login_pass + tags: install + +- name: Configure kickstart file- Password + replace: + path: "{{ role_path }}/files/centos7.ks" + regexp: '^rootpw --iscrypted password' + replace: 'rootpw --iscrypted {{ login_pass.stdout }}' + no_log: true + tags: install + +- name: Configure kickstart file- nic + replace: + path: "{{ role_path }}/files/centos7.ks" + regexp: '^network --bootproto=dhcp --device=nic --onboot=on' + replace: 'network --bootproto=dhcp --device={{ nic }} --onboot=on' + tags: install diff --git a/appliance/roles/provision/vars/main.yml b/appliance/roles/provision/vars/main.yml new file mode 100644 index 000000000..a634fc05f --- /dev/null +++ b/appliance/roles/provision/vars/main.yml @@ -0,0 +1,45 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
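The password handling in provision_password.yml above condenses to two commands: an md5 digest for cobbler's users.digest and an MD5-crypt hash for the kickstart root password. A sketch with a placeholder password:

```
# users.digest entry material (user:realm:password -> md5), as built above
printf "%s:%s:%s" cobbler Cobbler 'MyPass123' | md5sum | awk '{print $1}'
# MD5-crypt root password injected into centos7.ks, salted the same way
openssl passwd -1 -salt "$(openssl rand -base64 12)" 'MyPass123'
```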
+---
+
+# vars file for provision
+
+#Usage: mapping_file.yml
+fail_hostname_duplicate: "Failed: Duplicate hostname exists. Please verify mapping file again."
+remove_header: awk 'NR > 1 { print }' {{ role_path }}/files/new_mapping_file.csv
+fail_ip_duplicate: "Failed: Duplicate IP exists. Please verify mapping file again."
+fail_mac_duplicate: "Failed: Duplicate MAC exists. Please verify mapping file again."
+header_fail: "Failed: Header (MAC,Hostname,IP) should be present in the mapping file"
+valid_header: MAC,Hostname,IP
+not_comma_seperated: "Failed: Mapping file should be comma separated."
+
+#Usage: check_prerequisite.yml
+iso_name: CentOS-7-x86_64-Minimal-2009.iso
+iso_fail: "ISO file not found. Download and copy the ISO file to omnia/appliance/roles/provision/files"
+
+# Usage: provision_password.yml
+provision_encrypted_dest: ../files/
+username: cobbler
+
+# Usage: cobbler_image.yml
+docker_image_name: cobbler
+docker_image_tag: latest
+cobbler_run_command: docker run -itd --privileged --net=host --restart=always -v {{ mount_path }}:/root/omnia -v cobbler_www:/var/www/cobbler:Z -v cobbler_backup:/var/lib/cobbler/backup:Z -v /mnt/iso:/mnt:Z -p 69:69/udp -p 81:80 -p 443:443 -p 25151:25151 --name cobbler cobbler:latest /sbin/init
+
+# Usage: main.yml
+message_skipped: "Installation Skipped: Cobbler instance is already running in your system"
+message_installed: "Installation Successful"
+
+# Usage: mount_iso.yml
+iso_path: iso
diff --git a/appliance/roles/web_ui/tasks/awx_configuration.yml b/appliance/roles/web_ui/tasks/awx_configuration.yml
new file mode 100644
index 000000000..0f4738aa9
--- /dev/null
+++ b/appliance/roles/web_ui/tasks/awx_configuration.yml
@@ -0,0 +1,284 @@
+# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
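Everything in the AWX configuration tasks below shells out to the awx CLI with explicit connection flags; the same pattern works interactively when debugging (host and credentials here are illustrative):

```
# List organizations the way the playbook's query tasks do
awx --conf.host "http://localhost:8081" --conf.username admin \
    --conf.password "$ADMIN_PASSWORD" organizations list -f human
```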
+--- + +# Get Current AWX configuration +- name: Waiting for 30 seconds for UI components to be accessible + wait_for: + timeout: 30 + +- name: Organization list + block: + - name: Get organization list + command: >- + awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}" + organizations list -f human + register: organizations_list + changed_when: no + no_log: True + rescue: + - name: Message + fail: + msg: "{{ organizations_list.stdout | regex_replace(awx_user) | regex_replace(admin_password) }}" + +- name: Project list + block: + - name: Get project list + command: >- + awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}" + projects list -f human + register: projects_list + changed_when: no + no_log: True + rescue: + - name: Message + fail: + msg: "{{ projects_list.stdout | regex_replace(awx_user) | regex_replace(admin_password) }}" + +- name: Inventory list + block: + - name: Get inventory list + command: >- + awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}" + inventory list -f human + register: inventory_list + changed_when: no + no_log: True + rescue: + - name: Message + fail: + msg: "{{ inventory_list.stdout | regex_replace(awx_user) | regex_replace(admin_password) }}" + +- name: Credential list + block: + - name: Get credentials list + command: >- + awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}" + credentials list -f human + register: credentials_list + changed_when: no + no_log: True + rescue: + - name: Message + fail: + msg: "{{ credentials_list.stdout | regex_replace(awx_user) | regex_replace(admin_password) }}" + +- name: Template List + block: + - name: Get template list + command: >- + awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}" + job_templates list -f human + register: job_templates_list + changed_when: no + no_log: True + rescue: + - name: Message + fail: + msg: "{{ job_templates_list.stdout | regex_replace(awx_user) | regex_replace(admin_password) }}" + +- name: Group names + block: + - name: If omnia-inventory exists, fetch group names in the inventory + command: >- + awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}" + groups list --inventory "{{ omnia_inventory_name }}" -f human + register: groups_list + when: omnia_inventory_name in inventory_list.stdout + no_log: True + rescue: + - name: Message + fail: + msg: "{{ groups_list.stdout | regex_replace(awx_user) | regex_replace(admin_password) }}" + +- name: Schedules list + block: + - name: Get schedules list + command: >- + awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}" + schedules list -f human + register: schedules_list + changed_when: no + no_log: True + rescue: + - name: Message + fail: + msg: "{{ schedules_list.stdout | regex_replace(awx_user) | regex_replace(admin_password) }}" + +# Delete Default Configurations +- name: Delete default configurations + block: + - name: Delete default organization + command: >- + awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}" + organizations delete "{{ default_org }}" + when: default_org in organizations_list.stdout + register: register_error + no_log: True + + - name: Delete default job template + command: >- + awx --conf.host "{{ awx_ip }}" --conf.username 
"{{ awx_user }}" --conf.password "{{ admin_password }}" + job_templates delete "{{ default_template }}" + when: default_template in job_templates_list.stdout + register: register_error + no_log: True + + - name: Delete default project + command: >- + awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}" + projects delete "{{ default_projects }}" + when: default_projects in projects_list.stdout + register: register_error + no_log: True + + - name: Delete default credential + command: >- + awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}" + credentials delete "{{ default_credentials }}" + when: default_credentials in credentials_list.stdout + register: register_error + no_log: True + + rescue: + - name: Message + fail: + msg: "{{ register_error.stdout | regex_replace(awx_user) | regex_replace(admin_password) }}" + +# Create required configuration if not present +- name: Create required configurations + block: + - name: Create organisation + command: >- + awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}" + organizations create --name "{{ organization_name }}" + when: organization_name not in organizations_list.stdout + register: register_error + no_log: True + + - name: Create new project + command: >- + awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}" + projects create --name "{{ project_name }}" --organization "{{ organization_name }}" + --local_path "{{ role_path.split('/')[-4] }}" + when: project_name not in projects_list.stdout + register: register_error + no_log: True + + - name: Create new omnia inventory + command: >- + awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}" + inventory create --name "{{ omnia_inventory_name }}" --organization "{{ organization_name }}" + when: omnia_inventory_name not in inventory_list.stdout + register: register_error + no_log: True + + - name: Create groups in omnia inventory + command: >- + awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}" + groups create --name "{{ item }}" --inventory "{{ omnia_inventory_name }}" + when: omnia_inventory_name not in inventory_list.stdout or item not in groups_list.stdout + register: register_error + no_log: True + loop: "{{ group_names }}" + + - name: Create credentials for omnia + command: >- + awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}" + credentials create --name "{{ credential_name }}" --organization "{{ organization_name }}" + --credential_type "{{ credential_type }}" + --inputs '{"username": "{{ cobbler_username }}", "password": "{{ cobbler_password }}"}' + when: credential_name not in credentials_list.stdout + register: register_error + no_log: True + + - name: DeployOmnia Template + block: + - name: Create template to deploy omnia + command: >- + awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}" + job_templates create + --name "{{ omnia_template_name }}" + --job_type run + --inventory "{{ omnia_inventory_name }}" + --project "{{ project_name }}" + --playbook "{{ omnia_playbook }}" + --verbosity "{{ playbooks_verbosity }}" + --ask_skip_tags_on_launch true + register: register_error + no_log: True + + - name: Associate credential + command: >- + awx --conf.host "{{ awx_ip }}" --conf.username "{{ 
awx_user }}" --conf.password "{{ admin_password }}" + job_templates associate "{{ omnia_template_name }}" + --credential ""{{ credential_name }}"" + register: register_error + no_log: True + + when: omnia_template_name not in job_templates_list.stdout + + - name: DynamicInventory template + block: + - name: Create template to fetch dynamic inventory + command: >- + awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}" + job_templates create + --name "{{ inventory_template_name }}" + --job_type run + --inventory "{{ omnia_inventory_name }}" + --project "{{ project_name }}" + --playbook "{{ inventory_playbook }}" + --verbosity "{{ playbooks_verbosity }}" + --use_fact_cache true + register: register_error + no_log: True + + - name: Associate credential + command: >- + awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}" + job_templates associate "{{ inventory_template_name }}" + --credential ""{{ credential_name }}"" + register: register_error + no_log: True + + when: inventory_template_name not in job_templates_list.stdout + + - name: Schedule dynamic inventory template + block: + - name: Get unified job template list + command: >- + awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}" + unified_job_templates list --name "{{ inventory_template_name }}" -f human + no_log: True + register: unified_job_template_list + + - name: Get job ID + set_fact: + job_id: "{{ unified_job_template_list.stdout | regex_search('[0-9]+') }}" + + - name: Schedule dynamic inventory job + command: >- + awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}" + schedules create --name "{{ schedule_name }}" + --unified_job_template="{{ job_id }}" --rrule="{{ schedule_rule }}" + register: register_error + no_log: True + + when: schedule_name not in schedules_list.stdout + + rescue: + - name: Message + fail: + msg: "{{ register_error.stdout | regex_replace(awx_user) | regex_replace(admin_password) }}" \ No newline at end of file diff --git a/appliance/roles/web_ui/tasks/check_awx_status.yml b/appliance/roles/web_ui/tasks/check_awx_status.yml new file mode 100644 index 000000000..c7b18e505 --- /dev/null +++ b/appliance/roles/web_ui/tasks/check_awx_status.yml @@ -0,0 +1,40 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+--- + +# Tasks for verifying if AWX is already installed on the system +- name: Initialize variables + set_fact: + awx_status: false + tags: install + +- name: Check awx_task status on the machine + docker_container_info: + name: awx_task + register: awx_task_result + tags: install + +- name: Check awx_web status on the machine + docker_container_info: + name: awx_web + register: awx_web_result + tags: install + +- name: Update awx status + set_fact: + awx_status: true + when: + - awx_task_result.exists + - awx_web_result.exists + tags: install \ No newline at end of file diff --git a/appliance/roles/web_ui/tasks/clone_awx.yml b/appliance/roles/web_ui/tasks/clone_awx.yml new file mode 100644 index 000000000..e79be6627 --- /dev/null +++ b/appliance/roles/web_ui/tasks/clone_awx.yml @@ -0,0 +1,22 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +- name: Clone AWX repo + git: + repo: "{{ awx_git_repo }}" + dest: "{{ awx_repo_path }}" + force: yes + version: 15.0.0 + tags: install \ No newline at end of file diff --git a/appliance/roles/web_ui/tasks/firewall_settings.yml b/appliance/roles/web_ui/tasks/firewall_settings.yml new file mode 100644 index 000000000..887e131e4 --- /dev/null +++ b/appliance/roles/web_ui/tasks/firewall_settings.yml @@ -0,0 +1,40 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +# Tasks for modifying firewall configurations for AWX + +- name: Enable masquerading on the public zone + firewalld: + masquerade: yes + state: enabled + permanent: true + zone: public + tags: install + +- name: Add HTTP and HTTPS services to firewalld + firewalld: + service: "{{ item }}" + permanent: true + state: enabled + with_items: + - http + - https + tags: install + +- name: Reload firewalld + systemd: + name: firewalld + state: reloaded + tags: install \ No newline at end of file diff --git a/appliance/roles/web_ui/tasks/install_awx.yml b/appliance/roles/web_ui/tasks/install_awx.yml new file mode 100644 index 000000000..d3b0ad12e --- /dev/null +++ b/appliance/roles/web_ui/tasks/install_awx.yml @@ -0,0 +1,64 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +# Tasks for installing AWX + +- name: Change inventory file + replace: + path: "{{ awx_inventory_path }}" + regexp: "{{ item.regexp }}" + replace: "{{ item.replace }}" + loop: + - { name: Project data directory, regexp: "{{ project_data_dir_old }}" , replace: "{{ project_data_dir_new }}" } + - { name: Alternate DNS Servers, regexp: "{{ awx_alternate_dns_servers_old }}", replace: "{{ awx_alternate_dns_servers_new }}" } + - { name: Credentials, regexp: "{{ admin_password_old }}", replace: "{{ admin_password_new }}"} + loop_control: + label: "{{ item.name }}" + tags: install + +- name: Ensure port is 8081 + lineinfile: + path: "{{ awx_inventory_path }}" + regexp: "{{ port_old }}" + line: "{{ port_new }}" + state: present + tags: install + +- name: Create pgdocker directory + file: + path: "{{ pgdocker_dir_path }}" + state: directory + mode: 0775 + tags: install + +- name: Install AWX + block: + - name: Run AWX install.yml file + command: ansible-playbook -i inventory install.yml --extra-vars "admin_password={{ admin_password }}" + args: + chdir: "{{ awx_installer_path }}" + register: awx_installation + no_log: True + + rescue: + - name: Check AWX status on machine + include_tasks: check_awx_status.yml + + - name: Fail if containers are not running + fail: + msg: "AWX installation failed with error msg: + {{ awx_installation.stdout | regex_replace(admin_password) }}." + when: not awx_status + + tags: install diff --git a/appliance/roles/web_ui/tasks/install_awx_cli.yml b/appliance/roles/web_ui/tasks/install_awx_cli.yml new file mode 100644 index 000000000..bce4a9445 --- /dev/null +++ b/appliance/roles/web_ui/tasks/install_awx_cli.yml @@ -0,0 +1,34 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +# Tasks for installing AWX-CLI +- name: Add AWX CLI repo + block: + - name: Get repo + get_url: + url: "{{ awx_cli_repo }}" + dest: "{{ awx_cli_repo_path }}" + - name: Disable gpgcheck + replace: + path: "{{ awx_cli_repo_path }}" + regexp: 'gpgcheck=1' + replace: 'gpgcheck=0' + tags: install + +- name: Install AWX-CLI + package: + name: ansible-tower-cli + state: present + tags: install diff --git a/appliance/roles/web_ui/tasks/main.yml b/appliance/roles/web_ui/tasks/main.yml new file mode 100644 index 000000000..134aae8cb --- /dev/null +++ b/appliance/roles/web_ui/tasks/main.yml @@ -0,0 +1,76 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +# Tasks for Deploying AWX on the system +- name: Check AWX status on machine + include_tasks: check_awx_status.yml + tags: install + +- name: Include common variables + include_vars: ../../common/vars/main.yml + tags: install + +- name: Internet validation + include_tasks: ../../common/tasks/internet_validation.yml + when: not awx_status + tags: install + +- name: Clone AWX repo + include_tasks: clone_awx.yml + when: not awx_status + tags: install + +- name: Modify firewall config + include_tasks: firewall_settings.yml + when: not awx_status + tags: install + +- name: Install AWX + include_tasks: install_awx.yml + when: not awx_status + tags: install + +- name: Status message + block: + - debug: + msg: "{{ message_skipped }}" + verbosity: 2 + when: awx_status + - debug: + msg: "{{ message_installed }}" + verbosity: 2 + when: not awx_status + tags: install + +- name: Internet validation + include_tasks: ../../common/tasks/internet_validation.yml + tags: install + +- name: Install AWX-CLI + include_tasks: install_awx_cli.yml + tags: install + +- name: Check if AWX-UI is accessible + include_tasks: ui_accessibility.yml + tags: install + +- name: Configure AWX + block: + - include_tasks: awx_configuration.yml + rescue: + - name: Display msg + debug: + msg: "{{ conf_fail_msg }}" + tags: install \ No newline at end of file diff --git a/appliance/roles/web_ui/tasks/ui_accessibility.yml b/appliance/roles/web_ui/tasks/ui_accessibility.yml new file mode 100644 index 000000000..e182e901b --- /dev/null +++ b/appliance/roles/web_ui/tasks/ui_accessibility.yml @@ -0,0 +1,85 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
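+# The first task below polls the AWX UI with the uri module until the login page text ("Password Dialog") appears, for up to 20 retries x 15 s (roughly five minutes). +# If the UI never comes up, the rescue path removes the awx_task and awx_web containers, restarts docker, re-runs the installer, and then polls again for up to 30 retries x 10 s.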
+--- + +# Check accessibility of AWX-UI +- name: Re-install if in migrating state + block: + - name: Wait for AWX UI to be up + uri: + url: "{{ awx_ip }}" + status_code: "{{ return_status }}" + return_content: yes + register: register_error + until: awx_ui_msg in register_error.content + retries: 20 + delay: 15 + changed_when: no + no_log: True + + rescue: + - name: Starting rescue + debug: + msg: "Attempting to re-install AWX" + + - name: Remove old containers + docker_container: + name: "{{ item }}" + state: absent + loop: + - awx_task + - awx_web + + - name: Restart docker + service: + name: docker + state: restarted + + - name: Re-install AWX + block: + - name: Run AWX install.yml file + command: ansible-playbook -i inventory install.yml --extra-vars "admin_password={{ admin_password }}" + args: + chdir: "{{ awx_installer_path }}" + register: awx_installation + no_log: True + + rescue: + - name: Check AWX status on machine + include_tasks: check_awx_status.yml + + - name: Fail if containers are not running + fail: + msg: "AWX installation failed with error msg: + {{ awx_installation.stdout | regex_replace(admin_password) }}." + when: not awx_status + + - name: Check if AWX UI is up + block: + - name: Wait for AWX UI to be up + uri: + url: "{{ awx_ip }}" + status_code: "{{ return_status }}" + return_content: yes + register: register_error + until: awx_ui_msg in register_error.content + retries: 30 + delay: 10 + changed_when: no + no_log: True + rescue: + - name: Message + fail: + msg: "{{ register_error | regex_replace(awx_user) | regex_replace(admin_password) }}" + tags: install \ No newline at end of file diff --git a/appliance/roles/web_ui/vars/main.yml b/appliance/roles/web_ui/vars/main.yml new file mode 100644 index 000000000..3feafd0fc --- /dev/null +++ b/appliance/roles/web_ui/vars/main.yml @@ -0,0 +1,69 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +# vars file for web_ui + +# Usage: clone_awx.yml +awx_git_repo: "https://github.com/ansible/awx.git" +docker_volume: "/var/lib/docker/volumes/{{ docker_volume_name }}" +awx_repo_path: "{{ docker_volume }}/awx/" +awx_installer_path: "{{ awx_repo_path }}/installer/" + +# Usage: install_awx.yml +awx_inventory_path: "{{ awx_repo_path }}/installer/inventory" +pgdocker_dir_path: /var/lib/pgdocker +project_data_dir_old: "#project_data_dir=/var/lib/awx/projects" +project_data_dir_new: "project_data_dir={{ role_path + '/../../../..'
}} " +awx_alternate_dns_servers_old: '#awx_alternate_dns_servers="10.1.2.3,10.2.3.4"' +awx_alternate_dns_servers_new: 'awx_alternate_dns_servers="8.8.8.8,8.8.4.4"' +admin_password_old: "admin_password=password" +admin_password_new: "#admin_password=password" +port_old: "host_port=80" +port_new: "host_port=8081" + +# Usage: main.yml +message_skipped: "Installation Skipped: AWX instance is already running on your system" +message_installed: "Installation Successful" +awx_ip: http://localhost:8081 +return_status: 200 +awx_ui_msg: "Password Dialog" +conf_fail_msg: "AWX configuration failed at the last executed task." + +# Usage: install_awx_cli.yml +awx_cli_repo: "https://releases.ansible.com/ansible-tower/cli/ansible-tower-cli-centos7.repo" +awx_cli_repo_path: "/etc/yum.repos.d/ansible-tower-cli-centos7.repo" + +# Usage: awx_configuration.yml +awx_user: admin #Don't change it. It is set as admin while installing AWX +default_org: Default +default_template: 'Demo Job Template' +default_projects: 'Demo Project' +default_credentials: 'Demo Credential' +organization_name: DellEMC +project_name: omnia +omnia_inventory_name: omnia_inventory +group_names: + - manager + - compute +credential_name: omnia_credential +credential_type: Machine +cobbler_username: root +omnia_template_name: DeployOmnia +omnia_playbook: omnia.yml +inventory_template_name: DynamicInventory +inventory_playbook: appliance/inventory.yml +playbooks_verbosity: 0 +schedule_name: DynamicInventorySchedule +schedule_rule: "DTSTART:20201201T000000Z RRULE:FREQ=MINUTELY;INTERVAL=10" \ No newline at end of file diff --git a/appliance/test/appliance_config_empty.yml b/appliance/test/appliance_config_empty.yml new file mode 100644 index 000000000..4a9880780 --- /dev/null +++ b/appliance/test/appliance_config_empty.yml @@ -0,0 +1,49 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +# Password used while deploying OS on bare metal servers and for Cobbler UI. +# The Length of the password should be at least 8. +# The password must not contain -,\, '," +provision_password: "" + +# Password used for the AWX UI. +# The Length of the password should be at least 8. +# The password must not contain -,\, '," +awx_password: "" + +# The nic/ethernet card that needs to be connected to the HPC switch. +# This nic will be configured by Omnia for the DHCP server. +# Default value of nic is em1. +hpc_nic: "em1" + +# The nic/ethernet card that will be connected to the public internet. +# Default value of nic is em2 +public_nic: "em2" + +# This is the path where user has kept the iso image that needs to be provisioned in target nodes. +# The iso file should be CentOS7-2009-minimal edition. +# Other iso files are not supported. +iso_file_path: "" + +# The mapping file consists of the MAC address and its respective IP address and hostname. +# The format of mapping file should be MAC,hostname,IP and must be a CSV file. 
+# A template for the mapping file exists in omnia/examples and is named mapping_file.csv. +# This is the path where the user has kept the mapping file for DHCP configuration. +mapping_file_path: "" + +# The DHCP range for assigning IPv4 addresses to the bare metal nodes. +# Example: 10.1.23.1 +dhcp_start_ip_range: "" +dhcp_end_ip_range: "" diff --git a/appliance/test/appliance_config_test.yml b/appliance/test/appliance_config_test.yml new file mode 100644 index 000000000..f7413ace6 --- /dev/null +++ b/appliance/test/appliance_config_test.yml @@ -0,0 +1,49 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +# Password used while deploying OS on bare metal servers and for Cobbler UI. +# The length of the password should be at least 8 characters. +# The password must not contain the characters -, \, ', or " +provision_password: "omnia@123" + +# Password used for the AWX UI. +# The length of the password should be at least 8 characters. +# The password must not contain the characters -, \, ', or " +awx_password: "omnia@123" + +# The nic/ethernet card that needs to be connected to the HPC switch. +# This nic will be configured by Omnia for the DHCP server. +# Default value of nic is em1. +hpc_nic: "em1" + +# The nic/ethernet card that will be connected to the public internet. +# Default value of nic is em2. +public_nic: "em2" + +# This is the path where the user has kept the ISO image that needs to be provisioned on the target nodes. +# The ISO file should be the CentOS7-2009-minimal edition. +# Other ISO files are not supported. +iso_file_path: "/root/CentOS-7-x86_64-Minimal-2009.iso" + +# The mapping file consists of the MAC address and its respective IP address and hostname. +# The format of the mapping file should be MAC,hostname,IP and it must be a CSV file. +# A template for the mapping file exists in omnia/examples and is named mapping_file.csv. +# This is the path where the user has kept the mapping file for DHCP configuration. +mapping_file_path: "" + +# The DHCP range for assigning IPv4 addresses to the bare metal nodes. +# Example: 10.1.23.1 +dhcp_start_ip_range: "172.17.0.10" +dhcp_end_ip_range: "172.17.0.100" diff --git a/appliance/test/provisioned_hosts.yml b/appliance/test/provisioned_hosts.yml new file mode 100644 index 000000000..ff3a88689 --- /dev/null +++ b/appliance/test/provisioned_hosts.yml @@ -0,0 +1,3 @@ +[all] +172.17.0.10 +172.17.0.15 \ No newline at end of file diff --git a/appliance/test/test_common.yml b/appliance/test/test_common.yml new file mode 100644 index 000000000..1959a5cd1 --- /dev/null +++ b/appliance/test/test_common.yml @@ -0,0 +1,1882 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +# Testcase OMNIA_DIO_US_DVC_TC_001 +# Execute common role in management station with centos 7.9 os installed +- name: OMNIA_DIO_US_DVC_TC_001 + hosts: localhost + connection: local + gather_subset: + - 'min' + vars_files: + - test_vars/test_common_vars.yml + - ../roles/common/vars/main.yml + tasks: + - name: Check OS support + fail: + msg: "{{ os_status }}" + when: not(ansible_distribution == os_name and ansible_distribution_version >= os_version) + tags: TC_001 + + - name: Delete docker volume + docker_volume: + name: "{{ docker_volume_name }}" + state: absent + tags: TC_001 + + - block: + - name: Call common role + include_role: + name: ../roles/common + vars: + input_config_filename: "{{ test_input_config_filename }}" + tags: TC_001 + + - name: Fetch installed packages + package_facts: + manager: auto + tags: TC_001,VERIFY_001 + + - name: Verify all required packages are installed + assert: + that: "'{{ item }}' in ansible_facts.packages" + success_msg: "{{ install_package_success_msg }}" + fail_msg: "{{ install_package_fail_msg }}" + when: "'python-docker' not in item" + with_items: "{{ common_packages }}" + tags: TC_001,VERIFY_001 + + - name: Verify python-docker-py package is installed + assert: + that: "'python-docker-py' in ansible_facts.packages" + success_msg: "{{ install_package_success_msg }}" + fail_msg: "{{ install_package_fail_msg }}" + tags: TC_001,VERIFY_001 + + - name: Fetch docker volume info + docker_volume_info: + name: "{{ docker_volume_name }}" + register: docker_volume_status + tags: TC_001,VERIFY_001 + + - name: Validate docker volume + assert: + that: docker_volume_status.exists + fail_msg: "{{ docker_volume_fail_msg }}" + success_msg: "{{ docker_volume_success_msg }}" + tags: TC_001,VERIFY_001 + + - name: Check input config file is encrypted + command: cat {{ test_input_config_filename }} + changed_when: false + register: config_content + tags: TC_001,VERIFY_001 + + - name: Validate that input config file is encrypted + assert: + that: "'$ANSIBLE_VAULT;' in config_content.stdout" + fail_msg: "{{ input_config_fail_msg }}" + success_msg: "{{ input_config_success_msg }}" + tags: TC_001,VERIFY_001 + +# Testcase OMNIA_DIO_US_DVC_TC_002 +# Execute common role in management station with docker volume omnia-storage already present +- name: OMNIA_DIO_US_DVC_TC_002 + hosts: localhost + connection: local + gather_subset: + - 'min' + vars_files: + - test_vars/test_common_vars.yml + - ../roles/common/vars/main.yml + tasks: + - block: + - name: Call common role + include_role: + name: ../roles/common + vars: + input_config_filename: "{{ test_input_config_filename }}" + tags: TC_002 + + - name: Fetch installed packages + package_facts: + manager: auto + tags: TC_002,VERIFY_002 + + - name: Verify all required packages are installed + assert: + that: "'{{ item }}' in ansible_facts.packages" + success_msg: "{{ install_package_success_msg }}" + fail_msg: "{{ install_package_fail_msg }}" + when: "'python-docker' not in item" + with_items: "{{ common_packages }}" + tags: TC_002,VERIFY_002 + + - name: Verify python-docker-py package is installed + assert: + that: "'python-docker-py' in ansible_facts.packages" + success_msg: "{{ install_package_success_msg }}" + fail_msg: "{{ install_package_fail_msg }}" + tags: TC_002,VERIFY_002 + + - name: Fetch docker volume info + docker_volume_info: + name: "{{ docker_volume_name }}" + register: docker_volume_status + tags: TC_002,VERIFY_002 + + - name: Validate docker volume + assert: + that: docker_volume_status.exists + fail_msg: "{{ docker_volume_fail_msg }}" + success_msg: "{{ docker_volume_success_msg }}" + tags: TC_002,VERIFY_002 + + - name: Check input config file is encrypted + command: cat {{ test_input_config_filename }} + changed_when: false + register: config_content + tags: TC_002,VERIFY_002 + + - name: Validate that input config file is encrypted + assert: + that: "'$ANSIBLE_VAULT;' in config_content.stdout" + fail_msg: "{{ input_config_fail_msg }}" + success_msg: "{{ input_config_success_msg }}" + tags: TC_002,VERIFY_002 +
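+# The negative test cases below share one block/rescue pattern: the common role is expected to fail, and the rescue section asserts that the registered result carries the expected validation message. +# A minimal sketch of the pattern (names are illustrative): +# - block: +# - name: Call common role +# include_role: +# name: ../roles/common +# rescue: +# - name: Validate error +# assert: +# that: expected_msg in registered_result.msg +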
"'python-docker-py' in ansible_facts.packages" + success_msg: "{{ install_package_success_msg }}" + fail_msg: "{{ install_package_fail_msg }}" + tags: TC_002,VERIFY_002 + + - name: Fetching docker volume info + docker_volume_info: + name: "{{ docker_volume_name }}" + register: docker_volume_status + tags: TC_002,VERIFY_002 + + - name: Validate docker volume + assert: + that: docker_volume_status.exists + fail_msg: "{{ docker_volume_fail_msg }}" + success_msg: "{{ docker_volume_success_msg }}" + tags: TC_002,VERIFY_002 + + - name: Check input config file is encrypted + command: cat {{ test_input_config_filename }} + changed_when: false + register: config_content + tags: TC_002,VERIFY_002 + + - name: Validate input config file is encypted or not + assert: + that: "'$ANSIBLE_VAULT;' in config_content.stdout" + fail_msg: "{{ input_config_fail_msg }}" + success_msg: "{{ input_config_success_msg }}" + tags: TC_002,VERIFY_002 + +# Testcase OMNIA_DIO_US_DVC_TC_003 +# Execute common role in management station with os installed different than centos 7.9 +- name: OMNIA_DIO_US_DVC_TC_003 + hosts: localhost + connection: local + gather_subset: + - 'min' + vars_files: + - test_vars/test_common_vars.yml + - ../roles/common/vars/main.yml + tasks: + - block: + - name: Call common role + include_role: + name: ../roles/common + vars: + input_config_filename: "{{ test_input_config_filename }}" + ansible_distribution_version: "{{ centos_version }}" + + rescue: + - name: Validate OS check failure message + assert: + that: os_status in os_value.msg + success_msg: "{{ os_check_success_msg }}" + fail_msg: "{{ os_check_fail_msg }}" + tags: TC_003 + +# Testcase OMNIA_DIO_US_DVC_TC_004 +# Execute common role in management station without internet connectivity +- name: OMNIA_DIO_US_DVC_TC_004 + hosts: localhost + connection: local + gather_subset: + - 'min' + vars_files: + - test_vars/test_common_vars.yml + - ../roles/common/vars/main.yml + tasks: + - name: Down internet connectivity + lineinfile: + path: /etc/hosts + line: "172.16.0.5 github.com" + state: present + backup: yes + tags: TC_004 + + - block: + - name: Call common role + include_role: + name: ../roles/common + vars: + input_config_filename: "{{ test_input_config_filename }}" + + rescue: + - name: Validate internet connectivity failure message + assert: + that: internet_status in internet_value.msg + success_msg: "{{ internet_check_success_msg }}" + fail_msg: "{{ internet_check_fail_msg }}" + tags: TC_004 + + - name: Up internet connectivity + lineinfile: + path: /etc/hosts + line: "172.16.0.5 github.com" + state: absent + tags: TC_004 + +# Testcase OMNIA_DIO_US_DVC_TC_005 +# Execute common role in management station with different user than root +- name: OMNIA_DIO_US_DVC_TC_005 + hosts: localhost + connection: local + gather_subset: + - 'min' + vars_files: + - test_vars/test_common_vars.yml + tasks: + - name: Create temp user + user: + name: temp + tags: TC_005 + + - block: + - name: Execute playbook with different user + command: ansible-playbook ../appliance.yml --become-user=temp -e "input_config_filename=test/input_config_test.yml" + register: temp_user_output + + rescue: + - name: Validate the different user execution failure message + assert: + that: + - '"FAILED" in temp_user_output.stdout' + - '"This command has to be run under the root user" in temp_user_output.stdout' + success_msg: "{{ different_user_check_success_msg }}" + fail_msg: "{{ different_user_check_fail_msg }}" + tags: TC_005 + + - name: Remove temp user + user: + name: temp + state: 
+# Testcase OMNIA_DIO_US_DVC_TC_006 +# Execute common role in management station with selinux enabled. +- name: OMNIA_DIO_US_DVC_TC_006 + hosts: localhost + connection: local + gather_subset: + - 'min' + vars_files: + - test_vars/test_common_vars.yml + - ../roles/common/vars/main.yml + tasks: + - name: Enable selinux + selinux: + policy: targeted + state: enforcing + when: ansible_selinux.status == "disabled" + tags: TC_006 + + - name: Reboot localhost + command: reboot + when: ansible_selinux.status == "disabled" + tags: TC_006 + + - block: + - name: Call common role + include_role: + name: ../roles/common + vars: + input_config_filename: "{{ test_input_config_filename }}" + + rescue: + - name: Validate selinux failure message + assert: + that: selinux_status in selinux_value.msg + success_msg: "{{ selinux_check_success_msg }}" + fail_msg: "{{ selinux_check_fail_msg }}" + tags: TC_006 + + - name: Disable selinux + selinux: + state: disabled + tags: TC_006 + +# Testcase OMNIA_DIO_US_DVC_TC_007 +# Execute common role in management station with all input parameters as empty +- name: OMNIA_DIO_US_DVC_TC_007 + hosts: localhost + connection: local + gather_subset: + - 'min' + vars_files: + - test_vars/test_common_vars.yml + - ../roles/common/vars/main.yml + tasks: + - block: + - name: Call common role + include_role: + name: ../roles/common + tasks_from: "{{ password_config_file }}" + vars: + input_config_filename: "{{ empty_input_config_filename }}" + rescue: + - name: Validate error + assert: + that: input_config_failure_msg in input_config_check.msg + success_msg: "{{ input_config_check_success_msg }}" + fail_msg: "{{ input_config_check_fail_msg }}" + tags: TC_007 + +# Testcase OMNIA_DIO_US_DVC_TC_008 +# Execute common role in management station with provision_password as empty +- name: OMNIA_DIO_US_DVC_TC_008 + hosts: localhost + connection: local + gather_subset: + - 'min' + vars_files: + - test_vars/test_common_vars.yml + - ../roles/common/vars/main.yml + tasks: + - name: Delete "{{ new_input_config_filename }}" + file: + path: "{{ new_input_config_filename }}" + state: absent + tags: TC_008 + + - name: Copy "{{ empty_input_config_filename }}" to new file + copy: + src: "{{ empty_input_config_filename }}" + dest: "{{ new_input_config_filename }}" + tags: TC_008 + + - name: Edit "{{ new_input_config_filename }}" + replace: + path: "{{ new_input_config_filename }}" + regexp: "{{ item.regexp }}" + replace: "{{ item.replace }}" + with_items: + - { regexp: "awx_password: \"\"", replace: "awx_password: \"{{ min_length_password }}\"" } + - { regexp: "iso_file_path: \"\"", replace: "iso_file_path: \"{{ valid_iso_path }}\"" } + - { regexp: "dhcp_start_ip_range: \"\"", replace: "dhcp_start_ip_range: \"{{ valid_dhcp_start_range }}\"" } + - { regexp: "dhcp_end_ip_range: \"\"", replace: "dhcp_end_ip_range: \"{{ valid_dhcp_end_range }}\"" } + tags: TC_008 + + - block: + - name: Call common role + include_role: + name: ../roles/common + tasks_from: "{{ password_config_file }}" + vars: + input_config_filename: "{{ new_input_config_filename }}" + rescue: + - name: Validate error + assert: + that: input_config_failure_msg in input_config_check.msg + success_msg: "{{ input_config_check_success_msg }}" + fail_msg: "{{ input_config_check_fail_msg }}" + tags: TC_008 + + - name: Delete "{{ new_input_config_filename }}" + file: + path: "{{ new_input_config_filename }}" + state: absent + tags: TC_008 + +# Testcase OMNIA_DIO_US_DVC_TC_009 +# Execute common role in management
station with awx_password as empty +- name: OMNIA_DIO_US_DVC_TC_009 + hosts: localhost + connection: local + gather_subset: + - 'min' + vars_files: + - test_vars/test_common_vars.yml + - ../roles/common/vars/main.yml + tasks: + - name: Delete "{{ new_input_config_filename }}" + file: + path: "{{ new_input_config_filename }}" + state: absent + tags: TC_009 + + - name: Copy "{{ empty_input_config_filename }}" to new file + copy: + src: "{{ empty_input_config_filename }}" + dest: "{{ new_input_config_filename }}" + tags: TC_009 + + - name: Edit "{{ new_input_config_filename }}" + replace: + path: "{{ new_input_config_filename }}" + regexp: "{{ item.regexp }}" + replace: "{{ item.replace }}" + with_items: + - { regexp: "provision_password: \"\"", replace: "provision_password: \"{{ min_length_password }}\"" } + - { regexp: "iso_file_path: \"\"", replace: "iso_file_path: \"{{ valid_iso_path }}\"" } + - { regexp: "dhcp_start_ip_range: \"\"", replace: "dhcp_start_ip_range: \"{{ valid_dhcp_start_range }}\"" } + - { regexp: "dhcp_end_ip_range: \"\"", replace: "dhcp_end_ip_range: \"{{ valid_dhcp_end_range }}\"" } + tags: TC_009 + + - block: + - name: Call common role + include_role: + name: ../roles/common + tasks_from: "{{ password_config_file }}" + vars: + input_config_filename: "{{ new_input_config_filename }}" + rescue: + - name: Validate error + assert: + that: input_config_failure_msg in input_config_check.msg + success_msg: "{{ input_config_check_success_msg }}" + fail_msg: "{{ input_config_check_fail_msg }}" + tags: TC_009 + + - name: Delete "{{ new_input_config_filename }}" + file: + path: "{{ new_input_config_filename }}" + state: absent + tags: TC_009 + +# Testcase OMNIA_DIO_US_DVC_TC_010 +# Execute common role in management station with iso_file_path as empty +- name: OMNIA_DIO_US_DVC_TC_010 + hosts: localhost + connection: local + gather_subset: + - 'min' + vars_files: + - test_vars/test_common_vars.yml + - ../roles/common/vars/main.yml + tasks: + - name: Delete "{{ new_input_config_filename }}" + file: + path: "{{ new_input_config_filename }}" + state: absent + tags: TC_010 + + - name: Copy "{{ empty_input_config_filename }}" to new file + copy: + src: "{{ empty_input_config_filename }}" + dest: "{{ new_input_config_filename }}" + tags: TC_010 + + - name: Edit "{{ new_input_config_filename }}" + replace: + path: "{{ new_input_config_filename }}" + regexp: "{{ item.regexp }}" + replace: "{{ item.replace }}" + with_items: + - { regexp: "provision_password: \"\"", replace: "provision_password: \"{{ min_length_password }}\"" } + - { regexp: "awx_password: \"\"", replace: "awx_password: \"{{ min_length_password }}\"" } + - { regexp: "dhcp_start_ip_range: \"\"", replace: "dhcp_start_ip_range: \"{{ valid_dhcp_start_range }}\"" } + - { regexp: "dhcp_end_ip_range: \"\"", replace: "dhcp_end_ip_range: \"{{ valid_dhcp_end_range }}\"" } + tags: TC_010 + + - block: + - name: Call common role + include_role: + name: ../roles/common + tasks_from: "{{ password_config_file }}" + vars: + input_config_filename: "{{ new_input_config_filename }}" + rescue: + - name: Validate error + assert: + that: input_config_failure_msg in input_config_check.msg + success_msg: "{{ input_config_check_success_msg }}" + fail_msg: "{{ input_config_check_fail_msg }}" + tags: TC_010 + + - name: Delete "{{ new_input_config_filename }}" + file: + path: "{{ new_input_config_filename }}" + state: absent + tags: TC_010 + +# Testcase OMNIA_DIO_US_DVC_TC_011 +# Execute common role in management station with provision_password of more 
than 31 characters +- name: OMNIA_DIO_US_DVC_TC_011 + hosts: localhost + connection: local + gather_subset: + - 'min' + vars_files: + - test_vars/test_common_vars.yml + - ../roles/common/vars/main.yml + tasks: + - name: Delete "{{ new_input_config_filename }}" + file: + path: "{{ new_input_config_filename }}" + state: absent + tags: TC_011 + + - name: Copy "{{ empty_input_config_filename }}" to new file + copy: + src: "{{ empty_input_config_filename }}" + dest: "{{ new_input_config_filename }}" + tags: TC_011 + + - name: Edit "{{ new_input_config_filename }}" + replace: + path: "{{ new_input_config_filename }}" + regexp: "{{ item.regexp }}" + replace: "{{ item.replace }}" + with_items: + - { regexp: "provision_password: \"\"", replace: "provision_password: \"{{ long_password }}\"" } + - { regexp: "awx_password: \"\"", replace: "awx_password: \"{{ min_length_password }}\"" } + - { regexp: "iso_file_path: \"\"", replace: "iso_file_path: \"{{ valid_iso_path }}\"" } + - { regexp: "dhcp_start_ip_range: \"\"", replace: "dhcp_start_ip_range: \"{{ valid_dhcp_start_range }}\"" } + - { regexp: "dhcp_end_ip_range: \"\"", replace: "dhcp_end_ip_range: \"{{ valid_dhcp_end_range }}\"" } + tags: TC_011 + + - block: + - name: Call common role + include_role: + name: ../roles/common + tasks_from: "{{ password_config_file }}" + vars: + input_config_filename: "{{ new_input_config_filename }}" + rescue: + - name: Validate error + assert: + that: fail_msg_provision_password in cobbler_password_check.msg + success_msg: "{{ input_config_check_success_msg }}" + fail_msg: "{{ input_config_check_fail_msg }}" + tags: TC_011 + + - name: Delete "{{ new_input_config_filename }}" + file: + path: "{{ new_input_config_filename }}" + state: absent + tags: TC_011 + +# Testcase OMNIA_DIO_US_DVC_TC_012 +# Execute common role in management station with provision_password as a string containing white spaces +- name: OMNIA_DIO_US_DVC_TC_012 + hosts: localhost + connection: local + gather_subset: + - 'min' + vars_files: + - test_vars/test_common_vars.yml + - ../roles/common/vars/main.yml + tasks: + - name: Delete "{{ new_input_config_filename }}" + file: + path: "{{ new_input_config_filename }}" + state: absent + tags: TC_012 + + - name: Copy "{{ empty_input_config_filename }}" to new file + copy: + src: "{{ empty_input_config_filename }}" + dest: "{{ new_input_config_filename }}" + tags: TC_012 + + - name: Edit "{{ new_input_config_filename }}" + replace: + path: "{{ new_input_config_filename }}" + regexp: "{{ item.regexp }}" + replace: "{{ item.replace }}" + with_items: + - { regexp: "provision_password: \"\"", replace: "provision_password: \"{{ white_space_password }}\"" } + - { regexp: "awx_password: \"\"", replace: "awx_password: \"{{ min_length_password }}\"" } + - { regexp: "iso_file_path: \"\"", replace: "iso_file_path: \"{{ valid_iso_path }}\"" } + - { regexp: "dhcp_start_ip_range: \"\"", replace: "dhcp_start_ip_range: \"{{ valid_dhcp_start_range }}\"" } + - { regexp: "dhcp_end_ip_range: \"\"", replace: "dhcp_end_ip_range: \"{{ valid_dhcp_end_range }}\"" } + tags: TC_012 + + - block: + - name: Call common role + include_role: + name: ../roles/common + tasks_from: "{{ password_config_file }}" + vars: + input_config_filename: "{{ new_input_config_filename }}" + always: + - name: Validate success message + assert: + that: success_msg_provision_password in cobbler_password_check.msg + success_msg: "{{ input_config_check_success_msg }}" + fail_msg: "{{ input_config_check_fail_msg }}" + tags: TC_012 + + - name: Delete "{{
new_input_config_filename }}" + file: + path: "{{ new_input_config_filename }}" + state: absent + tags: TC_012 + +# Testcase OMNIA_DIO_US_DVC_TC_013 +# Execute common role in management station with provision_password as string contains characters like '/' and '-' +- name: OMNIA_DIO_US_DVC_TC_013 + hosts: localhost + connection: local + gather_subset: + - 'min' + vars_files: + - test_vars/test_common_vars.yml + - ../roles/common/vars/main.yml + tasks: + - name: Delete "{{ new_input_config_filename }}" + file: + path: "{{ new_input_config_filename }}" + state: absent + tags: TC_013 + + - name: Copy "{{ empty_input_config_filename }}" to new file + copy: + src: "{{ empty_input_config_filename }}" + dest: "{{ new_input_config_filename }}" + tags: TC_013 + + - name: Edit "{{ new_input_config_filename }}" + replace: + path: "{{ new_input_config_filename }}" + regexp: "{{ item.regexp }}" + replace: "{{ item.replace }}" + with_items: + - { regexp: "provision_password: \"\"", replace: "provision_password: \"{{ special_character_password1 }}\"" } + - { regexp: "awx_password: \"\"", replace: "awx_password: \"{{ min_length_password }}\"" } + - { regexp: "iso_file_path: \"\"", replace: "iso_file_path: \"{{ valid_iso_path }}\"" } + - { regexp: "dhcp_start_ip_range: \"\"", replace: "dhcp_start_ip_range: \"{{ valid_dhcp_start_range }}\"" } + - { regexp: "dhcp_end_ip_range: \"\"", replace: "dhcp_end_ip_range: \"{{ valid_dhcp_end_range }}\"" } + tags: TC_013 + + - block: + - name: Call common role + include_role: + name: ../roles/common + tasks_from: "{{ password_config_file }}" + vars: + input_config_filename: "{{ new_input_config_filename }}" + rescue: + - name: Validate error + assert: + that: fail_msg_provision_password in cobbler_password_check.msg + success_msg: "{{ input_config_check_success_msg }}" + fail_msg: "{{ input_config_check_fail_msg }}" + tags: TC_013 + + - name: Delete "{{ new_input_config_filename }}" + file: + path: "{{ new_input_config_filename }}" + state: absent + tags: TC_013 + +# Testcase OMNIA_DIO_US_DVC_TC_014 +# Execute common role in management station with provision_password as string contains special characters other than '-', '/' +- name: OMNIA_DIO_US_DVC_TC_014 + hosts: localhost + connection: local + gather_subset: + - 'min' + vars_files: + - test_vars/test_common_vars.yml + - ../roles/common/vars/main.yml + tasks: + - name: Delete "{{ new_input_config_filename }}" + file: + path: "{{ new_input_config_filename }}" + state: absent + tags: TC_014 + + - name: Copy "{{ empty_input_config_filename }}" to new file + copy: + src: "{{ empty_input_config_filename }}" + dest: "{{ new_input_config_filename }}" + tags: TC_014 + + - name: Edit "{{ new_input_config_filename }}" + replace: + path: "{{ new_input_config_filename }}" + regexp: "{{ item.regexp }}" + replace: "{{ item.replace }}" + with_items: + - { regexp: "provision_password: \"\"", replace: "provision_password: \"{{ special_character_password2 }}\"" } + - { regexp: "awx_password: \"\"", replace: "awx_password: \"{{ min_length_password }}\"" } + - { regexp: "iso_file_path: \"\"", replace: "iso_file_path: \"{{ valid_iso_path }}\"" } + - { regexp: "dhcp_start_ip_range: \"\"", replace: "dhcp_start_ip_range: \"{{ valid_dhcp_start_range }}\"" } + - { regexp: "dhcp_end_ip_range: \"\"", replace: "dhcp_end_ip_range: \"{{ valid_dhcp_end_range }}\"" } + tags: TC_014 + + - block: + - name: Call common role + include_role: + name: ../roles/common + tasks_from: "{{ password_config_file }}" + vars: + input_config_filename: "{{ 
new_input_config_filename }}" + always: + - name: Validate success message + assert: + that: success_msg_provision_password in cobbler_password_check.msg + success_msg: "{{ input_config_check_success_msg }}" + fail_msg: "{{ input_config_check_fail_msg }}" + tags: TC_014 + + - name: Delete "{{ new_input_config_filename }}" + file: + path: "{{ new_input_config_filename }}" + state: absent + tags: TC_014 + +# Testcase OMNIA_DIO_US_DVC_TC_015 +# Execute common role in management station with awx_password of more than 31 characters +- name: OMNIA_DIO_US_DVC_TC_015 + hosts: localhost + connection: local + gather_subset: + - 'min' + vars_files: + - test_vars/test_common_vars.yml + - ../roles/common/vars/main.yml + tasks: + - name: Delete "{{ new_input_config_filename }}" + file: + path: "{{ new_input_config_filename }}" + state: absent + tags: TC_015 + + - name: Copy "{{ empty_input_config_filename }}" to new file + copy: + src: "{{ empty_input_config_filename }}" + dest: "{{ new_input_config_filename }}" + tags: TC_015 + + - name: Edit "{{ new_input_config_filename }}" + replace: + path: "{{ new_input_config_filename }}" + regexp: "{{ item.regexp }}" + replace: "{{ item.replace }}" + with_items: + - { regexp: "awx_password: \"\"", replace: "awx_password: \"{{ long_password }}\"" } + - { regexp: "provision_password: \"\"", replace: "provision_password: \"{{ min_length_password }}\"" } + - { regexp: "iso_file_path: \"\"", replace: "iso_file_path: \"{{ valid_iso_path }}\"" } + - { regexp: "dhcp_start_ip_range: \"\"", replace: "dhcp_start_ip_range: \"{{ valid_dhcp_start_range }}\"" } + - { regexp: "dhcp_end_ip_range: \"\"", replace: "dhcp_end_ip_range: \"{{ valid_dhcp_end_range }}\"" } + tags: TC_015 + + - block: + - name: Call common role + include_role: + name: ../roles/common + tasks_from: "{{ password_config_file }}" + vars: + input_config_filename: "{{ new_input_config_filename }}" + rescue: + - name: Validate error + assert: + that: fail_msg_awx_password in awx_password_check.msg + success_msg: "{{ input_config_check_success_msg }}" + fail_msg: "{{ input_config_check_fail_msg }}" + tags: TC_015 + + - name: Delete "{{ new_input_config_filename }}" + file: + path: "{{ new_input_config_filename }}" + state: absent + tags: TC_015 + +# Testcase OMNIA_DIO_US_DVC_TC_016 +# Execute common role in management station with awx_password as string contains white spaces +- name: OMNIA_DIO_US_DVC_TC_016 + hosts: localhost + connection: local + gather_subset: + - 'min' + vars_files: + - test_vars/test_common_vars.yml + - ../roles/common/vars/main.yml + tasks: + - name: Delete "{{ new_input_config_filename }}" + file: + path: "{{ new_input_config_filename }}" + state: absent + tags: TC_016 + + - name: Copy "{{ empty_input_config_filename }}" to new file + copy: + src: "{{ empty_input_config_filename }}" + dest: "{{ new_input_config_filename }}" + tags: TC_016 + + - name: Edit "{{ new_input_config_filename }}" + replace: + path: "{{ new_input_config_filename }}" + regexp: "{{ item.regexp }}" + replace: "{{ item.replace }}" + with_items: + - { regexp: "awx_password: \"\"", replace: "awx_password: \"{{ white_space_password }}\"" } + - { regexp: "provision_password: \"\"", replace: "provision_password: \"{{ min_length_password }}\"" } + - { regexp: "iso_file_path: \"\"", replace: "iso_file_path: \"{{ valid_iso_path }}\"" } + - { regexp: "dhcp_start_ip_range: \"\"", replace: "dhcp_start_ip_range: \"{{ valid_dhcp_start_range }}\"" } + - { regexp: "dhcp_end_ip_range: \"\"", replace: "dhcp_end_ip_range: \"{{ 
valid_dhcp_end_range }}\"" } + tags: TC_016 + + - block: + - name: Call common role + include_role: + name: ../roles/common + tasks_from: "{{ password_config_file }}" + vars: + input_config_filename: "{{ new_input_config_filename }}" + always: + - name: Validate success message + assert: + that: success_msg_awx_password in awx_password_check.msg + success_msg: "{{ input_config_check_success_msg }}" + fail_msg: "{{ input_config_check_fail_msg }}" + tags: TC_016 + + - name: Delete "{{ new_input_config_filename }}" + file: + path: "{{ new_input_config_filename }}" + state: absent + tags: TC_016 + +# Testcase OMNIA_DIO_US_DVC_TC_017 +# Execute common role in management station with awx_password as string contains characters like '/' and '-' +- name: OMNIA_DIO_US_DVC_TC_017 + hosts: localhost + connection: local + gather_subset: + - 'min' + vars_files: + - test_vars/test_common_vars.yml + - ../roles/common/vars/main.yml + tasks: + - name: Delete "{{ new_input_config_filename }}" + file: + path: "{{ new_input_config_filename }}" + state: absent + tags: TC_017 + + - name: Copy "{{ empty_input_config_filename }}" to new file + copy: + src: "{{ empty_input_config_filename }}" + dest: "{{ new_input_config_filename }}" + tags: TC_017 + + - name: Edit "{{ new_input_config_filename }}" + replace: + path: "{{ new_input_config_filename }}" + regexp: "{{ item.regexp }}" + replace: "{{ item.replace }}" + with_items: + - { regexp: "awx_password: \"\"", replace: "awx_password: \"{{ special_character_password1 }}\"" } + - { regexp: "provision_password: \"\"", replace: "provision_password: \"{{ min_length_password }}\"" } + - { regexp: "iso_file_path: \"\"", replace: "iso_file_path: \"{{ valid_iso_path }}\"" } + - { regexp: "dhcp_start_ip_range: \"\"", replace: "dhcp_start_ip_range: \"{{ valid_dhcp_start_range }}\"" } + - { regexp: "dhcp_end_ip_range: \"\"", replace: "dhcp_end_ip_range: \"{{ valid_dhcp_end_range }}\"" } + tags: TC_017 + + - block: + - name: Call common role + include_role: + name: ../roles/common + tasks_from: "{{ password_config_file }}" + vars: + input_config_filename: "{{ new_input_config_filename }}" + rescue: + - name: Validate error + assert: + that: fail_msg_awx_password in awx_password_check.msg + success_msg: "{{ input_config_check_success_msg }}" + fail_msg: "{{ input_config_check_fail_msg }}" + tags: TC_017 + + - name: Delete "{{ new_input_config_filename }}" + file: + path: "{{ new_input_config_filename }}" + state: absent + tags: TC_017 + +# Testcase OMNIA_DIO_US_DVC_TC_018 +# Execute common role in management station with awx_password as string contains special characters other than '-', '/' +- name: OMNIA_DIO_US_DVC_TC_018 + hosts: localhost + connection: local + gather_subset: + - 'min' + vars_files: + - test_vars/test_common_vars.yml + - ../roles/common/vars/main.yml + tasks: + - name: Delete "{{ new_input_config_filename }}" + file: + path: "{{ new_input_config_filename }}" + state: absent + tags: TC_018 + + - name: Copy "{{ empty_input_config_filename }}" to new file + copy: + src: "{{ empty_input_config_filename }}" + dest: "{{ new_input_config_filename }}" + tags: TC_018 + + - name: Edit "{{ new_input_config_filename }}" + replace: + path: "{{ new_input_config_filename }}" + regexp: "{{ item.regexp }}" + replace: "{{ item.replace }}" + with_items: + - { regexp: "awx_password: \"\"", replace: "awx_password: \"{{ special_character_password2 }}\"" } + - { regexp: "provision_password: \"\"", replace: "provision_password: \"{{ min_length_password }}\"" } + - { regexp: 
"iso_file_path: \"\"", replace: "iso_file_path: \"{{ valid_iso_path }}\"" } + - { regexp: "dhcp_start_ip_range: \"\"", replace: "dhcp_start_ip_range: \"{{ valid_dhcp_start_range }}\"" } + - { regexp: "dhcp_end_ip_range: \"\"", replace: "dhcp_end_ip_range: \"{{ valid_dhcp_end_range }}\"" } + tags: TC_018 + + - block: + - name: Call common role + include_role: + name: ../roles/common + tasks_from: "{{ password_config_file }}" + vars: + input_config_filename: "{{ new_input_config_filename }}" + always: + - name: Validate success message + assert: + that: success_msg_awx_password in awx_password_check.msg + success_msg: "{{ input_config_check_success_msg }}" + fail_msg: "{{ input_config_check_fail_msg }}" + tags: TC_018 + + - name: Delete "{{ new_input_config_filename }}" + file: + path: "{{ new_input_config_filename }}" + state: absent + tags: TC_018 + +# Testcase OMNIA_DIO_US_DVC_TC_019 +# Execute common role in management station with valid iso_file_path +- name: OMNIA_DIO_US_DVC_TC_019 + hosts: localhost + connection: local + gather_subset: + - 'min' + vars_files: + - test_vars/test_common_vars.yml + - ../roles/common/vars/main.yml + tasks: + - name: Delete "{{ new_input_config_filename }}" + file: + path: "{{ new_input_config_filename }}" + state: absent + tags: TC_019 + + - name: Copy "{{ empty_input_config_filename }}" to new file + copy: + src: "{{ empty_input_config_filename }}" + dest: "{{ new_input_config_filename }}" + tags: TC_019 + + - name: Edit "{{ new_input_config_filename }}" + replace: + path: "{{ new_input_config_filename }}" + regexp: "{{ item.regexp }}" + replace: "{{ item.replace }}" + with_items: + - { regexp: "provision_password: \"\"", replace: "provision_password: \"{{ min_length_password }}\"" } + - { regexp: "awx_password: \"\"", replace: "awx_password: \"{{ min_length_password }}\"" } + - { regexp: "iso_file_path: \"\"", replace: "iso_file_path: \"{{ valid_iso_path }}\"" } + - { regexp: "dhcp_start_ip_range: \"\"", replace: "dhcp_start_ip_range: \"{{ valid_dhcp_start_range }}\"" } + - { regexp: "dhcp_end_ip_range: \"\"", replace: "dhcp_end_ip_range: \"{{ valid_dhcp_end_range }}\"" } + tags: TC_019 + + - block: + - name: Call common role + include_role: + name: ../roles/common + tasks_from: "{{ password_config_file }}" + vars: + input_config_filename: "{{ new_input_config_filename }}" + always: + - name: Validate success message + assert: + that: result_path_iso_file.stat.exists == true + success_msg: "{{ input_config_check_success_msg }}" + fail_msg: "{{ input_config_check_fail_msg }}" + tags: TC_019 + + - name: Delete "{{ new_input_config_filename }}" + file: + path: "{{ new_input_config_filename }}" + state: absent + tags: TC_019 + +# Testcase OMNIA_DIO_US_DVC_TC_020 +# Execute common role in management station with wrong iso_file_path +- name: OMNIA_DIO_US_DVC_TC_020 + hosts: localhost + connection: local + gather_subset: + - 'min' + vars_files: + - test_vars/test_common_vars.yml + - ../roles/common/vars/main.yml + tasks: + - name: Delete "{{ new_input_config_filename }}" + file: + path: "{{ new_input_config_filename }}" + state: absent + tags: TC_020 + + - name: Copy "{{ empty_input_config_filename }}" to new file + copy: + src: "{{ empty_input_config_filename }}" + dest: "{{ new_input_config_filename }}" + tags: TC_020 + + - name: Edit "{{ new_input_config_filename }}" + replace: + path: "{{ new_input_config_filename }}" + regexp: "{{ item.regexp }}" + replace: "{{ item.replace }}" + with_items: + - { regexp: "provision_password: \"\"", replace: 
"provision_password: \"{{ min_length_password }}\"" } + - { regexp: "awx_password: \"\"", replace: "awx_password: \"{{ min_length_password }}\"" } + - { regexp: "iso_file_path: \"\"", replace: "iso_file_path: \"{{ wrong_iso_path }}\"" } + - { regexp: "dhcp_start_ip_range: \"\"", replace: "dhcp_start_ip_range: \"{{ valid_dhcp_start_range }}\"" } + - { regexp: "dhcp_end_ip_range: \"\"", replace: "dhcp_end_ip_range: \"{{ valid_dhcp_end_range }}\"" } + tags: TC_020 + + - block: + - name: Call common role + include_role: + name: ../roles/common + tasks_from: "{{ password_config_file }}" + vars: + input_config_filename: "{{ new_input_config_filename }}" + rescue: + - name: Check log file + command: tail /var/log/omnia.log + register: iso_fail_log_check + changed_when: false + + - name: Validate error + assert: + that: invalid_iso_file_path in iso_fail_log_check.stdout + success_msg: "{{ input_config_check_success_msg }}" + fail_msg: "{{ input_config_check_fail_msg }}" + tags: TC_020 + + - name: Delete "{{ new_input_config_filename }}" + file: + path: "{{ new_input_config_filename }}" + state: absent + tags: TC_020 + +# Testcase OMNIA_DIO_US_DVC_TC_021 +# Execute common role in management station with dhcp_start_ip_range as empty +- name: OMNIA_DIO_US_DVC_TC_021 + hosts: localhost + connection: local + gather_subset: + - 'min' + vars_files: + - test_vars/test_common_vars.yml + - ../roles/common/vars/main.yml + tasks: + - name: Delete "{{ new_input_config_filename }}" + file: + path: "{{ new_input_config_filename }}" + state: absent + tags: TC_021 + + - name: Copy "{{ empty_input_config_filename }}" to new file + copy: + src: "{{ empty_input_config_filename }}" + dest: "{{ new_input_config_filename }}" + tags: TC_021 + + - name: Edit "{{ new_input_config_filename }}" + replace: + path: "{{ new_input_config_filename }}" + regexp: "{{ item.regexp }}" + replace: "{{ item.replace }}" + with_items: + - { regexp: "provision_password: \"\"", replace: "provision_password: \"{{ min_length_password }}\"" } + - { regexp: "awx_password: \"\"", replace: "awx_password: \"{{ min_length_password }}\"" } + - { regexp: "iso_file_path: \"\"", replace: "iso_file_path: \"{{ valid_iso_path }}\"" } + - { regexp: "dhcp_end_ip_range: \"\"", replace: "dhcp_end_ip_range: \"{{ valid_dhcp_end_range }}\"" } + tags: TC_021 + + - block: + - name: Call common role + include_role: + name: ../roles/common + tasks_from: "{{ password_config_file }}" + vars: + input_config_filename: "{{ new_input_config_filename }}" + rescue: + - name: Validate error + assert: + that: input_config_failure_msg in input_config_check.msg + success_msg: "{{ input_config_check_success_msg }}" + fail_msg: "{{ input_config_check_fail_msg }}" + tags: TC_021 + + - name: Delete "{{ new_input_config_filename }}" + file: + path: "{{ new_input_config_filename }}" + state: absent + tags: TC_021 + +# Testcase OMNIA_DIO_US_DVC_TC_022 +# Execute common role in management station with dhcp_end_ip_range as empty +- name: OMNIA_DIO_US_DVC_TC_022 + hosts: localhost + connection: local + gather_subset: + - 'min' + vars_files: + - test_vars/test_common_vars.yml + - ../roles/common/vars/main.yml + tasks: + - name: Delete "{{ new_input_config_filename }}" + file: + path: "{{ new_input_config_filename }}" + state: absent + tags: TC_022 + + - name: Copy "{{ empty_input_config_filename }}" to new file + copy: + src: "{{ empty_input_config_filename }}" + dest: "{{ new_input_config_filename }}" + tags: TC_022 + + - name: Edit "{{ new_input_config_filename }}" + replace: + path: 
"{{ new_input_config_filename }}" + regexp: "{{ item.regexp }}" + replace: "{{ item.replace }}" + with_items: + - { regexp: "provision_password: \"\"", replace: "provision_password: \"{{ min_length_password }}\"" } + - { regexp: "awx_password: \"\"", replace: "awx_password: \"{{ min_length_password }}\"" } + - { regexp: "iso_file_path: \"\"", replace: "iso_file_path: \"{{ valid_iso_path }}\"" } + - { regexp: "dhcp_start_ip_range: \"\"", replace: "dhcp_start_ip_range: \"{{ valid_dhcp_start_range }}\"" } + tags: TC_022 + + - block: + - name: Call common role + include_role: + name: ../roles/common + tasks_from: "{{ password_config_file }}" + vars: + input_config_filename: "{{ new_input_config_filename }}" + rescue: + - name: Validate error + assert: + that: input_config_failure_msg in input_config_check.msg + success_msg: "{{ input_config_check_success_msg }}" + fail_msg: "{{ input_config_check_fail_msg }}" + tags: TC_022 + + - name: Delete "{{ new_input_config_filename }}" + file: + path: "{{ new_input_config_filename }}" + state: absent + tags: TC_022 + +# Testcase OMNIA_DIO_US_DVC_TC_023 +# Execute common role in management station with mapping_file_path provided +- name: OMNIA_DIO_US_DVC_TC_023 + hosts: localhost + connection: local + gather_subset: + - 'min' + vars_files: + - test_vars/test_common_vars.yml + - ../roles/common/vars/main.yml + tasks: + - name: Delete "{{ new_input_config_filename }}" + file: + path: "{{ new_input_config_filename }}" + state: absent + tags: TC_023 + + - name: Copy "{{ empty_input_config_filename }}" to new file + copy: + src: "{{ empty_input_config_filename }}" + dest: "{{ new_input_config_filename }}" + tags: TC_023 + + - name: Edit "{{ new_input_config_filename }}" + replace: + path: "{{ new_input_config_filename }}" + regexp: "{{ item.regexp }}" + replace: "{{ item.replace }}" + with_items: + - { regexp: "provision_password: \"\"", replace: "provision_password: \"{{ min_length_password }}\"" } + - { regexp: "awx_password: \"\"", replace: "awx_password: \"{{ min_length_password }}\"" } + - { regexp: "iso_file_path: \"\"", replace: "iso_file_path: \"{{ valid_iso_path }}\"" } + - { regexp: "mapping_file_path: \"\"", replace: "mapping_file_path: \"{{ valid_mapping_file_path }}\"" } + - { regexp: "dhcp_start_ip_range: \"\"", replace: "dhcp_start_ip_range: \"{{ valid_dhcp_start_range }}\"" } + - { regexp: "dhcp_end_ip_range: \"\"", replace: "dhcp_end_ip_range: \"{{ valid_dhcp_end_range }}\"" } + tags: TC_023 + + - block: + - name: Call common role + include_role: + name: ../roles/common + tasks_from: "{{ password_config_file }}" + vars: + input_config_filename: "{{ new_input_config_filename }}" + always: + - name: Check log file + command: tail -100 /var/log/omnia.log + register: mapping_success_log_check + changed_when: false + + - name: Validate success message + assert: + that: success_mapping_file in mapping_success_log_check.stdout + success_msg: "{{ input_config_check_success_msg }}" + fail_msg: "{{ input_config_check_fail_msg }}" + tags: TC_023 + + - name: Delete "{{ new_input_config_filename }}" + file: + path: "{{ new_input_config_filename }}" + state: absent + tags: TC_023 + +# Testcase OMNIA_DIO_US_DVC_TC_024 +# Execute common role in management station with awx_password of 30 characters +- name: OMNIA_DIO_US_DVC_TC_024 + hosts: localhost + connection: local + gather_subset: + - 'min' + vars_files: + - test_vars/test_common_vars.yml + - ../roles/common/vars/main.yml + tasks: + - name: Delete "{{ new_input_config_filename }}" + file: + path: "{{ 
new_input_config_filename }}" + state: absent + tags: TC_024 + + - name: Copy "{{ empty_input_config_filename }}" to new file + copy: + src: "{{ empty_input_config_filename }}" + dest: "{{ new_input_config_filename }}" + tags: TC_024 + + - name: Edit "{{ new_input_config_filename }}" + replace: + path: "{{ new_input_config_filename }}" + regexp: "{{ item.regexp }}" + replace: "{{ item.replace }}" + with_items: + - { regexp: "awx_password: \"\"", replace: "awx_password: \"{{ max_length_password }}\"" } + - { regexp: "provision_password: \"\"", replace: "provision_password: \"{{ min_length_password }}\"" } + - { regexp: "iso_file_path: \"\"", replace: "iso_file_path: \"{{ valid_iso_path }}\"" } + - { regexp: "dhcp_start_ip_range: \"\"", replace: "dhcp_start_ip_range: \"{{ valid_dhcp_start_range }}\"" } + - { regexp: "dhcp_end_ip_range: \"\"", replace: "dhcp_end_ip_range: \"{{ valid_dhcp_end_range }}\"" } + tags: TC_024 + + - block: + - name: Call common role + include_role: + name: ../roles/common + tasks_from: "{{ password_config_file }}" + vars: + input_config_filename: "{{ new_input_config_filename }}" + always: + - name: Validate success message + assert: + that: success_msg_awx_password in awx_password_check.msg + success_msg: "{{ input_config_check_success_msg }}" + fail_msg: "{{ input_config_check_fail_msg }}" + tags: TC_024 + + - name: Delete "{{ new_input_config_filename }}" + file: + path: "{{ new_input_config_filename }}" + state: absent + tags: TC_024 + +# Testcase OMNIA_DIO_US_DVC_TC_025 +# Execute common role in management station with provision_password of 30 characters +- name: OMNIA_DIO_US_DVC_TC_025 + hosts: localhost + connection: local + gather_subset: + - 'min' + vars_files: + - test_vars/test_common_vars.yml + - ../roles/common/vars/main.yml + tasks: + - name: Delete "{{ new_input_config_filename }}" + file: + path: "{{ new_input_config_filename }}" + state: absent + tags: TC_025 + + - name: Copy "{{ empty_input_config_filename }}" to new file + copy: + src: "{{ empty_input_config_filename }}" + dest: "{{ new_input_config_filename }}" + tags: TC_025 + + - name: Edit "{{ new_input_config_filename }}" + replace: + path: "{{ new_input_config_filename }}" + regexp: "{{ item.regexp }}" + replace: "{{ item.replace }}" + with_items: + - { regexp: "provision_password: \"\"", replace: "provision_password: \"{{ max_length_password }}\"" } + - { regexp: "awx_password: \"\"", replace: "awx_password: \"{{ min_length_password }}\"" } + - { regexp: "iso_file_path: \"\"", replace: "iso_file_path: \"{{ valid_iso_path }}\"" } + - { regexp: "dhcp_start_ip_range: \"\"", replace: "dhcp_start_ip_range: \"{{ valid_dhcp_start_range }}\"" } + - { regexp: "dhcp_end_ip_range: \"\"", replace: "dhcp_end_ip_range: \"{{ valid_dhcp_end_range }}\"" } + tags: TC_025 + + - block: + - name: Call common role + include_role: + name: ../roles/common + tasks_from: "{{ password_config_file }}" + vars: + input_config_filename: "{{ new_input_config_filename }}" + always: + - name: Validate success message + assert: + that: success_msg_provision_password in cobbler_password_check.msg + success_msg: "{{ input_config_check_success_msg }}" + fail_msg: "{{ input_config_check_fail_msg }}" + tags: TC_025 + + - name: Delete "{{ new_input_config_filename }}" + file: + path: "{{ new_input_config_filename }}" + state: absent + tags: TC_025 + +# Testcase OMNIA_DIO_US_DVC_TC_026 +# Execute common role in management station with hpc_nic and public_nic as same value +- name: OMNIA_DIO_US_DVC_TC_026 + hosts: localhost + 
connection: local + gather_subset: + - 'min' + vars_files: + - test_vars/test_common_vars.yml + - ../roles/common/vars/main.yml + tasks: + - name: Delete "{{ new_input_config_filename }}" + file: + path: "{{ new_input_config_filename }}" + state: absent + tags: TC_026 + + - name: Copy "{{ empty_input_config_filename }}" to new file + copy: + src: "{{ empty_input_config_filename }}" + dest: "{{ new_input_config_filename }}" + tags: TC_026 + + - name: Edit "{{ new_input_config_filename }}" + replace: + path: "{{ new_input_config_filename }}" + regexp: "{{ item.regexp }}" + replace: "{{ item.replace }}" + with_items: + - { regexp: "provision_password: \"\"", replace: "provision_password: \"{{ min_length_password }}\"" } + - { regexp: "awx_password: \"\"", replace: "awx_password: \"{{ min_length_password }}\"" } + - { regexp: "iso_file_path: \"\"", replace: "iso_file_path: \"{{ valid_iso_path }}\"" } + - { regexp: "dhcp_start_ip_range: \"\"", replace: "dhcp_start_ip_range: \"{{ valid_dhcp_start_range }}\"" } + - { regexp: "dhcp_end_ip_range: \"\"", replace: "dhcp_end_ip_range: \"{{ valid_dhcp_end_range }}\"" } + - { regexp: "hpc_nic: \"em1\"", replace: "hpc_nic: \"em2\"" } + tags: TC_026 + + - block: + - name: Call common role + include_role: + name: ../roles/common + tasks_from: "{{ password_config_file }}" + vars: + input_config_filename: "{{ new_input_config_filename }}" + rescue: + - name: Validate error + assert: + that: fail_msg_hpc_nic in hpc_nic_check.msg + success_msg: "{{ input_config_check_success_msg }}" + fail_msg: "{{ input_config_check_fail_msg }}" + tags: TC_026 + + - name: Delete "{{ new_input_config_filename }}" + file: + path: "{{ new_input_config_filename }}" + state: absent + tags: TC_026 + +# Testcase OMNIA_DIO_US_DVC_TC_027 +# Execute common role in management station with hpc_nic as empty +- name: OMNIA_DIO_US_DVC_TC_027 + hosts: localhost + connection: local + gather_subset: + - 'min' + vars_files: + - test_vars/test_common_vars.yml + - ../roles/common/vars/main.yml + tasks: + - name: Delete "{{ new_input_config_filename }}" + file: + path: "{{ new_input_config_filename }}" + state: absent + tags: TC_027 + + - name: Copy "{{ empty_input_config_filename }}" to new file + copy: + src: "{{ empty_input_config_filename }}" + dest: "{{ new_input_config_filename }}" + tags: TC_027 + + - name: Edit "{{ new_input_config_filename }}" + replace: + path: "{{ new_input_config_filename }}" + regexp: "{{ item.regexp }}" + replace: "{{ item.replace }}" + with_items: + - { regexp: "provision_password: \"\"", replace: "provision_password: \"{{ min_length_password }}\"" } + - { regexp: "awx_password: \"\"", replace: "awx_password: \"{{ min_length_password }}\"" } + - { regexp: "iso_file_path: \"\"", replace: "iso_file_path: \"{{ valid_iso_path }}\"" } + - { regexp: "dhcp_start_ip_range: \"\"", replace: "dhcp_start_ip_range: \"{{ valid_dhcp_start_range }}\"" } + - { regexp: "dhcp_end_ip_range: \"\"", replace: "dhcp_end_ip_range: \"{{ valid_dhcp_end_range }}\"" } + - { regexp: "hpc_nic: \"em1\"", replace: "hpc_nic: \"\"" } + tags: TC_027 + + - block: + - name: Call common role + include_role: + name: ../roles/common + tasks_from: "{{ password_config_file }}" + vars: + input_config_filename: "{{ new_input_config_filename }}" + rescue: + - name: Validate error + assert: + that: input_config_failure_msg in input_config_check.msg + success_msg: "{{ input_config_check_success_msg }}" + fail_msg: "{{ input_config_check_fail_msg }}" + tags: TC_027 + + - name: Delete "{{ new_input_config_filename 
}}" + file: + path: "{{ new_input_config_filename }}" + state: absent + tags: TC_027 + +# Testcase OMNIA_DIO_US_DVC_TC_028 +# Execute common role in management station with public_nic as empty +- name: OMNIA_DIO_US_DVC_TC_028 + hosts: localhost + connection: local + gather_subset: + - 'min' + vars_files: + - test_vars/test_common_vars.yml + - ../roles/common/vars/main.yml + tasks: + - name: Delete "{{ new_input_config_filename }}" + file: + path: "{{ new_input_config_filename }}" + state: absent + tags: TC_028 + + - name: Copy "{{ empty_input_config_filename }}" to new file + copy: + src: "{{ empty_input_config_filename }}" + dest: "{{ new_input_config_filename }}" + tags: TC_028 + + - name: Edit "{{ new_input_config_filename }}" + replace: + path: "{{ new_input_config_filename }}" + regexp: "{{ item.regexp }}" + replace: "{{ item.replace }}" + with_items: + - { regexp: "provision_password: \"\"", replace: "provision_password: \"{{ min_length_password }}\"" } + - { regexp: "awx_password: \"\"", replace: "awx_password: \"{{ min_length_password }}\"" } + - { regexp: "iso_file_path: \"\"", replace: "iso_file_path: \"{{ valid_iso_path }}\"" } + - { regexp: "dhcp_start_ip_range: \"\"", replace: "dhcp_start_ip_range: \"{{ valid_dhcp_start_range }}\"" } + - { regexp: "dhcp_end_ip_range: \"\"", replace: "dhcp_end_ip_range: \"{{ valid_dhcp_end_range }}\"" } + - { regexp: "public_nic: \"em2\"", replace: "public_nic: \"\"" } + tags: TC_028 + + - block: + - name: Call common role + include_role: + name: ../roles/common + tasks_from: "{{ password_config_file }}" + vars: + input_config_filename: "{{ new_input_config_filename }}" + rescue: + - name: Validate error + assert: + that: input_config_failure_msg in input_config_check.msg + success_msg: "{{ input_config_check_success_msg }}" + fail_msg: "{{ input_config_check_fail_msg }}" + tags: TC_028 + + - name: Delete "{{ new_input_config_filename }}" + file: + path: "{{ new_input_config_filename }}" + state: absent + tags: TC_028 + +# Testcase OMNIA_DIO_US_DVC_TC_029 +# Execute common role in management station with public_nic not present in device +- name: OMNIA_DIO_US_DVC_TC_029 + hosts: localhost + connection: local + gather_subset: + - 'min' + vars_files: + - test_vars/test_common_vars.yml + - ../roles/common/vars/main.yml + tasks: + - name: Delete "{{ new_input_config_filename }}" + file: + path: "{{ new_input_config_filename }}" + state: absent + tags: TC_029 + + - name: Copy "{{ empty_input_config_filename }}" to new file + copy: + src: "{{ empty_input_config_filename }}" + dest: "{{ new_input_config_filename }}" + tags: TC_029 + + - name: Edit "{{ new_input_config_filename }}" + replace: + path: "{{ new_input_config_filename }}" + regexp: "{{ item.regexp }}" + replace: "{{ item.replace }}" + with_items: + - { regexp: "provision_password: \"\"", replace: "provision_password: \"{{ min_length_password }}\"" } + - { regexp: "awx_password: \"\"", replace: "awx_password: \"{{ min_length_password }}\"" } + - { regexp: "iso_file_path: \"\"", replace: "iso_file_path: \"{{ valid_iso_path }}\"" } + - { regexp: "dhcp_start_ip_range: \"\"", replace: "dhcp_start_ip_range: \"{{ valid_dhcp_start_range }}\"" } + - { regexp: "dhcp_end_ip_range: \"\"", replace: "dhcp_end_ip_range: \"{{ valid_dhcp_end_range }}\"" } + - { regexp: "public_nic: \"em2\"", replace: "public_nic: \"em5\"" } + tags: TC_029 + + - block: + - name: Call common role + include_role: + name: ../roles/common + tasks_from: "{{ password_config_file }}" + vars: + input_config_filename: "{{ 
new_input_config_filename }}" + rescue: + - name: Validate error + assert: + that: fail_hpc_ip in public_ip_check.msg + success_msg: "{{ input_config_check_success_msg }}" + fail_msg: "{{ input_config_check_fail_msg }}" + tags: TC_029 + + - name: Delete "{{ new_input_config_filename }}" + file: + path: "{{ new_input_config_filename }}" + state: absent + tags: TC_029 + +# Testcase OMNIA_DIO_US_DVC_TC_030 +# Execute common role in management station with hpc_nic not present in device +- name: OMNIA_DIO_US_DVC_TC_030 + hosts: localhost + connection: local + gather_subset: + - 'min' + vars_files: + - test_vars/test_common_vars.yml + - ../roles/common/vars/main.yml + tasks: + - name: Delete "{{ new_input_config_filename }}" + file: + path: "{{ new_input_config_filename }}" + state: absent + tags: TC_030 + + - name: Copy "{{ empty_input_config_filename }}" to new file + copy: + src: "{{ empty_input_config_filename }}" + dest: "{{ new_input_config_filename }}" + tags: TC_030 + + - name: Edit "{{ new_input_config_filename }}" + replace: + path: "{{ new_input_config_filename }}" + regexp: "{{ item.regexp }}" + replace: "{{ item.replace }}" + with_items: + - { regexp: "provision_password: \"\"", replace: "provision_password: \"{{ min_length_password }}\"" } + - { regexp: "awx_password: \"\"", replace: "awx_password: \"{{ min_length_password }}\"" } + - { regexp: "iso_file_path: \"\"", replace: "iso_file_path: \"{{ valid_iso_path }}\"" } + - { regexp: "dhcp_start_ip_range: \"\"", replace: "dhcp_start_ip_range: \"{{ valid_dhcp_start_range }}\"" } + - { regexp: "dhcp_end_ip_range: \"\"", replace: "dhcp_end_ip_range: \"{{ valid_dhcp_end_range }}\"" } + - { regexp: "hpc_nic: \"em1\"", replace: "hpc_nic: \"em5\"" } + tags: TC_030 + + - block: + - name: Call common role + include_role: + name: ../roles/common + tasks_from: "{{ password_config_file }}" + vars: + input_config_filename: "{{ new_input_config_filename }}" + rescue: + - name: Validate error + assert: + that: fail_hpc_ip in hpc_ip_check.msg + success_msg: "{{ input_config_check_success_msg }}" + fail_msg: "{{ input_config_check_fail_msg }}" + tags: TC_030 + + - name: Delete "{{ new_input_config_filename }}" + file: + path: "{{ new_input_config_filename }}" + state: absent + tags: TC_030 + +# Testcase OMNIA_DIO_US_DVC_TC_031 +# Execute common role with dhcp_start_ip_range in wrong ip range +- name: OMNIA_DIO_US_DVC_TC_031 + hosts: localhost + connection: local + gather_subset: + - 'min' + vars_files: + - test_vars/test_common_vars.yml + - ../roles/common/vars/main.yml + tasks: + - name: Delete "{{ new_input_config_filename }}" + file: + path: "{{ new_input_config_filename }}" + state: absent + tags: TC_031 + + - name: Copy "{{ empty_input_config_filename }}" to new file + copy: + src: "{{ empty_input_config_filename }}" + dest: "{{ new_input_config_filename }}" + tags: TC_031 + + - name: Edit "{{ new_input_config_filename }}" + replace: + path: "{{ new_input_config_filename }}" + regexp: "{{ item.regexp }}" + replace: "{{ item.replace }}" + with_items: + - { regexp: "provision_password: \"\"", replace: "provision_password: \"{{ min_length_password }}\"" } + - { regexp: "awx_password: \"\"", replace: "awx_password: \"{{ min_length_password }}\"" } + - { regexp: "iso_file_path: \"\"", replace: "iso_file_path: \"{{ valid_iso_path }}\"" } + - { regexp: "dhcp_start_ip_range: \"\"", replace: "dhcp_start_ip_range: \"{{ invalid_dhcp_ip }}\"" } + - { regexp: "dhcp_end_ip_range: \"\"", replace: "dhcp_end_ip_range: \"{{ valid_dhcp_end_range }}\"" } + tags: 
TC_031 + + - block: + - name: Call common role + include_role: + name: ../roles/common + tasks_from: "{{ password_config_file }}" + vars: + input_config_filename: "{{ new_input_config_filename }}" + rescue: + - name: Validate error + assert: + that: fail_dhcp_range in dhcp_start_ip_check.msg + success_msg: "{{ input_config_check_success_msg }}" + fail_msg: "{{ input_config_check_fail_msg }}" + tags: TC_031 + + - name: Delete "{{ new_input_config_filename }}" + file: + path: "{{ new_input_config_filename }}" + state: absent + tags: TC_031 + +# Testcase OMNIA_DIO_US_DVC_TC_032 +# Execute common role with dhcp_start_ip_range in wrong format +- name: OMNIA_DIO_US_DVC_TC_032 + hosts: localhost + connection: local + gather_subset: + - 'min' + vars_files: + - test_vars/test_common_vars.yml + - ../roles/common/vars/main.yml + tasks: + - name: Delete "{{ new_input_config_filename }}" + file: + path: "{{ new_input_config_filename }}" + state: absent + tags: TC_032 + + - name: Copy "{{ empty_input_config_filename }}" to new file + copy: + src: "{{ empty_input_config_filename }}" + dest: "{{ new_input_config_filename }}" + tags: TC_032 + + - name: Edit "{{ new_input_config_filename }}" + replace: + path: "{{ new_input_config_filename }}" + regexp: "{{ item.regexp }}" + replace: "{{ item.replace }}" + with_items: + - { regexp: "provision_password: \"\"", replace: "provision_password: \"{{ min_length_password }}\"" } + - { regexp: "awx_password: \"\"", replace: "awx_password: \"{{ min_length_password }}\"" } + - { regexp: "iso_file_path: \"\"", replace: "iso_file_path: \"{{ valid_iso_path }}\"" } + - { regexp: "dhcp_start_ip_range: \"\"", replace: "dhcp_start_ip_range: \"{{ wrong_dhcp_ip }}\"" } + - { regexp: "dhcp_end_ip_range: \"\"", replace: "dhcp_end_ip_range: \"{{ valid_dhcp_end_range }}\"" } + tags: TC_032 + + - block: + - name: Call common role + include_role: + name: ../roles/common + tasks_from: "{{ password_config_file }}" + vars: + input_config_filename: "{{ new_input_config_filename }}" + rescue: + - name: Validate error + assert: + that: fail_dhcp_range in dhcp_start_ip_check.msg + success_msg: "{{ input_config_check_success_msg }}" + fail_msg: "{{ input_config_check_fail_msg }}" + tags: TC_032 + + - name: Delete "{{ new_input_config_filename }}" + file: + path: "{{ new_input_config_filename }}" + state: absent + tags: TC_032 + +# Testcase OMNIA_DIO_US_DVC_TC_033 +#Execute common role with dhcp_end_ip_range in wrong format +- name: OMNIA_DIO_US_DVC_TC_033 + hosts: localhost + connection: local + gather_subset: + - 'min' + vars_files: + - test_vars/test_common_vars.yml + - ../roles/common/vars/main.yml + tasks: + - name: Delete "{{ new_input_config_filename }}" + file: + path: "{{ new_input_config_filename }}" + state: absent + tags: TC_033 + + - name: Copy "{{ empty_input_config_filename }}" to new file + copy: + src: "{{ empty_input_config_filename }}" + dest: "{{ new_input_config_filename }}" + tags: TC_033 + + - name: Edit "{{ new_input_config_filename }}" + replace: + path: "{{ new_input_config_filename }}" + regexp: "{{ item.regexp }}" + replace: "{{ item.replace }}" + with_items: + - { regexp: "provision_password: \"\"", replace: "provision_password: \"{{ min_length_password }}\"" } + - { regexp: "awx_password: \"\"", replace: "awx_password: \"{{ min_length_password }}\"" } + - { regexp: "iso_file_path: \"\"", replace: "iso_file_path: \"{{ valid_iso_path }}\"" } + - { regexp: "dhcp_start_ip_range: \"\"", replace: "dhcp_start_ip_range: \"{{ valid_dhcp_start_range }}\"" } + - { regexp: 
"dhcp_end_ip_range: \"\"", replace: "dhcp_end_ip_range: \"{{ wrong_dhcp_ip }}\"" } + tags: TC_033 + + - block: + - name: Call common role + include_role: + name: ../roles/common + tasks_from: "{{ password_config_file }}" + vars: + input_config_filename: "{{ new_input_config_filename }}" + rescue: + - name: Validate error + assert: + that: fail_dhcp_range in dhcp_start_ip_check.msg + success_msg: "{{ input_config_check_success_msg }}" + fail_msg: "{{ input_config_check_fail_msg }}" + tags: TC_033 + + - name: Delete "{{ new_input_config_filename }}" + file: + path: "{{ new_input_config_filename }}" + state: absent + tags: TC_033 + +# Testcase OMNIA_DIO_US_DVC_TC_034 +#Execute common role with dhcp_end_ip_range in wrong ip range +- name: OMNIA_DIO_US_DVC_TC_034 + hosts: localhost + connection: local + gather_subset: + - 'min' + vars_files: + - test_vars/test_common_vars.yml + - ../roles/common/vars/main.yml + tasks: + - name: Delete "{{ new_input_config_filename }}" + file: + path: "{{ new_input_config_filename }}" + state: absent + tags: TC_034 + + - name: Copy "{{ empty_input_config_filename }}" to new file + copy: + src: "{{ empty_input_config_filename }}" + dest: "{{ new_input_config_filename }}" + tags: TC_034 + + - name: Edit "{{ new_input_config_filename }}" + replace: + path: "{{ new_input_config_filename }}" + regexp: "{{ item.regexp }}" + replace: "{{ item.replace }}" + with_items: + - { regexp: "provision_password: \"\"", replace: "provision_password: \"{{ min_length_password }}\"" } + - { regexp: "awx_password: \"\"", replace: "awx_password: \"{{ min_length_password }}\"" } + - { regexp: "iso_file_path: \"\"", replace: "iso_file_path: \"{{ valid_iso_path }}\"" } + - { regexp: "dhcp_start_ip_range: \"\"", replace: "dhcp_start_ip_range: \"{{ valid_dhcp_start_range }}\"" } + - { regexp: "dhcp_end_ip_range: \"\"", replace: "dhcp_end_ip_range: \"{{ invalid_dhcp_ip }}\"" } + tags: TC_034 + + - block: + - name: Call common role + include_role: + name: ../roles/common + tasks_from: "{{ password_config_file }}" + vars: + input_config_filename: "{{ new_input_config_filename }}" + rescue: + - name: Validate error + assert: + that: fail_dhcp_range in dhcp_start_ip_check.msg + success_msg: "{{ input_config_check_success_msg }}" + fail_msg: "{{ input_config_check_fail_msg }}" + tags: TC_034 + + - name: Delete "{{ new_input_config_filename }}" + file: + path: "{{ new_input_config_filename }}" + state: absent + tags: TC_034 \ No newline at end of file diff --git a/appliance/test/test_mapping_file b/appliance/test/test_mapping_file new file mode 100644 index 000000000..b2a091048 --- /dev/null +++ b/appliance/test/test_mapping_file @@ -0,0 +1,2 @@ +Mac,Hostname,IP +xx:yy:zz:aa:bb,validation-host21,172.20.0.21 \ No newline at end of file diff --git a/appliance/test/test_provision_cc.yml b/appliance/test/test_provision_cc.yml new file mode 100644 index 000000000..d40c860ea --- /dev/null +++ b/appliance/test/test_provision_cc.yml @@ -0,0 +1,608 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +--- + +# Testcase OMNIA_DIO_US_CC_TC_004 +# Execute provision role in management station and verify cobbler configuration +- name: OMNIA_DIO_US_CC_TC_004 + hosts: localhost + connection: local + vars_files: + - test_vars/test_provision_vars.yml + - ../roles/provision/vars/main.yml + - "{{ test_input_config_filename }}" + tasks: + - name: Check the iso file is present + stat: + path: "{{ iso_file_path }}" + register: iso_status + tags: TC_004 + + - name: Fail if iso file is missing + fail: + msg: "{{ iso_fail }}" + when: iso_status.stat.exists == false + tags: TC_004 + + - name: Delete the cobbler container if exists + docker_container: + name: "{{ docker_container_name }}" + state: absent + tags: TC_004 + + - name: Delete docker image if exists + docker_image: + name: "{{ docker_image_name }}" + tag: "{{ docker_image_tag }}" + state: absent + tags: TC_004 + + - block: + - name: Call common role + include_role: + name: ../roles/common + vars: + input_config_filename: "{{ test_input_config_filename }}" + + - name: Call provision role + include_role: + name: ../roles/provision + tags: TC_004 + + - name: Check the connection to cobbler UI and it returns a status 200 + uri: + url: https://localhost/cobbler_web + status_code: 200 + return_content: yes + validate_certs: no + tags: TC_004,VERIFY_004 + + - name: Fetch cobbler version in cobbler container + command: docker exec {{ docker_container_name }} cobbler version + changed_when: false + register: cobbler_version + tags: TC_004,VERIFY_004 + + - name: Verify cobbler version + assert: + that: + - "'Cobbler' in cobbler_version.stdout" + - "'Error' not in cobbler_version.stdout" + fail_msg: "{{ cobbler_version_fail_msg }}" + success_msg: "{{ cobbler_version_success_msg }}" + tags: TC_004,VERIFY_004 + + - name: Run cobbler check command in cobbler container + command: docker exec {{ docker_container_name }} cobbler check + changed_when: false + register: cobbler_check + tags: TC_004,VERIFY_004 + + - name: Verify cobbler check command output + assert: + that: + - "'The following are potential configuration items that you may want to fix' not in cobbler_check.stdout" + - "'Error' not in cobbler_check.stdout" + fail_msg: "{{ cobbler_check_fail_msg }}" + success_msg: "{{ cobbler_check_success_msg }}" + ignore_errors: yes + tags: TC_004,VERIFY_004 + + - name: Run cobbler sync command in cobbler container + command: docker exec {{ docker_container_name }} cobbler sync + changed_when: false + register: cobbler_sync + tags: TC_004,VERIFY_004 + + - name: Verify cobbler sync command output + assert: + that: + - "'TASK COMPLETE' in cobbler_sync.stdout" + - "'Fail' not in cobbler_sync.stdout" + - "'Error' not in cobbler_sync.stdout" + fail_msg: "{{ cobbler_sync_fail_msg }}" + success_msg: "{{ cobbler_sync_success_msg }}" + tags: TC_004,VERIFY_004 + + - name: Fetch cobbler distro list + command: docker exec {{ docker_container_name }} cobbler distro list + changed_when: false + register: cobbler_distro_list + tags: TC_004,VERIFY_004 + + - name: Verify cobbler distro list + assert: + that: + - "'CentOS' in cobbler_distro_list.stdout" + fail_msg: "{{ cobbler_distro_list_fail_msg }}" + success_msg: "{{ cobbler_distro_list_success_msg }}" + tags: TC_004,VERIFY_004 + + - name: Fetch cobbler profile list + command: docker exec cobbler cobbler profile list + changed_when: false + register: cobbler_profile_list + tags: TC_004,VERIFY_004 + + - name: Verify cobbler
profile list + assert: + that: + - "'CentOS' in cobbler_profile_list.stdout" + fail_msg: "{{ cobbler_profile_list_fail_msg }}" + success_msg: "{{ cobbler_profile_list_success_msg }}" + tags: TC_004,VERIFY_004 + + - name: Check kickstart file + shell: | + docker exec {{ docker_container_name }} [ -f /var/lib/cobbler/kickstarts/{{ kickstart_filename }} ] && echo "File exist" || echo "File does not exist" + changed_when: false + register: kickstart_file_status + tags: TC_004,VERIFY_004 + + - name: Verify kickstart file present + assert: + that: + - "'File exist' in kickstart_file_status.stdout" + fail_msg: "{{ kickstart_file_fail_msg }}" + success_msg: "{{ kickstart_file_success_msg }}" + tags: TC_004,VERIFY_004 + + - name: Check crontab list + command: docker exec cobbler crontab -l + changed_when: false + register: crontab_list + tags: TC_004,VERIFY_004 + + - name: Verify crontab list + assert: + that: + - "'* * * * * ansible-playbook /root/tftp.yml' in crontab_list.stdout" + - "'5 * * * * ansible-playbook /root/inventory_creation.yml' in crontab_list.stdout" + fail_msg: "{{ crontab_list_fail_msg }}" + success_msg: "{{ crontab_list_success_msg }}" + tags: TC_004,VERIFY_004 + + - name: Check tftp,dhcpd,xinetd,cobblerd service is running + command: docker exec cobbler systemctl is-active {{ item }} + changed_when: false + ignore_errors: yes + register: cobbler_service_check + with_items: "{{ cobbler_services }}" + tags: TC_004,VERIFY_004 + + - name: Verify tftp,dhcpd,xinetd,cobblerd service is running + assert: + that: + - "'active' in cobbler_service_check.results[{{ item }}].stdout" + - "'inactive' not in cobbler_service_check.results[{{ item }}].stdout" + - "'unknown' not in cobbler_service_check.results[{{ item }}].stdout" + fail_msg: "{{ cobbler_service_check_fail_msg }}" + success_msg: "{{ cobbler_service_check_success_msg }}" + with_sequence: start=0 end=3 + tags: TC_004,VERIFY_004 + +# Testcase OMNIA_DIO_US_CDIP_TC_005 +# Execute provision role in management station where cobbler container is configured +- name: OMNIA_DIO_US_CDIP_TC_005 + hosts: localhost + connection: local + vars_files: + - test_vars/test_provision_vars.yml + - ../roles/provision/vars/main.yml + tasks: + - block: + - name: Call common role + include_role: + name: ../roles/common + vars: + input_config_filename: "{{ test_input_config_filename }}" + + - name: Call provision role + include_role: + name: ../roles/provision + tags: TC_005 + + - name: Check the connection to cobbler UI and it returns a status 200 + uri: + url: https://localhost/cobbler_web + status_code: 200 + return_content: yes + validate_certs: no + tags: TC_005,VERIFY_005 + + - name: Fetch cobbler version in cobbler container + command: docker exec {{ docker_container_name }} cobbler version + changed_when: false + register: cobbler_version + tags: TC_005,VERIFY_005 + + - name: Verify cobbler version + assert: + that: + - "'Cobbler' in cobbler_version.stdout" + - "'Error' not in cobbler_version.stdout" + fail_msg: "{{ cobbler_version_fail_msg }}" + success_msg: "{{ cobbler_version_success_msg }}" + tags: TC_005,VERIFY_005 + + - name: Run cobbler check command in cobbler container + command: docker exec {{ docker_container_name }} cobbler check + changed_when: false + register: cobbler_check + tags: TC_005,VERIFY_005 + + - name: Verify cobbler check command output + assert: + that: + - "'The following are potential configuration items that you may want to fix' not in cobbler_check.stdout" + - "'Error' not in cobbler_check.stdout" + fail_msg: "{{
cobbler_check_fail_msg }}" + success_msg: "{{ cobbler_check_success_msg }}" + ignore_errors: yes + tags: TC_005,VERIFY_005 + + - name: Run cobbler sync command in cobbler container + command: docker exec {{ docker_container_name }} cobbler sync + changed_when: false + register: cobbler_sync + tags: TC_005,VERIFY_005 + + - name: Verify cobbler sync command output + assert: + that: + - "'TASK COMPLETE' in cobbler_sync.stdout" + - "'Fail' not in cobbler_sync.stdout" + - "'Error' not in cobbler_sync.stdout" + fail_msg: "{{ cobbler_sync_fail_msg }}" + success_msg: "{{ cobbler_sync_success_msg }}" + tags: TC_005,VERIFY_005 + + - name: Fetch cobbler distro list + command: docker exec {{ docker_container_name }} cobbler distro list + changed_when: false + register: cobbler_distro_list + tags: TC_005,VERIFY_005 + + - name: Verify cobbler distro list + assert: + that: + - "'CentOS' in cobbler_distro_list.stdout" + fail_msg: "{{ cobbler_distro_list_fail_msg }}" + success_msg: "{{ cobbler_distro_list_success_msg }}" + tags: TC_005,VERIFY_005 + + - name: Fetch cobbler profile list + command: docker exec cobbler cobbler profile list + changed_when: false + register: cobbler_profile_list + tags: TC_005,VERIFY_005 + + - name: Verify cobbler profile list + assert: + that: + - "'CentOS' in cobbler_profile_list.stdout" + fail_msg: "{{ cobbler_profile_list_fail_msg }}" + success_msg: "{{ cobbler_profile_list_success_msg }}" + tags: TC_005,VERIFY_005 + + - name: Check kickstart file + shell: | + docker exec {{ docker_container_name }} [ -f /var/lib/cobbler/kickstarts/{{ kickstart_filename }} ] && echo "File exist" || echo "File does not exist" + changed_when: false + register: kickstart_file_status + tags: TC_005,VERIFY_005 + + - name: Verify kickstart file present + assert: + that: + - "'File exist' in kickstart_file_status.stdout" + fail_msg: "{{ kickstart_file_fail_msg }}" + success_msg: "{{ kickstart_file_success_msg }}" + tags: TC_005,VERIFY_005 + + - name: Check crontab list + command: docker exec cobbler crontab -l + changed_when: false + register: crontab_list + tags: TC_005,VERIFY_005 + + - name: Verify crontab list + assert: + that: + - "'* * * * * ansible-playbook /root/tftp.yml' in crontab_list.stdout" + - "'5 * * * * ansible-playbook /root/inventory_creation.yml' in crontab_list.stdout" + fail_msg: "{{ crontab_list_fail_msg }}" + success_msg: "{{ crontab_list_success_msg }}" + tags: TC_005,VERIFY_005 + + - name: Check tftp,dhcpd,xinetd,cobblerd service is running + command: docker exec cobbler systemctl is-active {{ item }} + changed_when: false + ignore_errors: yes + register: cobbler_service_check + with_items: "{{ cobbler_services }}" + tags: TC_005,VERIFY_005 + + - name: Verify tftp,dhcpd,xinetd,cobblerd service is running + assert: + that: + - "'active' in cobbler_service_check.results[{{ item }}].stdout" + - "'inactive' not in cobbler_service_check.results[{{ item }}].stdout" + - "'unknown' not in cobbler_service_check.results[{{ item }}].stdout" + fail_msg: "{{ cobbler_service_check_fail_msg }}" + success_msg: "{{ cobbler_service_check_success_msg }}" + with_sequence: start=0 end=3 + tags: TC_005,VERIFY_005 + +# Testcase OMNIA_DIO_US_CC_TC_006 +# Execute provision role in management station where one container is already present +- name: OMNIA_DIO_US_CC_TC_006 + hosts: localhost + connection: local + vars_files: + - test_vars/test_provision_vars.yml + - ../roles/provision/vars/main.yml + tasks: + - name: Delete the cobbler container if exists + docker_container: + name: "{{
docker_container_name }}" + state: absent + tags: TC_006 + + - name: Delete docker image if exists + docker_image: + name: "{{ docker_image_name }}" + tag: "{{ docker_image_tag }}" + state: absent + tags: TC_006 + + - name: Create docker image + docker_image: + name: ubuntu + tag: latest + source: pull + tags: TC_006 + + - name: Create docker container + command: docker run -dit ubuntu + register: create_docker_container + changed_when: true + args: + warn: false + tags: TC_006 + + - block: + - name: Call common role + include_role: + name: ../roles/common + vars: + input_config_filename: "{{ test_input_config_filename }}" + + - name: Call provision role + include_role: + name: ../roles/provision + tags: TC_006 + + - name: Check the connection to cobbler UI and it returns a status 200 + uri: + url: https://localhost/cobbler_web + status_code: 200 + return_content: yes + validate_certs: no + tags: TC_006,VERIFY_006 + + - name: Fetch cobbler version in cobbler container + command: docker exec {{ docker_container_name }} cobbler version + changed_when: false + register: cobbler_version + tags: TC_006,VERIFY_006 + + - name: Verify cobbler version + assert: + that: + - "'Cobbler' in cobbler_version.stdout" + - "'Error' not in cobbler_version.stdout" + fail_msg: "{{ cobbler_version_fail_msg }}" + success_msg: "{{ cobbler_version_success_msg }}" + tags: TC_006,VERIFY_006 + + - name: Run cobbler check command in cobbler container + command: docker exec {{ docker_container_name }} cobbler check + changed_when: false + register: cobbler_check + tags: TC_006,VERIFY_006 + + - name: Verify cobbler check command output + assert: + that: + - "'The following are potential configuration items that you may want to fix' not in cobbler_check.stdout" + - "'Error' not in cobbler_check.stdout" + fail_msg: "{{ cobbler_check_fail_msg }}" + success_msg: "{{ cobbler_check_success_msg }}" + ignore_errors: yes + tags: TC_006,VERIFY_006 + + - name: Run cobbler sync command in cobbler container + command: docker exec {{ docker_container_name }} cobbler sync + changed_when: false + register: cobbler_sync + tags: TC_006,VERIFY_006 + + - name: Verify cobbler sync command output + assert: + that: + - "'TASK COMPLETE' in cobbler_sync.stdout" + - "'Fail' not in cobbler_sync.stdout" + - "'Error' not in cobbler_sync.stdout" + fail_msg: "{{ cobbler_sync_fail_msg }}" + success_msg: "{{ cobbler_sync_success_msg }}" + tags: TC_006,VERIFY_006 + + - name: Fetch cobbler distro list + command: docker exec {{ docker_container_name }} cobbler distro list + changed_when: false + register: cobbler_distro_list + tags: TC_006,VERIFY_006 + + - name: Verify cobbler distro list + assert: + that: + - "'CentOS' in cobbler_distro_list.stdout" + fail_msg: "{{ cobbler_distro_list_fail_msg }}" + success_msg: "{{ cobbler_distro_list_success_msg }}" + tags: TC_006,VERIFY_006 + + - name: Fetch cobbler profile list + command: docker exec cobbler cobbler profile list + changed_when: false + register: cobbler_profile_list + tags: TC_006,VERIFY_006 + + - name: Verify cobbler profile list + assert: + that: + - "'CentOS' in cobbler_profile_list.stdout" + fail_msg: "{{ cobbler_profile_list_fail_msg }}" + success_msg: "{{ cobbler_profile_list_success_msg }}" + tags: TC_006,VERIFY_006 + + - name: Check kickstart file + shell: | + docker exec {{ docker_container_name }} [ -f /var/lib/cobbler/kickstarts/{{ kickstart_filename }} ] && echo "File exist" || echo "File does not exist" + changed_when: false + register: kickstart_file_status + tags: TC_006,VERIFY_006 + + 
- name: Verify kickstart file present + assert: + that: + - "'File exist' in kickstart_file_status.stdout" + fail_msg: "{{ kickstart_file_fail_msg }}" + success_msg: "{{ kickstart_file_success_msg }}" + tags: TC_006,VERIFY_006 + + - name: Check crontab list + command: docker exec cobbler crontab -l + changed_when: false + register: crontab_list + tags: TC_006,VERIFY_006 + + - name: Verify crontab list + assert: + that: + - "'* * * * * ansible-playbook /root/tftp.yml' in crontab_list.stdout" + - "'5 * * * * ansible-playbook /root/inventory_creation.yml' in crontab_list.stdout" + fail_msg: "{{ crontab_list_fail_msg }}" + success_msg: "{{ crontab_list_success_msg }}" + tags: TC_006,VERIFY_006 + + - name: Check tftp,dhcpd,xinetd,cobblerd service is running + command: docker exec cobbler systemctl is-active {{ item }} + changed_when: false + ignore_errors: yes + register: cobbler_service_check + with_items: "{{ cobbler_services }}" + tags: TC_006,VERIFY_006 + + - name: Verify tftp,dhcpd,xinetd,cobblerd service is running + assert: + that: + - "'active' in cobbler_service_check.results[{{ item }}].stdout" + - "'inactive' not in cobbler_service_check.results[{{ item }}].stdout" + - "'unknown' not in cobbler_service_check.results[{{ item }}].stdout" + fail_msg: "{{ cobbler_service_check_fail_msg }}" + success_msg: "{{ cobbler_service_check_success_msg }}" + with_sequence: start=0 end=3 + tags: TC_006,VERIFY_006 + + - name: Delete the ubuntu container + docker_container: + name: "{{ create_docker_container.stdout }}" + state: absent + tags: TC_006 + + - name: Delete the ubuntu image + docker_image: + name: ubuntu + state: absent + tags: TC_006 + +# Testcase OMNIA_DIO_US_CC_TC_007 +# Execute provision role in management station and reboot management station +- name: OMNIA_DIO_US_CC_TC_007 + hosts: localhost + connection: local + vars_files: + - test_vars/test_provision_vars.yml + - ../roles/provision/vars/main.yml + tasks: + - name: Check last uptime of the server + command: uptime -s + register: uptime_status + changed_when: false + ignore_errors: yes + tags: TC_007 + + - name: Check current date + command: date +"%Y-%m-%d %H" + register: current_time + changed_when: false + ignore_errors: yes + tags: TC_007 + + - name: Delete the cobbler container if exists + docker_container: + name: "{{ docker_container_name }}" + state: absent + when: current_time.stdout not in uptime_status.stdout + tags: TC_007 + + - name: Delete docker image if exists + docker_image: + name: "{{ docker_image_name }}" + tag: "{{ docker_image_tag }}" + state: absent + when: current_time.stdout not in uptime_status.stdout + tags: TC_007 + + - block: + - name: Call common role + include_role: + name: ../roles/common + vars: + input_config_filename: "{{ test_input_config_filename }}" + + - name: Call provision role + include_role: + name: ../roles/provision + when: current_time.stdout not in uptime_status.stdout + tags: TC_007 + + - name: Reboot localhost + command: reboot + when: current_time.stdout not in uptime_status.stdout + tags: TC_007 + + - name: Inspect cobbler container + docker_container_info: + name: "{{ docker_container_name }}" + register: cobbler_cnt_status + tags: TC_007,VERIFY_007 + + - name: Verify cobbler container is running after reboot + assert: + that: "'running' in cobbler_cnt_status.container.State.Status" + fail_msg: "{{ cobbler_reboot_fail_msg }}" + success_msg: "{{ cobbler_reboot_success_msg }}" + tags: TC_007,VERIFY_007 diff --git a/appliance/test/test_provision_cdip.yml
b/appliance/test/test_provision_cdip.yml new file mode 100644 index 000000000..32342ddf2 --- /dev/null +++ b/appliance/test/test_provision_cdip.yml @@ -0,0 +1,183 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +# Testcase OMNIA_DIO_US_CDIP_TC_001 +# Execute provision role in management station with CentOS 7 installed +- name: OMNIA_DIO_US_CDIP_TC_001 + hosts: localhost + connection: local + vars_files: + - test_vars/test_provision_vars.yml + - ../roles/provision/vars/main.yml + tasks: + - name: Delete the cobbler container if exists + docker_container: + name: "{{ docker_container_name }}" + state: absent + tags: TC_001 + + - name: Delete docker image if exists + docker_image: + name: "{{ docker_image_name }}" + tag: "{{ docker_image_tag }}" + state: absent + tags: TC_001 + + - block: + - name: Call common role + include_role: + name: ../roles/common + vars: + input_config_filename: "{{ test_input_config_filename }}" + + - name: Call provision role + include_role: + name: ../roles/provision + tasks_from: "{{ item }}" + with_items: + - "{{ cobbler_image_files }}" + tags: TC_001 + + - name: Inspect cobbler docker image + docker_image_info: + name: "{{ docker_image_name }}" + register: cobbler_image_status + tags: TC_001,VERIFY_001 + + - name: Validate cobbler docker image + assert: + that: + - cobbler_image_status.images + fail_msg: "{{ cobbler_img_fail_msg }}" + success_msg: "{{ cobbler_img_success_msg }}" + tags: TC_001,VERIFY_001 + + - name: Inspect cobbler container + docker_container_info: + name: "{{ docker_container_name }}" + register: cobbler_cnt_status + tags: TC_001,VERIFY_001 + + - name: Validate cobbler docker container + assert: + that: + - cobbler_cnt_status.exists + fail_msg: "{{ cobbler_cnt_fail_msg }}" + success_msg: "{{ cobbler_cnt_success_msg }}" + tags: TC_001,VERIFY_001 + +# Testcase OMNIA_DIO_US_CDIP_TC_002 +# Execute provision role in management station where cobbler container and image are already created +- name: OMNIA_DIO_US_CDIP_TC_002 + hosts: localhost + connection: local + vars_files: + - test_vars/test_provision_vars.yml + - ../roles/provision/vars/main.yml + tasks: + - block: + - name: Call common role + include_role: + name: ../roles/common + vars: + input_config_filename: "{{ test_input_config_filename }}" + + - name: Call provision role + include_role: + name: ../roles/provision + tags: TC_002 + + - name: Inspect cobbler docker image + docker_image_info: + name: "{{ docker_image_name }}" + register: cobbler_image_status + tags: TC_002,VERIFY_002 + + - name: Validate cobbler docker image + assert: + that: + - cobbler_image_status.images + fail_msg: "{{ cobbler_img_fail_msg }}" + success_msg: "{{ cobbler_img_success_msg }}" + tags: TC_002,VERIFY_002 + + - name: Inspect cobbler container + docker_container_info: + name: "{{ docker_container_name }}" + register: cobbler_cnt_status + tags: TC_002,VERIFY_002 + + - name: Validate cobbler docker container + assert: + that: + -
cobbler_cnt_status.exists + fail_msg: "{{ cobbler_cnt_fail_msg }}" + success_msg: "{{ cobbler_cnt_success_msg }}" + tags: TC_002,VERIFY_002 + +# Testcase OMNIA_DIO_US_CDIP_TC_003 +# Execute provision role in management station where the docker service is not running +- name: OMNIA_DIO_US_CDIP_TC_003 + hosts: localhost + connection: local + vars_files: + - test_vars/test_provision_vars.yml + - ../roles/provision/vars/main.yml + tasks: + - name: Delete the cobbler container if exists + docker_container: + name: "{{ docker_container_name }}" + state: absent + tags: TC_003 + + - name: Delete docker image if exists + docker_image: + name: "{{ docker_image_name }}" + tag: "{{ docker_image_tag }}" + state: absent + tags: TC_003 + + - name: Stop docker service + service: + name: docker + state: stopped + tags: TC_003 + + - block: + - name: Call common role + include_role: + name: ../roles/common + vars: + input_config_filename: "{{ test_input_config_filename }}" + + - name: Call provision role + include_role: + name: ../roles/provision + + - name: Docker service stopped usecase success message + debug: + msg: "{{ docker_check_success_msg }}" + + rescue: + - name: Docker service stopped usecase fail message + fail: + msg: "{{ docker_check_fail_msg }}" + + always: + - name: Start docker service + service: + name: docker + state: started + tags: TC_003 diff --git a/appliance/test/test_provision_ndod.yml b/appliance/test/test_provision_ndod.yml new file mode 100644 index 000000000..ac4895045 --- /dev/null +++ b/appliance/test/test_provision_ndod.yml @@ -0,0 +1,294 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
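The node-discovery testcases in test_provision_ndod.yml below all repeat one pattern: write a throwaway inventory file pointing at the PXE-booted node, then drive that node with ad-hoc ansible commands and assert on the output. A condensed sketch of that pattern follows, assuming a hypothetical node address 172.17.0.10 standing in for groups[cobbler_groupname][0], and a provision_password variable already loaded from the decrypted config; the real testcases add tags, permissions, and cleanup around it.

- name: Sketch of the node validation pattern (illustrative only)
  hosts: localhost
  connection: local
  tasks:
    - name: Add the node to a temporary inventory file
      lineinfile:
        path: inventory
        line: "172.17.0.10 ansible_user=root ansible_password={{ provision_password }} ansible_ssh_common_args='-o StrictHostKeyChecking=no'"
        create: yes

    - name: Ping the node through the temporary inventory
      command: ansible all -m ping -i inventory
      register: ping_result
      changed_when: false
      ignore_errors: yes

    - name: Assert the node answered the ping module
      assert:
        that: "'pong' in ping_result.stdout"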
+--- + +# OMNIA_DIO_US_NDOD_TC_009 +# Execute provision role in management station and PXE boot one compute node +- name: OMNIA_DIO_US_NDOD_TC_009 + hosts: localhost + connection: local + gather_subset: + - 'min' + vars_files: + - test_vars/test_provision_vars.yml + - ../roles/common/vars/main.yml + tasks: + - name: Set ip address of the compute node + set_fact: + single_node_ip_address: "{{ groups[cobbler_groupname][0] }}" + tags: TC_009,VERIFY_009 + + - name: Delete inventory if exists + file: + path: inventory + state: absent + tags: TC_009,VERIFY_009 + + - name: Check input config file is encrypted + command: cat {{ test_input_config_filename }} + changed_when: false + register: config_content + tags: TC_009,VERIFY_009 + + - name: Decrypt input_config.yml + command: ansible-vault decrypt {{ test_input_config_filename }} --vault-password-file {{ vault_path }} + changed_when: false + when: "'$ANSIBLE_VAULT;' in config_content.stdout" + tags: TC_009,VERIFY_009 + + - name: Include variable file input_config.yml + include_vars: "{{ test_input_config_filename }}" + tags: TC_009,VERIFY_009 + + - name: Create inventory file + lineinfile: + path: inventory + line: "{{ single_node_ip_address }} ansible_user=root ansible_password={{ provision_password }} ansible_ssh_common_args='-o StrictHostKeyChecking=no'" + create: yes + mode: '{{ file_permission }}' + tags: TC_009,VERIFY_009 + + - meta: refresh_inventory + tags: TC_009,VERIFY_009 + + - name: Validate authentication of username and password + command: ansible {{ single_node_ip_address }} -m ping -i inventory + register: validate_login + changed_when: false + ignore_errors: yes + tags: TC_009,VERIFY_009 + + - name: Validate the authentication output + assert: + that: + - "'pong' in validate_login.stdout" + - "'SUCCESS' in validate_login.stdout" + - "'UNREACHABLE' not in validate_login.stdout" + fail_msg: "{{ authentication_fail_msg }}" + success_msg: "{{ authentication_success_msg }}" + tags: TC_009,VERIFY_009 + + - name: Check hostname + command: ansible {{ single_node_ip_address }} -m shell -a hostname -i inventory + register: validate_hostname + changed_when: false + ignore_errors: yes + tags: TC_009,VERIFY_009 + + - name: Validate the hostname + assert: + that: "'localhost' not in validate_hostname.stdout" + fail_msg: "{{ hostname_fail_msg }}" + success_msg: "{{ hostname_success_msg }}" + tags: TC_009,VERIFY_009 + + - name: Delete inventory if exists + file: + path: inventory + state: absent + tags: TC_009,VERIFY_009 + +# OMNIA_DIO_US_NDOD_TC_010 +# Execute provision role in management station and PXE boot two compute nodes +- name: OMNIA_DIO_US_NDOD_TC_010 + hosts: localhost + connection: local + gather_subset: + - 'min' + vars_files: + - test_vars/test_provision_vars.yml + - ../roles/provision/vars/main.yml + tasks: + - name: Delete inventory if exists + file: + path: inventory + state: absent + tags: TC_010,VERIFY_010 + + - name: Check input config file is encrypted + command: cat {{ test_input_config_filename }} + changed_when: false + register: config_content + tags: TC_010,VERIFY_010 + + - name: Decrypt input_config.yml + command: ansible-vault decrypt {{ test_input_config_filename }} --vault-password-file {{ vault_path }} + changed_when: false + when: "'$ANSIBLE_VAULT;' in config_content.stdout" + tags: TC_010,VERIFY_010 + + - name: Include variable file input_config.yml + include_vars: "{{ test_input_config_filename }}" + tags: TC_010,VERIFY_010 + + - name: Create inventory file + lineinfile: + path: inventory + line: "[nodes]" +
create: yes + mode: '{{ file_permission }}' + tags: TC_010,VERIFY_010 + + - name: Edit inventory file + lineinfile: + path: inventory + line: "{{ item }} ansible_user=root ansible_password={{ provision_password }} ansible_ssh_common_args='-o StrictHostKeyChecking=no'" + with_items: + - "{{ groups[cobbler_groupname] }}" + tags: TC_010,VERIFY_010 + + - meta: refresh_inventory + tags: TC_010,VERIFY_010 + + - name: Validate ip address is different for both servers + assert: + that: groups[cobbler_groupname][0] != groups[cobbler_groupname][1] + fail_msg: "{{ ip_address_fail_msg }}" + success_msg: "{{ ip_address_success_msg }}" + delegate_to: localhost + run_once: yes + tags: TC_010,VERIFY_010 + + - name: Check hostname of both servers + command: ansible nodes -m shell -a hostname -i inventory + register: node_hostname + changed_when: false + ignore_errors: yes + tags: TC_010,VERIFY_010 + + - name: Validate hostname is different for both servers + assert: + that: + - node_hostname.stdout_lines[1] != node_hostname.stdout_lines[3] + - "'localhost' not in node_hostname.stdout_lines[1]" + - "'localhost' not in node_hostname.stdout_lines[3]" + fail_msg: "{{ hostname_fail_msg }}" + success_msg: "{{ hostname_success_msg }}" + delegate_to: localhost + run_once: yes + tags: TC_010,VERIFY_010 + + - name: Delete inventory if exists + file: + path: inventory + state: absent + delegate_to: localhost + run_once: yes + tags: TC_010,VERIFY_010 + +# OMNIA_DIO_US_NDOD_TC_011 +# Validate whether passwordless ssh connection is established with compute nodes +- name: OMNIA_DIO_US_NDOD_TC_011 + hosts: localhost + gather_subset: + - 'min' + vars_files: + - test_vars/test_provision_vars.yml + - ../roles/provision/vars/main.yml + tasks: + - name: Validate authentication of username and password + command: "ansible {{ cobbler_groupname }} -m ping -i {{ inventory_file }}" + register: validate_login + changed_when: false + ignore_errors: yes + tags: TC_011,VERIFY_011 + + - name: Validate the passwordless SSH connection + assert: + that: + - "'pong' in validate_login.stdout" + - "'SUCCESS' in validate_login.stdout" + - "'UNREACHABLE' not in validate_login.stdout" + success_msg: "{{ authentication_success_msg }}" + fail_msg: "{{ authentication_fail_msg }}" + tags: TC_011,VERIFY_011 + +# OMNIA_DIO_US_NDOD_TC_012 +# Execute provision role in management station and reboot compute node after OS is provisioned +- name: OMNIA_DIO_US_NDOD_TC_012 + hosts: localhost + connection: local + gather_subset: + - 'min' + vars_files: + - test_vars/test_provision_vars.yml + tasks: + - name: Set ip address of the compute node + set_fact: + single_node_ip_address: "{{ groups[cobbler_groupname][0] }}" + tags: TC_012,VERIFY_012 + + - name: Delete inventory if exists + file: + path: inventory + state: absent + tags: TC_012,VERIFY_012 + + - name: Check input config file is encrypted + command: cat {{ test_input_config_filename }} + changed_when: false + register: config_content + tags: TC_012,VERIFY_012 + + - name: Decrypt input_config.yml + command: ansible-vault decrypt {{ test_input_config_filename }} --vault-password-file {{ vault_path }} + changed_when: false + when: "'$ANSIBLE_VAULT;' in config_content.stdout" + tags: TC_012,VERIFY_012 + + - name: Include variable file input_config.yml + include_vars: "{{ test_input_config_filename }}" + tags: TC_012,VERIFY_012 + + - name: Create inventory file + lineinfile: + path: inventory + line: "[nodes]" + create: yes + mode: '{{ file_permission }}' + tags: TC_012,VERIFY_012 + + - name: Edit inventory file
+ lineinfile: + path: inventory + line: "{{ single_node_ip_address }} ansible_user=root ansible_password={{ provision_password }} ansible_ssh_common_args='-o StrictHostKeyChecking=no'" + tags: TC_012,VERIFY_012 + + - meta: refresh_inventory + tags: TC_012,VERIFY_012 + + - name: Reboot servers + command: ansible nodes -m command -a reboot -i inventory + ignore_errors: yes + changed_when: true + tags: TC_012,VERIFY_012 + + - name: Wait for 10 minutes + pause: + minutes: 10 + tags: TC_012,VERIFY_012 + + - name: Check ip address of servers + command: ansible nodes -m command -a 'ip a' -i inventory + ignore_errors: yes + changed_when: false + register: ip_address_after_reboot + tags: TC_012,VERIFY_012 + + - name: Validate ip address is same after reboot + assert: + that: "'{{ single_node_ip_address }}' in ip_address_after_reboot.stdout" + fail_msg: "{{ ip_address_fail_msg }}" + success_msg: "{{ ip_address_success_msg }}" + tags: TC_012,VERIFY_012 \ No newline at end of file diff --git a/appliance/test/test_vars/test_common_vars.yml b/appliance/test/test_vars/test_common_vars.yml new file mode 100644 index 000000000..3ae02329a --- /dev/null +++ b/appliance/test/test_vars/test_common_vars.yml @@ -0,0 +1,51 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
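The variable values in test_common_vars.yml below feed the appliance_config validation testcases (TC_018 through TC_034) earlier in this patch: each play copies appliance_config_empty.yml to appliance_config_new.yml, substitutes one of these boundary values through a regexp, and then invokes the common role against the mutated file. A minimal sketch of that substitution step, shown only to illustrate the mechanism the plays repeat (the file name and value are taken from the vars below):

- name: Sketch of the config mutation step (illustrative only)
  hosts: localhost
  connection: local
  tasks:
    - name: Fill one empty key in the copied appliance config
      replace:
        path: appliance_config_new.yml
        regexp: 'iso_file_path: ""'
        replace: 'iso_file_path: "/root/CentOS-7-x86_64-Minimal-2009.iso"'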
+--- + +# vars file for test_common.yml +centos_version: '7.8' +test_input_config_filename: "appliance_config_test.yml" +empty_input_config_filename: "appliance_config_empty.yml" +new_input_config_filename: "appliance_config_new.yml" +password_config_file: "password_config" +min_length_password: "testpass" +max_length_password: "helloworld123helloworld12hello" +long_password: "helloworld123hellowordl12hello3" +white_space_password: "hello world 123" +special_character_password1: "hello-world/" +special_character_password2: "hello@$%!world" +valid_dhcp_start_range: "172.17.0.10" +valid_dhcp_end_range: "172.17.0.200" +invalid_dhcp_ip: "1720.1700.1000.1000" +wrong_dhcp_ip: "d6:dh1:dsj:10" +valid_iso_path: "/root/CentOS-7-x86_64-Minimal-2009.iso" +wrong_iso_path: "/root/testfile" +valid_mapping_file_path: "test_mapping_file" + +docker_volume_success_msg: "Docker volume omnia-storage exists" +docker_volume_fail_msg: "Docker volume omnia-storage does not exist" +input_config_success_msg: "Input config file is encrypted using ansible-vault successfully" +input_config_fail_msg: "Input config file failed to encrypt using ansible-vault" +os_check_success_msg: "OS check passed" +os_check_fail_msg: "OS check failed" +internet_check_success_msg: "Internet connectivity check passed" +internet_check_fail_msg: "Internet connectivity check failed" +different_user_check_success_msg: "Different user execution check passed" +different_user_check_fail_msg: "Different user execution check failed" +selinux_check_success_msg: "selinux check passed" +selinux_check_fail_msg: "selinux check failed" +input_config_check_success_msg: "appliance_config.yml validation passed" +input_config_check_fail_msg: "appliance_config.yml validation failed" +install_package_success_msg: "Installation of package is successful" +install_package_fail_msg: "Installation of package failed" diff --git a/appliance/test/test_vars/test_provision_vars.yml b/appliance/test/test_vars/test_provision_vars.yml new file mode 100644 index 000000000..66a536914 --- /dev/null +++ b/appliance/test/test_vars/test_provision_vars.yml @@ -0,0 +1,85 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
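The boundary values defined in test_common_vars.yml above (minimum- and maximum-length passwords, malformed DHCP addresses) are intended to be driven through asserts. A minimal sketch of such a check, assuming the `ipaddr` filter (which requires the netaddr Python package) is available; this task is illustrative and not part of the shipped tests:

```yaml
# Illustrative only: sanity-check the DHCP boundary values themselves.
- name: Validate well-formed and malformed DHCP boundary addresses
  assert:
    that:
      - valid_dhcp_start_range | ipaddr      # resolves to the address, i.e. truthy
      - valid_dhcp_end_range | ipaddr
      - not (invalid_dhcp_ip | ipaddr)       # out-of-range octets must be rejected
      - not (wrong_dhcp_ip | ipaddr)
    fail_msg: "{{ input_config_check_fail_msg }}"
    success_msg: "{{ input_config_check_success_msg }}"
```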
+--- + +# Usage: test_provision_cdip.yml +first_nic: "em1" +nic1_ip_address: 172.17.0.1 +validate_password_success_msg: "Password validation successful" +validate_password_fail_msg: "Password validation failed" +cobbler_img_fail_msg: "Docker image cobbler does not exist" +cobbler_img_success_msg: "Docker image cobbler exists" +cobbler_cnt_fail_msg: "Docker container cobbler does not exist" +cobbler_cnt_success_msg: "Docker container cobbler exists" +nic_check_fail_msg: "NIC-1 ip address validation failed" +nic_check_success_msg: "NIC-1 ip address validation successful" +cobbler_image_files: + - check_prerequisites + - mount_iso + - firewall_settings + - provision_password + - dhcp_configure + - cobbler_image +password_config_file: "password_config" +test_input_config_filename: "appliance_config_test.yml" + +# Usage: test_provision_cc.yml +docker_check_success_msg: "Docker service stopped usecase validation successful" +docker_check_fail_msg: "Docker service stopped usecase validation failed" +docker_ip_fail_msg: "Docker IP validation failed" +docker_ip_success_msg: "Docker IP validation successful" +cobbler_version_fail_msg: "Cobbler version validation failed" +cobbler_version_success_msg: "Cobbler version validation successful" +cobbler_check_fail_msg: "Cobbler check validation failed" +cobbler_check_success_msg: "Cobbler check validation successful" +cobbler_sync_fail_msg: "Cobbler sync validation failed" +cobbler_sync_success_msg: "Cobbler sync validation successful" +cobbler_distro_list_fail_msg: "Cobbler distro list validation failed" +cobbler_distro_list_success_msg: "Cobbler distro list validation successful" +cobbler_profile_list_fail_msg: "Cobbler profile list validation failed" +cobbler_profile_list_success_msg: "Cobbler profile list validation successful" +kickstart_file_fail_msg: "Kickstart file validation failed" +kickstart_file_success_msg: "Kickstart file validation successful" +cobbler_reboot_fail_msg: "Cobbler container failed to start after reboot" +cobbler_reboot_success_msg: "Cobbler container started successfully after reboot" +crontab_list_fail_msg: "Crontab list validation failed" +crontab_list_success_msg: "Crontab list validation successful" +iso_check_fail_msg: "centos iso file check validation failed" +iso_check_success_msg: "centos iso file check validation successful" +cobbler_service_check_fail_msg: "cobbler service validation failed" +cobbler_service_check_success_msg: "cobbler service validation successful" +kickstart_filename: "centos7.ks" +iso_file_path: "../roles/provision/files" +temp_iso_name: "temp_centos.iso" +cobbler_services: + - tftp + - dhcpd + - cobblerd + - xinetd + +# Usage: test_provision_cdip.yml, test_provision_cc.yml, test_provision_ndod.yml +docker_container_name: "cobbler" +boundary_password: "testpass" + +# Usage: test_provision_ndod.yml +hostname_fail_msg: "Hostname validation failed" +hostname_success_msg: "Hostname validation successful" +authentication_fail_msg: "Server authentication validation failed" +authentication_success_msg: "Server authentication validation successful" +ip_address_fail_msg: "IP address validation failed" +ip_address_success_msg: "IP address validation successful" +cobbler_groupname: "all" +inventory_file: "provisioned_hosts.yml" +file_permission: 0644 +vault_path: ../roles/common/files/.vault_key diff --git a/appliance/test/test_vars/test_web_ui_vars.yml b/appliance/test/test_vars/test_web_ui_vars.yml new file mode 100644 index 000000000..22476d1ce --- /dev/null +++
b/appliance/test/test_vars/test_web_ui_vars.yml @@ -0,0 +1,35 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +# Usage: test_web_ui.yml +return_status: 200 +fail_return_status: -1 +awx_listening_port: 8081 +time: 1 +actual_containers: 4 +package: "docker-ce" +awx_exists_msg: "Test case passed: AWX instance is already running on your system" +awx_not_exists_msg: "Test case failed: AWX does not exist" +validate_password_success_msg: "Test case passed: Password validation successful" +validate_password_fail_msg: "Test case failed: Password validation failed" +resource_exists_success_msg: "Success: Requested resource(s) exists" +resource_exists_fail_msg: "Failure: Requested resource(s) does not exist" +compute_group_name: "compute" +manager_group_name: "manager" +tower_cli_package_name: "ansible-tower-cli" +docker_container_name: "awx_web" +container_up_status_success_msg: "Container is running successfully after the reboot" +container_up_status_fail_msg: "Container is not running after the reboot" +test_input_config_filename: appliance_config_test.yml \ No newline at end of file diff --git a/appliance/test/test_web_ui.yml b/appliance/test/test_web_ui.yml new file mode 100644 index 000000000..68aa35bca --- /dev/null +++ b/appliance/test/test_web_ui.yml @@ -0,0 +1,378 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+--- + +# Testcase OMNIA_CRM_US_AWXD_TC_001 +# Test case to verify the prerequisites are installed and execute the AWX deployment +- name: OMNIA_CRM_US_AWXD_TC_001 + hosts: localhost + connection: local + gather_subset: + - 'min' + vars_files: + - ../roles/web_ui/vars/main.yml + - test_vars/test_web_ui_vars.yml + tasks: + - name: Get the docker package facts + package_facts: + manager: auto + tags: TC_001 + + - name: Check if docker-ce is already installed + debug: + var: ansible_facts.packages['{{ package }}'] + tags: TC_001 + + - block: + - name: Call common role + include_role: + name: ../roles/common + vars: + input_config_filename: "{{ test_input_config_filename }}" + + - name: Calling the web_ui role to be tested + include_role: + name: ../roles/web_ui + tags: TC_001 + + - name: Check that you can connect to github repo and it returns a status 200 + uri: + url: "{{ awx_git_repo }}" + status_code: "{{ return_status }}" + return_content: true + tags: TC_001 + + - name: Check that you can connect to AWX UI and it returns a status 200 + uri: + url: "{{ awx_ip }}" + status_code: "{{ return_status }}" + return_content: true + tags: TC_001 + + - name: Verify awx-server is listening on 8081 + wait_for: + port: "{{ awx_listening_port }}" + timeout: "{{ time }}" + tags: TC_001 + + - name: Get the containers count + shell: | + set -o pipefail + docker ps -a | grep awx | wc -l + register: containers_count + changed_when: False + tags: TC_001 + + - name: Validate the containers count + assert: + that: containers_count.stdout | int >= actual_containers + success_msg: "{{ awx_exists_msg }}" + fail_msg: "{{ awx_not_exists_msg }}" + tags: TC_001 + +# Testcase OMNIA_CRM_US_AWXD_TC_002 +# Test case to verify regression testing +- name: OMNIA_CRM_US_AWXD_TC_002 + hosts: localhost + connection: local + gather_subset: + - 'min' + vars_files: + - ../roles/web_ui/vars/main.yml + - test_vars/test_web_ui_vars.yml + tasks: + - block: + - name: Call common role + include_role: + name: ../roles/common + vars: + input_config_filename: "{{ test_input_config_filename }}" + + - name: Calling the web_ui role to be tested + include_role: + name: ../roles/web_ui + tags: TC_002 + + - name: Check that you can connect to github repo and it returns a status 200 + uri: + url: "{{ awx_git_repo }}" + status_code: "{{ return_status }}" + return_content: true + tags: TC_002 + + - name: Check that you can connect to AWX UI and it returns a status 200 + uri: + url: "{{ awx_ip }}" + status_code: "{{ return_status }}" + return_content: true + tags: TC_002 + + - name: Verify awx-server is listening on 8081 + wait_for: + port: "{{ awx_listening_port }}" + timeout: "{{ time }}" + tags: TC_002 + + - name: Get the containers count + shell: | + set -o pipefail + docker ps -a | grep awx | wc -l + register: containers_count + changed_when: False + tags: TC_002 + + - name: Validate the containers count + assert: + that: containers_count.stdout | int >= actual_containers + success_msg: "{{ awx_exists_msg }}" + fail_msg: "{{ awx_not_exists_msg }}" + tags: TC_002 + +# Testcase OMNIA_CRM_US_AWXD_TC_003 +# Test case to validate the AWX configuration +- name: OMNIA_CRM_US_AWXD_TC_003 + hosts: localhost + connection: local + gather_subset: + - 'min' + vars_files: + - ../roles/web_ui/vars/main.yml + - ../roles/common/vars/main.yml + - test_vars/test_web_ui_vars.yml + tasks: + + - block: + - name: Call common role + include_role: + name: ../roles/common + vars: + input_config_filename: "{{ test_input_config_filename }}" + + - name: Calling the
web_ui role to be tested + include_role: + name: ../roles/web_ui + tags: TC_003 + + - name: Get the package facts + package_facts: + manager: auto + tags: TC_003 + + - name: Check if ansible-tower-cli is already installed + assert: + that: "'{{ tower_cli_package_name }}' in ansible_facts.packages" + success_msg: "{{ resource_exists_success_msg }}" + fail_msg: "{{ resource_exists_fail_msg }}" + tags: TC_003 + + - name: Get the existing organizations + command: >- + awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}" + organizations list -f human + register: organizations_array + changed_when: False + tags: TC_003 + + - name: Check for organization + assert: + that: organization_name in organizations_array.stdout + success_msg: "{{ resource_exists_success_msg }}" + fail_msg: "{{ resource_exists_fail_msg }}" + tags: TC_003 + + - name: Get the existing projects + command: >- + awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}" + projects list -f human + changed_when: False + register: projects_array + tags: TC_003 + + - name: Check for project + assert: + that: project_name in projects_array.stdout + success_msg: "{{ resource_exists_success_msg }}" + fail_msg: "{{ resource_exists_fail_msg }}" + tags: TC_003 + + - name: Get the existing inventories + command: >- + awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}" + inventory list -f human + changed_when: False + register: inventory_array + tags: TC_003 + + - name: Check for inventories + assert: + that: omnia_inventory_name in inventory_array.stdout + success_msg: "{{ resource_exists_success_msg }}" + fail_msg: "{{ resource_exists_fail_msg }}" + tags: TC_003 + + - name: Get the existing groups if omnia-inventory exists + command: >- + awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}" + groups list --inventory "{{ omnia_inventory_name }}" -f human + changed_when: False + register: groups_array + when: omnia_inventory_name in inventory_array.stdout + tags: TC_003 + + - name: Check for manager and compute groups + assert: + that: + - manager_group_name in groups_array.stdout + - compute_group_name in groups_array.stdout + success_msg: "{{ resource_exists_success_msg }}" + fail_msg: "{{ resource_exists_fail_msg }}" + tags: TC_003 + + - name: Get the existing credentials + command: >- + awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}" + credentials list -f human + changed_when: False + register: credentials_array + tags: TC_003 + + - name: Check for "{{ credential_name }}" + assert: + that: credential_name in credentials_array.stdout + success_msg: "{{ resource_exists_success_msg }}" + fail_msg: "{{ resource_exists_fail_msg }}" + tags: TC_003 + + - name: Get the existing job templates + command: >- + awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}" + job_templates list -f human + changed_when: False + register: templates_array + tags: TC_003 + + - name: Check for templates + assert: + that: + - omnia_template_name in templates_array.stdout + - inventory_template_name in templates_array.stdout + success_msg: "{{ resource_exists_success_msg }}" + fail_msg: "{{ resource_exists_fail_msg }}" + tags: TC_003 + + - name: Get the existing schedules for job templates + command: >- + awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}" + schedules list -f
human + changed_when: False + register: schedules_array + tags: TC_003 + + - name: Check for schedules to job template + assert: + that: schedule_name in schedules_array.stdout + success_msg: "{{ resource_exists_success_msg }}" + fail_msg: "{{ resource_exists_fail_msg }}" + tags: TC_003 + +# Testcase OMNIA_CRM_US_AWXD_TC_004 +# Execute common role in management station without internet connectivity +- name: OMNIA_CRM_US_AWXD_TC_004 + hosts: localhost + connection: local + gather_subset: + - 'min' + vars_files: + - test_vars/test_common_vars.yml + - ../roles/common/vars/main.yml + tasks: + - name: Down internet connectivity + lineinfile: + path: /etc/hosts + line: "172.16.0.5 github.com" + state: present + backup: yes + tags: TC_004 + + - block: + - name: Call common role + include_role: + name: ../roles/common + vars: + input_config_filename: "{{ test_input_config_filename }}" + + - name: Calling the web_ui role to be tested + include_role: + name: ../roles/web_ui + + rescue: + - name: Validate internet connectivity failure message + assert: + that: internet_status in internet_value.msg + success_msg: "{{ internet_check_success_msg }}" + fail_msg: "{{ internet_check_fail_msg }}" + tags: TC_004 + + - name: Up internet connectivity + lineinfile: + path: /etc/hosts + line: "172.16.0.5 github.com" + state: absent + tags: TC_004 + +# Testcase OMNIA_CRM_US_AWXD_TC_005 +# Execute web_ui role in management station and reboot the server +- name: OMNIA_CRM_US_AWXD_TC_005 + hosts: localhost + connection: local + vars_files: + - test_vars/test_web_ui_vars.yml + tasks: + - name: Get last uptime of the server + command: uptime -s + register: uptime_status + changed_when: false + ignore_errors: yes + tags: TC_005 + + - name: Get current date + command: date +"%Y-%m-%d %H" + register: current_time + changed_when: false + ignore_errors: yes + tags: TC_005 + + - block: + - name: Call common role + include_role: + name: ../roles/common + vars: + input_config_filename: "{{ test_input_config_filename }}" + + - name: Calling the web_ui role to be tested + include_role: + name: ../roles/web_ui + tags: TC_005 + + - name: Reboot localhost + command: reboot + when: current_time.stdout not in uptime_status.stdout + tags: TC_005 + + - name: Inspect AWX web container + docker_container_info: + name: "{{ docker_container_name }}" + register: awx_container_status + tags: TC_005 + + - name: Verify AWX container is running after reboot + assert: + that: + - "'running' in awx_container_status.container.State.Status" \ No newline at end of file diff --git a/appliance/tools/passwordless_ssh.yml b/appliance/tools/passwordless_ssh.yml new file mode 100644 index 000000000..cc406c09e --- /dev/null +++ b/appliance/tools/passwordless_ssh.yml @@ -0,0 +1,40 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
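TC_004 above simulates an outage by sinkholing github.com in /etc/hosts and then removes the entry in a follow-up task; if the play aborts before that task runs, the stale entry can linger. A hedged sketch of the same pattern with guaranteed cleanup via an `always` section (the sinkhole address and debug placeholder mirror TC_004; this is not the shipped test):

```yaml
# Sketch only: the TC_004 outage simulation with guaranteed cleanup.
- name: Simulate a GitHub outage with cleanup even on failure
  hosts: localhost
  connection: local
  tasks:
    - block:
        - name: Sinkhole github.com, as TC_004 does
          lineinfile:
            path: /etc/hosts
            line: "172.16.0.5 github.com"
            state: present
            backup: yes

        - name: Placeholder for the role under test
          debug:
            msg: "include_role for common/web_ui would go here"
      always:
        - name: Remove the sinkhole entry even if the test fails
          lineinfile:
            path: /etc/hosts
            line: "172.16.0.5 github.com"
            state: absent
```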
+--- +- name: Fetch provision_password + hosts: localhost + connection: local + gather_facts: no + roles: + - fetch_password + +- name: Prepare the cluster with passwordless ssh from manager to compute + hosts: manager + gather_facts: false + pre_tasks: + - name: Set Fact + set_fact: + ssh_to: "{{ groups['compute'] }}" + roles: + - cluster_preperation + +- name: Prepare the cluster with passwordless ssh from compute to manager + hosts: compute + gather_facts: false + pre_tasks: + - name: Set Fact + set_fact: + ssh_to: "{{ groups['manager'] }}" + roles: + - cluster_preperation \ No newline at end of file diff --git a/appliance/tools/provision_report.yml b/appliance/tools/provision_report.yml new file mode 100644 index 000000000..e82d1b9a6 --- /dev/null +++ b/appliance/tools/provision_report.yml @@ -0,0 +1,81 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +# This file used to generate a report of reachable and unreachable host of hpc cluster +# This file can be executed only if provisioned_hosts.yml is created inside the path omnia/appliance/roles/inventory/files/provisioned_hosts.yml + +# Command to execute: ansible-playbook provision_report.yml -i ../roles/inventory/files/provisioned_hosts.yml + +- name: Fetch provision_password + hosts: localhost + connection: local + gather_facts: no + roles: + - fetch_password + +- name: Find reachable hosts using ping + hosts: all + gather_facts: false + ignore_unreachable: true + ignore_errors: true + tasks: + - name: Check for reachable nodes + command: ping -c1 {{ inventory_hostname }} + delegate_to: localhost + register: ping_result + ignore_errors: yes + changed_when: false + + - name: Refresh ssh keys + command: ssh-keygen -R {{ inventory_hostname }} + delegate_to: localhost + changed_when: false + + - name: Group reachable hosts + group_by: + key: "reachable" + when: "'100% packet loss' not in ping_result.stdout" + +- name: Find reachable hosts using ssh + hosts: reachable + gather_facts: False + ignore_unreachable: true + remote_user: "root" + vars: + ansible_password: "{{ hostvars['127.0.0.1']['cobbler_password'] }}" + ansible_become_pass: "{{ hostvars['127.0.0.1']['cobbler_password'] }}" + ansible_ssh_common_args: '-o StrictHostKeyChecking=no' + tasks: + - name: Check ssh status + command: hostname + register: ssh_status + ignore_errors: yes + + - name: Group unreachable ssh hosts + group_by: + key: "unreachable_ssh" + when: ssh_status.unreachable is defined and ssh_status.unreachable == true + + - name: Group reachable ssh hosts + group_by: + key: "reachable_ssh" + when: (ssh_status.unreachable is defined and ssh_status.unreachable == false) or (ssh_status.rc is defined and ssh_status.rc == 0) + +- name: Display hosts list + hosts: localhost + connection: local + gather_facts: false + roles: + - hpc_cluster_report diff --git a/appliance/tools/roles/cluster_preperation/tasks/main.yml b/appliance/tools/roles/cluster_preperation/tasks/main.yml new file mode 100644 
index 000000000..6c1bf3541 --- /dev/null +++ b/appliance/tools/roles/cluster_preperation/tasks/main.yml @@ -0,0 +1,36 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- +- name: Set Facts + set_fact: + ansible_ssh_common_args: '-o StrictHostKeyChecking=no' + +- name: Disable host key checking + replace: + path: /etc/ssh/ssh_config + regexp: '# StrictHostKeyChecking ask' + replace: 'StrictHostKeyChecking no' + +- name: Install sshpass + package: + name: sshpass + state: present + +- name: Verify and set passwordless ssh from manager to compute nodes + block: + - name: Execute on individual hosts + include_tasks: passwordless_ssh.yml + with_items: "{{ ssh_to }}" + loop_control: + pause: 5 \ No newline at end of file diff --git a/appliance/tools/roles/cluster_preperation/tasks/passwordless_ssh.yml b/appliance/tools/roles/cluster_preperation/tasks/passwordless_ssh.yml new file mode 100644 index 000000000..54df2e569 --- /dev/null +++ b/appliance/tools/roles/cluster_preperation/tasks/passwordless_ssh.yml @@ -0,0 +1,84 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+--- + +- name: Initialize variables + set_fact: + ssh_status: false + current_host: "{{ item }}" + +- name: Refresh ssh-key if changed + command: ssh-keygen -R {{ current_host }} + changed_when: False + ignore_errors: yes + when: "'manager' in group_names" + +- name: Verify whether passwordless ssh is set on the remote host + command: ssh -o PasswordAuthentication=no root@"{{ current_host }}" 'hostname' + register: ssh_output + ignore_errors: yes + changed_when: False + +- name: Update ssh connection status + set_fact: + ssh_status: true + when: "'Permission denied' not in ssh_output.stderr" + +- name: Verify the public key file existence + stat: + path: "{{ rsa_id_file }}" + register: verify_rsa_id_file + when: not ssh_status + +- name: Generate ssh key pair + command: ssh-keygen -t rsa -b 4096 -f "{{ rsa_id_file }}" -q -N "{{ passphrase }}" + when: + - not ssh_status + - not verify_rsa_id_file.stat.exists + +- name: Add the key identity + shell: | + eval `ssh-agent -s` + ssh-add "{{ rsa_id_file }}" + when: not ssh_status + +- name: Post public key + block: + - name: Create .ssh directory + command: >- + sshpass -p "{{ hostvars['127.0.0.1']['cobbler_password'] }}" + ssh root@"{{ current_host }}" mkdir -p /root/.ssh + when: not ssh_status + no_log: True + register: register_error + + - name: Copy the public key to remote host + shell: >- + set -o pipefail && cat "{{ rsa_id_file }}".pub + | sshpass -p "{{ hostvars['127.0.0.1']['cobbler_password'] }}" + ssh root@"{{ current_host }}" 'cat >> "{{ auth_key_path }}"' + when: not ssh_status + no_log: True + register: register_error + + - name: Change permissions on the remote host + shell: sshpass -p "{{ hostvars['127.0.0.1']['cobbler_password'] }}" ssh root@"{{ current_host }}" 'chmod 700 .ssh; chmod 640 "{{ auth_key_path }}"' + when: not ssh_status + no_log: True + register: register_error + + rescue: + - name: Passwordless ssh failed + fail: + msg: "{{ register_error.stderr | regex_replace(hostvars['127.0.0.1']['cobbler_password']) | regex_replace(auth_key_path) }}" \ No newline at end of file diff --git a/appliance/tools/roles/cluster_preperation/vars/main.yml b/appliance/tools/roles/cluster_preperation/vars/main.yml new file mode 100644 index 000000000..39d505572 --- /dev/null +++ b/appliance/tools/roles/cluster_preperation/vars/main.yml @@ -0,0 +1,19 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +#Usage: passwordless_ssh.yml +rsa_id_file: "/root/.ssh/id_rsa" +passphrase: "" +auth_key_path: "/root/.ssh/authorized_keys" \ No newline at end of file diff --git a/appliance/tools/roles/fetch_password/tasks/main.yml b/appliance/tools/roles/fetch_password/tasks/main.yml new file mode 100644 index 000000000..b1350f131 --- /dev/null +++ b/appliance/tools/roles/fetch_password/tasks/main.yml @@ -0,0 +1,44 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- +- name: Include variables from common role + include_vars: "{{ role_path }}/../../../roles/common/vars/main.yml" + no_log: True + +- name: Check input config file is encrypted + command: cat {{ role_path }}/../../../{{ input_config_filename }} + changed_when: false + register: config_content + +- name: Decrypt appliance_config.yml + command: >- + ansible-vault decrypt {{ role_path }}/../../../{{ input_config_filename }} + --vault-password-file {{ role_path }}/../../../{{ vault_filename }} + changed_when: false + when: "'$ANSIBLE_VAULT;' in config_content.stdout" + +- name: Include variable file appliance_config.yml + include_vars: "{{ role_path }}/../../../{{ input_config_filename }}" + no_log: true + +- name: Save input variables from file + set_fact: + cobbler_password: "{{ provision_password }}" + no_log: true + +- name: Encrypt input config file + command: >- + ansible-vault encrypt {{ role_path }}/../../../{{ input_config_filename }} + --vault-password-file {{ role_path }}/../../../{{ vault_filename }} + changed_when: false diff --git a/appliance/tools/roles/hpc_cluster_report/tasks/main.yml b/appliance/tools/roles/hpc_cluster_report/tasks/main.yml new file mode 100644 index 000000000..a72ef5018 --- /dev/null +++ b/appliance/tools/roles/hpc_cluster_report/tasks/main.yml @@ -0,0 +1,91 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
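The fetch_password role above decrypts appliance_config.yml on disk, reads it, and re-encrypts it; a crash between those steps would leave the file in plaintext. One possible alternative, sketched here and not part of the shipped role, reads the vaulted file without ever persisting plaintext by using `ansible-vault view`:

```yaml
# Sketch only: an alternative to the decrypt/read/encrypt round trip.
- name: Read the vaulted config without writing plaintext to disk
  command: >-
    ansible-vault view {{ role_path }}/../../../{{ input_config_filename }}
    --vault-password-file {{ role_path }}/../../../{{ vault_filename }}
  register: vault_view
  changed_when: false
  no_log: true

- name: Extract provision_password from the decrypted stream
  set_fact:
    cobbler_password: "{{ (vault_view.stdout | from_yaml).provision_password }}"
  no_log: true
```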
+--- + +- name: Initialize reachable and unreachable host number + set_fact: + reachable_host_number: 0 + unreachable_host_number: 0 + unreachable_ssh_host_number: 0 + unreachable_ping_host_number: 0 + +- name: Set reachable host number + set_fact: + reachable_host_number: "{{ groups['reachable_ssh'] | length}}" + when: groups['reachable_ssh'] is defined + +- name: Set unreachable host number + set_fact: + unreachable_host_number: "{{ groups['ungrouped'] | length | int + groups['unreachable_ssh'] | length | int }}" + unreachable_ssh_host_number: "{{ groups['unreachable_ssh'] | length }}" + unreachable_ping_host_number: "{{ groups['ungrouped'] | length }}" + when: groups['unreachable_ssh'] is defined + +- name: Set unreachable host number + set_fact: + unreachable_host_number: "{{ groups['ungrouped'] | length }}" + unreachable_ping_host_number: "{{ groups['ungrouped'] | length }}" + when: groups['unreachable_ssh'] is not defined + +- name: Create files folder + file: + path: "{{ role_path}}/files" + state: directory + +- name: Copy dhcpd.leases from cobbler + command: docker cp cobbler:/var/lib/dhcpd/dhcpd.leases {{ role_path}}/files/dhcpd.leases + changed_when: true + +- name: Fetch ethernet details of unreachable hosts + shell: sed -n '/{{ item }}/,/ethernet/p' {{ role_path}}/files/dhcpd.leases | grep "ethernet" | awk '{ print $3 }' | uniq + register: ethernet_detail_unreachable_ping + changed_when: false + args: + warn: no + with_items: + - "{{ groups['ungrouped'] }}" + +- name: Fetch ethernet details of unreachable hosts + shell: sed -n '/{{ item }}/,/ethernet/p' {{ role_path}}/files/dhcpd.leases | grep "ethernet" | awk '{ print $3 }' | uniq + register: ethernet_detail_unreachable_ssh + changed_when: false + args: + warn: no + when: groups['unreachable_ssh'] is defined + with_items: + - "{{ groups['unreachable_ssh'] }}" + +- name: Fetch ethernet details of reachable hosts + shell: sed -n '/{{ item }}/,/ethernet/p' {{ role_path}}/files/dhcpd.leases | grep "ethernet" | awk '{ print $3 }' | uniq + register: ethernet_detail_reachable + changed_when: false + args: + warn: no + when: groups['reachable_ssh'] is defined + with_items: + - "{{ groups['reachable_ssh'] }}" + +- name: Copy host information to file + template: + src: provision_host_report.j2 + dest: "{{ role_path}}/files/provision_host_report.txt" + +- name: Read provision host report + command: cat {{ role_path}}/files/provision_host_report.txt + register: host_report + changed_when: false + +- name: Display provision host report + debug: + var: host_report.stdout_lines \ No newline at end of file diff --git a/appliance/tools/roles/hpc_cluster_report/templates/provision_host_report.j2 b/appliance/tools/roles/hpc_cluster_report/templates/provision_host_report.j2 new file mode 100644 index 000000000..a4051ea67 --- /dev/null +++ b/appliance/tools/roles/hpc_cluster_report/templates/provision_host_report.j2 @@ -0,0 +1,38 @@ +HPC Cluster +----------- +Reachable Hosts: +{% if reachable_host_number > 0 %} +{% for host in groups['reachable_ssh'] %} +{% if reachable_host_number == 1 %} + inet={{ host }}, link/ether={{ ethernet_detail_reachable.stdout | replace(';','')}} +{% elif reachable_host_number > 1 %} +{% if ethernet_detail_reachable.results[loop.index|int - 1].stdout | length > 1 %} + inet={{ host }}, link/ether={{ ethernet_detail_reachable.results[loop.index|int - 1].stdout | replace(';','')}} +{% else %} + inet={{ host }}, link/ether=Refer to mapping file provided +{% endif %} +{% endif %} +{% endfor %} +{% endif %} +Total reachable 
hosts: {{ reachable_host_number }} + +Unreachable Hosts: +{% if unreachable_ping_host_number > 0 %} +{% for host in groups['ungrouped'] %} +{% if unreachable_ping_host_number == 1 %} + inet={{ host }}, link/ether={{ ethernet_detail_unreachable_ping.stdout | replace(';','')}} +{% elif unreachable_ping_host_number > 1 %} + inet={{ host }}, link/ether={{ ethernet_detail_unreachable_ping.results[loop.index|int - 1].stdout | replace(';','')}} +{% endif %} +{% endfor %} +{% endif %} +{% if unreachable_ssh_host_number > 0 %} +{% for host in groups['unreachable_ssh'] %} +{% if unreachable_ssh_host_number == 1 %} + inet={{ host }}, link/ether={{ ethernet_detail_unreachable_ssh.stdout | replace(';','')}} +{% elif unreachable_ssh_host_number > 1 %} + inet={{ host }}, link/ether={{ ethernet_detail_unreachable_ssh.results[loop.index|int - 1].stdout | replace(';','')}} +{% endif %} +{% endfor %} +{% endif %} +Total unreachable hosts: {{ unreachable_host_number }} \ No newline at end of file diff --git a/docs/FAQ.md b/docs/FAQ.md new file mode 100644 index 000000000..a23b9d5fd --- /dev/null +++ b/docs/FAQ.md @@ -0,0 +1,100 @@ +# Frequently Asked Questions + +* TOC +{:toc} + +## Why is the error "Wait for AWX UI to be up" displayed when `appliance.yaml` fails? +Cause: +1. When AWX is not accessible even after five minutes of wait time. +2. When __isMigrating__ or __isInstalling__ is seen in the failure message. + +Resolution: +Wait for AWX UI to be accessible at http://\<management-station-IP\>:8081, and then run the `appliance.yaml` file again, where __management-station-IP__ is the IP address of the management node. + +## What are the next steps after the nodes in a Kubernetes cluster reboot? +Resolution: +Wait for up to 15 minutes after the Kubernetes cluster reboots. Next, verify the status of the cluster using the following services: +* `kubectl get nodes` on the manager node provides the correct k8s cluster status. +* `kubectl get pods --all-namespaces` on the manager node displays all the pods in the **Running** state. +* `kubectl cluster-info` on the manager node displays that both k8s master and kubeDNS are in the **Running** state. + +## What to do when the Kubernetes services are not in the __Running__ state? +Resolution: +1. Run `kubectl get pods --all-namespaces` to verify the pods are in the **Running** state. +2. If the pods are not in the **Running** state, delete the pods using the command: `kubectl delete pods <pod-name>` +3. Run the corresponding playbook that was used to install Kubernetes: `omnia.yml`, `jupyterhub.yml`, or `kubeflow.yml`. + +## What to do when the JupyterHub or Prometheus UIs are not accessible? +Resolution: +Run the command `kubectl get pods --namespace default` to ensure the **nfs-client** pod and all Prometheus server pods are in the **Running** state. + +## While configuring Cobbler, why does `appliance.yml` fail with an error during the Run import command? +Cause: +* When the mounted .iso file is corrupt. + +Resolution: +1. Go to __var__->__log__->__cobbler__->__cobbler.log__ to view the error. +2. If the error message is **repo verification failed**, then it signifies that the .iso file is not mounted properly. +3. Verify if the downloaded .iso file is valid and correct. +4. Delete the Cobbler container using `docker rm -f cobbler` and rerun `appliance.yml`. + +## Why does the PXE boot fail with tftp timeout or service timeout errors? +Cause: +* When RAID is configured on the server. +* When more than two servers in the same network have Cobbler services running. + +Resolution: +1.
Create a Non-RAID or virtual disk in the server. +2. Check if systems other than the management node have cobblerd running. If yes, stop the Cobbler container using the following commands: `docker rm -f cobbler` and `docker image rm -f cobbler`. + +## What to do when the Slurm services do not start automatically after the cluster reboots? +Resolution: +* Manually restart the Slurm services on the manager node by running the following commands: +``` +systemctl restart slurmdbd +systemctl restart slurmctld +systemctl restart prometheus-slurm-exporter +``` +* Run `systemctl restart slurmd` to manually restart the slurmd service on all the compute nodes. + +## What to do when the Slurm services fail? +Cause: The `slurm.conf` is not configured properly. +Resolution: +1. Run the following commands: +``` +slurmdbd -Dvvv +slurmctld -Dvvv +``` +2. Verify the `/var/lib/log/slurmctld.log` file. + +## What to do when the error "ports are unavailable" is displayed? +Cause: Slurm database connection fails. +Resolution: +1. Run the following commands: +``` +slurmdbd -Dvvv +slurmctld -Dvvv +``` +2. Verify the `/var/lib/log/slurmctld.log` file. +3. Verify: `netstat -antp | grep LISTEN` +4. If PIDs are in the **Listening** state, kill the processes using that specific port. +5. Restart all Slurm services: +``` +systemctl restart slurmctld on manager node +systemctl restart slurmdbd on manager node +systemctl restart slurmd on compute node +``` + +## What to do if Kubernetes Pods are unable to communicate with the servers when the DNS servers are not responding? +Cause: The Kubernetes Pod Network CIDR overlaps with the host network, which causes a DNS issue. +Resolution: +1. In your Kubernetes cluster, run `kubeadm reset -f` on the nodes. +2. In the management node, edit the `omnia_config.yml` file to change the Kubernetes Pod Network CIDR. The suggested IP range is 192.168.0.0/16; ensure that you provide an IP range that is not in use in your host network. +3. Execute omnia.yml and skip Slurm using `--skip-tags slurm`. + +## What to do if the time taken to pull the images to create the Kubeflow containers exceeds the limit and the Apply Kubeflow configurations task fails? +Cause: Unstable or slow Internet connectivity. +Resolution: +1. Complete the PXE booting/formatting of the OS on the manager and compute nodes. +2. In the omnia_config.yml file, change the k8s_cni variable value from calico to flannel. +3. Run the Kubernetes and Kubeflow playbooks. diff --git a/docs/INSTALL.md b/docs/INSTALL.md deleted file mode 100644 index 9c6a4f482..000000000 --- a/docs/INSTALL.md +++ /dev/null @@ -1,105 +0,0 @@ -## TL;DR Installation - -### Kubernetes -Install Kubernetes and all dependencies -``` -ansible-playbook -i host_inventory_file kubernetes/kubernetes.yml -``` - -Initialize K8s cluster -``` -ansible-playbook -i host_inventory_file kubernetes/kubernetes.yml --tags "init" -``` - -### Install Kubeflow -``` -ansible-playbook -i host_inventory_file kubernetes/kubeflow.yaml -``` - -### Slurm -``` -ansible-playbook -i host_inventory_file slurm/slurm.yml -``` - -# Omnia -Omnia is a collection of [Ansible](https://www.ansible.com/) playbooks which perform: -* Installation of [Slurm](https://slurm.schedmd.com/) and/or [Kubernetes](https://kubernetes.io/) on servers already provisioned with a standard [CentOS](https://www.centos.org/) image. -* Installation of auxiliary scripts for administrator functions such as moving nodes between Slurm and Kubernetes personalities.
- -Omnia playbooks perform several tasks: -`common` playbook handles installation of software -* Add yum repositories: - - Kubernetes (Google) - - El Repo (for Nvidia drivers) - - EPEL (Extra Packages for Enterprise Linux) -* Install Packages from repos: - - bash-completion - - docker - - gcc - - python-pip - - kubelet - - kubeadm - - kubectl - - nfs-utils - - nvidia-detect - - yum-plugin-versionlock -* Restart and enable system level services - - Docker - - Kubelet - -`computeGPU` playbook installs Nvidia drivers and nvidia-container-runtime-hook -* Add yum repositories: - - Nvidia (container runtime) -* Install Packages from repos: - - kmod-nvidia - - nvidia-container-runtime-hook -* Restart and enable system level services - - Docker - - Kubelet -* Configuration: - - Enable GPU Device Plugins (nvidia-container-runtime-hook) - - Modify kubeadm config to allow GPUs as schedulable resource -* Restart and enable system level services - - Docker - - Kubelet - -`master` playbook -* Install Helm v3 -* (optional) add firewall rules for Slurm and kubernetes - -Everything from this point on can be called by using the `init` tag -``` -ansible-playbook -i host_inventory_file kubernetes/kubernetes.yml --tags "init" -``` - -`startmaster` playbook -* turn off swap -*Initialize Kubernetes - * Head/master - - Start K8S pass startup token to compute/slaves - - Initialize software defined networking (Calico) - -`startworkers` playbook -* turn off swap -* Join k8s cluster - -`startservices` playbook -* Setup K8S Dashboard -* Add `stable` repo to helm -* Add `jupyterhub` repo to helm -* Update helm repos -* Deploy NFS client Provisioner -* Deploy Jupyterhub -* Deploy Prometheus -* Install MPI Operator - - -### Slurm -* Downloads and builds Slurm from source -* Install package dependencies - - Python3 - - munge - - MariaDB - - MariaDB development libraries -* Build Slurm configuration files - diff --git a/docs/INSTALL_OMNIA.md b/docs/INSTALL_OMNIA.md new file mode 100644 index 000000000..fca18b2bd --- /dev/null +++ b/docs/INSTALL_OMNIA.md @@ -0,0 +1,117 @@ +# Install Omnia using CLI + +The following sections provide details on installing Omnia using CLI. If you want to install the Omnia appliance and manage workloads using the Omnia appliance, see [Install the Omnia appliance](INSTALL_OMNIA_APPLIANCE.md) and [Monitor Kubernetes and Slurm](MONITOR_CLUSTERS.md) for more information. + +## Prerequisites +* Ensure that all the prerequisites listed in the [Preparation to install Omnia](PREINSTALL_OMNIA.md) are met before installing Omnia. +* If there are errors when any of the following Ansible playbook commands are run, re-run the commands again. +* The user should have root privileges to perform installations and configurations. + +## Install Omnia using CLI + +1. Clone the Omnia repository: +``` +git clone https://github.com/dellhpc/omnia.git +``` +__Note:__ After the Omnia repository is cloned, a folder named __omnia__ is created. Ensure that you do not rename this folder. + +2. Change the directory to __omnia__: `cd omnia` + +3. An inventory file must be created in the __omnia__ folder. Add compute node IPs under **[compute]** group and the manager node IP under **[manager]** group. See the INVENTORY template file under `omnia\docs` folder. + +4. To install Omnia: +``` +ansible-playbook omnia.yml -i inventory -e "ansible_python_interpreter=/usr/bin/python2" +``` + +5. By default, no skip tags are selected, and both Kubernetes and Slurm will be deployed. 
+ +To skip the installation of Kubernetes, enter: +`ansible-playbook omnia.yml -i inventory -e "ansible_python_interpreter=/usr/bin/python2" --skip-tags "kubernetes"` + +To skip the installation of Slurm, enter: +`ansible-playbook omnia.yml -i inventory -e "ansible_python_interpreter=/usr/bin/python2" --skip-tags "slurm"` + +To skip the NFS client setup, enter the following command to skip the k8s_nfs_client_setup role of Kubernetes: +`ansible-playbook omnia.yml -i inventory -e "ansible_python_interpreter=/usr/bin/python2" --skip-tags "nfs_client"` + +6. To provide passwords for the MariaDB database (for Slurm accounting), the Kubernetes Pod Network CIDR, and the Kubernetes CNI, edit the `omnia_config.yml` file. +__Note:__ +* Supported values for Kubernetes CNI are calico and flannel. The default value of CNI considered by Omnia is calico. +* The default value of Kubernetes Pod Network CIDR is 10.244.0.0/16. If 10.244.0.0/16 is already in use within your network, select a different Pod Network CIDR. For more information, see __https://docs.projectcalico.org/getting-started/kubernetes/quickstart__. + +To view the set passwords of omnia_config.yml at a later time: +`ansible-vault view omnia_config.yml --vault-password-file .omnia_vault_key` + +Omnia considers `slurm` as the default username for MariaDB. + +## Kubernetes roles + +The following __kubernetes__ roles are provided by Omnia when __omnia.yml__ file is run: +- __common__ role: + - Install common packages on manager and compute nodes + - Docker is installed + - Deploy time ntp/chrony + - Install Nvidia drivers and software components +- **k8s_common** role: + - Required Kubernetes packages are installed + - Starts the docker and Kubernetes services. +- **k8s_manager** role: + - __helm__ package for Kubernetes is installed. +- **k8s_firewalld** role: This role is used to enable the required ports to be used by Kubernetes. + - For __head-node-ports__: 6443,2379-2380,10251,10250,10252 + - For __compute-node-ports__: 10250,30000-32767 + - For __calico-udp-ports__: 4789 + - For __calico-tcp-ports__: 5473,179 + - For __flannel-udp-ports__: 8285,8472 +- **k8s_nfs_server_setup** role: + - A __nfs-share__ directory, `/home/k8snfs`, is created. Using this directory, compute nodes share the common files. +- **k8s_nfs_client_setup** role +- **k8s_start_manager** role: + - Runs the __/bin/kubeadm init__ command to initialize the Kubernetes services on manager node. + - Initialize the Kubernetes services in the manager node and create service account for Kubernetes Dashboard +- **k8s_start_workers** role: + - The compute nodes are initialized and joined to the Kubernetes cluster with the manager node. +- **k8s_start_services** role + - Kubernetes services are deployed such as Kubernetes Dashboard, Prometheus, MetalLB and NFS client provisioner + +__Note:__ +* After Kubernetes is installed and configured, a few Kubernetes and calico/flannel related ports are opened in the manager and compute nodes. This is required for Kubernetes Pod-to-Pod and Pod-to-Service communications. Calico/flannel provides a full networking stack for Kubernetes pods. +* If Kubernetes Pods are unable to communicate with the servers when the DNS servers are not responding, then the Kubernetes Pod Network CIDR may be overlapping with the host network, which causes a DNS issue. To resolve this issue, follow the steps below: +1. In your Kubernetes cluster, run `kubeadm reset -f` on the nodes. +2. In the management node, edit the `omnia_config.yml` file to change the Kubernetes Pod Network CIDR.
The suggested IP range is 192.168.0.0/16; ensure that you provide an IP range that is not in use in your host network. +3. Execute omnia.yml and skip slurm using --skip-tags slurm. + +## Slurm roles + +The following __Slurm__ roles are provided by Omnia when __omnia.yml__ file is run: +- **slurm_common** role: + - Installs the common packages on manager node and compute node. +- **slurm_manager** role: + - Installs the packages only related to manager node + - This role also enables the required ports to be used by Slurm. + **tcp_ports**: 6817,6818,6819 + **udp_ports**: 6817,6818,6819 + - Creates and updates the Slurm configuration files based on the manager node requirements. +- **slurm_workers** role: + - Installs the Slurm packages into all compute nodes as per the compute node requirements. +- **slurm_start_services** role: + - Starts the Slurm services so that the compute node communicates with the manager node. +- **slurm_exporter** role: + - Slurm exporter is a package for exporting metrics collected from Slurm resource scheduling system to prometheus. + - Slurm exporter is installed on the host alongside Slurm, and it installs successfully only if Slurm is installed. + +**Note:** If you want to install JupyterHub and Kubeflow playbooks, you must first install the JupyterHub playbook and then the Kubeflow playbook. + +Commands to install JupyterHub and Kubeflow: +* `ansible-playbook platforms/jupyterhub.yml -i inventory -e "ansible_python_interpreter=/usr/bin/python2"` +* `ansible-playbook platforms/kubeflow.yml -i inventory -e "ansible_python_interpreter=/usr/bin/python2" ` + +__Note:__ When the Internet connectivity is unstable or slow, it may take more time to pull the images to create the Kubeflow containers. If the time limit is exceeded, the **Apply Kubeflow configurations** task may fail. To resolve this issue, you must redeploy Kubernetes cluster and reinstall Kubeflow by completing the following steps: +* Format the OS on manager and compute nodes. +* In the `omnia_config.yml` file, change the k8s_cni variable value from calico to flannel. +* Run the Kubernetes and Kubeflow playbooks. + +## Add a new compute node to the cluster + +Update the INVENTORY file present in the `omnia` directory with the new node IP address under the compute group. Ensure the other nodes which are already a part of the cluster are also present in the compute group along with the new node. Then, run `omnia.yml` to add the new node to the cluster and update the configurations of the manager node. diff --git a/docs/INSTALL_OMNIA_APPLIANCE.md b/docs/INSTALL_OMNIA_APPLIANCE.md new file mode 100644 index 000000000..f4d3c47f7 --- /dev/null +++ b/docs/INSTALL_OMNIA_APPLIANCE.md @@ -0,0 +1,189 @@ +# Install the Omnia appliance + +## Prerequisites +* Ensure that all the prerequisites listed in the [Prerequisites to install the Omnia appliance](PREINSTALL_OMNIA_APPLIANCE.md) file are met before installing the Omnia appliance. +* After the installation of the Omnia appliance, changing the manager node is not supported. If you need to change the manager node, you must redeploy the entire cluster. +* You must have root privileges to perform installations and configurations using the Omnia appliance. +* If there are errors when any of the following Ansible playbook commands are run, re-run the commands. + +## Steps to install the Omnia appliance + +1. On the management node, change the working directory to the directory where you want to clone the Omnia Git repository. +2.
Clone the Omnia repository: +``` +git clone https://github.com/dellhpc/omnia.git +``` +3. Change the directory to __omnia__: `cd omnia` +4. Edit the `omnia_config.yml` file to: +* Provide passwords for the MariaDB database (for Slurm accounting), the Kubernetes Pod Network CIDR, and the Kubernetes CNI under `mariadb_password` and `k8s_cni` respectively. +__Note:__ +* Supported values for Kubernetes CNI are calico and flannel. The default value of CNI considered by Omnia is calico. +* The default value of Kubernetes Pod Network CIDR is 10.244.0.0/16. If 10.244.0.0/16 is already in use within your network, select a different Pod Network CIDR. For more information, see __https://docs.projectcalico.org/getting-started/kubernetes/quickstart__. + +5. Run `ansible-vault view omnia_config.yml --vault-password-file .omnia_vault_key` to view the set passwords of __omnia_config.yml__. +6. Change the directory to __omnia__->__appliance__: `cd omnia/appliance` +7. Edit the `appliance_config.yml` file to: + a. Provide passwords for Cobbler and AWX under `provision_password` and `awx_password` respectively. + __Note:__ The password must have a minimum of eight characters and a maximum of 30 characters. Do not use these characters while entering a password: -, \\, "", and \' + + b. Change the NIC for the DHCP server under `hpc_nic`, and the NIC used to connect to the Internet under `public_nic`. The default values of **hpc_nic** and **public_nic** are set to em1 and em2 respectively. + + c. Provide the CentOS-7-x86_64-Minimal-2009 ISO file path under `iso_file_path`. This ISO file is used by Cobbler to provision the OS on the compute nodes. + __Note:__ It is recommended that you do not rename the ISO image file. And, you **must not** change the path of this ISO image file as the provisioning of the OS on the compute nodes may be impacted. + + d. Provide a mapping file for DHCP configuration under `mapping_file_path`. The **mapping_file.csv** template file is present under `omnia/examples`. Enter the details in the order: `MAC, Hostname, IP`. The header in the template file must not be deleted before saving the file. + If you want to continue without providing a mapping file, leave the `mapping_file_path` value blank. + __Note:__ Ensure that duplicate values are not provided for MAC, Hostname, and IP in the mapping file. The Hostname should not contain the following characters: , (comma), \. (period), and - (hyphen). + + e. Provide a valid DHCP range for the HPC cluster under the variables `dhcp_start_ip_range` and `dhcp_end_ip_range`. + +8. Run `ansible-vault view appliance_config.yml --vault-password-file .vault_key` to view the set passwords of __appliance_config.yml__. + +Omnia considers the following usernames as default: +* `cobbler` for Cobbler Server +* `admin` for AWX +* `slurm` for MariaDB + +9. Run `ansible-playbook appliance.yml -e "ansible_python_interpreter=/usr/bin/python2"` to install the Omnia appliance. + + +Omnia creates a log file which is available at: `/var/log/omnia.log`. + +## Provision operating system on the target nodes +Omnia role used: *provision* +Ports used by Cobbler: +* TCP ports: 80,443,69 +* UDP ports: 69,4011 + +To create the Cobbler image, Omnia configures the following: +* Firewall settings. +* The Cobbler kickstart file, which enables UEFI PXE boot. + +To access the Cobbler dashboard, enter `https://<IP>/cobbler_web` where `<IP>` is the Global IP address of the management node. For example, enter +`https://100.98.24.225/cobbler_web` to access the Cobbler dashboard.
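A quick way to confirm the dashboard is reachable before proceeding is an ad-hoc check such as the sketch below; `management_node_ip` is a hypothetical placeholder variable, and `validate_certs: no` assumes Cobbler serves a self-signed certificate.

```yaml
# Illustrative check only; management_node_ip is a placeholder variable.
- name: Verify the Cobbler dashboard responds over HTTPS
  uri:
    url: "https://{{ management_node_ip }}/cobbler_web"
    validate_certs: no      # assuming a self-signed certificate
    status_code: 200
  delegate_to: localhost
```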
+ +__Note__: After the Cobbler Server provisions the operating system on the nodes, IP addresses and host names are assigned by the DHCP service. +* If a mapping file is not provided, the hostname assigned to the server is based on the following format: **computexxx-xxx** where "xxx-xxx" is the last two octets of Host IP address. For example, if the Host IP address is 172.17.0.11 then the assigned hostname by Omnia is compute0-11. +* If a mapping file is provided, the hostnames follow the format provided in the mapping file. + +__Note__: If you want to add more nodes, append the new nodes in the existing mapping file. However, do not modify the previous nodes in the mapping file as it may impact the existing cluster. + +## Install and configure Ansible AWX +Omnia role used: *web_ui* +The port used by AWX is __8081__. +The AWX repository is cloned from the GitHub path: https://github.com/ansible/awx.git + +Omnia performs the following configurations on AWX: +* The default organization name is set to **Dell EMC**. +* The default project name is set to **omnia**. +* The credentials are stored in **omnia_credential**. +* Two groups, namely compute and manager groups, are provided under **omnia_inventory**. You can add hosts to these groups using the AWX UI. +* Pre-defined templates are provided: **DeployOmnia** and **DynamicInventory** +* **DynamicInventorySchedule**, which is scheduled to run every 10 minutes, updates the inventory details dynamically. + +To access the AWX dashboard, enter `http://<IP>:8081` where **\<IP\>** is the Global IP address of the management node. For example, enter `http://100.98.24.225:8081` to access the AWX dashboard. + +**Note**: The AWX configurations are automatically performed by Omnia, and Dell Technologies recommends that you do not change the default configurations provided by Omnia as the functionality may be impacted. + +__Note__: Although AWX UI is accessible, hosts will be shown only after a few nodes have been provisioned by Cobbler. It takes approximately 10 to 15 minutes to display the host details after the provisioning by Cobbler. If a server is provisioned but you are unable to view the host details on the AWX UI, then you can run the following command from the __omnia__ -> __appliance__ -> __tools__ folder to view the hosts which are reachable. +``` +ansible-playbook -i ../roles/inventory/provisioned_hosts.yml provision_report.yml +``` + +## Install Kubernetes and Slurm using AWX UI +Kubernetes and Slurm are installed by deploying the **DeployOmnia** template on the AWX dashboard. + +1. On the AWX dashboard, under __RESOURCES__ __->__ __Inventories__, select **omnia_inventory**. +2. Select __GROUPS__, and then select either __compute__ or __manager__ group. +3. Select the __HOSTS__ tab. +4. To add the hosts provisioned by Cobbler, click **+**, and then select **Existing Host**. +5. Select the hosts from the list and click __SAVE__. +6. To deploy Omnia, under __RESOURCES__ -> __Templates__, select __DeployOmnia__, and then click __LAUNCH__. +7. By default, no skip tags are selected and both Kubernetes and Slurm will be deployed. +8. To install only Kubernetes, enter `slurm` in the skip tag section and select **slurm**. +9. To install only Slurm, add and select the `kubernetes` skip tag. + +__Note:__ +* If you would like to skip the NFS client setup, enter `nfs_client` in the skip tag section to skip the **k8s_nfs_client_setup** role of Kubernetes. + +10. Click **NEXT**. +11. Review the details in the **PREVIEW** window, and click **LAUNCH** to run the DeployOmnia template.
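The same launch can also be scripted against the AWX API instead of the UI, using the `awx` CLI that the test playbooks already rely on. A sketch follows; the `launch` action and `--monitor` flag are believed to be standard awxkit CLI behavior, but treat the exact invocation as an assumption rather than the documented Omnia workflow.

```yaml
# Sketch only: launching the DeployOmnia template without the AWX UI.
- name: Launch the DeployOmnia job template and wait for it to finish
  command: >-
    awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}"
    --conf.password "{{ admin_password }}"
    job_templates launch "DeployOmnia" --monitor -f human
  changed_when: true
```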
+
+__Note:__ If you want to install both __JupyterHub__ and __Kubeflow__, you must run the __JupyterHub__ playbook first and then the __Kubeflow__ playbook.
+
+__Note:__ To run the __JupyterHub__ and __Kubeflow__ playbooks:
+* On the AWX UI, under __RESOURCES__ -> __Templates__, select the __DeployOmnia__ template.
+* From the __PLAYBOOK__ dropdown menu, select __platforms/jupyterhub.yml__ and launch the template to install JupyterHub.
+* From the __PLAYBOOK__ dropdown menu, select __platforms/kubeflow.yml__ and launch the template to install Kubeflow.
+
+__Note:__ When the Internet connectivity is unstable or slow, pulling the images required to create the Kubeflow containers may take longer. If the time limit is exceeded, the **Apply Kubeflow configurations** task may fail. To resolve this issue, redeploy the Kubernetes cluster and reinstall Kubeflow by completing the following steps:
+* Complete the PXE booting of the manager and compute nodes.
+* In the `omnia_config.yml` file, change the `k8s_cni` variable value from calico to flannel.
+* Run the Kubernetes and Kubeflow playbooks.
+
+The DeployOmnia template may not run successfully if:
+- The Manager group contains more than one host.
+- The Compute group does not contain a host. Ensure that the Compute group is assigned at least one host node.
+- Both the kubernetes and slurm tags are selected under Skip Tags.
+
+After the **DeployOmnia** template is run from the AWX UI, the **omnia.yml** file installs Kubernetes and Slurm, or either of them, as per the selection made in the template. Additionally, the appropriate roles are assigned to the compute and manager groups.
+
+## Kubernetes roles
+
+The following __kubernetes__ roles are provided by Omnia when the __omnia.yml__ file is run:
+- __common__ role:
+  - Installs common packages on the manager and compute nodes
+  - Installs Docker
+  - Deploys ntp/chrony for time synchronization
+  - Installs NVIDIA drivers and software components
+- **k8s_common** role:
+  - Installs the required Kubernetes packages
+  - Starts the Docker and Kubernetes services
+- **k8s_manager** role:
+  - Installs the __helm__ package manager for Kubernetes
+- **k8s_firewalld** role: This role is used to open the ports required by Kubernetes.
+  - For __head-node-ports__: 6443, 2379-2380, 10251, 10250, 10252
+  - For __compute-node-ports__: 10250, 30000-32767
+  - For __calico-udp-ports__: 4789
+  - For __calico-tcp-ports__: 5473, 179
+  - For __flannel-udp-ports__: 8285, 8472
+- **k8s_nfs_server_setup** role:
+  - Creates an __nfs-share__ directory, `/home/k8snfs`, through which the compute nodes share common files
+- **k8s_nfs_client_setup** role:
+  - Sets up the NFS client on the compute nodes
+- **k8s_start_manager** role:
+  - Runs the __/bin/kubeadm init__ command to initialize the Kubernetes services on the manager node
+  - Creates the service account for the Kubernetes Dashboard
+- **k8s_start_workers** role:
+  - Initializes the compute nodes and joins them to the Kubernetes cluster with the manager node
+- **k8s_start_services** role:
+  - Deploys Kubernetes services such as the Kubernetes Dashboard, Prometheus, MetalLB, and the NFS client provisioner
+
+__Note:__
+* After Kubernetes is installed and configured, a few Kubernetes and Calico/Flannel related ports are opened on the manager and compute nodes. This is required for Kubernetes Pod-to-Pod and Pod-to-Service communications. Calico/Flannel provides a full networking stack for Kubernetes pods.
+* If Kubernetes pods are unable to communicate with the servers (that is, the DNS servers are not responding), the Kubernetes Pod Network CIDR may be overlapping with the host network. To resolve this issue:
+1. In your Kubernetes cluster, run `kubeadm reset -f` on the nodes.
+2. On the management node, edit the `omnia_config.yml` file to change the Kubernetes Pod Network CIDR. The suggested IP range is 192.168.0.0/16; ensure that the range you provide is not in use in your host network.
+3. Run `omnia.yml`, skipping Slurm with `--skip-tags slurm`.
+
+## Slurm roles
+
+The following __Slurm__ roles are provided by Omnia when the __omnia.yml__ file is run:
+- **slurm_common** role:
+  - Installs the common packages on the manager and compute nodes
+- **slurm_manager** role:
+  - Installs the packages required only on the manager node
+  - Opens the ports used by Slurm (**tcp_ports**: 6817,6818,6819; **udp_ports**: 6817,6818,6819)
+  - Creates and updates the Slurm configuration files based on the manager node requirements
+- **slurm_workers** role:
+  - Installs the Slurm packages on all compute nodes as per the compute node requirements
+- **slurm_start_services** role:
+  - Starts the Slurm services so that the compute nodes communicate with the manager node
+- **slurm_exporter** role:
+  - Installs Slurm Exporter, a package that exports metrics collected from the Slurm resource scheduling system to Prometheus
+  - Slurm Exporter is installed on the host, like Slurm, and installs successfully only if Slurm is installed
+
+## Add a new compute node to the cluster
+
+If a new node is provisioned through Cobbler, the node address is automatically displayed on the AWX dashboard. The node is not assigned to any group. You can add the node to the compute group along with the existing nodes and run `omnia.yml` to add the new node to the cluster and update the configurations on the manager node.
diff --git a/docs/INVENTORY b/docs/INVENTORY
new file mode 100644
index 000000000..99cf89caf
--- /dev/null
+++ b/docs/INVENTORY
@@ -0,0 +1,6 @@
+[compute]
+compute-01
+compute-02
+
+[manager]
+manager-01
diff --git a/docs/MONITOR_CLUSTERS.md b/docs/MONITOR_CLUSTERS.md
new file mode 100644
index 000000000..b6e411fab
--- /dev/null
+++ b/docs/MONITOR_CLUSTERS.md
@@ -0,0 +1,93 @@
+# Monitor Kubernetes and Slurm
+Omnia provides playbooks to configure additional software components for Kubernetes, such as JupyterHub and Kubeflow. For workload management (submitting, controlling, and managing jobs) of HPC, AI, and Data Analytics clusters, you can access the Kubernetes and Slurm dashboards and other supported applications.
+
+To access any of the dashboards, log in to the manager node and open the installed web browser.
+
+If you are connecting remotely, use PuTTY or another X11-capable client (for MobaXterm, use version 8 or later), and follow these steps:
+
+1. SSH to the manager node with X11 forwarding enabled:
+   `ssh -X root@<IP>` (where `<IP>` is the private IP of the manager node)
+2. `yum install firefox -y`
+3. `yum install xorg-x11-xauth`
+4. `export DISPLAY=:10.0`
+5. Log out and log back in.
+6. To launch Firefox from the terminal, use the following command:
+   `firefox &`
+
+__Note:__ Each time the PuTTY/MobaXterm session ends, you must run the __export DISPLAY=:10.0__ command again, else Firefox cannot be launched.
+
+## Set up a user account on the manager node
+1. Log in to the manager node as root and run `adduser <username>`.
+2. Run `passwd <username>` to set the password.
+3. Run `usermod -a -G wheel <username>` to grant sudo permission.
+
+__Note:__ Kubernetes and Slurm jobs can be scheduled only for users with __sudo__ privileges.
+
+## Access Kubernetes Dashboard
+1. To verify that the __kubernetes-dashboard__ service is __running__, run `kubectl get pods --namespace kubernetes-dashboard`.
+2. To start the Kubernetes dashboard, run `kubectl proxy`.
+3. From the CLI, run `kubectl get secrets` to see the generated tokens.
+4. Copy the token with the name __prometheus-...-kube-state-metrics__ of the type __kubernetes.io/service-account-token__.
+5. Run `kubectl describe secret <token_name>`.
+6. Copy the encrypted token value.
+7. On a web browser (installed on the manager node), enter http://localhost:8001/api/v1/namespaces/kubernetes-dashboard/services/https:kubernetes-dashboard:/proxy/ to access the Kubernetes Dashboard.
+8. Select the authentication method as __Token__.
+9. On the Kubernetes Dashboard, paste the copied encrypted token and click __Sign in__.
+
+## Access Kubeflow Dashboard
+
+It is recommended that you use a port number in the range __8000-8999__; the suggested port number is __8085__.
+
+1. To view the ports which are in use, run the following command:
+   `netstat -an`
+2. Select a port number in the range __8000-8999__ which is not in use.
+3. To run the **Kubeflow Dashboard** at the selected port number, run one of the following commands:
+   `kubectl port-forward -n kubeflow service/centraldashboard __selected_port_number__:80`
+   (Or)
+   `kubectl port-forward -n istio-system svc/istio-ingressgateway __selected_port_number__:80`
+4. On a web browser installed on the manager node, go to http://localhost:selected-port-number/ to launch the Kubeflow Central Dashboard.
+
+For more information about the Kubeflow Central Dashboard, see https://www.kubeflow.org/docs/components/central-dash/overview/.
+
+## Access JupyterHub Dashboard
+
+1. To verify that the JupyterHub services are running, run `kubectl get pods --namespace jupyterhub`.
+2. Ensure that the pods with names starting with __hub__ and __proxy__ are in the __Running__ state.
+3. Run `kubectl get services --namespace jupyterhub`.
+4. Copy the **External IP** of the __proxy-public__ service.
+5. On a web browser installed on the __manager node__, use the External IP address to access the JupyterHub Dashboard.
+6. Enter any __username__ and __password__ combination to enter JupyterHub. The __username__ and __password__ can be configured later from the JupyterHub dashboard.
+
+## Prometheus
+
+Prometheus is installed in one of two ways:
+ * It is installed on the host when Slurm is installed without Kubernetes.
+ * It is installed as a Kubernetes role if you install both Slurm and Kubernetes.
+
+If Prometheus is installed as part of the Kubernetes role, run the following commands before starting the Prometheus UI:
+1. `export POD_NAME=$(kubectl get pods --namespace default -l "app=prometheus,component=server" -o jsonpath="{.items[0].metadata.name}")`
+2. `echo $POD_NAME`
+3. `kubectl --namespace default port-forward $POD_NAME 9090`
+
+If Prometheus is installed on the host, start the Prometheus web server by running the following steps:
+1. Navigate to the Prometheus folder. The default path is __/var/lib/prometheus-2.23.0.linux-amd64/__.
+2. Start the web server:
+   `./prometheus`
+
+Go to http://localhost:9090 to launch the Prometheus UI in the browser.
+
+__Note:__
+* If Prometheus was installed through Slurm without Kubernetes, it is removed when Kubernetes is installed, as Prometheus then runs as a pod.
+* A single instance of Prometheus is used when both Kubernetes and Slurm are installed.
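+
+For the host-installed case, the two steps above can be collapsed into a single invocation. The following is a minimal sketch, assuming the default install path; the `--web.listen-address` flag is optional and is shown only to make the listening port explicit:
+```
+# Start the host-installed Prometheus server on port 9090
+cd /var/lib/prometheus-2.23.0.linux-amd64/
+./prometheus --web.listen-address=0.0.0.0:9090
+```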
diff --git a/docs/PREINSTALL.md b/docs/PREINSTALL.md
deleted file mode 100644
index b8b609bef..000000000
--- a/docs/PREINSTALL.md
+++ /dev/null
@@ -1,27 +0,0 @@
-# Pre-Installation Preparation
-
-## Assumptions
-Omnia assumes that prior to installation:
-* Systems have a base operating system (currently CentOS 7 or 8)
-* Network(s) has been cabled and nodes can reach the internet
-* SSH Keys for `root` have been installed on all nodes to allow for password-less SSH
-* Ansible is installed on either the master node or a separate deployment node
-```
-yum install ansible
-```
-
-## Example system designs
-Omnia can configure systems which use Ethernet- or Infiniband-based fabric to connect the compute servers.
-
-![Example system configuration with Ethernet fabric](images/example-system-ethernet.png)
-
-![Example system configuration with Infiniband fabric](images/example-system-infiniband.png)
-
-## Network Setup
-Omnia assumes that servers are already connected to the network and have access to the internet.
-### Network Topology
-Possible network configurations include:
-* A flat topology where all nodes are connected to a switch which includes an uplink to the internet. This requires multiple externally-facing IP addresses
-* A hierarchical topology where compute nodes are connected to a common switch, but the master node contains a second network connection which is connected to the internet. All outbound/inbound traffic would be routed through the master node. This requires setting up firewall rules for IP masquerade, see [here](https://www.server-world.info/en/note?os=CentOS_7&p=firewalld&f=2) for an example.
-### IP and Hostname Assignment
-The recommended setup is to assign IP addresses to individual servers. This can be done manually by logging onto each node, or via DHCP.
diff --git a/docs/PREINSTALL_OMNIA.md b/docs/PREINSTALL_OMNIA.md
new file mode 100644
index 000000000..08180b62a
--- /dev/null
+++ b/docs/PREINSTALL_OMNIA.md
@@ -0,0 +1,28 @@
+# Preparation to install Omnia
+
+## Assumptions
+Ensure that the following prerequisites are met:
+* The manager and compute nodes must be running the CentOS 7.9 2009 OS.
+* All nodes are connected to the network and have access to the Internet.
+* SSH keys for root have been installed on all nodes to allow for password-less SSH.
+* On the manager node, install Ansible and Git using the following commands:
+  * `yum install epel-release -y`
+  * `yum install ansible-2.9.18 git -y`
+__Note:__ Ansible must be installed using __yum__. If Ansible is installed using __pip3__, re-install it using __yum__.
+
+## Example system designs
+Omnia can configure systems which use Ethernet- or Infiniband-based fabric to connect the compute servers.
+
+![Example system configuration with Ethernet fabric](images/example-system-ethernet.png)
+
+![Example system configuration with Infiniband fabric](images/example-system-infiniband.png)
+
+## Network Setup
+Omnia assumes that servers are already connected to the network and have access to the internet.
+### Network Topology
+Possible network configurations include:
+* A flat topology where all nodes are connected to a switch which includes an uplink to the internet. This requires multiple externally-facing IP addresses.
+* A hierarchical topology where compute nodes are connected to a common switch, but the manager node contains a second network connection which is connected to the internet. All outbound/inbound traffic would be routed through the manager node. This requires setting up firewall rules for IP masquerade; see [here](https://www.server-world.info/en/note?os=CentOS_7&p=firewalld&f=2) for an example, and the minimal sketch after this list.
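+
+For the hierarchical topology, the following is a minimal sketch of the masquerade rule, assuming `firewalld` on a CentOS 7 manager node with the externally-facing interface in the `public` zone; adapt the zone to your layout:
+```
+# Enable IP masquerade on the externally-facing zone
+firewall-cmd --zone=public --add-masquerade --permanent
+
+# Reload firewalld to apply the permanent rule
+firewall-cmd --reload
+```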
+### IP and Hostname Assignment
+The recommended setup is to assign IP addresses to individual servers. This can be done manually by logging onto each node, or via DHCP.
diff --git a/docs/PREINSTALL_OMNIA_APPLIANCE.md b/docs/PREINSTALL_OMNIA_APPLIANCE.md
new file mode 100644
index 000000000..8889a2955
--- /dev/null
+++ b/docs/PREINSTALL_OMNIA_APPLIANCE.md
@@ -0,0 +1,36 @@
+# Prerequisites to install the Omnia appliance
+
+Ensure that the following prerequisites are met before installing the Omnia appliance:
+* On the management node, install Ansible and Git using the following commands:
+  * `yum install epel-release -y`
+  * `yum install ansible-2.9.18 git -y`
+  __Note:__ Ansible must be installed using __yum__. If Ansible is installed using __pip3__, re-install it using __yum__.
+* Ensure a stable Internet connection is available on the management node and the target nodes.
+* CentOS 7.9 2009 is installed on the management node.
+* To provision the bare metal servers, go to http://isoredirect.centos.org/centos/7/isos/x86_64/ and download the **CentOS-7-x86_64-Minimal-2009** ISO file.
+* For the DHCP configuration, you can provide a mapping file. The provided details must be in the format: MAC, Hostname, IP. For example, `xx:xx:4B:C4:xx:44,validation01,172.17.0.81` and `xx:xx:4B:C5:xx:52,validation02,172.17.0.82` are valid entries.
+__Note:__ A template for the mapping file, named `mapping_file.csv`, is present in `omnia/examples`. The header in the template file must not be deleted before saving the file.
+__Note:__ Ensure that duplicate values are not provided for MAC, Hostname, and IP in the mapping file. The Hostname must not contain the following characters: , (comma), \. (period), and - (hyphen).
+* Connect one of the Ethernet cards on the management node to the HPC switch, and connect the other Ethernet card to the global network.
+* If SELinux is not disabled on the management node, disable it from `/etc/sysconfig/selinux` and restart the management node.
+* The default mode of PXE is __UEFI__; the BIOS Legacy Mode is not supported.
+* The default boot order for the bare metal servers must be __PXE__.
+* Configuration of __RAID__ is not part of Omnia. If the bare metal servers have a __RAID__ controller installed, it is mandatory to create a **VIRTUAL DISK**.
+
+## Example system designs
+Omnia can configure systems which use Ethernet- or Infiniband-based fabric to connect the compute servers.
+
+![Example system configuration with Ethernet fabric](images/example-system-ethernet.png)
+
+![Example system configuration with Infiniband fabric](images/example-system-infiniband.png)
+
+## Network Setup
+Omnia assumes that servers are already connected to the network and have access to the internet.
+### Network Topology
+Possible network configurations include:
+* A flat topology where all nodes are connected to a switch which includes an uplink to the internet. This requires multiple externally-facing IP addresses.
+* A hierarchical topology where compute nodes are connected to a common switch, but the manager node contains a second network connection which is connected to the internet. All outbound/inbound traffic would be routed through the manager node. This requires setting up firewall rules for IP masquerade; see [here](https://www.server-world.info/en/note?os=CentOS_7&p=firewalld&f=2) for an example.
+### IP and Hostname Assignment
+The recommended setup is to assign IP addresses to individual servers. This can be done manually by logging onto each node, or via DHCP.
diff --git a/docs/README.md b/docs/README.md
index fc24b2282..c8929a01f 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -1,6 +1,6 @@
-**Omnia** (Latin: all or everything) is a deployment tool to configure Dell EMC PowerEdge servers running standard RPM-based Linux OS images into cluster capable of supporting HPC, AI, and data analytics workloads. Omnia installs Slurm and/or Kubernetes for managing jobs and enables installation of many other packages and services for running diverse workloads on the same converged solution. Omnia is a collection of [Ansible](https://ansible.org) playbooks, is open source, and is constantly being extended to enable comprehensive workloads.
+**Omnia** (Latin: all or everything) is a deployment tool to configure Dell EMC PowerEdge servers running standard RPM-based Linux OS images into clusters capable of supporting HPC, AI, and data analytics workloads. It uses Slurm, Kubernetes, and other packages to manage jobs and run diverse workloads on the same converged solution. It is a collection of [Ansible](https://ansible.org) playbooks, is open source, and is constantly being extended to enable comprehensive workloads.
-## What Omnia Does
+## What Omnia does
 Omnia can build clusters which use Slurm or Kubernetes (or both!) for workload management. Omnia will install software from a variety of sources, including:
 - Standard CentOS and [ELRepo](http://elrepo.org) repositories
 - Helm repositories
@@ -8,28 +8,95 @@ Omnia can build clusters which use Slurm or Kubernetes (or both!) for workload m
 - [OpenHPC](https://openhpc.community) repositories (_coming soon!_)
 - [OperatorHub](https://operatorhub.io) (_coming soon!_)
-Whenever possible, Omnia will opt to leverage existing projects rather than reinvent the wheel.
+Whenever possible, Omnia will leverage existing projects rather than reinvent the wheel.
 
 ![Omnia draws from existing repositories](images/omnia-overview.png)
 
-### Omnia Stacks
+### Omnia stacks
 Omnia can install Kubernetes or Slurm (or both), along with additional drivers, services, libraries, and user applications.
 ![Omnia Kubernetes Stack](images/omnia-k8s.png)
 ![Omnia Slurm Stack](images/omnia-slurm.png)
-## Installing Omnia
-Omnia requires that servers already have an RPM-based Linux OS running on them, and are all connected to the Internet. Currently all Omnia testing is done on [CentOS](https://centos.org). Please see [PREINSTALL](PREINSTALL.md) for instructions on network setup.
+## Deploying clusters using the Omnia Appliance
+The Omnia Appliance automates the entire cluster deployment process, starting with provisioning the operating system on the servers.
-Once servers have functioning OS and networking, you can using Omnia to install and start Slurm and/or Kubernetes. Please see [INSTALL](INSTALL.md) for instructions.
+Ensure all the prerequisites listed in [preparation to install Omnia Appliance](PREINSTALL_OMNIA_APPLIANCE.md) are met before installing the Omnia appliance.
+
+For detailed instructions on installing the Omnia appliance, see [Install Omnia Appliance](INSTALL_OMNIA_APPLIANCE.md).
+
+## Installing Omnia on servers with a pre-provisioned OS
+Omnia can deploy clusters to servers that already have an RPM-based Linux OS running on them and are all connected to the Internet. Currently, all Omnia testing is done on [CentOS](https://centos.org). Please see [Preparation to install Omnia](PREINSTALL_OMNIA.md) for instructions on network setup.
+
+Once the servers have a functioning OS and networking, you can use Omnia to install and start Slurm and/or Kubernetes. Please see [Install Omnia using CLI](INSTALL_OMNIA.md) for detailed instructions.
+
+# System requirements
+Ensure that the versions listed in the following table are installed; versions other than those listed are not supported by Omnia, as they may impact its functionality.
+
+Software and hardware requirements | Version
+---------------------------------- | -------
+OS installed on the management node | CentOS 7.9 2009
+OS deployed by Omnia on bare-metal servers | CentOS 7.9 2009 Minimal Edition
+Cobbler | 2.8.5
+Ansible AWX | 15.0.0
+Slurm Workload Manager | 20.11.2
+Kubernetes Controllers | 1.16.7
+Kubeflow | 1
+Prometheus | 2.23.0
+Supported PowerEdge servers | R640, R740, R7525, C4140, DSS8440, and C6420
+
+## Software managed by Omnia
+Ensure that the software versions listed in the following table are used; versions other than those listed are not supported by Omnia, as they may impact its functionality.
+
+Software | Licence | Compatible Version | Description
+----------- | ------- | ---------------- | -----------------
+MariaDB | GPL 2.0 | 5.5.68 | Relational database used by Slurm
+Slurm | GNU General Public | 20.11.2 | HPC Workload Manager
+Docker CE | Apache-2.0 | 20.10.2 | Docker Service
+NVIDIA container runtime | Apache-2.0 | 3.4.2 | Nvidia container runtime library
+Python PIP | MIT Licence | 3.2.1 | Python Package
+Python2 | - | 2.7.5 | -
+Kubelet | Apache-2.0 | 1.16.7 | Provides external, versioned ComponentConfig API types for configuring the kubelet
+Kubeadm | Apache-2.0 | 1.16.7 | "fast paths" for creating Kubernetes clusters
+Kubectl | Apache-2.0 | 1.16.7 | Command line tool for Kubernetes
+JupyterHub | Modified BSD Licence | 1.1.0 | Multi-user hub
+Kfctl | Apache-2.0 | 1.0.2 | CLI for deploying and managing Kubeflow
+Kubeflow | Apache-2.0 | 1 | Cloud Native platform for machine learning
+Helm | Apache-2.0 | 3.5.0 | Kubernetes Package Manager
+Helm Chart | - | 0.9.0 | -
+TensorFlow | Apache-2.0 | 2.1.0 | Machine Learning framework
+Horovod | Apache-2.0 | 0.21.1 | Distributed deep learning training framework for TensorFlow
+MPI | Copyright (c) 2018-2019 Triad National Security, LLC. All rights reserved. | 0.2.3 | HPC library
+CoreDNS | Apache-2.0 | 1.6.2 | DNS server that chains plugins
+CNI | Apache-2.0 | 0.3.1 | Networking for Linux containers
+AWX | Apache-2.0 | 15.0.0 | Web-based User Interface
+PostgreSQL | Copyright (c) 1996-2020, PostgreSQL Global Development Group | 10.15 | Database Management System
+Redis | BSD-3-Clause Licence | 6.0.10 | In-memory database
+NGINX | BSD-2-Clause Licence | 1.14 | -
+
+# Known issue
+Issue: Hosts do not display on the AWX UI.
+
+Resolution:
+* Verify that `provisioned_hosts.yml` is present in the `omnia/appliance/roles/inventory/files` folder.
+* Check whether hosts are listed in the `provisioned_hosts.yml` file. If they are not listed, the servers have not been PXE booted yet.
+* If hosts are listed in the `provisioned_hosts.yml` file, an IP address has been assigned to them by DHCP. However, they are not displayed on the AWX UI because the PXE boot is still in progress or has not been initiated.
+* Check for reachable and unreachable hosts using the `provisioned_report.yml` tool present in the `omnia/appliance/tools` folder. To run provisioned_report.yml, go to the omnia/appliance directory and run `ansible-playbook -i roles/inventory/files/provisioned_hosts.yml tools/provisioned_report.yml`.
+
+# [Frequently asked questions](FAQ.md)
+
+# Limitations
+1. Removal of Slurm and Kubernetes component roles is not supported. However, skip tags can be provided at the start of installation to select the component roles.
+2. After the installation of the Omnia appliance, changing the manager node is not supported. If you need to change the manager node, you must redeploy the entire cluster.
+3. Dell Technologies provides support for the Dell-developed modules of Omnia. All other third-party tools deployed by Omnia are outside the support scope.
+4. To change a Kubernetes single-node cluster to a multi-node cluster, or to change a multi-node cluster to a single-node cluster, you must either redeploy the entire cluster or run `kubeadm reset -f` on all the nodes of the cluster. You then need to run `omnia.yml` and skip the installation of Slurm using skip tags.
+# Contributing to Omnia
 The Omnia project was started to give members of the [Dell Technologies HPC Community](https://dellhpc.org) a way to easily setup clusters of Dell EMC servers, and to contribute useful tools, fixes, and functionality back to the HPC Community.
-### Open to All
+# Open to All
 While we started Omnia within the Dell Technologies HPC Community, that doesn't mean that it's limited to Dell EMC servers, networking, and storage. This is an open project, and we want to encourage *everyone* to use and contribute to Omnia!
-### Anyone Can Contribute!
+# Anyone can contribute!
 It's not just new features and bug fixes that can be contributed to the Omnia project! Anyone should feel comfortable contributing. We are asking for all types of contributions:
 * New feature code
 * Bug fixes
@@ -38,6 +105,4 @@ It's not just new features and bug fixes that can be contributed to the Omnia pr
 * Feedback
 * Validation that it works for your particular configuration
-If you would like to contribute, see [CONTRIBUTING](https://github.com/dellhpc/omnia/blob/master/CONTRIBUTING.md).
-
-### [Omnia Contributors](CONTRIBUTORS.md)
+If you would like to contribute, see [CONTRIBUTING](https://github.com/dellhpc/omnia/blob/release/CONTRIBUTING.md).
diff --git a/docs/_config.yml b/docs/_config.yml index 367390b58..8eeac47e0 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -2,3 +2,4 @@ theme: jekyll-theme-minimal title: Omnia description: Ansible playbook-based tools for deploying Slurm and Kubernetes clusters for High Performance Computing, Machine Learning, Deep Learning, and High-Performance Data Analytics logo: images/omnia-logo.png +markdown: kramdown diff --git a/docs/images/omnia-branch-structure.png b/docs/images/omnia-branch-structure.png index 379725a15..2d7ee66b5 100644 Binary files a/docs/images/omnia-branch-structure.png and b/docs/images/omnia-branch-structure.png differ diff --git a/docs/images/omnia-overview.png b/docs/images/omnia-overview.png index 6f244bbf5..27ba63381 100644 Binary files a/docs/images/omnia-overview.png and b/docs/images/omnia-overview.png differ diff --git a/examples/README.md b/examples/README.md index 258cfe466..6f507a5d2 100644 --- a/examples/README.md +++ b/examples/README.md @@ -1,7 +1,7 @@ # Examples -The examples [K8s Submit](https://github.com/dellhpc/omnia/blob/master/examples/k8s-TensorFlow-resnet50-multinode-MPIOperator.yaml) and [SLURM submit](https://github.com/dellhpc/omnia/blob/master/examples/slurm-TensorFlow-resnet50-multinode-MPI.batch) are provide as examples for running the resnet50 benchmark with TensorFlow on 8 GPUs using 2 C4140s. +The examples [K8s Submit](https://github.com/dellhpc/omnia/blob/devel/examples/k8s-TensorFlow-resnet50-multinode-MPIOperator.yaml) and [SLURM submit](https://github.com/dellhpc/omnia/blob/devel/examples/slurm-TensorFlow-resnet50-multinode-MPI.batch) are provided as examples for running the resnet50 benchmark with TensorFlow on 8 GPUs using 2 C4140s. ## Submitting the example diff --git a/kubernetes/host_inventory_file b/examples/host_inventory_file similarity index 71% rename from kubernetes/host_inventory_file rename to examples/host_inventory_file index 607fe3e2f..ecdb3b7de 100644 --- a/kubernetes/host_inventory_file +++ b/examples/host_inventory_file @@ -2,19 +2,16 @@ all: children: cluster: children: - master: + manager: hosts: compute000: workers: children: compute: hosts: - compute003: + compute001: gpus: hosts: compute002: + compute003: compute004: - compute005: - vars: - single_node: false - master_ip: 10.0.0.100 diff --git a/examples/host_inventory_file.ini b/examples/host_inventory_file.ini new file mode 100644 index 000000000..e67b3811e --- /dev/null +++ b/examples/host_inventory_file.ini @@ -0,0 +1,13 @@ +[manager] +friday + +[compute] +compute000 +compute[002:005] + +[workers:children] +compute + +[cluster:children] +manager +workers diff --git a/examples/mapping_file.csv b/examples/mapping_file.csv new file mode 100644 index 000000000..ccf9dd022 --- /dev/null +++ b/examples/mapping_file.csv @@ -0,0 +1,2 @@ +MAC,Hostname,IP +xx:yy:zz:aa:bb,server,1.2.3.4 \ No newline at end of file diff --git a/kubernetes/kubernetes.yml b/kubernetes/kubernetes.yml deleted file mode 100644 index 8814c2ff9..000000000 --- a/kubernetes/kubernetes.yml +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - ---- -#Playbook for kubernetes cluster - -#collect info from everything -- hosts: all - -# Apply Common Installation and Config -- hosts: cluster - gather_facts: false - roles: - - common - -# Apply GPU Node Config -- hosts: gpus - gather_facts: false - roles: - - computeGPU - -# Apply Master Config -- hosts: master - gather_facts: false - roles: - - master - -# Start K8s on master server -- hosts: master - gather_facts: false - roles: - - startmaster - -# Start K8s worker servers -- hosts: compute,gpus - gather_facts: false - roles: - - startworkers - -# Start K8s worker servers -- hosts: master - gather_facts: false - roles: - - startservices diff --git a/kubernetes/roles/common/files/nvidia b/kubernetes/roles/common/files/nvidia deleted file mode 100644 index f22e77e0b..000000000 --- a/kubernetes/roles/common/files/nvidia +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh -PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" exec nvidia-container-runtime-hook "$@" - diff --git a/kubernetes/roles/common/handlers/main.yml b/kubernetes/roles/common/handlers/main.yml deleted file mode 100644 index 4fdc7000e..000000000 --- a/kubernetes/roles/common/handlers/main.yml +++ /dev/null @@ -1,21 +0,0 @@ ---- - -#- name: Enable docker service - #service: - #name: docker - #enabled: yes -# -- name: Start and Enable docker service - service: - name: docker - state: restarted - enabled: yes - #tags: install - -- name: Start and Enable Kubernetes - kubelet - service: - name: kubelet - state: started - enabled: yes - #tags: install - diff --git a/kubernetes/roles/common/tasks/main.yml b/kubernetes/roles/common/tasks/main.yml deleted file mode 100644 index f2df1d649..000000000 --- a/kubernetes/roles/common/tasks/main.yml +++ /dev/null @@ -1,140 +0,0 @@ -# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- ---- - -- name: add kubernetes repo - copy: src=kubernetes.repo dest=/etc/yum.repos.d/ owner=root group=root mode=644 - tags: install - -# add ElRepo GPG Key -- rpm_key: - state: present - key: https://www.elrepo.org/RPM-GPG-KEY-elrepo.org - tags: install - -- name: add ElRepo (Nvidia kmod drivers) - yum: - name: http://www.elrepo.org/elrepo-release-7.0-3.el7.elrepo.noarch.rpm - state: present - tags: install - -- name: update sysctl to handle incorrectly routed traffic when iptables is bypassed - copy: src=k8s.conf dest=/etc/sysctl.d/ owner=root group=root mode=644 - tags: install - -- name: update sysctl - command: /sbin/sysctl --system - tags: install - -- name: Install EPEL Repository - yum: name=epel-release state=present - tags: install - -#likely need to add a reboot hook in here -#- name: update kernel and all other system packages - #yum: name=* state=latest - #tags: install - -- name: disable swap - command: /sbin/swapoff -a - tags: install - -# Disable selinux -- selinux: - state: disabled - tags: install - -- name: install common packages - yum: - name: - - yum-plugin-versionlock - - gcc - - nfs-utils - - python-pip - - docker - - bash-completion - - kubelet-1.16.7 - - kubeadm-1.16.7 - - kubectl-1.16.7 - - nvidia-detect - state: present - tags: install - -- name: versionlock kubernetes - command: yum versionlock kubelet-1.16.7 kubectl-1.16.7 kubeadm-1.16.7 - tags: install - - -- name: install InfiniBand Support - yum: - name: "@Infiniband Support" - state: present - -- name: upgrade pip - command: /bin/pip install --upgrade pip - tags: install - -#- name: Enable DevicePlugins for all GPU nodes (nvidia-container-runtime-hook) - #copy: src=nvidia dest=/usr/libexec/oci/hooks.d/ owner=root group=root mode=755 - #tags: install - -- name: Add KUBE_EXTRA_ARGS to enable GPUs - lineinfile: - path: /usr/lib/systemd/system/kubelet.service.d/10-kubeadm.conf - line: 'Environment="KUBELET_EXTRA_ARGS=--feature-gates=DevicePlugins=true"' - insertbefore: 'KUBELET_KUBECONFIG_ARGS=' - tags: install - -- name: Start and Enable docker service - service: - name: docker - state: restarted - enabled: yes - tags: install - -- name: Start and Enable Kubernetes - kubelet - service: - name: kubelet - state: restarted - enabled: yes - tags: install - -- name: Start and rpcbind service - service: - name: rpcbind - state: restarted - enabled: yes - tags: install - -- name: Start and nfs-server service - service: - name: nfs-server - state: restarted - enabled: yes - tags: install - -- name: Start and nfs-lock service - service: - name: nfs-lock - #state: restarted - enabled: yes - tags: install - -- name: Start and nfs-idmap service - service: - name: nfs-idmap - state: restarted - enabled: yes - tags: install diff --git a/kubernetes/roles/common/vars/main.yml b/kubernetes/roles/common/vars/main.yml deleted file mode 100644 index 1bdf70a4c..000000000 --- a/kubernetes/roles/common/vars/main.yml +++ /dev/null @@ -1,10 +0,0 @@ ---- - -common_packages: - - epel-release - - python-pip - - docker - - bash-completion - - kubelet - - kubeadm - - kubectl diff --git a/kubernetes/roles/computeGPU/files/k8s.conf b/kubernetes/roles/computeGPU/files/k8s.conf deleted file mode 100644 index 9994b1482..000000000 --- a/kubernetes/roles/computeGPU/files/k8s.conf +++ /dev/null @@ -1,3 +0,0 @@ -net.bridge.bridge-nf-call-ip6tables = 1 -net.bridge.bridge-nf-call-iptables = 1 - diff --git a/kubernetes/roles/computeGPU/files/kubernetes.repo b/kubernetes/roles/computeGPU/files/kubernetes.repo deleted file mode 100644 index 
476b99cb2..000000000 --- a/kubernetes/roles/computeGPU/files/kubernetes.repo +++ /dev/null @@ -1,8 +0,0 @@ -[kubernetes] -name=Kubernetes -baseurl=https://packages.cloud.google.com/yum/repos/kubernetes-el7-x86_64 -enabled=1 -gpgcheck=1 -repo_gpgcheck=1 -gpgkey=https://packages.cloud.google.com/yum/doc/yum-key.gpg https://packages.cloud.google.com/yum/doc/rpm-package-key.gpg - diff --git a/kubernetes/roles/computeGPU/files/nvidia b/kubernetes/roles/computeGPU/files/nvidia deleted file mode 100644 index f22e77e0b..000000000 --- a/kubernetes/roles/computeGPU/files/nvidia +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh -PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" exec nvidia-container-runtime-hook "$@" - diff --git a/kubernetes/roles/computeGPU/handlers/main.yml b/kubernetes/roles/computeGPU/handlers/main.yml deleted file mode 100644 index 4fdc7000e..000000000 --- a/kubernetes/roles/computeGPU/handlers/main.yml +++ /dev/null @@ -1,21 +0,0 @@ ---- - -#- name: Enable docker service - #service: - #name: docker - #enabled: yes -# -- name: Start and Enable docker service - service: - name: docker - state: restarted - enabled: yes - #tags: install - -- name: Start and Enable Kubernetes - kubelet - service: - name: kubelet - state: started - enabled: yes - #tags: install - diff --git a/kubernetes/roles/computeGPU/tasks/main.yml b/kubernetes/roles/computeGPU/tasks/main.yml deleted file mode 100644 index 522385d72..000000000 --- a/kubernetes/roles/computeGPU/tasks/main.yml +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- ---- -- name: install Nvidia driver - yum: - name: - - kmod-nvidia - #- nvidia-x11-drv - state: present - tags: install - -#- name: add Nvidia container runtime support - #get_url: - #url: https://nvidia.github.io/nvidia-docker/centos7/nvidia-docker.repo - #dest: /etc/yum.repos.d/nvidia-docker.repo - #tags: install - -- name: add Nvidia container runtime support - get_url: - url: https://nvidia.github.io/nvidia-container-runtime/centos7/nvidia-container-runtime.repo - dest: /etc/yum.repos.d/nvidia-container-runtime.repo - tags: install, testing - -# disable gpg key (because Nvidia doesn't know how to make that work yet for some reason) -- replace: - path: /etc/yum.repos.d/nvidia-container-runtime.repo - regexp: 'repo_gpgcheck=1' - replace: 'repo_gpgcheck=0' - backup: yes - tags: testing - -- name: install Nvidia-container-runtime-hook - yum: - name: - #- nvidia-detect - #- kmod-nvidia-410.73-1.el7_5.elrepo - - nvidia-container-runtime-hook - state: present - tags: install - - -# This needs to be done on GPU nodes -#- name: Enable DevicePlugins for all GPU nodes (nvidia-container-runtime-hook) - #copy: src=nvidia dest=/usr/libexec/oci/hooks.d/ owner=root group=root mode=755 - #tags: install - -#- name: Add KUBE_EXTRA_ARGS to enable Plugins (GPU support) --III alreday done in common - #lineinfile: - #path: /etc/systemd/system/kubelet.service.d/10-kubeadm.conf - #line: 'Environment="KUBELET_EXTRA_ARGS=--feature-gates=DevicePlugins=true"' - #insertbefore: 'KUBELET_KUBECONFIG_ARGS=' - #tags: install - -- name: Restart and Enable docker service - service: - name: docker - state: restarted - enabled: yes - tags: install - -- name: Restart and Enable Kubernetes - kubelet - service: - name: kubelet - state: restarted - enabled: yes - tags: install diff --git a/kubernetes/roles/computeGPU/vars/main.yml b/kubernetes/roles/computeGPU/vars/main.yml deleted file mode 100644 index 1bdf70a4c..000000000 --- a/kubernetes/roles/computeGPU/vars/main.yml +++ /dev/null @@ -1,10 +0,0 @@ ---- - -common_packages: - - epel-release - - python-pip - - docker - - bash-completion - - kubelet - - kubeadm - - kubectl diff --git a/kubernetes/roles/kubeflow/tasks/main.yml b/kubernetes/roles/kubeflow/tasks/main.yml deleted file mode 100644 index 75020caf5..000000000 --- a/kubernetes/roles/kubeflow/tasks/main.yml +++ /dev/null @@ -1,122 +0,0 @@ -# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - ---- - -#Configure build and deploy kubeflow v1.0 - -- name: Download kfctl v1.0.2 release from the Kubeflow releases page. 
- unarchive: - src: https://github.com/kubeflow/kfctl/releases/download/v1.0.2/kfctl_v1.0.2-0-ga476281_linux.tar.gz - dest: /usr/bin/ - remote_src: yes - -- name: Delete Omnia Kubeflow Directory if exists - file: - path: /root/k8s/omnia-kubeflow - state: absent - -- name: Create Kubeflow Directory - file: - path: /root/k8s/omnia-kubeflow - state: directory - recurse: yes - -- name: Build Kubeflow Configuration - shell: - cmd: /usr/bin/kfctl build -V -f https://raw.githubusercontent.com/kubeflow/manifests/v1.0-branch/kfdef/kfctl_k8s_istio.v1.0.2.yaml - chdir: /root/k8s/omnia-kubeflow - -- name: Modify Cpu Limit for istio-ingressgateway-service-account - replace: - path: /root/k8s/omnia-kubeflow/kustomize/istio-install/base/istio-noauth.yaml - after: 'serviceAccountName: istio-ingressgateway-service-account' - before: '---' - regexp: 'cpu: 100m' - replace: 'cpu: 2' - -- name: Modify Mem Limit for istio-ingressgateway-service-account - replace: - path: /root/k8s/omnia-kubeflow/kustomize/istio-install/base/istio-noauth.yaml - after: 'serviceAccountName: istio-ingressgateway-service-account' - before: '---' - regexp: 'memory: 128Mi' - replace: 'memory: 512Mi' - -- name: Modify Cpu Request for istio-ingressgateway-service-account - replace: - path: /root/k8s/omnia-kubeflow/kustomize/istio-install/base/istio-noauth.yaml - after: 'serviceAccountName: istio-ingressgateway-service-account' - before: '---' - regexp: 'cpu: 10m' - replace: 'cpu: 1' - -- name: Modify Mem Request for istio-ingressgateway-service-account - replace: - path: /root/k8s/omnia-kubeflow/kustomize/istio-install/base/istio-noauth.yaml - after: 'serviceAccountName: istio-ingressgateway-service-account' - before: '---' - regexp: 'memory: 40Mi' - replace: 'memory: 256Mi' - - -- name: Modify Cpu Limit for kfserving-gateway - replace: - path: /root/k8s/omnia-kubeflow/kustomize/kfserving-gateway/base/deployment.yaml - after: 'serviceAccountName: istio-ingressgateway-service-account' - before: 'env:' - regexp: 'cpu: 100m' - replace: 'cpu: 2' - -- name: Modify Mem Limit for kfserving-gateway - replace: - path: /root/k8s/omnia-kubeflow/kustomize/kfserving-gateway/base/deployment.yaml - after: 'serviceAccountName: istio-ingressgateway-service-account' - before: 'env:' - regexp: 'memory: 128Mi' - replace: 'memory: 512Mi' - -- name: Modify Cpu Request for kfserving-gateway - replace: - path: /root/k8s/omnia-kubeflow/kustomize/kfserving-gateway/base/deployment.yaml - after: 'serviceAccountName: istio-ingressgateway-service-account' - before: 'env:' - regexp: 'cpu: 10m' - replace: 'cpu: 1' - -- name: Modify Mem Request for kfserving-gateway - replace: - path: /root/k8s/omnia-kubeflow/kustomize/kfserving-gateway/base/deployment.yaml - after: 'serviceAccountName: istio-ingressgateway-service-account' - before: 'env:' - regexp: 'memory: 40Mi' - replace: 'memory: 256Mi' - - -- name: Change Argo base service from NodePort to LoadBalancer - replace: - path: /root/k8s/omnia-kubeflow/kustomize/argo/base/service.yaml - regexp: 'NodePort' - replace: 'LoadBalancer' - -- name: Change istio-install base istio-noauth service from NodePort to LoadBalancer - replace: - path: /root/k8s/omnia-kubeflow/kustomize/istio-install/base/istio-noauth.yaml - regexp: 'NodePort' - replace: 'LoadBalancer' - -- name: Apply Kubeflow Configuration - shell: - cmd: /usr/bin/kfctl apply -V -f /root/k8s/omnia-kubeflow/kfctl_k8s_istio.v1.0.2.yaml - chdir: /root/k8s/omnia-kubeflow diff --git a/kubernetes/roles/master/files/k8s.conf b/kubernetes/roles/master/files/k8s.conf deleted 
file mode 100644 index 9994b1482..000000000 --- a/kubernetes/roles/master/files/k8s.conf +++ /dev/null @@ -1,3 +0,0 @@ -net.bridge.bridge-nf-call-ip6tables = 1 -net.bridge.bridge-nf-call-iptables = 1 - diff --git a/kubernetes/roles/master/files/kubernetes.repo b/kubernetes/roles/master/files/kubernetes.repo deleted file mode 100644 index 476b99cb2..000000000 --- a/kubernetes/roles/master/files/kubernetes.repo +++ /dev/null @@ -1,8 +0,0 @@ -[kubernetes] -name=Kubernetes -baseurl=https://packages.cloud.google.com/yum/repos/kubernetes-el7-x86_64 -enabled=1 -gpgcheck=1 -repo_gpgcheck=1 -gpgkey=https://packages.cloud.google.com/yum/doc/yum-key.gpg https://packages.cloud.google.com/yum/doc/rpm-package-key.gpg - diff --git a/kubernetes/roles/master/files/nvidia b/kubernetes/roles/master/files/nvidia deleted file mode 100644 index f22e77e0b..000000000 --- a/kubernetes/roles/master/files/nvidia +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh -PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" exec nvidia-container-runtime-hook "$@" - diff --git a/kubernetes/roles/master/tasks/main.yml b/kubernetes/roles/master/tasks/main.yml deleted file mode 100644 index 9d461c0f8..000000000 --- a/kubernetes/roles/master/tasks/main.yml +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - ---- -#- name: Firewall Rule K8s:6443/tcp - #command: firewall-cmd --zone=internal --add-port=6443/tcp --permanent - #tags: master -# -#- name: Firewall Rule K8s:10250/tcp - #command: firewall-cmd --zone=internal --add-port=10250/tcp --permanent - #tags: master -## -#- name: Firewall Reload - #command: firewall-cmd --reload - #tags: master -# -- name: Create /root/bin (if it doesn't exist) - file: - path: /root/bin - state: directory - mode: 0755 - -- name: Get Helm Installer - get_url: - url: https://raw.githubusercontent.com/helm/helm/master/scripts/get-helm-3 - dest: /root/bin/get_helm.sh - mode: 700 - tags: master - -- name: Install Helm - command: /root/bin/get_helm.sh - tags: master - -# install and start up OpenSM - III diff --git a/kubernetes/roles/startmaster/files/enable_gpu_k8s.sh b/kubernetes/roles/startmaster/files/enable_gpu_k8s.sh deleted file mode 100755 index f733d0ad7..000000000 --- a/kubernetes/roles/startmaster/files/enable_gpu_k8s.sh +++ /dev/null @@ -1 +0,0 @@ -kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v1.11/nvidia-device-plugin.yml diff --git a/kubernetes/roles/startmaster/tasks/main.yml b/kubernetes/roles/startmaster/tasks/main.yml deleted file mode 100644 index 202108888..000000000 --- a/kubernetes/roles/startmaster/tasks/main.yml +++ /dev/null @@ -1,126 +0,0 @@ -# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - ---- -- name: Turn Swap OFF (if not already disabled) - command: /usr/sbin/swapoff -a - tags: init - -- name: Initialize kubeadm - command: /bin/kubeadm init --pod-network-cidr=10.244.0.0/16 --apiserver-advertise-address={{ master_ip }} - #command: /bin/kubeadm init - register: init_output - tags: init - -- name: Setup Directory for Kubernetes environment for root - file: path=/root/.kube state=directory - tags: init - -- name: Copy Kubernetes Config for root #do this for other users too? - copy: - src: /etc/kubernetes/admin.conf - dest: /root/.kube/config - owner: root - group: root - mode: 644 - remote_src: yes - tags: init - -- name: Cluster token - shell: kubeadm token list | cut -d ' ' -f1 | sed -n '2p' - register: K8S_TOKEN - tags: init - -- name: CA Hash - shell: openssl x509 -pubkey -in /etc/kubernetes/pki/ca.crt | openssl rsa -pubin -outform der 2>/dev/null | openssl dgst -sha256 -hex | sed 's/^.* //' - register: K8S_MASTER_CA_HASH - tags: init - -- name: Add K8S Master IP, Token, and Hash to dummy host - add_host: - name: "K8S_TOKEN_HOLDER" - token: "{{ K8S_TOKEN.stdout }}" - hash: "{{ K8S_MASTER_CA_HASH.stdout }}" - ip: "{{ master_ip }}" - tags: init - -- name: - debug: - msg: "[Master] K8S_TOKEN_HOLDER K8S token is {{ hostvars['K8S_TOKEN_HOLDER']['token'] }}" - tags: init - -- name: - debug: - msg: "[Master] K8S_TOKEN_HOLDER K8S Hash is {{ hostvars['K8S_TOKEN_HOLDER']['hash'] }}" - tags: init - -- name: - debug: - msg: "[Master] K8S_MASTER_IP is {{ master_ip }}" - tags: init - -- name: Setup Calico SDN network - shell: kubectl apply -f https://docs.projectcalico.org/manifests/calico.yaml - tags: init - -#- name: Setup Flannel SDN network - #shell: kubectl apply -f https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml - #tags: init - -- name: Enabled GPU support in Kubernetes - shell: kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/1.0.0-beta4/nvidia-device-plugin.yml - #https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v1.11/nvidia-device-plugin.yml - register: gpu_enable - tags: init - -- name: Deploy Xilinx Device Plugin - shell: kubectl create -f https://raw.githubusercontent.com/Xilinx/FPGA_as_a_Service/master/k8s-fpga-device-plugin/fpga-device-plugin.yml - register: fpga_enable - tags: init - -- name: Create yaml repo for setup - file: - path: /root/k8s - state: directory - tags: init - -- name: Create Service Account (K8S Dashboard) Files - copy: src=create_admin_user.yaml dest=/root/k8s/create_admin_user.yaml owner=root group=root mode=655 - tags: init - -- name: Create Service Account (K8S Dashboard) - Create - shell: kubectl create -f /root/k8s/create_admin_user.yaml - tags: init - -- name: Create ClusterRoleBinding (K8S Dashboard) Files - copy: src=create_clusterRoleBinding.yaml dest=/root/k8s/create_clusterRoleBinding.yaml owner=root group=root mode=655 - tags: init - -- name: Create ClusterRoleBinding (K8S Dashboard) - Apply - shell: kubectl create -f /root/k8s/create_clusterRoleBinding.yaml - tags: init - -- name: Dump Bearer Token for K8S Dashboard Login - shell: kubectl -n 
kube-system describe secret $(kubectl -n kube-system get secret | grep admin-user | awk '{print $1}') > /root/k8s/token - tags: init - -- name: Edge / Workstation Install allows pods to scheudle on master - shell: kubectl taint nodes --all node-role.kubernetes.io/master- - when: single_node - tags: init - - -# If more debug information is needed during init uncomment the following 2 lines -#- debug: var=init_output.stdout_lines - #tags: init diff --git a/kubernetes/roles/startservices/tasks/main.yml b/kubernetes/roles/startservices/tasks/main.yml deleted file mode 100644 index c605f2cd8..000000000 --- a/kubernetes/roles/startservices/tasks/main.yml +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright 2020 Dell Technologies -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - ---- -#- name: Kick CoreDNS (this is a hack that needs to be fixed) - #shell: kubectl get pods -n kube-system --no-headers=true | awk '/coredns/{print $1}'|xargs kubectl delete -n kube-system pod - #tags: init - -- name: Wait for CoreDNS to restart - shell: kubectl rollout status deployment/coredns -n kube-system - tags: init - -- name: Deploy MetalLB - shell: kubectl apply -f https://raw.githubusercontent.com/google/metallb/v0.8.1/manifests/metallb.yaml - tags: init - -- name: Create MetalLB Setup Config Files - copy: src=metal-config.yaml dest=/root/k8s/metal-config.yaml owner=root group=root mode=655 - tags: init - -- name: Create MetalLB Setup Deployment Files - copy: src=metallb.yaml dest=/root/k8s/metallb.yaml owner=root group=root mode=655 - tags: init - -- name: Deploy MetalLB - shell: kubectl apply -f /root/k8s/metallb.yaml - tags: init - -- name: Create default setup for MetalLB - shell: kubectl apply -f /root/k8s/metal-config.yaml - tags: init - -- name: Start K8S Dashboard - shell: kubectl create -f https://raw.githubusercontent.com/kubernetes/dashboard/v2.0.0-beta6/aio/deploy/recommended.yaml - tags: init - -- name: Helm - Add Stable Repo - shell: helm repo add stable https://kubernetes-charts.storage.googleapis.com/ - tags: init - -- name: Helm - Update Repo - shell: helm repo update - tags: init - -- name: Start NFS Client Provisioner - shell: helm install stable/nfs-client-provisioner --set nfs.server=10.0.0.1 --set nfs.path=/work --generate-name - tags: init - -- name: Set NFS-Client Provisioner as DEFAULT StorageClass - shell: "kubectl patch storageclasses.storage.k8s.io nfs-client -p '{\"metadata\": {\"annotations\":{\"storageclass.kubernetes.io/is-default-class\":\"true\"}}}'" - tags: init - -- name: Prometheus deployment - shell: helm install stable/prometheus --set alertmanager.persistentVolume.storageClass=nfs-client,server.persistentVolume.storageClass=nfs-client,server.service.type=LoadBalancer --generate-name - tags: init - -- name: Install MPI Operator - shell: kubectl create -f https://raw.githubusercontent.com/kubeflow/mpi-operator/master/deploy/v1alpha2/mpi-operator.yaml - tags: init diff --git a/omnia.yml b/omnia.yml new file mode 100644 index 000000000..6657044a7 --- /dev/null +++ b/omnia.yml @@ -0,0 
+1,130 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +- name: Validate the cluster + hosts: localhost + connection: local + gather_facts: no + roles: + - cluster_validation + +- name: Gather facts from all the nodes + hosts: all + +- name: Apply common installation and config + hosts: manager, compute + gather_facts: false + roles: + - common + tags: common + +- name: Apply common K8s installation and config + hosts: manager, compute + gather_facts: false + roles: + - k8s_common + tags: kubernetes + +- name: Apply K8s manager config + hosts: manager + gather_facts: true + roles: + - k8s_manager + tags: kubernetes + +- name: Apply K8s firewalld config on manager and compute nodes + hosts: manager, compute + gather_facts: false + roles: + - k8s_firewalld + tags: kubernetes + +- name: Apply NFS server setup on manager node + hosts: manager + gather_facts: false + roles: + - k8s_nfs_server_setup + tags: + - kubernetes + - nfs + +- name: Apply NFS client setup on compute nodes + hosts: compute + gather_facts: false + roles: + - k8s_nfs_client_setup + tags: + - kubernetes + - nfs + +- name: Start K8s on manager server + hosts: manager + gather_facts: true + roles: + - k8s_start_manager + tags: kubernetes + +- name: Start K8s worker servers on compute nodes + hosts: compute + gather_facts: false + roles: + - k8s_start_workers + tags: kubernetes + +- name: Start K8s services on manager node + hosts: manager + gather_facts: false + roles: + - k8s_start_services + tags: kubernetes + +- name: Apply common Slurm installation and config + hosts: manager, compute + gather_facts: false + roles: + - slurm_common + tags: slurm + +- name: Apply Slurm manager config + hosts: manager + gather_facts: false + roles: + - slurm_manager + tags: slurm + +- name: Start Slurm workers + hosts: compute + gather_facts: false + roles: + - slurm_workers + tags: slurm + +- name: Start Slurm services + hosts: manager + gather_facts: false + roles: + - slurm_start_services + tags: slurm + +- name: Install slurm exporter + hosts: manager + gather_facts: false + roles: + - slurm_exporter + tags: slurm + +- name: Passwordless SSH between manager and compute nodes + include: appliance/tools/passwordless_ssh.yml + when: hostvars['127.0.0.1']['appliance_status'] \ No newline at end of file diff --git a/omnia_config.yml b/omnia_config.yml new file mode 100644 index 000000000..0c2cfce84 --- /dev/null +++ b/omnia_config.yml @@ -0,0 +1,29 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+
+# Password used for Slurm database.
+# The password length should be at least 8 characters.
+# The password must not contain the characters: -, \, ', "
+mariadb_password: "password"
+
+# Kubernetes SDN network.
+# It can either be "calico" or "flannel".
+# The default value is "calico".
+k8s_cni: "calico"
+
+# Kubernetes pod network CIDR.
+# The default value is "10.244.0.0/16".
+# Make sure this value does not overlap with any of the host networks.
+k8s_pod_network_cidr: "10.244.0.0/16"
diff --git a/platforms/jupyterhub.yml b/platforms/jupyterhub.yml
new file mode 100644
index 000000000..7ada69543
--- /dev/null
+++ b/platforms/jupyterhub.yml
@@ -0,0 +1,20 @@
+# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+
+- name: Installing JupyterHub
+  hosts: manager
+  gather_facts: false
+  roles:
+    - jupyterhub
\ No newline at end of file
diff --git a/kubernetes/kubeflow.yaml b/platforms/kubeflow.yml
similarity index 85%
rename from kubernetes/kubeflow.yaml
rename to platforms/kubeflow.yml
index abda4bc1d..d48587d9e 100644
--- a/kubernetes/kubeflow.yaml
+++ b/platforms/kubeflow.yml
@@ -1,4 +1,4 @@
-# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. 
+# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,12 +11,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 ---
-#Playbook for installing Kubeflow v1.0 on Omnia
-
-# Start K8s worker servers
-- hosts: master
+
+- name: Installing Kubeflow
+  hosts: manager
   gather_facts: false
   roles:
-  - kubeflow
+  - kubeflow
\ No newline at end of file
diff --git a/kubernetes/roles/jupyterhub/files/jupyter_config.yaml b/platforms/roles/jupyterhub/files/jupyter_config.yaml
similarity index 100%
rename from kubernetes/roles/jupyterhub/files/jupyter_config.yaml
rename to platforms/roles/jupyterhub/files/jupyter_config.yaml
diff --git a/platforms/roles/jupyterhub/tasks/main.yml b/platforms/roles/jupyterhub/tasks/main.yml
new file mode 100644
index 000000000..29eb52d0f
--- /dev/null
+++ b/platforms/roles/jupyterhub/tasks/main.yml
@@ -0,0 +1,62 @@
+# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+
+- name: Helm - add JupyterHub repo
+  command: "helm repo add jupyterhub '{{ jupyterhub_helm_chart_repo }}'"
+  changed_when: true
+
+- name: Helm - update repo
+  command: helm repo update
+  changed_when: true
+
+- name: Copy JupyterHub custom config file
+  copy:
+    src: jupyter_config.yaml
+    dest: "{{ jupyter_config_file_dest }}"
+    owner: root
+    group: root
+    mode: "{{ jupyter_config_file_mode }}"
+
+- name: JupyterHub deploy
+  block:
+    - name: JupyterHub deploy
+      command: >
+        helm upgrade --cleanup-on-fail
+        --install {{ jupyterhub_namespace }} jupyterhub/jupyterhub
+        --namespace {{ jupyterhub_namespace }}
+        --create-namespace
+        --version {{ helm_chart_version }}
+        --values {{ jupyter_config_file_dest }}
+        --timeout {{ timeout_min_sec }}
+      register: deployment_output
+
+  rescue:
+    - name: JupyterHub deployment error
+      debug:
+        msg: "Previous JupyterHub deployment is in progress"
+      when: "'another operation (install/upgrade/rollback) is in progress' in deployment_output.stderr"
+
+    - name: Delete existing release
+      command: helm delete '{{ jupyterhub_namespace }}'
+
+    - name: JupyterHub deploy
+      command: >
+        helm upgrade --cleanup-on-fail
+        --install {{ jupyterhub_namespace }} jupyterhub/jupyterhub
+        --namespace {{ jupyterhub_namespace }}
+        --create-namespace
+        --version {{ helm_chart_version }}
+        --values {{ jupyter_config_file_dest }}
+        --timeout {{ timeout_min_sec }}
\ No newline at end of file
diff --git a/platforms/roles/jupyterhub/vars/main.yml b/platforms/roles/jupyterhub/vars/main.yml
new file mode 100644
index 000000000..66f8a4231
--- /dev/null
+++ b/platforms/roles/jupyterhub/vars/main.yml
@@ -0,0 +1,26 @@
+# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+
+jupyterhub_helm_chart_repo: https://jupyterhub.github.io/helm-chart/
+
+jupyter_config_file_dest: /root/k8s/jupyter_config.yaml
+
+jupyter_config_file_mode: 0655
+
+helm_chart_version: 0.9.0
+
+timeout_min_sec: 60m
+
+jupyterhub_namespace: jupyterhub
\ No newline at end of file
diff --git a/platforms/roles/kubeflow/tasks/main.yml b/platforms/roles/kubeflow/tasks/main.yml
new file mode 100644
index 000000000..7bbe04b47
--- /dev/null
+++ b/platforms/roles/kubeflow/tasks/main.yml
@@ -0,0 +1,137 @@
+# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
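The block/rescue pair above retries the helm deploy exactly once after deleting a stuck release. A minimal alternative sketch using Ansible retries, reusing the role's variables (an editorial suggestion, not part of this PR):

```yaml
- name: JupyterHub deploy (retry sketch)
  command: >
    helm upgrade --cleanup-on-fail
    --install {{ jupyterhub_namespace }} jupyterhub/jupyterhub
    --namespace {{ jupyterhub_namespace }}
    --create-namespace
    --version {{ helm_chart_version }}
    --values {{ jupyter_config_file_dest }}
    --timeout {{ timeout_min_sec }}
  register: deployment_output
  retries: 3          # retry a transient helm failure up to 3 times
  delay: 60           # wait a minute between attempts
  until: deployment_output.rc == 0
  changed_when: true
```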
+---
+
+- name: Download kfctl release from the Kubeflow releases page
+  unarchive:
+    src: "{{ kfctl_download_url }}"
+    dest: "{{ kfctl_download_dest_path }}"
+    mode: "{{ kfctl_download_file_mode }}"
+    remote_src: yes
+
+- name: Delete the omnia kubeflow directory if it exists
+  file:
+    path: "{{ omnia_kubeflow_dir_path }}"
+    state: absent
+
+- name: Create omnia kubeflow directory
+  file:
+    path: "{{ omnia_kubeflow_dir_path }}"
+    state: directory
+    mode: "{{ omnia_kubeflow_dir_mode }}"
+    recurse: yes
+
+- name: Build kubeflow configuration
+  command:
+    cmd: /usr/bin/kfctl build -V -f "{{ kubeflow_config_yaml_url }}"
+    chdir: "{{ omnia_kubeflow_dir_path }}"
+  changed_when: true
+
+- name: Modify CPU limit for istio-ingressgateway-service-account
+  replace:
+    path: "{{ istio_noauth_yaml_file_path }}"
+    after: 'serviceAccountName: istio-ingressgateway-service-account'
+    before: '---'
+    regexp: 'cpu: 100m'
+    replace: 'cpu: 2'
+
+- name: Modify memory limit for istio-ingressgateway-service-account
+  replace:
+    path: "{{ istio_noauth_yaml_file_path }}"
+    after: 'serviceAccountName: istio-ingressgateway-service-account'
+    before: '---'
+    regexp: 'memory: 128Mi'
+    replace: 'memory: 512Mi'
+
+- name: Modify CPU request for istio-ingressgateway-service-account
+  replace:
+    path: "{{ istio_noauth_yaml_file_path }}"
+    after: 'serviceAccountName: istio-ingressgateway-service-account'
+    before: '---'
+    regexp: 'cpu: 10m'
+    replace: 'cpu: 1'
+
+- name: Modify memory request for istio-ingressgateway-service-account
+  replace:
+    path: "{{ istio_noauth_yaml_file_path }}"
+    after: 'serviceAccountName: istio-ingressgateway-service-account'
+    before: '---'
+    regexp: 'memory: 40Mi'
+    replace: 'memory: 256Mi'
+
+- name: Modify memory limit for istio-egressgateway-service-account
+  replace:
+    path: "{{ istio_noauth_yaml_file_path }}"
+    after: 'serviceAccountName: istio-egressgateway-service-account'
+    before: '---'
+    regexp: 'memory: 128Mi'
+    replace: 'memory: 256Mi'
+
+- name: Modify memory request for istio-egressgateway-service-account
+  replace:
+    path: "{{ istio_noauth_yaml_file_path }}"
+    after: 'serviceAccountName: istio-egressgateway-service-account'
+    before: '---'
+    regexp: 'memory: 40Mi'
+    replace: 'memory: 128Mi'
+
+- name: Modify CPU limit for kfserving-gateway
+  replace:
+    path: "{{ kfserving_gateway_yaml_file_path }}"
+    after: 'serviceAccountName: istio-ingressgateway-service-account'
+    before: 'env:'
+    regexp: 'cpu: 100m'
+    replace: 'cpu: 2'
+
+- name: Modify memory limit for kfserving-gateway
+  replace:
+    path: "{{ kfserving_gateway_yaml_file_path }}"
+    after: 'serviceAccountName: istio-ingressgateway-service-account'
+    before: 'env:'
+    regexp: 'memory: 128Mi'
+    replace: 'memory: 512Mi'
+
+- name: Modify CPU request for kfserving-gateway
+  replace:
+    path: "{{ kfserving_gateway_yaml_file_path }}"
+    after: 'serviceAccountName: istio-ingressgateway-service-account'
+    before: 'env:'
+    regexp: 'cpu: 10m'
+    replace: 'cpu: 1'
+
+- name: Modify memory request for kfserving-gateway
+  replace:
+    path: "{{ kfserving_gateway_yaml_file_path }}"
+    after: 'serviceAccountName: istio-ingressgateway-service-account'
+    before: 'env:'
+    regexp: 'memory: 40Mi'
+    replace: 'memory: 256Mi'
+
+- name: Change argo base service from NodePort to LoadBalancer
+  replace:
+    path: "{{ argo_yaml_file_path }}"
+    regexp: 'NodePort'
+    replace: 'LoadBalancer'
+
+- name: Change istio-install base istio-noauth service from NodePort to LoadBalancer
+  replace:
+    path: "{{ istio_noauth_yaml_file_path }}"
+    regexp: 'NodePort'
+    replace: 
'LoadBalancer' + +- name: Apply kubeflow configuration + command: + cmd: "/usr/bin/kfctl apply -V -f '{{ kubeflow_config_file }}'" + chdir: "{{ omnia_kubeflow_dir_path }}" + changed_when: true diff --git a/platforms/roles/kubeflow/vars/main.yml b/platforms/roles/kubeflow/vars/main.yml new file mode 100644 index 000000000..fbce14428 --- /dev/null +++ b/platforms/roles/kubeflow/vars/main.yml @@ -0,0 +1,34 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +kfctl_download_url: https://github.com/kubeflow/kfctl/releases/download/v1.0.2/kfctl_v1.0.2-0-ga476281_linux.tar.gz + +kfctl_download_dest_path: /usr/bin/ + +kfctl_download_file_mode: 0755 + +omnia_kubeflow_dir_path: /root/k8s/omnia-kubeflow + +omnia_kubeflow_dir_mode: 0755 + +kubeflow_config_yaml_url: https://raw.githubusercontent.com/kubeflow/manifests/v1.0-branch/kfdef/kfctl_k8s_istio.v1.0.2.yaml + +istio_noauth_yaml_file_path: "{{ omnia_kubeflow_dir_path }}/kustomize/istio-install/base/istio-noauth.yaml" + +kfserving_gateway_yaml_file_path: "{{ omnia_kubeflow_dir_path }}/kustomize/kfserving-gateway/base/deployment.yaml" + +argo_yaml_file_path: "{{ omnia_kubeflow_dir_path }}/kustomize/argo/base/service.yaml" + +kubeflow_config_file: "{{ omnia_kubeflow_dir_path }}/kfctl_k8s_istio.v1.0.2.yaml" diff --git a/roles/cluster_validation/tasks/fetch_password.yml b/roles/cluster_validation/tasks/fetch_password.yml new file mode 100644 index 000000000..deb1399ea --- /dev/null +++ b/roles/cluster_validation/tasks/fetch_password.yml @@ -0,0 +1,97 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
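The four istio-ingressgateway edits above are one replace task repeated with different patterns; a sketch collapsing them into a single loop (values copied from the tasks above, the loop form itself is an editorial suggestion):

```yaml
- name: Raise istio-ingressgateway resource requests and limits
  replace:
    path: "{{ istio_noauth_yaml_file_path }}"
    after: 'serviceAccountName: istio-ingressgateway-service-account'
    before: '---'
    regexp: "{{ item.regexp }}"
    replace: "{{ item.replace }}"
  loop:
    - { regexp: 'cpu: 100m',     replace: 'cpu: 2' }        # CPU limit
    - { regexp: 'memory: 128Mi', replace: 'memory: 512Mi' } # memory limit
    - { regexp: 'cpu: 10m',      replace: 'cpu: 1' }        # CPU request
    - { regexp: 'memory: 40Mi',  replace: 'memory: 256Mi' } # memory request
```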
+---
+- name: Check if omnia_vault_key exists
+  stat:
+    path: "{{ role_path }}/../../{{ config_vaultname }}"
+  register: vault_key_result
+
+- name: Create ansible vault key if it does not exist
+  set_fact:
+    vault_key: "{{ lookup('password', '/dev/null chars=ascii_letters') }}"
+  when: not vault_key_result.stat.exists
+
+- name: Save vault key
+  copy:
+    dest: "{{ role_path }}/../../{{ config_vaultname }}"
+    content: |
+      {{ vault_key }}
+    owner: root
+    force: yes
+    mode: '0600'
+  when: not vault_key_result.stat.exists
+
+- name: Check if the omnia config file is encrypted
+  command: cat {{ role_path }}/../../{{ config_filename }}
+  changed_when: false
+  register: config_content
+  no_log: True
+
+- name: Decrypt omnia_config.yml
+  command: >-
+    ansible-vault decrypt {{ role_path }}/../../{{ config_filename }}
+    --vault-password-file {{ role_path }}/../../{{ config_vaultname }}
+  when: "'$ANSIBLE_VAULT;' in config_content.stdout"
+
+- name: Include variable file omnia_config.yml
+  include_vars: "{{ role_path }}/../../{{ config_filename }}"
+  no_log: True
+
+- name: Validate input parameters are not empty
+  fail:
+    msg: "{{ input_config_failure_msg }}"
+  register: input_config_check
+  when:
+    - mariadb_password | length < 1 or
+      k8s_cni | length < 1 or
+      k8s_pod_network_cidr | length < 1
+
+- name: Assert mariadb_password
+  assert:
+    that:
+      - mariadb_password | length > min_length | int - 1
+      - mariadb_password | length < max_length | int + 1
+      - '"-" not in mariadb_password '
+      - '"\\" not in mariadb_password '
+      - '"\"" not in mariadb_password '
+      - " \"'\" not in mariadb_password "
+    success_msg: "{{ success_msg_mariadb_password }}"
+    fail_msg: "{{ fail_msg_mariadb_password }}"
+
+- name: Assert kubernetes cni
+  assert:
+    that: "('calico' in k8s_cni) or ('flannel' in k8s_cni)"
+    success_msg: "{{ success_msg_k8s_cni }}"
+    fail_msg: "{{ fail_msg_k8s_cni }}"
+
+- name: Assert kubernetes pod network CIDR
+  assert:
+    that:
+      - k8s_pod_network_cidr | length > 9
+      - '"/" in k8s_pod_network_cidr '
+    success_msg: "{{ success_msg_k8s_pod_network_cidr }}"
+    fail_msg: "{{ fail_msg_k8s_pod_network_cidr }}"
+
+- name: Save input variables from file
+  set_fact:
+    db_password: "{{ mariadb_password }}"
+    k8s_cni: "{{ k8s_cni }}"
+    k8s_pod_network_cidr: "{{ k8s_pod_network_cidr }}"
+  no_log: True
+
+- name: Encrypt input config file
+  command: >-
+    ansible-vault encrypt {{ role_path }}/../../{{ config_filename }}
+    --vault-password-file {{ role_path }}/../../{{ config_vaultname }}
+  changed_when: false
\ No newline at end of file
diff --git a/kubernetes/roles/jupyterhub/tasks/main.yml b/roles/cluster_validation/tasks/main.yml
similarity index 55%
rename from kubernetes/roles/jupyterhub/tasks/main.yml
rename to roles/cluster_validation/tasks/main.yml
index 5f6949f1b..f25328b39 100644
--- a/kubernetes/roles/jupyterhub/tasks/main.yml
+++ b/roles/cluster_validation/tasks/main.yml
@@ -11,16 +11,27 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
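fetch_password.yml leaves omnia_config.yml encrypted after validation; a sketch of how to view or edit it afterwards (paths assume the repository root, where the role creates the key file):

```yaml
# ansible-vault commands; .omnia_vault_key is created by this role.
#   ansible-vault view omnia_config.yml --vault-password-file .omnia_vault_key
#   ansible-vault edit omnia_config.yml --vault-password-file .omnia_vault_key
```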
-
 ---
-- name: Helm - Add JupyterHub Repo
-  shell: helm repo add jupyterhub https://jupyterhub.github.io/helm-chart/
+- name: Perform validations
+  include_tasks: validations.yml
+
+- name: Fetch passwords
+  include_tasks: fetch_password.yml
+
+- name: Check if Omnia is running from AWX
+  block:
+    - name: Appliance status
+      set_fact:
+        appliance_status: false
+
+    - name: Check AWX instance
+      command: awx-manage --version
 
-- name: Helm - Update Repo
-  shell: helm repo update
+    - name: Update appliance status
+      set_fact:
+        appliance_status: true
 
-- name: JupyterHub Custom Config (files)
-  copy: src=jupyter_config.yaml dest=/root/k8s/jupyter_config.yaml owner=root group=root mode=655
-
-- name: jupyterHub deploy
-  shell: helm install jupyterhub/jupyterhub --namespace default --version 0.9.0 --values /root/k8s/jupyter_config.yaml --generate-name --wait --timeout 60m
+  rescue:
+    - name: Omnia running on host
+      debug:
+        msg: "omnia.yml is running on the host"
\ No newline at end of file
diff --git a/roles/cluster_validation/tasks/validations.yml b/roles/cluster_validation/tasks/validations.yml
new file mode 100644
index 000000000..b072b34d7
--- /dev/null
+++ b/roles/cluster_validation/tasks/validations.yml
@@ -0,0 +1,30 @@
+# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+- name: Validate skip tags
+  fail:
+    msg: "{{ skip_tag_fail_msg }}"
+  when: "'slurm' in ansible_skip_tags and 'kubernetes' in ansible_skip_tags"
+
+- name: Manager group to contain exactly 1 node
+  assert:
+    that: "groups['manager'] | length | int == 1"
+    fail_msg: "{{ manager_group_fail_msg }}"
+    success_msg: "{{ manager_group_success_msg }}"
+
+- name: Compute group to contain at least 1 node
+  assert:
+    that: "groups['compute'] | length | int >= 1"
+    fail_msg: "{{ compute_group_fail_msg }}"
+    success_msg: "{{ compute_group_success_msg }}"
\ No newline at end of file
diff --git a/roles/cluster_validation/vars/main.yml b/roles/cluster_validation/vars/main.yml
new file mode 100644
index 000000000..59fe22241
--- /dev/null
+++ b/roles/cluster_validation/vars/main.yml
@@ -0,0 +1,34 @@
+# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+#Usage: fetch_password.yml
+config_filename: "omnia_config.yml"
+config_vaultname: .omnia_vault_key
+min_length: 8
+max_length: 30
+fail_msg_mariadb_password: "mariadb_password is not in the correct format."
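The block/rescue above flips appliance_status based on whether awx-manage exists. An equivalent sketch without rescue, deriving the flag from the probe's return code (the register name awx_probe is hypothetical):

```yaml
- name: Check AWX instance
  command: awx-manage --version
  register: awx_probe
  failed_when: false   # a missing awx-manage must not abort the play
  changed_when: false

- name: Set appliance status
  set_fact:
    appliance_status: "{{ awx_probe.rc == 0 }}"
```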
+success_msg_mariadb_password: "mariadb_password validated"
+success_msg_k8s_cni: "Kubernetes CNI validated"
+fail_msg_k8s_cni: "Kubernetes CNI is not correct."
+success_msg_k8s_pod_network_cidr: "Kubernetes pod network CIDR validated"
+fail_msg_k8s_pod_network_cidr: "Kubernetes pod network CIDR is not in the correct format"
+
+#Usage: validations.yml
+skip_tag_fail_msg: "Can't skip both slurm and kubernetes"
+manager_group_fail_msg: "manager group should contain exactly 1 node"
+manager_group_success_msg: "manager group check passed"
+compute_group_fail_msg: "compute group should contain at least 1 node"
+compute_group_success_msg: "compute group check passed"
+disjoint_fail_msg: "manager and compute groups should be disjoint"
+disjoint_success_msg: "manager and compute groups are disjoint"
\ No newline at end of file
diff --git a/roles/common/files/daemon.json b/roles/common/files/daemon.json
new file mode 100644
index 000000000..363ed0f3c
--- /dev/null
+++ b/roles/common/files/daemon.json
@@ -0,0 +1,9 @@
+{
+  "runtimes": {
+    "nvidia": {
+      "path": "nvidia-container-runtime",
+      "runtimeArgs": []
+    }
+  },
+  "default-runtime": "nvidia"
+}
\ No newline at end of file
diff --git a/roles/common/files/inventory.fact b/roles/common/files/inventory.fact
new file mode 100644
index 000000000..b6c3ba7e9
--- /dev/null
+++ b/roles/common/files/inventory.fact
@@ -0,0 +1,20 @@
+#!/bin/bash
+INVENTORY=$(mktemp lspci.XXXXXXXX)
+
+lspci > "$INVENTORY"
+
+NVIDIA_GPU=$(grep -ci nvidia "$INVENTORY")
+XILINX_FPGA=$(grep -c "Processing accelerators: Xilinx Corporation Device" "$INVENTORY")
+INTEL_A10_FPGA=$(grep -c "Processing accelerators: Intel Corporation Device" "$INVENTORY")
+AMD_GPU=$(grep -c "Display controller: Advanced Micro Devices, Inc. \[AMD/ATI\]" "$INVENTORY")
+
+cat << EOF
+{
+  "xilinx_fpga" : $XILINX_FPGA,
+  "nvidia_gpu" : $NVIDIA_GPU,
+  "amd_gpu" : $AMD_GPU,
+  "intel_a10_fpga" : $INTEL_A10_FPGA
+}
+EOF
+
+rm -f "$INVENTORY"
diff --git a/roles/common/handlers/main.yml b/roles/common/handlers/main.yml
new file mode 100644
index 000000000..d4d84f6ed
--- /dev/null
+++ b/roles/common/handlers/main.yml
@@ -0,0 +1,40 @@
+# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
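vars/main.yml defines disjoint_fail_msg and disjoint_success_msg, but no task in validations.yml references them; a sketch of the check those messages imply (the intersect filter is standard Ansible):

```yaml
- name: Manager and compute groups to be disjoint
  assert:
    that: "groups['manager'] | intersect(groups['compute']) | length == 0"
    fail_msg: "{{ disjoint_fail_msg }}"
    success_msg: "{{ disjoint_success_msg }}"
```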
+---
+
+- name: Restart ntpd
+  systemd:
+    name: ntpd
+    state: restarted
+    enabled: yes
+
+- name: Restart chrony
+  service:
+    name: chronyd
+    state: restarted
+    enabled: yes
+
+- name: Sync ntp clocks
+  command: ntpdc -np
+  register: ntp_clock
+  until: ntp_clock.stdout.find('*') > -1
+  retries: "{{ retry_count_one }}"
+  delay: "{{ delay_count_one }}"
+
+- name: Sync chrony sources
+  command: chronyc sources
+  register: chrony_src
+  until: chrony_src.stdout.find('^*') > -1
+  retries: "{{ retry_count }}"
+  delay: "{{ delay_count }}"
\ No newline at end of file
diff --git a/roles/common/tasks/amd.yml b/roles/common/tasks/amd.yml
new file mode 100644
index 000000000..1db2a4b0b
--- /dev/null
+++ b/roles/common/tasks/amd.yml
@@ -0,0 +1,35 @@
+# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+
+- name: Add AMD ROCm repository
+  yum_repository:
+    name: ROCm
+    description: AMD GPU ROCm Repository
+    baseurl: https://repo.radeon.com/rocm/yum/rpm
+    gpgcheck: yes
+    gpgkey: https://repo.radeon.com/rocm/rocm.gpg.key
+    enabled: yes
+  tags: install
+
+- name: Install AMD ROCm drivers
+  package:
+    name: rocm-dkms
+    enablerepo: ROCm
+    state: present
+  tags: install
+
+- name: Reboot after installing GPU drivers
+  reboot:
+  tags: install
\ No newline at end of file
diff --git a/roles/common/tasks/main.yml b/roles/common/tasks/main.yml
new file mode 100644
index 000000000..be405d19f
--- /dev/null
+++ b/roles/common/tasks/main.yml
@@ -0,0 +1,135 @@
+# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
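The AMD tasks above reboot on every run; a sketch gating the reboot on the package transaction actually changing (the register name rocm_install is hypothetical, an editorial suggestion):

```yaml
- name: Install AMD ROCm drivers
  package:
    name: rocm-dkms
    enablerepo: ROCm
    state: present
  register: rocm_install   # hypothetical register to track the change
  tags: install

- name: Reboot after installing GPU drivers
  reboot:
  when: rocm_install is changed   # only reboot when the driver changed
  tags: install
```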
+---
+
+- name: Create a custom fact directory on each host
+  file:
+    path: "{{ custom_fact_dir }}"
+    state: directory
+    mode: "{{ custom_fact_dir_mode }}"
+
+- name: Install accelerator discovery script
+  copy:
+    src: inventory.fact
+    dest: "{{ accelerator_discovery_script_dest }}"
+    owner: root
+    group: root
+    mode: "{{ accelerator_discovery_script_mode }}"
+
+- name: Add elrepo GPG key
+  rpm_key:
+    state: present
+    key: "{{ elrepo_gpg_key_url }}"
+  tags: install
+
+- name: Add elrepo (nvidia kmod drivers)
+  package:
+    name: "{{ elrepo_rpm_url }}"
+    state: present
+  tags: install
+
+- name: Add docker community edition repository
+  get_url:
+    url: "{{ docker_repo_url }}"
+    dest: "{{ docker_repo_dest }}"
+  tags: install
+
+- name: Permanently disable swap
+  mount:
+    name: "swap"
+    fstype: swap
+    state: absent
+
+- name: Disable selinux
+  selinux:
+    state: disabled
+  tags: install
+
+- name: Install common packages
+  package:
+    name: "{{ common_packages }}"
+    state: present
+  tags: install
+
+- name: Versionlock docker
+  command: "yum versionlock '{{ item }}'"
+  args:
+    warn: false
+  with_items:
+    - "{{ docker_packages }}"
+  changed_when: true
+  tags: install
+
+- name: Collect host facts (including accelerator information)
+  setup: ~
+
+- name: Install InfiniBand support
+  package:
+    name: "@Infiniband Support"
+    state: present
+  tags: install
+
+- name: Set up time services (ntp/chrony)
+  include_tasks: ntp.yml
+  tags: install
+
+- name: Install Nvidia drivers and software components
+  include_tasks: nvidia.yml
+  when: ansible_local.inventory.nvidia_gpu > 0
+  tags: install
+
+- name: Install AMD GPU drivers and software components
+  include_tasks: amd.yml
+  when: ansible_local.inventory.amd_gpu > 0
+  tags: install
+
+- name: Get the hostname
+  command: hostname
+  register: machine_hostname
+  changed_when: false
+
+- name: Set facts for node hostname and ip
+  set_fact:
+    node_ip: "{{ inventory_hostname }}"
+    node_hostname: "{{ machine_hostname.stdout }}"
+
+- name: Add host name in hosts file
+  lineinfile:
+    dest: "{{ hosts_file_dest }}"
+    line: "{{ inventory_hostname }} {{ machine_hostname.stdout }}"
+    state: present
+    create: yes
+    mode: "{{ hosts_file_mode }}"
+
+- name: Add compute hosts info in manager node hosts file
+  lineinfile:
+    dest: "{{ hosts_file_dest }}"
+    line: "{{ hostvars[item].node_ip }} {{ hostvars[item].node_hostname }}"
+    state: present
+    create: yes
+    mode: "{{ hosts_file_mode }}"
+  with_items:
+    - "{{ groups['compute'] }}"
+  when: "'manager' in group_names"
+
+- name: Add manager hosts info in compute node hosts file
+  lineinfile:
+    dest: "{{ hosts_file_dest }}"
+    line: "{{ hostvars[item].node_ip }} {{ hostvars[item].node_hostname }}"
+    state: present
+    create: yes
+    mode: "{{ hosts_file_mode }}"
+  with_items:
+    - "{{ groups['manager'] }}"
+  when: "'compute' in group_names"
\ No newline at end of file
diff --git a/roles/common/tasks/ntp.yml b/roles/common/tasks/ntp.yml
new file mode 100644
index 000000000..c8d496b05
--- /dev/null
+++ b/roles/common/tasks/ntp.yml
@@ -0,0 +1,56 @@
+# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
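Since the play runs setup just above to collect accelerator facts, the hostname is also available as a fact; a sketch of an alternative to the hostname command (editorial suggestion, not in the PR):

```yaml
- name: Set facts for node hostname and ip
  set_fact:
    node_ip: "{{ inventory_hostname }}"
    node_hostname: "{{ ansible_hostname }}"   # populated by the earlier setup call
```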
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+
+  - name: Deploy ntp servers
+    block:
+      - name: Deploy ntpd
+        package:
+          name: ntp
+          state: present
+      - name: Deploy ntpdate
+        package:
+          name: ntpdate
+          state: present
+      - name: Update ntp servers
+        template:
+          src: ntp.conf.j2
+          dest: "{{ ntp_path }}"
+          owner: root
+          group: root
+          mode: "{{ ntp_mode }}"
+          backup: yes
+        notify:
+          - Restart ntpd
+          - Sync ntp clocks
+    when: ( ansible_distribution == "CentOS" or ansible_distribution == "RedHat" ) and ansible_distribution_major_version | int < os_higher_version | int
+
+  - name: Deploy chrony server
+    block:
+      - name: Deploy chrony
+        package:
+          name: chrony
+          state: present
+      - name: Update ntp servers
+        template:
+          src: chrony.conf.j2
+          dest: "{{ chrony_path }}"
+          owner: root
+          group: root
+          mode: "{{ ntp_mode }}"
+          backup: yes
+        notify:
+          - Restart chrony
+          - Sync chrony sources
+    when: ( ansible_distribution == "CentOS" or ansible_distribution == "RedHat" ) and ansible_distribution_major_version | int > os_version | int
\ No newline at end of file
diff --git a/roles/common/tasks/nvidia.yml b/roles/common/tasks/nvidia.yml
new file mode 100644
index 000000000..30809718d
--- /dev/null
+++ b/roles/common/tasks/nvidia.yml
@@ -0,0 +1,81 @@
+# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+
+- name: Add libnvidia-container repo
+  yum_repository:
+    name: libnvidia-container
+    description: libnvidia-container
+    baseurl: https://nvidia.github.io/libnvidia-container/stable/centos7/$basearch
+    repo_gpgcheck: no
+    gpgcheck: no
+    gpgkey: https://nvidia.github.io/libnvidia-container/gpgkey
+    sslverify: yes
+    sslcacert: /etc/pki/tls/certs/ca-bundle.crt
+    enabled: yes
+  tags: install
+
+- name: Add nvidia-container-runtime repo
+  yum_repository:
+    name: nvidia-container-runtime
+    description: nvidia-container-runtime
+    baseurl: https://nvidia.github.io/nvidia-container-runtime/stable/centos7/$basearch
+    repo_gpgcheck: no
+    gpgcheck: no
+    gpgkey: https://nvidia.github.io/nvidia-container-runtime/gpgkey
+    sslverify: yes
+    sslcacert: /etc/pki/tls/certs/ca-bundle.crt
+    enabled: yes
+  tags: install
+
+- name: Add nvidia-docker repo
+  yum_repository:
+    name: nvidia-docker
+    description: nvidia-docker
+    baseurl: https://nvidia.github.io/nvidia-docker/centos7/$basearch
+    repo_gpgcheck: no
+    gpgcheck: no
+    gpgkey: https://nvidia.github.io/nvidia-docker/gpgkey
+    enabled: yes
+    sslverify: yes
+    sslcacert: /etc/pki/tls/certs/ca-bundle.crt
+  tags: install
+
+- name: Install nvidia driver and nvidia-docker2
+  package:
+    name: "{{ nvidia_packages }}"
+    enablerepo: libnvidia-container,nvidia-docker
+    state: present
+  tags: install
+
+- name: Reboot after installing GPU drivers
+  reboot:
+  tags: install
+
+- name: Set nvidia as default runtime
+  copy:
+    src: daemon.json
+    dest: "{{ daemon_file_dest }}"
+    owner: root
+    group: root
+    mode: "{{ daemon_file_mode }}"
+  tags: install
+
+- name: Restart and enable docker service
+  service:
+    name: docker
+    state: restarted
+    enabled: yes
+    daemon_reload: yes
+  tags: install
diff --git a/roles/common/templates/chrony.conf.j2 b/roles/common/templates/chrony.conf.j2
new file mode 100644
index 000000000..317a5b50a
--- /dev/null
+++ b/roles/common/templates/chrony.conf.j2
@@ -0,0 +1,41 @@
+# Use public servers from the pool.ntp.org project.
+# Please consider joining the pool (http://www.pool.ntp.org/join.html).
+{% for item in chrony_servers %}
+pool {{ item }} iburst
+{% endfor %}
+
+
+# Record the rate at which the system clock gains/loses time.
+driftfile /var/lib/chrony/drift
+
+# Allow the system clock to be stepped in the first three updates
+# if its offset is larger than 1 second.
+makestep 1.0 3
+
+# Enable kernel synchronization of the real-time clock (RTC).
+rtcsync
+
+# Enable hardware timestamping on all interfaces that support it.
+#hwtimestamp *
+
+# Increase the minimum number of selectable sources required to adjust
+# the system clock.
+#minsources 2
+
+# Allow NTP client access from local network.
+#allow 192.168.0.0/16
+
+# Serve time even if not synchronized to a time source.
+#local stratum 10
+
+# Specify file containing keys for NTP authentication.
+keyfile /etc/chrony.keys
+
+# Get TAI-UTC offset and leap seconds from the system tz database.
+leapsectz right/UTC
+
+# Specify directory for log files.
+logdir /var/log/chrony
+
+# Select which information is logged.
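After daemon.json is copied in and docker restarted, the default runtime can be sanity-checked; a hypothetical verification task (not part of the PR):

```yaml
- name: Verify nvidia is the default docker runtime
  command: docker info
  register: docker_info
  changed_when: false
  failed_when: "'Default Runtime: nvidia' not in docker_info.stdout"
```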
+#log measurements statistics tracking \ No newline at end of file diff --git a/roles/common/templates/ntp.conf.j2 b/roles/common/templates/ntp.conf.j2 new file mode 100644 index 000000000..b26ff5df8 --- /dev/null +++ b/roles/common/templates/ntp.conf.j2 @@ -0,0 +1,14 @@ +driftfile /var/lib/ntp/drift + +restrict default nomodify notrap nopeer noquery + +restrict 127.0.0.1 +restrict ::1 + +{% for item in ntp_servers %} +server {{ item }} iburst +{% endfor %} + +includefile /etc/ntp/crypto/pw + +keys /etc/ntp/keys \ No newline at end of file diff --git a/roles/common/vars/main.yml b/roles/common/vars/main.yml new file mode 100644 index 000000000..d864c34d6 --- /dev/null +++ b/roles/common/vars/main.yml @@ -0,0 +1,80 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +common_packages: + - epel-release + - yum-plugin-versionlock + - gcc + - nfs-utils + - python3-pip + - bash-completion + - nvidia-detect + - chrony + - pciutils + - docker-ce-cli-20.10.2 + - docker-ce-20.10.2 + - openssl + +docker_packages: + - docker-ce-cli-20.10.2 + - docker-ce-20.10.2 + +custom_fact_dir: /etc/ansible/facts.d + +custom_fact_dir_mode: 0755 + +accelerator_discovery_script_dest: /etc/ansible/facts.d/inventory.fact + +accelerator_discovery_script_mode: 0755 + +elrepo_gpg_key_url: https://www.elrepo.org/RPM-GPG-KEY-elrepo.org + +elrepo_rpm_url: https://www.elrepo.org/elrepo-release-7.el7.elrepo.noarch.rpm + +docker_repo_url: https://download.docker.com/linux/centos/docker-ce.repo + +docker_repo_dest: /etc/yum.repos.d/docker-ce.repo + +chrony_path: "/etc/chrony.conf" +ntp_path: "/etc/ntp.conf" +ntp_mode: "0644" +os_higher_version: "8" +os_version: "7" +retry_count_one: "10" +delay_count_one: "60" +retry_count: "6" +delay_count: "10" + +ntp_servers: + - 0.centos.pool.ntp.org + - 1.centos.pool.ntp.org + - 2.centos.pool.ntp.org +chrony_servers: + - 2.centos.pool.ntp.org + +nvidia_docker_repo_url: https://nvidia.github.io/nvidia-docker/centos7/nvidia-docker.repo +nvidia_docker_repo_dest: /etc/yum.repos.d/nvidia-docker.repo +nvidia_container_repo_url: https://nvidia.github.io/libnvidia-container/centos7/libnvidia-container.repo +nvidia_container_repo_dest: /etc/yum.repos.d/libnvidia-container.repo + +nvidia_packages: + - kmod-nvidia + - nvidia-docker2 + +daemon_file_dest: /etc/docker/ +daemon_file_mode: 0644 + +hosts_file_dest: "/etc/hosts" +hosts_file_mode: "0644" \ No newline at end of file diff --git a/kubernetes/roles/common/files/k8s.conf b/roles/k8s_common/files/k8s.conf similarity index 100% rename from kubernetes/roles/common/files/k8s.conf rename to roles/k8s_common/files/k8s.conf diff --git a/kubernetes/roles/common/files/kubernetes.repo b/roles/k8s_common/files/kubernetes.repo similarity index 100% rename from kubernetes/roles/common/files/kubernetes.repo rename to roles/k8s_common/files/kubernetes.repo diff --git a/roles/k8s_common/handlers/main.yml b/roles/k8s_common/handlers/main.yml new file mode 100644 index 000000000..cb21d67eb --- 
/dev/null +++ b/roles/k8s_common/handlers/main.yml @@ -0,0 +1,28 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +- name: Start and Enable docker service + service: + name: docker + state: restarted + enabled: yes + tags: install + +- name: Start and Enable Kubernetes - kubelet + service: + name: kubelet + state: started + enabled: yes + tags: install \ No newline at end of file diff --git a/roles/k8s_common/tasks/main.yml b/roles/k8s_common/tasks/main.yml new file mode 100644 index 000000000..3d07c31d9 --- /dev/null +++ b/roles/k8s_common/tasks/main.yml @@ -0,0 +1,69 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +- name: Add kubernetes repo + yum_repository: + name: kubernetes + description: kubernetes + baseurl: https://packages.cloud.google.com/yum/repos/kubernetes-el7-x86_64 + enabled: yes + gpgcheck: no + repo_gpgcheck: no + gpgkey: + - https://packages.cloud.google.com/yum/doc/yum-key.gpg + - https://packages.cloud.google.com/yum/doc/rpm-package-key.gpg + tags: install + +- name: Update sysctl to handle incorrectly routed traffic when iptables is bypassed + copy: + src: k8s.conf + dest: "{{ k8s_conf_dest }}" + owner: root + group: root + mode: "{{ k8s_conf_file_mode }}" + tags: install + +- name: Update sysctl + command: /sbin/sysctl --system + changed_when: true + tags: install + +- name: Install k8s packages + package: + name: "{{ k8s_packages }}" + state: present + tags: install + +- name: Versionlock kubernetes + command: "yum versionlock '{{ item }}'" + args: + warn: false + with_items: + - "{{ k8s_packages }}" + changed_when: true + tags: install + +- name: Start and enable docker service + service: + name: docker + state: restarted + enabled: yes + tags: install + +- name: Start and enable kubernetes - kubelet + service: + name: kubelet + state: restarted + enabled: yes diff --git a/roles/k8s_common/vars/main.yml b/roles/k8s_common/vars/main.yml new file mode 100644 index 000000000..3365d3d37 --- /dev/null +++ b/roles/k8s_common/vars/main.yml @@ -0,0 +1,27 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
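The versionlock above pins kubelet, kubeadm, and kubectl at 1.16.7; a later upgrade requires clearing the lock first. A sketch of the yum-plugin-versionlock workflow:

```yaml
# Run on each node before upgrading Kubernetes packages:
#   yum versionlock delete kubelet-1.16.7 kubeadm-1.16.7 kubectl-1.16.7
#   yum versionlock list    # confirm the locks are gone
```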
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+
+k8s_packages:
+  - kubelet-1.16.7
+  - kubeadm-1.16.7
+  - kubectl-1.16.7
+
+k8s_repo_dest: /etc/yum.repos.d/
+
+k8s_conf_dest: /etc/sysctl.d/
+
+k8s_repo_file_mode: 0644
+
+k8s_conf_file_mode: 0644
diff --git a/roles/k8s_firewalld/tasks/main.yml b/roles/k8s_firewalld/tasks/main.yml
new file mode 100644
index 000000000..7c04a800a
--- /dev/null
+++ b/roles/k8s_firewalld/tasks/main.yml
@@ -0,0 +1,84 @@
+# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+
+- name: Install firewalld
+  package:
+    name: firewalld
+    state: present
+  tags: firewalld
+
+- name: Start and enable firewalld
+  service:
+    name: firewalld
+    state: started
+    enabled: yes
+  tags: firewalld
+
+- name: Configure firewalld on manager nodes
+  firewalld:
+    port: "{{ item }}/tcp"
+    permanent: yes
+    state: enabled
+  with_items: '{{ k8s_master_ports }}'
+  when: "'manager' in group_names"
+  tags: firewalld
+
+- name: Configure firewalld on compute nodes
+  firewalld:
+    port: "{{ item }}/tcp"
+    permanent: yes
+    state: enabled
+  with_items: '{{ k8s_compute_ports }}'
+  when: "'compute' in group_names and groups['manager'][0] != groups['compute'][0] and groups['compute']|length >= 1"
+  tags: firewalld
+
+- name: Open flannel ports on the firewall
+  firewalld:
+    port: "{{ item }}/udp"
+    permanent: yes
+    state: enabled
+  with_items: "{{ flannel_udp_ports }}"
+  when: hostvars['127.0.0.1']['k8s_cni'] == "flannel"
+  tags: firewalld
+
+- name: Open calico UDP ports on the firewall
+  firewalld:
+    port: "{{ item }}/udp"
+    permanent: yes
+    state: enabled
+  with_items: "{{ calico_udp_ports }}"
+  when: hostvars['127.0.0.1']['k8s_cni'] == "calico"
+  tags: firewalld
+
+- name: Open calico TCP ports on the firewall
+  firewalld:
+    port: "{{ item }}/tcp"
+    permanent: yes
+    state: enabled
+  with_items: "{{ calico_tcp_ports }}"
+  when: hostvars['127.0.0.1']['k8s_cni'] == "calico"
+  tags: firewalld
+
+- name: Reload firewalld
+  command: firewall-cmd --reload
+  changed_when: true
+  tags: firewalld
+
+- name: Stop and disable firewalld
+  service:
+    name: firewalld
+    state: stopped
+    enabled: no
+  tags: firewalld
\ No newline at end of file
diff --git a/roles/k8s_firewalld/vars/main.yml b/roles/k8s_firewalld/vars/main.yml
new file mode 100644
index 000000000..a8d4da4fa
--- /dev/null
+++ b/roles/k8s_firewalld/vars/main.yml
@@ -0,0 +1,39 @@
+# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +# Master nodes firewall ports +k8s_master_ports: + - 6443 + - 2379-2380 + - 10250 + - 10251 + - 10252 + +# Worker nodes firewall ports +k8s_compute_ports: + - 10250 + - 30000-32767 + +# Calico CNI firewall ports +calico_udp_ports: + - 4789 +calico_tcp_ports: + - 5473 + - 179 + +# Flannel CNI firewall ports +flannel_udp_ports: + - 8285 + - 8472 diff --git a/roles/k8s_manager/tasks/main.yml b/roles/k8s_manager/tasks/main.yml new file mode 100644 index 000000000..a371cf040 --- /dev/null +++ b/roles/k8s_manager/tasks/main.yml @@ -0,0 +1,32 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +- name: Create directory for helm installer file + file: + path: "{{ helm_installer_file_directory }}" + state: directory + mode: "{{ helm_installer_file_directory_mode }}" + +- name: Get helm installer + get_url: + url: "{{ helm_installer_url }}" + dest: "{{ helm_installer_file_dest }}" + mode: "{{ helm_installer_file_mode }}" + tags: manager + +- name: Install helm + command: "/bin/bash {{ helm_installer_file_dest }}" + changed_when: true + tags: manager \ No newline at end of file diff --git a/roles/k8s_manager/vars/main.yml b/roles/k8s_manager/vars/main.yml new file mode 100644 index 000000000..406e3ab09 --- /dev/null +++ b/roles/k8s_manager/vars/main.yml @@ -0,0 +1,24 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +helm_installer_file_directory: /root/bin + +helm_installer_file_directory_mode: 0755 + +helm_installer_url: https://raw.githubusercontent.com/helm/helm/master/scripts/get-helm-3 + +helm_installer_file_dest: /root/bin/get_helm.sh + +helm_installer_file_mode: 0700 \ No newline at end of file diff --git a/roles/k8s_nfs_client_setup/tasks/main.yml b/roles/k8s_nfs_client_setup/tasks/main.yml new file mode 100644 index 000000000..5a7450677 --- /dev/null +++ b/roles/k8s_nfs_client_setup/tasks/main.yml @@ -0,0 +1,53 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. 
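A quick post-install sanity check for the helm installer script above (hypothetical task, not part of the PR):

```yaml
- name: Check helm version
  command: helm version --short
  register: helm_version
  changed_when: false
```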
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +- name: Install nfs-utils + package: + name: nfs-utils + state: present + when: groups['manager'][0] != groups['compute'][0] and groups['compute']|length >= 1 + tags: nfs_client + +- name: Check mounted share + shell: mount | grep nfs + changed_when: false + args: + warn: false + register: mounted_share + ignore_errors: True + tags: nfs_client + +- name: Creating directory to mount NFS Share + file: + path: "{{ nfs_mnt_dir }}" + state: directory + mode: "{{ nfs_mnt_dir_mode }}" + when: groups['manager'][0] != groups['compute'][0] and groups['compute']|length >= 1 + tags: nfs_client + +- name: Mounting NFS Share + command: "mount {{ mounthost }}:{{ nfs_share_dir }} {{ nfs_mnt_dir }}" + changed_when: true + args: + warn: false + when: groups['manager'][0] not in mounted_share.stdout and groups['manager'][0] != groups['compute'][0] and groups['compute']|length >= 1 + tags: nfs_client + +- name: Configuring Automount NFS Shares on reboot + lineinfile: + path: "{{ fstab_file_path }}" + line: "{{ mounthost }}:{{ nfs_share_dir }} {{ nfs_mnt_dir }} nfs nosuid,rw,sync,hard,intr 0 0" + when: groups['manager'][0] not in mounted_share.stdout and groups['manager'][0] != groups['compute'][0] and groups['compute']|length >= 1 + tags: nfs_client \ No newline at end of file diff --git a/roles/k8s_nfs_client_setup/vars/main.yml b/roles/k8s_nfs_client_setup/vars/main.yml new file mode 100644 index 000000000..ef7e379b2 --- /dev/null +++ b/roles/k8s_nfs_client_setup/vars/main.yml @@ -0,0 +1,24 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +nfs_mnt_dir: /home/k8snfs + +nfs_share_dir: /home/k8snfs + +mounthost: "{{ groups['manager'][0] }}" + +nfs_mnt_dir_mode: 0755 + +fstab_file_path: /etc/fstab \ No newline at end of file diff --git a/roles/k8s_nfs_server_setup/tasks/main.yml b/roles/k8s_nfs_server_setup/tasks/main.yml new file mode 100644 index 000000000..4757bae29 --- /dev/null +++ b/roles/k8s_nfs_server_setup/tasks/main.yml @@ -0,0 +1,84 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
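The command + lineinfile pair above can be folded into Ansible's mount module, which mounts the share and writes the fstab entry in one step; a sketch reusing the role's variables (an editorial alternative, not part of the PR):

```yaml
- name: Mount NFS share and persist it across reboots
  mount:
    path: "{{ nfs_mnt_dir }}"
    src: "{{ mounthost }}:{{ nfs_share_dir }}"
    fstype: nfs
    opts: nosuid,rw,sync,hard,intr
    state: mounted   # mounts now and adds the /etc/fstab entry
  when: groups['manager'][0] != groups['compute'][0]
  tags: nfs_client
```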
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +- name: Install nfs-utils + package: + name: nfs-utils + state: present + tags: nfs_server + +- name: Install firewalld + package: + name: firewalld + state: present + tags: firewalld + +- name: Start and enable firewalld + service: + name: firewalld + state: started + enabled: yes + tags: firewalld + +- name: Start and enable rpcbind and nfs-server service + service: + name: "{{ item }}" + state: restarted + enabled: yes + with_items: + - rpcbind + - nfs-server + tags: nfs_server + +- name: Creating NFS share directory + file: + path: "{{ nfs_share_dir }}" + state: directory + mode: "{{ nfs_share_dir_mode }}" + tags: nfs_server + +- name: Adding NFS share entries in /etc/exports + lineinfile: + path: "{{ exports_file_path }}" + line: "{{ nfs_share_dir }} {{ item }}(rw,sync,no_root_squash)" + with_items: + - "{{ groups['compute'] }}" + tags: nfs_server + +- name: Exporting the shared directories + command: exportfs -r + changed_when: true + tags: nfs_server + +- name: Configuring firewall + firewalld: + service: "{{ item }}" + permanent: true + state: enabled + with_items: + - "{{ nfs_services }}" + tags: nfs_server + +- name: Reload firewalld + command: firewall-cmd --reload + changed_when: true + tags: nfs_server + +- name: Stop and disable firewalld + service: + name: firewalld + state: stopped + enabled: no + tags: firewalld \ No newline at end of file diff --git a/roles/k8s_nfs_server_setup/vars/main.yml b/roles/k8s_nfs_server_setup/vars/main.yml new file mode 100644 index 000000000..af4c15586 --- /dev/null +++ b/roles/k8s_nfs_server_setup/vars/main.yml @@ -0,0 +1,25 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
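With the defaults above, the generated /etc/exports entries look like this (the compute IPs are hypothetical):

```yaml
# /home/k8snfs 192.168.1.101(rw,sync,no_root_squash)
# /home/k8snfs 192.168.1.102(rw,sync,no_root_squash)
```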
+--- + +nfs_share_dir: /home/k8snfs + +nfs_share_dir_mode: 0777 + +exports_file_path: /etc/exports + +nfs_services: + - mountd + - rpc-bind + - nfs \ No newline at end of file diff --git a/kubernetes/roles/startmaster/files/create_admin_user.yaml b/roles/k8s_start_manager/files/create_admin_user.yaml similarity index 100% rename from kubernetes/roles/startmaster/files/create_admin_user.yaml rename to roles/k8s_start_manager/files/create_admin_user.yaml diff --git a/kubernetes/roles/startmaster/files/create_clusterRoleBinding.yaml b/roles/k8s_start_manager/files/create_clusterRoleBinding.yaml similarity index 100% rename from kubernetes/roles/startmaster/files/create_clusterRoleBinding.yaml rename to roles/k8s_start_manager/files/create_clusterRoleBinding.yaml diff --git a/kubernetes/roles/startmaster/files/data-pv.yaml b/roles/k8s_start_manager/files/data-pv.yaml old mode 100755 new mode 100644 similarity index 100% rename from kubernetes/roles/startmaster/files/data-pv.yaml rename to roles/k8s_start_manager/files/data-pv.yaml diff --git a/kubernetes/roles/startmaster/files/data2-pv.yaml b/roles/k8s_start_manager/files/data2-pv.yaml old mode 100755 new mode 100644 similarity index 100% rename from kubernetes/roles/startmaster/files/data2-pv.yaml rename to roles/k8s_start_manager/files/data2-pv.yaml diff --git a/kubernetes/roles/startmaster/files/data3-pv.yaml b/roles/k8s_start_manager/files/data3-pv.yaml old mode 100755 new mode 100644 similarity index 100% rename from kubernetes/roles/startmaster/files/data3-pv.yaml rename to roles/k8s_start_manager/files/data3-pv.yaml diff --git a/kubernetes/roles/startmaster/files/data4-pv.yaml b/roles/k8s_start_manager/files/data4-pv.yaml old mode 100755 new mode 100644 similarity index 100% rename from kubernetes/roles/startmaster/files/data4-pv.yaml rename to roles/k8s_start_manager/files/data4-pv.yaml diff --git a/kubernetes/roles/startmaster/files/flannel_net.sh b/roles/k8s_start_manager/files/flannel_net.sh old mode 100755 new mode 100644 similarity index 100% rename from kubernetes/roles/startmaster/files/flannel_net.sh rename to roles/k8s_start_manager/files/flannel_net.sh diff --git a/kubernetes/roles/startmaster/files/katib-pv.yaml b/roles/k8s_start_manager/files/katib-pv.yaml old mode 100755 new mode 100644 similarity index 100% rename from kubernetes/roles/startmaster/files/katib-pv.yaml rename to roles/k8s_start_manager/files/katib-pv.yaml diff --git a/kubernetes/roles/startmaster/files/kube-flannel.yaml b/roles/k8s_start_manager/files/kube-flannel.yaml similarity index 100% rename from kubernetes/roles/startmaster/files/kube-flannel.yaml rename to roles/k8s_start_manager/files/kube-flannel.yaml diff --git a/kubernetes/roles/startmaster/files/kubeflow_persistent_volumes.yaml b/roles/k8s_start_manager/files/kubeflow_persistent_volumes.yaml similarity index 100% rename from kubernetes/roles/startmaster/files/kubeflow_persistent_volumes.yaml rename to roles/k8s_start_manager/files/kubeflow_persistent_volumes.yaml diff --git a/kubernetes/roles/startmaster/files/minio-pvc.yaml b/roles/k8s_start_manager/files/minio-pvc.yaml old mode 100755 new mode 100644 similarity index 100% rename from kubernetes/roles/startmaster/files/minio-pvc.yaml rename to roles/k8s_start_manager/files/minio-pvc.yaml diff --git a/kubernetes/roles/startmaster/files/mysql-pv.yaml b/roles/k8s_start_manager/files/mysql-pv.yaml old mode 100755 new mode 100644 similarity index 100% rename from kubernetes/roles/startmaster/files/mysql-pv.yaml rename to 
roles/k8s_start_manager/files/mysql-pv.yaml diff --git a/kubernetes/roles/startmaster/files/nfs-class.yaml b/roles/k8s_start_manager/files/nfs-class.yaml similarity index 100% rename from kubernetes/roles/startmaster/files/nfs-class.yaml rename to roles/k8s_start_manager/files/nfs-class.yaml diff --git a/kubernetes/roles/startmaster/files/nfs-deployment.yaml b/roles/k8s_start_manager/files/nfs-deployment.yaml similarity index 100% rename from kubernetes/roles/startmaster/files/nfs-deployment.yaml rename to roles/k8s_start_manager/files/nfs-deployment.yaml diff --git a/kubernetes/roles/startmaster/files/nfs-serviceaccount.yaml b/roles/k8s_start_manager/files/nfs-serviceaccount.yaml similarity index 100% rename from kubernetes/roles/startmaster/files/nfs-serviceaccount.yaml rename to roles/k8s_start_manager/files/nfs-serviceaccount.yaml diff --git a/kubernetes/roles/startmaster/files/nfs_clusterrole.yaml b/roles/k8s_start_manager/files/nfs_clusterrole.yaml similarity index 100% rename from kubernetes/roles/startmaster/files/nfs_clusterrole.yaml rename to roles/k8s_start_manager/files/nfs_clusterrole.yaml diff --git a/kubernetes/roles/startmaster/files/nfs_clusterrolebinding.yaml b/roles/k8s_start_manager/files/nfs_clusterrolebinding.yaml similarity index 100% rename from kubernetes/roles/startmaster/files/nfs_clusterrolebinding.yaml rename to roles/k8s_start_manager/files/nfs_clusterrolebinding.yaml diff --git a/kubernetes/roles/startmaster/files/notebook-pv.yaml b/roles/k8s_start_manager/files/notebook-pv.yaml old mode 100755 new mode 100644 similarity index 100% rename from kubernetes/roles/startmaster/files/notebook-pv.yaml rename to roles/k8s_start_manager/files/notebook-pv.yaml diff --git a/kubernetes/roles/startmaster/files/persistent_volumes.yaml b/roles/k8s_start_manager/files/persistent_volumes.yaml old mode 100755 new mode 100644 similarity index 100% rename from kubernetes/roles/startmaster/files/persistent_volumes.yaml rename to roles/k8s_start_manager/files/persistent_volumes.yaml diff --git a/kubernetes/roles/startmaster/files/pvc.yaml b/roles/k8s_start_manager/files/pvc.yaml similarity index 100% rename from kubernetes/roles/startmaster/files/pvc.yaml rename to roles/k8s_start_manager/files/pvc.yaml diff --git a/kubernetes/roles/startmaster/files/tiller_config.sh b/roles/k8s_start_manager/files/tiller_config.sh old mode 100755 new mode 100644 similarity index 100% rename from kubernetes/roles/startmaster/files/tiller_config.sh rename to roles/k8s_start_manager/files/tiller_config.sh diff --git a/roles/k8s_start_manager/tasks/main.yml b/roles/k8s_start_manager/tasks/main.yml new file mode 100644 index 000000000..00e203dbf --- /dev/null +++ b/roles/k8s_start_manager/tasks/main.yml @@ -0,0 +1,186 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
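One detail to keep in mind when reading the manager bring-up that follows: its first task runs `swapoff -a`, which disables swap only until the next reboot (kubelet refuses to run with swap enabled). A persistent setup would also drop swap entries from `/etc/fstab`; a minimal sketch of such a companion task (an assumption about the desired behavior, not something this changeset does):

```yaml
# Hypothetical companion to "Disable swap": comment out swap lines in
# /etc/fstab so the setting survives reboots. Not part of this diff.
- name: Comment out swap entries in /etc/fstab
  replace:
    path: /etc/fstab
    regexp: '^([^#].*\sswap\s.*)$'
    replace: '# \1'
```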
+--- + +- name: Disable swap + command: /usr/sbin/swapoff -a + changed_when: true + tags: init + +- name: Get netaddr + setup: + filter: ansible_default_ipv4.address + +- name: Get K8s nodes status + command: kubectl get nodes + changed_when: false + ignore_errors: True + register: k8s_nodes + tags: init + +- name: Get K8s ready compute nodes + shell: kubectl get node --selector='!node-role.kubernetes.io/master' | grep -v 'NotReady' + changed_when: false + ignore_errors: True + register: k8s_nodes_ready + tags: init + +- name: Get K8s not ready compute nodes + shell: kubectl get node --selector='!node-role.kubernetes.io/master' | grep 'NotReady' + changed_when: false + ignore_errors: True + register: k8s_nodes_not_ready + tags: init + +- name: Initialize kubeadm + command: "/bin/kubeadm init --pod-network-cidr='{{ hostvars['127.0.0.1']['k8s_pod_network_cidr'] }}' \ + --apiserver-advertise-address='{{ ansible_default_ipv4.address }}'" + changed_when: true + when: "'master' not in k8s_nodes.stdout" + register: init_output + tags: init + +- name: Setup directory for Kubernetes environment for root + file: + path: "{{ k8s_root_directory }}" + state: directory + mode: "{{ k8s_root_directory_mode }}" + tags: init + +- name: Copy Kubernetes config for root + copy: + src: "{{ k8s_config_src }}" + dest: "{{ k8s_config_dest }}" + owner: root + group: root + mode: "{{ k8s_config_file_mode }}" + remote_src: yes + tags: init + +- name: Update the kubernetes config file permissions + shell: "chown $(id -u):$(id -g) '{{ k8s_config_dest }}'" + args: + warn: false + changed_when: true + tags: init + +- name: Cluster token + shell: > + set -o pipefail && \ + kubeadm token list | cut -d ' ' -f1 | sed -n '2p' + changed_when: false + register: K8S_TOKEN + tags: init + +- name: CA Hash + shell: > + set -o pipefail && \ + openssl x509 -pubkey -in {{ k8s_cert_path }} | openssl rsa -pubin -outform der 2>/dev/null | openssl dgst -sha256 -hex | sed 's/^.* //' + changed_when: false + register: K8S_MANAGER_CA_HASH + tags: init + +- name: Add K8S Manager IP, Token, and Hash to dummy host + add_host: + name: "K8S_TOKEN_HOLDER" + token: "{{ K8S_TOKEN.stdout }}" + hash: "{{ K8S_MANAGER_CA_HASH.stdout }}" + ip: "{{ ansible_default_ipv4.address }}" + k8s_nodes: "{{ k8s_nodes.stdout }}" + k8s_nodes_ready: "{{ k8s_nodes_ready.stdout }}" + k8s_nodes_not_ready: "{{ k8s_nodes_not_ready.stdout }}" + tags: init + +- name: Print k8s token + debug: + msg: "[Manager] K8S_TOKEN_HOLDER K8S token is {{ hostvars['K8S_TOKEN_HOLDER']['token'] }}" + verbosity: 2 + tags: init + +- name: Print k8s hash + debug: + msg: "[Manager] K8S_TOKEN_HOLDER K8S Hash is {{ hostvars['K8S_TOKEN_HOLDER']['hash'] }}" + verbosity: 2 + tags: init + +- name: Print k8s ansible_default_ipv4.address + debug: + msg: "[Manager] K8S_MANAGER_IP is {{ ansible_default_ipv4.address }}" + verbosity: 2 + tags: init + +- name: Setup Calico SDN network + command: "kubectl apply -f '{{ calico_yml_url }}'" + when: hostvars['127.0.0.1']['k8s_cni'] == "calico" + tags: init + +- name: Setup Flannel SDN network + command: "kubectl apply -f '{{ flannel_yml_url }}'" + when: hostvars['127.0.0.1']['k8s_cni'] == "flannel" + tags: init + +- name: Create yaml repo for setup + file: + path: "{{ yaml_repo_dir_path }}" + state: directory + mode: "{{ yaml_repo_dir_mode }}" + tags: init + +- name: Create service account (K8s dashboard) files + copy: + src: create_admin_user.yaml + dest: "{{ k8s_service_account_file_dest }}" + owner: root + group: root + mode: "{{ k8s_service_account_file_mode 
}}" + tags: init + +- name: Check K8s service accounts status + command: "kubectl get serviceaccounts" + changed_when: false + register: k8s_service_accounts + tags: init + +- name: Create service account (K8s dashboard) + command: "kubectl create -f '{{ k8s_service_account_file_dest }}'" + changed_when: true + when: "'default' not in k8s_service_accounts.stdout" + tags: init + +- name: Create clusterRoleBinding (K8s dashboard) files + copy: + src: create_clusterRoleBinding.yaml + dest: "{{ k8s_clusterRoleBinding_file_dest }}" + owner: root + group: root + mode: "{{ k8s_clusterRoleBinding_file_mode }}" + tags: init + +- name: Create clusterRoleBinding (K8s dashboard) + command: "kubectl create -f '{{ k8s_clusterRoleBinding_file_dest }}'" + changed_when: true + ignore_errors: True + tags: init + +- name: Dump bearer token for K8s dashboard login + shell: > + set -o pipefail && \ + kubectl -n kube-system describe secret $(kubectl -n kube-system get secret | grep admin-user | awk '{print $1}') > /root/k8s/token + changed_when: true + tags: init + +- name: Edge / Workstation Install allows pods to scheudle on manager + command: kubectl taint nodes --all node-role.kubernetes.io/master- + when: groups['manager'][0] == groups['compute'][0] and groups['compute']|length == 1 + tags: init \ No newline at end of file diff --git a/roles/k8s_start_manager/vars/main.yml b/roles/k8s_start_manager/vars/main.yml new file mode 100644 index 000000000..730430c1a --- /dev/null +++ b/roles/k8s_start_manager/vars/main.yml @@ -0,0 +1,44 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+--- + +k8s_root_directory: /root/.kube + +k8s_root_directory_mode: 0755 + +k8s_config_src: /etc/kubernetes/admin.conf + +k8s_config_dest: /root/.kube/config + +k8s_config_file_mode: 0644 + +k8s_cert_path: /etc/kubernetes/pki/ca.crt + +k8s_dummy_hostname: K8S_TOKEN_HOLDER + +yaml_repo_dir_path: /root/k8s + +yaml_repo_dir_mode: 0755 + +k8s_service_account_file_dest: /root/k8s/create_admin_user.yaml + +k8s_service_account_file_mode: 0655 + +k8s_clusterRoleBinding_file_dest: /root/k8s/create_clusterRoleBinding.yaml + +k8s_clusterRoleBinding_file_mode: 0655 + +calico_yml_url: https://docs.projectcalico.org/manifests/calico.yaml + +flannel_yml_url: https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml \ No newline at end of file diff --git a/roles/k8s_start_services/files/extraScrapeConfigs.yaml b/roles/k8s_start_services/files/extraScrapeConfigs.yaml new file mode 100644 index 000000000..134fda367 --- /dev/null +++ b/roles/k8s_start_services/files/extraScrapeConfigs.yaml @@ -0,0 +1,4 @@ +- job_name: Slurm-exporter-prometheus + static_configs: + - targets: + - localhost:8080 \ No newline at end of file diff --git a/kubernetes/roles/startservices/files/metal-config.yaml b/roles/k8s_start_services/files/metal-config.yaml similarity index 100% rename from kubernetes/roles/startservices/files/metal-config.yaml rename to roles/k8s_start_services/files/metal-config.yaml diff --git a/kubernetes/roles/startservices/files/metallb.yaml b/roles/k8s_start_services/files/metallb.yaml similarity index 100% rename from kubernetes/roles/startservices/files/metallb.yaml rename to roles/k8s_start_services/files/metallb.yaml diff --git a/roles/k8s_start_services/tasks/main.yml b/roles/k8s_start_services/tasks/main.yml new file mode 100644 index 000000000..6c85b62e1 --- /dev/null +++ b/roles/k8s_start_services/tasks/main.yml @@ -0,0 +1,182 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+--- + +- name: Wait for CoreDNS to restart + command: kubectl rollout status deployment/coredns -n kube-system + changed_when: false + ignore_errors: True + tags: init + +- name: Get K8s pods + command: kubectl get pods --all-namespaces + changed_when: false + register: k8s_pods + tags: init + +- name: Deploy MetalLB + command: "kubectl apply -f '{{ metallb_yaml_url }}'" + changed_when: true + when: "'metallb' not in k8s_pods.stdout" + tags: init + +- name: Create MetalLB Setup Config Files + copy: + src: metal-config.yaml + dest: "{{ metallb_config_file_dest }}" + owner: root + group: root + mode: "{{ metallb_config_file_mode }}" + tags: init + +- name: Create MetalLB Setup Deployment Files + copy: + src: metallb.yaml + dest: "{{ metallb_deployment_file_dest }}" + owner: root + group: root + mode: "{{ metallb_deployment_file_mode }}" + tags: init + +- name: Deploy MetalLB + command: "kubectl apply -f '{{ metallb_deployment_file_dest }}'" + changed_when: true + when: "'metallb' not in k8s_pods.stdout" + tags: init + +- name: Create default setup for MetalLB + command: "kubectl apply -f '{{ metallb_config_file_dest }}'" + changed_when: true + when: "'metallb' not in k8s_pods.stdout" + tags: init + +- name: Start k8s dashboard + command: "kubectl create -f '{{ k8s_dashboard_yaml_url }}'" + changed_when: true + when: "'kubernetes-dashboard' not in k8s_pods.stdout" + tags: init + +- name: Helm - add stable repo + command: "helm repo add stable '{{ helm_stable_repo_url }}'" + changed_when: true + tags: init + +- name: Helm - add Nvidia k8s-device-plugin (nvdp) repo + command: "helm repo add nvdp '{{ nvidia_k8s_device_plugin_repo_url }}'" + changed_when: true + tags: init + +- name: Helm - add Nvidia GPU discovery (nvgfd) repo + command: "helm repo add nvgfd '{{ nvidia_gpu_discovery_repo_url }}'" + changed_when: true + tags: init + +- name: Helm - update repo + command: helm repo update + changed_when: true + tags: init + +- name: Start NFS Client Provisioner + command: "helm install stable/nfs-client-provisioner --set nfs.server='{{ nfs_server }}' --set nfs.path='{{ nfs_path }}' --generate-name" + changed_when: true + when: "'nfs-client-provisioner' not in k8s_pods.stdout" + tags: init + +- name: Set NFS-Client Provisioner as DEFAULT StorageClass + shell: > + kubectl patch storageclasses.storage.k8s.io nfs-client \ + -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}' + changed_when: true + tags: init + +- name: Check if prometheus is installed on the host + stat: + path: "{{ prometheus_path_on_host }}" + register: prometheus_status + changed_when: False + ignore_errors: yes + tags: init + +- name: Delete prometheus installed on host if it exists + file: + path: "{{ prometheus_path_on_host }}" + state: absent + when: prometheus_status.stat.exists + tags: init + +- name: Copy the slurm exporter config file + copy: + src: "{{ slurm_exporter_config_file }}" + dest: "{{ slurm_exporter_config_file_path }}" + owner: root + group: root + mode: "{{ slurm_exporter_file_mode }}" + tags: init + +- name: Fetch the public IP of the host + shell: > + set -o pipefail && \ + ip route get 8.8.8.8 | awk '{print $7}' + register: public_ip + changed_when: False + tags: init + +- name: Add the host IP to config file + replace: + path: "{{ slurm_exporter_config_file_path }}{{ slurm_exporter_config_file }}" + regexp: "localhost" + replace: "{{ public_ip.stdout }}" + tags: init + +- name: Prometheus deployment + command: > + helm install stable/prometheus \ + --set-file 
extraScrapeConfigs="{{ slurm_exporter_config_file_path }}{{ slurm_exporter_config_file }}" \ + --set alertmanager.persistentVolume.storageClass=nfs-client,server.persistentVolume.storageClass=nfs-client,server.service.type=LoadBalancer \ + --generate-name + changed_when: true + when: "'prometheus' not in k8s_pods.stdout" + tags: init + +- name: Install MPI Operator + command: "kubectl create -f '{{ mpi_operator_yaml_url }}'" + changed_when: true + when: "'mpi-operator' not in k8s_pods.stdout" + tags: init + +- name: Install nvidia-device-plugin + command: "helm install --version='{{ nvidia_device_plugin_version }}' --generate-name --set migStrategy='{{ mig_strategy }}' nvdp/nvidia-device-plugin" + changed_when: true + when: "'nvidia-device-plugin' not in k8s_pods.stdout" + tags: init + +- name: Install GPU Feature Discovery + command: "helm install --version='{{ gpu_feature_discovery_version }}' --generate-name --set migStrategy='{{ mig_strategy }}' nvgfd/gpu-feature-discovery" + changed_when: true + when: "'node-feature-discovery' not in k8s_pods.stdout" + tags: init + +- name: Deploy Xilinx Device plugin + command: "kubectl create -f '{{ fpga_device_plugin_yaml_url }}'" + changed_when: true + register: fpga_enable + when: "'fpga-device-plugin' not in k8s_pods.stdout" + tags: init + +- name: Deploy ROCm Device plugin + command: "kubectl create -f '{{ rocm_device_plugin_yaml_url }}'" + changed_when: true + register: amd_gpu_enable + when: "'amdgpu-device-plugin' not in k8s_pods.stdout" + tags: init \ No newline at end of file diff --git a/roles/k8s_start_services/vars/main.yml b/roles/k8s_start_services/vars/main.yml new file mode 100644 index 000000000..bd046a2c0 --- /dev/null +++ b/roles/k8s_start_services/vars/main.yml @@ -0,0 +1,56 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+--- + +metallb_config_file_dest: /root/k8s/metal-config.yaml + +metallb_config_file_mode: 0655 + +metallb_deployment_file_dest: /root/k8s/metallb.yaml + +metallb_deployment_file_mode: 0655 + +metallb_yaml_url: https://raw.githubusercontent.com/google/metallb/v0.8.1/manifests/metallb.yaml + +k8s_dashboard_yaml_url: https://raw.githubusercontent.com/kubernetes/dashboard/v2.0.0/aio/deploy/recommended.yaml + +helm_stable_repo_url: https://charts.helm.sh/stable + +nfs_server: "{{ ansible_host }}" + +nfs_path: /home/k8snfs + +mpi_operator_yaml_url: https://raw.githubusercontent.com/kubeflow/mpi-operator/master/deploy/v1alpha2/mpi-operator.yaml + +nvidia_k8s_device_plugin_repo_url: https://nvidia.github.io/k8s-device-plugin + +nvidia_gpu_discovery_repo_url: https://nvidia.github.io/gpu-feature-discovery + +nvidia_device_plugin_version: 0.7.0 + +mig_strategy: none + +gpu_feature_discovery_version: 0.2.0 + +fpga_device_plugin_yaml_url: https://raw.githubusercontent.com/Xilinx/FPGA_as_a_Service/master/k8s-fpga-device-plugin/fpga-device-plugin.yml + +rocm_device_plugin_yaml_url: https://raw.githubusercontent.com/RadeonOpenCompute/k8s-device-plugin/master/k8s-ds-amdgpu-dp.yaml + +slurm_exporter_config_file: extraScrapeConfigs.yaml + +slurm_exporter_config_file_path: /var/lib/ + +slurm_exporter_file_mode: 0655 + +prometheus_path_on_host: /var/lib/prometheus-2.23.0.linux-amd64/ \ No newline at end of file diff --git a/kubernetes/roles/startworkers/tasks/main.yml b/roles/k8s_start_workers/tasks/main.yml similarity index 53% rename from kubernetes/roles/startworkers/tasks/main.yml rename to roles/k8s_start_workers/tasks/main.yml index 41d15626c..8a5d40acf 100644 --- a/kubernetes/roles/startworkers/tasks/main.yml +++ b/roles/k8s_start_workers/tasks/main.yml @@ -1,4 +1,4 @@ -# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,38 +11,36 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- --- -- name: Turn Swap OFF (if not already disabled) +- name: Disable swap (if not already disabled) command: /usr/sbin/swapoff -a + changed_when: true tags: init -#- name: - #debug: - #msg: "[Worker] K8S_TOKEN_HOLDER K8S token is {{ hostvars['K8S_TOKEN_HOLDER']['token'] }}" - #tags: init - -#- name: - #debug: - #msg: "[Worker] K8S_TOKEN_HOLDER K8S Hash is {{ hostvars['K8S_TOKEN_HOLDER']['hash'] }}" - #tags: init +- name: Get hostname + command: hostname + changed_when: true + register: node_hostname + tags: init -#- name: - #debug: - #msg: "[Worker] K8S_MASTER_IP is {{ hostvars['K8S_TOKEN_HOLDER']['ip'] }}" - #tags: init +- name: Reset kubeadm + command: kubeadm reset -f + changed_when: true + ignore_errors: True + when: + - groups['manager'][0] != groups['compute'][0] + - groups['compute']|length >= 1 + - node_hostname.stdout in hostvars['K8S_TOKEN_HOLDER']['k8s_nodes_not_ready'] + tags: init -- name: "Kubeadmn join" +- name: Execute kubeadm join command shell: > kubeadm join --token={{ hostvars['K8S_TOKEN_HOLDER']['token'] }} --discovery-token-ca-cert-hash sha256:{{ hostvars['K8S_TOKEN_HOLDER']['hash'] }} - {{ hostvars['K8S_TOKEN_HOLDER']['ip'] }}:6443 - when: not single_node - tags: init - - -#- name: Join Computes to pool -# command: "{{ kubeJoinCommand }}" -# tags: init - + {{ hostvars['K8S_TOKEN_HOLDER']['ip'] }}:{{ apiserver_bind_port }} + when: + - groups['manager'][0] != groups['compute'][0] + - groups['compute']|length >= 1 + - node_hostname.stdout not in hostvars['K8S_TOKEN_HOLDER']['k8s_nodes_ready'] + tags: init \ No newline at end of file diff --git a/roles/k8s_start_workers/vars/main.yml b/roles/k8s_start_workers/vars/main.yml new file mode 100644 index 000000000..40108be65 --- /dev/null +++ b/roles/k8s_start_workers/vars/main.yml @@ -0,0 +1,16 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +apiserver_bind_port: 6443 diff --git a/slurm/roles/slurm-common/files/munge.key b/roles/slurm_common/files/munge.key similarity index 100% rename from slurm/roles/slurm-common/files/munge.key rename to roles/slurm_common/files/munge.key diff --git a/slurm/roles/slurm-common/files/slurm.conf b/roles/slurm_common/files/slurm.conf similarity index 71% rename from slurm/roles/slurm-common/files/slurm.conf rename to roles/slurm_common/files/slurm.conf index a57acb47e..6f49794c6 100644 --- a/slurm/roles/slurm-common/files/slurm.conf +++ b/roles/slurm_common/files/slurm.conf @@ -8,30 +8,32 @@ # # See the slurm.conf man page for more information. 
# -ClusterName=friday -ControlMachine=friday -ControlAddr=10.0.0.1 +ClusterName= +ControlMachine= +#ControlAddr= #BackupController= #BackupAddr= # -SlurmUser=slurm +SlurmUser= #SlurmdUser=root -SlurmctldPort=6817 -SlurmdPort=6818 +SlurmctldPort= +SlurmdPort= +SrunPortRange= AuthType=auth/munge #JobCredentialPrivateKey= #JobCredentialPublicCertificate= -StateSaveLocation=/var/spool/slurm/ctld -SlurmdSpoolDir=/var/spool/slurm/ +#StateSaveLocation=/var/spool/ +SlurmdSpoolDir= SwitchType=switch/none MpiDefault=none -SlurmctldPidFile=/var/run/slurmctld.pid -SlurmdPidFile=/var/run/slurmd.pid +SlurmctldPidFile= +SlurmdPidFile= ProctrackType=proctrack/pgid #PluginDir= #FirstJobId= ReturnToService=2 #MaxJobCount= +MailProg=/usr/bin/mail #PlugStackConfig= #PropagatePrioProcess= #PropagateResourceLimits= @@ -55,6 +57,7 @@ InactiveLimit=0 MinJobAge=300 KillWait=30 Waittime=0 +MessageTimeout=60 # # SCHEDULING SchedulerType=sched/backfill @@ -72,9 +75,9 @@ PriorityMaxAge=14-0 # # LOGGING SlurmctldDebug=3 -SlurmctldLogFile=/var/log/slurm/slurmctld.log +SlurmctldLogFile= SlurmdDebug=1 -SlurmdLogFile=/var/log/slurm/slurmd.log +SlurmdLogFile= JobCompType=jobcomp/none #JobCompLoc= # @@ -87,11 +90,6 @@ AccountingStorageType=accounting_storage/slurmdbd #AccountingStorageLoc= #AccountingStoragePass= #AccountingStorageUser= -# +AccountingStoragePort= # COMPUTE NODES -#NodeName=linux[1-32] Procs=1 State=UNKNOWN -#NodeName=DEFAULT Sockets=2 CoresPerSocket=20 State=UNKNOWN -NodeName=compute000 Sockets=2 CoresPerSocket=8 -NodeName=compute[002-005] CoresPerSocket=20 -PartitionName=normal Nodes=ALL Default=YES MaxTime=INFINITE State=UP -#PartitionName=debug Nodes=ALL Default=YES MaxTime=INFINITE State=UP +PartitionName=normal Nodes=ALL Default=YES MaxTime=INFINITE State=UP \ No newline at end of file diff --git a/roles/slurm_common/tasks/main.yml b/roles/slurm_common/tasks/main.yml new file mode 100644 index 000000000..aadee5a56 --- /dev/null +++ b/roles/slurm_common/tasks/main.yml @@ -0,0 +1,197 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
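The `slurm_common` role below fills in the keys that the reworked `slurm.conf` template deliberately leaves blank (`ClusterName=`, `SlurmUser=`, ports, spool and log paths), one `lineinfile` task per key. The pattern relies on the template containing exactly one bare `Key=` line per managed key, since `lineinfile` rewrites the last line matching `regexp`. A reduced sketch of the pattern, using an anchored regexp as a slightly stricter variant (an editorial illustration, not the project's exact code):

```yaml
# Sketch: fill one blank "Key=" line left in a config template.
# Anchoring with ^ avoids substring matches on commented variants;
# lineinfile replaces the last matching line, so the template should
# contain exactly one bare "ClusterName=" line.
- name: Set cluster name in slurm.conf
  lineinfile:
    path: /etc/slurm/slurm.conf
    regexp: '^ClusterName='
    line: "ClusterName={{ cluster_name }}"
```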
+--- + +- name: Get hostname + command: hostname + register: host_name + changed_when: true + +- name: Add host name in file + replace: + dest: "{{ hostname_dest }}" + regexp: localhost.localdomain + replace: "{{ host_name.stdout }}" + backup: yes + mode: "{{ common_mode }}" + +- name: Install packages for slurm + package: + name: "{{ item }}" + state: present + with_items: + - "{{ common_packages }}" + tags: install + +- name: Create munge key + command: "{{ munge_cmd }}" + changed_when: true + +- name: Copy munge key + copy: + src: munge.key + dest: "{{ munge_dest }}" + owner: munge + group: munge + mode: "{{ munge_mode }}" + tags: install + +- name: Slurm configuration - slurm.conf + copy: + src: slurm.conf + dest: "{{ slurm_dest }}" + mode: "{{ slurm_mode }}" + tags: install + +- name: Add cluster name + lineinfile: + path: "{{ slurm_confpth }}" + regexp: "ClusterName=" + line: "ClusterName={{ cluster_name }}" + +- name: Add slurm user name + lineinfile: + path: "{{ slurm_confpth }}" + regexp: "SlurmUser=" + line: "SlurmUser={{ slurm_user }}" + +- name: Add slurmctld port no + lineinfile: + path: "{{ slurm_confpth }}" + regexp: "SlurmctldPort=" + line: "SlurmctldPort={{ slurmctld_port }}" + +- name: Add slurmd port no + lineinfile: + path: "{{ slurm_confpth }}" + regexp: "SlurmdPort=" + line: "SlurmdPort={{ slurmd_port }}" + +- name: Add srun port range + lineinfile: + path: "{{ slurm_confpth }}" + regexp: "SrunPortRange=" + line: "SrunPortRange={{ srun_port_range }}" + +- name: Add spool path + lineinfile: + path: "{{ slurm_confpth }}" + regexp: "SlurmdSpoolDir=" + line: "SlurmdSpoolDir={{ spool_pth }}" + +- name: Add slurmctld pid file path + lineinfile: + path: "{{ slurm_confpth }}" + regexp: "SlurmctldPidFile=" + line: "SlurmctldPidFile={{ slurmctld_pid }}" + +- name: Add slurmd pid file path + lineinfile: + path: "{{ slurm_confpth }}" + regexp: "SlurmdPidFile=" + line: "SlurmdPidFile={{ slurmd_pid }}" + +- name: Add slurmctld log file path + lineinfile: + path: "{{ slurm_confpth }}" + regexp: "SlurmctldLogFile=" + line: "SlurmctldLogFile={{ slurmctld_log }}" + +- name: Add slurmd log file path + lineinfile: + path: "{{ slurm_confpth }}" + regexp: "SlurmdLogFile=" + line: "SlurmdLogFile={{ slurmd_log }}" + +- name: Add accounting storage port no + lineinfile: + path: "{{ slurm_confpth }}" + regexp: "AccountingStoragePort=" + line: "AccountingStoragePort={{ acct_port }}" + +- name: Create slurm group + group: + name: slurm + state: present + tags: install + +- name: Add the user 'slurm' with uid 6001 and a primary group of 'slurm' + user: + name: slurm + comment: Slurm User Account + uid: "{{ slurm_uid }}" + group: slurm + tags: install + +- name: Create slurm log directory + file: + path: "{{ slurm_logpth }}" + state: directory + owner: slurm + group: slurm + mode: "{{ gen_mode }}" + recurse: yes + tags: install + +- name: Give slurm user permission to spool + file: + path: "{{ spool_pth }}" + owner: slurm + group: slurm + state: directory + mode: "{{ gen_mode }}" + recurse: yes + +- name: Give slurm user permission to spool directory + file: + path: "{{ spool_dir }}" + owner: slurm + group: slurm + state: directory + mode: "{{ common_mode }}" + recurse: yes + +- name: Create slurm pid directory + file: + path: "{{ slurm_pidpth }}" + state: directory + owner: slurm + group: slurm + mode: "{{ gen_mode }}" + recurse: yes + tags: install + +- name: Give slurm user permission to slurmctld + file: + path: "{{ slurmctld_pid }}" + owner: slurm + group: slurm + mode: "{{ gen_mode }}" + 
state: touch
+
+- name: Give slurm user permission to slurmd
+  file:
+    path: "{{ slurmd_pid }}"
+    owner: slurm
+    group: slurm
+    mode: "{{ gen_mode }}"
+    state: touch
+
+- name: Start munge service
+  systemd:
+    name: munge
+    state: restarted
+    enabled: yes
+  tags: install
+  ignore_errors: yes
\ No newline at end of file
diff --git a/roles/slurm_common/vars/main.yml b/roles/slurm_common/vars/main.yml
new file mode 100644
index 000000000..de930ad82
--- /dev/null
+++ b/roles/slurm_common/vars/main.yml
@@ -0,0 +1,50 @@
+# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+
+common_packages:
+  - munge
+  - munge-libs
+  - munge-devel
+  - mariadb-server
+  - mariadb-devel
+  - man2html
+  - MySQL-python
+  - python-netaddr
+
+hostname_dest: "/etc/hostname"
+hosts_dest: "/etc/hosts"
+munge_dest: "/etc/munge/"
+munge_cmd: "/usr/sbin/create-munge-key -f"
+munge_mode: "0400"
+slurm_mode: "0644"
+common_mode: "0777"
+slurm_dest: "/etc/slurm/"
+slurm_confpth: "/etc/slurm/slurm.conf"
+slurm_user: "slurm"
+slurmctld_port: "6817"
+slurmd_port: "6818"
+srun_port_range: "60001-63000"
+acct_port: "6819"
+slurm_uid: "6001"
+slurm_logpth: "/var/log/slurm/"
+slurm_pidpth: "/var/run/slurm/"
+gen_mode: "0755"
+spool_dir: "/var/spool/"
+spool_pth: "/var/spool/slurm/"
+slurmctld_pid: "/var/run/slurmctld.pid"
+slurmd_pid: "/var/run/slurmd.pid"
+cluster_name: "manager,compute"
+slurmctld_log: "/var/log/slurm/slurmctld.log"
+slurmd_log: "/var/log/slurm/slurmd.log"
\ No newline at end of file
diff --git a/roles/slurm_exporter/files/prometheus-slurm-exporter.service b/roles/slurm_exporter/files/prometheus-slurm-exporter.service
new file mode 100644
index 000000000..6dcfac52e
--- /dev/null
+++ b/roles/slurm_exporter/files/prometheus-slurm-exporter.service
@@ -0,0 +1,10 @@
+[Unit]
+Description = Start prometheus slurm exporter
+
+[Service]
+ExecStart = /usr/bin/prometheus-slurm-exporter
+Restart = always
+RestartSec = 15
+
+[Install]
+WantedBy = multi-user.target
\ No newline at end of file
diff --git a/roles/slurm_exporter/files/slurm_exporter_config.yaml b/roles/slurm_exporter/files/slurm_exporter_config.yaml
new file mode 100644
index 000000000..cd332d712
--- /dev/null
+++ b/roles/slurm_exporter/files/slurm_exporter_config.yaml
@@ -0,0 +1,30 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: prometheus-slurmexporter-metrics-2
+  namespace: default
+  annotations:
+    prometheus.io/scrape: 'true'
+  labels:
+    app: prometheus
+    app.kubernetes.io/managed-by: Helm
+    chart: prometheus-11.12.1
+    component: server
+spec:
+  ports:
+    - name: metrics
+      port: 8080
+      protocol: TCP
+      targetPort: 8080
+  selector:
+    app: prometheus
+    component: server
+  additionalScrapeConfigs:
+    name: prometheus-config
+    key: prometheus-config.yaml
+  job_name: 'prometheus-slurm-exporter'
+  scrape_interval: 15s
+  static_configs:
+    - targets:
+        - "{{ inventory_hostname }}:8080"
+  serviceMonitorSelector: {}
\ No newline at end of file
diff --git 
a/roles/slurm_exporter/tasks/install_prometheus.yml b/roles/slurm_exporter/tasks/install_prometheus.yml new file mode 100644 index 000000000..ad18f3335 --- /dev/null +++ b/roles/slurm_exporter/tasks/install_prometheus.yml @@ -0,0 +1,40 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +- name: Download and untar prometheus stable version + unarchive: + src: "{{ prometheus_git_repo }}" + dest: "{{ installation_dir }}" + remote_src: yes + +- name: Copy prometheus executable to /usr/local/bin + copy: + src: "{{ prometheus_exec_path }}" + dest: "{{ system_local_path }}" + remote_src: yes + mode: "{{ file_permission }}" + +- name: Configure prometheus for slurm exporter + blockinfile: + path: "{{ prometheus_config_file }}" + insertafter: EOF + mode: "{{ file_permission }}" + block: | + # SLURM resource manager: + - job_name: 'my_slurm_exporter' + scrape_interval: 30s + scrape_timeout: 30s + static_configs: + - targets: ['localhost:8080'] \ No newline at end of file diff --git a/roles/slurm_exporter/tasks/install_slurm_exporter.yml b/roles/slurm_exporter/tasks/install_slurm_exporter.yml new file mode 100644 index 000000000..2c0bf7177 --- /dev/null +++ b/roles/slurm_exporter/tasks/install_slurm_exporter.yml @@ -0,0 +1,65 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
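For readers unfamiliar with Prometheus configuration: the `blockinfile` task in `install_prometheus.yml` above appends its block at end of file, which lands inside the `scrape_configs:` section of the stock `prometheus.yml` shipped in the 2.23.0 tarball. Assuming that stock layout, the tail of the file after the task runs would look roughly like this (the marker comments are added by `blockinfile` itself):

```yaml
# Approximate tail of prometheus.yml after the blockinfile task runs,
# assuming the unmodified config from the 2.23.0 release tarball.
scrape_configs:
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']
  # BEGIN ANSIBLE MANAGED BLOCK
  # SLURM resource manager:
  - job_name: 'my_slurm_exporter'
    scrape_interval: 30s
    scrape_timeout: 30s
    static_configs:
      - targets: ['localhost:8080']
  # END ANSIBLE MANAGED BLOCK
```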
+---
+
+- name: Download and untar Go package
+  unarchive:
+    src: "{{ go_pack_url }}"
+    dest: "{{ installation_dir }}"
+    remote_src: yes
+
+- name: Add Go to the Linux PATH
+  shell: echo $PATH
+  environment:
+    PATH: "{{ extended_path }}:{{ ansible_env.PATH }}"
+  changed_when: False
+
+- name: Clone the source code
+  git:
+    repo: "{{ slurm_exporter_git_repo }}"
+    dest: "{{ slurm_exporter_inst_dir }}"
+    version: "{{ stable_commit_id }}"
+
+- name: Export GOPATH
+  shell: echo $GOPATH
+  environment:
+    PATH: "{{ ansible_env.PATH }}:/var/lib/go/bin/"
+    GOPATH: "{{ go_modules_path }}"
+  changed_when: False
+
+- name: Download dependencies
+  command: "{{ go_exec_path }} mod download"
+  args:
+    chdir: "{{ slurm_exporter_inst_dir }}"
+  changed_when: False
+
+- name: Build the exporter
+  shell: "{{ go_exec_path }} build -o bin/prometheus-slurm-exporter {main,accounts,cpus,nodes,partitions,queue,scheduler,users}.go"
+  args:
+    chdir: "{{ slurm_exporter_inst_dir }}"
+  changed_when: False
+
+- name: Run all tests included in _test.go files
+  shell: "{{ go_exec_path }} test -v *.go"
+  args:
+    chdir: "{{ slurm_exporter_inst_dir }}"
+  changed_when: False
+  ignore_errors: yes
+
+- name: Copy executable to /usr/bin
+  copy:
+    src: "{{ slurm_exporter_exec }}"
+    dest: "{{ system_path }}"
+    remote_src: yes
+    mode: "{{ file_permission }}"
\ No newline at end of file
diff --git a/roles/slurm_exporter/tasks/main.yml b/roles/slurm_exporter/tasks/main.yml
new file mode 100644
index 000000000..e1ec5986a
--- /dev/null
+++ b/roles/slurm_exporter/tasks/main.yml
@@ -0,0 +1,39 @@
+# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+
+- name: Verify if slurm-exporter is already installed
+  command: ls /usr/bin/prometheus-slurm-exporter
+  register: slurm_exporter_status
+  changed_when: False
+  ignore_errors: yes
+
+- name: Install slurm exporter
+  include_tasks: install_slurm_exporter.yml
+  when: "'No such file or directory' in slurm_exporter_status.stderr"
+
+- name: Start slurm exporter services
+  include_tasks: start_services.yml
+
+- name: Verify if kubernetes is already installed
+  command: ls /usr/bin/kubectl
+  register: k8s_installation_status
+  changed_when: False
+  ignore_errors: yes
+
+- name: Install prometheus on host
+  include_tasks: install_prometheus.yml
+  when:
+    - "'kubernetes' in ansible_skip_tags"
+    - "'No such file' in k8s_installation_status.stderr"
\ No newline at end of file
diff --git a/roles/slurm_exporter/tasks/start_services.yml b/roles/slurm_exporter/tasks/start_services.yml
new file mode 100644
index 000000000..c2bf6996d
--- /dev/null
+++ b/roles/slurm_exporter/tasks/start_services.yml
@@ -0,0 +1,27 @@
+# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +- name: Create systemd unit file + copy: + src: "{{ role_path }}/files/prometheus-slurm-exporter.service" + dest: "{{ systemd_path_dest }}" + remote_src: no + mode: "{{ file_permission }}" + +- name: Start services + systemd: + name: prometheus-slurm-exporter + state: restarted + enabled: yes \ No newline at end of file diff --git a/roles/slurm_exporter/vars/main.yml b/roles/slurm_exporter/vars/main.yml new file mode 100644 index 000000000..f5ac703be --- /dev/null +++ b/roles/slurm_exporter/vars/main.yml @@ -0,0 +1,40 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +# Usage: install_slurm_exporter.yml +installation_dir: "/var/lib/" +slurm_exporter_inst_dir: "/var/lib/slurm-exporter" +go_pack_url: "https://dl.google.com/go/go1.15.linux-amd64.tar.gz" +extended_path: "{{ installation_dir }}/go/bin" +go_exec_path: "{{ installation_dir }}/go/bin/go" +slurm_exporter_git_repo: "https://github.com/vpenso/prometheus-slurm-exporter.git" +go_modules_path: "{{ slurm_exporter_inst_dir }}/go/modules" +slurm_exporter_exec: "{{ slurm_exporter_inst_dir }}/bin/prometheus-slurm-exporter" +system_path: "/usr/bin" +stable_commit_id: "00a7dee" + +#Usage: install_prometheus.yml +prometheus_git_repo: "https://github.com/prometheus/prometheus/releases/download/v2.23.0/prometheus-2.23.0.linux-amd64.tar.gz" +prometheus_inst_path: "/var/lib/prometheus-2.23.0.linux-amd64/" +prometheus_exec_path: "{{ prometheus_inst_path }}/prometheus" +system_local_path: "/usr/local/bin" +prometheus_config_file: "{{ prometheus_inst_path }}/prometheus.yml" + +#Usage: start_service.yml +file_permission: "0755" +systemd_path_dest: "/etc/systemd/system/" + +#Usage: configure_prometheus_pod.yml +slurm_config_file: "slurm_exporter_config.yaml" \ No newline at end of file diff --git a/roles/slurm_manager/files/slurmdbd.conf b/roles/slurm_manager/files/slurmdbd.conf new file mode 100644 index 000000000..38d4bc96a --- /dev/null +++ b/roles/slurm_manager/files/slurmdbd.conf @@ -0,0 +1,38 @@ +# +# Example slurmdbd.conf file. +# +# See the slurmdbd.conf man page for more information. 
+# +# Archive info +#ArchiveJobs=yes +#ArchiveDir="/tmp" +#ArchiveSteps=yes +#ArchiveScript= +#JobPurge=12 +#StepPurge=1 +# +# Authentication info +AuthType=auth/munge +#AuthInfo=/var/run/munge/munge.socket.2 +# +# slurmDBD info +DbdAddr= +DbdHost= +#DbdPort=6019 +SlurmUser= +#MessageTimeout=300 +DebugLevel=verbose +#DefaultQOS=normal,standby +LogFile= +PidFile= +#PluginDir=/usr/lib/slurm +#PrivateData=accounts,users,usage,jobs +#TrackWCKey=yes +# +# Database info +StorageType=accounting_storage/mysql +#StorageHost= +#StoragePort= +StoragePass= +StorageUser= +#StorageLoc= \ No newline at end of file diff --git a/roles/slurm_manager/tasks/main.yml b/roles/slurm_manager/tasks/main.yml new file mode 100644 index 000000000..4031e55dd --- /dev/null +++ b/roles/slurm_manager/tasks/main.yml @@ -0,0 +1,241 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +- name: Give slurm user permission to slurmctld spool + file: + path: "{{ spool_slurmctld_pth }}" + owner: slurm + group: slurm + mode: "{{ tmp_mode }}" + state: touch + +- name: Give slurm ownership to cluster state + file: + path: "{{ cluster_state_path }}" + owner: slurm + group: slurm + mode: "{{ tmp_mode }}" + state: touch + +- name: Create slurmctld log file on manager + file: + path: "{{ slurm_logpth }}" + owner: slurm + mode: "{{ tmp_mode }}" + state: touch + with_items: + - slurmctld.log + +- name: Create log files on manager + file: + path: "{{ slurm_logpth }}" + owner: slurm + mode: "{{ tmp_mode }}" + state: touch + with_items: + - "{{ log_files_manager }}" + +- name: Install packages for slurm + package: + name: "{{ item }}" + state: present + with_items: + - "{{ slurm_packages }}" + tags: install + +- name: Install development tools + package: + name: "{{ item }}" + state: present + with_items: + - "{{ dev_tools }}" + tags: install + +- name: Verify if slurm is installed + shell: rpm -qa | grep slurm + ignore_errors: true + register: verify_result + changed_when: no + failed_when: no + args: + warn: no + +- name: Create temporary download folder for slurm + file: + path: "{{ tmp_path }}" + owner: slurm + group: slurm + mode: "{{ tmp_mode }}" + state: directory + when: verify_result.rc != 0 + +- name: Download slurm source + get_url: + url: "{{ slurm_url }}" + dest: "{{ tmp_path }}" + checksum: "{{ slurm_md5 }}" + validate_certs: no + tags: install + when: verify_result.rc != 0 + +- name: Build slurm rpms + command: rpmbuild -ta "{{ rpmbuild_path }}" --with mysql + changed_when: false + when: verify_result.rc != 0 + args: + warn: no + +- name: Install rpms + command: rpm -Uvh ~"{{ rpm_loop }}" + args: + chdir: "{{ rpm_path }}" + warn: no + changed_when: true + when: verify_result.rc != 0 + +- name: Get the hostname + command: hostname + register: machine_name + changed_when: true + +- name: Add control machine name + lineinfile: + path: "{{ slurm_confpth }}" + regexp: "ControlMachine=" + line: "ControlMachine={{ machine_name.stdout }}" + +- name: Add slurm user 
name + lineinfile: + path: "{{ slurm_confpth }}" + regexp: "SlurmUser=" + line: "SlurmUser={{ slurm_user }}" + +- name: Install firewalld + package: + name: firewalld + state: present + tags: firewalld + +- name: Start and enable firewalld + service: + name: firewalld + state: started + enabled: yes + tags: firewalld + +- name: Firewall rule for slurm - tcp/udp ports + firewalld: + zone: public + port: "{{ item }}" + permanent: true + state: enabled + with_items: + - "{{ tcp_port1 }}" + - "{{ tcp_port2 }}" + - "{{ tcp_port3 }}" + - "{{ tcp_port4 }}" + - "{{ udp_port3 }}" + - "{{ udp_port1 }}" + - "{{ udp_port2 }}" + when: "'manager' in group_names" + tags: firewalld + +- name: Get network address/subnet mask + set_fact: + network_address: "{{ (ansible_default_ipv4.network + '/' + ansible_default_ipv4.netmask) | ipaddr('network/prefix') }}" + +- name: Firewall rule slurm - allow all incoming traffic on internal network + firewalld: + zone: public + rich_rule: 'rule family="{{ family }}" source address="{{ network_address }}" accept' + permanent: true + state: enabled + tags: firewalld + +- name: Reload firewalld + command: firewall-cmd --reload + changed_when: true + tags: firewalld + +- name: Start mariadb + systemd: + name: mariadb + state: restarted + enabled: yes + tags: install + +- name: Grant permissions for slurm db + command: >- + mysql -u root -e "GRANT ALL ON slurm_acct_db.* TO '{{ db_user }}'@'{{ + db_host }}' identified by '{{ hostvars['127.0.0.1']['db_password'] }}'with + grant option;" + tags: install + changed_when: true + +- name: Create slurmdbd.conf file + copy: + src: slurmdbd.conf + dest: "{{ slurmdbd_path }}" + mode: "{{ slurmdbd_mode }}" + owner: slurm + tags: install + +- name: Add slurm user name + lineinfile: + path: "{{ slurmdbd_path }}" + regexp: "SlurmUser=" + line: "SlurmUser={{ slurm_user }}" + +- name: Add db address + lineinfile: + path: "{{ slurmdbd_path }}" + regexp: "DbdAddr=" + line: "DbdAddr={{ DbdAddr }}" + +- name: Add db host + lineinfile: + path: "{{ slurmdbd_path }}" + regexp: "DbdHost=" + line: "DbdHost={{ DbdHost }}" + +- name: Add storage password + lineinfile: + path: "{{ slurmdbd_path }}" + regexp: "StoragePass=" + line: "StoragePass={{ hostvars['127.0.0.1']['db_password'] }}" + +- name: Add storage user + lineinfile: + path: "{{ slurmdbd_path }}" + regexp: "StorageUser=" + line: "StorageUser={{ slurm_user }}" + +- name: Add log file path + lineinfile: + path: "{{ slurmdbd_path }}" + regexp: "LogFile=" + line: "LogFile={{ logfile }}" + +- name: Add pid file path + lineinfile: + path: "{{ slurmdbd_path }}" + regexp: "PidFile=" + line: "PidFile={{ pidfile }}" + +- name: Save slurm conf file in buffer + fetch: + src: "{{ slurm_confpth }}" + dest: "{{ buffer_path }}" + flat: true \ No newline at end of file diff --git a/roles/slurm_manager/vars/main.yml b/roles/slurm_manager/vars/main.yml new file mode 100644 index 000000000..4241354a5 --- /dev/null +++ b/roles/slurm_manager/vars/main.yml @@ -0,0 +1,76 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +--- + +slurm_packages: + - gcc + - openssl + - numactl + - hwloc + - lua + - readline + - perl-ExtUtils-MakeMaker + - rpm-build + - perl-DBI + - perl-Switch + - libibumad + - git + +dev_tools: + - rrdtool-devel + - lua-devel + - hwloc-devel + - libssh2-devel + - pam-devel + - readline-devel + - openssl-devel + - numactl-devel + - ncurses-devel + - gtk2-devel + +log_files_manager: + - slurm_jobacct.log + - slurm_jobcomp.log + +tmp_path: "/root/slurm-tmp" +tmp_mode: "0755" +cluster_state_path: "/var/spool/slurm/cluster_state" +spool_slurmctld_pth: "/var/spool/slurmctld" +spool_slurmd_pth: "/var/spool/slurmd" +slurm_logpth: "/var/log/slurm/" +slurm_url: https://download.schedmd.com/slurm/slurm-20.11.2.tar.bz2 +slurm_md5: "md5:592b8b24ff0f24327033eec59cd438d7" +rpmbuild_path: "/root/slurm-tmp/slurm-20.11.2.tar.bz2" +rpm_loop: "/rpmbuild/RPMS/x86_64/*.rpm" +tcp_port1: "6817/tcp" +tcp_port2: "6818/tcp" +tcp_port3: "6819/tcp" +tcp_port4: "60001-63000/tcp" +udp_port1: "6817/udp" +udp_port2: "6818/udp" +udp_port3: "6819/udp" +family: "ipv4" +db_user: "slurm" +db_host: "localhost" +slurmdbd_path: "/etc/slurm/slurmdbd.conf" +slurmdbd_mode: "0600" +slurm_confpth: "/etc/slurm/slurm.conf" +slurm_user: "slurm" +DbdAddr: "localhost" +DbdHost: "localhost" +logfile: "/var/log/slurm/slurmdbd.log" +pidfile: "/var/run/slurmdbd.pid" +buffer_path: "/tmp/slurm.conf" +rpm_path: "/root/rpmbuild/RPMS/x86_64/" +slurm_mode: "0644" \ No newline at end of file diff --git a/roles/slurm_start_services/tasks/main.yml b/roles/slurm_start_services/tasks/main.yml new file mode 100644 index 000000000..b016b3a98 --- /dev/null +++ b/roles/slurm_start_services/tasks/main.yml @@ -0,0 +1,74 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+---
+
+- name: Include slurm_manager variables
+  include_vars: ../../slurm_manager/vars/main.yml
+
+- name: Include slurm_common variables
+  include_vars: ../../slurm_common/vars/main.yml
+
+- name: Copy slurm conf from buffer
+  copy:
+    src: "{{ buffer_path }}"
+    dest: "{{ slurm_confpth }}"
+    mode: "{{ slurm_mode }}"
+
+- name: Enable slurmdbd on manager
+  systemd:
+    name: slurmdbd
+    state: restarted
+    enabled: yes
+  tags: install
+
+- name: Start slurmctld on manager
+  systemd:
+    name: slurmctld
+    state: restarted
+    enabled: yes
+  tags: install
+
+- name: Check slurmdbd is active
+  systemd:
+    name: slurmdbd
+  register: slurmdbd_status
+  until: 'slurmdbd_status.status.ActiveState=="active"'
+  retries: 20
+
+- name: Show cluster if it exists
+  command: sacctmgr -n show cluster {{ cluster_name }}
+  register: slurm_clusterlist
+  changed_when: false
+
+- name: Create slurm cluster
+  command: sacctmgr -i add cluster {{ cluster_name }}
+  when: not slurm_clusterlist.stdout
+
+- name: Show account
+  command: sacctmgr show account -s
+  register: account_added
+  changed_when: false
+
+- name: Create default slurm group
+  command: sacctmgr -i add account defaultgroup Cluster={{ cluster_name }} Description="Default Account" Organization="Default Org"
+  when: account_added.rc != 0
+
+- name: Check if user exists
+  command: sacctmgr show user -s
+  register: user_added
+  changed_when: false
+
+- name: Add root to the default account
+  command: sacctmgr -i add user root DefaultAccount=defaultgroup
+  when: user_added.rc != 0
\ No newline at end of file
diff --git a/roles/slurm_workers/tasks/main.yml b/roles/slurm_workers/tasks/main.yml
new file mode 100644
index 000000000..56662bc00
--- /dev/null
+++ b/roles/slurm_workers/tasks/main.yml
@@ -0,0 +1,151 @@
+# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
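The accounting bootstrap in `slurm_start_services` above stays idempotent by probing with `sacctmgr show ...` before each `sacctmgr -i add ...`. A post-run assertion play could verify the registrations landed; this is a sketch under stated assumptions (the parseable `-n -P` output format and the default `cluster_name` from `slurm_common/vars` are assumed):

```yaml
# Hypothetical post-run check that the cluster was registered in
# slurmdbd. Illustrative only; not part of this diff.
- hosts: manager
  tasks:
    - name: List registered clusters (parseable, no header)
      command: sacctmgr -n -P show cluster
      register: clusters
      changed_when: false

    - name: Assert the cluster exists
      assert:
        that:
          - "'manager,compute' in clusters.stdout"
        fail_msg: "slurm cluster was not registered in slurmdbd"
```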
+---
+
+- name: Include slurm_manager variables
+  include_vars: ../../slurm_manager/vars/main.yml
+
+- name: Give slurm user permission to slurmd spool
+  file:
+    path: "{{ spool_slurmd_pth }}"
+    owner: slurm
+    group: slurm
+    mode: "{{ tmp_mode }}"
+    state: touch
+
+- name: Create log files on compute nodes
+  file:
+    path: "{{ slurm_logpth }}"
+    owner: slurm
+    group: slurm
+    mode: "{{ tmp_mode }}"
+    state: touch
+  with_items:
+    - slurmd.log
+
+- name: Install firewalld
+  package:
+    name: firewalld
+    state: present
+  tags: firewalld
+
+- name: Stop and disable firewalld
+  service:
+    name: firewalld
+    state: stopped
+    enabled: no
+  tags: firewalld
+
+- name: Copy slurm conf from buffer
+  copy:
+    src: "{{ buffer_path }}"
+    dest: "{{ slurm_confpth }}"
+    mode: "{{ slurm_mode }}"
+
+- name: Install packages for slurm
+  package:
+    name: "{{ item }}"
+    state: present
+  with_items:
+    - "{{ slurm_packages }}"
+  tags: install
+
+- name: Install development tools
+  package:
+    name: "{{ item }}"
+    state: present
+  with_items:
+    - "{{ dev_tools }}"
+  tags: install
+
+- name: Verify if slurm is installed
+  shell: rpm -qa | grep slurm
+  ignore_errors: true
+  register: verify_result
+  changed_when: no
+  failed_when: no
+  args:
+    warn: no
+
+- name: Create temporary download folder for slurm
+  file:
+    path: "{{ tmp_path }}"
+    owner: slurm
+    group: slurm
+    mode: "{{ tmp_mode }}"
+    state: directory
+  when: verify_result.rc != 0
+
+- name: Download slurm source
+  get_url:
+    url: "{{ slurm_url }}"
+    dest: "{{ tmp_path }}"
+    checksum: "{{ slurm_md5 }}"
+    validate_certs: no
+  tags: install
+  when: verify_result.rc != 0
+
+- name: Build slurm rpms
+  command: rpmbuild -ta "{{ rpmbuild_path }}" --with mysql
+  changed_when: false
+  when: verify_result.rc != 0
+  args:
+    warn: no
+
+- name: Install rpms
+  command: rpm -Uvh ~"{{ rpm_loop }}"
+  args:
+    chdir: "{{ rpm_path }}"
+    warn: no
+  changed_when: true
+  when: verify_result.rc != 0
+
+- name: Get the hostname
+  command: hostname
+  register: machine_name
+  changed_when: true
+
+- name: Set compute node hostname/host ip to add in manager hosts file
+  set_fact:
+    compute_host: "{{ inventory_hostname }}"
+    compute_ip: "{{ machine_name.stdout }}"
+
+- name: Get socket and core info from compute nodes
+  set_fact:
+    node_name: "{{ machine_name.stdout }}"
+    sockets: "{{ hostvars[inventory_hostname]['ansible_facts']['processor_count'] }}"
+    cores: "{{ hostvars[inventory_hostname]['ansible_facts']['processor_cores'] }}"
+
+- name: Add compute nodes core & socket info in slurm config file
+  lineinfile:
+    dest: "{{ slurm_confpth }}"
+    line: "NodeName={{ hostvars[item].node_name }} Sockets={{ hostvars[item].sockets }} CoresPerSocket={{ hostvars[item].cores }}"
+    state: present
+    create: yes
+    mode: "{{ slurm_mode }}"
+  with_items:
+    - "{{ groups['compute'] }}"
+
+- name: Save slurm conf in buffer
+  fetch:
+    src: "{{ slurm_confpth }}"
+    dest: "{{ buffer_path }}"
+    flat: true
+
+- name: Start slurmd on compute nodes
+  systemd:
+    name: slurmd.service
+    state: started
+    enabled: yes
+  tags: install
\ No newline at end of file
diff --git a/slurm/roles/slurm-common/tasks/main.yaml b/slurm/roles/slurm-common/tasks/main.yaml
deleted file mode 100644
index 82d1726f2..000000000
--- a/slurm/roles/slurm-common/tasks/main.yaml
+++ /dev/null
@@ -1,104 +0,0 @@
-# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. ---- - -- name: install packages for slurm - yum: - name: - - munge - - mariadb - - mariadb-devel - - python3 - state: present - tags: install - -- name: create munge key - command: /usr/sbin/create-munge-key -f - tags: install - -- name: Copy munge key - copy: - src: munge.key - dest: /etc/munge - owner: munge - group: munge - mode: 0400 - tags: install - -- name: Copy example Slurm Configuration - slurm.conf - copy: - src: slurm.conf - dest: /etc/slurm/ - mode: 0644 - tags: install - - -- name: create SLURM Group - group: - name: slurm - state: present - tags: install - -- name: Add the user 'slurm' with uid 6001 and a primary group of 'slurm' - user: - name: slurm - comment: Slurm User Account - uid: 6001 - group: slurm - tags: install - -- name: create SLURM log directory - file: - path: /var/log/slurm - state: directory - owner: slurm - group: slurm - mode: 0755 - recurse: yes - tags: install - -- name: give slurm user permission to spool - file: - path: /var/spool/slurm - owner: slurm - group: slurm - state: directory - mode: 0755 - recurse: yes - -- name: give slurm user permission to slurmctld - file: - path: /var/run/slurmctld.pid - owner: slurm - group: slurm - mode: 0755 - state: touch - -- name: give slurm user permission to slurmd - file: - path: /var/run/slurmd.pid - owner: slurm - group: slurm - mode: 0755 - state: touch - -- name: start munge service - service: - name: munge - state: restarted - enabled: yes - tags: install - - - diff --git a/slurm/roles/slurm-master/tasks/main.yaml b/slurm/roles/slurm-master/tasks/main.yaml deleted file mode 100644 index 2f4af3d71..000000000 --- a/slurm/roles/slurm-master/tasks/main.yaml +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- ---- - -- name: Download Slurm source - get_url: - url: "{{ slurm_url }}" - dest: /root/Downloads/ - checksum: "{{ slurm_md5 }}" - tags: install - -- name: Build SLURM RPMs - command: rpmbuild -ta /root/Downloads/slurm-20.02.0.tar.bz2 - tags: install - -- name: Copy RPMs to NFS share - copy: - src: "{{ item }}" - dest: /home/rpms/ - with_fileglob: - - /root/rpmbuild/RPMS/x86_64/slurm*20*.rpm - tags: install - -- name: Install SLURM RPMs on Master - yum: - name: "{{ item }}" - #name: "{{ query('fileglob', ['/home/rpms/slurm*20*.rpm']) }}" <-- how it should work to avoid loop - with_fileglob: - - /home/rpms/slurm*20*.rpm - tags: install - -- name: Firewall Rule slurm allow 6817/tcp - command: firewall-cmd --zone=internal --add-port=6817/tcp --permanent - tags: install - -- name: Firewall Rule slurm allow 6818/tcp - command: firewall-cmd --zone=internal --add-port=6818/tcp --permanent - tags: install - -- name: Firewall Rule slurm allow 6819/tcp - command: firewall-cmd --zone=internal --add-port=6819/tcp --permanent - tags: install - -- name: Firewall Rule slurm allow all incoming traffic on internal network - command: firewall-cmd --permanent --zone=internal --add-rich-rule='rule family="ipv4" source address="192.168.1.0/24" accept' - tags: install - -- name: Firewall Reload - command: firewall-cmd --reload - tags: install - - -- name: Start MariaDB - service: - name: mariadb - state: restarted - enabled: yes - tags: install - -- name: Grant Permissions for SLURM DB - command: mysql -u root -e "GRANT ALL ON slurm_acct_db.* TO 'slurm'@'localhost' identified by 'password' with grant option;" - tags: install - -- name: Create slurmdbd.conf file - copy: - src: /etc/slurm/slurmdbd.conf.example - dest: /etc/slurm/slurmdbd.conf - mode: 0600 - tags: install - -- name: Populate Accounting Database - command: slurmdbd - tags: install - -- name: Create Slurm Cluster - command: sacctmgr -i add cluster {{inventory_hostname}} - tags: install - -- name: Create Default Slurm Group - command: sacctmgr -i add account defaultgroup Cluster={{inventory_hostname}} Description="Default Account" Organization="Default Org" - tags: install - -- name: Add root to the Default Account - command: sacctmgr -i add user root DefaultAccount=defaultgroup - tags: install - -- name: Start slurmctld on Master - service: - name: slurmctld - state: restarted - enabled: yes - tags: install - -- name: Enable Slurmdbd on Master - service: - name: slurmdbd - state: restarted - enabled: yes - tags: install - diff --git a/slurm/slurm_inventory_file b/slurm/slurm_inventory_file deleted file mode 100644 index 9edcee009..000000000 --- a/slurm/slurm_inventory_file +++ /dev/null @@ -1,18 +0,0 @@ -[master] -friday - -[master:vars] -slurm_url=https://download.schedmd.com/slurm/slurm-20.02.0.tar.bz2 -slurm_md5=md5:8ed2257471ff24ca213b510a4c1c3563 - -[compute] -compute000 -compute[002:005] - - -[workers:children] -compute - -[cluster:children] -master -workers diff --git a/test/test_common.yml b/test/test_common.yml new file mode 100644 index 000000000..a6b2db3be --- /dev/null +++ b/test/test_common.yml @@ -0,0 +1,68 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +# Testcase OMNIA_USP_US_VFSP_TC_009 +# Execute common role in manager & compute nodes with os installed centos 7.9 +- name: OMNIA_USP_US_VFSP_TC_009 + hosts: manager, compute + vars_files: + - test_vars/test_common_vars.yml + tasks: + - block: + - name: Call common role + include_role: + name: ../roles/common + tags: TC_009 + + - name: Checking ntpd service status + systemd: + name: ntpd + register: ntpd_service + tags: TC_009, VERIFY_009 + + - name: Validating ntpd service status + assert: + that: + - ntpd_service.status.ActiveState == 'active' + fail_msg: "{{ ntpd_service_fail_msg }}" + success_msg: "{{ ntpd_service_success_msg }}" + tags: TC_009, VERIFY_009 + +# Testcase OMNIA_USP_US_VFSP_TC_010 +# Execute common role in manager & compute nodes with common role already executed once +- name: OMNIA_USP_US_VFSP_TC_010 + hosts: manager, compute + vars_files: + - test_vars/test_common_vars.yml + tasks: + - block: + - name: Call common role + include_role: + name: ../roles/common + tags: TC_010 + + - name: Checking ntpd service status + systemd: + name: ntpd + register: ntpd_service + tags: TC_010, VERIFY_010 + + - name: Validating ntpd service status + assert: + that: + - ntpd_service.status.ActiveState == 'active' + fail_msg: "{{ ntpd_service_fail_msg }}" + success_msg: "{{ ntpd_service_success_msg }}" + tags: TC_010, VERIFY_010 \ No newline at end of file diff --git a/test/test_jupyterhub.yml b/test/test_jupyterhub.yml new file mode 100644 index 000000000..25abd5ae0 --- /dev/null +++ b/test/test_jupyterhub.yml @@ -0,0 +1,113 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
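The ntpd checks in test_common.yml rely on the `systemd` module returning a unit's properties when called with only a `name` (no `state`), so the test can assert on `status.ActiveState` without touching the service. A standalone sketch of that verification, with literal messages standing in for the vars-file strings:

```yaml
- hosts: manager
  tasks:
    # With no state argument, the systemd module only reports on the unit.
    - name: Read ntpd unit state
      systemd:
        name: ntpd
      register: ntpd_service

    - name: Assert ntpd is running
      assert:
        that:
          - ntpd_service.status.ActiveState == 'active'
        fail_msg: "ntpd is not active"
        success_msg: "ntpd is active"
```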
+---
+
+# OMNIA_UKP_US_VFKP_TC_014
+# Execute jupyterhub role in manager nodes with os installed centos 7.9
+- name: OMNIA_UKP_US_VFKP_TC_014
+  hosts: manager
+  gather_facts: false
+  vars_files:
+    - test_vars/test_jupyterhub_vars.yml
+  tasks:
+    - block:
+        - name: Call jupyterhub role
+          include_role:
+            name: ../platforms/roles/jupyterhub
+          tags: TC_014
+
+        - name: Waiting for the pods deployment
+          pause:
+            minutes: 5
+          tags: TC_014
+
+        - name: Checking all running pods under default namespace
+          command: kubectl get pods --namespace default --field-selector=status.phase=Running
+          register: namespace_default_running_pods
+          changed_when: false
+          ignore_errors: True
+          tags: TC_014, VERIFY_014
+
+        - name: Checking K8s services
+          command: kubectl get services
+          register: k8s_services
+          changed_when: false
+          ignore_errors: True
+          tags: TC_014, VERIFY_014
+
+        - name: Validating JupyterHub pods
+          assert:
+            that:
+              - "'hub' in namespace_default_running_pods.stdout"
+              - "'proxy' in namespace_default_running_pods.stdout"
+            fail_msg: "{{ jupyterhub_pods_fail_msg }}"
+            success_msg: "{{ jupyterhub_pods_success_msg }}"
+          tags: TC_014, VERIFY_014
+
+        - name: Validating JupyterHub services
+          assert:
+            that:
+              - "'hub' in k8s_services.stdout"
+              - "'proxy-public' in k8s_services.stdout"
+              - "'proxy-api' in k8s_services.stdout"
+            fail_msg: "{{ jupyterhub_services_fail_msg }}"
+            success_msg: "{{ jupyterhub_services_success_msg }}"
+          tags: TC_014, VERIFY_014
+
+# OMNIA_UKP_US_VFKP_TC_015
+# Execute jupyterhub role in manager nodes with JupyterHub already deployed
+- name: OMNIA_UKP_US_VFKP_TC_015
+  hosts: manager
+  gather_facts: false
+  vars_files:
+    - test_vars/test_jupyterhub_vars.yml
+  tasks:
+    - block:
+        - name: Call jupyterhub role
+          include_role:
+            name: ../platforms/roles/jupyterhub
+          tags: TC_015, VERIFY_015
+
+        - name: Checking all running pods under default namespace
+          command: kubectl get pods --namespace default --field-selector=status.phase=Running
+          register: namespace_default_running_pods
+          changed_when: false
+          ignore_errors: True
+          tags: TC_015, VERIFY_015
+
+        - name: Checking K8s services
+          command: kubectl get services
+          register: k8s_services
+          changed_when: false
+          ignore_errors: True
+          tags: TC_015, VERIFY_015
+
+        - name: Validating JupyterHub pods
+          assert:
+            that:
+              - "'hub' in namespace_default_running_pods.stdout"
+              - "'proxy' in namespace_default_running_pods.stdout"
+            fail_msg: "{{ jupyterhub_pods_fail_msg }}"
+            success_msg: "{{ jupyterhub_pods_success_msg }}"
+          tags: TC_015, VERIFY_015
+
+        - name: Validating JupyterHub services
+          assert:
+            that:
+              - "'hub' in k8s_services.stdout"
+              - "'proxy-public' in k8s_services.stdout"
+              - "'proxy-api' in k8s_services.stdout"
+            fail_msg: "{{ jupyterhub_services_fail_msg }}"
+            success_msg: "{{ jupyterhub_services_success_msg }}"
+          tags: TC_015, VERIFY_015
\ No newline at end of file
diff --git a/test/test_k8s_common.yml b/test/test_k8s_common.yml
new file mode 100644
index 000000000..3dc9d6957
--- /dev/null
+++ b/test/test_k8s_common.yml
@@ -0,0 +1,155 @@
+# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +# Testcase OMNIA_UKP_US_VFKP_TC_001 +# Execute k8s_common role in manager & compute nodes with os installed centos 7.8 +- name: OMNIA_UKP_US_VFKP_TC_001 + hosts: manager, compute + vars_files: + - test_vars/test_k8s_common_vars.yml + tasks: + - block: + - name: Call common role + include_role: + name: ../roles/k8s_common + tags: TC_001 + + - name: Checking common packages installation status + command: "'{{ item }}' --version" + with_items: + - "{{ common_packages }}" + register: common_packages_status + changed_when: false + ignore_errors: True + tags: TC_001, VERIFY_001 + + - name: Checking K8s packages installation status + command: "'{{ item }}' version" + with_items: + - "{{ k8_packages }}" + register: k8s_packages_status + changed_when: false + ignore_errors: True + tags: TC_001, VERIFY_001 + + - name: Checking docker service status + systemd: + name: docker + register: docker_service + tags: TC_001, VERIFY_001 + + - name: Checking K8s service status + systemd: + name: kubelet + register: kubelet_service + tags: TC_001, VERIFY_001 + + - name: Validate common & K8s packages status + assert: + that: + - "'command not found' not in {{ item }}" + fail_msg: "{{ packages_status_fail_msg }}" + success_msg: "{{ packages_status_success_msg }}" + with_items: + - "{{ common_packages_status.results }}" + - "{{ k8s_packages_status.results }}" + tags: TC_001, VERIFY_001 + + - name: Validating docker service status + assert: + that: + - docker_service.status.ActiveState == 'active' + fail_msg: "{{ docker_service_fail_msg }}" + success_msg: "{{ docker_service_success_msg }}" + tags: TC_001, VERIFY_001 + + - name: Validating K8s service status + assert: + that: + - kubelet_service.status.ActiveState == 'active' + fail_msg: "{{ kubelet_service_fail_msg }}" + success_msg: "{{ kubelet_service_success_msg }}" + tags: TC_001, VERIFY_001 + +# Testcase OMNIA_UKP_US_VFKP_TC_002 +# Execute k8s_common role in manager & compute nodes with common and K8s packages already installed +- name: OMNIA_UKP_US_VFKP_TC_002 + hosts: manager, compute + vars_files: + - test_vars/test_k8s_common_vars.yml + tasks: + - block: + - name: Call common role + include_role: + name: ../roles/k8s_common + tags: TC_002, VERIFY_002 + + - name: Checking common packages installation status + command: "'{{ item }}' --version" + with_items: + - "{{ common_packages }}" + register: common_packages_status + changed_when: false + ignore_errors: True + tags: TC_002, VERIFY_002 + + - name: Checking K8s packages installation status + command: "'{{ item }}' version" + with_items: + - "{{ k8_packages }}" + register: k8s_packages_status + changed_when: false + ignore_errors: True + tags: TC_002, VERIFY_002 + + - name: Checking docker service status + systemd: + name: docker + register: docker_service + tags: TC_002, VERIFY_002 + + - name: Checking K8s service status + systemd: + name: kubelet + register: kubelet_service + tags: TC_002, VERIFY_002 + + - name: Validate common & K8s packages status + assert: + that: + - "'command not found' not in {{ item }}" + fail_msg: "{{ packages_status_fail_msg }}" + success_msg: "{{ 
packages_status_success_msg }}" + quiet: true + with_items: + - "{{ common_packages_status.results }}" + - "{{ k8s_packages_status.results }}" + tags: TC_002, VERIFY_002 + + - name: Validating docker service status + assert: + that: + - docker_service.status.ActiveState == 'active' + fail_msg: "{{ docker_service_fail_msg }}" + success_msg: "{{ docker_service_success_msg }}" + tags: TC_002, VERIFY_002 + + - name: Validating K8s service status + assert: + that: + - kubelet_service.status.ActiveState == 'active' + fail_msg: "{{ kubelet_service_fail_msg }}" + success_msg: "{{ kubelet_service_success_msg }}" + tags: TC_002, VERIFY_002 \ No newline at end of file diff --git a/test/test_k8s_firewalld.yml b/test/test_k8s_firewalld.yml new file mode 100644 index 000000000..95edfb0fb --- /dev/null +++ b/test/test_k8s_firewalld.yml @@ -0,0 +1,226 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +# OMNIA_UKP_US_VFKP_TC_007 +# Execute k8s_firewalld role in manager and compute nodes with os installed centos 7.9 +- name: OMNIA_UKP_US_VFKP_TC_007 + hosts: manager, compute + gather_facts: false + vars_files: + - test_vars/test_k8s_firewalld_vars.yml + - ../roles/k8s_firewalld/vars/main.yml + tasks: + - block: + - name: Call k8s_firewalld role + include_role: + name: ../roles/k8s_firewalld + tags: TC_007 + + - name: Start and enable firewalld + service: + name: firewalld + state: started + enabled: yes + tags: TC_007, VERIFY_007 + + - name: Checking firewalld open ports on manager node + command: firewall-cmd --list-ports + register: manager_firewalld_ports + when: "'manager' in group_names" + tags: TC_007, VERIFY_007 + + - name: Checking firewalld open ports on compute node + command: firewall-cmd --list-ports + register: compute_firewalld_ports + when: "'compute' in group_names" + tags: TC_007, VERIFY_007 + + - name: Validating K8s port on manager node + assert: + that: + - "'6443' in manager_firewalld_ports.stdout" + - "'2379-2380' in manager_firewalld_ports.stdout" + - "'10250' in manager_firewalld_ports.stdout" + - "'10251' in manager_firewalld_ports.stdout" + - "'10252' in manager_firewalld_ports.stdout" + fail_msg: "{{ manager_k8s_ports_status_fail_msg }}" + success_msg: "{{ manager_k8s_ports_status_success_msg }}" + when: "'manager' in group_names" + tags: TC_007, VERIFY_007 + + - name: Validating K8s port on compute node + assert: + that: + - "'10250' in compute_firewalld_ports.stdout" + - "'30000-32767' in compute_firewalld_ports.stdout" + fail_msg: "{{ compute_k8s_ports_status_fail_msg }}" + success_msg: "{{ compute_k8s_ports_status_success_msg }}" + when: "'compute' in group_names" + tags: TC_007, VERIFY_007 + + - name: Validating Calico udp/tcp ports on manager nodes + assert: + that: + - "'4789' in manager_firewalld_ports.stdout" + - "'5473' in manager_firewalld_ports.stdout" + - "'179' in manager_firewalld_ports.stdout" + fail_msg: "{{ calico_ports_manager_fail_msg }}" + success_msg: "{{ 
calico_ports_manager_success_msg }}" + when: "k8s_cni == 'calico' and 'manager' in group_names" + tags: TC_007, VERIFY_007 + + - name: Validating Calico udp/tcp ports on compute nodes + assert: + that: + - "'4789' in compute_firewalld_ports.stdout" + - "'5473' in compute_firewalld_ports.stdout" + - "'179' in compute_firewalld_ports.stdout" + fail_msg: "{{ calico_ports_compute_fail_msg }}" + success_msg: "{{ calico_ports_compute_success_msg }}" + when: "k8s_cni == 'calico' and 'compute' in group_names" + tags: TC_007, VERIFY_007 + + - name: Validating Flannel ports on manager nodes + assert: + that: + - "'8285' in manager_firewalld_ports.stdout" + - "'8472' in manager_firewalld_ports.stdout" + fail_msg: "{{ flannel_ports_manager_fail_msg }}" + success_msg: "{{ flannel_ports_manager_success_msg }}" + when: "k8s_cni == 'flannel' and 'manager' in group_names" + tags: TC_007, VERIFY_007 + + - name: Validating Flannel ports on compute nodes + assert: + that: + - "'8285' in compute_firewalld_ports.stdout" + - "'8472' in compute_firewalld_ports.stdout" + fail_msg: "{{ flannel_ports_compute_fail_msg }}" + success_msg: "{{ flannel_ports_compute_success_msg }}" + when: "k8s_cni == 'flannel' and 'compute' in group_names" + tags: TC_007, VERIFY_007 + + - name: Stop and disable firewalld + service: + name: firewalld + state: stopped + enabled: no + tags: TC_007, VERIFY_007 + +# OMNIA_UKP_US_VFKP_TC_008 +# Execute k8s_firewalld role in manager and compute nodes with K8s ports already opened +- name: OMNIA_UKP_US_VFKP_TC_008 + hosts: manager, compute + gather_facts: false + vars_files: + - test_vars/test_k8s_firewalld_vars.yml + - ../roles/k8s_firewalld/vars/main.yml + tasks: + - block: + - name: Call k8s_firewalld role + include_role: + name: ../roles/k8s_firewalld + tags: TC_008 + + - name: Start and enable firewalld + service: + name: firewalld + state: started + enabled: yes + tags: TC_008, VERIFY_008 + + - name: Checking firewalld open ports on manager node + command: firewall-cmd --list-ports + register: manager_firewalld_ports + when: "'manager' in group_names" + tags: TC_008, VERIFY_008 + + - name: Checking firewalld open ports on compute node + command: firewall-cmd --list-ports + register: compute_firewalld_ports + when: "'compute' in group_names" + tags: TC_008, VERIFY_008 + + - name: Validating K8s port on manager node + assert: + that: + - "'6443' in manager_firewalld_ports.stdout" + - "'2379-2380' in manager_firewalld_ports.stdout" + - "'10250' in manager_firewalld_ports.stdout" + - "'10251' in manager_firewalld_ports.stdout" + - "'10252' in manager_firewalld_ports.stdout" + fail_msg: "{{ manager_k8s_ports_status_fail_msg }}" + success_msg: "{{ manager_k8s_ports_status_success_msg }}" + when: "'manager' in group_names" + tags: TC_008, VERIFY_008 + + - name: Validating K8s port on compute node + assert: + that: + - "'10250' in compute_firewalld_ports.stdout" + - "'30000-32767' in compute_firewalld_ports.stdout" + fail_msg: "{{ compute_k8s_ports_status_fail_msg }}" + success_msg: "{{ compute_k8s_ports_status_success_msg }}" + when: "'compute' in group_names" + tags: TC_008, VERIFY_008 + + - name: Validating Calico udp/tcp ports on manager nodes + assert: + that: + - "'4789' in manager_firewalld_ports.stdout" + - "'5473' in manager_firewalld_ports.stdout" + - "'179' in manager_firewalld_ports.stdout" + fail_msg: "{{ calico_ports_manager_fail_msg }}" + success_msg: "{{ calico_ports_manager_success_msg }}" + when: "k8s_cni == 'calico' and 'manager' in group_names" + tags: TC_008, VERIFY_008 + + - 
name: Validating Calico udp/tcp ports on compute nodes + assert: + that: + - "'4789' in compute_firewalld_ports.stdout" + - "'5473' in compute_firewalld_ports.stdout" + - "'179' in compute_firewalld_ports.stdout" + fail_msg: "{{ calico_ports_compute_fail_msg }}" + success_msg: "{{ calico_ports_compute_success_msg }}" + when: "k8s_cni == 'calico' and 'compute' in group_names" + tags: TC_008, VERIFY_008 + + - name: Validating Flannel ports on manager nodes + assert: + that: + - "'8285' in manager_firewalld_ports.stdout" + - "'8472' in manager_firewalld_ports.stdout" + fail_msg: "{{ flannel_ports_manager_fail_msg }}" + success_msg: "{{ flannel_ports_manager_success_msg }}" + when: "k8s_cni == 'flannel' and 'manager' in group_names" + tags: TC_008, VERIFY_008 + + - name: Validating Flannel ports on compute nodes + assert: + that: + - "'8285' in compute_firewalld_ports.stdout" + - "'8472' in compute_firewalld_ports.stdout" + fail_msg: "{{ flannel_ports_compute_fail_msg }}" + success_msg: "{{ flannel_ports_compute_success_msg }}" + when: "k8s_cni == 'flannel' and 'compute' in group_names" + tags: TC_008, VERIFY_008 + + - name: Stop and disable firewalld + service: + name: firewalld + state: stopped + enabled: no + tags: TC_008, VERIFY_008 \ No newline at end of file diff --git a/test/test_k8s_manager.yml b/test/test_k8s_manager.yml new file mode 100644 index 000000000..a6e681864 --- /dev/null +++ b/test/test_k8s_manager.yml @@ -0,0 +1,70 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
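The firewalld test cases above only verify `firewall-cmd --list-ports` output; the ports themselves are opened by the k8s_firewalld role. For reference, a hedged sketch of how one of the verified ports could be opened idempotently with the `firewalld` module rather than raw `firewall-cmd` (illustrative only, not a task taken from the role):

```yaml
# Open the kube-apiserver port both permanently and in the running config.
- name: Open port 6443/tcp for the Kubernetes API server
  firewalld:
    port: 6443/tcp
    permanent: yes
    immediate: yes
    state: enabled
```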
+--- + +# OMNIA_UKP_US_VFKP_TC_005 +# Execute k8s_manager role in manager nodes with os installed centos 7.9 +- name: OMNIA_UKP_US_VFKP_TC_005 + hosts: manager + vars_files: + - test_vars/test_k8s_manager_vars.yml + tasks: + - block: + - name: Call manager role + include_role: + name: ../roles/k8s_manager + tags: TC_005 + + - name: Checking helm installation status + command: helm version + register: helm_status + changed_when: false + ignore_errors: True + tags: TC_005, VERIFY_005 + + - name: Validating helm installation status + assert: + that: + - "'version.BuildInfo' in helm_status.stdout" + fail_msg: "{{ helm_status_fail_msg }}" + success_msg: "{{ helm_status_success_msg }}" + tags: TC_005, VERIFY_005 + +# OMNIA_UKP_US_VFKP_TC_006 +# Execute k8s_manager role in manager nodes with helm already installed +- name: OMNIA_UKP_US_VFKP_TC_006 + hosts: manager + vars_files: + - test_vars/test_k8s_manager_vars.yml + tasks: + - block: + - name: Call manager role + include_role: + name: ../roles/k8s_manager + tags: TC_006, VERIFY_006 + + - name: Checking helm installation status + command: helm version + register: helm_status + changed_when: false + ignore_errors: True + tags: TC_006, VERIFY_006 + + - name: Validating helm installation status + assert: + that: + - "'command not found' not in helm_status.stdout" + fail_msg: "{{ helm_status_fail_msg }}" + success_msg: "{{ helm_status_success_msg }}" + tags: TC_006, VERIFY_006 \ No newline at end of file diff --git a/test/test_k8s_start_manager_workers.yml b/test/test_k8s_start_manager_workers.yml new file mode 100644 index 000000000..3b73df3a3 --- /dev/null +++ b/test/test_k8s_start_manager_workers.yml @@ -0,0 +1,144 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
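test_k8s_manager.yml probes the helm CLI with `ignore_errors: True` and then asserts on the registered output. An equivalent probe that avoids the red failed-task output entirely uses `failed_when: false` and checks the return code; a sketch (the `version.BuildInfo` marker is the same string the tests above look for):

```yaml
- name: Probe helm without failing the play
  command: helm version
  register: helm_status
  changed_when: false
  failed_when: false

- name: Validate helm is installed
  assert:
    that:
      - helm_status.rc == 0
      - "'version.BuildInfo' in helm_status.stdout"
    fail_msg: "helm is not installed or not on PATH"
```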
+---
+
+# OMNIA_UKP_US_VFKP_TC_009, OMNIA_UKP_US_VFKP_TC_010
+# Execute k8s_start_manager role in manager nodes with os installed centos 7.9 and swap enabled
+- name: OMNIA_UKP_US_VFKP_TC_009, OMNIA_UKP_US_VFKP_TC_010
+  hosts: manager
+  gather_facts: false
+  vars_files:
+    - test_vars/test_k8s_start_manager_workers_vars.yml
+  tasks:
+    - name: Enable Swap
+      command: /usr/sbin/swapon -a
+      changed_when: true
+      tags: TC_009, TC_010
+
+    - block:
+        - name: Call k8s_start_manager role
+          include_role:
+            name: ../roles/k8s_start_manager
+          tags: TC_009, TC_010
+
+        - name: Waiting for the pods deployment
+          pause:
+            minutes: 10
+          tags: TC_009, TC_010
+
+        - name: Checking master node
+          command: kubectl get nodes
+          register: master_node_status
+          changed_when: false
+          ignore_errors: True
+          tags: TC_009, TC_010, VERIFY_009
+
+        - name: Checking kube-system pods
+          command: kubectl get pods --namespace kube-system --field-selector=status.phase=Running
+          register: kube_system_pods
+          changed_when: false
+          ignore_errors: True
+          tags: TC_009, TC_010, VERIFY_009
+
+        - name: Checking calico/flannel SDN network status
+          command: ip address
+          register: calico_flannel_status
+          changed_when: false
+          tags: TC_009, TC_010, VERIFY_009
+
+        - name: Checking K8s service account and token
+          command: kubectl get secrets
+          register: service_account_status
+          changed_when: false
+          ignore_errors: True
+          tags: TC_009, TC_010, VERIFY_009
+
+        - name: Validating master node status
+          assert:
+            that:
+              - "'master' in master_node_status.stdout"
+            fail_msg: "{{ master_node_status_fail_msg }}"
+            success_msg: "{{ master_node_status_success_msg }}"
+          tags: TC_009, TC_010, VERIFY_009
+
+        - name: Validating controller-manager and scheduler and coreDNS pods status
+          assert:
+            that:
+              - "'kube-scheduler' in kube_system_pods.stdout"
+              - "'kube-controller' in kube_system_pods.stdout"
+            fail_msg: "{{ controller_scheduler_status_fail_msg }}"
+            success_msg: "{{ controller_scheduler_status_success_msg }}"
+          tags: TC_009, TC_010, VERIFY_009
+
+        - name: Validating coreDNS pods status
+          assert:
+            that:
+              - "'coredns' in kube_system_pods.stdout"
+            fail_msg: "{{ coredns_status_fail_msg }}"
+            success_msg: "{{ coredns_status_success_msg }}"
+          tags: TC_009, TC_010, VERIFY_009
+
+        - name: Validating calico/flannel SDN network status
+          assert:
+            that:
+              - "'calico' in kube_system_pods.stdout or 'flannel' in kube_system_pods.stdout"
+            fail_msg: "{{ calico_flannel_status_fail_msg }}"
+            success_msg: "{{ calico_flannel_status_success_msg }}"
+          tags: TC_009, TC_010, VERIFY_009
+
+        - name: Validating K8s service account and token status
+          assert:
+            that:
+              - "'kubernetes.io/service-account-token' in service_account_status.stdout"
+            fail_msg: "{{ k8s_service_account_status_fail_msg }}"
+            success_msg: "{{ k8s_service_account_status_success_msg }}"
+          tags: TC_009, TC_010, VERIFY_009
+
+# OMNIA_UKP_US_VFKP_TC_011, OMNIA_UKP_US_VFKP_TC_012
+# Execute k8s_start_workers role in compute nodes with os installed centos 7.9 and swap enabled
+- name: OMNIA_UKP_US_VFKP_TC_011, OMNIA_UKP_US_VFKP_TC_012
+  hosts: compute
+  gather_facts: false
+  tasks:
+    - name: Enable Swap
+      command: /usr/sbin/swapon -a
+      changed_when: true
+      tags: TC_011, TC_012
+
+    - block:
+        - name: Call k8s_start_workers role
+          include_role:
+            name: ../roles/k8s_start_workers
+          tags: TC_011, TC_012
+
+- name: OMNIA_UKP_US_VFKP_TC_011, OMNIA_UKP_US_VFKP_TC_012
+  hosts: manager
+  gather_facts: false
+  vars_files:
+    - test_vars/test_k8s_start_manager_workers_vars.yml
+  tasks:
+    - name: Check worker nodes status
+ command: kubectl get node --selector='!node-role.kubernetes.io/master' + register: worker_nodes_status + changed_when: false + ignore_errors: True + tags: TC_011, TC_012, VERIFY_011 + + - name: Validating worker nodes status + assert: + that: + - "'Ready' in worker_nodes_status.stdout" + fail_msg: "{{ worker_nodes_status_fail_msg }}" + success_msg: "{{ worker_nodes_status_success_msg }}" + tags: TC_011, TC_012, VERIFY_011 \ No newline at end of file diff --git a/test/test_k8s_start_services.yml b/test/test_k8s_start_services.yml new file mode 100644 index 000000000..db17fe0c8 --- /dev/null +++ b/test/test_k8s_start_services.yml @@ -0,0 +1,97 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +# OMNIA_UKP_US_VFKP_TC_013 +# Execute k8s_start_services role in manager nodes with os installed centos 7.9 +- name: OMNIA_UKP_US_VFKP_TC_013 + hosts: manager + gather_facts: false + vars_files: + - test_vars/test_k8s_start_services_vars.yml + tasks: + - block: + - name: Call k8s_start_services role + include_role: + name: ../roles/k8s_start_services + tags: TC_013 + + - name: Waiting for the pods deployment + pause: + minutes: 10 + tags: TC_013 + + - name: Checking all running pods + command: kubectl get pods --all-namespaces --field-selector=status.phase=Running + register: running_pods + changed_when: false + ignore_errors: True + tags: TC_013, VERIFY_013 + + - name: Checking default storage class + command: kubectl get sc + register: default_storage_class + changed_when: false + ignore_errors: True + tags: TC_013, VERIFY_013 + + - name: Validating Metallb, Prometheus and MPI pods + assert: + that: + - "'metallb' in running_pods.stdout" + - "'prometheus' in running_pods.stdout" + - "'mpi-operator' in running_pods.stdout" + fail_msg: "{{ metallb_prometheus_mpi_pods_fail_msg }}" + success_msg: "{{ metallb_prometheus_mpi_pods_success_msg }}" + tags: TC_013, VERIFY_013 + + - name: Validating K8s dashboard + assert: + that: + - "'kubernetes-dashboard' in running_pods.stdout" + fail_msg: "{{ kubernetes_dashboard_fail_msg }}" + success_msg: "{{ kubernetes_dashboard_success_msg }}" + tags: TC_013, VERIFY_013 + + - name: Validating NFS Client Provisioner pods + assert: + that: + - "'nfs-client-provisioner' in running_pods.stdout" + fail_msg: "{{ nfs_client_provisioner_pods_fail_msg }}" + success_msg: "{{ nfs_client_provisioner_pods_success_msg }}" + tags: TC_013, VERIFY_013 + + - name: Validating default storage class + assert: + that: + - "'nfs-client' in default_storage_class.stdout" + fail_msg: "{{ default_storage_class_fail_msg }}" + success_msg: "{{ default_storage_class_success_msg }}" + tags: TC_013, VERIFY_013 + + - name: Validating Node Feature Discovery pods + assert: + that: + - "'node-feature-discovery' in running_pods.stdout" + fail_msg: "{{ node_feature_discovery_pods_fail_msg }}" + success_msg: "{{ node_feature_discovery_pods_success_msg }}" + tags: TC_013, VERIFY_013 + + - name: Validating Nvidia device plugin pods + 
assert:
+            that:
+              - "'nvidia-device-plugin' in running_pods.stdout"
+            fail_msg: "{{ nvidia_device_plugin_pods_fail_msg }}"
+            success_msg: "{{ nvidia_device_plugin_pods_success_msg }}"
+          tags: TC_013, VERIFY_013
\ No newline at end of file
diff --git a/test/test_k8s_start_workers.yml b/test/test_k8s_start_workers.yml
new file mode 100644
index 000000000..bc4db9b15
--- /dev/null
+++ b/test/test_k8s_start_workers.yml
@@ -0,0 +1,50 @@
+# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+
+# OMNIA_UKP_US_VFKP_TC_013, OMNIA_UKP_US_VFKP_TC_014
+# Execute startworkers role in compute nodes with os installed centos 7.9 and swap enabled
+- name: OMNIA_UKP_US_VFKP_TC_013, OMNIA_UKP_US_VFKP_TC_014
+  hosts: compute
+  vars_files:
+    - test_vars/test_k8s_start_workers_vars.yml
+  tasks:
+    - name: Enable Swap
+      command: /usr/sbin/swapon -a
+      changed_when: true
+      tags: TC_013, TC_014
+
+    - block:
+        - name: Call k8s_start_workers role
+          include_role:
+            name: ../roles/k8s_start_workers
+          tags: TC_013, TC_014
+
+- name: OMNIA_UKP_US_VFKP_TC_013, OMNIA_UKP_US_VFKP_TC_014
+  hosts: manager
+  vars_files:
+    - test_vars/test_k8s_start_workers_vars.yml
+  tasks:
+    - name: Check worker nodes status
+      command: kubectl get node --selector='!node-role.kubernetes.io/master'
+      register: worker_nodes_status
+      changed_when: false
+      tags: TC_013, TC_014, VERIFY_013
+
+    - name: Validating worker nodes status
+      assert:
+        that:
+          - "'Ready' in worker_nodes_status.stdout"
+        fail_msg: "{{ worker_nodes_status_fail_msg }}"
+        success_msg: "{{ worker_nodes_status_success_msg }}"
+      tags: TC_013, TC_014, VERIFY_013
\ No newline at end of file
diff --git a/test/test_kubeflow.yml b/test/test_kubeflow.yml
new file mode 100644
index 000000000..4367b7d90
--- /dev/null
+++ b/test/test_kubeflow.yml
@@ -0,0 +1,123 @@
+# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
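A note on the `include_role` calls in these tests: `name` must point at a role directory, not at a tasks file, so `../roles/k8s_start_workers` carries no `.yml` suffix. If a non-default entry file were ever needed, `tasks_from` is the knob for that; a sketch:

```yaml
- name: Call k8s_start_workers role
  include_role:
    name: ../roles/k8s_start_workers   # role directory, no .yml suffix
    # tasks_from: main                 # optional: pick a specific tasks file
```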
+--- + +# OMNIA_UKP_US_VFKP_TC_016 +# Execute kubeflow role in manager nodes with os installed centos 7.9 +- name: OMNIA_UKP_US_VFKP_TC_016 + hosts: manager + gather_facts: false + vars_files: + - test_vars/test_kubeflow_vars.yml + tasks: + - block: + - name: Call kubeflow role + include_role: + name: ../platforms/roles/kubeflow + tags: TC_016 + + - name: Waiting for the pods deployment + pause: + minutes: 5 + tags: TC_016 + + - name: Checking installed Kubeflow version + command: kfctl version + register: kfctl_version + changed_when: false + ignore_errors: True + tags: TC_016, VERIFY_016 + + - name: Checking pods under kubeflow namespace + command: kubectl get pods --namespace kubeflow + register: kubeflow_pods + changed_when: false + ignore_errors: True + tags: TC_016, VERIFY_016 + + - name: Checking pods under istio-system namespace + command: kubectl get pods --namespace istio-system + register: istio_system_pods + changed_when: false + ignore_errors: True + tags: TC_016, VERIFY_016 + + - name: Validating Kubeflow Installation + assert: + that: + - "'command not found' not in kfctl_version.stdout" + fail_msg: "{{ kubeflow_install_fail_msg }}" + success_msg: "{{ kubeflow_install_success_msg }}" + tags: TC_016, VERIFY_016 + + - name: Validating Kubeflow pods deployment + assert: + that: + - "'Running' in kubeflow_pods.stdout or 'ContainerCreating' in kubeflow_pods.stdout" + - "'Running' in istio_system_pods.stdout or 'ContainerCreating' in istio_system_pods.stdout" + fail_msg: "{{ kubeflow_pods_deployment_fail_msg }}" + success_msg: "{{ kubeflow_pods_deployment_success_msg }}" + tags: TC_016, VERIFY_016 + +# OMNIA_UKP_US_VFKP_TC_017 +# Execute kubeflow role in manager nodes with kubeflow already deployed +- name: OMNIA_UKP_US_VFKP_TC_017 + hosts: manager + gather_facts: false + vars_files: + - test_vars/test_kubeflow_vars.yml + tasks: + - block: + - name: Call kubeflow role + include_role: + name: ../platforms/roles/kubeflow + tags: TC_017, VERIFY_017 + + - name: Checking installed Kubeflow version + command: kfctl version + register: kfctl_version + changed_when: false + ignore_errors: True + tags: TC_017, VERIFY_017 + + - name: Checking pods under kubeflow namespace + command: kubectl get pods --namespace kubeflow + register: kubeflow_pods + changed_when: false + ignore_errors: True + tags: TC_017, VERIFY_017 + + - name: Checking pods under istio-system namespace + command: kubectl get pods --namespace istio-system + register: istio_system_pods + changed_when: false + ignore_errors: True + tags: TC_017, VERIFY_017 + + - name: Validating Kubeflow Installation + assert: + that: + - "'command not found' not in kfctl_version.stdout" + fail_msg: "{{ kubeflow_install_fail_msg }}" + success_msg: "{{ kubeflow_install_success_msg }}" + tags: TC_017, VERIFY_017 + + - name: Validating Kubeflow pods deployment + assert: + that: + - "'Running' in kubeflow_pods.stdout or 'ContainerCreating' in kubeflow_pods.stdout" + - "'Running' in istio_system_pods.stdout or 'ContainerCreating' in istio_system_pods.stdout" + fail_msg: "{{ kubeflow_pods_deployment_fail_msg }}" + success_msg: "{{ kubeflow_pods_deployment_success_msg }}" + tags: TC_017, VERIFY_017 \ No newline at end of file diff --git a/test/test_omnia.yml b/test/test_omnia.yml new file mode 100644 index 000000000..6bf6a0211 --- /dev/null +++ b/test/test_omnia.yml @@ -0,0 +1,872 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- +# Testcase OMNIA_CRM_US_AWXD_TC_006 +# Test case to validate whether the proper error message is displayed when slurm and kubernetes tags are skipped +- name: OMNIA_CRM_US_AWXD_TC_006 + hosts: localhost + connection: local + vars_files: + - ../appliance/roles/web_ui/vars/main.yml + - ../appliance/roles/common/vars/main.yml + - test_vars/test_omnia_vars.yml + tasks: + - name: Check input config file is encrypted + command: cat ../appliance/test/{{ test_input_config_filename }} + changed_when: false + register: config_content + tags: TC_006 + + - name: Decrpyt input_config.yml + command: ansible-vault decrypt ../appliance/test/{{ test_input_config_filename }} --vault-password-file {{ vault_filename }} + changed_when: false + when: "'$ANSIBLE_VAULT;' in config_content.stdout" + tags: TC_006 + + - name: Include variable file input_config.yml + include_vars: "../appliance/test/{{ test_input_config_filename }}" + tags: TC_006 + + - name: Creating inventory file with hosts associated to the groups + copy: + dest: "testinventory.yml" + mode: '{{ file_permission }}' + content: | + --- + manager: + hosts: + {{ host1 }} + + compute: + hosts: + {{ host2 }} + tags: TC_006 + + - name: Get present working directory + command: >- + pwd + register: path + changed_when: false + tags: TC_006 + + - name: Push the inventory to AWX + shell: | + set -o pipefail + docker exec awx_task awx-manage inventory_import --inventory-name {{ omnia_inventory_name }} --source "{{ inventory_path }}/{{ path.stdout.split('/')[-2] }}/test/testinventory.yml" + changed_when: false + tags: TC_006 + + - block: + - name: Launch the job template + command: >- + awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ awx_password }}" --conf.insecure + job_templates launch "{{ omnia_template_name }}" --credentials "{{ credential_name }}" --skip_tags slurm,kubernetes --monitor -f human + changed_when: false + register: command_output + + rescue: + - name: Validate error message + assert: + that: "'FAILED!' 
in command_output.stdout" + success_msg: "{{ test_case_success_msg }}" + fail_msg: "{{ test_case_failure_msg }}" + tags: TC_006 + + - name: Delete the hosts + command: >- + awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ awx_password }}" --conf.insecure + hosts delete {{ host1 }} --monitor -f human + awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ awx_password }}" --conf.insecure + hosts delete {{ host2 }} --monitor -f human + changed_when: false + tags: TC_006 + + - name: Delete the inventory file + ignore_errors: yes + file: + state: absent + path: testinventory.yml + tags: TC_006 + + - name: Create inventory file if it doesn't exist + ignore_errors: yes + file: + path: "testinventory.yml" + state: touch + mode: '{{ file_permission }}' + tags: TC_006 + +# Testcase OMNIA_CRM_US_AWXD_TC_007 +# Test case to validate whether the skip tags validation is passed when slurm tag is given +- name: OMNIA_CRM_US_AWXD_TC_007 + hosts: localhost + connection: local + vars_files: + - ../appliance/roles/web_ui/vars/main.yml + - ../appliance/roles/common/vars/main.yml + - test_vars/test_omnia_vars.yml + tasks: + - name: Check input config file is encrypted + command: cat ../appliance/test/{{ test_input_config_filename }} + changed_when: false + register: config_content + tags: TC_007 + + - name: Decrpyt input_config.yml + command: ansible-vault decrypt ../appliance/test/{{ test_input_config_filename }} --vault-password-file {{ vault_filename }} + changed_when: false + when: "'$ANSIBLE_VAULT;' in config_content.stdout" + tags: TC_007 + + - name: Include variable file input_config.yml + include_vars: "../appliance/test/{{ test_input_config_filename }}" + tags: TC_007 + + - name: Creating inventory file with hosts associated to the groups + copy: + dest: "testinventory.yml" + mode: '{{ file_permission }}' + content: | + --- + manager: + hosts: + {{ host1 }} + + compute: + hosts: + {{ host2 }} + tags: TC_007 + + - name: Get present working directory + command: >- + pwd + register: path + changed_when: false + tags: TC_007 + + - name: Push the inventory to AWX + shell: | + set -o pipefail + docker exec awx_task awx-manage inventory_import --inventory-name {{ omnia_inventory_name }} --source "{{ inventory_path }}/{{ path.stdout.split('/')[-2] }}/test/testinventory.yml" + changed_when: false + tags: TC_007 + + - block: + - name: Launch the job template + command: >- + awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ awx_password }}" --conf.insecure + job_templates launch "{{ omnia_template_name }}" --credentials "{{ credential_name }}" --skip_tags slurm --monitor -f human + changed_when: false + register: command_output + tags: TC_007 + + rescue: + - name: Validate success message + assert: + that: "'FAILED!' 
not in command_output.stdout" + success_msg: "{{ test_case_success_msg }}" + fail_msg: "{{ test_case_failure_msg }}" + tags: TC_007 + + - name: Delete the hosts + command: >- + awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ awx_password }}" --conf.insecure + hosts delete {{ host1 }} --monitor -f human + awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ awx_password }}" --conf.insecure + hosts delete {{ host2 }} --monitor -f human + changed_when: false + tags: TC_007 + + - name: Delete the inventory file + ignore_errors: yes + file: + state: absent + path: testinventory.yml + tags: TC_007 + + - name: Create inventory file if it doesn't exist + ignore_errors: yes + file: + path: "testinventory.yml" + state: touch + mode: '{{ file_permission }}' + tags: TC_007 + +# Testcase OMNIA_CRM_US_AWXD_TC_008 +# Test case to validate whether the skip tags validation is passed when kubernetes tag is given +- name: OMNIA_CRM_US_AWXD_TC_008 + hosts: localhost + connection: local + vars_files: + - ../appliance/roles/web_ui/vars/main.yml + - ../appliance/roles/common/vars/main.yml + - test_vars/test_omnia_vars.yml + tasks: + - name: Check input config file is encrypted + command: cat ../appliance/test/{{ test_input_config_filename }} + changed_when: false + register: config_content + tags: TC_008 + + - name: Decrpyt input_config.yml + command: ansible-vault decrypt ../appliance/test/{{ test_input_config_filename }} --vault-password-file {{ vault_filename }} + changed_when: false + when: "'$ANSIBLE_VAULT;' in config_content.stdout" + tags: TC_008 + + - name: Include variable file input_config.yml + include_vars: "../appliance/test/{{ test_input_config_filename }}" + tags: TC_008 + + - name: Creating inventory file with hosts associated to the groups + copy: + dest: "testinventory.yml" + mode: '{{ file_permission }}' + content: | + --- + manager: + hosts: + {{ host1 }} + + compute: + hosts: + {{ host2 }} + tags: TC_008 + + - name: Get present working directory + command: >- + pwd + register: path + changed_when: false + tags: TC_008 + + - name: Push the inventory to AWX + shell: | + set -o pipefail + docker exec awx_task awx-manage inventory_import --inventory-name {{ omnia_inventory_name }} --source "{{ inventory_path }}/{{ path.stdout.split('/')[-2] }}/test/testinventory.yml" + changed_when: false + tags: TC_008 + + - block: + - name: Launch the job template + command: >- + awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ awx_password }}" --conf.insecure + job_templates launch "{{ omnia_template_name }}" --credentials "{{ credential_name }}" --skip_tags kubernetes --monitor -f human + changed_when: false + register: command_output + + rescue: + - name: Validate success message + assert: + that: "'FAILED!' 
not in command_output.stdout" + success_msg: "{{ test_case_success_msg }}" + fail_msg: "{{ test_case_failure_msg }}" + tags: TC_008 + + - name: Delete the hosts + command: >- + awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ awx_password }}" --conf.insecure + hosts delete {{ host1 }} --monitor -f human + awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ awx_password }}" --conf.insecure + hosts delete {{ host2 }} --monitor -f human + changed_when: false + tags: TC_008 + + - name: Delete the inventory file + ignore_errors: yes + file: + state: absent + path: testinventory.yml + tags: TC_008 + + - name: Create inventory file if it doesn't exist + ignore_errors: yes + file: + path: "testinventory.yml" + state: touch + mode: '{{ file_permission }}' + tags: TC_008 + +# Testcase OMNIA_CRM_US_AWXD_TC_009 +# Test case to validate whether the proper error message is displayed when no host is added to manager group +- name: OMNIA_CRM_US_AWXD_TC_009 + hosts: localhost + connection: local + vars_files: + - ../appliance/roles/web_ui/vars/main.yml + - ../appliance/roles/common/vars/main.yml + - test_vars/test_omnia_vars.yml + tasks: + - name: Check input config file is encrypted + command: cat ../appliance/test/{{ test_input_config_filename }} + changed_when: false + register: config_content + tags: TC_009 + + - name: Decrpyt input_config.yml + command: ansible-vault decrypt ../appliance/test/{{ test_input_config_filename }} --vault-password-file {{ vault_filename }} + changed_when: false + when: "'$ANSIBLE_VAULT;' in config_content.stdout" + tags: TC_009 + + - name: Include variable file input_config.yml + include_vars: "../appliance/test/{{ test_input_config_filename }}" + tags: TC_009 + + - name: Creating inventory file with hosts associated to the groups + copy: + dest: "testinventory.yml" + mode: '{{ file_permission }}' + content: | + --- + manager: + hosts: + + compute: + hosts: + {{ host2 }} + tags: TC_009 + + - name: Get present working directory + command: >- + pwd + register: path + changed_when: false + tags: TC_009 + + - name: Push the inventory to AWX + shell: | + set -o pipefail + docker exec awx_task awx-manage inventory_import --inventory-name {{ omnia_inventory_name }} --source "{{ inventory_path }}/{{ path.stdout.split('/')[-2] }}/test/testinventory.yml" + changed_when: false + tags: TC_009 + + - block: + - name: Launch the job template + command: >- + awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ awx_password }}" --conf.insecure + job_templates launch "{{ omnia_template_name }}" --credentials "{{ credential_name }}" --monitor -f human + changed_when: false + register: command_output + + rescue: + - name: Validate error message + assert: + that: "'FAILED!' 
in command_output.stdout" + success_msg: "{{ test_case_success_msg }}" + fail_msg: "{{ test_case_failure_msg }}" + tags: TC_009 + + - name: Delete the hosts + command: >- + awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ awx_password }}" --conf.insecure + hosts delete {{ host2 }} --monitor -f human + changed_when: false + tags: TC_009 + + - name: Delete the inventory file + ignore_errors: yes + file: + state: absent + path: testinventory.yml + tags: TC_009 + + - name: Create inventory file if it doesn't exist + ignore_errors: yes + file: + path: "testinventory.yml" + state: touch + mode: '{{ file_permission }}' + tags: TC_009 + +# Testcase OMNIA_CRM_US_AWXD_TC_010 +# Test case to verify whether the manger group validation is passed when single host is present +- name: OMNIA_CRM_US_AWXD_TC_010 + hosts: localhost + connection: local + vars_files: + - ../appliance/roles/web_ui/vars/main.yml + - ../appliance/roles/common/vars/main.yml + - test_vars/test_omnia_vars.yml + tasks: + - name: Check input config file is encrypted + command: cat ../appliance/test/{{ test_input_config_filename }} + changed_when: false + register: config_content + tags: TC_010 + + - name: Decrpyt input_config.yml + command: ansible-vault decrypt ../appliance/test/{{ test_input_config_filename }} --vault-password-file {{ vault_filename }} + changed_when: false + when: "'$ANSIBLE_VAULT;' in config_content.stdout" + tags: TC_010 + + - name: Include variable file input_config.yml + include_vars: "../appliance/test/{{ test_input_config_filename }}" + tags: TC_010 + + - name: Creating inventory file with hosts associated to the groups + copy: + dest: "testinventory.yml" + mode: '{{ file_permission }}' + content: | + --- + manager: + hosts: + {{ host1 }} + + compute: + hosts: + {{ host2 }} + tags: TC_010 + + - name: Get present working directory + command: >- + pwd + register: path + changed_when: false + tags: TC_010 + + - name: Push the inventory to AWX + shell: | + set -o pipefail + docker exec awx_task awx-manage inventory_import --inventory-name {{ omnia_inventory_name }} --source "{{ inventory_path }}/{{ path.stdout.split('/')[-2] }}/test/testinventory.yml" + changed_when: false + tags: TC_010 + + - block: + - name: Launch the job template + command: >- + awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ awx_password }}" --conf.insecure + job_templates launch "{{ omnia_template_name }}" --credentials "{{ credential_name }}" --monitor -f human + changed_when: false + register: command_output + tags: TC_010 + + rescue: + - name: Validate success message + assert: + that: "'FAILED!' 
not in command_output.stdout" + success_msg: "{{ test_case_success_msg }}" + fail_msg: "{{ test_case_failure_msg }}" + tags: TC_010 + + - name: Delete the hosts + command: >- + awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ awx_password }}" --conf.insecure + hosts delete {{ host1 }} --monitor -f human + awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ awx_password }}" --conf.insecure + hosts delete {{ host2 }} --monitor -f human + changed_when: false + tags: TC_010 + + - name: Delete the inventory file + ignore_errors: yes + file: + state: absent + path: testinventory.yml + tags: TC_010 + + - name: Create inventory file if it doesn't exist + ignore_errors: yes + file: + path: "testinventory.yml" + state: touch + mode: '{{ file_permission }}' + tags: TC_010 + +# Testcase OMNIA_CRM_US_AWXD_TC_011 +# Test case to validate whether the proper error message is displayed when no host is added to compute group +- name: OMNIA_CRM_US_AWXD_TC_011 + hosts: localhost + connection: local + vars_files: + - ../appliance/roles/web_ui/vars/main.yml + - ../appliance/roles/common/vars/main.yml + - test_vars/test_omnia_vars.yml + tasks: + - name: Check input config file is encrypted + command: cat ../appliance/test/{{ test_input_config_filename }} + changed_when: false + register: config_content + tags: TC_011 + + - name: Decrpyt input_config.yml + command: ansible-vault decrypt ../appliance/test/{{ test_input_config_filename }} --vault-password-file {{ vault_filename }} + changed_when: false + when: "'$ANSIBLE_VAULT;' in config_content.stdout" + tags: TC_011 + + - name: Include variable file input_config.yml + include_vars: "../appliance/test/{{ test_input_config_filename }}" + tags: TC_011 + + - name: Creating inventory file with hosts associated to the groups + copy: + dest: "testinventory.yml" + mode: '{{ file_permission }}' + content: | + --- + manager: + hosts: + {{ host3 }} + + compute: + hosts: + + tags: TC_011 + + - name: Get present working directory + command: >- + pwd + register: path + changed_when: false + tags: TC_011 + + - name: Push the inventory to AWX + shell: | + set -o pipefail + docker exec awx_task awx-manage inventory_import --inventory-name {{ omnia_inventory_name }} --source "{{ inventory_path }}/{{ path.stdout.split('/')[-2] }}/test/testinventory.yml" + changed_when: false + tags: TC_011 + + - block: + - name: Launch the job template + command: >- + awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ awx_password }}" --conf.insecure + job_templates launch "{{ omnia_template_name }}" --credentials "{{ credential_name }}" --monitor -f human + changed_when: false + register: command_output + + rescue: + - name: Validate error message + assert: + that: "'FAILED!' 
in command_output.stdout" + success_msg: "{{ test_case_success_msg }}" + fail_msg: "{{ test_case_failure_msg }}" + tags: TC_011 + + - name: Delete the hosts + command: >- + awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ awx_password }}" --conf.insecure + hosts delete {{ host3 }} --monitor -f human + changed_when: false + tags: TC_011 + + - name: Delete the inventory file + file: + state: absent + path: testinventory.yml + ignore_errors: yes + tags: TC_011 + + - name: Create inventory file if it doesn't exist + file: + path: "testinventory.yml" + mode: '{{ file_permission }}' + state: touch + ignore_errors: yes + tags: TC_011 + +# Testcase OMNIA_CRM_US_AWXD_TC_012 +# Test case to verify whether the compute group validation is passed when more than 1 host is present +- name: OMNIA_CRM_US_AWXD_TC_012 + hosts: localhost + connection: local + vars_files: + - ../appliance/roles/web_ui/vars/main.yml + - ../appliance/roles/common/vars/main.yml + - test_vars/test_omnia_vars.yml + tasks: + - name: Check input config file is encrypted + command: cat ../appliance/test/{{ test_input_config_filename }} + changed_when: false + register: config_content + tags: TC_012 + + - name: Decrpyt input_config.yml + command: ansible-vault decrypt ../appliance/test/{{ test_input_config_filename }} --vault-password-file {{ vault_filename }} + changed_when: false + when: "'$ANSIBLE_VAULT;' in config_content.stdout" + tags: TC_012 + + - name: Include variable file input_config.yml + include_vars: "../appliance/test/{{ test_input_config_filename }}" + tags: TC_012 + + - name: Creating inventory file with hosts associated to the groups + copy: + dest: "testinventory.yml" + mode: '{{ file_permission }}' + content: | + --- + manager: + hosts: + {{ host1 }} + + compute: + hosts: + {{ host2 }} + hosts: + {{ host3 }} + tags: TC_012 + + - name: Get present working directory + command: >- + pwd + register: path + changed_when: false + tags: TC_012 + + - name: Push the inventory to AWX + shell: | + set -o pipefail + docker exec awx_task awx-manage inventory_import --inventory-name {{ omnia_inventory_name }} --source "{{ inventory_path }}/{{ path.stdout.split('/')[-2] }}/test/testinventory.yml" + changed_when: false + tags: TC_012 + + - block: + - name: Launch the job template + command: >- + awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ awx_password }}" --conf.insecure + job_templates launch "{{ omnia_template_name }}" --credentials "{{ credential_name }}" --monitor -f human + changed_when: false + register: command_output + + rescue: + - name: Validate success message + assert: + that: "'FAILED!' 
not in command_output.stdout" + success_msg: "{{ test_case_success_msg }}" + fail_msg: "{{ test_case_failure_msg }}" + tags: TC_012 + + - name: Delete the hosts + command: >- + awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ awx_password }}" --conf.insecure + hosts delete {{ host1 }} --monitor -f human + awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ awx_password }}" --conf.insecure + hosts delete {{ host2 }} --monitor -f human + awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ awx_password }}" --conf.insecure + hosts delete {{ host3 }} --monitor -f human + changed_when: false + tags: TC_012 + + - name: Delete the inventory file + ignore_errors: yes + file: + state: absent + path: testinventory.yml + tags: TC_012 + + - name: Create inventory file if it doesn't exist + ignore_errors: yes + file: + path: "testinventory.yml" + state: touch + mode: '{{ file_permission }}' + tags: TC_012 + +# Testcase OMNIA_CRM_US_AWXD_TC_013 +# Test case to validate the success meesage when a host is present in both manager and compute groups +- name: OMNIA_CRM_US_AWXD_TC_013 + hosts: localhost + connection: local + vars_files: + - ../appliance/roles/web_ui/vars/main.yml + - ../appliance/roles/common/vars/main.yml + - test_vars/test_omnia_vars.yml + tasks: + - name: Check input config file is encrypted + command: cat ../appliance/test/{{ test_input_config_filename }} + changed_when: false + register: config_content + tags: TC_013 + + - name: Decrpyt input_config.yml + command: ansible-vault decrypt ../appliance/test/{{ test_input_config_filename }} --vault-password-file {{ vault_filename }} + changed_when: false + when: "'$ANSIBLE_VAULT;' in config_content.stdout" + tags: TC_013 + + - name: Include variable file input_config.yml + include_vars: "../appliance/test/{{ test_input_config_filename }}" + tags: TC_013 + + - name: Creating inventory file with hosts associated to the groups + copy: + dest: "testinventory.yml" + mode: '{{ file_permission }}' + content: | + --- + manager: + hosts: + {{ host1 }} + + compute: + hosts: + {{ host1 }} + tags: TC_013 + + - name: Get present working directory + command: >- + pwd + register: path + changed_when: false + tags: TC_013 + + - name: Push the inventory to AWX + shell: | + set -o pipefail + docker exec awx_task awx-manage inventory_import --inventory-name {{ omnia_inventory_name }} --source "{{ inventory_path }}/{{ path.stdout.split('/')[-2] }}/test/testinventory.yml" + changed_when: false + tags: TC_013 + + - block: + - name: Launch the job template + command: >- + awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ awx_password }}" --conf.insecure + job_templates launch "{{ omnia_template_name }}" --credentials "{{ credential_name }}" --monitor -f human + changed_when: false + register: command_output + + rescue: + - name: Validate success message + assert: + that: "'FAILED!' 
not in command_output.stdout" + success_msg: "{{ test_case_success_msg }}" + fail_msg: "{{ test_case_failure_msg }}" + tags: TC_013 + + - name: Delete the hosts + command: >- + awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ awx_password }}" --conf.insecure + hosts delete {{ host1 }} --monitor -f human + changed_when: false + tags: TC_013 + + - name: Delete the inventory file + ignore_errors: yes + file: + state: absent + path: testinventory.yml + tags: TC_013 + + - name: Create inventory file if it doesn't exist + ignore_errors: yes + file: + path: "testinventory.yml" + state: touch + mode: '{{ file_permission }}' + tags: TC_013 + +# Testcase OMNIA_CRM_US_AWXD_TC_014 +# Test case to validate whether the proper error message is displayed when more than one host is added to manager group +- name: OMNIA_CRM_US_AWXD_TC_014 + hosts: localhost + connection: local + vars_files: + - ../appliance/roles/web_ui/vars/main.yml + - ../appliance/roles/common/vars/main.yml + - test_vars/test_omnia_vars.yml + tasks: + - name: Check input config file is encrypted + command: cat ../appliance/test/{{ test_input_config_filename }} + changed_when: false + register: config_content + tags: TC_014 + + - name: Decrpyt input_config.yml + command: ansible-vault decrypt ../appliance/test/{{ test_input_config_filename }} --vault-password-file {{ vault_filename }} + changed_when: false + when: "'$ANSIBLE_VAULT;' in config_content.stdout" + tags: TC_014 + + - name: Include variable file input_config.yml + include_vars: "../appliance/test/{{ test_input_config_filename }}" + tags: TC_014 + + - name: Creating inventory file with hosts associated to the groups + copy: + dest: "testinventory.yml" + mode: '{{ file_permission }}' + content: | + --- + compute: + hosts: + {{ host1 }} + + manager: + hosts: + {{ host2 }} + hosts: + {{ host3 }} + + tags: TC_014 + + - name: Get present working directory + command: >- + pwd + register: path + changed_when: false + tags: TC_014 + + - name: Push the inventory to AWX + shell: | + set -o pipefail + docker exec awx_task awx-manage inventory_import --inventory-name {{ omnia_inventory_name }} --source "{{ inventory_path }}/{{ path.stdout.split('/')[-2] }}/test/testinventory.yml" + changed_when: false + tags: TC_014 + + - block: + - name: Launch the job template + command: >- + awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ awx_password }}" --conf.insecure + job_templates launch "{{ omnia_template_name }}" --credentials "{{ credential_name }}" --monitor -f human + changed_when: false + register: command_output + + rescue: + - name: Validate error message + assert: + that: "'FAILED!' 
in command_output.stdout" + success_msg: "{{ test_case_success_msg }}" + fail_msg: "{{ test_case_failure_msg }}" + tags: TC_014 + + - name: Delete the hosts + command: >- + awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ awx_password }}" --conf.insecure + hosts delete {{ host1 }} --monitor -f human + awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ awx_password }}" --conf.insecure + hosts delete {{ host2 }} --monitor -f human + awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ awx_password }}" --conf.insecure + hosts delete {{ host3 }} --monitor -f human + changed_when: false + tags: TC_014 + + - name: Delete the inventory file + ignore_errors: yes + file: + state: absent + path: testinventory.yml + tags: TC_014 + + - name: Create inventory file if it doesn't exist + ignore_errors: yes + file: + path: "testinventory.yml" + state: touch + mode: '{{ file_permission }}' + tags: TC_014 \ No newline at end of file diff --git a/test/test_slurm_common.yml b/test/test_slurm_common.yml new file mode 100644 index 000000000..7c82d2055 --- /dev/null +++ b/test/test_slurm_common.yml @@ -0,0 +1,94 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+--- + +# Testcase OMNIA_USP_US_VFSP_TC_001 +# Execute the slurm_common role on manager & compute nodes running CentOS 7.9 +- name: OMNIA_USP_US_VFSP_TC_001 + hosts: manager, compute + vars_files: + - test_vars/test_slurm_common_vars.yml + tasks: + - block: + - name: Call slurm common role + include_role: + name: ../roles/slurm_common + tags: TC_001 + + - name: Fetch common packages installed + package_facts: + manager: auto + tags: TC_001, VERIFY_001 + + - name: Checking munge service status + systemd: + name: munge + register: munge_service + tags: TC_001, VERIFY_001 + + - name: Verify all required packages are installed + assert: + that: "'{{ item }}' in ansible_facts.packages" + success_msg: "{{ packages_status_success_msg }}" + fail_msg: "{{ packages_status_fail_msg }}" + with_items: "{{ common_packages }}" + tags: TC_001, VERIFY_001 + + - name: Validating munge service status + assert: + that: + - munge_service.status.ActiveState == 'active' + fail_msg: "{{ munge_service_fail_msg }}" + success_msg: "{{ munge_service_success_msg }}" + tags: TC_001, VERIFY_001 + +# Testcase OMNIA_USP_US_VFSP_TC_002 +# Execute the slurm_common role on manager & compute nodes with the common packages already installed +- name: OMNIA_USP_US_VFSP_TC_002 + hosts: manager, compute + vars_files: + - test_vars/test_slurm_common_vars.yml + tasks: + - block: + - name: Call slurm common role + include_role: + name: ../roles/slurm_common + tags: TC_002, VERIFY_002 + + - name: Fetch common packages installed + package_facts: + manager: auto + tags: TC_002, VERIFY_002 + + - name: Checking munge service status + systemd: + name: munge + register: munge_service + tags: TC_002, VERIFY_002 + + - name: Verify all required packages are installed + assert: + that: "'{{ item }}' in ansible_facts.packages" + success_msg: "{{ packages_status_success_msg }}" + fail_msg: "{{ packages_status_fail_msg }}" + with_items: "{{ common_packages }}" + tags: TC_002, VERIFY_002 + + - name: Validating munge service status + assert: + that: + - munge_service.status.ActiveState == 'active' + fail_msg: "{{ munge_service_fail_msg }}" + success_msg: "{{ munge_service_success_msg }}" + tags: TC_002, VERIFY_002 \ No newline at end of file diff --git a/test/test_slurm_exporter_inst_host.yml b/test/test_slurm_exporter_inst_host.yml new file mode 100644 index 000000000..bfbb4af02 --- /dev/null +++ b/test/test_slurm_exporter_inst_host.yml @@ -0,0 +1,56 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
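+# The two exporter test playbooks below call the slurm_exporter role from +# ../roles/slurm_exporter and read back unit state via the systemd module. +# An assumed invocation: ansible-playbook test/test_slurm_exporter_inst_host.yml -i inventory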
+--- + +# OMNIA_PSE_TC_002 +# Install Prometheus on the host when Kubernetes is not installed +- name: OMNIA_PSE_TC_002 + hosts: manager + vars_files: + - test_vars/test_slurmexporter_vars.yml + tasks: + - block: + - name: Call install slurm exporter role + include_role: + name: ../roles/slurm_exporter + tags: TC_002 + + - name: Verify slurm exporter status + systemd: + name: prometheus-slurm-exporter + register: slurm_exporter_status + tags: TC_002, VERIFY_002 + + - name: Validate slurm exporter service status + assert: + that: + - slurm_exporter_status.status.ActiveState == 'active' + fail_msg: "{{ slurm_exporter_service_fail_msg }}" + success_msg: "{{ slurm_exporter_service_success_msg }}" + tags: TC_002, VERIFY_002 + + - name: Verify prometheus installation status + command: prometheus --version + register: prometheus_status + tags: TC_002, VERIFY_002 + ignore_errors: yes + changed_when: False + + - name: Validate prometheus version command + assert: + that: + - "'Command not found' not in prometheus_status.stdout" + fail_msg: "{{ prometheus_installation_fail_msg }}" + success_msg: "{{ prometheus_installation_success_msg }}" + tags: TC_002, VERIFY_002 \ No newline at end of file diff --git a/test/test_slurm_exporter_inst_k8s.yml b/test/test_slurm_exporter_inst_k8s.yml new file mode 100644 index 000000000..56ecefac2 --- /dev/null +++ b/test/test_slurm_exporter_inst_k8s.yml @@ -0,0 +1,55 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +# OMNIA_PSE_TC_001 +# Successful installation of the slurm exporter on the host when both Slurm and Kubernetes are installed +- name: OMNIA_PSE_TC_001 + hosts: manager + vars_files: + - test_vars/test_slurmexporter_vars.yml + tasks: + - block: + - name: Call install slurm exporter role + include_role: + name: ../roles/slurm_exporter + tags: TC_001 + + - name: Verify slurm exporter status + systemd: + name: prometheus-slurm-exporter + register: slurm_exporter_status + tags: TC_001, VERIFY_001 + + - name: Validate slurm exporter service status + assert: + that: + - slurm_exporter_status.status.ActiveState == 'active' + fail_msg: "{{ slurm_exporter_service_fail_msg }}" + success_msg: "{{ slurm_exporter_service_success_msg }}" + tags: TC_001, VERIFY_001 + + - name: Verify slurm exporter job in k8s services + command: kubectl get service prometheus-slurmexporter-metrics-1 + register: slurm_exporter_service_status + tags: TC_001, VERIFY_001 + changed_when: False + + - name: Validate slurm exporter job in k8s services + assert: + that: + - "'Error from server' not in slurm_exporter_service_status.stdout" + fail_msg: "{{ slurm_exporter_job_fail_msg }}" + success_msg: "{{ slurm_exporter_job_success_msg }}" + tags: TC_001, VERIFY_001 \ No newline at end of file diff --git a/test/test_slurm_manager.yml b/test/test_slurm_manager.yml new file mode 100644 index 000000000..d17f32f28 --- /dev/null +++ b/test/test_slurm_manager.yml @@ -0,0 +1,166 @@ +# Copyright 2020 Dell Inc. or its subsidiaries.
All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +# Testcase OMNIA_USP_US_VFSP_TC_003 +# Execute the slurm_manager role on the manager node running CentOS 7.9 +- name: OMNIA_USP_US_VFSP_TC_003 + hosts: manager + vars_files: + - test_vars/test_slurm_manager_vars.yml + - ../appliance/roles/common/vars/main.yml + tasks: + - block: + - name: Call cluster_validation role + include_role: + name: ../roles/cluster_validation + tasks_from: fetch_password + tags: TC_003 + + - name: Call slurm manager role + include_role: + name: ../roles/slurm_manager + tags: TC_003 + + - name: Fetch slurm packages installed + package_facts: + manager: auto + tags: TC_003, VERIFY_003 + + - name: Start and enable firewalld + service: + name: firewalld + state: started + enabled: yes + tags: TC_003, VERIFY_003 + + - name: Checking firewalld tcp/udp ports on manager node + command: firewall-cmd --list-ports + register: manager_firewalld_ports + when: "'manager' in group_names" + tags: TC_003, VERIFY_003 + + - name: Checking mariadb service status + systemd: + name: mariadb + register: mariadb_service + tags: TC_003, VERIFY_003 + + - name: Validating tcp/udp ports on manager node + assert: + that: + - "'6817/tcp' in manager_firewalld_ports.stdout" + - "'6817/udp' in manager_firewalld_ports.stdout" + - "'6818/tcp' in manager_firewalld_ports.stdout" + - "'6818/udp' in manager_firewalld_ports.stdout" + - "'6819/tcp' in manager_firewalld_ports.stdout" + - "'6819/udp' in manager_firewalld_ports.stdout" + fail_msg: "{{ manager_ports_status_fail_msg }}" + success_msg: "{{ manager_ports_status_success_msg }}" + when: "'manager' in group_names" + tags: TC_003, VERIFY_003 + + - name: Verify all slurm packages are installed + assert: + that: "'{{ item }}' in ansible_facts.packages" + success_msg: "{{ slurm_packages_status_success_msg }}" + fail_msg: "{{ slurm_packages_status_fail_msg }}" + with_items: + - "{{ slurm_packages }}" + - "{{ dev_tools }}" + tags: TC_003, VERIFY_003 + + - name: Validating mariadb service status + assert: + that: + - mariadb_service.status.ActiveState == 'active' + fail_msg: "{{ mariadb_service_fail_msg }}" + success_msg: "{{ mariadb_service_success_msg }}" + tags: TC_003, VERIFY_003 + +# Testcase OMNIA_USP_US_VFSP_TC_004 +# Execute the slurm_manager role on the manager node with the Slurm packages already installed +- name: OMNIA_USP_US_VFSP_TC_004 + hosts: manager + vars_files: + - test_vars/test_slurm_manager_vars.yml + - ../appliance/roles/common/vars/main.yml + tasks: + - block: + - name: Call cluster_validation role + include_role: + name: ../roles/cluster_validation + tasks_from: fetch_password + tags: TC_004 + + - name: Call slurm manager role + include_role: + name: ../roles/slurm_manager + tags: TC_004 + + - name: Fetch slurm packages installed + package_facts: + manager: auto + tags: TC_004, VERIFY_004 + + - name: Start and enable firewalld + service: + name: firewalld + state: started + enabled: yes + tags: TC_004, VERIFY_004 + + - name: Checking firewalld tcp/udp ports on manager
node + command: firewall-cmd --list-ports + register: manager_firewalld_ports + when: "'manager' in group_names" + tags: TC_004, VERIFY_004 + + - name: Checking mariadb service status + systemd: + name: mariadb + register: mariadb_service + tags: TC_004, VERIFY_004 + + - name: Validating tcp/udp ports on manager node + assert: + that: + - "'6817/tcp' in manager_firewalld_ports.stdout" + - "'6817/udp' in manager_firewalld_ports.stdout" + - "'6818/tcp' in manager_firewalld_ports.stdout" + - "'6818/udp' in manager_firewalld_ports.stdout" + - "'6819/tcp' in manager_firewalld_ports.stdout" + - "'6819/udp' in manager_firewalld_ports.stdout" + fail_msg: "{{ manager_ports_status_fail_msg }}" + success_msg: "{{ manager_ports_status_success_msg }}" + when: "'manager' in group_names" + tags: TC_004, VERIFY_004 + + - name: Verify all slurm packages are installed + assert: + that: "'{{ item }}' in ansible_facts.packages" + success_msg: "{{ slurm_packages_status_success_msg }}" + fail_msg: "{{ slurm_packages_status_fail_msg }}" + with_items: + - "{{ slurm_packages }}" + - "{{ dev_tools }}" + tags: TC_004, VERIFY_004 + + - name: Validating mariadb service status + assert: + that: + - mariadb_service.status.ActiveState == 'active' + fail_msg: "{{ mariadb_service_fail_msg }}" + success_msg: "{{ mariadb_service_success_msg }}" + tags: TC_004, VERIFY_004 \ No newline at end of file diff --git a/test/test_slurm_start_services.yml b/test/test_slurm_start_services.yml new file mode 100644 index 000000000..c91184318 --- /dev/null +++ b/test/test_slurm_start_services.yml @@ -0,0 +1,124 @@ +#Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
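+# The checks in the playbook below only read service state: calling the +# systemd module with just a unit name returns its status without changing it, +# so the asserts can test result.status.ActiveState == 'active' (a sketch of +# the pattern used throughout these tests).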
+--- + +# Testcase OMNIA_USP_US_VFSP_TC_007 +# Execute the slurm_start_services role on the manager node running CentOS 7.9 +- name: OMNIA_USP_US_VFSP_TC_007 + hosts: manager + vars_files: + - test_vars/test_slurm_start_services_vars.yml + tasks: + - block: + - name: Call slurm start services role + include_role: + name: ../roles/slurm_start_services + tags: TC_007 + + - name: Checking slurmctld service status + systemd: + name: slurmctld + register: slurmctld_service + tags: TC_007, VERIFY_007 + + - name: Checking slurmdbd service status + systemd: + name: slurmdbd + register: slurmdbd_service + tags: TC_007, VERIFY_007 + + - name: Check if slurm is installed + command: sinfo -V + register: slurm_version + changed_when: false + ignore_errors: True + tags: TC_007, VERIFY_007 + + - name: Validating slurmctld service status + assert: + that: + - slurmctld_service.status.ActiveState == 'active' + fail_msg: "{{ slurmctld_service_fail_msg }}" + success_msg: "{{ slurmctld_service_success_msg }}" + tags: TC_007, VERIFY_007 + + - name: Validating slurmdbd service status + assert: + that: + - slurmdbd_service.status.ActiveState == 'active' + fail_msg: "{{ slurmdbd_service_fail_msg }}" + success_msg: "{{ slurmdbd_service_success_msg }}" + tags: TC_007, VERIFY_007 + + - name: Validate slurm installation + assert: + that: "'command not found' not in slurm_version.stdout" + fail_msg: "{{ slurm_status_fail_msg }}" + success_msg: "{{ slurm_status_success_msg }}" + tags: TC_007, VERIFY_007 + +# Testcase OMNIA_USP_US_VFSP_TC_008 +# Execute the slurm_start_services role on the manager node with the services already running +- name: OMNIA_USP_US_VFSP_TC_008 + hosts: manager + vars_files: + - test_vars/test_slurm_start_services_vars.yml + tasks: + - block: + - name: Call slurm start services role + include_role: + name: ../roles/slurm_start_services + tags: TC_008 + + - name: Checking slurmctld service status + systemd: + name: slurmctld + register: slurmctld_service + tags: TC_008, VERIFY_008 + + - name: Checking slurmdbd service status + systemd: + name: slurmdbd + register: slurmdbd_service + tags: TC_008, VERIFY_008 + + - name: Check if slurm is installed + command: sinfo -V + register: slurm_version + changed_when: false + ignore_errors: True + tags: TC_008, VERIFY_008 + + - name: Validating slurmctld service status + assert: + that: + - slurmctld_service.status.ActiveState == 'active' + fail_msg: "{{ slurmctld_service_fail_msg }}" + success_msg: "{{ slurmctld_service_success_msg }}" + tags: TC_008, VERIFY_008 + + - name: Validating slurmdbd service status + assert: + that: + - slurmdbd_service.status.ActiveState == 'active' + fail_msg: "{{ slurmdbd_service_fail_msg }}" + success_msg: "{{ slurmdbd_service_success_msg }}" + tags: TC_008, VERIFY_008 + + - name: Validate slurm installation + assert: + that: "'command not found' not in slurm_version.stdout" + fail_msg: "{{ slurm_status_fail_msg }}" + success_msg: "{{ slurm_status_success_msg }}" + tags: TC_008, VERIFY_008 \ No newline at end of file diff --git a/test/test_slurm_workers.yml b/test/test_slurm_workers.yml new file mode 100644 index 000000000..cef86b440 --- /dev/null +++ b/test/test_slurm_workers.yml @@ -0,0 +1,126 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +# Testcase OMNIA_USP_US_VFSP_TC_005 +# Execute the slurm_workers role on compute nodes running CentOS 7.9 +- name: OMNIA_USP_US_VFSP_TC_005 + hosts: compute + vars_files: + - test_vars/test_slurm_workers_vars.yml + tasks: + - block: + - name: Call slurm workers role + include_role: + name: ../roles/slurm_workers + tags: TC_005 + + - name: Fetch slurm packages installed + package_facts: + manager: auto + tags: TC_005, VERIFY_005 + + - name: Check if slurm is installed + command: sinfo -V + register: slurm_version + changed_when: false + ignore_errors: True + tags: TC_005, VERIFY_005 + + - name: Checking slurmd service status + systemd: + name: slurmd + register: slurmd_service + tags: TC_005, VERIFY_005 + + - name: Verify all slurm packages are installed + assert: + that: "'{{ item }}' in ansible_facts.packages" + success_msg: "{{ slurm_packages_status_success_msg }}" + fail_msg: "{{ slurm_packages_status_fail_msg }}" + with_items: + - "{{ slurm_packages }}" + - "{{ dev_tools }}" + tags: TC_005, VERIFY_005 + + - name: Validate slurm installation + assert: + that: "'command not found' not in slurm_version.stdout" + fail_msg: "{{ slurm_status_fail_msg }}" + success_msg: "{{ slurm_status_success_msg }}" + tags: TC_005, VERIFY_005 + + - name: Validating slurmd service status + assert: + that: + - slurmd_service.status.ActiveState == 'active' + fail_msg: "{{ slurmd_service_fail_msg }}" + success_msg: "{{ slurmd_service_success_msg }}" + tags: TC_005, VERIFY_005 + +# Testcase OMNIA_USP_US_VFSP_TC_006 +# Execute the slurm_workers role on compute nodes with the Slurm packages already installed +- name: OMNIA_USP_US_VFSP_TC_006 + hosts: compute + vars_files: + - test_vars/test_slurm_workers_vars.yml + tasks: + - block: + - name: Call slurm workers role + include_role: + name: ../roles/slurm_workers + tags: TC_006 + + - name: Fetch slurm packages installed + package_facts: + manager: auto + tags: TC_006, VERIFY_006 + + - name: Checking slurmd service status + systemd: + name: slurmd + register: slurmd_service + tags: TC_006, VERIFY_006 + + - name: Check if slurm is installed + command: sinfo -V + register: slurm_version + changed_when: false + ignore_errors: True + tags: TC_006, VERIFY_006 + + - name: Verify all slurm packages are installed + assert: + that: "'{{ item }}' in ansible_facts.packages" + success_msg: "{{ slurm_packages_status_success_msg }}" + fail_msg: "{{ slurm_packages_status_fail_msg }}" + with_items: + - "{{ slurm_packages }}" + - "{{ dev_tools }}" + tags: TC_006, VERIFY_006 + + - name: Validate slurm installation + assert: + that: "'command not found' not in slurm_version.stdout" + fail_msg: "{{ slurm_status_fail_msg }}" + success_msg: "{{ slurm_status_success_msg }}" + tags: TC_006, VERIFY_006 + + - name: Validating slurmd service status + assert: + that: + - slurmd_service.status.ActiveState == 'active' + fail_msg: "{{ slurmd_service_fail_msg }}" + success_msg: "{{ slurmd_service_success_msg }}" + tags: TC_006, VERIFY_006 \ No newline at end of file diff --git a/test/test_vars/test_common_vars.yml b/test/test_vars/test_common_vars.yml new file mode 100644
index 000000000..fc4848765 --- /dev/null +++ b/test/test_vars/test_common_vars.yml @@ -0,0 +1,18 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +ntpd_service_fail_msg: "Ntpd service is not running" + +ntpd_service_success_msg: "Ntpd service is running" \ No newline at end of file diff --git a/test/test_vars/test_jupyterhub_vars.yml b/test/test_vars/test_jupyterhub_vars.yml new file mode 100644 index 000000000..0d8d0efef --- /dev/null +++ b/test/test_vars/test_jupyterhub_vars.yml @@ -0,0 +1,22 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +jupyterhub_pods_fail_msg: "JupyterHub pods are not deployed" + +jupyterhub_pods_success_msg: "JupyterHub pods are deployed and running" + +jupyterhub_services_fail_msg: "JupyterHub services are not running" + +jupyterhub_services_success_msg: "JupyterHub services are running" \ No newline at end of file diff --git a/test/test_vars/test_k8s_common_vars.yml b/test/test_vars/test_k8s_common_vars.yml new file mode 100644 index 000000000..495ac5a69 --- /dev/null +++ b/test/test_vars/test_k8s_common_vars.yml @@ -0,0 +1,34 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
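+# The *_msg variables in these test_vars files pair with assert tasks in the +# matching test playbooks, e.g. (sketch; the package name is illustrative): +# - assert: +#     that: "'docker' in ansible_facts.packages" +#     success_msg: "{{ packages_status_success_msg }}" +#     fail_msg: "{{ packages_status_fail_msg }}"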
+--- + +common_packages: + - docker + - kubelet + +k8_packages: + - kubeadm + - kubectl + +packages_status_success_msg: "Common & K8s packages are installed" + +packages_status_fail_msg: "Common & K8s packages are not installed" + +docker_service_fail_msg: "Docker service is not running" + +docker_service_success_msg: "Docker service is running" + +kubelet_service_fail_msg: "K8s service is not running" + +kubelet_service_success_msg: "K8s service is running" \ No newline at end of file diff --git a/test/test_vars/test_k8s_firewalld_vars.yml b/test/test_vars/test_k8s_firewalld_vars.yml new file mode 100644 index 000000000..4a10a770f --- /dev/null +++ b/test/test_vars/test_k8s_firewalld_vars.yml @@ -0,0 +1,38 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +manager_k8s_ports_status_fail_msg: "Kubernetes Ports are not opened in manager node" + +manager_k8s_ports_status_success_msg: "Kubernetes Ports are opened in manager node" + +compute_k8s_ports_status_fail_msg: "Kubernetes Ports are not opened in compute nodes" + +compute_k8s_ports_status_success_msg: "Kubernetes Ports are opened in compute nodes" + +calico_ports_manager_fail_msg: "Calico ports are not opened in manager nodes" + +calico_ports_manager_success_msg: "Calico ports are opened in manager nodes" + +calico_ports_compute_fail_msg: "Calico ports are not opened in compute nodes" + +calico_ports_compute_success_msg: "Calico ports are opened in compute nodes" + +flannel_ports_manager_fail_msg: "Flannel ports are not opened in manager nodes" + +flannel_ports_manager_success_msg: "Flannel ports are opened in manager nodes" + +flannel_ports_compute_fail_msg: "Flannel ports are not opened in compute nodes" + +flannel_ports_compute_success_msg: "Flannel ports are opened in compute nodes" \ No newline at end of file diff --git a/test/test_vars/test_k8s_manager_vars.yml b/test/test_vars/test_k8s_manager_vars.yml new file mode 100644 index 000000000..d7d7a8c28 --- /dev/null +++ b/test/test_vars/test_k8s_manager_vars.yml @@ -0,0 +1,17 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
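+# test_k8s_manager_vars.yml: assertion messages for the k8s manager role test +# (Helm installation checks).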
+--- + +helm_status_fail_msg: "Helm is not installed" + +helm_status_success_msg: "Helm is installed" \ No newline at end of file diff --git a/test/test_vars/test_k8s_start_manager_workers_vars.yml b/test/test_vars/test_k8s_start_manager_workers_vars.yml new file mode 100644 index 000000000..7ce27599c --- /dev/null +++ b/test/test_vars/test_k8s_start_manager_workers_vars.yml @@ -0,0 +1,38 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +master_node_status_fail_msg: "Master Node is not configured" + +master_node_status_success_msg: "Master Node is configured and initialized successfully" + +controller_scheduler_status_fail_msg: "Static Pod manifests (controller-manager and scheduler) are not deployed" + +controller_scheduler_status_success_msg: "Static Pod manifests (controller-manager and scheduler) are deployed and running" + +coredns_status_fail_msg: "Core DNS pods are not deployed" + +coredns_status_success_msg: "Core DNS pods are deployed and running" + +calico_flannel_status_fail_msg: "Calico/Flannel SDN network is not deployed" + +calico_flannel_status_success_msg: "Calico/Flannel SDN network is deployed and running" + +k8s_service_account_status_fail_msg: "Kubernetes dashboard service account and token are not created" + +k8s_service_account_status_success_msg: "Kubernetes dashboard service account and token are created" + +worker_nodes_status_fail_msg: "Worker Nodes are not initialized" + +worker_nodes_status_success_msg: "Worker Nodes are initialized and joined to the cluster" \ No newline at end of file diff --git a/test/test_vars/test_k8s_start_services_vars.yml b/test/test_vars/test_k8s_start_services_vars.yml new file mode 100644 index 000000000..69ea84111 --- /dev/null +++ b/test/test_vars/test_k8s_start_services_vars.yml @@ -0,0 +1,38 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
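+# test_k8s_start_services_vars.yml (below) covers the add-on services deployed +# by k8s_start_services: MetalLB, Prometheus, the MPI operator, the Kubernetes +# dashboard, the NFS client provisioner, node feature discovery, and the +# NVIDIA device plugin.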
+--- + +metallb_prometheus_mpi_pods_fail_msg: "Metallb/Prometheus/MPI pods are not deployed/running" + +metallb_prometheus_mpi_pods_success_msg: "Metallb, Prometheus and MPI pods are deployed and running" + +kubernetes_dashboard_fail_msg: "Kubernetes Dashboard is not deployed" + +kubernetes_dashboard_success_msg: "Kubernetes Dashboard is deployed" + +nfs_client_provisioner_pods_fail_msg: "NFS Client Provisioner pod is not deployed" + +nfs_client_provisioner_pods_success_msg: "NFS Client Provisioner pod is deployed and running" + +node_feature_discovery_pods_fail_msg: "Node Feature Discovery pods are not deployed" + +node_feature_discovery_pods_success_msg: "Node Feature Discovery pods are deployed and running" + +nvidia_device_plugin_pods_fail_msg: "Nvidia Device Plugin pod is not deployed/running" + +nvidia_device_plugin_pods_success_msg: "Nvidia Device Plugin pod is deployed and running" + +default_storage_class_fail_msg: "NFS Client Provisioner is not configured as default storage class" + +default_storage_class_success_msg: "NFS Client Provisioner is configured as default storage class" \ No newline at end of file diff --git a/test/test_vars/test_kubeflow_vars.yml b/test/test_vars/test_kubeflow_vars.yml new file mode 100644 index 000000000..6c4a9dbcc --- /dev/null +++ b/test/test_vars/test_kubeflow_vars.yml @@ -0,0 +1,22 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +kubeflow_install_fail_msg: "Kubeflow (kfctl) is not installed" + +kubeflow_install_success_msg: "Kubeflow (kfctl) is installed" + +kubeflow_pods_deployment_fail_msg: "Kubeflow pods are not deployed" + +kubeflow_pods_deployment_success_msg: "Kubeflow pods are deployed" \ No newline at end of file diff --git a/test/test_vars/test_omnia_vars.yml b/test/test_vars/test_omnia_vars.yml new file mode 100644 index 000000000..ce18a7f06 --- /dev/null +++ b/test/test_vars/test_omnia_vars.yml @@ -0,0 +1,24 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
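+# host1-host3 below are placeholder addresses; replace them with hosts that +# are reachable from the AWX container before running the test_omnia playbooks.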
+--- + +# Usage: test_omnia.yml +host1: "100.10.20.30" +host2: "100.20.30.40" +host3: "100.30.40.50" +inventory_path: "/var/lib/awx/projects" +test_input_config_filename: "appliance_config_test.yml" +test_case_success_msg: "Test case passed" +test_case_failure_msg: "Test case failed" +file_permission: "0644" \ No newline at end of file diff --git a/test/test_vars/test_slurm_common_vars.yml b/test/test_vars/test_slurm_common_vars.yml new file mode 100644 index 000000000..48114561e --- /dev/null +++ b/test/test_vars/test_slurm_common_vars.yml @@ -0,0 +1,31 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +packages_status_success_msg: "Common packages are installed" + +packages_status_fail_msg: "Common packages are not installed" + +munge_service_fail_msg: "Munge service is not running" + +munge_service_success_msg: "Munge service is running" + +common_packages: + - munge + - munge-libs + - munge-devel + - mariadb-server + - mariadb-devel + - man2html + - MySQL-python \ No newline at end of file diff --git a/test/test_vars/test_slurm_manager_vars.yml b/test/test_vars/test_slurm_manager_vars.yml new file mode 100644 index 000000000..790b990e8 --- /dev/null +++ b/test/test_vars/test_slurm_manager_vars.yml @@ -0,0 +1,55 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
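+# The slurm_packages and dev_tools lists below are assumed to mirror what the +# slurm_manager role installs; keep them in sync with the role's vars when the +# package set changes.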
+--- + +slurm_packages: + - gcc + - openssl + - numactl + - hwloc + - lua + - readline + - perl-ExtUtils-MakeMaker + - rpm-build + - perl-DBI + - perl-Switch + - libibumad + +dev_tools: + - rrdtool-devel + - lua-devel + - hwloc-devel + - libssh2-devel + - pam-devel + - readline-devel + - openssl-devel + - numactl-devel + - ncurses-devel + - gtk2-devel + +manager_ports_status_fail_msg: "Slurm ports are not opened in manager node" + +manager_ports_status_success_msg: "Slurm ports are opened in manager node" + +slurm_packages_status_success_msg: "Slurm and dev packages are installed" + +slurm_packages_status_fail_msg: "Slurm and dev packages are not installed" + +slurm_status_fail_msg: "Slurm is not installed" + +slurm_status_success_msg: "Slurm is installed" + +mariadb_service_fail_msg: "MariaDB server is not running" + +mariadb_service_success_msg: "MariaDB server is up and running" \ No newline at end of file diff --git a/test/test_vars/test_slurm_start_services_vars.yml b/test/test_vars/test_slurm_start_services_vars.yml new file mode 100644 index 000000000..cda616464 --- /dev/null +++ b/test/test_vars/test_slurm_start_services_vars.yml @@ -0,0 +1,26 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +slurmctld_service_fail_msg: "Slurmctld service is not running" + +slurmctld_service_success_msg: "Slurmctld service is running" + +slurmdbd_service_fail_msg: "Slurmdbd service is not running" + +slurmdbd_service_success_msg: "Slurmdbd service is running" + +slurm_status_success_msg: "Slurm is installed" + +slurm_status_fail_msg: "Slurm is not installed" \ No newline at end of file diff --git a/test/test_vars/test_slurm_workers_vars.yml b/test/test_vars/test_slurm_workers_vars.yml new file mode 100644 index 000000000..09808128c --- /dev/null +++ b/test/test_vars/test_slurm_workers_vars.yml @@ -0,0 +1,56 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
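+# The workers list below is the manager list plus python3; like the manager +# vars, it is assumed to mirror the packages the slurm_workers role installs.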
+--- + +slurm_packages: + - python3 + - gcc + - openssl + - numactl + - hwloc + - lua + - readline + - perl-ExtUtils-MakeMaker + - rpm-build + - perl-DBI + - perl-Switch + - libibumad + +dev_tools: + - rrdtool-devel + - lua-devel + - hwloc-devel + - libssh2-devel + - pam-devel + - readline-devel + - openssl-devel + - numactl-devel + - ncurses-devel + - gtk2-devel + +manager_ports_status_fail_msg: "Slurm ports are not opened in manager node" + +manager_ports_status_success_msg: "Slurm Ports are opened in manager node" + +slurm_packages_status_success_msg: "Slurm and dev packages are installed" + +slurm_packages_status_fail_msg: "Slurm and dev packages are not installed" + +slurm_status_fail_msg: "Slurm is not installed" + +slurm_status_success_msg: "Slurm is installed" + +slurmd_service_fail_msg: "Slurmd service is not running" + +slurmd_service_success_msg: "Slurmd service is running" \ No newline at end of file diff --git a/test/test_vars/test_slurmexporter_vars.yml b/test/test_vars/test_slurmexporter_vars.yml new file mode 100644 index 000000000..0e5460d02 --- /dev/null +++ b/test/test_vars/test_slurmexporter_vars.yml @@ -0,0 +1,26 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +slurm_exporter_service_fail_msg: "Slurm exporter service is not running" + +slurm_exporter_service_success_msg: "Slurm exporter service is active and running" + +slurm_exporter_job_fail_msg: "Slurm-exporter-metrics not configured as k8s service" + +slurm_exporter_job_success_msg: "Slurm-exporter-metrics successfully configured as k8s service" + +prometheus_installation_fail_msg: "Prometheus not installed" + +prometheus_installation_success_msg: "Prometheus is installed" \ No newline at end of file diff --git a/tools/install_tools.yml b/tools/install_tools.yml index b8b81a7e7..6ea03ecf6 100644 --- a/tools/install_tools.yml +++ b/tools/install_tools.yml @@ -13,7 +13,7 @@ # limitations under the License. --- -- hosts: master +- hosts: manager tasks: - name: Install Change Personality Script copy: diff --git a/tools/intel_tools.yml b/tools/intel_tools.yml new file mode 100644 index 000000000..7c0d2e20d --- /dev/null +++ b/tools/intel_tools.yml @@ -0,0 +1,57 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
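+# intel_tools.yml (below) assumes an inventory 'cluster' group spanning all +# nodes, plus outbound access to yum.repos.intel.com from those nodes.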
+--- +# intel_tools.yml: Install Intel-branded tools + +# Install the Intel Parallel Studio XE Runtime repository and packages +- hosts: cluster + tasks: + - name: Import the Intel(R) Parallel Studio XE Runtime Repo GPG Key + rpm_key: + state: present + key: https://yum.repos.intel.com/2020/setup/RPM-GPG-KEY-intel-psxe-runtime-2020 + - name: Enable the Intel(R) Parallel Studio XE Runtime Repository + package: + name: https://yum.repos.intel.com/2020/setup/intel-psxe-runtime-2020-reposetup-1-0.noarch.rpm + state: present + - name: Install Intel(R) Parallel Studio XE Runtime 2020 + package: + name: intel-psxe-runtime + state: present + +# Install the Intel Cluster Checker +- hosts: cluster + tasks: + - name: Import the Intel(R) Cluster Checker Repo GPG Key + rpm_key: + state: present + key: https://yum.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB + - name: Enable the Intel(R) Cluster Checker Repository + yum_repository: + name: intel-clck-2019-repo + description: Intel(R) Cluster Checker 2019 + baseurl: https://yum.repos.intel.com/clck/2019 + gpgcheck: yes + gpgkey: https://yum.repos.intel.com/clck/2019/setup/PUBLIC_KEY.PUB + - name: Enable the Intel(R) Cluster Checker Extensions Repository + yum_repository: + name: intel-clck-ext-2019-repo + description: Intel(R) Cluster Checker Select Solutions Extensions 2019 + baseurl: https://yum.repos.intel.com/clck-ext/2019 + gpgcheck: yes + gpgkey: https://yum.repos.intel.com/clck-ext/2019/setup/PUBLIC_KEY.PUB + - name: Install the Intel(R) Cluster Checker + package: + name: intel-clck-2019.8-* + state: present diff --git a/tools/olm.yml b/tools/olm.yml new file mode 100644 index 000000000..873fe6b7e --- /dev/null +++ b/tools/olm.yml @@ -0,0 +1,21 @@ +# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- +# olm.yml: Install Operator Lifecycle Manager (OLM) for Operator Framework +# The tasks are wrapped in a play so this file is a valid playbook; targeting +# the manager node is an assumption, mirroring install_tools.yml. + +- hosts: manager + tasks: + - name: Deploy Operator Lifecycle Manager (OLM) CRDs + command: kubectl apply -f https://github.com/operator-framework/operator-lifecycle-manager/releases/download/v0.17.0/crds.yaml + + - name: Deploy Operator Lifecycle Manager (OLM) + command: kubectl apply -f https://github.com/operator-framework/operator-lifecycle-manager/releases/download/v0.17.0/olm.yaml diff --git a/kubernetes/scuttle b/tools/scuttle similarity index 100% rename from kubernetes/scuttle rename to tools/scuttle