Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Intel HPC Platform Spec Integration Tests
Signed-off-by: Sean Smith <seaam@amazon.com>
- Loading branch information
1 parent
5fb0adf
commit 0b12996
Showing
11 changed files
with
278 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
# Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance | ||
# with the License. A copy of the License is located at | ||
# | ||
# http://aws.amazon.com/apache2.0/ | ||
# | ||
# or in the "LICENSE.txt" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES | ||
# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions and | ||
# limitations under the License. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
# Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"). | ||
# You may not use this file except in compliance with the License. | ||
# A copy of the License is located at | ||
# | ||
# http://aws.amazon.com/apache2.0/ | ||
# | ||
# or in the "LICENSE.txt" file accompanying this file. | ||
# This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied. | ||
# See the License for the specific language governing permissions and limitations under the License. | ||
import logging | ||
|
||
import pytest | ||
|
||
from assertpy import assert_that | ||
from remote_command_executor import RemoteCommandExecutor | ||
from tests.common.assertions import assert_no_errors_in_logs | ||
from tests.common.schedulers_common import get_scheduler_commands | ||
from tests.common.utils import fetch_instance_slots | ||
|
||
|
||
@pytest.mark.regions(["us-east-1"])
@pytest.mark.instances(["c5n.18xlarge"])
@pytest.mark.oss(["centos7"])
@pytest.mark.schedulers(["sge"])
def test_intel_hpc(region, scheduler, instance, os, pcluster_config_reader, clusters_factory, test_datadir):
    """
    Run Intel Cluster Checker on a freshly created cluster.

    A cluster is built from the rendered test configuration, the Cluster Checker flow is executed on it and,
    once that has completed, the sqswatcher and jobwatcher logs are passed to assert_no_errors_in_logs.
    """
    slots = fetch_instance_slots(region, instance)
    cluster = clusters_factory(pcluster_config_reader())
    executor = RemoteCommandExecutor(cluster)
    _test_intel_clck(
        remote_command_executor=executor,
        scheduler_commands=get_scheduler_commands(scheduler, executor),
        slots_per_instance=slots,
        test_datadir=test_datadir,
    )

    assert_no_errors_in_logs(executor, ["/var/log/sqswatcher", "/var/log/jobwatcher"])
|
||
|
||
def _test_intel_clck(remote_command_executor, scheduler_commands, slots_per_instance, test_datadir):
    """
    Install Intel Cluster Checker, build its nodefile, run it and assert that the overall result is PASS.

    :param remote_command_executor: executor used to run commands and scripts on the cluster
    :param scheduler_commands: scheduler helper used to submit and monitor the compute-side install job
    :param slots_per_instance: slots of a single instance; the install job requests twice this amount
    :param test_datadir: path-like directory holding install_clck.sh, install_clck_compute.sh, clck.xml
        and run_clck.sh
    :raises AssertionError: if the nodefile does not contain exactly 3 lines or Cluster Checker output
        does not contain "Overall Result: PASS"
    """
    # Install Intel Cluster Checker CLCK Master
    logging.info("Installing Intel Cluster Checker")
    remote_command_executor.run_remote_script(str(test_datadir / "install_clck.sh"), hide=False)

    # Install Intel Cluster Checker CLCK Compute
    # The job asks for two instances' worth of slots so that it spans both compute nodes.
    result = scheduler_commands.submit_script(
        str(test_datadir / "install_clck_compute.sh"), slots=2 * slots_per_instance
    )
    job_id = scheduler_commands.assert_job_submitted(result.stdout)
    scheduler_commands.wait_job_completed(job_id)
    scheduler_commands.assert_job_succeeded(job_id)

    # Create nodefile, one host per line followed by its role, e.g.
    # ip-172-31-15-31 # role: head
    # ip-172-31-12-237 # role: compute
    # ip-172-31-8-49 # role: compute
    remote_command_executor.run_remote_command("echo $HOSTNAME | awk '{print $1 \" # role: head\" }' > nodefile")
    remote_command_executor.run_remote_command(
        "qhost | tail -n +4 | awk '{print $1 \" # role: compute\" }' >> nodefile"
    )
    result = remote_command_executor.run_remote_command("cat nodefile | wc -l")
    # Expect exactly 1 head + 2 compute entries. Compare the whole stripped count: a substring check
    # for "3" would also accept 13, 30, 31, ...
    assert_that(result.stdout.strip()).is_equal_to("3")

    # Setup network interface
    # The bundled clck.xml enables <network_interface>ens5</network_interface>, the tag used to set the
    # network interface for accumulating data collected on-demand; it replaces
    # /opt/intel/clck/2019.3.5/etc/clck.xml
    remote_command_executor.run_remote_command(
        "sudo cp ~/clck.xml /opt/intel/clck/2019.3.5/etc/clck.xml", additional_files=[str(test_datadir / "clck.xml")]
    )

    # Run Cluster Checker
    result = remote_command_executor.run_remote_script(str(test_datadir / "run_clck.sh"))
    try:
        assert_that(result.stdout).contains("Overall Result: PASS")
    except AssertionError:
        # Dump the Cluster Checker log to make the failure diagnosable, then re-raise the original
        # assertion with its traceback untouched.
        logging.error(remote_command_executor.run_remote_command("cat clck_results.log"))
        raise
133 changes: 133 additions & 0 deletions
133
tests/integration-tests/tests/intel_hpc/test_intel_hpc/test_intel_hpc/clck.xml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<configuration> | ||
|
||
<plugins> | ||
|
||
<!-- Framework Definition configuration file --> | ||
<framework_definitions> | ||
</framework_definitions> | ||
|
||
</plugins> | ||
|
||
<analyzer> | ||
|
||
<!-- Knowledge Base Configuration --> | ||
<config> | ||
</config> | ||
|
||
<!-- DISPLAY OPTIONS --> | ||
|
||
<!-- LOG LEVEL --> | ||
|
||
<!-- This tag can be used to override the default log level. Valid | ||
values, in increasing order of severity are debug, info, | ||
notice, warning, error, critical, and alert. Only messages | ||
that correspond to specified level and above are output. The | ||
default log level is error. | ||
--> | ||
<!-- | ||
<log_level>error</log_level> | ||
--> | ||
|
||
<!-- SUPPRESSIONS --> | ||
|
||
<!-- This tag can be used to suppress signs and diagnoses. See the User's | ||
Guide for details. | ||
--> | ||
<!-- | ||
<suppressions> | ||
</suppressions> | ||
--> | ||
|
||
</analyzer> | ||
|
||
<postprocessor> | ||
<!-- This tag can be used to override postprocessor extensions in the fwd --> | ||
<!-- | ||
<postproc_extensions> | ||
<group> | ||
<entry>summary</entry> | ||
<entry>clck_output_log</entry> | ||
</group> | ||
</postproc_extensions> | ||
--> | ||
</postprocessor> | ||
|
||
<collector> | ||
<!-- This tag can be used to run collector extensions such as the | ||
mpi extension. | ||
--> | ||
<!-- | ||
<extension>mpi.so</extension> | ||
--> | ||
|
||
<!-- This tag can be used to set the network interface used for | ||
accumulating data collected on-demand. | ||
--> | ||
<network_interface>ens5</network_interface> | ||
|
||
<!-- This tag can be used to override the default location for | ||
data provider helper files. | ||
The string %PROVIDER_AUXILIARY_PATH% is replaced with the | ||
value of this tag in the data provider XML configuration | ||
files. | ||
--> | ||
<!-- | ||
<provider_auxiliary_path>/opt/intel/clck/2019.3.5/provider/share</provider_auxiliary_path> | ||
--> | ||
|
||
<!-- This can be used to override the default location for data | ||
providers. | ||
--> | ||
<!-- | ||
<provider_config_dir>/opt/intel/clck/2019.3.5/provider/etc</provider_config_dir> | ||
--> | ||
|
||
<!-- This tag can be used to collect missing or old data. The default | ||
is set to off. Valid values are 'on' or 'off'. --> | ||
<!-- <re-collect>off</re-collect> --> | ||
|
||
|
||
<!-- This tag can be used to override the global default minimum | ||
data provider timeout threshold. Individual data providers | ||
may set larger timeout values, but this global value | ||
overrides any smaller value. | ||
This parameter is the base value of time (in seconds) | ||
multiplied by a scale factor. If this time is exceeded, the | ||
data provider will be terminated to prevent it from hanging. | ||
The scale attribute specifies the rate at which the timeout | ||
value should increase based on the number of nodes. Valid | ||
options are: constant, linear, squared, logarithmic. | ||
The "constant" attribute value does not scale the timeout | ||
with the number of nodes used. | ||
The "linear" attribute value scales linearly with the number | ||
of nodes (e.g. base * num_nodes). | ||
The "squared" attribute value scales with the number of nodes | ||
squared (e.g. base * num_nodes^2). | ||
The "logarithmic" tag scales logarithmically with the number | ||
of nodes (e.g. base * ln((e-1) + num_nodes)). | ||
--> | ||
<!-- | ||
<timeout scale="constant">60</timeout> | ||
--> | ||
</collector> | ||
|
||
<datastore_extensions> | ||
<!-- This tag sets the database implementation. sqlite3 & odbc are the only | ||
supported backends at this time. | ||
--> | ||
<!-- | ||
<group path="datastore/intel64/"> | ||
<entry config_file="default_sqlite.xml">libsqlite.so</entry> | ||
</group> | ||
--> | ||
</datastore_extensions> | ||
|
||
</configuration> |
7 changes: 7 additions & 0 deletions
7
tests/integration-tests/tests/intel_hpc/test_intel_hpc/test_intel_hpc/install_clck.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
#!/usr/bin/env bash
# Install Intel Cluster Checker 2019.3.5 from the Intel yum repositories.
set -e

# Importing a GPG key writes to the RPM database, so it needs root like the yum steps below.
sudo rpm --import https://yum.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB
sudo yum-config-manager --add-repo https://yum.repos.intel.com/clck/2019/setup/intel-clck-2019.repo
sudo yum-config-manager --add-repo https://yum.repos.intel.com/clck-ext/2019/setup/intel-clck-ext-2019.repo
sudo yum -y install intel-clck-2019.3.5-025
6 changes: 6 additions & 0 deletions
6
...s/integration-tests/tests/intel_hpc/test_intel_hpc/test_intel_hpc/install_clck_compute.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
#!/bin/bash
# Launch install_clck.sh through mpirun, one process per node.
set -e

module load openmpi
# Quote the path so that the expansion is never word-split.
chmod +x "${HOME}/install_clck.sh"
# --map-by ppr:1:node starts exactly one installer process on each node.
mpirun --map-by ppr:1:node "${HOME}/install_clck.sh"
27 changes: 27 additions & 0 deletions
27
tests/integration-tests/tests/intel_hpc/test_intel_hpc/test_intel_hpc/pcluster.config.ini
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
[global] | ||
cluster_template = default | ||
|
||
[aws] | ||
aws_region_name = {{ region }} | ||
|
||
[cluster default] | ||
base_os = {{ os }} | ||
key_name = {{ key_name }} | ||
vpc_settings = parallelcluster-vpc | ||
scheduler = {{ scheduler }} | ||
master_instance_type = {{ instance }} | ||
compute_instance_type = {{ instance }} | ||
initial_queue_size = 2 | ||
maintain_initial_size = true | ||
master_root_volume_size = 80 | ||
compute_root_volume_size = 80 | ||
ebs_settings = large | ||
|
||
[ebs large] | ||
shared_dir = /shared | ||
volume_size = 200 | ||
|
||
[vpc parallelcluster-vpc] | ||
vpc_id = {{ vpc_id }} | ||
master_subnet_id = {{ public_subnet_id }} | ||
compute_subnet_id = {{ private_subnet_id }} |
6 changes: 6 additions & 0 deletions
6
tests/integration-tests/tests/intel_hpc/test_intel_hpc/test_intel_hpc/run_clck.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
#!/bin/bash
# Run Intel Cluster Checker against the hosts listed in ./nodefile.
set -o errexit

# Bring the Cluster Checker environment into this shell, then load the Intel modules.
source /opt/intel/clck/2019.3.5/bin/clckvars.sh
module load intelpsxe intelpython/2 intelpython/3

# -f: nodefile to check; -F: framework definition to evaluate.
clck -f nodefile -F intel_hpc_platform_compat-hpc-2018.0
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters