Skip to content

Commit

Permalink
Clean up inconsistencies in the directory where the tests are run from (#588)
Browse files Browse the repository at this point in the history

* Clean up inconsistencies in the directory where the tests are run from

* Add a shared directory for all CI clusters

* fix test failures

* Flake and newlines

* Handle LocalCluster test problem
  • Loading branch information
guillaumeeb committed Sep 12, 2022
1 parent 9a66042 commit f79f913
Show file tree
Hide file tree
Showing 10 changed files with 66 additions and 13 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,5 @@ ci/pbs/environment.yml
ci/sge/environment.yml
ci/htcondor/environment.yml
.vscode/
ca.pem
key.pem
3 changes: 3 additions & 0 deletions ci/htcondor.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ function jobqueue_before_install {
docker-compose exec -T submit /bin/bash -c "condor_q"
cd -

#Set shared space permissions
docker-compose exec -T submit /bin/bash -c "chmod -R 777 /shared_space"

docker ps -a
docker images
}
Expand Down
5 changes: 5 additions & 0 deletions ci/htcondor/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,14 @@ services:
environment:
- CONDOR_HOST=cm
- USE_POOL_PASSWORD=yes
- CI_SHARED_SPACE=/shared_space
depends_on:
- cm
volumes:
- secrets:/root/secrets
- ../..:/dask-jobqueue
- ./condor_config.local:/etc/condor/condor_config.local
- shared_space:/shared_space

execute1:
image: daskdev/dask-jobqueue:htcondor-execute
Expand All @@ -41,6 +43,7 @@ services:
volumes:
- secrets:/root/secrets
- ./condor_config.local:/etc/condor/condor_config.local
- shared_space:/shared_space

execute2:
image: daskdev/dask-jobqueue:htcondor-execute
Expand All @@ -56,6 +59,8 @@ services:
volumes:
- secrets:/root/secrets
- ./condor_config.local:/etc/condor/condor_config.local
- shared_space:/shared_space

volumes:
secrets:
shared_space:
4 changes: 4 additions & 0 deletions ci/pbs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ function jobqueue_before_install {
./start-pbs.sh
cd -

#Set shared space permissions
docker exec pbs_master /bin/bash -c "chmod -R 777 /shared_space"
docker exec pbs_master /bin/bash -c "chown -R pbsuser:pbsuser /home/pbsuser"

docker exec -u pbsuser pbs_master pbsnodes -a
docker ps -a
docker images
Expand Down
14 changes: 12 additions & 2 deletions ci/pbs/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,12 @@ services:
build: .
container_name: pbs_master
hostname: pbs_master
environment:
- CI_SHARED_SPACE=/shared_space
volumes:
- ../..:/dask-jobqueue
- userhome:/home/pbsuser
- shared_space:/shared_space
command: bash /run-master.sh

slave_one:
Expand All @@ -17,7 +21,8 @@ services:
container_name: pbs_slave_1
hostname: pbs_slave_1
volumes:
- ../..:/dask-jobqueue
- userhome:/home/pbsuser
- shared_space:/shared_space
entrypoint: "bash /slave-entrypoint.sh"
command: bash /run-slave.sh
links:
Expand All @@ -33,7 +38,8 @@ services:
container_name: pbs_slave_2
hostname: pbs_slave_2
volumes:
- ../..:/dask-jobqueue
- userhome:/home/pbsuser
- shared_space:/shared_space
entrypoint: "bash /slave-entrypoint.sh"
command: bash /run-slave.sh
links:
Expand All @@ -42,3 +48,7 @@ services:
- PBS_MASTER=pbs_master
depends_on:
- master

volumes:
userhome:
shared_space:
5 changes: 4 additions & 1 deletion ci/sge.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ function jobqueue_before_install {
./start-sge.sh
cd -

#Set shared space permissions
docker exec sge_master /bin/bash -c "chmod -R 777 /shared_space"

docker ps -a
docker images
docker exec sge_master qconf -sq dask.q
Expand All @@ -20,7 +23,7 @@ function jobqueue_install {
}

function jobqueue_script {
docker exec sge_master /bin/bash -c "cd /dask-jobqueue; pytest dask_jobqueue --verbose -s -E sge"
docker exec sge_master /bin/bash -c "cd; pytest /dask-jobqueue/dask_jobqueue --verbose -s -E sge"
}

function jobqueue_after_script {
Expand Down
12 changes: 12 additions & 0 deletions ci/sge/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,12 @@ services:
container_name: sge_master
hostname: sge_master
#network_mode: host
environment:
- CI_SHARED_SPACE=/shared_space
volumes:
- ../..:/dask-jobqueue
- userhome:/root
- shared_space:/shared_space
command: bash /dask-jobqueue/ci/sge/run-master.sh

slave-one:
Expand All @@ -24,6 +28,8 @@ services:
#network_mode: host
volumes:
- ../..:/dask-jobqueue
- userhome:/root
- shared_space:/shared_space
command: bash /dask-jobqueue/ci/sge/run-slave.sh
links:
- "master:sge_master"
Expand All @@ -42,8 +48,14 @@ services:
#network_mode: host
volumes:
- ../..:/dask-jobqueue
- userhome:/root
- shared_space:/shared_space
command: bash /dask-jobqueue/ci/sge/run-slave.sh
links:
- "master:sge_master"
depends_on:
- master

volumes:
userhome:
shared_space:
5 changes: 4 additions & 1 deletion ci/slurm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ function jobqueue_before_install {
./start-slurm.sh
cd -

#Set shared space permissions
docker exec slurmctld /bin/bash -c "chmod -R 777 /shared_space"

docker ps -a
docker images
show_network_interfaces
Expand All @@ -29,7 +32,7 @@ function jobqueue_install {
}

function jobqueue_script {
docker exec slurmctld /bin/bash -c "pytest /dask-jobqueue/dask_jobqueue --verbose -E slurm -s"
docker exec slurmctld /bin/bash -c "cd; pytest /dask-jobqueue/dask_jobqueue --verbose -E slurm -s"
}

function jobqueue_after_script {
Expand Down
6 changes: 6 additions & 0 deletions ci/slurm/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,15 @@ services:
command: ["slurmctld"]
container_name: slurmctld
hostname: slurmctld
environment:
- CI_SHARED_SPACE=/shared_space
volumes:
- etc_munge:/etc/munge
- etc_slurm:/etc/slurm
- slurm_jobdir:/data
- var_log_slurm:/var/log/slurm
- ../..:/dask-jobqueue
- shared_space:/shared_space
expose:
- "6817"
depends_on:
Expand All @@ -65,6 +68,7 @@ services:
- etc_slurm:/etc/slurm
- slurm_jobdir:/data
- var_log_slurm:/var/log/slurm
- shared_space:/shared_space
expose:
- "6818"
depends_on:
Expand All @@ -86,6 +90,7 @@ services:
- etc_slurm:/etc/slurm
- slurm_jobdir:/data
- var_log_slurm:/var/log/slurm
- shared_space:/shared_space
expose:
- "6818"
depends_on:
Expand All @@ -102,6 +107,7 @@ volumes:
slurm_jobdir:
var_lib_mysql:
var_log_slurm:
shared_space:

networks:
common-network:
Expand Down
23 changes: 14 additions & 9 deletions dask_jobqueue/tests/test_jobqueue_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,11 +348,14 @@ def test_wrong_parameter_error(Cluster):
Cluster(cores=1, memory="1GB", wrong_parameter="wrong_parameter_value")


@pytest.mark.xfail_env({"htcondor": "#535 no shared filesystem in htcondor ci"})
@pytest.mark.xfail_env({"slurm": "#535 no shared filesystem in slurm ci"})
@pytest.mark.filterwarnings("error:Using a temporary security object:UserWarning")
def test_security(EnvSpecificCluster, loop):
dirname = os.path.dirname(__file__)
# Shared space configured in all docker compose CIs, fallback to current dir if does not exist (LocalCluster)
dirname = os.environ.get("CI_SHARED_SPACE", os.getcwd())
# Copy security files into the shared folder
test_dir = os.path.dirname(__file__)
shutil.copy2(os.path.join(test_dir, "key.pem"), dirname)
shutil.copy2(os.path.join(test_dir, "ca.pem"), dirname)
key = os.path.join(dirname, "key.pem")
cert = os.path.join(dirname, "ca.pem")
security = Security(
Expand Down Expand Up @@ -396,10 +399,9 @@ def test_security(EnvSpecificCluster, loop):
assert "tls://" in job_script


@pytest.mark.xfail_env({"htcondor": "#535 no shared filesystem in htcondor ci"})
@pytest.mark.xfail_env({"slurm": "#535 no shared filesystem in slurm ci"})
def test_security_temporary(EnvSpecificCluster, loop):
dirname = os.path.dirname(__file__)
# Shared space configured in all docker compose CIs, fallback to current dir if does not exist (LocalCluster)
dirname = os.environ.get("CI_SHARED_SPACE", os.getcwd())
with EnvSpecificCluster(
cores=1,
memory="500MiB",
Expand Down Expand Up @@ -440,9 +442,12 @@ def test_security_temporary(EnvSpecificCluster, loop):
# TODO assert not any([os.path.exists(f) for f in [keyfile, certfile, cafile]])


@pytest.mark.xfail_env({"htcondor": "#535 no shared filesystem in htcondor ci"})
@pytest.mark.xfail_env({"slurm": "#535 no shared filesystem in slurm ci"})
@pytest.mark.xfail_env({"pbs": "current directory (pbsuser home) not shared"})
@pytest.mark.xfail_env(
{"htcondor": "Submitting user do not have a shared home directory in CI"}
)
@pytest.mark.xfail_env(
{"slurm": "Submitting user do not have a shared home directory in CI"}
)
def test_security_temporary_defaults(EnvSpecificCluster, loop):
# test automatic behaviour if security is true and shared_temp_directory not set
with pytest.warns(UserWarning, match="shared_temp_directory"), EnvSpecificCluster(
Expand Down

0 comments on commit f79f913

Please sign in to comment.