Merge pull request #111 from HECBioSim/issue105
Fixed NAMD SMP and general undersubscription issues.
jimboid committed Aug 9, 2018
2 parents c81fcac + d22c5c7 commit 34961e7
Showing 6 changed files with 84 additions and 25 deletions.
13 changes: 7 additions & 6 deletions longbow/configuration.py
@@ -93,29 +93,30 @@
     "host": "",
     "localworkdir": "",
     "lsf-cluster": "",
-    "modules": "",
     "maxtime": "24:00",
     "memory": "",
+    "modules": "",
+    "mpiprocs": "",
     "polling-frequency": "300",
     "port": "22",
     "queue": "",
     "recoveryfile": "",
     "remoteworkdir": "",
-    "resource": "",
     "replicates": "1",
     "replicate-naming": "rep",
+    "resource": "",
     "scheduler": "",
     "scripts": "",
+    "sge-peflag": "mpi",
+    "sge-peoverride": "false",
     "slurm-gres": "",
     "staging-frequency": "300",
     "stdout": "",
     "stderr": "",
-    "sge-peflag": "mpi",
-    "sge-peoverride": "false",
     "subfile": "",
-    "user": "",
     "upload-exclude": "",
-    "upload-include": ""
+    "upload-include": "",
+    "user": ""
 }


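The only functional addition in this file is the new "mpiprocs" default; the remaining lines reorder existing keys alphabetically. A minimal sketch of how the option might be set in an INI-style Longbow configuration, read back with Python's configparser purely for illustration: only the mpiprocs key itself comes from this commit, while the section name, file layout and the other values are assumptions.

import configparser

# Illustrative job configuration. Everything below except the existence of
# the "mpiprocs" option is an assumption, not taken from this commit.
JOB_CONF = """
[namd-smp-bench]
resource = archer
queue = debug
maxtime = 24:00
modules = namd
replicates = 1
corespernode = 24
cores = 2
mpiprocs = 1
"""

config = configparser.ConfigParser()
config.read_string(JOB_CONF)
job = dict(config["namd-smp-bench"])

# An empty mpiprocs (the new default) falls back to the old behaviour of
# filling each node with ranks; mpiprocs = 1 requests one MPI rank per node.
print(job["mpiprocs"])  # -> "1"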
29 changes: 11 additions & 18 deletions longbow/schedulers/pbs.py
@@ -108,30 +108,23 @@ def prepare(job):
             jobfile.write("#PBS " + job["accountflag"] + " " +
                           job["account"] + "\n")
 
+    processes = job["cores"]
     cpn = job["corespernode"]
+    mpiprocs = job["mpiprocs"]
 
-    cores = job["cores"]
+    # If not undersubscribing then.
+    if mpiprocs == "" and processes < cpn:
 
-    # Load levelling override. In cases where # of cores is less than
-    # corespernode, user is likely to be undersubscribing.
-    if int(cores) < int(cpn):
+        mpiprocs = processes
 
-        cpn = cores
+    elif mpiprocs == "":
 
-    # Calculate the number of nodes.
-    nodes = float(cores) / float(cpn)
+        mpiprocs = cpn
 
-    # Makes sure nodes is rounded up to the next highest integer.
-    nodes = str(int(math.ceil(nodes)))
-
-    # Number of cpus per node (most machines will charge for all available cpus
-    # per node whether you are using them or not)
-    ncpus = cpn
-
-    # Number of mpi processes per node.
-    mpiprocs = cpn
+    # Calculate the number of nodes.
+    nodes = str(int(math.ceil(float(processes) / float(mpiprocs))))
 
-    tmp = "select=" + nodes + ":ncpus=" + ncpus + ":mpiprocs=" + mpiprocs
+    tmp = "select=" + nodes + ":ncpus=" + cpn + ":mpiprocs=" + mpiprocs
 
     # If user has specified memory append the flag (not all machines support
     # this).
@@ -203,7 +196,7 @@ def prepare(job):
     # CRAY's use aprun which has slightly different requirements to mpirun.
     if mpirun == "aprun":
 
-        mpirun = mpirun + " -n " + cores + " -N " + mpiprocs
+        mpirun = mpirun + " -n " + processes + " -N " + mpiprocs
 
     # Single jobs only need one run command.
     if int(job["replicates"]) == 1:
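The net effect of the rewrite above: ncpus now always requests the full corespernode (whole nodes, which the removed comment notes most machines charge for anyway), while mpiprocs either follows the old behaviour (fill the node with ranks, or match the core count when undersubscribing) or takes the user-supplied value. A standalone sketch of that arithmetic, not the Longbow prepare() function itself; the helper name and the explicit int conversions are my own additions.

import math

def pbs_select_line(cores, corespernode, mpiprocs=""):
    """Sketch of the select-line arithmetic from the diff above, written as
    a pure function instead of operating on Longbow's string-typed job dict."""
    processes = int(cores)
    cpn = int(corespernode)

    if mpiprocs == "" and processes < cpn:
        # Fewer processes than one node holds: undersubscribe a single node.
        ranks_per_node = processes
    elif mpiprocs == "":
        # Default: one MPI rank per core, so ranks fill each node.
        ranks_per_node = cpn
    else:
        # User override, e.g. mpiprocs = 1 for SMP builds such as NAMD.
        ranks_per_node = int(mpiprocs)

    # Round the node count up so a final partial node is still requested.
    nodes = int(math.ceil(processes / float(ranks_per_node)))

    # ncpus is always the whole node, whether or not every core runs a rank.
    return "select={0}:ncpus={1}:mpiprocs={2}".format(nodes, cpn, ranks_per_node)

print(pbs_select_line("24", "24"))      # select=1:ncpus=24:mpiprocs=24
print(pbs_select_line("16", "24"))      # select=1:ncpus=24:mpiprocs=16  (case3 below)
print(pbs_select_line("2", "24", "1"))  # select=2:ncpus=24:mpiprocs=1   (case11 below)

The last two calls reproduce the updated case3.txt and the new case11.txt standards below.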
13 changes: 13 additions & 0 deletions tests/standards/pbs_submitfiles/case11.txt
@@ -0,0 +1,13 @@
+#!/bin/bash --login
+#PBS -N testjob
+#PBS -q debug
+#PBS -l select=2:ncpus=24:mpiprocs=1
+#PBS -l walltime=24:00:00
+
+export PBS_O_WORKDIR=$(readlink -f $PBS_O_WORKDIR)
+cd $PBS_O_WORKDIR
+export OMP_NUM_THREADS=1
+
+module load namd
+
+aprun -n 2 -N 1 namd2 +ppn 23 +pemap 1-23 +commap 0 bench.in > bench.log
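Worked through from the case11 job defined in the new unit test at the end of this commit: cores = 2 with mpiprocs = 1 gives nodes = ceil(2 / 1) = 2, ncpus stays at the full corespernode value of 24, hence select=2:ncpus=24:mpiprocs=1, and the aprun handler line becomes -n 2 -N 1. The remainder of each node is driven by NAMD's own SMP flags (+ppn 23 +pemap 1-23 +commap 0) carried through in executableargs.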
2 changes: 1 addition & 1 deletion tests/standards/pbs_submitfiles/case3.txt
@@ -1,7 +1,7 @@
 #!/bin/bash --login
 #PBS -N testjob
 #PBS -q debug
-#PBS -l select=1:ncpus=16:mpiprocs=16
+#PBS -l select=1:ncpus=24:mpiprocs=16
 #PBS -l walltime=24:00:00
 
 export PBS_O_WORKDIR=$(readlink -f $PBS_O_WORKDIR)
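The one change to this standard: ncpus now reports the whole node (24 cores) even though only 16 MPI processes are requested, since, as the removed pbs.py comment noted, most machines charge for every core on a node whether it is used or not; mpiprocs stays at 16 and the job still fits on a single node.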
4 changes: 4 additions & 0 deletions tests/unit/configuration/test_processconfigsresource.py
@@ -119,6 +119,7 @@ def test_processconfigsresource1():
         "modules": "",
         "maxtime": "24:00",
         "memory": "",
+        "mpiprocs": "",
         "nochecks": False,
         "scripts": "",
         "slurm-gres": "",
@@ -232,6 +233,7 @@ def test_processconfigsresource2():
         "modules": "",
         "maxtime": "24:00",
         "memory": "",
+        "mpiprocs": "",
         "nochecks": False,
         "scripts": "",
         "slurm-gres": "",
@@ -345,6 +347,7 @@ def test_processconfigsresource3():
         "modules": "",
         "maxtime": "24:00",
         "memory": "",
+        "mpiprocs": "",
         "nochecks": False,
         "scripts": "",
         "slurm-gres": "",
@@ -457,6 +460,7 @@ def test_processconfigsresource4():
         "modules": "",
         "maxtime": "24:00",
         "memory": "",
+        "mpiprocs": "",
         "nochecks": False,
         "scripts": "",
         "slurm-gres": "",
48 changes: 48 additions & 0 deletions tests/unit/schedulers_pbs/test_pbs_prepare.py
@@ -58,6 +58,7 @@ def test_prepare_case1():
         "maxtime": "24:00",
         "memory": "",
         "modules": "amber",
+        "mpiprocs": "",
         "queue": "debug",
         "replicates": "1",
         "stdout": "",
@@ -96,6 +97,7 @@ def test_prepare_case2():
         "maxtime": "24:00",
         "memory": "",
         "modules": "amber",
+        "mpiprocs": "",
         "queue": "debug",
         "replicates": "5",
         "stdout": "",
@@ -132,6 +134,7 @@ def test_prepare_case3():
         "maxtime": "24:00",
         "memory": "",
         "modules": "amber",
+        "mpiprocs": "",
         "queue": "debug",
         "replicates": "1",
         "stdout": "",
@@ -169,6 +172,7 @@ def test_prepare_case4():
         "maxtime": "24:00",
         "memory": "",
         "modules": "amber",
+        "mpiprocs": "",
         "queue": "debug",
         "replicates": "1",
         "stdout": "",
@@ -206,6 +210,7 @@ def test_prepare_case5():
         "maxtime": "24:00",
         "memory": "",
         "modules": "amber",
+        "mpiprocs": "",
         "queue": "debug",
         "replicates": "1",
         "stdout": "",
@@ -243,6 +248,7 @@ def test_prepare_case6():
         "maxtime": "24:00",
         "memory": "",
         "modules": "amber",
+        "mpiprocs": "",
         "queue": "debug",
         "replicates": "1",
         "stdout": "",
@@ -280,6 +286,7 @@ def test_prepare_case7():
         "maxtime": "24:00",
         "memory": "",
         "modules": "amber",
+        "mpiprocs": "",
         "queue": "debug",
         "replicates": "1",
         "stdout": "",
@@ -317,6 +324,7 @@ def test_prepare_case8():
         "maxtime": "24:00",
         "memory": "",
         "modules": "amber",
+        "mpiprocs": "",
         "queue": "debug",
         "replicates": "1",
         "stdout": "",
@@ -354,6 +362,7 @@ def test_prepare_case9():
         "maxtime": "24:00",
         "memory": "8",
         "modules": "amber",
+        "mpiprocs": "",
         "queue": "debug",
         "replicates": "1",
         "stdout": "",
@@ -390,6 +399,7 @@ def test_prepare_case10():
         "maxtime": "24:00",
         "memory": "",
         "modules": "amber",
+        "mpiprocs": "",
         "queue": "debug",
         "replicates": "1",
         "stdout": "test.log",
@@ -406,3 +416,41 @@ def test_prepare_case10():
         os.path.join(
             os.getcwd(),
             "tests/standards/pbs_submitfiles/case10.txt"), "rb").read()
+
+
+def test_prepare_case11():
+
+    """
+    Test SMP type
+    """
+
+    job = {
+        "account": "",
+        "cluster": "cluster1",
+        "cores": "2",
+        "corespernode": "24",
+        "executableargs": ("namd2 +ppn 23 +pemap 1-23 +commap 0 " +
+                           "bench.in > bench.log"),
+        "handler": "aprun",
+        "email-address": "",
+        "email-flags": "",
+        "jobname": "testjob",
+        "localworkdir": "/tmp",
+        "maxtime": "24:00",
+        "memory": "",
+        "modules": "namd",
+        "mpiprocs": "1",
+        "queue": "debug",
+        "replicates": "1",
+        "stdout": "",
+        "stderr": "",
+        "scripts": "",
+        "upload-include": "file1, file2"
+    }
+
+    prepare(job)
+
+    assert open("/tmp/submit.pbs", "rb").read() == open(
+        os.path.join(
+            os.getcwd(),
+            "tests/standards/pbs_submitfiles/case11.txt"), "rb").read()
