Skip to content

Commit

Permalink
Merge pull request #6 from andsor/keep-going
Browse files Browse the repository at this point in the history
Implement keep_going option
  • Loading branch information
andsor committed Aug 20, 2015
2 parents 9ffe8ef + 5feccc2 commit f0e6095
Show file tree
Hide file tree
Showing 4 changed files with 75 additions and 0 deletions.
9 changes: 9 additions & 0 deletions gridjug/grid_jug.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ def grid_jug(
jug_args=None,
jug_nworkers=4,
name='gridjug',
keep_going=False,
**kwargs
):
"""
Expand Down Expand Up @@ -45,6 +46,12 @@ def grid_jug(
name : str, optional
base name of the Grid Engine task
keep_going : bool, optional
Strongly recommended! Defaults to ``False``: if a single Jug task
fails, GridMap will cancel all jobs!
If ``True``, Jug does not raise an exception but keeps retrying the
task.
**kwargs : keyword-dict, optional
additional options passed through to :any:`gridmap.grid_map`
Expand All @@ -63,6 +70,8 @@ def grid_jug(
jug_argv.append('{}'.format(jugfile))
if jugdir is not None:
jug_argv.append('--jugdir={}'.format(jugdir))
if keep_going:
jug_argv.append('--keep-going')
if jug_args is not None:
jug_argv.extend(jug_args)

Expand Down
1 change: 1 addition & 0 deletions gridjug/test/.gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
primes.jugdata
tmp
failing.jugdata
20 changes: 20 additions & 0 deletions gridjug/test/failing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# coding: utf-8

from time import sleep

from jug import TaskGenerator


@TaskGenerator
def is_prime(n):
    """Trial-division primality check that deliberately fails for ``n == 6``.

    Parameters
    ----------
    n : int
        Number to test.

    Returns
    -------
    bool
        ``False`` if some integer in ``range(2, n - 1)`` divides ``n``,
        ``True`` otherwise.

    Raises
    ------
    RuntimeError
        Always raised when ``n == 6``, so that this jugfile contains exactly
        one failing task.
    """
    # Pause for 0.1 seconds before doing any work.
    sleep(0.1)

    # Intentional failure used to exercise error handling.
    if n == 6:
        raise RuntimeError

    has_divisor = any(
        (n % candidate) == 0 for candidate in range(2, n - 1)
    )
    return not has_divisor

# One ``is_prime`` call per integer from 2 through 10 (nine entries);
# the entry for 6 is the one whose function body raises RuntimeError.
primes10 = list(map(is_prime, range(2, 11)))
45 changes: 45 additions & 0 deletions gridjug/test/test_gridjug.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
PRIMES_JUGFILE = os.path.join(THIS_DIR, 'primes.py')
PRIMES_JUGDIR = os.path.join(THIS_DIR, 'primes.jugdata')

# Path to the ``failing.py`` jugfile located in this test directory.
FAILING_JUGFILE = os.path.join(THIS_DIR, 'failing.py')

# Determine whether we are on NLD clusters
ON_NLD_CLUSTER = (os.environ.get('SGE_CLUSTER_NAME', None) == 'NLD')
ON_NLD_LOGIN = ON_NLD_CLUSTER and (
Expand Down Expand Up @@ -76,6 +78,26 @@ def test_access_results(tmpdir):
]


def test_failing(tmpdir):
    """Run the failing jugfile locally without ``keep_going``.

    Every item returned by ``grid_jug`` is expected to be a
    ``RuntimeError`` instance.
    """
    outcomes = gridjug.grid_jug(
        jugfile=FAILING_JUGFILE,
        jugdir=tmpdir.strpath,
        local=True,
    )
    for outcome in outcomes:
        assert isinstance(outcome, RuntimeError)


def test_failing_keep_going(tmpdir):
    """Run the failing jugfile locally with ``keep_going=True``.

    Afterwards, every task in ``primes10`` except the one for ``n == 6``
    must be loadable from the jug directory.
    """
    workdir = tmpdir.strpath
    gridjug.grid_jug(
        jugfile=FAILING_JUGFILE,
        jugdir=workdir,
        local=True,
        keep_going=True,
    )

    # Re-open the jug store to inspect which task results were written.
    _, space = jug.init(jugfile=FAILING_JUGFILE, jugdir=workdir)
    numbers = range(2, 11)
    for number, task in zip(numbers, space['primes10']):
        expect_loadable = (number != 6)
        assert task.can_load() == expect_loadable


@pytest.mark.skipif(not ON_NLD_LOGIN, reason='Not on NLD cluster login node')
def test_nld_execute():
gridjug.grid_jug(
Expand Down Expand Up @@ -132,3 +154,26 @@ def test_nld_access_results(jugdir):
assert jug.value(jugspace['primes10']) == [
True, True, False, True, False, True, False, False, False
]


@pytest.mark.skipif(not ON_NLD_LOGIN, reason='Not on NLD cluster login node')
def test_nld_failing(jugdir):
    """Run the failing jugfile with the NLD GridMap parameters.

    Without ``keep_going``, the call is expected to raise ``RuntimeError``.
    """
    expect_runtime_error = pytest.raises(RuntimeError)
    with expect_runtime_error:
        gridjug.grid_jug(
            jugfile=FAILING_JUGFILE, jugdir=jugdir, **NLD_GRIDMAP_PARAMS
        )


@pytest.mark.skipif(not ON_NLD_LOGIN, reason='Not on NLD cluster login node')
def test_nld_failing_keep_going(jugdir):
    """Run the failing jugfile with the NLD GridMap parameters and
    ``keep_going=True``.

    Afterwards, every task in ``primes10`` except the one for ``n == 6``
    must be loadable from the jug directory.
    """
    gridjug.grid_jug(
        jugfile=FAILING_JUGFILE,
        jugdir=jugdir,
        keep_going=True,
        **NLD_GRIDMAP_PARAMS
    )

    # Re-open the jug store to inspect which task results were written.
    _, space = jug.init(jugfile=FAILING_JUGFILE, jugdir=jugdir)
    numbers = range(2, 11)
    for number, task in zip(numbers, space['primes10']):
        expect_loadable = (number != 6)
        assert task.can_load() == expect_loadable

0 comments on commit f0e6095

Please sign in to comment.