Skip to content

Commit

Permalink
Merge pull request #6997 from bernt-matthias/topic/drmaa-soft-externa…
Browse files Browse the repository at this point in the history
…l-runner

drmaa: soft catch of external runner fails
  • Loading branch information
jmchilton committed Dec 12, 2018
2 parents 7bed9f0 + b0bc14a commit 476be87
Showing 1 changed file with 13 additions and 9 deletions.
22 changes: 13 additions & 9 deletions lib/galaxy/jobs/runners/drmaa.py
Expand Up @@ -210,7 +210,10 @@ def queue_job(self, job_wrapper):
log.debug('(%s) submitting with credentials: %s [uid: %s]' % (galaxy_id_tag, pwent[0], pwent[2]))
filename = self.store_jobtemplate(job_wrapper, jt)
self.userid = pwent[2]
external_job_id = self.external_runjob(external_runjob_script, filename, pwent[2]).strip()
external_job_id = self.external_runjob(external_runjob_script, filename, pwent[2])
if external_job_id is None:
job_wrapper.fail("(%s) could not queue job" % galaxy_id_tag)
return
log.info("(%s) queued as %s" % (galaxy_id_tag, external_job_id))

# store runner information for tracking if Galaxy restarts
Expand Down Expand Up @@ -390,9 +393,9 @@ def store_jobtemplate(self, job_wrapper, jt):
return filename

def external_runjob(self, external_runjob_script, jobtemplate_filename, username):
""" runs an external script the will QSUB a new job.
""" runs an external script that will QSUB a new job.
The external script needs to be run with sudo, and will setuid() to the specified user.
Effectively, will QSUB as a different user (then the one used by Galaxy).
Effectively, will QSUB as a different user (than the one used by Galaxy).
"""
command = shlex.split(external_runjob_script)
command.extend([str(username), jobtemplate_filename])
Expand All @@ -404,14 +407,15 @@ def external_runjob(self, external_runjob_script, jobtemplate_filename, username
# os.unlink(jobtemplate_filename)
if exitcode != 0:
# There was an error in the child process
raise RuntimeError("External_runjob failed (exit code %s)\nChild process reported error:\n%s" % (str(exitcode), stderrdata))
if not stdoutdata.strip():
raise RuntimeError("External_runjob did return the job id: %s" % (stdoutdata))

log.exception("External_runjob failed (exit code %s). Child process reported error: %s" % (str(exitcode), stderrdata))
return None
# The expected output is a single line containing a single numeric value:
# the DRMAA job-ID. If the output is empty, the failure is logged and None is returned.
jobId = stdoutdata
return jobId
stdoutdata = stdoutdata.strip()
if not stdoutdata:
log.exception("External_runjob did not returned nothing instead of the job id")
return None
return stdoutdata

def _job_name(self, job_wrapper):
external_runjob_script = job_wrapper.get_destination_configuration("drmaa_external_runjob_script", None)
Expand Down

0 comments on commit 476be87

Please sign in to comment.