Skip to content

Commit

Permalink
Tweaks to the Pulsar job runners handling of async messags.
Browse files Browse the repository at this point in the history
 - Report a more appropriate message for responding to "failed" status.
 - Respond to the new "lost" message (galaxyproject/pulsar@61ed774).
 - Do not fail "lost" or "failed" jobs that may Galaxy may think have already completed (roughly).
  • Loading branch information
jmchilton committed Apr 10, 2015
1 parent aad36d0 commit 647cf55
Showing 1 changed file with 11 additions and 4 deletions.
15 changes: 11 additions & 4 deletions lib/galaxy/jobs/runners/pulsar.py
Expand Up @@ -32,6 +32,8 @@
NO_REMOTE_GALAXY_FOR_METADATA_MESSAGE = "Pulsar misconfiguration - Pulsar client configured to set metadata remotely, but remote Pulsar isn't properly configured with a galaxy_home directory."
NO_REMOTE_DATATYPES_CONFIG = "Pulsar client is configured to use remote datatypes configuration when setting metadata externally, but Pulsar is not configured with this information. Defaulting to datatypes_conf.xml."
GENERIC_REMOTE_ERROR = "Failed to communicate with remote job server."
FAILED_REMOTE_ERROR = "Remote job server indicated a problem running or monitoring this job."
LOST_REMOTE_ERROR = "Remote job server could not determine this job's state."

# Is there a good way to infer some default for this? Can only use
# url_for from web threads. https://gist.github.com/jmchilton/9098762
Expand Down Expand Up @@ -170,8 +172,13 @@ def _update_job_state_for_status(self, job_state, pulsar_status):
if pulsar_status == "complete":
self.mark_as_finished(job_state)
return None
if pulsar_status == "failed":
self.fail_job(job_state)
if pulsar_status in ["failed", "lost"]:
if pulsar_status == "failed":
message = FAILED_REMOTE_ERROR
else:
message = LOST_REMOTE_ERROR
if not job_state.job_wrapper.get_job().finished:
self.fail_job(job_state, message)
return None
if pulsar_status == "running" and not job_state.running:
job_state.running = True
Expand Down Expand Up @@ -398,12 +405,12 @@ def finish_job( self, job_state ):
log.exception("Job wrapper finish method failed")
job_wrapper.fail("Unable to finish job", exception=True)

def fail_job( self, job_state ):
def fail_job( self, job_state, message=GENERIC_REMOTE_ERROR ):
"""
Seperated out so we can use the worker threads for it.
"""
self.stop_job( self.sa_session.query( self.app.model.Job ).get( job_state.job_wrapper.job_id ) )
job_state.job_wrapper.fail( getattr( job_state, "fail_message", GENERIC_REMOTE_ERROR ) )
job_state.job_wrapper.fail( getattr( job_state, "fail_message", message ) )

def check_pid( self, pid ):
try:
Expand Down

0 comments on commit 647cf55

Please sign in to comment.