Skip to content

Commit e73ca53

Browse files
committed
Mark jobs as cancelled on future.exception() (likely broken process / killed)
1 parent aa759c7 commit e73ca53

File tree

1 file changed

+16
-8
lines changed

1 file changed

+16
-8
lines changed

plain-worker/plain/worker/workers.py

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -235,17 +235,25 @@ def rescue_job_results(self):
235235
def future_finished_callback(job_uuid: str, future: Future):
236236
if future.cancelled():
237237
logger.warning("Job cancelled job_uuid=%s", job_uuid)
238-
job = Job.objects.get(uuid=job_uuid)
239-
job.convert_to_result(status=JobResultStatuses.CANCELLED)
240-
elif future.exception():
241-
# This is an uncaught error in running process_job,
242-
# which is likely an internal bug. Not sure if it should be marked as a failure and retried?
243-
exception = future.exception()
244-
logger.exception(
245-
"process_job failed job_uuid=%s",
238+
try:
239+
job = Job.objects.get(uuid=job_uuid)
240+
job.convert_to_result(status=JobResultStatuses.CANCELLED)
241+
except Job.DoesNotExist:
242+
# Job may have already been cleaned up
243+
pass
244+
elif exception := future.exception():
245+
# Process pool may have been killed...
246+
logger.warning(
247+
"Job failed job_uuid=%s",
246248
job_uuid,
247249
exc_info=exception,
248250
)
251+
try:
252+
job = Job.objects.get(uuid=job_uuid)
253+
job.convert_to_result(status=JobResultStatuses.CANCELLED)
254+
except Job.DoesNotExist:
255+
# Job may have already been cleaned up
256+
pass
249257
else:
250258
logger.debug("Job finished job_uuid=%s", job_uuid)
251259

0 commit comments

Comments
 (0)