From 661b261959dce6ab650eb34251ae05a1ba3d24aa Mon Sep 17 00:00:00 2001 From: Tingluo Huang Date: Mon, 27 Nov 2023 16:43:36 -0500 Subject: [PATCH] Mark job as failed on worker crash. (#3006) --- src/Runner.Listener/JobDispatcher.cs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/Runner.Listener/JobDispatcher.cs b/src/Runner.Listener/JobDispatcher.cs index 512a4ee4188..de5f6fda96c 100644 --- a/src/Runner.Listener/JobDispatcher.cs +++ b/src/Runner.Listener/JobDispatcher.cs @@ -1134,6 +1134,15 @@ private async Task LogWorkerProcessUnhandledException(IRunnerService server, Pip jobRecord.ErrorCount++; jobRecord.Issues.Add(unhandledExceptionIssue); + if (message.Variables.TryGetValue("DistributedTask.MarkJobAsFailedOnWorkerCrash", out var markJobAsFailedOnWorkerCrash) && + StringUtil.ConvertToBoolean(markJobAsFailedOnWorkerCrash?.Value)) + { + Trace.Info("Mark the job as failed since the worker crashed"); + jobRecord.Result = TaskResult.Failed; + // mark the job as completed so service will pickup the result + jobRecord.State = TimelineRecordState.Completed; + } + await jobServer.UpdateTimelineRecordsAsync(message.Plan.ScopeIdentifier, message.Plan.PlanType, message.Plan.PlanId, message.Timeline.Id, new TimelineRecord[] { jobRecord }, CancellationToken.None); } catch (Exception ex)