From 2c0587d6daaf6a47b19091084057be5f21fd6868 Mon Sep 17 00:00:00 2001 From: shuangkun Date: Tue, 9 Apr 2024 20:41:01 +0800 Subject: [PATCH] fix: retry status Signed-off-by: shuangkun --- workflow/controller/controller.go | 12 ++++++++++++ workflow/controller/operator.go | 17 ++++++++--------- workflow/controller/pod_cleanup_key.go | 10 +++++----- 3 files changed, 25 insertions(+), 14 deletions(-) diff --git a/workflow/controller/controller.go b/workflow/controller/controller.go index 0db3adee61ce..0f99464afb36 100644 --- a/workflow/controller/controller.go +++ b/workflow/controller/controller.go @@ -567,6 +567,18 @@ func (wfc *WorkflowController) processNextPodCleanupItem(ctx context.Context) bo if err != nil && !apierr.IsNotFound(err) { return err } + case batchDeletePods: + workflowName := podName + wfClient := wfc.wfclientset.ArgoprojV1alpha1().Workflows(namespace) + wf, err := wfClient.Get(ctx, workflowName, metav1.GetOptions{}) + if err != nil && !apierr.IsNotFound(err) { + return err + } + wf.ObjectMeta.Labels[common.LabelKeyWorkflowRetryStatus] = "Retried" + wf, err = wfClient.Update(ctx, wf, metav1.UpdateOptions{}) + if err != nil && !apierr.IsNotFound(err) { + return err + } } return nil }() diff --git a/workflow/controller/operator.go b/workflow/controller/operator.go index 1d37df185ebf..aa4c44740b43 100644 --- a/workflow/controller/operator.go +++ b/workflow/controller/operator.go @@ -2379,7 +2379,6 @@ func (woc *wfOperationCtx) markWorkflowPhase(ctx context.Context, phase wfv1.Wor } } woc.updated = true - woc.wf.Status.RetryStatus = nil woc.controller.queuePodForCleanup(woc.wf.Namespace, woc.getAgentPodName(), deletePod) } } @@ -3842,15 +3841,11 @@ func (woc *wfOperationCtx) shouldRetry() bool { if !ok || retryStatus == "Retried" { return false } - if retryStatus == "Retrying" { - // TODO make sure all pod in podsToDelete deleted, avoid "create pod exists" - return false - } return true } func (woc *wfOperationCtx) IsRetried() bool { - return woc.wf.Labels[common.LabelKeyWorkflowRetried] != "Retried" + return woc.wf.ObjectMeta.Labels[common.LabelKeyWorkflowRetryStatus] != "Retried" } func (woc *wfOperationCtx) retryWorkflow(ctx context.Context) error { @@ -3864,7 +3859,11 @@ func (woc *wfOperationCtx) retryWorkflow(ctx context.Context) error { if err != nil { return fmt.Errorf("fail to unmarshaling parameters: %v", err) } - restartSuccessful := woc.wf.Labels[common.LabelKeyRetryRestartSuccessful] + restartSuccessful := false + restartSuccessfulStr := woc.wf.Labels[common.LabelKeyRetryRestartSuccessful] + if restartSuccessfulStr == "true" { + restartSuccessful = true + } // Clean up remaining pods in the workflow wf, podsToDelete, err := wfutil.FormulateRetryWorkflow(ctx, woc.wf, restartSuccessful, nodeFiledSelector, parameters) @@ -3874,9 +3873,9 @@ func (woc *wfOperationCtx) retryWorkflow(ctx context.Context) error { for _, podName := range podsToDelete { woc.controller.queuePodForCleanup(wf.Namespace, podName, deletePod) } - woc.controller.queuePodForCleanup(wf.Namespace, wf.Name, deletedAllPodsFlag) + woc.controller.queuePodForCleanup(wf.Namespace, wf.Name, batchDeletePods) woc.wf = wf - woc.wf.labels[common.LabelKeyWorkflowRetryStatus] = "Retrying" + woc.wf.ObjectMeta.Labels[common.LabelKeyWorkflowRetryStatus] = "Retrying" woc.updated = true return nil } diff --git a/workflow/controller/pod_cleanup_key.go b/workflow/controller/pod_cleanup_key.go index 42328c67d71f..e68a4e3f1130 100644 --- a/workflow/controller/pod_cleanup_key.go +++ b/workflow/controller/pod_cleanup_key.go @@ -15,11 +15,11 @@ type ( ) const ( - deletePod podCleanupAction = "deletePod" - labelPodCompleted podCleanupAction = "labelPodCompleted" - terminateContainers podCleanupAction = "terminateContainers" - killContainers podCleanupAction = "killContainers" - showDeletedAllPodsFlag podCleanupAction = "deletedAllPods" + deletePod podCleanupAction = "deletePod" + labelPodCompleted podCleanupAction = "labelPodCompleted" + terminateContainers podCleanupAction = "terminateContainers" + killContainers podCleanupAction = "killContainers" + batchDeletePods podCleanupAction = "batchDeletePods" ) func newPodCleanupKey(namespace string, podName string, action podCleanupAction) podCleanupKey {