Skip to content

Commit

Permalink
fix: retry status
Browse files Browse the repository at this point in the history
Signed-off-by: shuangkun <tsk2013uestc@163.com>
  • Loading branch information
shuangkun committed Apr 9, 2024
1 parent cd1fad2 commit 2c0587d
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 14 deletions.
12 changes: 12 additions & 0 deletions workflow/controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -567,6 +567,18 @@ func (wfc *WorkflowController) processNextPodCleanupItem(ctx context.Context) bo
if err != nil && !apierr.IsNotFound(err) {
return err
}
case batchDeletePods:
workflowName := podName
wfClient := wfc.wfclientset.ArgoprojV1alpha1().Workflows(namespace)
wf, err := wfClient.Get(ctx, workflowName, metav1.GetOptions{})
if err != nil && !apierr.IsNotFound(err) {
return err
}
wf.ObjectMeta.Labels[common.LabelKeyWorkflowRetryStatus] = "Retried"
wf, err = wfClient.Update(ctx, wf, metav1.UpdateOptions{})
if err != nil && !apierr.IsNotFound(err) {
return err
}
}
return nil
}()
Expand Down
17 changes: 8 additions & 9 deletions workflow/controller/operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -2379,7 +2379,6 @@ func (woc *wfOperationCtx) markWorkflowPhase(ctx context.Context, phase wfv1.Wor
}
}
woc.updated = true
woc.wf.Status.RetryStatus = nil
woc.controller.queuePodForCleanup(woc.wf.Namespace, woc.getAgentPodName(), deletePod)
}
}
Expand Down Expand Up @@ -3842,15 +3841,11 @@ func (woc *wfOperationCtx) shouldRetry() bool {
if !ok || retryStatus == "Retried" {
return false
}
if retryStatus == "Retrying" {
// TODO make sure all pod in podsToDelete deleted, avoid "create pod exists"
return false
}
return true
}

// IsRetried compares a retry-related label on the workflow against the
// string "Retried" and returns true when they differ.
// NOTE(review): the operator is `!=`, so this reports true when the label is
// NOT "Retried", which reads as the opposite of the method name — confirm
// against callers before relying on it.
func (woc *wfOperationCtx) IsRetried() bool {
// NOTE(review): this listing looks like a rendered diff with the +/- markers
// stripped; the next line is presumably the removed (pre-commit) return and
// the one after it its replacement. Read literally, only the first return
// executes and the second is unreachable.
return woc.wf.Labels[common.LabelKeyWorkflowRetried] != "Retried"
return woc.wf.ObjectMeta.Labels[common.LabelKeyWorkflowRetryStatus] != "Retried"
}

func (woc *wfOperationCtx) retryWorkflow(ctx context.Context) error {
Expand All @@ -3864,7 +3859,11 @@ func (woc *wfOperationCtx) retryWorkflow(ctx context.Context) error {
if err != nil {
return fmt.Errorf("fail to unmarshaling parameters: %v", err)
}
restartSuccessful := woc.wf.Labels[common.LabelKeyRetryRestartSuccessful]
restartSuccessful := false
restartSuccessfulStr := woc.wf.Labels[common.LabelKeyRetryRestartSuccessful]
if restartSuccessfulStr == "true" {
restartSuccessful = true
}

// Clean up remaining pods in the workflow
wf, podsToDelete, err := wfutil.FormulateRetryWorkflow(ctx, woc.wf, restartSuccessful, nodeFiledSelector, parameters)
Expand All @@ -3874,9 +3873,9 @@ func (woc *wfOperationCtx) retryWorkflow(ctx context.Context) error {
for _, podName := range podsToDelete {
woc.controller.queuePodForCleanup(wf.Namespace, podName, deletePod)
}
woc.controller.queuePodForCleanup(wf.Namespace, wf.Name, deletedAllPodsFlag)
woc.controller.queuePodForCleanup(wf.Namespace, wf.Name, batchDeletePods)
woc.wf = wf
woc.wf.labels[common.LabelKeyWorkflowRetryStatus] = "Retrying"
woc.wf.ObjectMeta.Labels[common.LabelKeyWorkflowRetryStatus] = "Retrying"
woc.updated = true
return nil
}
Expand Down
10 changes: 5 additions & 5 deletions workflow/controller/pod_cleanup_key.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@ type (
)

// podCleanupAction values. They are passed as the action argument when a
// cleanup key is built (newPodCleanupKey) or queued (queuePodForCleanup).
// NOTE(review): this listing looks like a rendered diff with the +/- markers
// stripped, so the group appears twice: the first five entries are presumably
// the pre-commit declarations (ending with showDeletedAllPodsFlag) and the
// last five the post-commit ones (ending with batchDeletePods). Taken
// literally the repeated names would not compile; only one set is expected to
// exist in the real file.
const (
deletePod podCleanupAction = "deletePod"
labelPodCompleted podCleanupAction = "labelPodCompleted"
terminateContainers podCleanupAction = "terminateContainers"
killContainers podCleanupAction = "killContainers"
showDeletedAllPodsFlag podCleanupAction = "deletedAllPods"
deletePod podCleanupAction = "deletePod"
labelPodCompleted podCleanupAction = "labelPodCompleted"
terminateContainers podCleanupAction = "terminateContainers"
killContainers podCleanupAction = "killContainers"
// batchDeletePods is queued by retryWorkflow with the workflow name in the
// pod-name slot; the cleanup handler then fetches that workflow and sets its
// retry-status label to "Retried".
batchDeletePods podCleanupAction = "batchDeletePods"
)

func newPodCleanupKey(namespace string, podName string, action podCleanupAction) podCleanupKey {
Expand Down

0 comments on commit 2c0587d

Please sign in to comment.