diff --git a/workflow/common/util.go b/workflow/common/util.go index 8eabda18ec32..b5ca17f4f6c2 100644 --- a/workflow/common/util.go +++ b/workflow/common/util.go @@ -322,3 +322,18 @@ func IsDone(un *unstructured.Unstructured) bool { un.GetLabels()[LabelKeyCompleted] == "true" && un.GetLabels()[LabelKeyWorkflowArchivingStatus] != "Pending" } + +// Check whether child hooked nodes Fulfilled +func CheckAllHooksFullfilled(node *wfv1.NodeStatus, nodes wfv1.Nodes) bool { + childs := node.Children + for _, id := range childs { + n, ok := nodes[id] + if !ok { + continue + } + if n.NodeFlag != nil && n.NodeFlag.Hooked && !n.Fulfilled() { + return false + } + } + return true +} diff --git a/workflow/controller/dag.go b/workflow/controller/dag.go index a5fc603fbf54..9c00eba13c37 100644 --- a/workflow/controller/dag.go +++ b/workflow/controller/dag.go @@ -838,20 +838,10 @@ func (d *dagContext) evaluateDependsLogic(taskName string) (bool, bool, error) { // If the task is still running, we should not proceed. depNode := d.getTaskNode(taskName) - if depNode == nil || !depNode.Fulfilled() { + if depNode == nil || !depNode.Fulfilled() || !common.CheckAllHooksFullfilled(depNode, d.wf.Status.Nodes) { return false, false, nil } - // If a task happens to have an onExit node, don't proceed until the onExit node is fulfilled - if onExitNode, err := d.wf.GetNodeByName(common.GenerateOnExitNodeName(depNode.Name)); onExitNode != nil { - if err != nil { - return false, false, err - } - if !onExitNode.Fulfilled() { - return false, false, nil - } - } - evalTaskName := strings.Replace(taskName, "-", "_", -1) if _, ok := evalScope[evalTaskName]; ok { continue diff --git a/workflow/controller/dag_test.go b/workflow/controller/dag_test.go index 1ce7d1f9b47f..5bb84700a782 100644 --- a/workflow/controller/dag_test.go +++ b/workflow/controller/dag_test.go @@ -3655,3 +3655,57 @@ spec: woc1.operate(ctx) assert.Equal(t, wfv1.WorkflowRunning, woc.wf.Status.Phase) } + +func TestDagWftmplHookWithRetry(t *testing.T) { + wf := wfv1.MustUnmarshalWorkflow("@testdata/dag_wftmpl_hook_with_retry.yaml") + woc := newWoc(*wf) + ctx := context.Background() + woc.operate(ctx) + + // assert task kicked + taskNode := woc.wf.Status.Nodes.FindByDisplayName("task") + assert.Equal(t, wfv1.NodePending, taskNode.Phase) + + // task failed + makePodsPhase(ctx, woc, v1.PodFailed) + woc.operate(ctx) + + // onFailure retry hook(0) kicked + taskNode = woc.wf.Status.Nodes.FindByDisplayName("task") + assert.Equal(t, wfv1.NodeFailed, taskNode.Phase) + failHookRetryNode := woc.wf.Status.Nodes.FindByDisplayName("task.hooks.failure") + failHookChild0Node := woc.wf.Status.Nodes.FindByDisplayName("task.hooks.failure(0)") + assert.Equal(t, wfv1.NodeRunning, failHookRetryNode.Phase) + assert.Equal(t, wfv1.NodePending, failHookChild0Node.Phase) + + // onFailure retry hook(0) failed + makePodsPhase(ctx, woc, v1.PodFailed) + woc.operate(ctx) + + // onFailure retry hook(1) kicked + taskNode = woc.wf.Status.Nodes.FindByDisplayName("task") + assert.Equal(t, wfv1.NodeFailed, taskNode.Phase) + failHookRetryNode = woc.wf.Status.Nodes.FindByDisplayName("task.hooks.failure") + failHookChild0Node = woc.wf.Status.Nodes.FindByDisplayName("task.hooks.failure(0)") + failHookChild1Node := woc.wf.Status.Nodes.FindByDisplayName("task.hooks.failure(1)") + assert.Equal(t, wfv1.NodeRunning, failHookRetryNode.Phase) + assert.Equal(t, wfv1.NodeFailed, failHookChild0Node.Phase) + assert.Equal(t, wfv1.NodePending, failHookChild1Node.Phase) + + // onFailure retry hook(1) failed + makePodsPhase(ctx, woc, v1.PodFailed) + woc.operate(ctx) + + // onFailure retry node faled + taskNode = woc.wf.Status.Nodes.FindByDisplayName("task") + assert.Equal(t, wfv1.NodeFailed, taskNode.Phase) + failHookRetryNode = woc.wf.Status.Nodes.FindByDisplayName("task.hooks.failure") + failHookChild0Node = woc.wf.Status.Nodes.FindByDisplayName("task.hooks.failure(0)") + failHookChild1Node = woc.wf.Status.Nodes.FindByDisplayName("task.hooks.failure(1)") + assert.Equal(t, wfv1.NodeFailed, failHookRetryNode.Phase) + assert.Equal(t, wfv1.NodeFailed, failHookChild0Node.Phase) + assert.Equal(t, wfv1.NodeFailed, failHookChild1Node.Phase) + // finish Node skipped + finishNode := woc.wf.Status.Nodes.FindByDisplayName("finish") + assert.Equal(t, wfv1.NodeOmitted, finishNode.Phase) +} diff --git a/workflow/controller/operator.go b/workflow/controller/operator.go index 98ebbaf7fbc8..81958b00ba1f 100644 --- a/workflow/controller/operator.go +++ b/workflow/controller/operator.go @@ -1735,7 +1735,7 @@ func getRetryNodeChildrenIds(node *wfv1.NodeStatus, nodes wfv1.Nodes) []string { if node == nil { continue } - if strings.HasSuffix(node.Name, ".onExit") { + if node.NodeFlag != nil && node.NodeFlag.Hooked { childrenIds = append(childrenIds, node.ID) } else if len(node.Children) > 0 { childrenIds = append(childrenIds, node.Children...) diff --git a/workflow/controller/testdata/dag_wftmpl_hook_with_retry.yaml b/workflow/controller/testdata/dag_wftmpl_hook_with_retry.yaml new file mode 100644 index 000000000000..2f5af8e07932 --- /dev/null +++ b/workflow/controller/testdata/dag_wftmpl_hook_with_retry.yaml @@ -0,0 +1,52 @@ +# https://github.com/argoproj/argo-workflows/issues/12120 +apiVersion: argoproj.io/v1alpha1 +kind: Workflow +metadata: + name: dag-wftmpl-hook-with-retry + namespace: argo +spec: + templates: + - name: main + dag: + tasks: + - name: task + template: task + hooks: + failure: + template: exit-handler + expression: tasks["task"].status == "Failed" + - name: finish + template: finish + dependencies: + - task + - name: task + container: + name: '' + image: alpine:latest + command: + - sh + - '-c' + args: + - exit 1; + - name: exit-handler + container: + name: '' + image: alpine:latest + command: + - sh + - '-c' + args: + - exit 1; + retryStrategy: + limit: 1 + retryPolicy: Always + - name: finish + container: + name: '' + image: alpine:latest + command: + - sh + - '-c' + args: + - echo "Finished!"; + entrypoint: main