-
Notifications
You must be signed in to change notification settings - Fork 3.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix(controller): Fix node status when daemon pod deleted but its children nodes are still running #4683
fix(controller): Fix node status when daemon pod deleted but its children nodes are still running #4683
Changes from 2 commits
ff09725
9c0dbfd
ef29495
bd86d8c
2ac4258
f5a61db
127eb31
cab1aa7
5aea4ee
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -903,7 +903,7 @@ func (woc *wfOperationCtx) podReconciliation() error { | |
// It is now impossible to infer pod status. We can do at this point is to mark the node with Error, or | ||
// we can re-submit it. | ||
for nodeID, node := range woc.wf.Status.Nodes { | ||
if node.Type != wfv1.NodeTypePod || node.Fulfilled() || node.StartedAt.IsZero() { | ||
if node.Type != wfv1.NodeTypePod || node.Phase.Fulfilled() || node.StartedAt.IsZero() { | ||
// node is not a pod, it is already complete, or it can be re-run. | ||
continue | ||
} | ||
|
@@ -917,6 +917,7 @@ func (woc *wfOperationCtx) podReconciliation() error { | |
|
||
node.Message = "pod deleted" | ||
node.Phase = wfv1.NodeError | ||
node.Daemoned = nil | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what is this needed for please? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. When daemon pod deleted and |
||
// FinishedAt must be set since retry strategy depends on it to determine the backoff duration. | ||
// See processNodeRetries for more details. | ||
node.FinishedAt = metav1.Time{Time: time.Now().UTC()} | ||
|
@@ -1069,6 +1070,7 @@ func (woc *wfOperationCtx) assessNodeStatus(pod *apiv1.Pod, node *wfv1.NodeStatu | |
if pod.DeletionTimestamp != nil { | ||
// pod is being terminated | ||
newPhase = wfv1.NodeError | ||
newDaemonStatus = pointer.BoolPtr(false) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. and the purpose of this? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ditto. |
||
message = "pod deleted during operation" | ||
woc.log.WithField("displayName", node.DisplayName).WithField("templateName", node.TemplateName). | ||
WithField("pod", pod.Name).Error(message) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you explain this particular change more? Not sure if we want this
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The case for this code is "explained" in the unittest.