-
Notifications
You must be signed in to change notification settings - Fork 3.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(controller): Use deterministic name for cron workflow children #4638
Changes from 6 commits
a07fcf8
4a5be29
22f83e4
ade44d7
d67b001
ef8e3e3
e1efd93
adb1ce0
4b4e042
8b95542
a033efd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -34,6 +34,8 @@ type cronWfOperationCtx struct { | |
cronWfIf typed.CronWorkflowInterface | ||
log *log.Entry | ||
metrics *metrics.Metrics | ||
// scheduledTimeFunc returns the nearest scheduled time when it is called | ||
simster7 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
scheduledTimeFunc ScheduledTimeFunc | ||
} | ||
|
||
func newCronWfOperationCtx(cronWorkflow *v1alpha1.CronWorkflow, wfClientset versioned.Interface, metrics *metrics.Metrics) *cronWfOperationCtx { | ||
|
@@ -48,10 +50,20 @@ func newCronWfOperationCtx(cronWorkflow *v1alpha1.CronWorkflow, wfClientset vers | |
"namespace": cronWorkflow.ObjectMeta.Namespace, | ||
}), | ||
metrics: metrics, | ||
// inferScheduledTime returns an inferred scheduled time based on the current time and only works if it is called | ||
// within 59 seconds of the scheduled time. Here it acts as a placeholder until it is replaced by a similar | ||
// function that returns the last scheduled time deterministically from the cron engine. Since we are only able | ||
// to generate the latter function after the job is scheduled, there is a tiny chance that the job is run before | ||
// the deterministic function is installed. If that happens, we use the infer function as the next-best thing. | ||
scheduledTimeFunc: inferScheduledTime, | ||
} | ||
} | ||
|
||
func (woc *cronWfOperationCtx) Run() { | ||
woc.run(woc.scheduledTimeFunc()) | ||
} | ||
|
||
func (woc *cronWfOperationCtx) run(scheduledRuntime time.Time) { | ||
defer woc.persistUpdate() | ||
|
||
woc.log.Infof("Running %s", woc.name) | ||
|
@@ -69,16 +81,25 @@ func (woc *cronWfOperationCtx) Run() { | |
return | ||
} | ||
|
||
wf := common.ConvertCronWorkflowToWorkflow(woc.cronWf) | ||
wf := common.ConvertCronWorkflowToWorkflowWithName(woc.cronWf, getChildWorkflowName(woc.cronWf.Name, scheduledRuntime)) | ||
|
||
runWf, err := util.SubmitWorkflow(woc.wfClient, woc.wfClientset, woc.cronWf.Namespace, wf, &v1alpha1.SubmitOpts{}) | ||
if err != nil { | ||
if errors.IsAlreadyExists(err) { | ||
simster7 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// The scheduled workflow already exists, likely indicating that there is a corrupted LastScheduledTime field. | ||
// If the intended scheduledRuntime is later than the present value in LastScheduledTime, then replace it | ||
if scheduledRuntime.After(woc.cronWf.Status.LastScheduledTime.Time) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If it is a conflict error, then our time should be the same surely? This code does not get run? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Correct, I think after the overwriting stale There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think you need to write tests and then bug fix and maintain all new code - so add less code if it achieves the same outcome There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ok, will remove this then |
||
woc.cronWf.Status.LastScheduledTime = &v1.Time{Time: scheduledRuntime} | ||
} | ||
woc.reportCronWorkflowError(v1alpha1.ConditionTypeSubmissionError, fmt.Sprintf("Workflow scheduled for %s already exists", scheduledRuntime)) | ||
return | ||
} | ||
woc.reportCronWorkflowError(v1alpha1.ConditionTypeSubmissionError, fmt.Sprintf("Failed to submit Workflow: %s", err)) | ||
return | ||
} | ||
|
||
woc.cronWf.Status.Active = append(woc.cronWf.Status.Active, getWorkflowObjectReference(wf, runWf)) | ||
woc.cronWf.Status.LastScheduledTime = &v1.Time{Time: time.Now()} | ||
woc.cronWf.Status.LastScheduledTime = &runWf.CreationTimestamp | ||
woc.cronWf.Status.Conditions.RemoveCondition(v1alpha1.ConditionTypeSubmissionError) | ||
} | ||
|
||
|
@@ -169,40 +190,40 @@ func (woc *cronWfOperationCtx) terminateOutstandingWorkflows() error { | |
} | ||
|
||
func (woc *cronWfOperationCtx) runOutstandingWorkflows() (bool, error) { | ||
proceed, err := woc.shouldOutstandingWorkflowsBeRun() | ||
proceed, missedExecutionTime, err := woc.shouldOutstandingWorkflowsBeRun() | ||
simster7 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if err != nil { | ||
return false, err | ||
} | ||
if proceed { | ||
woc.Run() | ||
woc.run(missedExecutionTime) | ||
return true, nil | ||
} | ||
return false, nil | ||
} | ||
|
||
func (woc *cronWfOperationCtx) shouldOutstandingWorkflowsBeRun() (bool, error) { | ||
func (woc *cronWfOperationCtx) shouldOutstandingWorkflowsBeRun() (bool, time.Time, error) { | ||
// If this CronWorkflow has been run before, check if we have missed any scheduled executions | ||
if woc.cronWf.Status.LastScheduledTime != nil { | ||
var now time.Time | ||
var cronSchedule cron.Schedule | ||
if woc.cronWf.Spec.Timezone != "" { | ||
loc, err := time.LoadLocation(woc.cronWf.Spec.Timezone) | ||
if err != nil { | ||
return false, fmt.Errorf("invalid timezone '%s': %s", woc.cronWf.Spec.Timezone, err) | ||
return false, time.Time{}, fmt.Errorf("invalid timezone '%s': %s", woc.cronWf.Spec.Timezone, err) | ||
} | ||
now = time.Now().In(loc) | ||
|
||
cronScheduleString := "CRON_TZ=" + woc.cronWf.Spec.Timezone + " " + woc.cronWf.Spec.Schedule | ||
cronSchedule, err = cron.ParseStandard(cronScheduleString) | ||
if err != nil { | ||
return false, fmt.Errorf("unable to form timezone schedule '%s': %s", cronScheduleString, err) | ||
return false, time.Time{}, fmt.Errorf("unable to form timezone schedule '%s': %s", cronScheduleString, err) | ||
} | ||
} else { | ||
var err error | ||
now = time.Now() | ||
cronSchedule, err = cron.ParseStandard(woc.cronWf.Spec.Schedule) | ||
if err != nil { | ||
return false, err | ||
return false, time.Time{}, err | ||
} | ||
} | ||
|
||
|
@@ -219,11 +240,11 @@ func (woc *cronWfOperationCtx) shouldOutstandingWorkflowsBeRun() (bool, error) { | |
// If StartingDeadlineSeconds is not set, or we are still within the deadline window, run the Workflow | ||
if woc.cronWf.Spec.StartingDeadlineSeconds == nil || *woc.cronWf.Spec.StartingDeadlineSeconds == 0 || now.Before(missedExecutionTime.Add(time.Duration(*woc.cronWf.Spec.StartingDeadlineSeconds)*time.Second)) { | ||
woc.log.Infof("%s missed an execution at %s and is within StartingDeadline", woc.cronWf.Name, missedExecutionTime.Format("Mon Jan _2 15:04:05 2006")) | ||
return true, nil | ||
return true, missedExecutionTime, nil | ||
} | ||
} | ||
} | ||
return false, nil | ||
return false, time.Time{}, nil | ||
} | ||
|
||
func (woc *cronWfOperationCtx) reconcileActiveWfs(workflows []v1alpha1.Workflow) error { | ||
|
@@ -321,3 +342,16 @@ func (woc *cronWfOperationCtx) reportCronWorkflowError(conditionType v1alpha1.Co | |
}) | ||
woc.metrics.CronWorkflowSubmissionError() | ||
} | ||
|
||
func inferScheduledTime() time.Time { | ||
// Infer the scheduled runtime by taking the current time and zeroing out its seconds and nanoseconds. | ||
// This works because the finest granularity a cron schedule can have is one minute. It is unlikely to ever be used, | ||
// since this function is quickly supplanted by a deterministic function from the cron engine. | ||
log.Infof("inferred scheduled time") | ||
alexec marked this conversation as resolved.
Show resolved
Hide resolved
|
||
now := time.Now().UTC() | ||
return time.Date(now.Year(), now.Month(), now.Day(), now.Hour(), now.Minute(), 0, 0, now.Location()) | ||
} | ||
|
||
func getChildWorkflowName(cronWorkflowName string, scheduledRuntime time.Time) string { | ||
return fmt.Sprintf("%s-%d", cronWorkflowName, scheduledRuntime.Unix()) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. truncate time to 1 minute? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think this function should accept There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. do you truncate there? |
||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
is this covered by existing tests?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes it is. I added an extra test as well