-
Notifications
You must be signed in to change notification settings - Fork 3.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
refactor: Use polling model for workflow phase metric #4557
Changes from 3 commits
ef42e1a
c8c7e47
f2a6a83
0877871
272f1df
7a07186
1118de4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -54,10 +54,6 @@ import ( | |
"github.com/argoproj/argo/workflow/util" | ||
) | ||
|
||
const enoughTimeForInformerSync = 1 * time.Second | ||
|
||
const semaphoreConfigIndexName = "bySemaphoreConfigMap" | ||
|
||
// WorkflowController is the controller for workflow resources | ||
type WorkflowController struct { | ||
// namespace of the workflow controller | ||
|
@@ -109,6 +105,7 @@ const ( | |
workflowTemplateResyncPeriod = 20 * time.Minute | ||
podResyncPeriod = 30 * time.Minute | ||
clusterWorkflowTemplateResyncPeriod = 20 * time.Minute | ||
enoughTimeForInformerSync = 1 * time.Second | ||
) | ||
|
||
// NewWorkflowController instantiates a new WorkflowController | ||
|
@@ -170,7 +167,8 @@ var indexers = cache.Indexers{ | |
indexes.ClusterWorkflowTemplateIndex: indexes.MetaNamespaceLabelIndexFunc(common.LabelKeyClusterWorkflowTemplate), | ||
indexes.CronWorkflowIndex: indexes.MetaNamespaceLabelIndexFunc(common.LabelKeyCronWorkflow), | ||
indexes.WorkflowTemplateIndex: indexes.MetaNamespaceLabelIndexFunc(common.LabelKeyWorkflowTemplate), | ||
semaphoreConfigIndexName: workflowIndexerBySemaphoreKeys, | ||
indexes.SemaphoreConfigIndexName: indexes.WorkflowSemaphoreKeysIndexFunc(), | ||
indexes.WorkflowPhaseIndex: indexes.MetaLabelIndexFunc(common.LabelKeyPhase), | ||
} | ||
|
||
// Run starts a Workflow resource controller | ||
|
@@ -200,7 +198,9 @@ func (wfc *WorkflowController) Run(ctx context.Context, wfWorkers, podWorkers in | |
|
||
go wfc.runTTLController(ctx) | ||
go wfc.runCronController(ctx) | ||
|
||
go wfc.metrics.RunServer(ctx) | ||
go wait.Until(wfc.syncWorkflowPhaseMetrics, 5*time.Second, ctx.Done()) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think 5 seconds is a good balance here. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. So real-time for Prometheus means up to 15s old. Plus whatever delay the app has. Every 15s would mean Prometheus would be up to 30s out of date. @jessesuen I'd like to do as little polling as possible. |
||
|
||
wfc.createClusterWorkflowTemplateInformer(ctx) | ||
wfc.waitForCacheSync(ctx) | ||
|
@@ -220,20 +220,6 @@ func (wfc *WorkflowController) Run(ctx context.Context, wfWorkers, podWorkers in | |
<-ctx.Done() | ||
} | ||
|
||
func workflowIndexerBySemaphoreKeys(obj interface{}) ([]string, error) { | ||
un, ok := obj.(*unstructured.Unstructured) | ||
if !ok { | ||
log.Warnf("cannot convert obj into unstructured.Unstructured in Indexer %s", semaphoreConfigIndexName) | ||
return []string{}, nil | ||
} | ||
wf, err := util.FromUnstructured(un) | ||
if err != nil { | ||
log.Warnf("failed to convert to workflow from unstructured: %v", err) | ||
return []string{}, nil | ||
} | ||
return wf.GetSemaphoreKeys(), nil | ||
} | ||
|
||
func (wfc *WorkflowController) waitForCacheSync(ctx context.Context) { | ||
// Wait for all involved caches to be synced, before processing items from the queue is started | ||
if !cache.WaitForCacheSync(ctx.Done(), wfc.wfInformer.HasSynced, wfc.wftmplInformer.Informer().HasSynced, wfc.podInformer.HasSynced) { | ||
|
@@ -317,15 +303,15 @@ func (wfc *WorkflowController) runConfigMapWatcher(stopCh <-chan struct{}) { | |
|
||
// notifySemaphoreConfigUpdate will notify semaphore config update to pending workflows | ||
func (wfc *WorkflowController) notifySemaphoreConfigUpdate(cm *apiv1.ConfigMap) { | ||
wfs, err := wfc.wfInformer.GetIndexer().ByIndex(semaphoreConfigIndexName, fmt.Sprintf("%s/%s", cm.Namespace, cm.Name)) | ||
wfs, err := wfc.wfInformer.GetIndexer().ByIndex(indexes.SemaphoreConfigIndexName, fmt.Sprintf("%s/%s", cm.Namespace, cm.Name)) | ||
if err != nil { | ||
log.Errorf("failed get the workflow from informer. %v", err) | ||
} | ||
|
||
for _, obj := range wfs { | ||
un, ok := obj.(*unstructured.Unstructured) | ||
if !ok { | ||
log.Warnf("received object from indexer %s is not an unstructured", semaphoreConfigIndexName) | ||
log.Warnf("received object from indexer %s is not an unstructured", indexes.SemaphoreConfigIndexName) | ||
continue | ||
} | ||
wf, err := util.FromUnstructured(un) | ||
|
@@ -694,21 +680,6 @@ func getWfPriority(obj interface{}) (int32, time.Time) { | |
return int32(priority), un.GetCreationTimestamp().Time | ||
} | ||
|
||
func getWfPhase(obj interface{}) wfv1.NodePhase { | ||
un, ok := obj.(*unstructured.Unstructured) | ||
if !ok { | ||
return "" | ||
} | ||
phase, hasPhase, err := unstructured.NestedString(un.Object, "status", "phase") | ||
if err != nil { | ||
return "" | ||
} | ||
if !hasPhase { | ||
return wfv1.NodePending | ||
} | ||
return wfv1.NodePhase(phase) | ||
} | ||
|
||
func (wfc *WorkflowController) addWorkflowInformerHandlers() { | ||
wfc.wfInformer.AddEventHandler( | ||
cache.FilteringResourceEventHandler{ | ||
|
@@ -765,18 +736,10 @@ func (wfc *WorkflowController) addWorkflowInformerHandlers() { | |
}, | ||
) | ||
wfc.wfInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ | ||
AddFunc: func(obj interface{}) { | ||
wf := obj.(*unstructured.Unstructured) | ||
wfc.metrics.WorkflowAdded(string(wf.GetUID()), getWfPhase(obj)) | ||
}, | ||
UpdateFunc: func(old, new interface{}) { | ||
wf := new.(*unstructured.Unstructured) | ||
wfc.metrics.WorkflowUpdated(string(wf.GetUID()), getWfPhase(old), getWfPhase(new)) | ||
}, | ||
DeleteFunc: func(obj interface{}) { | ||
wf, ok := obj.(*unstructured.Unstructured) | ||
if ok { // maybe cache.DeletedFinalStateUnknown | ||
wfc.metrics.WorkflowDeleted(string(wf.GetUID()), getWfPhase(obj)) | ||
wfc.metrics.StopRealtimeMetricsForKey(string(wf.GetUID())) | ||
simster7 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
}, | ||
}) | ||
|
@@ -987,3 +950,14 @@ func (wfc *WorkflowController) releaseAllWorkflowLocks(obj interface{}) { | |
func (wfc *WorkflowController) isArchivable(wf *wfv1.Workflow) bool { | ||
return wfc.archiveLabelSelector.Matches(labels.Set(wf.Labels)) | ||
} | ||
|
||
func (wfc *WorkflowController) syncWorkflowPhaseMetrics() { | ||
for _, phase := range []wfv1.NodePhase{wfv1.NodePending, wfv1.NodeRunning, wfv1.NodeSucceeded, wfv1.NodeFailed, wfv1.NodeError} { | ||
objs, err := wfc.wfInformer.GetIndexer().ByIndex(indexes.WorkflowPhaseIndex, string(phase)) | ||
if err != nil { | ||
log.WithError(err).Errorf("failed to list workflows by '%s'", phase) | ||
continue | ||
} | ||
wfc.metrics.SetWorkflowPhaseGauge(phase, len(objs)) | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Some pork-barrel style changes in this PR