Skip to content

Commit

Permalink
fix: Health check from lister not apiserver (#11375)
Browse files Browse the repository at this point in the history
Signed-off-by: weidongcai <cwdsuzhou@gmail.com>
  • Loading branch information
cwdsuzhou committed Aug 30, 2023
1 parent 3c3ed77 commit e90d6bf
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 8 deletions.
1 change: 0 additions & 1 deletion docs/environment-variables.md
Expand Up @@ -33,7 +33,6 @@ most users. Environment variables may be removed at any time.
| `GZIP_IMPLEMENTATION` | `string` | `PGZip` | The implementation of compression/decompression. Currently only "`PGZip`" and "`GZip`" are supported. |
| `INFORMER_WRITE_BACK` | `bool` | `true` | Whether to write back to informer instead of catching up. |
| `HEALTHZ_AGE` | `time.Duration` | `5m` | How old an un-reconciled workflow must be before the controller reports unhealthy. |
| `HEALTHZ_LIST_LIMIT` | `int` | `200` | The maximum number of responses to return for a list call on workflows for liveness check. |
| `INDEX_WORKFLOW_SEMAPHORE_KEYS` | `bool` | `true` | Whether or not to index semaphores. |
| `LEADER_ELECTION_IDENTITY` | `string` | Controller's `metadata.name` | The ID used for workflow controllers to elect a leader. |
| `LEADER_ELECTION_DISABLE` | `bool` | `false` | Whether leader election should be disabled. |
Expand Down
17 changes: 10 additions & 7 deletions workflow/controller/healthz.go
Expand Up @@ -6,21 +6,20 @@ import (
"time"

log "github.com/sirupsen/logrus"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"

"github.com/argoproj/argo-workflows/v3/pkg/client/listers/workflow/v1alpha1"
"github.com/argoproj/argo-workflows/v3/util/env"
"github.com/argoproj/argo-workflows/v3/workflow/common"
)

var (
age = env.LookupEnvDurationOr("HEALTHZ_AGE", 5*time.Minute)
limit = int64(env.LookupEnvIntOr("HEALTHZ_LIST_LIMIT", 200))
age = env.LookupEnvDurationOr("HEALTHZ_AGE", 5*time.Minute)
)

// https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-a-liveness-http-request
// If any workflow has gone un-reconciled for longer than HEALTHZ_AGE (default 5m), we've gone wrong.
func (wfc *WorkflowController) Healthz(w http.ResponseWriter, r *http.Request) {
ctx := r.Context()
instanceID := wfc.Config.InstanceID
instanceIDSelector := func() string {
if instanceID != "" {
Expand All @@ -30,12 +29,16 @@ func (wfc *WorkflowController) Healthz(w http.ResponseWriter, r *http.Request) {
}()
labelSelector := "!" + common.LabelKeyPhase + "," + instanceIDSelector
err := func() error {
// avoid problems with informers, but directly querying the API
list, err := wfc.wfclientset.ArgoprojV1alpha1().Workflows(wfc.managedNamespace).List(ctx, metav1.ListOptions{LabelSelector: labelSelector, Limit: limit})
seletor, err := labels.Parse(labelSelector)
if err != nil {
return err
}
for _, wf := range list.Items {
lister := v1alpha1.NewWorkflowLister(wfc.wfInformer.GetIndexer())
list, err := lister.Workflows(wfc.managedNamespace).List(seletor)
if err != nil {
return err
}
for _, wf := range list {
if time.Since(wf.GetCreationTimestamp().Time) > age {
return fmt.Errorf("workflow never reconciled: %s", wf.Name)
}
Expand Down

0 comments on commit e90d6bf

Please sign in to comment.