Skip to content

Commit

Permalink
Set retry timeout on deployment not found error (#807)
Browse files Browse the repository at this point in the history
* Set requeue timeout on deployment not found

* Refactor events and test
  • Loading branch information
int128 committed Nov 13, 2022
1 parent 4249c00 commit e7dea1c
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 8 deletions.
24 changes: 20 additions & 4 deletions controllers/applicationhealthdeployment_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,15 @@ import (
)

var (
// When the GitHub Deployment is not found, this action will retry by this interval
// until the application is synced with a valid GitHub Deployment.
// This should be reasonable to avoid the rate limit of GitHub API.
requeueIntervalWhenDeploymentNotFound = 30 * time.Second

// When the GitHub Deployment is not found, this action will retry by this timeout.
// Argo CD refreshes an application every 3 minutes by default.
// This should be reasonable to avoid the rate limit of GitHub API.
requeueTimeoutWhenDeploymentNotFound = 10 * time.Minute
)

// ApplicationHealthDeploymentReconciler reconciles an Application object
Expand Down Expand Up @@ -68,14 +76,22 @@ func (r *ApplicationHealthDeploymentReconciler) Reconcile(ctx context.Context, r
if deploymentURL == "" {
return ctrl.Result{}, nil
}

deploymentIsAlreadyHealthy, err := r.Notification.CheckIfDeploymentIsAlreadyHealthy(ctx, deploymentURL)
if notification.IsNotFoundError(err) {
// Retry until the application is synced with a valid GitHub Deployment.
// https://github.com/int128/argocd-commenter/issues/762
logger.Info("requeue because deployment is not found", "after", requeueIntervalWhenDeploymentNotFound, "error", err)
r.Recorder.Eventf(&app, corev1.EventTypeNormal, "DeploymentNotFound",
"deployment %s is not found, requeue after %s", deploymentURL, requeueIntervalWhenDeploymentNotFound)
return ctrl.Result{RequeueAfter: requeueIntervalWhenDeploymentNotFound}, nil
lastOperationAt := argocd.GetLastOperationAt(app)
if time.Now().Before(lastOperationAt.Add(requeueTimeoutWhenDeploymentNotFound)) {
logger.Info("retry due to deployment not found error", "after", requeueIntervalWhenDeploymentNotFound, "error", err)
r.Recorder.Eventf(&app, corev1.EventTypeNormal, "DeploymentNotFound",
"deployment %s not found, retry after %s", deploymentURL, requeueIntervalWhenDeploymentNotFound)
return ctrl.Result{RequeueAfter: requeueIntervalWhenDeploymentNotFound}, nil
}
logger.Info("retry timeout because last operation is too old", "lastOperationAt", lastOperationAt)
r.Recorder.Eventf(&app, corev1.EventTypeWarning, "DeploymentNotFoundRetryTimeout",
"deployment %s not found, retry timeout", deploymentURL)
return ctrl.Result{}, nil
}
if deploymentIsAlreadyHealthy {
logger.Info("skip notification because the deployment is already healthy", "deployment", deploymentURL)
Expand Down
32 changes: 32 additions & 0 deletions controllers/applicationhealthdeployment_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ import (
"github.com/google/go-github/v47/github"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/client"
)

var _ = Describe("Application health deployment controller", func() {
Expand Down Expand Up @@ -164,5 +166,35 @@ var _ = Describe("Application health deployment controller", func() {
Eventually(func() int { return githubMock.DeploymentStatuses.CountBy(999303) }, timeout, interval).Should(Equal(1))
Expect(githubMock.DeploymentStatuses.CountBy(999302)).Should(Equal(1))
})

It("Should retry a deployment status until timeout", func() {
By("Updating the deployment annotation")
app.Annotations = map[string]string{
"argocd-commenter.int128.github.io/deployment-url": "https://api.github.com/repos/int128/manifests/deployments/999999",
}
app.Status = argocdv1alpha1.ApplicationStatus{
OperationState: &argocdv1alpha1.OperationState{
StartedAt: metav1.NewTime(time.Now().Add(-requeueTimeoutWhenDeploymentNotFound)),
},
}
Expect(k8sClient.Update(ctx, &app)).Should(Succeed())

By("Updating the application to progressing")
app.Status.Health.Status = health.HealthStatusProgressing
Expect(k8sClient.Update(ctx, &app)).Should(Succeed())

By("Updating the application to healthy")
app.Status.Health.Status = health.HealthStatusHealthy
Expect(k8sClient.Update(ctx, &app)).Should(Succeed())

Eventually(func(g Gomega) {
var eventList corev1.EventList
g.Expect(k8sClient.List(ctx, &eventList, client.MatchingFields{
"involvedObject.name": app.Name,
"reason": "DeploymentNotFoundRetryTimeout",
})).Should(Succeed())
g.Expect(eventList.Items).Should(HaveLen(1))
}, timeout, interval)
})
})
})
20 changes: 16 additions & 4 deletions controllers/applicationphasedeployment_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package controllers

import (
"context"
"time"

argocdv1alpha1 "github.com/argoproj/argo-cd/v2/pkg/apis/application/v1alpha1"
synccommon "github.com/argoproj/gitops-engine/pkg/sync/common"
Expand Down Expand Up @@ -57,6 +58,10 @@ func (r *ApplicationPhaseDeploymentReconciler) Reconcile(ctx context.Context, re
return ctrl.Result{}, nil
}
phase := argocd.GetOperationPhase(app)
if phase == "" {
return ctrl.Result{}, nil
}

deploymentURL := argocd.GetDeploymentURL(app)
if deploymentURL == "" {
return ctrl.Result{}, nil
Expand All @@ -65,10 +70,17 @@ func (r *ApplicationPhaseDeploymentReconciler) Reconcile(ctx context.Context, re
if notification.IsNotFoundError(err) {
// Retry until the application is synced with a valid GitHub Deployment.
// https://github.com/int128/argocd-commenter/issues/762
logger.Info("requeue because deployment is not found", "after", requeueIntervalWhenDeploymentNotFound, "error", err)
r.Recorder.Eventf(&app, corev1.EventTypeNormal, "DeploymentNotFound",
"deployment %s is not found, requeue after %s", deploymentURL, requeueIntervalWhenDeploymentNotFound)
return ctrl.Result{RequeueAfter: requeueIntervalWhenDeploymentNotFound}, nil
lastOperationAt := argocd.GetLastOperationAt(app)
if time.Now().Before(lastOperationAt.Add(requeueTimeoutWhenDeploymentNotFound)) {
logger.Info("retry due to deployment not found error", "after", requeueIntervalWhenDeploymentNotFound, "error", err)
r.Recorder.Eventf(&app, corev1.EventTypeNormal, "DeploymentNotFound",
"deployment %s not found, retry after %s", deploymentURL, requeueIntervalWhenDeploymentNotFound)
return ctrl.Result{RequeueAfter: requeueIntervalWhenDeploymentNotFound}, nil
}
logger.Info("retry timeout because last operation is too old", "lastOperationAt", lastOperationAt)
r.Recorder.Eventf(&app, corev1.EventTypeWarning, "DeploymentNotFoundRetryTimeout",
"deployment %s not found, retry timeout", deploymentURL)
return ctrl.Result{}, nil
}
if deploymentIsAlreadyHealthy {
logger.Info("skip notification because the deployment is already healthy", "deployment", deploymentURL)
Expand Down
12 changes: 12 additions & 0 deletions pkg/argocd/application.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package argocd
import (
argocdv1alpha1 "github.com/argoproj/argo-cd/v2/pkg/apis/application/v1alpha1"
synccommon "github.com/argoproj/gitops-engine/pkg/sync/common"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// GetDeployedRevision returns the last synced revision
Expand Down Expand Up @@ -30,3 +31,14 @@ func GetOperationPhase(a argocdv1alpha1.Application) synccommon.OperationPhase {
}
return a.Status.OperationState.Phase
}

// GetLastOperationAt returns OperationState.FinishedAt, OperationState.StartedAt or zero Time.
func GetLastOperationAt(a argocdv1alpha1.Application) metav1.Time {
if a.Status.OperationState == nil {
return metav1.Time{}
}
if a.Status.OperationState.FinishedAt != nil {
return *a.Status.OperationState.FinishedAt
}
return a.Status.OperationState.StartedAt
}

0 comments on commit e7dea1c

Please sign in to comment.