Skip to content

Commit

Permalink
Ignore reconcile errors that occur because a pod is being terminated (o…
Browse files Browse the repository at this point in the history
…pen-telemetry#1233)

* Ignore reconcile errors that occur because a pod is being terminated

Signed-off-by: Kevin Earls <kearls@redhat.com>

* Appease the all-powerful linter

Signed-off-by: Kevin Earls <kearls@redhat.com>

* Change behavior to end reconcile loop if pod has been terminated

Signed-off-by: Kevin Earls <kearls@redhat.com>

* Print a log message if we exit the reconciler loop

Signed-off-by: Kevin Earls <kearls@redhat.com>

* Look for NamespaceTerminatingCause

Signed-off-by: Kevin Earls <kearls@redhat.com>

* Appease the almighty linter

Signed-off-by: Kevin Earls <kearls@redhat.com>

* Fix log message

Signed-off-by: Kevin Earls <kearls@redhat.com>

* Skip flaky test

Signed-off-by: Kevin Earls <kearls@redhat.com>

Signed-off-by: Kevin Earls <kearls@redhat.com>
Co-authored-by: Ben B <bongartz@klimlive.de>
  • Loading branch information
2 people authored and ihalaij1 committed Dec 8, 2022
1 parent 6b05de8 commit 67e6dde
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 0 deletions.
1 change: 1 addition & 0 deletions cmd/otel-allocator/allocation/least_weighted_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ func TestNoCollectorReassignment(t *testing.T) {
}

func TestSmartCollectorReassignment(t *testing.T) {
t.Skip("This test is flaky and fails frequently, see issue 1291")
s, _ := New("least-weighted", logger)

cols := makeNCollectors(4, 0)
Expand Down
5 changes: 5 additions & 0 deletions controllers/opentelemetrycollector_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,11 @@ func (r *OpenTelemetryCollectorReconciler) Reconcile(ctx context.Context, req ct
func (r *OpenTelemetryCollectorReconciler) RunTasks(ctx context.Context, params reconcile.Params) error {
for _, task := range r.tasks {
if err := task.Do(ctx, params); err != nil {
// If we get an error that occurs because the namespace is being terminated, then exit this loop
if apierrors.IsForbidden(err) && apierrors.HasStatusCause(err, corev1.NamespaceTerminatingCause) {
r.log.V(2).Info("Exiting reconcile loop because namespace is being terminated", "namespace", params.Instance.Namespace)
return nil
}
r.log.Error(err, fmt.Sprintf("failed to reconcile %s", task.Name))
if task.BailOnError {
return err
Expand Down

0 comments on commit 67e6dde

Please sign in to comment.