Skip to content

Commit

Permalink
Add a retry mechanism for ReadOne calls in rm.createResource (#81)
Browse files Browse the repository at this point in the history
Issue #, if available:

Description of changes:

In some rare cases and with some specific AWS APIs, calling `ReadOne`
right after a `rm.Create` can return a `NotFound` error. We want to
retry calling `rm.ReadOne` in hopes of receiving a correct response.

This patch adds a backoff/retry mechanism around `ReadOne` call in
`rm.createResource`.

By submitting this pull request, I confirm that my contribution is made
under the terms of the Apache 2.0 license.
  • Loading branch information
a-hilaly authored Apr 15, 2022
1 parent 594be6a commit 360e72a
Show file tree
Hide file tree
Showing 6 changed files with 149 additions and 1 deletion.
27 changes: 27 additions & 0 deletions ATTRIBUTION.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ License version 2.0, we include the full text of the package's License below.
* `k8s.io/client-go`
* `sigs.k8s.io/controller-runtime`
* `sigs.k8s.io/controller-tools`
* `github.com/cenkalti/backoff`

### github.com/aws/aws-sdk-go

Expand Down Expand Up @@ -1388,3 +1389,29 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

# github.com/cenkalti/backoff

The MIT License (MIT)

Copyright (c) 2014 Cenk Altı

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


Subdependencies: N/A
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ go 1.17

require (
github.com/aws/aws-sdk-go v1.42.0
github.com/cenkalti/backoff/v4 v4.1.2
github.com/go-logr/logr v1.2.0
github.com/google/go-cmp v0.5.5
github.com/itchyny/gojq v0.12.6
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@ github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kB
github.com/bketelsen/crypt v0.0.3-0.20200106085610-5cbc8cc4026c/go.mod h1:MKsuJmJgSg28kpZDP6UIiPt0e0Oz0kqKNGyRaWEPv84=
github.com/bketelsen/crypt v0.0.4/go.mod h1:aI6NrJ0pMGgvZKL1iVgXLnfIFJtfV+bKCoqOes/6LfM=
github.com/blang/semver v3.5.1+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk=
github.com/cenkalti/backoff/v4 v4.1.2 h1:6Yo7N8UP2K6LWZnW94DLVSSrbobcWdVzAYOisuDPIFo=
github.com/cenkalti/backoff/v4 v4.1.2/go.mod h1:scbssz8iZGpm3xbr14ovlUdkxfGXNInqkPWOWmG2CLw=
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
github.com/certifi/gocertifi v0.0.0-20191021191039-0944d244cd40/go.mod h1:sGbDF6GwGcLpkNXPUTkMRoywsNa/ol15pxFe6ERfguA=
github.com/certifi/gocertifi v0.0.0-20200922220541-2c3bb06c6054/go.mod h1:sGbDF6GwGcLpkNXPUTkMRoywsNa/ol15pxFe6ERfguA=
Expand Down
9 changes: 9 additions & 0 deletions pkg/errors/error.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ var (
// SecretNotFound is returned if specified kubernetes secret is not found.
SecretNotFound = fmt.Errorf(
"kubernetes secret not found")
// ReadOneFailedAfterCreate is returned if a ReadOne call fails right after
// a create operation.
ReadOneFailedAfterCreate = fmt.Errorf("ReadOne call failed after a Create operation")
)

// AWSError returns the type conversion for the supplied error to an aws-sdk-go
Expand All @@ -74,6 +77,12 @@ func AWSRequestFailure(err error) (awserr.RequestFailure, bool) {
return awsRF, ok
}

// NewReadOneFailAfterCreate takes a number of attempts and returns a
// ReadOneFailedAfterCreate error if multiple ReadOne calls fails.
func NewReadOneFailAfterCreate(numAttempts int) error {
return fmt.Errorf("%w: number of attempts: %d", ReadOneFailedAfterCreate, numAttempts)
}

// HTTPStatusCode returns the HTTP status code from the supplied error by
// introspecting the error to see if it's an awserr.RequestFailure interface
// and if so, calling StatusCode() on that type-converted RequestFailure. If
Expand Down
59 changes: 58 additions & 1 deletion pkg/runtime/reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,10 @@ package runtime

import (
"context"
"fmt"
"time"

backoff "github.com/cenkalti/backoff/v4"
"github.com/go-logr/logr"
"github.com/pkg/errors"
corev1 "k8s.io/api/core/v1"
Expand All @@ -39,6 +41,10 @@ import (
acktypes "github.com/aws-controllers-k8s/runtime/pkg/types"
)

const (
backoffReadOneTimeout = 10 * time.Second
)

// reconciler describes a generic reconciler within ACK.
type reconciler struct {
sc acktypes.ServiceController
Expand Down Expand Up @@ -380,7 +386,21 @@ func (r *resourceReconciler) createResource(
observed, err := rm.ReadOne(ctx, latest)
rlog.Exit("rm.ReadOne", err)
if err != nil {
return latest, err
if err == ackerr.NotFound {
// Some eventually-consistent APIs return a 404 from a
// ReadOne operation immediately after a successful
// Create operation. In these exceptional cases
// we retry the ReadOne operation with a backoff
// until we get the expected 200 from the ReadOne.
rlog.Enter("rm.delayedReadOneAfterCreate")
observed, err = r.delayedReadOneAfterCreate(ctx, rm, latest)
rlog.Exit("rm.delayedReadOneAfterCreate", err)
if err != nil {
return latest, err
}
} else {
return latest, err
}
}

// Take the status from the latest ReadOne
Expand All @@ -397,6 +417,43 @@ func (r *resourceReconciler) createResource(
return latest, nil
}

// delayedReadOneAfterCreate is a helper function called when a ReadOne call
// fails with a 404 error right after a Create call. It uses a backoff/retry
// mechanism to retrieve the observed state right after a readone call.
func (r *resourceReconciler) delayedReadOneAfterCreate(
ctx context.Context,
rm acktypes.AWSResourceManager,
res acktypes.AWSResource,
) (acktypes.AWSResource, error) {
var err error
rlog := ackrtlog.FromContext(ctx)
exit := rlog.Trace("r.delayedReadOneAfterCreate")
defer exit(err)

bo := backoff.NewExponentialBackOff()
bo.MaxElapsedTime = backoffReadOneTimeout
ticker := backoff.NewTicker(bo)
attempts := 0

var observed acktypes.AWSResource

for range ticker.C {
attempts++

rlog.Enter(fmt.Sprintf("rm.ReadOne (attempt %d)", attempts))
observed, err = rm.ReadOne(ctx, res)
rlog.Exit(fmt.Sprintf("rm.ReadOne (attempt %d)", attempts), err)
if err == nil || err != ackerr.NotFound {
ticker.Stop()
break
}
}
if err != nil {
return res, ackerr.NewReadOneFailAfterCreate(attempts)
}
return observed, nil
}

// updateResource calls one or more AWS APIs to modify the backend AWS resource
// and patches the CR's Metadata and Spec back to the Kubernetes API.
//
Expand Down
52 changes: 52 additions & 0 deletions pkg/runtime/reconciler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,58 @@ func managerFactoryMocks(
return rmf, rd
}

func TestReconcilerCreate_BackoffRetries(t *testing.T) {
require := require.New(t)

ctx := context.TODO()
arn := ackv1alpha1.AWSResourceName("mybook-arn")

desired, _, _ := resourceMocks()
desired.On("ReplaceConditions", []*ackv1alpha1.Condition{}).Return()

ids := &ackmocks.AWSResourceIdentifiers{}
ids.On("ARN").Return(&arn)

latest, latestRTObj, _ := resourceMocks()
latest.On("Identifiers").Return(ids)

latest.On("Conditions").Return([]*ackv1alpha1.Condition{})
latest.On(
"ReplaceConditions",
mock.AnythingOfType("[]*v1alpha1.Condition"),
).Return()

rm := &ackmocks.AWSResourceManager{}
rm.On("ResolveReferences", ctx, nil, desired).Return(
desired, nil,
).Times(2)
rm.On("ReadOne", ctx, desired).Return(
latest, ackerr.NotFound,
).Once()
rm.On("ReadOne", ctx, latest).Return(
latest, ackerr.NotFound,
).Times(4)
rm.On("ReadOne", ctx, latest).Return(
latest, nil,
)
rm.On("Create", ctx, desired).Return(
latest, nil,
)
rm.On("IsSynced", ctx, latest).Return(true, nil)
rmf, rd := managedResourceManagerFactoryMocks(desired, latest)

rm.On("LateInitialize", ctx, latest).Return(latest, nil)
rd.On("IsManaged", desired).Return(true)
rd.On("Delta", desired, latest).Return(ackcompare.NewDelta())
rd.On("Delta", latest, latest).Return(ackcompare.NewDelta())

r, kc := reconcilerMocks(rmf)
kc.On("Patch", ctx, latestRTObj, mock.AnythingOfType("*client.mergeFromPatch")).Return(nil)
_, err := r.Sync(ctx, rm, desired)
require.Nil(err)
rm.AssertNumberOfCalls(t, "ReadOne", 6)
}

func TestReconcilerCreate_UnManagedResource_CheckReferencesResolveTwice(t *testing.T) {
require := require.New(t)

Expand Down

0 comments on commit 360e72a

Please sign in to comment.