-
Notifications
You must be signed in to change notification settings - Fork 3.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(controller): Retry transient offload errors. Resolves #4464 #4482
Merged
Merged
Changes from 9 commits
Commits
Show all changes
12 commits
Select commit
Hold shift + click to select a range
28f9676
feat(controller): Retry transient offload errors. Resolves #4464
alexec 19df510
Merge branch 'master' into nr
alexec a37b532
Revert "fix(controller): Failure tolerant workflow archiving and offl…
alexec 4ba6d3b
nr: D persist/sqldb/retry/offload_node_status_repo_with_retry.go
alexec 37488e0
Revert "nr: D persist/sqldb/retry/offload_node_status_repo_with_retr…
alexec 1b4b0f6
nr: M test/e2e/argo_server_test.go
alexec 58d5128
nr: M persist/sqldb/retry/offload_node_status_repo_with_retry.go
alexec 57c5e34
Merge branch 'master' into nr
alexec fc3a28a
Merge branch 'master' into nr
alexec 16b6bc2
Merge branch 'master' into nr
alexec 4dcee81
Merge branch 'master' into nr
alexec bf7ca32
nr: D persist/sqldb/retry/offload_node_status_repo_with_retry.go
alexec File filter
Filter by extension
Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
There are no files selected for viewing
96 changes: 96 additions & 0 deletions
96
persist/sqldb/retry/offload_node_status_repo_with_retry.go
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
package retry | ||
|
||
import ( | ||
"time" | ||
|
||
"k8s.io/apimachinery/pkg/util/wait" | ||
|
||
"github.com/argoproj/argo/persist/sqldb" | ||
wfv1 "github.com/argoproj/argo/pkg/apis/workflow/v1alpha1" | ||
"github.com/argoproj/argo/util/errors" | ||
) | ||
|
||
// should be <10s | ||
// Retry Seconds | ||
// 1 0.10 | ||
// 2 0.30 | ||
// 3 0.70 | ||
// 4 1.50 | ||
// 5 3.10 | ||
var readRetry = wait.Backoff{Steps: 5, Duration: 100 * time.Millisecond, Factor: 2} | ||
|
||
// needs to be long | ||
// http://backoffcalculator.com/?attempts=5&rate=2&interval=1 | ||
// Retry Seconds | ||
// 1 1.00 | ||
// 2 3.00 | ||
// 3 7.00 | ||
// 4 15.00 | ||
// 5 31.00 | ||
var writeRetry = wait.Backoff{Steps: 5, Duration: 1 * time.Second, Factor: 2} | ||
|
||
type offloadNodeStatusRepoWithRetry struct { | ||
delegate sqldb.OffloadNodeStatusRepo | ||
} | ||
|
||
func WithRetry(delegate sqldb.OffloadNodeStatusRepo) sqldb.OffloadNodeStatusRepo { | ||
return &offloadNodeStatusRepoWithRetry{delegate} | ||
} | ||
|
||
func (o *offloadNodeStatusRepoWithRetry) Save(uid, namespace string, nodes wfv1.Nodes) (string, error) { | ||
var version string | ||
err := wait.ExponentialBackoff(writeRetry, func() (bool, error) { | ||
var err error | ||
version, err = o.delegate.Save(uid, namespace, nodes) | ||
return done(err), err | ||
}) | ||
return version, err | ||
} | ||
|
||
func (o *offloadNodeStatusRepoWithRetry) Get(uid, version string) (wfv1.Nodes, error) { | ||
var nodes wfv1.Nodes | ||
err := wait.ExponentialBackoff(readRetry, func() (bool, error) { | ||
var err error | ||
nodes, err = o.delegate.Get(uid, version) | ||
return done(err), err | ||
}) | ||
return nodes, err | ||
} | ||
|
||
func done(err error) bool { | ||
return err == nil || !errors.IsTransientErr(err) | ||
} | ||
|
||
func (o *offloadNodeStatusRepoWithRetry) List(namespace string) (map[sqldb.UUIDVersion]wfv1.Nodes, error) { | ||
var nodes map[sqldb.UUIDVersion]wfv1.Nodes | ||
err := wait.ExponentialBackoff(readRetry, func() (bool, error) { | ||
var err error | ||
nodes, err = o.delegate.List(namespace) | ||
return done(err), err | ||
}) | ||
return nodes, err | ||
} | ||
|
||
func (o *offloadNodeStatusRepoWithRetry) ListOldOffloads(namespace string) ([]sqldb.UUIDVersion, error) { | ||
var versions []sqldb.UUIDVersion | ||
err := wait.ExponentialBackoff(readRetry, func() (bool, error) { | ||
var err error | ||
versions, err = o.delegate.ListOldOffloads(namespace) | ||
return done(err), err | ||
}) | ||
return versions, err | ||
} | ||
|
||
func (o *offloadNodeStatusRepoWithRetry) Delete(uid, version string) error { | ||
err := wait.ExponentialBackoff(writeRetry, func() (bool, error) { | ||
err := o.delegate.Delete(uid, version) | ||
return done(err), err | ||
}) | ||
return err | ||
} | ||
|
||
func (o *offloadNodeStatusRepoWithRetry) IsEnabled() bool { | ||
return o.delegate.IsEnabled() | ||
} | ||
|
||
var _ sqldb.OffloadNodeStatusRepo = &offloadNodeStatusRepoWithRetry{} |
85 changes: 85 additions & 0 deletions
85
persist/sqldb/retry/offload_node_status_repo_with_retry_test.go
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
package retry | ||
|
||
import ( | ||
"errors" | ||
"testing" | ||
|
||
"github.com/stretchr/testify/assert" | ||
"github.com/stretchr/testify/mock" | ||
apierr "k8s.io/apimachinery/pkg/api/errors" | ||
|
||
"github.com/argoproj/argo/persist/sqldb" | ||
sqldbmocks "github.com/argoproj/argo/persist/sqldb/mocks" | ||
wfv1 "github.com/argoproj/argo/pkg/apis/workflow/v1alpha1" | ||
) | ||
|
||
var transientErr = apierr.NewTooManyRequests("", 0) | ||
var permanentErr = errors.New("") | ||
|
||
func Test_offloadNodeStatusRepoWithRetry(t *testing.T) { | ||
t.Run("PermanentError", func(t *testing.T) { | ||
delegate := &sqldbmocks.OffloadNodeStatusRepo{} | ||
o := WithRetry(delegate) | ||
delegate.On("Save", mock.Anything, mock.Anything, mock.Anything). | ||
Return("", transientErr). | ||
Return("", permanentErr) | ||
_, err := o.Save("my-uid", "my-ns", wfv1.Nodes{}) | ||
assert.Equal(t, permanentErr, err) | ||
}) | ||
delegate := &sqldbmocks.OffloadNodeStatusRepo{} | ||
o := WithRetry(delegate) | ||
t.Run("Save", func(t *testing.T) { | ||
delegate.On("Save", "my-uid", "my-ns", mock.Anything). | ||
Return("", transientErr). | ||
Return("my-version", nil) | ||
version, err := o.Save("my-uid", "my-ns", wfv1.Nodes{}) | ||
if assert.NoError(t, err) { | ||
assert.Equal(t, "my-version", version) | ||
} | ||
}) | ||
t.Run("Get", func(t *testing.T) { | ||
delegate.On("Get", "my-uid", "my-version"). | ||
Return(nil, transientErr). | ||
Return(wfv1.Nodes{}, nil) | ||
nodes, err := o.Get("my-uid", "my-version") | ||
if assert.NoError(t, err) { | ||
assert.NotNil(t, nodes) | ||
} | ||
}) | ||
t.Run("List", func(t *testing.T) { | ||
delegate.On("List", "my-ns"). | ||
Return(nil, transientErr). | ||
Return(make(map[sqldb.UUIDVersion]wfv1.Nodes), nil) | ||
list, err := o.List("my-ns") | ||
if assert.NoError(t, err) { | ||
assert.NotNil(t, list) | ||
} | ||
}) | ||
t.Run("ListOldOffloads", func(t *testing.T) { | ||
delegate.On("ListOldOffloads", "my-ns"). | ||
Return(nil, transientErr). | ||
Return(make([]sqldb.UUIDVersion, 0), nil) | ||
list, err := o.ListOldOffloads("my-ns") | ||
if assert.NoError(t, err) { | ||
assert.NotNil(t, list) | ||
} | ||
}) | ||
t.Run("Delete", func(t *testing.T) { | ||
delegate.On("Delete", "my-uid", "my-version"). | ||
Return(transientErr). | ||
Return(nil) | ||
err := o.Delete("my-uid", "my-version") | ||
assert.NoError(t, err) | ||
}) | ||
t.Run("IsEnabled", func(t *testing.T) { | ||
delegate.On("IsEnabled"). | ||
Return(true) | ||
assert.True(t, o.IsEnabled()) | ||
}) | ||
} | ||
|
||
func Test_done(t *testing.T) { | ||
assert.True(t, done(nil)) | ||
assert.False(t, done(transientErr)) | ||
assert.True(t, done(permanentErr)) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is a significant change — I think this makes it a v3 feature.