Skip to content
This repository has been archived by the owner on Jul 16, 2021. It is now read-only.

Copy backoff module from Trillian #1424

Merged
merged 3 commits into from Jan 3, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
130 changes: 130 additions & 0 deletions internal/backoff/backoff.go
@@ -0,0 +1,130 @@
// Copyright 2020 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package backoff allows retrying an operation with backoff.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there any way of denoting that this is a copy of code that is shared with Trillian (and vice versa)? I noted your copy vs. dependency proverb, but the potential for divergence in both flavours is worrisome.

While here - was there not a public backoff or retry library that was suitable?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Divergence is less of a risk here because we can fully test it in this project without worrying about breaking other projects. Perhaps I'm missing something though.

grpc-go has a backoff library but it is also inside an internal directory.

package backoff

import (
"context"
"fmt"
"math/rand"
"time"

"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
)

// RetriableError is a string-backed error that explicitly instructs Backoff
// to retry the failed operation.
type RetriableError string

// Error implements the error interface by returning the underlying message.
func (e RetriableError) Error() string { return string(e) }

// RetriableErrorf formats a message from format and its arguments and returns
// it as a RetriableError.
func RetriableErrorf(format string, a ...interface{}) error {
	msg := fmt.Sprintf(format, a...)
	return RetriableError(msg)
}

// Backoff specifies the parameters of an exponential backoff algorithm and
// tracks its progress between calls to Duration. The intended operating range
// is 0 < Min <= Max <= 2^62 (nanosec), and Factor >= 1.
type Backoff struct {
	Min    time.Duration // Duration of the first pause.
	Max    time.Duration // Max duration of a pause, before jitter is added.
	Factor float64       // The factor of duration increase between iterations.
	Jitter bool          // Add random noise to pauses.

	delta time.Duration // Current pause duration relative to Min, no jitter.
}

// Duration returns the time to wait on the current retry iteration and
// advances the internal state to the next iteration. Each call multiplies the
// un-jittered pause by Factor until it reaches Max. If Jitter is enabled, a
// random value in [0, pause) is added, so the result is always less than
// double the un-jittered pause.
//
// A non-positive pause (for example from a zero-value Backoff) is returned
// without jitter instead of panicking.
func (b *Backoff) Duration() time.Duration {
	base := b.Min + b.delta
	pause := base
	// rand.Int63n panics when its argument is not positive, so jitter is only
	// applied to positive pauses.
	if b.Jitter && pause > 0 {
		pause += time.Duration(rand.Int63n(int64(pause)))
	}

	// Range-check the float64 product before converting it: converting an
	// out-of-range float64 to an integer yields an implementation-dependent
	// value. A product outside [Min, Max), including NaN, falls back to Max.
	next := float64(base) * b.Factor
	nextPause := b.Max
	if next >= float64(b.Min) && next < float64(b.Max) {
		nextPause = time.Duration(next)
	}
	b.delta = nextPause - b.Min

	return pause
}

// Reset sets the internal state back to the first retry iteration, so the next
// call to Duration starts again from Min.
func (b *Backoff) Reset() {
	b.delta = 0
}

// Retry calls f until it returns an error that is not retryable (which
// includes success) or until ctx is done. After each retryable error it waits
// for b.Duration() before calling f again. The optional retry codes are passed
// to IsRetryable as additional codes to retry on.
//
// If ctx is already done on entry, f is never called. Whenever ctx ends the
// retries, ctx.Err() is returned rather than the last error from f.
// Backoff is not reset by this function.
func (b *Backoff) Retry(ctx context.Context, f func() error, retry ...codes.Code) error {
	// If the context is already done, don't make any attempts to call f.
	if err := ctx.Err(); err != nil {
		return err
	}

	// Try calling f while the error is retryable and ctx is not done.
	for {
		if err := f(); !IsRetryable(err, retry...) {
			return err
		}
		// Use an explicit timer so it can be stopped when ctx ends the wait,
		// instead of leaving a time.After timer pending until it fires.
		timer := time.NewTimer(b.Duration())
		select {
		case <-timer.C:
		case <-ctx.Done():
			timer.Stop()
			return ctx.Err()
		}
	}
}

// IsRetryable reports whether err should be retried. It returns true when the
// status code of err is one of the codes treated as retryable per
// https://godoc.org/google.golang.org/grpc/codes, when that code is listed in
// retry, or when err is a RetriableError. codes.OK is never retryable.
func IsRetryable(err error, retry ...codes.Code) bool {
	code := status.Code(err)

	// Fast path.
	if code == codes.OK {
		return false
	}

	switch code {
	case codes.Unavailable, // Client can just retry the call.
		codes.Aborted, // Client can retry the read-modify-write function.
		codes.DeadlineExceeded, // Debatable.
		codes.ResourceExhausted: // Debatable; retry with backoff.
		return true
	}

	// Caller-supplied codes that should also be retried.
	for i := range retry {
		if retry[i] == code {
			return true
		}
	}

	// Everything else is final unless it is an explicit RetriableError.
	_, retriable := err.(RetriableError)
	return retriable
}
197 changes: 197 additions & 0 deletions internal/backoff/backoff_test.go
@@ -0,0 +1,197 @@
// Copyright 2020 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package backoff

import (
"context"
"errors"
"testing"
"time"

"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"

_ "github.com/golang/glog"
)

// TestBackoff checks the un-jittered value returned by the N-th call to
// Duration after a Reset, including the clamp at Max.
func TestBackoff(t *testing.T) {
	base := Backoff{
		Min:    time.Duration(1),
		Max:    time.Duration(100),
		Factor: 2,
	}
	cases := []struct {
		b     Backoff
		times int
		want  time.Duration
	}{
		{b: base, times: 1, want: time.Duration(1)},
		{b: base, times: 2, want: time.Duration(2)},
		{b: base, times: 3, want: time.Duration(4)},
		{b: base, times: 4, want: time.Duration(8)},
		{b: base, times: 8, want: time.Duration(100)},
	}
	for _, tc := range cases {
		tc.b.Reset()
		var got time.Duration
		for n := tc.times; n > 0; n-- {
			got = tc.b.Duration()
		}
		if got == tc.want {
			continue
		}
		t.Errorf("Duration() %v times: %v, want %v", tc.times, got, tc.want)
	}
}

// TestJitter checks that, with Jitter enabled, the N-th Duration falls within
// the expected bounds and that two independent runs produce different values.
func TestJitter(t *testing.T) {
	base := Backoff{
		Min:    1 * time.Second,
		Max:    100 * time.Second,
		Factor: 2,
		Jitter: true,
	}

	// nthDuration resets b and returns the result of its n-th Duration call.
	nthDuration := func(b *Backoff, n int) time.Duration {
		b.Reset()
		var d time.Duration
		for i := 0; i < n; i++ {
			d = b.Duration()
		}
		return d
	}

	cases := []struct {
		b     Backoff
		times int
		min   time.Duration
		max   time.Duration
	}{
		{b: base, times: 1, min: 1 * time.Second, max: 2 * time.Second},
		{b: base, times: 2, min: 2 * time.Second, max: 4 * time.Second},
		{b: base, times: 3, min: 4 * time.Second, max: 8 * time.Second},
		{b: base, times: 4, min: 8 * time.Second, max: 16 * time.Second},
		{b: base, times: 8, min: 100 * time.Second, max: 200 * time.Second},
	}
	for _, tc := range cases {
		first := nthDuration(&tc.b, tc.times)
		if first < tc.min || first > tc.max {
			t.Errorf("Duration() %v times, want %v < %v < %v", tc.times, tc.min, first, tc.max)
		}

		// Ensure a random value is being produced.
		second := nthDuration(&tc.b, tc.times)
		if first == second {
			t.Errorf("Duration() %v times == Duration() %v times, want %v != %v",
				tc.times, tc.times, first, second)
		}
	}
}

// TestRetry runs Retry against a table of functions and context setups and
// checks only whether an error is returned. The cases run sequentially and
// share one Backoff, which Retry does not reset between them.
func TestRetry(t *testing.T) {
	b := Backoff{
		Min:    50 * time.Millisecond,
		Max:    200 * time.Millisecond,
		Factor: 2,
	}

	// ctx used by Retry(), declared here so that a case's ctxFunc can set it
	// and a case's f can observe and cancel it.
	var (
		ctx    context.Context
		cancel context.CancelFunc
	)

	// retryUntilTenth returns a func that fails with a RetriableError on its
	// first nine calls and returns final from the tenth call onwards.
	retryUntilTenth := func(final error) func() error {
		calls := 0
		return func() error {
			calls++
			if calls < 10 {
				return RetriableErrorf("attempt %d", calls)
			}
			return final
		}
	}

	cases := []struct {
		name    string
		f       func() error
		ctxFunc func()
		wantErr bool
	}{
		{
			name: "func that immediately succeeds",
			f:    func() error { return nil },
		},
		{
			name: "func that succeeds on second attempt",
			f: func() func() error {
				calls := 0
				return func() error {
					calls++
					if calls == 1 {
						return status.Errorf(codes.Unavailable, "error")
					}
					return nil
				}
			}(),
		},
		{
			name: "explicitly retry",
			f:    retryUntilTenth(nil),
		},
		{
			name:    "explicitly retry and fail",
			f:       retryUntilTenth(errors.New("failed 10 times")),
			wantErr: true,
		},
		{
			name: "func that takes too long to succeed",
			f: func() error {
				// Cancel the context and return an error. This func will succeed on
				// any future calls, but it should not be retried due to the context
				// being canceled.
				if ctx.Err() == nil {
					cancel()
					return status.Errorf(codes.Unavailable, "error")
				}
				return nil
			},
			wantErr: true,
		},
		{
			name: "context done before Retry() called",
			f: func() error {
				return nil
			},
			ctxFunc: func() {
				ctx, cancel = context.WithCancel(context.Background())
				cancel()
			},
			wantErr: true,
		},
	}

	for _, tc := range cases {
		// Cases without a custom context setup get a fresh cancellable context.
		if tc.ctxFunc == nil {
			ctx, cancel = context.WithCancel(context.Background())
		} else {
			tc.ctxFunc()
		}

		err := b.Retry(ctx, tc.f)
		cancel()
		if gotErr := err != nil; gotErr != tc.wantErr {
			t.Errorf("%v: Retry() = %v, want err? %v", tc.name, err, tc.wantErr)
		}
	}
}