Skip to content

Commit

Permalink
operator: retry indefinitely when connecting to a store (pingcap#52177)
Browse files Browse the repository at this point in the history
  • Loading branch information
YuJuncen committed May 6, 2024
1 parent cc57d93 commit cb70a41
Show file tree
Hide file tree
Showing 5 changed files with 61 additions and 4 deletions.
5 changes: 2 additions & 3 deletions br/pkg/backup/prepare_snap/env.go
Original file line number Diff line number Diff line change
Expand Up @@ -180,9 +180,8 @@ type RetryAndSplitRequestEnv struct {
}

func (r RetryAndSplitRequestEnv) ConnectToStore(ctx context.Context, storeID uint64) (PrepareClient, error) {
// Retry for about 2 minutes.
rs := utils.InitialRetryState(12, 10*time.Second, 10*time.Second)
bo := utils.Backoffer(&rs)
rs := utils.ConstantBackoff(10 * time.Second)
bo := utils.Backoffer(rs)
if r.GetBackoffer != nil {
bo = r.GetBackoffer()
}
Expand Down
2 changes: 1 addition & 1 deletion br/pkg/utils/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ go_test(
],
embed = [":utils"],
flaky = True,
shard_count = 36,
shard_count = 33,
deps = [
"//br/pkg/errors",
"//br/pkg/metautil",
Expand Down
15 changes: 15 additions & 0 deletions br/pkg/utils/backoff.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"context"
"database/sql"
"io"
"math"
"strings"
"time"

Expand Down Expand Up @@ -62,6 +63,20 @@ func isGRPCCancel(err error) bool {
return false
}

// ConstantBackoff is a backoffer that waits the same fixed duration between
// attempts and never reports its attempts as used up, so a caller looping on
// Attempt keeps retrying until the operation succeeds or the caller stops
// for some other reason.
type ConstantBackoff time.Duration

// NextBackoff returns the duration to wait before retrying again.
// The error is not inspected: the interval is always the value of b itself.
func (b ConstantBackoff) NextBackoff(err error) time.Duration {
	interval := time.Duration(b)
	return interval
}

// Attempt returns the number of remaining attempts. The value is a constant
// because this backoffer keeps no state and never counts down.
func (b ConstantBackoff) Attempt() int {
	// A large enough value that is still safe for arithmetic operations
	// (won't easily overflow).
	const unlimited = math.MaxInt16
	return unlimited
}

// RetryState is the mutable state needed for retrying.
// It is like the `utils.Backoffer`, but more fundamental:
// it only controls the backoff time and knows nothing about what error happened.
Expand Down
42 changes: 42 additions & 0 deletions br/pkg/utils/backoff_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ package utils_test

import (
"context"
"fmt"
"io"
"math"
"testing"
"time"

Expand Down Expand Up @@ -197,3 +199,43 @@ func TestNewBackupSSTBackofferWithCancel(t *testing.T) {
context.Canceled,
}, multierr.Errors(err))
}

// TestConstantBackoff exercises utils.ConstantBackoff through utils.WithRetryV2
// in two subtests:
//   - backedOff: an always-failing operation is retried with a 10ms interval and
//     cancelled after about 100ms; the attempt count must stay below 20, which
//     shows the retry loop waited between attempts.
//   - infRetry: with a zero interval, the operation fails until its
//     math.MaxInt16-th call and then succeeds; the retry must end without error.
func TestConstantBackoff(t *testing.T) {
	t.Run("backedOff", func(t *testing.T) {
		bo := utils.ConstantBackoff(10 * time.Millisecond)
		ctx, cancel := context.WithCancel(context.Background())
		attempts := 0
		done := make(chan error)

		go func() {
			_, err := utils.WithRetryV2(ctx, bo, func(context.Context) (struct{}, error) {
				attempts++
				return struct{}{}, fmt.Errorf("%d times, no meaning", attempts)
			})
			done <- err
		}()

		// Let the retry loop spin for a while, then stop it through the context.
		time.Sleep(100 * time.Millisecond)
		cancel()
		require.Error(t, <-done)
		// Make sure we have backed off: the counter is only read after the
		// goroutine has delivered its result.
		require.Less(t, attempts, 20)
	})

	t.Run("infRetry", func(t *testing.T) {
		bo := utils.ConstantBackoff(0)
		remaining := math.MaxInt16

		_, err := utils.WithRetryV2(context.Background(), bo, func(context.Context) (struct{}, error) {
			remaining--
			if remaining != 0 {
				return struct{}{}, fmt.Errorf("try %d more times", remaining)
			}
			return struct{}{}, nil
		})
		require.NoError(t, err)
	})
}
1 change: 1 addition & 0 deletions br/pkg/utils/retry.go
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,7 @@ func WithRetryV2[T any](
allErrors = multierr.Append(allErrors, err)
select {
case <-ctx.Done():
// allErrors must not be `nil` here, so ignore the context error.
return *new(T), allErrors
case <-time.After(backoffer.NextBackoff(err)):
}
Expand Down

0 comments on commit cb70a41

Please sign in to comment.