Skip to content

Commit

Permalink
Use time limit instead of number of attempts for retry loops
Browse files Browse the repository at this point in the history
Writing the maximum duration the program will wait is more precise than
using a number of attempts.

Example:
errors.RetryAfter(120, waitForPendingCsr, time.Second*5) is equivalent
of waiting 120 * (execution time of waitForPendingCsr + 5 seconds).
This is way more than 120 seconds, the duration expected to last when
this line was added.
Now, it will try as much as possible within 120 seconds and wait 5
seconds between each try.
  • Loading branch information
guillaumerose authored and praveenkumar committed Oct 7, 2020
1 parent 47f6782 commit d2a0c84
Show file tree
Hide file tree
Showing 9 changed files with 25 additions and 22 deletions.
2 changes: 1 addition & 1 deletion pkg/crc/cluster/cert-renewal.go
Expand Up @@ -28,7 +28,7 @@ func waitForPendingCsrs(ocConfig oc.Config) error {
return nil
}

return errors.RetryAfter(120, waitForPendingCsr, time.Second*5)
return errors.RetryAfter(8*time.Minute, waitForPendingCsr, time.Second*5)
}

func RegenerateCertificates(sshRunner *ssh.Runner, ocConfig oc.Config) error {
Expand Down
6 changes: 3 additions & 3 deletions pkg/crc/cluster/cluster.go
Expand Up @@ -29,7 +29,7 @@ func WaitForSSH(sshRunner *ssh.Runner) error {
return nil
}

return errors.RetryAfter(60, checkSSHConnectivity, time.Second)
return errors.RetryAfter(60*time.Second, checkSSHConnectivity, time.Second)
}

type CertExpiryState int
Expand Down Expand Up @@ -310,7 +310,7 @@ func WaitforRequestHeaderClientCaFile(ocConfig oc.Config) error {
logging.Debugf("Found .data.requestheader-client-ca-file: %s", stdout)
return nil
}
return errors.RetryAfter(90, lookupRequestHeaderClientCa, 2*time.Second)
return errors.RetryAfter(8*time.Minute, lookupRequestHeaderClientCa, 2*time.Second)
}

func DeleteOpenshiftAPIServerPods(ocConfig oc.Config) error {
Expand All @@ -327,7 +327,7 @@ func DeleteOpenshiftAPIServerPods(ocConfig oc.Config) error {
return nil
}

return errors.RetryAfter(60, deleteOpenshiftAPIServerPods, time.Second)
return errors.RetryAfter(60*time.Second, deleteOpenshiftAPIServerPods, time.Second)
}

func CheckProxySettingsForOperator(ocConfig oc.Config, proxy *network.ProxyConfig, deployment, namespace string) (bool, error) {
Expand Down
2 changes: 1 addition & 1 deletion pkg/crc/cluster/csr.go
Expand Up @@ -21,7 +21,7 @@ func WaitForOpenshiftResource(ocConfig oc.Config, resource string) error {
logging.Debug(stdout)
return nil
}
return errors.RetryAfter(80, waitForAPIServer, time.Second)
return errors.RetryAfter(80*time.Second, waitForAPIServer, time.Second)
}

// ApproveNodeCSR approves the certificate for the node.
Expand Down
15 changes: 8 additions & 7 deletions pkg/crc/errors/multierror.go
Expand Up @@ -60,17 +60,18 @@ func (r RetriableError) Error() string {
return "Temporary error: " + r.Err.Error()
}

// RetryAfter retries a number of attempts, after a delay
func RetryAfter(attempts int, callback func() error, d time.Duration) error {
// RetryAfter retries for a certain duration, after a delay
func RetryAfter(limit time.Duration, callback func() error, d time.Duration) error {
m := MultiError{}
for i := 0; i < attempts; i++ {
if i > 0 {
logging.Debugf("retry loop: attempt %d out of %d", i, attempts)
}
timeLimit := time.Now().Add(limit)
attempt := 0
for time.Now().Before(timeLimit) {
logging.Debugf("retry loop: attempt %d", attempt)
err := callback()
if err == nil {
return nil
}
attempt++
m.Collect(err)
if _, ok := err.(*RetriableError); !ok {
logging.Debugf("non-retriable error: %v", err)
Expand All @@ -79,6 +80,6 @@ func RetryAfter(attempts int, callback func() error, d time.Duration) error {
logging.Debugf("error: %v - sleeping %s", err, d)
time.Sleep(d)
}
logging.Debugf("RetryAfter timeout after %d tries", attempts)
logging.Debugf("RetryAfter timeout after %d tries", attempt)
return m
}
14 changes: 8 additions & 6 deletions pkg/crc/errors/multierror_test.go
Expand Up @@ -2,14 +2,16 @@ package errors

import (
"errors"
"fmt"
"testing"
"time"

"github.com/stretchr/testify/assert"
)

func TestRetryAfter(t *testing.T) {
calls := 0
ret := RetryAfter(10, func() error {
ret := RetryAfter(time.Second, func() error {
calls++
return nil
}, 0)
Expand All @@ -19,7 +21,7 @@ func TestRetryAfter(t *testing.T) {

func TestRetryAfterFailure(t *testing.T) {
calls := 0
ret := RetryAfter(10, func() error {
ret := RetryAfter(time.Second, func() error {
calls++
return errors.New("failed")
}, 0)
Expand All @@ -29,17 +31,17 @@ func TestRetryAfterFailure(t *testing.T) {

func TestRetryAfterMaxAttempts(t *testing.T) {
calls := 0
ret := RetryAfter(3, func() error {
ret := RetryAfter(10*time.Millisecond, func() error {
calls++
return &RetriableError{Err: errors.New("failed")}
}, 0)
assert.EqualError(t, ret, "Temporary error: failed (x3)")
assert.Equal(t, 3, calls)
assert.EqualError(t, ret, fmt.Sprintf("Temporary error: failed (x%d)", calls))
assert.Greater(t, calls, 5)
}

func TestRetryAfterSuccessAfterFailures(t *testing.T) {
calls := 0
ret := RetryAfter(5, func() error {
ret := RetryAfter(time.Second, func() error {
calls++
if calls < 3 {
return &RetriableError{Err: errors.New("failed")}
Expand Down
2 changes: 1 addition & 1 deletion pkg/crc/machine/kubeconfig.go
Expand Up @@ -32,7 +32,7 @@ const (
)

func eventuallyWriteKubeconfig(ocConfig oc.Config, ip string, clusterConfig *ClusterConfig) error {
if err := errors.RetryAfter(60, func() error {
if err := errors.RetryAfter(60*time.Second, func() error {
status, err := cluster.GetClusterOperatorStatus(ocConfig, "authentication")
if err != nil {
return &errors.RetriableError{Err: err}
Expand Down
2 changes: 1 addition & 1 deletion pkg/crc/machine/machine.go
Expand Up @@ -808,7 +808,7 @@ func waitForProxyPropagation(ocConfig oc.Config, proxyConfig *network.ProxyConfi
return nil
}

if err := errors.RetryAfter(60, checkProxySettingsForOperator, 2*time.Second); err != nil {
if err := errors.RetryAfter(60*time.Second, checkProxySettingsForOperator, 2*time.Second); err != nil {
logging.Debug("Failed to propagate proxy settings to cluster")
}
}
2 changes: 1 addition & 1 deletion pkg/crc/services/dns/dns.go
Expand Up @@ -97,7 +97,7 @@ func CheckCRCLocalDNSReachable(serviceConfig services.ServicePostStartConfig) (s
return nil
}

if err := errors.RetryAfter(30, checkLocalDNSReach, time.Second); err != nil {
if err := errors.RetryAfter(30*time.Second, checkLocalDNSReach, time.Second); err != nil {
return queryOutput, err
}
return queryOutput, err
Expand Down
2 changes: 1 addition & 1 deletion pkg/crc/services/dns/dns_darwin.go
Expand Up @@ -128,7 +128,7 @@ func waitForNetwork() error {
return nil
}

if err := errors.RetryAfter(10, getResolvValueFromHost, time.Second); err != nil {
if err := errors.RetryAfter(10*time.Second, getResolvValueFromHost, time.Second); err != nil {
return fmt.Errorf("Unable to read host resolv file (%v)", err)
}
// retry up to 5 times
Expand Down

0 comments on commit d2a0c84

Please sign in to comment.