Skip to content

Commit

Permalink
Add additional tweaks to HTTP / TCP settings to improve retries
Browse files Browse the repository at this point in the history
- Retry on temporary errors when reading HTTP body
  - Our ECONNRESET check did not actually work, but this error is
    handled correctly by the net.Error Temporary() check
- Add logging when an error occurs and we retry
- Re-add TCP keepalive flag with explanation

Signed-off-by: Brian Cunnie <bcunnie@pivotal.io>

[#134011373](https://www.pivotaltracker.com/story/show/134011373)
  • Loading branch information
ljfranklin authored and zachgersh committed Nov 27, 2016
1 parent 1105385 commit 889a220
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 9 deletions.
14 changes: 8 additions & 6 deletions boshio/boshio.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ import (
"path/filepath"
"strconv"
"strings"
"syscall"

"golang.org/x/sync/errgroup"
)
Expand Down Expand Up @@ -212,11 +211,14 @@ func (c Client) retryableRequest(stemcellURL string, byteRange string) ([]byte,
resp.Body.Close()

if err != nil {
isEOF := (err == io.ErrUnexpectedEOF)
opErr, ok := err.(*net.OpError)
isConnectionRefused := (ok && opErr.Err == syscall.ECONNRESET)

if isConnectionRefused || isEOF {
if netErr, ok := err.(net.Error); ok {
if netErr.Temporary() {
fmt.Fprintf(os.Stderr, "Retrying on temporary error: %s", netErr.Error())
continue
}
}
if err == io.ErrUnexpectedEOF {
fmt.Fprint(os.Stderr, "Retrying after server unexpectly closed connection")
continue
}

Expand Down
11 changes: 8 additions & 3 deletions boshio/http_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"net"
"net/http"
"net/url"
"os"
"time"
)

Expand All @@ -17,12 +18,14 @@ func NewHTTPClient(host string, wait time.Duration) HTTPClient {
Proxy: http.ProxyFromEnvironment,

Dial: (&net.Dialer{
Timeout: 30 * time.Second,
KeepAlive: 0, // don't send keepalive TCP messages
Timeout: 30 * time.Second,
// The OS determines the number of failed keepalive probes before the connection is closed.
// The default is 9 retries on Linux.
KeepAlive: 30 * time.Second,
}).Dial,

TLSHandshakeTimeout: 60 * time.Second,
DisableKeepAlives: true,
DisableKeepAlives: true, // don't re-use TCP connections between requests
},
},
}
Expand All @@ -49,8 +52,10 @@ func (h HTTPClient) Do(req *http.Request) (*http.Response, error) {

for {
resp, err = h.Client.Do(req)

if netErr, ok := err.(net.Error); ok {
if netErr.Temporary() {
fmt.Fprintf(os.Stderr, "Retrying on temporary error: %s", netErr.Error())
time.Sleep(h.Wait)
continue
}
Expand Down

0 comments on commit 889a220

Please sign in to comment.