Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More understandable timeouts in checks when value exceeds interval, Fixes #5834 #5835

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 26 additions & 19 deletions agent/checks/check.go
Original file line number Diff line number Diff line change
Expand Up @@ -327,18 +327,9 @@ func (c *CheckHTTP) Start() {

// Create the HTTP client.
c.httpClient = &http.Client{
Timeout: 10 * time.Second,
Timeout: ComputeTimeoutForCheck(c.Logger, c.CheckID, c.Interval, c.Timeout),
Transport: trans,
}

// For long (>10s) interval checks the http timeout is 10s, otherwise the
// timeout is the interval. This means that a check *should* return
// before the next check begins.
if c.Timeout > 0 && c.Timeout < c.Interval {
c.httpClient.Timeout = c.Timeout
} else if c.Interval < 10*time.Second {
c.httpClient.Timeout = c.Interval
}
}

c.stop = false
Expand Down Expand Up @@ -458,6 +449,29 @@ type CheckTCP struct {
stopLock sync.Mutex
}

// ComputeTimeoutForCheck compute timeout for checks from interval and timeouts.
// For long (>10s) interval checks the http timeout is 10s, otherwise the
// timeout is the interval. This means that a check *should* return
// before the next check begins.
func ComputeTimeoutForCheck(logger *log.Logger, name types.CheckID, interval, timeout time.Duration) time.Duration {
defaultTimeoutAndInterval := 10 * time.Second
if interval <= 0 {
interval = defaultTimeoutAndInterval
}
if timeout > 0 {
if timeout <= interval {
return timeout
}
logger.Printf("[WARN] agent: Check Timeout (%q) > Interval (%q), using Interval for %q", timeout, interval, name)
} else {
if interval > defaultTimeoutAndInterval {
return defaultTimeoutAndInterval
}
}
return interval

}

// Start is used to start a TCP check.
// The check runs until stop is called
func (c *CheckTCP) Start() {
Expand All @@ -471,11 +485,7 @@ func (c *CheckTCP) Start() {
// For long (>10s) interval checks the socket timeout is 10s, otherwise
// the timeout is the interval. This means that a check *should* return
// before the next check begins.
if c.Timeout > 0 && c.Timeout < c.Interval {
c.dialer.Timeout = c.Timeout
} else if c.Interval < 10*time.Second {
c.dialer.Timeout = c.Interval
}
c.dialer.Timeout = ComputeTimeoutForCheck(c.Logger, c.CheckID, c.Interval, c.Timeout)
}

c.stop = false
Expand Down Expand Up @@ -663,10 +673,7 @@ type CheckGRPC struct {
func (c *CheckGRPC) Start() {
c.stopLock.Lock()
defer c.stopLock.Unlock()
timeout := 10 * time.Second
if c.Timeout > 0 {
timeout = c.Timeout
}
timeout := ComputeTimeoutForCheck(c.Logger, c.CheckID, c.Interval, c.Timeout)
c.probe = NewGrpcHealthProbe(c.GRPC, timeout, c.TLSClientConfig)
c.stop = false
c.stopCh = make(chan struct{})
Expand Down
66 changes: 66 additions & 0 deletions agent/checks/check_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,72 @@ func TestCheckHTTP(t *testing.T) {
}
}

func TestChecksTimeoutValues(t *testing.T) {
tests := []struct {
desc string
timeout string
interval string
etimeout string
}{
// without timeout, default 10s unless interval < 10s
{desc: "Default Timeout 10s For Interval 15s", timeout: "", interval: "15s", etimeout: "10s"},
{desc: "Default Timeout 10s For Interval 10s", timeout: "", interval: "10s", etimeout: "10s"},
{desc: "Default Timeout 9s For Interval 9s", timeout: "", interval: "9s", etimeout: "9s"},
// with timeout, max := interval
{desc: "Timeout 10s For Interval 10s", timeout: "10s", interval: "10s", etimeout: "10s"},
{desc: "Timeout 7s For Interval 10s", timeout: "7s", interval: "10s", etimeout: "7s"},
{desc: "Timeout 19s For Interval 10s, real timeout 10s", timeout: "19s", interval: "10s", etimeout: "10s"},
{desc: "Timeout 19s For Interval 5s, real timeout 5s", timeout: "19s", interval: "5s", etimeout: "5s"},
{desc: "Timeout 19s For Interval 12s, real timeout 12s", timeout: "19s", interval: "12s", etimeout: "12s"},
}
t.Parallel()
notif := mock.NewNotify()
timeout := 5 * time.Millisecond
server := httptest.NewServer(http.HandlerFunc(func(http.ResponseWriter, *http.Request) {
time.Sleep(2 * timeout)
}))
defer server.Close()

for _, tt := range tests {
t.Run(tt.desc, func(t *testing.T) {
timeout, err := time.ParseDuration(tt.timeout)
if err != nil {
timeout = 0
}
interval, _ := time.ParseDuration(tt.interval)
checkHTTP := &CheckHTTP{
Notify: notif,
CheckID: types.CheckID(fmt.Sprintf("HTTP %q", tt.desc)),
HTTP: server.URL,
Timeout: timeout,
Interval: interval,
Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags),
}

checkTCP := &CheckTCP{
Notify: notif,
CheckID: types.CheckID(fmt.Sprintf("TCP %q", tt.desc)),
TCP: "tcp://127.0.0.1:65533",
Timeout: timeout,
Interval: interval,
Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags),
}

checkHTTP.Start()
defer checkHTTP.Stop()
checkTCP.Start()
defer checkTCP.Stop()
etimeout, _ := time.ParseDuration(tt.etimeout)
if checkHTTP.httpClient.Timeout != etimeout {
t.Fatalf("expected HTTP timeout to be %q, was %q", etimeout, checkHTTP.httpClient.Timeout)
}
if checkTCP.dialer.Timeout != etimeout {
t.Fatalf("expected TCP timeout to be %q, was %q", etimeout, checkTCP.dialer.Timeout)
}
})
}
}

func TestCheckHTTPTimeout(t *testing.T) {
t.Parallel()
timeout := 5 * time.Millisecond
Expand Down