Skip to content

Commit

Permalink
lib/netutil/tcpdialer: enhance timeout handling
Browse files Browse the repository at this point in the history
Use the TCP_USER_TIMEOUT Linux socket option to close the connection based on the user-provided timeout.
Relying on the default timeout and keepalive flow is not sufficient to close the connection in some cases, which leads to the timeout not being applied.

Effectively, if a connection stops processing new packets, it will take up to 1 minute (with the default configuration) instead of 1 second (based on the keepalive value) to mark the connection as broken.

This can be tested by adding a firewall rule that drops packets sent to the vmstorage port:
iptables -A INPUT -p tcp --dport {port} -j DROP
To reverse this:
iptables -D INPUT -p tcp --dport {port} -j DROP

See: #4423 for more details.
Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com>
  • Loading branch information
zekker6 committed Aug 14, 2023
1 parent bde876f commit 5813870
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 0 deletions.
11 changes: 11 additions & 0 deletions lib/netutil/tcpdialer.go
Expand Up @@ -3,6 +3,7 @@ package netutil
import (
"fmt"
"net"
"syscall"
"time"

"github.com/VictoriaMetrics/metrics"
Expand All @@ -27,6 +28,16 @@ func NewTCPDialer(ms *metrics.Set, name, addr string, dialTimeout time.Duration)
dialErrors: ms.NewCounter(fmt.Sprintf(`vm_tcpdialer_errors_total{name=%q, addr=%q, type="dial"}`, name, addr)),
}
d.connMetrics.init(ms, "vm_tcpdialer", name, addr)
// Install a Control hook that applies the TCP user timeout, taken from
// dialTimeout, to the file descriptor of the socket behind c.
// NOTE(review): d.d is presumably a net.Dialer, whose Control hook runs
// before the connection is actually dialed — confirm against the struct definition.
d.d.Control = func(network, address string, c syscall.RawConn) (err error) {
// c.Control reports its own failure separately from whatever happens inside
// the callback, so the callback's result is stored in the named return value err.
controlErr := c.Control(func(fd uintptr) {
err = setTCPUserTimeout(fd, dialTimeout)
})
// A failure to run the callback takes precedence over the callback's result.
if controlErr != nil {
return controlErr
}
return err
}

return d
}

Expand Down
12 changes: 12 additions & 0 deletions lib/netutil/tcpdialer_default.go
@@ -0,0 +1,12 @@
//go:build !linux
// +build !linux

package netutil

import (
"time"
)

// setTCPUserTimeout is the fallback used on non-Linux builds.
//
// It ignores both fd and timeout, changes nothing on the socket,
// and always returns nil.
func setTCPUserTimeout(fd uintptr, timeout time.Duration) error {
	// Intentionally a no-op.
	return nil
}
13 changes: 13 additions & 0 deletions lib/netutil/tcpdialer_linux.go
@@ -0,0 +1,13 @@
package netutil

import (
"syscall"
"time"

"golang.org/x/sys/unix"
)

// setTCPUserTimeout sets the TCP_USER_TIMEOUT socket option on fd to timeout.
//
// The option is passed as whole milliseconds in a 32-bit integer, so the
// duration is adjusted in two cases before the call:
//   - a positive duration shorter than 1ms is rounded up to 1ms, because
//     truncating it to 0 would ask the kernel for the system default
//     instead of a short timeout;
//   - a duration larger than the 32-bit range is clamped to the maximum,
//     because syscall.SetsockoptInt narrows the value to int32 and would
//     otherwise wrap it around.
//
// It returns the error reported by the setsockopt call, if any.
func setTCPUserTimeout(fd uintptr, timeout time.Duration) error {
	// Largest millisecond count that survives the int32 narrowing.
	const maxMillis = 1<<31 - 1

	ms := timeout.Milliseconds()
	switch {
	case timeout > 0 && ms == 0:
		ms = 1
	case ms > maxMillis:
		ms = maxMillis
	}
	return syscall.SetsockoptInt(int(fd), syscall.IPPROTO_TCP, unix.TCP_USER_TIMEOUT, int(ms))
}

0 comments on commit 5813870

Please sign in to comment.