Skip to content

Commit 17435fb

Browse files
committed
[tunnel] bump TUN MTU to 1420 and fix Geneve MTU update on reload
Introduce TunnelMTU=1420 (sized for QUIC over 1500-byte paths) and DefaultGeneveMTU=1450 constants. Fix SetUp to update Geneve device MTU when it already exists, preventing MTU mismatch after binary reload that caused IPv6 fragmentation failures (Ip6FragFails).
1 parent 74add4f commit 17435fb

9 files changed

Lines changed: 76 additions & 16 deletions

File tree

pkg/net/lwtunnel/constants.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
package lwtunnel
2+
3+
// DefaultGeneveMTU is the default MTU applied to Geneve tunnel interfaces.
//
// It must not be smaller than the tunnel TUN MTU (1420); otherwise overlay
// packets would be fragmented on the Geneve leg. A generous value is safe
// because Geneve runs over VPC networks that support jumbo frames.
const DefaultGeneveMTU = 1450

pkg/net/lwtunnel/geneve.go

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ func defaultGeneveOptions() *geneveOptions {
4949
dev: "gnv0",
5050
vni: 0x61,
5151
port: 6081,
52-
mtu: 1380,
52+
mtu: DefaultGeneveMTU,
5353
}
5454
}
5555

@@ -180,6 +180,20 @@ func (r *Geneve) SetUp(_ context.Context, privAddr netip.Addr) error {
180180
link = geneve
181181
} else if err != nil {
182182
return fmt.Errorf("failed to get Geneve interface: %w", err)
183+
} else if link.Attrs().MTU != r.opts.mtu {
184+
slog.Info("Updating Geneve interface MTU",
185+
slog.String("dev", r.opts.dev),
186+
slog.Int("old_mtu", link.Attrs().MTU),
187+
slog.Int("new_mtu", r.opts.mtu),
188+
)
189+
if h != nil {
190+
err = h.LinkSetMTU(link, r.opts.mtu)
191+
} else {
192+
err = netlink.LinkSetMTU(link, r.opts.mtu)
193+
}
194+
if err != nil {
195+
return fmt.Errorf("failed to update Geneve MTU: %w", err)
196+
}
183197
}
184198

185199
if h != nil {

pkg/net/lwtunnel/lwtunnel_stub.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ func WithMTU(mtu int) option { return func(o *geneveOptions) { o.mtu = mtu
3535
func WithNetNS(ns string) option { return func(o *geneveOptions) { o.netns = ns } }
3636

3737
func NewGeneve(opts ...option) *Geneve {
38-
o := &geneveOptions{dev: "gnv0", vni: 0x61, port: 6081, mtu: 1380}
38+
o := &geneveOptions{dev: "gnv0", vni: 0x61, port: 6081, mtu: DefaultGeneveMTU}
3939
for _, opt := range opts {
4040
opt(o)
4141
}

pkg/netstack/tun_device.go

Lines changed: 44 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212
"time"
1313

1414
"github.com/dpeckett/network"
15+
"github.com/prometheus/client_golang/prometheus"
1516
"golang.zx2c4.com/wireguard/tun"
1617

1718
"gvisor.dev/gvisor/pkg/buffer"
@@ -30,6 +31,12 @@ import (
3031

3132
const IPv6MinMTU = 1280 // IPv6 minimum MTU, required for some PPPoE links.
3233

34+
// TunnelMTU is the MTU used for tunnel TUN devices. Sized to fit in a single
35+
// QUIC datagram after PMTUD on a typical 1500-byte internet path:
36+
// 1500 (Ethernet) - 20 (IPv4) - 8 (UDP) - ~26 (QUIC framing) - 1 (contextID) ≈ 1445 (≈ 1425 with a 40-byte IPv6 header).
37+
// We use 1420 to leave headroom for path variance.
38+
const TunnelMTU = 1420
39+
3340
var _ tun.Device = (*TunDevice)(nil)
3441

3542
type TunDevice struct {
@@ -80,18 +87,18 @@ func NewTunDevice(pcapPath string) (*TunDevice, error) {
8087

8188
// High-performance TCP buffer settings.
8289
tcpRcvBuf := tcpip.TCPReceiveBufferSizeRangeOption{
83-
Min: 64 << 10, // 64 KiB
84-
Default: 2 << 20, // 2 MiB
85-
Max: 16 << 20, // 16 MiB
90+
Min: 64 << 10, // 64 KiB
91+
Default: 2 << 20, // 2 MiB
92+
Max: 16 << 20, // 16 MiB
8693
}
8794
tcpipErr = ipstack.SetTransportProtocolOption(tcp.ProtocolNumber, &tcpRcvBuf)
8895
if tcpipErr != nil {
8996
return nil, fmt.Errorf("could not set TCP receive buffer size: %v", tcpipErr)
9097
}
9198
tcpSndBuf := tcpip.TCPSendBufferSizeRangeOption{
92-
Min: 64 << 10, // 64 KiB
93-
Default: 2 << 20, // 2 MiB
94-
Max: 16 << 20, // 16 MiB
99+
Min: 64 << 10, // 64 KiB
100+
Default: 2 << 20, // 2 MiB
101+
Max: 16 << 20, // 16 MiB
95102
}
96103
tcpipErr = ipstack.SetTransportProtocolOption(tcp.ProtocolNumber, &tcpSndBuf)
97104
if tcpipErr != nil {
@@ -129,7 +136,7 @@ func NewTunDevice(pcapPath string) (*TunDevice, error) {
129136
}
130137

131138
nicID := ipstack.NextNICID()
132-
linkEP := channel.New(4096, uint32(IPv6MinMTU), "")
139+
linkEP := channel.New(4096, uint32(TunnelMTU), "")
133140
var nicEP stack.LinkEndpoint = linkEP
134141

135142
var pcapFile *os.File
@@ -361,6 +368,36 @@ func (tun *TunDevice) ListenPacket(addr netip.AddrPort) (net.PacketConn, error)
361368
return gonet.DialUDP(tun.stack, fa, nil, protoNum)
362369
}
363370

371+
// RegisterTCPStatsMetrics registers netstack TCP stats as Prometheus gauges
372+
// that are read at push/scrape time. Call once after creating the TunDevice.
373+
func (tun *TunDevice) RegisterTCPStatsMetrics(reg prometheus.Registerer) {
374+
s := tun.stack.Stats().TCP
375+
gauges := []struct {
376+
name string
377+
help string
378+
fn func() float64
379+
}{
380+
{"tunnel_netstack_tcp_segments_sent_total", "TCP segments sent.", func() float64 { return float64(s.SegmentsSent.Value()) }},
381+
{"tunnel_netstack_tcp_segments_received_total", "TCP segments received.", func() float64 { return float64(s.ValidSegmentsReceived.Value()) }},
382+
{"tunnel_netstack_tcp_retransmits_total", "TCP segments retransmitted.", func() float64 { return float64(s.Retransmits.Value()) }},
383+
{"tunnel_netstack_tcp_fast_retransmit_total", "TCP fast retransmits.", func() float64 { return float64(s.FastRetransmit.Value()) }},
384+
{"tunnel_netstack_tcp_slow_start_retransmits_total", "TCP slow start retransmits.", func() float64 { return float64(s.SlowStartRetransmits.Value()) }},
385+
{"tunnel_netstack_tcp_timeouts_total", "TCP RTO timeouts.", func() float64 { return float64(s.Timeouts.Value()) }},
386+
{"tunnel_netstack_tcp_fast_recovery_total", "TCP fast recovery events.", func() float64 { return float64(s.FastRecovery.Value()) }},
387+
{"tunnel_netstack_tcp_sack_recovery_total", "TCP SACK recovery events.", func() float64 { return float64(s.SACKRecovery.Value()) }},
388+
{"tunnel_netstack_tcp_checksum_errors_total", "TCP checksum errors.", func() float64 { return float64(s.ChecksumErrors.Value()) }},
389+
{"tunnel_netstack_tcp_established", "Current established TCP connections.", func() float64 { return float64(s.CurrentEstablished.Value()) }},
390+
{"tunnel_netstack_tcp_resets_sent_total", "TCP resets sent.", func() float64 { return float64(s.ResetsSent.Value()) }},
391+
{"tunnel_netstack_tcp_resets_received_total", "TCP resets received.", func() float64 { return float64(s.ResetsReceived.Value()) }},
392+
}
393+
for _, g := range gauges {
394+
reg.MustRegister(prometheus.NewGaugeFunc(
395+
prometheus.GaugeOpts{Name: g.name, Help: g.help},
396+
g.fn,
397+
))
398+
}
399+
}
400+
364401
// ForwardTo forwards all inbound traffic to the upstream network.
365402
func (tun *TunDevice) ForwardTo(ctx context.Context, upstream network.Network) error {
366403
// Allow outgoing packets to have a source address different from the address

pkg/tunnel/connection/device.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ func (d *Device) Close() error {
2525
}
2626

2727
func (d *Device) MTU() (int, error) {
28-
return netstack.IPv6MinMTU, nil
28+
return netstack.TunnelMTU, nil
2929
}
3030

3131
func (d *Device) Name() string {

pkg/tunnel/connection/splice.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ func Splice(tunDev tun.Device, conn Connection, opts ...SpliceOption) error {
9898
sizes := make([]int, batchSize)
9999
pkts := make([][]byte, batchSize)
100100
for i := range pkts {
101-
pkts[i] = make([]byte, netstack.IPv6MinMTU)
101+
pkts[i] = make([]byte, netstack.TunnelMTU)
102102
}
103103

104104
for {
@@ -235,7 +235,7 @@ func Splice(tunDev tun.Device, conn Connection, opts ...SpliceOption) error {
235235
// Non-zero-copy fallback: use intermediate channel for batching.
236236
pktPool := &sync.Pool{
237237
New: func() any {
238-
return ptr.To(make([]byte, netstack.IPv6MinMTU+tunOffset))
238+
return ptr.To(make([]byte, netstack.TunnelMTU+tunOffset))
239239
},
240240
}
241241

pkg/tunnel/proxy/proxy.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ const (
2424
defaultProxyGeneveDev = "proxy-gnv0"
2525
defaultProxyGenevePort = 6082
2626
defaultProxyGeneveVNI = 200
27-
defaultProxyGeneveMTU = 1400
27+
defaultProxyGeneveMTU = lwtunnel.DefaultGeneveMTU
2828
)
2929

3030
// ProxyTunnelReconciler reconciles Proxy objects and manages L3 Geneve tunnels

pkg/tunnel/router/client_netlink_linux.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,9 +76,9 @@ func newClientNetlinkRouter(opts ...Option) (*ClientNetlinkRouter, error) {
7676
opt(options)
7777
}
7878

79-
slog.Info("Create a TUN device", "name", options.tunIfaceName, "mtu", netstack.IPv6MinMTU)
79+
slog.Info("Create a TUN device", "name", options.tunIfaceName, "mtu", netstack.TunnelMTU)
8080

81-
tunDev, err := tun.CreateTUN(options.tunIfaceName, netstack.IPv6MinMTU)
81+
tunDev, err := tun.CreateTUN(options.tunIfaceName, netstack.TunnelMTU)
8282
if err != nil {
8383
return nil, fmt.Errorf("failed to create TUN interface: %w", err)
8484
}

pkg/tunnel/router/server_netlink_linux.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ func NewNetlinkRouter(opts ...Option) (*NetlinkRouter, error) {
6565
return nil, fmt.Errorf("failed to get external interface %s: %w", options.extIfaceName, err)
6666
}
6767

68-
tunDev, err := tun.CreateTUN(options.tunIfaceName, netstack.IPv6MinMTU)
68+
tunDev, err := tun.CreateTUN(options.tunIfaceName, netstack.TunnelMTU)
6969
if err != nil {
7070
return nil, fmt.Errorf("failed to create TUN interface: %w", err)
7171
}

0 commit comments

Comments
 (0)