Skip to content

Commit 2d8c5bd

Browse files
committed
[tunnel] addr cleanup in router on disconnect
1 parent 0f49f84 commit 2d8c5bd

File tree

3 files changed

+77
-12
lines changed

3 files changed

+77
-12
lines changed

pkg/cmd/tunnel/run.go

Lines changed: 54 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -510,32 +510,76 @@ func (t *tunnelNodeReconciler) reconcile(ctx context.Context, req ctrl.Request)
510510
return ctrl.Result{}, nil
511511
}
512512

513-
// healthHandler returns 200 OK when at least one tunnel connection is active, 503 otherwise.
514-
// This endpoint is used for health checks to determine if the tunnel node has active connections.
513+
// healthHandler returns 200 OK when at least one tunnel connection is active and functional.
514+
// This endpoint is used for health checks to determine if the tunnel node has working connections.
515515
// The health endpoint is only started when the --health-endpoint flag is provided with a valid
516516
// address (e.g., ":8080" or "0.0.0.0:8080").
517517
//
518518
// Response codes:
519-
// - 200 OK: At least one tunnel connection is active
520-
// - 503 Service Unavailable: No active tunnel connections
519+
// - 200 OK: At least one tunnel connection is active with assigned addresses
520+
// - 503 Service Unavailable: No active tunnel connections, or no active connection has assigned addresses
521521
func (t *tunnelNodeReconciler) healthHandler(w http.ResponseWriter, r *http.Request) {
522522
t.tunMu.RLock()
523523
defer t.tunMu.RUnlock()
524524

525-
// Check if we have at least one active connection
526525
activeConns := 0
526+
healthyConns := 0
527+
var allAddrs []string
528+
var connDetails []string
529+
527530
for _, conn := range t.tunDialerWorkers {
528-
if conn.conn != nil && conn.conn.Context().Err() == nil {
529-
activeConns++
531+
if conn.conn == nil {
532+
continue
533+
}
534+
535+
// Check 1: QUIC connection context is alive.
536+
if conn.conn.Context().Err() != nil {
537+
continue
538+
}
539+
activeConns++
540+
541+
// Check 2: Connection has assigned addresses (tunnel is actually functional).
542+
addrs, err := conn.conn.LocalAddrs()
543+
if err != nil || len(addrs) == 0 {
544+
connDetails = append(connDetails, fmt.Sprintf(" - %s: no addresses", conn.id.String()[:8]))
545+
continue
530546
}
547+
healthyConns++
548+
549+
// Collect addresses for this connection.
550+
var addrStrs []string
551+
for _, addr := range addrs {
552+
addrStrs = append(addrStrs, addr.String())
553+
allAddrs = append(allAddrs, addr.String())
554+
}
555+
uptime := time.Since(conn.connectedAt).Truncate(time.Second)
556+
connDetails = append(connDetails, fmt.Sprintf(" - %s: %v (uptime: %s)", conn.id.String()[:8], addrStrs, uptime))
531557
}
532558

533-
if activeConns > 0 {
559+
if healthyConns > 0 {
534560
w.WriteHeader(http.StatusOK)
535-
fmt.Fprintf(w, "OK - %d active connection(s)\n", activeConns)
561+
fmt.Fprintf(w, "OK\n\n")
562+
fmt.Fprintf(w, "Status: healthy\n")
563+
fmt.Fprintf(w, "Connections: %d healthy, %d active\n", healthyConns, activeConns)
564+
fmt.Fprintf(w, "Tunnel IPs: %v\n", allAddrs)
565+
fmt.Fprintf(w, "\nConnection Details:\n")
566+
for _, detail := range connDetails {
567+
fmt.Fprintf(w, "%s\n", detail)
568+
}
569+
} else if activeConns > 0 {
570+
w.WriteHeader(http.StatusServiceUnavailable)
571+
fmt.Fprintf(w, "UNHEALTHY\n\n")
572+
fmt.Fprintf(w, "Status: degraded\n")
573+
fmt.Fprintf(w, "Connections: %d active but none have addresses assigned\n", activeConns)
574+
fmt.Fprintf(w, "\nConnection Details:\n")
575+
for _, detail := range connDetails {
576+
fmt.Fprintf(w, "%s\n", detail)
577+
}
536578
} else {
537579
w.WriteHeader(http.StatusServiceUnavailable)
538-
fmt.Fprintf(w, "UNHEALTHY - no active connections\n")
580+
fmt.Fprintf(w, "UNHEALTHY\n\n")
581+
fmt.Fprintf(w, "Status: disconnected\n")
582+
fmt.Fprintf(w, "Connections: none active\n")
539583
}
540584
}
541585

pkg/tunnel/client.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -354,6 +354,20 @@ func (cw *connWrapper) String() string {
354354

355355
func (c *Conn) run(ctx context.Context) {
356356
log := alog.FromContext(ctx)
357+
358+
// Cleanup: remove all addresses from router when connection closes.
359+
defer func() {
360+
c.mu.RLock()
361+
addrs := make([]netip.Prefix, len(c.addrs))
362+
copy(addrs, c.addrs)
363+
c.mu.RUnlock()
364+
365+
for _, addr := range addrs {
366+
log.Info("Removing local prefix on connection close", slog.Any("prefix", addr))
367+
c.router.DelAddr(addr)
368+
}
369+
}()
370+
357371
for {
358372
select {
359373
case <-ctx.Done():

pkg/tunnel/connection/muxed_conn.go

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"log/slog"
77
"net"
88
"net/netip"
9+
"strings"
910
"sync"
1011
"sync/atomic"
1112

@@ -55,8 +56,14 @@ func (m *muxedConn) readFromConn(src netip.Prefix, conn Connection) {
5556
// If the connection is closed, remove it from the multiplexer and quit
5657
// the read loop. Otherwise, treat it as a transient error and just log it.
5758
var closedErr *connectip.CloseError
58-
if errors.As(err, &closedErr) {
59-
slog.Info("Connection closed", slog.Any("src", src), slog.Bool("remoteClosed", closedErr.Remote))
59+
isClosedErr := errors.As(err, &closedErr) ||
60+
errors.Is(err, net.ErrClosed) ||
61+
strings.Contains(err.Error(), "use of closed network connection")
62+
63+
if isClosedErr {
64+
slog.Info("Connection closed, removing from mux",
65+
slog.Any("src", src),
66+
slog.Any("error", err))
6067
metrics.TunnelPacketsReceivedErrors.WithLabelValues("read_closed").Inc()
6168

6269
m.Remove(src)

0 commit comments

Comments
 (0)