@@ -510,32 +510,76 @@ func (t *tunnelNodeReconciler) reconcile(ctx context.Context, req ctrl.Request)
510510 return ctrl.Result {}, nil
511511}
512512
513- // healthHandler returns 200 OK when at least one tunnel connection is active, 503 otherwise .
514- // This endpoint is used for health checks to determine if the tunnel node has active connections.
513+ // healthHandler returns 200 OK when at least one tunnel connection is active and functional .
514+ // This endpoint is used for health checks to determine if the tunnel node has working connections.
515515// The health endpoint is only started when the --health-endpoint flag is provided with a valid
516516// address (e.g., ":8080" or "0.0.0.0:8080").
517517//
518518// Response codes:
519- // - 200 OK: At least one tunnel connection is active
520- // - 503 Service Unavailable: No active tunnel connections
519+ // - 200 OK: At least one tunnel connection is active with assigned addresses
520+ // - 503 Service Unavailable: No active tunnel connections or connections lack addresses
521521func (t * tunnelNodeReconciler ) healthHandler (w http.ResponseWriter , r * http.Request ) {
522522 t .tunMu .RLock ()
523523 defer t .tunMu .RUnlock ()
524524
525- // Check if we have at least one active connection
526525 activeConns := 0
526+ healthyConns := 0
527+ var allAddrs []string
528+ var connDetails []string
529+
527530 for _ , conn := range t .tunDialerWorkers {
528- if conn .conn != nil && conn .conn .Context ().Err () == nil {
529- activeConns ++
531+ if conn .conn == nil {
532+ continue
533+ }
534+
535+ // Check 1: QUIC connection context is alive.
536+ if conn .conn .Context ().Err () != nil {
537+ continue
538+ }
539+ activeConns ++
540+
541+ // Check 2: Connection has assigned addresses (tunnel is actually functional).
542+ addrs , err := conn .conn .LocalAddrs ()
543+ if err != nil || len (addrs ) == 0 {
544+ connDetails = append (connDetails , fmt .Sprintf (" - %s: no addresses" , conn .id .String ()[:8 ]))
545+ continue
530546 }
547+ healthyConns ++
548+
549+ // Collect addresses for this connection.
550+ var addrStrs []string
551+ for _ , addr := range addrs {
552+ addrStrs = append (addrStrs , addr .String ())
553+ allAddrs = append (allAddrs , addr .String ())
554+ }
555+ uptime := time .Since (conn .connectedAt ).Truncate (time .Second )
556+ connDetails = append (connDetails , fmt .Sprintf (" - %s: %v (uptime: %s)" , conn .id .String ()[:8 ], addrStrs , uptime ))
531557 }
532558
533- if activeConns > 0 {
559+ if healthyConns > 0 {
534560 w .WriteHeader (http .StatusOK )
535- fmt .Fprintf (w , "OK - %d active connection(s)\n " , activeConns )
561+ fmt .Fprintf (w , "OK\n \n " )
562+ fmt .Fprintf (w , "Status: healthy\n " )
563+ fmt .Fprintf (w , "Connections: %d healthy, %d active\n " , healthyConns , activeConns )
564+ fmt .Fprintf (w , "Tunnel IPs: %v\n " , allAddrs )
565+ fmt .Fprintf (w , "\n Connection Details:\n " )
566+ for _ , detail := range connDetails {
567+ fmt .Fprintf (w , "%s\n " , detail )
568+ }
569+ } else if activeConns > 0 {
570+ w .WriteHeader (http .StatusServiceUnavailable )
571+ fmt .Fprintf (w , "UNHEALTHY\n \n " )
572+ fmt .Fprintf (w , "Status: degraded\n " )
573+ fmt .Fprintf (w , "Connections: %d active but none have addresses assigned\n " , activeConns )
574+ fmt .Fprintf (w , "\n Connection Details:\n " )
575+ for _ , detail := range connDetails {
576+ fmt .Fprintf (w , "%s\n " , detail )
577+ }
536578 } else {
537579 w .WriteHeader (http .StatusServiceUnavailable )
538- fmt .Fprintf (w , "UNHEALTHY - no active connections\n " )
580+ fmt .Fprintf (w , "UNHEALTHY\n \n " )
581+ fmt .Fprintf (w , "Status: disconnected\n " )
582+ fmt .Fprintf (w , "Connections: none active\n " )
539583 }
540584}
541585
0 commit comments