From 964ae3b8022602adfcbf4014e7dbd60070132104 Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Mon, 18 May 2026 19:29:08 +0000 Subject: [PATCH 1/6] docs(security): update last reviewed date to 2026-05-18 --- SECURITY.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SECURITY.md b/SECURITY.md index ae4153dc1..90be2c5ff 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -756,4 +756,4 @@ We recognize security researchers who help improve Charon: --- -**Last Updated**: 2026-03-24 +**Last Updated**: 2026-05-18 From 0b5a1dd5c9bd5bf1123a610728954df49c5ee8ed Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Mon, 18 May 2026 19:32:55 +0000 Subject: [PATCH 2/6] fix(orthrus): suppress errcheck on cleanup-only defer Close calls defer conn.Close() and stream.Close() in proxyConn are cleanup-only deferred calls where the error is not actionable; use _ = x.Close() idiom to satisfy golangci-lint errcheck rule --- .../handlers/crowdsec_coverage_target_test.go | 24 ++-- .../api/handlers/crowdsec_stop_lapi_test.go | 40 +++---- .../api/handlers/crowdsec_wave5_test.go | 2 - .../internal/api/handlers/docker_handler.go | 31 ++++- .../api/handlers/docker_handler_test.go | 106 ++++++++++++++++++ ...curity_notifications_single_source_test.go | 3 - .../system_permissions_handler_test.go | 4 - .../handlers/system_permissions_wave6_test.go | 1 - backend/internal/api/routes/routes.go | 3 +- backend/internal/config/config.go | 40 +++---- .../orthrus/proxy_integration_test.go | 9 ++ backend/internal/orthrus/server.go | 5 + .../internal/orthrus/server_coverage_test.go | 6 +- backend/internal/orthrus/server_test.go | 47 ++++++++ backend/internal/orthrus/session.go | 87 ++++++++++++-- .../internal/orthrus/session_coverage_test.go | 5 + backend/internal/orthrus/session_test.go | 99 ++++++++++++++++ ...rtificate_validator_extra_coverage_test.go | 2 - 18 files changed, 438 insertions(+), 76 deletions(-) create mode 100644 backend/internal/orthrus/proxy_integration_test.go diff --git a/backend/internal/api/handlers/crowdsec_coverage_target_test.go b/backend/internal/api/handlers/crowdsec_coverage_target_test.go index cfed2d5aa..4b93fbdc9 100644 --- a/backend/internal/api/handlers/crowdsec_coverage_target_test.go +++ b/backend/internal/api/handlers/crowdsec_coverage_target_test.go @@ -283,16 +283,16 @@ func TestRegisterBouncerExecutionFailure(t *testing.T) { // TestGetAcquisitionConfigFileError tests file read error func TestGetAcquisitionConfigNotPresent(t *testing.T) { - t.Setenv("CHARON_CROWDSEC_ACQUIS_PATH", filepath.Join(t.TempDir(), "nonexistent.yaml")) - h := newTestCrowdsecHandler(t, OpenTestDB(t), &fakeExec{}, "/bin/false", t.TempDir()) - r := gin.New() - g := r.Group("/api/v1") - h.RegisterRoutes(g) - - w := httptest.NewRecorder() - req := httptest.NewRequest(http.MethodGet, "/api/v1/admin/crowdsec/acquisition", http.NoBody) - r.ServeHTTP(w, req) - - // File won't exist in test env, should give 404 - require.Equal(t, http.StatusNotFound, w.Code) + t.Setenv("CHARON_CROWDSEC_ACQUIS_PATH", filepath.Join(t.TempDir(), "nonexistent.yaml")) + h := newTestCrowdsecHandler(t, OpenTestDB(t), &fakeExec{}, "/bin/false", t.TempDir()) + r := gin.New() + g := r.Group("/api/v1") + h.RegisterRoutes(g) + + w := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodGet, "/api/v1/admin/crowdsec/acquisition", http.NoBody) + r.ServeHTTP(w, req) + + // File won't exist in test env, should give 404 + require.Equal(t, http.StatusNotFound, w.Code) } diff --git a/backend/internal/api/handlers/crowdsec_stop_lapi_test.go b/backend/internal/api/handlers/crowdsec_stop_lapi_test.go index b305b0371..ba0d4fe6e 100644 --- a/backend/internal/api/handlers/crowdsec_stop_lapi_test.go +++ b/backend/internal/api/handlers/crowdsec_stop_lapi_test.go @@ -167,10 +167,10 @@ func TestGetLAPIDecisions_WithMockServer(t *testing.T) { secSvc := createTestSecurityService(t, db) h := &CrowdsecHandler{ - DB: db, - Security: secSvc, - CmdExec: &mockCommandExecutor{}, - DataDir: t.TempDir(), + DB: db, + Security: secSvc, + CmdExec: &mockCommandExecutor{}, + DataDir: t.TempDir(), validateLAPIURL: permissiveLAPIURLValidator, } @@ -210,10 +210,10 @@ func TestGetLAPIDecisions_Unauthorized(t *testing.T) { secSvc := createTestSecurityService(t, db) h := &CrowdsecHandler{ - DB: db, - Security: secSvc, - CmdExec: &mockCommandExecutor{}, - DataDir: t.TempDir(), + DB: db, + Security: secSvc, + CmdExec: &mockCommandExecutor{}, + DataDir: t.TempDir(), validateLAPIURL: permissiveLAPIURLValidator, } @@ -245,10 +245,10 @@ func TestGetLAPIDecisions_NullResponse(t *testing.T) { secSvc := createTestSecurityService(t, db) h := &CrowdsecHandler{ - DB: db, - Security: secSvc, - CmdExec: &mockCommandExecutor{}, - DataDir: t.TempDir(), + DB: db, + Security: secSvc, + CmdExec: &mockCommandExecutor{}, + DataDir: t.TempDir(), validateLAPIURL: permissiveLAPIURLValidator, } @@ -323,10 +323,10 @@ func TestCheckLAPIHealth_WithMockServer(t *testing.T) { secSvc := createTestSecurityService(t, db) h := &CrowdsecHandler{ - DB: db, - Security: secSvc, - CmdExec: &mockCommandExecutor{}, - DataDir: t.TempDir(), + DB: db, + Security: secSvc, + CmdExec: &mockCommandExecutor{}, + DataDir: t.TempDir(), validateLAPIURL: permissiveLAPIURLValidator, } @@ -369,10 +369,10 @@ func TestCheckLAPIHealth_FallbackToDecisions(t *testing.T) { secSvc := createTestSecurityService(t, db) h := &CrowdsecHandler{ - DB: db, - Security: secSvc, - CmdExec: &mockCommandExecutor{}, - DataDir: t.TempDir(), + DB: db, + Security: secSvc, + CmdExec: &mockCommandExecutor{}, + DataDir: t.TempDir(), validateLAPIURL: permissiveLAPIURLValidator, } diff --git a/backend/internal/api/handlers/crowdsec_wave5_test.go b/backend/internal/api/handlers/crowdsec_wave5_test.go index f731d8321..43a1a1cff 100644 --- a/backend/internal/api/handlers/crowdsec_wave5_test.go +++ b/backend/internal/api/handlers/crowdsec_wave5_test.go @@ -35,7 +35,6 @@ func TestCrowdsecWave5_GetLAPIDecisions_Unauthorized(t *testing.T) { })) t.Cleanup(server.Close) - require.NoError(t, db.Create(&models.SecurityConfig{UUID: "default", CrowdSecAPIURL: server.URL}).Error) h := newTestCrowdsecHandler(t, db, &fakeExec{}, "/bin/false", tmpDir) @@ -63,7 +62,6 @@ func TestCrowdsecWave5_GetLAPIDecisions_NonJSONContentTypeFallsBack(t *testing.T })) t.Cleanup(server.Close) - require.NoError(t, db.Create(&models.SecurityConfig{UUID: "default", CrowdSecAPIURL: server.URL}).Error) h := newTestCrowdsecHandler(t, db, &fakeExec{}, "/bin/false", tmpDir) diff --git a/backend/internal/api/handlers/docker_handler.go b/backend/internal/api/handlers/docker_handler.go index 945339b31..cb6eb18b6 100644 --- a/backend/internal/api/handlers/docker_handler.go +++ b/backend/internal/api/handlers/docker_handler.go @@ -22,9 +22,14 @@ type remoteServerGetter interface { GetByUUID(uuidStr string) (*models.RemoteServer, error) } +type orthrusProxyResolver interface { + GetProxyAddr(agentUUID string) (string, bool) +} + type DockerHandler struct { dockerService dockerContainerLister remoteServerService remoteServerGetter + orthrusResolver orthrusProxyResolver } func NewDockerHandler(dockerService dockerContainerLister, remoteServerService remoteServerGetter) *DockerHandler { @@ -34,6 +39,10 @@ func NewDockerHandler(dockerService dockerContainerLister, remoteServerService r } } +func (h *DockerHandler) SetOrthrusResolver(r orthrusProxyResolver) { + h.orthrusResolver = r +} + func (h *DockerHandler) RegisterRoutes(r *gin.RouterGroup) { r.GET("/docker/containers", h.ListContainers) } @@ -62,9 +71,25 @@ func (h *DockerHandler) ListContainers(c *gin.Context) { } // Construct Docker host string - // Assuming TCP for now as that's what RemoteServer supports (Host/Port) - // TODO: Support SSH if/when RemoteServer supports it - host = fmt.Sprintf("tcp://%s:%d", server.Host, server.Port) + switch server.ConnectionType { + case models.ConnectionTypeOrthrus: + if h.orthrusResolver == nil { + c.JSON(http.StatusServiceUnavailable, gin.H{"error": "Orthrus subsystem unavailable"}) + return + } + if server.OrthrusAgentUUID == nil || *server.OrthrusAgentUUID == "" { + c.JSON(http.StatusBadRequest, gin.H{"error": "Remote server has no Orthrus agent UUID configured"}) + return + } + proxyAddr, ok := h.orthrusResolver.GetProxyAddr(*server.OrthrusAgentUUID) + if !ok { + c.JSON(http.StatusBadGateway, gin.H{"error": "Orthrus agent is not currently connected"}) + return + } + host = "tcp://" + proxyAddr + default: + host = fmt.Sprintf("tcp://%s:%d", server.Host, server.Port) + } } containers, err := h.dockerService.ListContainers(c.Request.Context(), host) diff --git a/backend/internal/api/handlers/docker_handler_test.go b/backend/internal/api/handlers/docker_handler_test.go index 73cc811d2..1b2edc56c 100644 --- a/backend/internal/api/handlers/docker_handler_test.go +++ b/backend/internal/api/handlers/docker_handler_test.go @@ -392,3 +392,109 @@ func TestDockerHandler_ListContainers_503DetailsWithGroupGuidance(t *testing.T) assert.Contains(t, w.Body.String(), "--group-add 988") assert.Contains(t, w.Body.String(), "group_add") } + +type fakeOrthrusResolver struct { + addr string + ok bool +} + +func (f *fakeOrthrusResolver) GetProxyAddr(_ string) (string, bool) { + return f.addr, f.ok +} + +func TestDockerHandler_ListContainers_OrthrusAgentConnected(t *testing.T) { + router := gin.New() + agentUUID := "agent-uuid-123" + dockerSvc := &fakeDockerService{ret: []services.DockerContainer{}} + remoteSvc := &fakeRemoteServerService{server: &models.RemoteServer{ + ConnectionType: models.ConnectionTypeOrthrus, + OrthrusAgentUUID: &agentUUID, + }} + h := NewDockerHandler(dockerSvc, remoteSvc) + h.SetOrthrusResolver(&fakeOrthrusResolver{addr: "127.0.0.1:54321", ok: true}) + + api := router.Group("/api/v1") + h.RegisterRoutes(api) + + req := httptest.NewRequest(http.MethodGet, "/api/v1/docker/containers?server_id=srv-1", http.NoBody) + w := httptest.NewRecorder() + router.ServeHTTP(w, req) + + require.True(t, dockerSvc.called) + assert.Equal(t, "tcp://127.0.0.1:54321", dockerSvc.host) + assert.Equal(t, http.StatusOK, w.Code) +} + +func TestDockerHandler_ListContainers_OrthrusAgentOffline(t *testing.T) { + router := gin.New() + agentUUID := "agent-offline-uuid" + dockerSvc := &fakeDockerService{} + remoteSvc := &fakeRemoteServerService{server: &models.RemoteServer{ + ConnectionType: models.ConnectionTypeOrthrus, + OrthrusAgentUUID: &agentUUID, + }} + h := NewDockerHandler(dockerSvc, remoteSvc) + h.SetOrthrusResolver(&fakeOrthrusResolver{ok: false}) + + api := router.Group("/api/v1") + h.RegisterRoutes(api) + + req := httptest.NewRequest(http.MethodGet, "/api/v1/docker/containers?server_id=srv-1", http.NoBody) + w := httptest.NewRecorder() + router.ServeHTTP(w, req) + + assert.Equal(t, http.StatusBadGateway, w.Code) + assert.Contains(t, w.Body.String(), "Orthrus agent is not currently connected") + assert.False(t, dockerSvc.called) +} + +func TestDockerHandler_ListContainers_OrthrusSubsystemUnavailable(t *testing.T) { + router := gin.New() + agentUUID := "agent-uuid-svc" + dockerSvc := &fakeDockerService{} + remoteSvc := &fakeRemoteServerService{server: &models.RemoteServer{ + ConnectionType: models.ConnectionTypeOrthrus, + OrthrusAgentUUID: &agentUUID, + }} + h := NewDockerHandler(dockerSvc, remoteSvc) + // orthrusResolver intentionally not set (nil) + + api := router.Group("/api/v1") + h.RegisterRoutes(api) + + req := httptest.NewRequest(http.MethodGet, "/api/v1/docker/containers?server_id=srv-1", http.NoBody) + w := httptest.NewRecorder() + router.ServeHTTP(w, req) + + assert.Equal(t, http.StatusServiceUnavailable, w.Code) + assert.Contains(t, w.Body.String(), "Orthrus subsystem unavailable") + assert.False(t, dockerSvc.called) +} + +func TestDockerHandler_ListContainers_OrthrusMissingAgentUUID(t *testing.T) { + router := gin.New() + dockerSvc := &fakeDockerService{} + remoteSvc := &fakeRemoteServerService{server: &models.RemoteServer{ + ConnectionType: models.ConnectionTypeOrthrus, + OrthrusAgentUUID: nil, + }} + h := NewDockerHandler(dockerSvc, remoteSvc) + h.SetOrthrusResolver(&fakeOrthrusResolver{ok: true, addr: "127.0.0.1:1234"}) + + api := router.Group("/api/v1") + h.RegisterRoutes(api) + + req := httptest.NewRequest(http.MethodGet, "/api/v1/docker/containers?server_id=srv-1", http.NoBody) + w := httptest.NewRecorder() + router.ServeHTTP(w, req) + + assert.Equal(t, http.StatusBadRequest, w.Code) + assert.Contains(t, w.Body.String(), "no Orthrus agent UUID configured") + assert.False(t, dockerSvc.called) +} + +func TestDockerHandler_SetOrthrusResolver_Nil(t *testing.T) { + h := NewDockerHandler(&fakeDockerService{}, &fakeRemoteServerService{}) + h.SetOrthrusResolver(nil) + assert.Nil(t, h.orthrusResolver) +} diff --git a/backend/internal/api/handlers/security_notifications_single_source_test.go b/backend/internal/api/handlers/security_notifications_single_source_test.go index 405161eb1..b763d4cf4 100644 --- a/backend/internal/api/handlers/security_notifications_single_source_test.go +++ b/backend/internal/api/handlers/security_notifications_single_source_test.go @@ -125,7 +125,6 @@ func TestR6_LegacySecuritySettingsWrite410Gone(t *testing.T) { service := services.NewEnhancedSecurityNotificationService(db) handler := NewSecurityNotificationHandler(service) - // Test canonical endpoint: PUT /api/v1/notifications/settings/security t.Run("CanonicalEndpoint", func(t *testing.T) { reqBody := map[string]interface{}{ @@ -203,7 +202,6 @@ func TestR6_LegacyWrite410GoneNoMutation(t *testing.T) { service := services.NewEnhancedSecurityNotificationService(db) handler := NewSecurityNotificationHandler(service) - // Attempt PUT to canonical endpoint reqBody := map[string]interface{}{ "security_waf_enabled": true, @@ -237,7 +235,6 @@ func TestProviderCRUD_SecurityEventsIncludeCrowdSec(t *testing.T) { service := services.NewNotificationService(db, nil) handler := NewNotificationProviderHandler(service) - // Test CREATE t.Run("CreatePersistsCrowdSec", func(t *testing.T) { reqBody := notificationProviderUpsertRequest{ diff --git a/backend/internal/api/handlers/system_permissions_handler_test.go b/backend/internal/api/handlers/system_permissions_handler_test.go index 843a6dd1e..4764c848c 100644 --- a/backend/internal/api/handlers/system_permissions_handler_test.go +++ b/backend/internal/api/handlers/system_permissions_handler_test.go @@ -103,7 +103,6 @@ func TestSystemPermissionsHandler_RepairPermissions_NonRoot(t *testing.T) { t.Skip("test requires non-root execution") } - cfg := config.Config{SingleContainer: true} h := NewSystemPermissionsHandler(cfg, nil, stubPermissionChecker{}) @@ -232,7 +231,6 @@ func TestSystemPermissionsHandler_RepairPermissions_InvalidJSON(t *testing.T) { t.Skip("test requires root execution") } - root := t.TempDir() dataDir := filepath.Join(root, "data") require.NoError(t, os.MkdirAll(dataDir, 0o750)) @@ -264,7 +262,6 @@ func TestSystemPermissionsHandler_RepairPermissions_Success(t *testing.T) { t.Skip("test requires root execution") } - root := t.TempDir() dataDir := filepath.Join(root, "data") require.NoError(t, os.MkdirAll(dataDir, 0o750)) @@ -526,7 +523,6 @@ func TestSystemPermissionsHandler_RepairPermissions_InvalidRequestBody_Root(t *t t.Skip("test requires root execution") } - tmp := t.TempDir() dataDir := filepath.Join(tmp, "data") require.NoError(t, os.MkdirAll(dataDir, 0o750)) diff --git a/backend/internal/api/handlers/system_permissions_wave6_test.go b/backend/internal/api/handlers/system_permissions_wave6_test.go index 09d34c939..ae6a2366a 100644 --- a/backend/internal/api/handlers/system_permissions_wave6_test.go +++ b/backend/internal/api/handlers/system_permissions_wave6_test.go @@ -28,7 +28,6 @@ func TestSystemPermissionsWave6_RepairPermissions_NonRootBranchViaSeteuid(t *tes require.NoError(t, restoreErr) }() - root := t.TempDir() dataDir := filepath.Join(root, "data") require.NoError(t, os.MkdirAll(dataDir, 0o750)) diff --git a/backend/internal/api/routes/routes.go b/backend/internal/api/routes/routes.go index 1ed53775b..ebc6f16cf 100644 --- a/backend/internal/api/routes/routes.go +++ b/backend/internal/api/routes/routes.go @@ -389,6 +389,7 @@ func RegisterWithDeps(ctx context.Context, router *gin.Engine, db *gorm.DB, cfg management.DELETE("/domains/:id", domainHandler.Delete) // DNS Providers - only available if encryption key is configured + var orthrusServer *orthrus.OrthrusServer if cfg.EncryptionKey != "" { encryptionService, err := crypto.NewEncryptionService(cfg.EncryptionKey) if err != nil { @@ -467,7 +468,6 @@ func RegisterWithDeps(ctx context.Context, router *gin.Engine, db *gorm.DB, cfg } orthrusCA, caErr := orthrus.NewInternalCA(dataRoot) - var orthrusServer *orthrus.OrthrusServer if caErr == nil { var serverErr error orthrusServer, serverErr = orthrus.NewOrthrusServer(db, orthrusCA) @@ -503,6 +503,7 @@ func RegisterWithDeps(ctx context.Context, router *gin.Engine, db *gorm.DB, cfg // The service will return proper error messages when Docker is not accessible dockerService := services.NewDockerService() dockerHandler := handlers.NewDockerHandler(dockerService, remoteServerService) + dockerHandler.SetOrthrusResolver(orthrusServer) dockerHandler.RegisterRoutes(management) // Uptime Service — reuse the single uptimeService instance (defined above) diff --git a/backend/internal/config/config.go b/backend/internal/config/config.go index fe09bce32..f498119f3 100644 --- a/backend/internal/config/config.go +++ b/backend/internal/config/config.go @@ -15,27 +15,27 @@ import ( // Config captures runtime configuration sourced from environment variables. type Config struct { - Environment string - HTTPPort string - DatabasePath string - ConfigRoot string - FrontendDir string - CaddyAdminAPI string - CaddyConfigDir string - CaddyBinary string - ImportCaddyfile string - ImportDir string - JWTSecret string - EncryptionKey string - ACMEStaging bool - SingleContainer bool - PluginsDir string - CaddyLogDir string - CrowdSecLogDir string - Debug bool + Environment string + HTTPPort string + DatabasePath string + ConfigRoot string + FrontendDir string + CaddyAdminAPI string + CaddyConfigDir string + CaddyBinary string + ImportCaddyfile string + ImportDir string + JWTSecret string + EncryptionKey string + ACMEStaging bool + SingleContainer bool + PluginsDir string + CaddyLogDir string + CrowdSecLogDir string + Debug bool CertExpiryWarningDays int - Security SecurityConfig - Emergency EmergencyConfig + Security SecurityConfig + Emergency EmergencyConfig } // SecurityConfig holds configuration for optional security services. diff --git a/backend/internal/orthrus/proxy_integration_test.go b/backend/internal/orthrus/proxy_integration_test.go new file mode 100644 index 000000000..efd27983b --- /dev/null +++ b/backend/internal/orthrus/proxy_integration_test.go @@ -0,0 +1,9 @@ +//go:build integration + +package orthrus + +import "testing" + +func TestDockerProxy_Integration(t *testing.T) { + t.Skip("requires running Orthrus agent with /var/run/docker.sock") +} diff --git a/backend/internal/orthrus/server.go b/backend/internal/orthrus/server.go index 87917461c..7017fa239 100644 --- a/backend/internal/orthrus/server.go +++ b/backend/internal/orthrus/server.go @@ -91,6 +91,10 @@ func (s *OrthrusServer) HandleWebSocket(c *gin.Context) { return } + if err := session.StartDockerProxy(); err != nil { + logger.Log().WithField("uuid", util.SanitizeForLog(agent.UUID)).WithError(err).Warn("orthrus: start docker proxy listener failed") + } + s.sessions.Store(agent.UUID, session) now := time.Now() @@ -163,6 +167,7 @@ func (s *OrthrusServer) watchHeartbeat(agentUUID string, sess *AgentSession) { return case <-ticker.C: if !sess.IsAlive() { + _ = sess.Close() s.markOffline(agentUUID) s.sessions.Delete(agentUUID) return diff --git a/backend/internal/orthrus/server_coverage_test.go b/backend/internal/orthrus/server_coverage_test.go index 88a204b8d..1c5470603 100644 --- a/backend/internal/orthrus/server_coverage_test.go +++ b/backend/internal/orthrus/server_coverage_test.go @@ -1,6 +1,7 @@ package orthrus import ( + "net" "net/http" "net/http/httptest" "strings" @@ -68,6 +69,10 @@ func TestOrthrusServer_GetProxyAddr_SessionExists_WithProxy(t *testing.T) { sess.mu.Lock() sess.proxyPort = 9876 + ln, err2 := net.Listen("tcp", "127.0.0.1:0") + require.NoError(t, err2) + t.Cleanup(func() { _ = ln.Close() }) + sess.listener = ln sess.mu.Unlock() srv.sessions.Store("with-proxy-uuid", sess) @@ -274,7 +279,6 @@ func TestOrthrusServer_FindAgentByToken_DBError_ReturnsError(t *testing.T) { srv, err := NewOrthrusServer(db, setupTestCA(t)) require.NoError(t, err) - _, err = srv.findAgentByToken("anytoken") assert.Error(t, err) } diff --git a/backend/internal/orthrus/server_test.go b/backend/internal/orthrus/server_test.go index 1898ad263..3cd25d00a 100644 --- a/backend/internal/orthrus/server_test.go +++ b/backend/internal/orthrus/server_test.go @@ -4,9 +4,12 @@ import ( "net/http" "net/http/httptest" "path/filepath" + "strings" "testing" + "time" "github.com/gin-gonic/gin" + "github.com/gorilla/websocket" "golang.org/x/crypto/bcrypt" "github.com/stretchr/testify/assert" @@ -189,3 +192,47 @@ func TestOrthrusServer_HandleWebSocket_InvalidToken(t *testing.T) { assert.Equal(t, http.StatusUnauthorized, w.Code) } + +func TestOrthrusServer_HandleWebSocket_StartsProxy(t *testing.T) { + gin.SetMode(gin.TestMode) + db := setupServerTestDB(t) + srv, err := NewOrthrusServer(db, setupTestCA(t)) + require.NoError(t, err) + + plainKey := "ch_orthrus_proxytest1234" + hash, err := bcrypt.GenerateFromPassword([]byte(plainKey), bcrypt.MinCost) + require.NoError(t, err) + + agentUUID := "proxy-test-uuid" + agent := &models.OrthrusAgent{ + UUID: agentUUID, + Name: "proxy-test-agent", + AuthKeyHash: string(hash), + Status: models.OrthrusStatusPending, + } + require.NoError(t, db.Create(agent).Error) + + r := gin.New() + r.GET("/ws/orthrus/connect", srv.HandleWebSocket) + httpSrv := httptest.NewServer(r) + defer httpSrv.Close() + + url := "ws" + strings.TrimPrefix(httpSrv.URL, "http") + "/ws/orthrus/connect" + headers := http.Header{} + headers.Set("Authorization", "Bearer "+plainKey) + + clientConn, resp, err := websocket.DefaultDialer.Dial(url, headers) + require.NoError(t, err) + if resp != nil { + _ = resp.Body.Close() + } + defer func() { _ = clientConn.Close() }() + + // Allow server goroutine to process the connection and start the proxy. + time.Sleep(50 * time.Millisecond) + + addr, ok := srv.GetProxyAddr(agentUUID) + assert.True(t, ok, "proxy addr should be registered after connection") + assert.NotEmpty(t, addr) + assert.True(t, strings.HasPrefix(addr, "127.0.0.1:"), "addr must be on loopback: %s", addr) +} diff --git a/backend/internal/orthrus/session.go b/backend/internal/orthrus/session.go index 79c7c3f7a..442e35134 100644 --- a/backend/internal/orthrus/session.go +++ b/backend/internal/orthrus/session.go @@ -81,15 +81,21 @@ func (c *wsNetConn) SetWriteDeadline(t time.Time) error { return c.conn.SetWriteDeadline(t) } +// streamTypeDocker is the first byte written to every yamux stream opened for +// Docker API proxying. The agent reads this byte to dispatch the stream to the +// Docker socket handler. +const streamTypeDocker = byte(0x01) + // AgentSession represents a single connected Orthrus agent's active WebSocket -// and Yamux session. Full proxy stream forwarding is implemented in PR 5. +// and Yamux session. type AgentSession struct { agentUUID string agentName string conn *websocket.Conn session *yamux.Session cancel context.CancelFunc - proxyPort int // allocated in PR 5; 0 means no proxy listener yet + listener net.Listener // nil until StartDockerProxy succeeds + proxyPort int // ephemeral port allocated by StartDockerProxy mu sync.Mutex } @@ -114,24 +120,30 @@ func NewAgentSession(agentUUID, agentName string, conn *websocket.Conn) (*AgentS }, nil } -// Close terminates the Yamux session and the underlying WebSocket connection. -// Yamux closes the underlying net.Conn (wsNetConn) when the session is closed, -// which in turn closes the WebSocket connection; no second close is needed. +// Close terminates the proxy listener, the Yamux session, and the underlying +// WebSocket connection. Yamux closes the underlying net.Conn (wsNetConn) when +// the session is closed, which in turn closes the WebSocket connection; no +// second close is needed. Idempotent: a second call is a no-op for the +// listener. func (s *AgentSession) Close() error { s.mu.Lock() defer s.mu.Unlock() s.cancel() + if s.listener != nil { + _ = s.listener.Close() + s.listener = nil + } return s.session.Close() } // GetProxyAddr returns the local address of the proxy listener for this session. -// Returns an empty string when no proxy port has been allocated (PR 5). +// Returns an empty string when no proxy listener is active. func (s *AgentSession) GetProxyAddr() string { s.mu.Lock() defer s.mu.Unlock() - if s.proxyPort == 0 { + if s.listener == nil { return "" } return fmt.Sprintf("127.0.0.1:%d", s.proxyPort) @@ -141,3 +153,64 @@ func (s *AgentSession) GetProxyAddr() string { func (s *AgentSession) IsAlive() bool { return !s.session.IsClosed() } + +// StartDockerProxy allocates a loopback TCP listener on an ephemeral port and +// starts accepting connections. Each accepted connection opens a new yamux +// stream to the agent with a streamTypeDocker header byte. Returns an error if +// the proxy was already started or the listener could not be allocated. +func (s *AgentSession) StartDockerProxy() error { + s.mu.Lock() + defer s.mu.Unlock() + + if s.listener != nil { + return fmt.Errorf("orthrus: docker proxy already started for agent %s", s.agentUUID) + } + + ln, err := net.Listen("tcp", "127.0.0.1:0") + if err != nil { + return fmt.Errorf("orthrus: start docker proxy listener: %w", err) + } + + s.listener = ln + s.proxyPort = ln.Addr().(*net.TCPAddr).Port + go s.runProxyListener(ln) + return nil +} + +// runProxyListener accepts TCP connections and spawns a proxyConn goroutine +// for each one. It exits when the listener is closed (by Close()). +func (s *AgentSession) runProxyListener(ln net.Listener) { + for { + conn, err := ln.Accept() + if err != nil { + return + } + go s.proxyConn(conn) + } +} + +// proxyConn forwards a single TCP connection through a new yamux stream. +// It writes the streamTypeDocker byte first so the agent can dispatch the +// stream to the Docker socket handler. io.Copy runs concurrently in both +// directions; proxyConn blocks until both directions complete. +func (s *AgentSession) proxyConn(conn net.Conn) { + defer func() { _ = conn.Close() }() + + stream, err := s.session.Open() + if err != nil { + return + } + defer func() { _ = stream.Close() }() + + if _, err := stream.Write([]byte{streamTypeDocker}); err != nil { + return + } + + done := make(chan struct{}) + go func() { + defer close(done) + _, _ = io.Copy(stream, conn) + }() + _, _ = io.Copy(conn, stream) + <-done +} diff --git a/backend/internal/orthrus/session_coverage_test.go b/backend/internal/orthrus/session_coverage_test.go index 774199084..d01236a91 100644 --- a/backend/internal/orthrus/session_coverage_test.go +++ b/backend/internal/orthrus/session_coverage_test.go @@ -75,8 +75,13 @@ func TestAgentSession_GetProxyAddr_WithPort(t *testing.T) { require.NoError(t, err) defer func() { _ = sess.Close() }() + ln, lnErr := net.Listen("tcp", "127.0.0.1:0") + require.NoError(t, lnErr) + t.Cleanup(func() { _ = ln.Close() }) + sess.mu.Lock() sess.proxyPort = 8080 + sess.listener = ln sess.mu.Unlock() addr := sess.GetProxyAddr() diff --git a/backend/internal/orthrus/session_test.go b/backend/internal/orthrus/session_test.go index 0174fb6f9..f5503e675 100644 --- a/backend/internal/orthrus/session_test.go +++ b/backend/internal/orthrus/session_test.go @@ -1,12 +1,17 @@ package orthrus import ( + "context" + "io" + "net" "net/http" "net/http/httptest" "strings" "testing" + "time" "github.com/gorilla/websocket" + "github.com/hashicorp/yamux" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -73,3 +78,97 @@ func TestAgentSession_Close_SetsNotAlive(t *testing.T) { require.NoError(t, sess.Close()) assert.False(t, sess.IsAlive()) } + +func TestAgentSession_StartDockerProxy_SetsProxyAddr(t *testing.T) { + serverConn, done := testWSPair(t) + defer done() + + sess, err := NewAgentSession("proxy-uuid", "proxy-agent", serverConn) + require.NoError(t, err) + defer func() { _ = sess.Close() }() + + require.NoError(t, sess.StartDockerProxy()) + + addr := sess.GetProxyAddr() + assert.NotEmpty(t, addr) + assert.True(t, strings.HasPrefix(addr, "127.0.0.1:"), "addr must be on loopback: %s", addr) +} + +func TestAgentSession_StartDockerProxy_CalledTwice(t *testing.T) { + serverConn, done := testWSPair(t) + defer done() + + sess, err := NewAgentSession("proxy-uuid-2", "proxy-agent-2", serverConn) + require.NoError(t, err) + defer func() { _ = sess.Close() }() + + require.NoError(t, sess.StartDockerProxy()) + firstAddr := sess.GetProxyAddr() + + err2 := sess.StartDockerProxy() + require.Error(t, err2) + assert.Contains(t, err2.Error(), "already started") + assert.Equal(t, firstAddr, sess.GetProxyAddr(), "proxy addr must not change on second call") +} + +func TestAgentSession_Close_ClosesListener(t *testing.T) { + serverConn, done := testWSPair(t) + defer done() + + sess, err := NewAgentSession("close-uuid", "close-agent", serverConn) + require.NoError(t, err) + require.NoError(t, sess.StartDockerProxy()) + addr := sess.GetProxyAddr() + require.NotEmpty(t, addr) + + require.NoError(t, sess.Close()) + + conn, dialErr := net.DialTimeout("tcp", addr, 100*time.Millisecond) + if conn != nil { + _ = conn.Close() + } + assert.Error(t, dialErr, "TCP dial should fail after session close") +} + +func TestAgentSession_ProxyConn_WritesStreamTypeByte(t *testing.T) { + serverPipe, clientPipe := net.Pipe() + t.Cleanup(func() { + _ = serverPipe.Close() + _ = clientPipe.Close() + }) + + cfg := yamux.DefaultConfig() + cfg.LogOutput = io.Discard + yamuxServer, err := yamux.Server(serverPipe, cfg) + require.NoError(t, err) + t.Cleanup(func() { _ = yamuxServer.Close() }) + + yamuxClient, err := yamux.Client(clientPipe, cfg) + require.NoError(t, err) + t.Cleanup(func() { _ = yamuxClient.Close() }) + + _, cancel := context.WithCancel(context.Background()) + sess := &AgentSession{ + agentUUID: "type-uuid", + agentName: "type-agent", + session: yamuxServer, + cancel: cancel, + } + t.Cleanup(func() { _ = sess.Close() }) + + tcpServer, tcpClient := net.Pipe() + t.Cleanup(func() { + _ = tcpServer.Close() + _ = tcpClient.Close() + }) + go sess.proxyConn(tcpServer) + + stream, err := yamuxClient.Accept() + require.NoError(t, err) + defer func() { _ = stream.Close() }() + + buf := make([]byte, 1) + _, err = io.ReadFull(stream, buf) + require.NoError(t, err) + assert.Equal(t, streamTypeDocker, buf[0]) +} diff --git a/backend/internal/services/certificate_validator_extra_coverage_test.go b/backend/internal/services/certificate_validator_extra_coverage_test.go index 5d20f49c6..f2a8f3941 100644 --- a/backend/internal/services/certificate_validator_extra_coverage_test.go +++ b/backend/internal/services/certificate_validator_extra_coverage_test.go @@ -252,5 +252,3 @@ func TestDetectFormat_PEM(t *testing.T) { format := DetectFormat(certPEM) assert.Equal(t, FormatPEM, format) } - - From 810ae1646d6c3aabf966f7c6d94251b1a041d87b Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Mon, 18 May 2026 23:09:03 +0000 Subject: [PATCH 3/6] docs(security): add QA Security Audit Report for Orthrus Docker Proxy feature --- docs/plans/current_spec.md | 2206 ++++-------------- docs/reports/qa-security-audit-2026-05-18.md | 245 ++ 2 files changed, 728 insertions(+), 1723 deletions(-) create mode 100644 docs/reports/qa-security-audit-2026-05-18.md diff --git a/docs/plans/current_spec.md b/docs/plans/current_spec.md index ba9e29ed2..a45b0347e 100644 --- a/docs/plans/current_spec.md +++ b/docs/plans/current_spec.md @@ -1,8 +1,9 @@ -# Hecate Simplified Architecture — Feature Spec +# Orthrus Agent Docker Proxy Listener — Feature Spec (PR 5) **Branch**: `feature/hecate` -**PR**: #983 -**Date**: 2026-05-03 +**PR**: #5 (Orthrus Docker Proxy — server-side listener) +**Date**: 2026-05-18 +**Status**: Ready for implementation --- @@ -10,1948 +11,707 @@ ### Overview -This spec replaces the previous plan, which suffered from a "double-setup problem": users had to configure Tailscale in Hecate > Providers, and then also configure Tailscale-specific fields on each Orthrus Agent. The new design eliminates that redundancy. +Orthrus agents connect to Charon over a persistent WebSocket multiplexed with yamux. The agent binary (`agent/leash/leash.go`) already handles an incoming yamux stream whose first byte is `0x01` (`streamTypeDocker`) by connecting to its local `/var/run/docker.sock` and bidirectionally copying. The server-side half of this tunnel was deferred with the comment "PR 5". -### Core Architecture Principle +This spec covers the complete implementation: -Providers are configured **once** in Hecate > Providers. They can then be: - -1. **Assigned to an Orthrus Agent** — the agent holds a `hecate_tunnel_uuid` + `device_id` reference. When a Remote Server picks that agent, connectivity is derived automatically from `resolved_address`. -2. **Used directly on a Remote Server** — pick any provider + device directly (no agent required). - -``` -Providers (configured once in Hecate > Providers): - └── Tailscale "My Network" → TunnelConfig UUID = abc-123 - └── Cloudflare "My Tunnel" → TunnelConfig UUID = def-456 - └── NetBird "My Network" → TunnelConfig UUID = ghi-789 - -Agent "prod-server-01": - └── hecate_tunnel_uuid: "abc-123" (Tailscale "My Network") - └── device_id: "ts-node-abcde" - └── resolved_address: "100.64.0.5" ← cached at assignment time - -Remote Server → 3 radio options: - ├── Direct → host entered manually - ├── Agent → pick agent → host = agent.resolved_address (auto-derived) - └── Provider → pick any tunnel + device directly (no agent needed) -``` +1. **Server-side proxy listener** — when an agent session is registered, allocate an ephemeral `127.0.0.1:0` TCP listener; for each accepted connection, open a yamux stream to the agent, write `0x01`, then bidirectionally copy. +2. **`DockerHandler` integration** — when a `RemoteServer` has `connection_type == "orthrus"`, resolve the agent's proxy address and use `tcp://127.0.0.1:` as the Docker host instead of the server's `Host:Port` fields. ### Objectives -1. **Problem 1** — Add inline tunnel list + edit button on each provider card in `HecateProviders.tsx` -2. **Problem 2** — Add generic provider assignment to Orthrus agents (backend model + frontend dialog) -3. **Problem 3** — Simplify `RemoteServers` connection UI to 3 clean radios: direct / agent / provider +1. `GetProxyAddr()` returns a non-empty address for every live agent session. +2. Docker container listing via `GET /api/v1/docker/containers?server_id=` works for Orthrus-backed remote servers. +3. Clear 502/503 errors when the agent is offline or the orthrus subsystem is unavailable. +4. All changes are covered by unit tests; an integration test stub is provided. --- ## 2. Research Findings -### 2.1 Backend — Current State +### 2.1 `backend/internal/orthrus/session.go` -#### `backend/internal/models/orthrus_agent.go` - -The `OrthrusAgent` struct currently has **no provider-related fields**: +**`AgentSession` struct** (relevant fields): ```go -type OrthrusAgent struct { - ID uint - UUID string - Name string - AuthKeyHash string // json:"-", bcrypt hash - Status OrthrusStatus // online / offline / pending - Capabilities string // JSON array - AgentCertPEM string - LastHeartbeat *time.Time - LastSeen *time.Time - CreatedAt time.Time - UpdatedAt time.Time +type AgentSession struct { + agentUUID string + agentName string + conn *websocket.Conn + session *yamux.Session + cancel context.CancelFunc + proxyPort int // 0 means no proxy listener yet (allocated in PR 5) + mu sync.Mutex } ``` -**No `hecate_tunnel_uuid`, `device_id`, or `resolved_address` fields exist.** These must be added. - -#### `backend/internal/models/remote_server.go` - -`RemoteServer` already has: -- `ConnectionType` — `'direct' | 'orthrus' | 'cloudflare' | 'tailscale' | 'netbird' | 'zerotier'` -- `OrthrusAgentUUID *string` -- `HecateTunnelUUID *string` - -**No `device_id` or `resolved_address` on RemoteServer** — that's correct; these fields live on the agent when using agent mode. In provider mode (direct tunnel selection), host is resolved at device-pick time. +**`GetProxyAddr()`** already returns the correct format, updated to use `s.listener != nil` as the sentinel for consistency with the idempotency guard: -#### `backend/internal/api/handlers/orthrus_handler.go` - -Current routes (confirmed): -``` -GET /orthrus/agents -POST /orthrus/agents -GET /orthrus/agents/:uuid -PATCH /orthrus/agents/:uuid → Rename handler -DELETE /orthrus/agents/:uuid -POST /orthrus/agents/:uuid/revoke -GET /orthrus/agents/:uuid/snippets +```go +func (s *AgentSession) GetProxyAddr() string { + s.mu.Lock() + defer s.mu.Unlock() + if s.listener == nil { + return "" + } + return fmt.Sprintf("127.0.0.1:%d", s.proxyPort) +} ``` -The `PATCH` handler currently uses `renameRequest { Name string binding:"required" }`. This **blocks** partial updates — sending only `hecate_tunnel_uuid` would fail validation. Must replace with `patchAgentRequest` using all-optional pointer fields. +An existing coverage test (`session_coverage_test.go`) manually sets `sess.proxyPort = 8080` and asserts `GetProxyAddr()` returns `"127.0.0.1:8080"` — the integer field remains the source of truth for the port value; `listener` is the guard that determines whether a proxy is active. -#### `backend/internal/services/orthrus_service.go` +**`Close()`** cancels the context and closes the yamux session (which also closes the underlying WebSocket). The listener allocated in PR 5 must be closed here. -Has `Rename(uuid, newName string)` but no general `Patch` method. Must add one that accepts all 4 optional fields and uses a GORM map-based selective update. +**`IsAlive()`** returns `!s.session.IsClosed()`. -### 2.2 Frontend — Current State +### 2.2 `backend/internal/orthrus/server.go` -#### `frontend/src/pages/HecateProviders.tsx` +**`OrthrusServer`** stores sessions in a `sync.Map` keyed by `agentUUID → *AgentSession`. -- Shows 4 provider cards (Cloudflare, Tailscale, NetBird, ZeroTier) -- Each card shows tunnel count and a single "Add Provider" button -- **No inline tunnel list** — no way to see existing tunnels from the card -- **No edit button per tunnel** — users cannot edit an existing tunnel from this page -- `HecateTunnelForm` already has `tunnel?: TunnelConfig` prop for edit mode — it is just never passed +**`HandleWebSocket()`** — the connection lifecycle: +1. Authenticates the bearer token (bcrypt compare against all agents). +2. Calls `NewAgentSession(uuid, name, conn)` to create the yamux server session. +3. Stores in `sessions`. +4. Updates the agent's `status = online` in the DB. +5. Launches `watchHeartbeat` goroutine. -#### `frontend/src/components/hecate/HecateTunnelForm.tsx` +`StartDockerProxy()` (to be added) must be called between steps 2 and 3. -- Accepts `tunnel?: TunnelConfig` prop — when present, renders in edit mode (`isEdit = !!tunnel`) -- Initialises name, provider, isActive from the tunnel prop -- Calls `updateTunnel` on submit when `isEdit` is true -- **Already fully supports edit mode — just needs to be wired from `HecateProviders`** +**`watchHeartbeat()`** polls `sess.IsAlive()` on a ticker. When false, it calls `markOffline` and `s.sessions.Delete(agentUUID)` — but does **not** call `sess.Close()`. This means the listener goroutine would outlive the session. The fix: call `sess.Close()` in `watchHeartbeat` when `!sess.IsAlive()`. -#### `frontend/src/components/hecate/OrthrusAgentManager.tsx` +**`GetProxyAddr(agentUUID string) (string, bool)`** and **`GetSession(agentUUID string) (*AgentSession, bool)`** are already implemented as stubs and will work correctly once `proxyPort` is set. -- Renders a table: Name (inline-editable), UUID, Status, Last Seen, Actions -- Actions column: only Delete button currently -- **No "Assign Provider" action exists** +**`Stop()`** already calls `sess.Close()` for all sessions — listener cleanup will be covered once `Close()` is updated. -#### `frontend/src/components/hecate/ConnectionTypeSelector.tsx` +### 2.3 `backend/internal/api/handlers/docker_handler.go` -Current type: -```typescript -export type ConnectionMode = 'direct' | 'agent' -``` +**Current struct**: -Renders **2 radios**: Direct / Agent. The Agent mode combines Orthrus agents AND provider tunnels in one dropdown — these are logically separate concepts and must be split. - -#### `frontend/src/components/RemoteServerForm.tsx` - -Current `resolveConnectionMode()` collapses all non-direct modes into `'agent'`. The form also has an `orthrus_ip_mode` state (`'' | 'tailscale' | 'netbird' | 'zerotier' | 'manual'`) that adds complexity. The simplified design removes this in favour of the 3-radio model. - -#### Existing Device Pickers (confirmed, no changes required) - -- `frontend/src/components/hecate/TailscaleDevicePicker.tsx` — accepts `onSelect(device)`, `open`, `onClose` -- `frontend/src/components/hecate/NetBirdPeerPicker.tsx` — accepts `onSelect(peer)`, `selectedId`, `open`, `onClose` -- `frontend/src/components/hecate/ZeroTierMemberPicker.tsx` — two-step (network → member), accepts `onSelect(member)`, `open`, `onClose` - -#### Existing i18n Keys Present - -```json -"hecate.form.mode.label" -"hecate.form.mode.direct" -"hecate.form.mode.agent" -"hecate.form.mode.directDescription" -"hecate.form.mode.agentDescription" -"hecate.form.mode.provider" ← label exists, description missing -"hecate.form.mode.selectProvider" -"hecate.form.mode.noProviders" -"hecate.form.mode.goToHecate" -"hecate.form.mode.selectedDevice" -"hecate.form.mode.changeDevice" -"hecate.providers.title" -"hecate.providers.description" -"hecate.providers.tunnelCount_one" -"hecate.providers.tunnelCount_other" -"hecate.agentManager.tableLabel" -"hecate.agentManager.colName" -"hecate.agentManager.colUUID" -"hecate.agentManager.colStatus" -"hecate.agentManager.colLastSeen" -"hecate.agentManager.colActions" +```go +type DockerHandler struct { + dockerService dockerContainerLister + remoteServerService remoteServerGetter +} ``` ---- +**Current `ListContainers` flow for `server_id`**: -## 3. Technical Specifications +```go +server, err := h.remoteServerService.GetByUUID(serverID) +// ... +host = fmt.Sprintf("tcp://%s:%d", server.Host, server.Port) +``` -### 3.1 Problem 1 — HecateProviders: Inline Tunnel List + Edit +No awareness of `ConnectionType` or `OrthrusAgentUUID`. No reference to `OrthrusServer`. -#### `frontend/src/pages/HecateProviders.tsx` — Changes +### 2.4 `backend/internal/api/routes/routes.go` -**New state:** +**Critical wiring gap**: `orthrusServer` is created inside `if strings.TrimSpace(os.Getenv("CHARON_ENCRYPTION_KEY")) != "" { ... }`. The `dockerHandler` is created **after** this block closes: -```typescript -const [editTunnel, setEditTunnel] = useState(null) -const [editFormOpen, setEditFormOpen] = useState(false) +```go +if encryptionKey != "" { + // ... + orthrusServer, _ = orthrus.NewOrthrusServer(db, orthrusCA) + // orthrusServer registered with api and caddyManager here +} +// dockerHandler created here — orthrusServer is OUT OF SCOPE +dockerService := services.NewDockerService() +dockerHandler := handlers.NewDockerHandler(dockerService, remoteServerService) +dockerHandler.RegisterRoutes(management) ``` -**Helper handlers:** +Fix: hoist `var orthrusServer *orthrus.OrthrusServer` declaration above the `if` block, then call `dockerHandler.SetOrthrusResolver(orthrusServer)` (may be nil) after the handler is created. -```typescript -const openEdit = (tunnel: TunnelConfig) => { - setEditTunnel(tunnel) - setEditFormOpen(true) -} +### 2.5 `backend/internal/services/docker_service.go` + +**`ListContainers(ctx, host string)`**: when `host` is neither empty nor `"local"`, creates a new `*client.Client` with `client.WithHost(host)`. A `tcp://127.0.0.1:` value is a valid host string — no changes needed to `docker_service.go`. -const openCreate = (provider: TunnelProvider) => { - setEditTunnel(null) - setSelectedProvider(provider) - setFormOpen(true) +### 2.6 `backend/internal/models/remote_server.go` + +```go +type ConnectionType string + +const ( + ConnectionTypeDirect ConnectionType = "direct" + ConnectionTypeOrthrus ConnectionType = "orthrus" + ConnectionTypeCloudflare ConnectionType = "cloudflare" + ConnectionTypeTailscale ConnectionType = "tailscale" + ConnectionTypeNetbird ConnectionType = "netbird" + ConnectionTypeZerotier ConnectionType = "zerotier" +) + +type RemoteServer struct { + // ... + ConnectionType ConnectionType `json:"connection_type" gorm:"default:'direct';index"` + OrthrusAgentUUID *string `json:"orthrus_agent_uuid,omitempty" gorm:"index"` + // ... } ``` -**Updated provider card — inline tunnel list section:** - -```tsx -{/* Inline tunnel list */} -
    - {providerTunnels.map(tun => ( -
  • -
    - {tun.name} - -
    - -
  • - ))} - {providerTunnels.length === 0 && ( -
  • - {t('hecate.providers.noTunnels')} -
  • - )} -
-``` +All six values are enumerated here for completeness. Only `orthrus` uses the proxy path; the `default:` branch in `ListContainers` handles all remaining types (`direct`, `cloudflare`, `tailscale`, `netbird`, `zerotier`) correctly by falling through to the `tcp://host:port` construction. -**Second `HecateTunnelForm` instance (edit mode):** +### 2.7 `agent/leash/leash.go` — Agent-side protocol (already implemented) -```tsx -{editFormOpen && ( - { setEditFormOpen(false); setEditTunnel(null) }} - /> -)} +```go +const ( + streamTypeDocker = byte(0x01) + streamTypePortForward = byte(0x02) +) ``` -**No backend changes required for Problem 1** — `HecateTunnelForm` calls `updateTunnel` → `PUT /hecate/tunnels/:uuid`, already implemented. +**`handleStream(stream *yamux.Stream)`**: reads 1 type byte, dispatches to `handleDockerStream` for `0x01`. ---- - -### 3.2 Problem 2 — Agent Generic Provider Assignment +**`handleDockerStream(stream *yamux.Stream)`**: calls `l.filter.ServeProxy(l.dockerSock, stream, stream)` — the `muzzle.Filter` proxies the stream to the local Docker socket, applying the allowlist filter. -#### 3.2.1 Backend Model — `backend/internal/models/orthrus_agent.go` +**Server-side requirement**: open a yamux stream (`s.session.Open()`), write `[]byte{0x01}`, then bidirectionally copy between the TCP connection and the yamux stream. -Add 3 nullable pointer fields. GORM AutoMigrate adds the columns on next startup: +### 2.8 Existing Tests -```go -// HecateTunnelUUID is the TunnelConfig assigned to THIS AGENT for its own outbound -// connectivity. Distinct from RemoteServer.HecateTunnelUUID which governs how a -// RemoteServer is reached by Charon. -HecateTunnelUUID *string `json:"hecate_tunnel_uuid,omitempty" gorm:"index"` - -// DeviceID is the provider-specific peer/device/member identifier within the -// assigned tunnel (e.g. Tailscale node ID, NetBird peer ID). Empty for Cloudflare. -DeviceID *string `json:"device_id,omitempty"` - -// ResolvedAddress is the cached connectivity address for this agent, -// set by Charon at assignment time, used as upstream host in Remote Servers. -ResolvedAddress *string `json:"resolved_address,omitempty"` -``` +- `session_coverage_test.go` directly accesses `sess.proxyPort` (unexported). The new `StartDockerProxy()` sets this field — existing test remains valid. +- `docker_handler_test.go` uses `fakeDockerService` and `fakeRemoteServerService`. New tests will add a `fakeOrthrusResolver`. +- `server_test.go` uses a real SQLite DB and real WebSocket pairs. New tests will exercise the proxy listener start/stop lifecycle. -#### 3.2.2 Backend Service — `backend/internal/services/orthrus_service.go` +--- -Replace `Rename` with `Patch`. Use GORM map-based updates to avoid zero-value overwrites: +## 3. Technical Specifications -```go -// Patch applies a partial update to an OrthrusAgent. -// Only non-nil pointer fields are written; omitted fields are left unchanged. -func (s *OrthrusService) Patch( - uuid string, - name, hecateTunnelUUID, deviceID, resolvedAddress *string, -) (*models.OrthrusAgent, error) { - updates := map[string]interface{}{} - - if name != nil { - trimmed := strings.TrimSpace(*name) - if trimmed == "" { - return nil, fmt.Errorf("orthrus: agent name cannot be blank") - } - updates["name"] = trimmed - } - if hecateTunnelUUID != nil { - updates["hecate_tunnel_uuid"] = *hecateTunnelUUID - } - if deviceID != nil { - updates["device_id"] = *deviceID - } - if resolvedAddress != nil { - updates["resolved_address"] = *resolvedAddress - } +### 3.1 New Constant - if len(updates) == 0 { - return s.Get(uuid) - } +**File**: `backend/internal/orthrus/session.go` - if err := s.db.Model(&models.OrthrusAgent{}). - Where("uuid = ?", uuid). - Updates(updates).Error; err != nil { - return nil, fmt.Errorf("orthrus: patch agent %s: %w", uuid, err) - } - return s.Get(uuid) -} +```go +// streamTypeDocker is the yamux stream type byte for Docker socket proxy traffic. +// Must match streamTypeDocker in the Orthrus agent (agent/leash/leash.go). +const streamTypeDocker = byte(0x01) ``` -Remove the old `Rename` method (subsumed by `Patch`). +### 3.2 `AgentSession` Struct Changes -#### 3.2.3 Backend Handler — `backend/internal/api/handlers/orthrus_handler.go` +**File**: `backend/internal/orthrus/session.go` -Replace `renameRequest` struct with `patchAgentRequest` (all-optional pointers): +Add one field to `AgentSession`: ```go -type patchAgentRequest struct { - Name *string `json:"name"` - HecateTunnelUUID *string `json:"hecate_tunnel_uuid"` - DeviceID *string `json:"device_id"` - ResolvedAddress *string `json:"resolved_address"` +type AgentSession struct { + agentUUID string + agentName string + conn *websocket.Conn + session *yamux.Session + cancel context.CancelFunc + proxyPort int // ephemeral port; 0 until StartDockerProxy succeeds + listener net.Listener // nil until StartDockerProxy succeeds + mu sync.Mutex } ``` -Replace `Rename` handler with `Patch`: +**`GetProxyAddr()` sentinel updated** — the nil-check changes from `s.proxyPort == 0` to `s.listener == nil` for consistency with the idempotency guard introduced in `StartDockerProxy()`. The integer `s.proxyPort` field continues to hold the ephemeral port value and is still set atomically alongside `s.listener`. + +### 3.3 `StartDockerProxy()` Method + +**File**: `backend/internal/orthrus/session.go` ```go -func (h *OrthrusHandler) Patch(c *gin.Context) { - uuid := c.Param("uuid") - var req patchAgentRequest - if err := c.ShouldBindJSON(&req); err != nil { - c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()}) - return +// StartDockerProxy allocates an ephemeral TCP listener on localhost and starts +// accepting connections. Each accepted connection is proxied to the agent's +// Docker socket via a new yamux stream with stream type 0x01. +// Returns an error if the listener cannot be bound or if the proxy has already +// been started for this session (idempotency guard). +func (s *AgentSession) StartDockerProxy() error { + s.mu.Lock() + if s.listener != nil { + s.mu.Unlock() + return fmt.Errorf("orthrus: docker proxy already started for session %s", s.agentUUID) } - agent, err := h.svc.Patch(uuid, req.Name, req.HecateTunnelUUID, req.DeviceID, req.ResolvedAddress) + s.mu.Unlock() + + ln, err := net.Listen("tcp", "127.0.0.1:0") if err != nil { - c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) - return + return fmt.Errorf("orthrus: allocate proxy listener: %w", err) } - c.JSON(http.StatusOK, agent) -} -``` - -Route registration change: -```go -// Before: -rg.PATCH("/orthrus/agents/:uuid", h.Rename) -// After: -rg.PATCH("/orthrus/agents/:uuid", h.Patch) -``` - -#### 3.2.4 API Contract - -``` -PATCH /orthrus/agents/:uuid - -Request (all fields optional): -{ - "name": "string", // omit to keep current name - "hecate_tunnel_uuid": "string", // omit to keep current tunnel - "device_id": "string", // omit to keep current device - "resolved_address": "string" // omit to keep current address -} -Response 200: OrthrusAgent (full object) -Response 400: { "error": "..." } // invalid JSON or blank name -Response 500: { "error": "..." } // DB error or agent not found -``` + port := ln.Addr().(*net.TCPAddr).Port -#### 3.2.5 Frontend API Types — `frontend/src/api/orthrus.ts` - -Add new fields to `OrthrusAgent` interface: - -```typescript -export interface OrthrusAgent { - uuid: string; - name: string; - status: OrthrusStatus; - capabilities: string; - agent_cert_pem?: string; - last_heartbeat?: string; - last_seen?: string; - created_at: string; - updated_at: string; - // Provider assignment — set via Assign Provider dialog - hecate_tunnel_uuid?: string; // Agent's own outbound tunnel (≠ RemoteServer.hecate_tunnel_uuid) - device_id?: string; // Provider-specific peer ID; empty string for Cloudflare - resolved_address?: string; // Cached host: IP for Tailscale/NetBird, hostname for Cloudflare -} + s.mu.Lock() + s.listener = ln + s.proxyPort = port + s.mu.Unlock() -export interface PatchAgentRequest { - name?: string; - hecate_tunnel_uuid?: string; - device_id?: string; - resolved_address?: string; + go s.runProxyListener(ln) + return nil } - -export const patchAgent = async (uuid: string, req: PatchAgentRequest): Promise => { - const { data } = await client.patch(`/orthrus/agents/${uuid}`, req); - return data; -}; - -// Backward-compatible wrapper — existing callers of renameAgent are unaffected -export const renameAgent = async (uuid: string, name: string): Promise => - patchAgent(uuid, { name }); -``` - -#### 3.2.6 Frontend Hook — `frontend/src/hooks/useOrthrus.ts` - -```typescript -export const usePatchAgent = () => { - const queryClient = useQueryClient(); - return useMutation({ - mutationFn: ({ uuid, req }: { uuid: string; req: PatchAgentRequest }) => - patchAgent(uuid, req), - onSuccess: () => { - queryClient.invalidateQueries({ queryKey: AGENTS_QUERY_KEY }); - }, - }); -}; ``` -`useRenameAgent` stays unchanged — it still calls `renameAgent`. +### 3.4 `runProxyListener()` and `proxyConn()` Methods -#### 3.2.7 `OrthrusAgentManager.tsx` — Assign Provider Dialog +**File**: `backend/internal/orthrus/session.go` -**New "Provider" column header** (between Status and Last Seen): - -```tsx - - {t('hecate.agentManager.colProvider')} - -``` - -**New "Provider" cell per row** — shows `resolved_address` or "No provider assigned": - -```tsx - - {agent.hecate_tunnel_uuid - ? - {agent.resolved_address ?? agent.device_id ?? '—'} - - : - {t('hecate.agentManager.noProviderAssigned')} - - } - -``` +```go +// runProxyListener accepts TCP connections and proxies each to the agent's +// Docker socket via a new yamux stream. Exits when the listener is closed. +func (s *AgentSession) runProxyListener(ln net.Listener) { + for { + conn, err := ln.Accept() + if err != nil { + // Listener closed — normal shutdown. + return + } + go s.proxyConn(conn) + } +} -**New action button in Actions column** — "Assign Provider": - -```tsx - -``` +// proxyConn opens a yamux stream for Docker proxy traffic, writes the type byte, +// then bidirectionally copies until either side closes. +func (s *AgentSession) proxyConn(conn net.Conn) { + defer conn.Close() -**`AssignProviderDialog` component** (in same file or extracted to `AssignProviderDialog.tsx`): + stream, err := s.session.Open() + if err != nil { + // yamux session already closed; nothing to proxy. + return + } + defer stream.Close() -```tsx -interface AssignProviderDialogProps { - agent: OrthrusAgent - open: boolean - onClose: () => void -} + if _, err := stream.Write([]byte{streamTypeDocker}); err != nil { + return + } -function AssignProviderDialog({ agent, open, onClose }: AssignProviderDialogProps) { - const { t } = useTranslation() - const { tunnels } = useHecate() - const { mutate: patch, isPending } = usePatchAgent() - - const [selectedTunnelUUID, setSelectedTunnelUUID] = useState(agent.hecate_tunnel_uuid ?? '') - const [deviceId, setDeviceId] = useState(agent.device_id ?? '') - const [resolvedAddress, setResolvedAddress] = useState(agent.resolved_address ?? '') - const [pickerOpen, setPickerOpen] = useState(false) - - const selectedTunnel = tunnels.find(tn => tn.uuid === selectedTunnelUUID) - const provider = selectedTunnel?.provider as TunnelProvider | undefined - - const handleSave = () => { - patch( - { - uuid: agent.uuid, - req: { - hecate_tunnel_uuid: selectedTunnelUUID || undefined, - device_id: deviceId || undefined, - resolved_address: resolvedAddress || undefined, - }, - }, - { onSuccess: onClose }, - ) - } - - return ( - - - - - {t('hecate.agentManager.assignProviderTitle', { name: agent.name })} - - - -
- {/* Tunnel selector (grouped by provider type) */} -
- - -
- - {/* Device picker trigger (non-Cloudflare only) */} - {selectedTunnel && provider !== 'cloudflare' && ( -
-

{t('hecate.agentManager.deviceId')}

- {deviceId && ( -

{deviceId}

- )} - -
- )} - - {/* Cloudflare hostname input — replaces device picker for Cloudflare tunnels */} - {selectedTunnel && provider === 'cloudflare' && ( -
- - { - setResolvedAddress(e.target.value) - setDeviceId('') - }} - className="w-full bg-surface-subtle border border-border rounded-lg px-4 py-2 text-content-primary focus:outline-none focus:ring-2 focus:ring-blue-500" - aria-describedby="assign-cf-hostname-hint" - /> -

- {t('hecate.form.provider.cloudflareTunnelHint')} -

-
- )} - - {/* Resolved address (auto-filled from device pick, editable — non-Cloudflare only) */} - {selectedTunnel && provider !== 'cloudflare' && ( -
- - setResolvedAddress(e.target.value)} - placeholder="100.x.x.x or hostname" - className="w-full bg-surface-subtle border border-border rounded-lg px-4 py-2 text-content-primary focus:outline-none focus:ring-2 focus:ring-blue-500" - /> -
- )} -
- - - - - - - {/* Provider-specific device pickers */} - {pickerOpen && provider === 'tailscale' && ( - setPickerOpen(false)} - onSelect={device => { - setDeviceId(device.id) - setResolvedAddress(device.addresses[0] ?? '') - setPickerOpen(false) - }} - /> - )} - {pickerOpen && provider === 'netbird' && ( - setPickerOpen(false)} - onSelect={peer => { - setDeviceId(peer.id) - setResolvedAddress(peer.ip) - setPickerOpen(false) - }} - selectedId={deviceId} - /> - )} - {pickerOpen && provider === 'zerotier' && ( - setPickerOpen(false)} - onSelect={member => { - setDeviceId(member.node_id) - setResolvedAddress(member.ip_assignments[0] ?? '') - setPickerOpen(false) - }} - /> - )} -
-
- ) + var wg sync.WaitGroup + wg.Add(2) + go func() { + defer wg.Done() + defer stream.Close() + io.Copy(stream, conn) //nolint:errcheck + }() + go func() { + defer wg.Done() + defer conn.Close() + io.Copy(conn, stream) //nolint:errcheck + }() + wg.Wait() } ``` ---- +### 3.5 `Close()` Changes -### 3.3 Problem 3 — RemoteServers: 3-Radio Connection Model - -#### 3.3.1 `ConnectionTypeSelector.tsx` — Updated Types and Props - -```typescript -export type ConnectionMode = 'direct' | 'agent' | 'provider' -export type ConnectionType = 'direct' | 'orthrus' | 'cloudflare' | 'tailscale' | 'netbird' | 'zerotier' -export type HecateProvider = 'cloudflare' | 'tailscale' | 'netbird' | 'zerotier' - -export interface ConnectionTypeSelectorProps { - mode: ConnectionMode - onModeChange: (mode: ConnectionMode) => void - selectedTunnelUUID: string | null - selectedDeviceId: string | null - selectedAgentUUID: string | null - onTunnelSelect: (tunnelUUID: string, provider: HecateProvider) => void - onDeviceSelect: (deviceId: string, resolvedAddress: string) => void - onAgentSelect: (agentUUID: string) => void - disabled?: boolean -} -``` +**File**: `backend/internal/orthrus/session.go` -**3-radio render + tier-2 pickers:** - -```tsx -{/* 3 radio buttons */} -
- {t('hecate.form.mode.label')} -
- - {/* Direct */} - - - {/* Agent */} - - - {/* Provider */} - -
-
- -{/* Tier 2a: Agent picker */} -{mode === 'agent' && ( -
- - - {/* Warning when selected agent has no provider */} - {selectedAgentUUID && !selectedAgent?.resolved_address && ( - - )} -
-)} - -{/* Tier 2b: Provider tunnel picker */} -{mode === 'provider' && ( - -)} -``` +Close the listener in addition to the yamux session: + +```go +func (s *AgentSession) Close() error { + s.mu.Lock() + defer s.mu.Unlock() -#### 3.3.2 New `ProviderDevicePicker` Component + s.cancel() -File: `frontend/src/components/hecate/ProviderDevicePicker.tsx` + if s.listener != nil { + _ = s.listener.Close() + s.listener = nil + } -```typescript -interface ProviderDevicePickerProps { - selectedTunnelUUID: string | null - selectedDeviceId: string | null - tunnels: TunnelConfig[] - onTunnelSelect: (tunnelUUID: string, provider: HecateProvider) => void - onDeviceSelect: (deviceId: string, resolvedAddress: string) => void - disabled?: boolean + return s.session.Close() } ``` -Renders tunnel dropdown + conditionally shows either a freeform hostname text input (for `cloudflare`) or a device picker trigger button (for `tailscale`, `netbird`, `zerotier`) based on the selected tunnel's provider. Manages internal picker dialog state. `onDeviceSelect(deviceId, resolvedAddress)` propagates the final pair up to the form. - -**Cloudflare branch** — when `provider === 'cloudflare'`, renders a hostname input instead of the device picker trigger. Adds `cloudflareHost` local state that resets when the tunnel selection changes: - -```tsx -const [cloudflareHost, setCloudflareHost] = useState('') - -{provider === 'cloudflare' && ( -
- - { - setCloudflareHost(e.target.value) - onDeviceSelect('', e.target.value) - }} - className="w-full bg-surface-subtle border border-border rounded-lg px-4 py-2 text-content-primary focus:outline-none focus:ring-2 focus:ring-blue-500" - aria-describedby="cf-hostname-hint" - /> -

- {t('hecate.form.provider.cloudflareTunnelHint')} -

-
-)} -``` - -When the tunnel dropdown changes, reset `cloudflareHost` to `''` to clear any previously entered hostname. +### 3.6 `server.go` — `HandleWebSocket` Changes -**Non-Cloudflare path** — show the existing device picker trigger button for Tailscale / NetBird / ZeroTier. +**File**: `backend/internal/orthrus/server.go` -#### 3.3.3 `RemoteServerForm.tsx` — Simplified +After creating the session and before storing it, start the proxy listener: -**Updated `resolveConnectionMode()`:** - -```typescript -const resolveConnectionMode = (): ConnectionMode => { - if (!server?.connection_type || server.connection_type === 'direct') return 'direct' - if (server.connection_type === 'orthrus') return 'agent' - return 'provider' // cloudflare | tailscale | netbird | zerotier +```go +session, err := NewAgentSession(agent.UUID, agent.Name, conn) +if err != nil { + logger.Log().WithError(err).Error("orthrus: create agent session failed") + _ = conn.Close() + return } -``` - -**Simplified form state** — remove `orthrus_ip_mode`: - -```typescript -const [formData, setFormData] = useState({ - name: server?.name || '', - provider: server?.provider || 'generic', - host: server?.host || '', - port: server?.port ?? 22, - username: server?.username || '', - enabled: server?.enabled ?? true, - connection_mode: resolveConnectionMode() as ConnectionMode, - connection_type: (server?.connection_type ?? 'direct') as ConnectionType, - orthrus_agent_uuid: server?.orthrus_agent_uuid ?? '', - hecate_tunnel_uuid: server?.hecate_tunnel_uuid ?? '', - device_id: '', - resolved_address: '', -}) -``` -**Updated submit payload logic:** - -```typescript -if (formData.connection_mode === 'direct') { - payload.host = formData.host - payload.port = formData.port - payload.username = formData.username - payload.orthrus_agent_uuid = undefined - payload.hecate_tunnel_uuid = undefined -} else if (formData.connection_mode === 'agent') { - const agent = agents.find(a => a.uuid === formData.orthrus_agent_uuid) - payload.host = agent?.resolved_address ?? formData.host - payload.port = formData.port - payload.orthrus_agent_uuid = formData.orthrus_agent_uuid || undefined - payload.hecate_tunnel_uuid = undefined -} else if (formData.connection_mode === 'provider') { - payload.host = formData.resolved_address || formData.host - payload.port = formData.port - payload.hecate_tunnel_uuid = formData.hecate_tunnel_uuid || undefined - payload.orthrus_agent_uuid = undefined +if err := session.StartDockerProxy(); err != nil { + logger.Log().WithField("uuid", util.SanitizeForLog(agent.UUID)). + WithError(err).Warn("orthrus: failed to start docker proxy listener — Docker tunneling unavailable for this session") + // Non-fatal: session still registered; GetProxyAddr() returns "" -> Docker handler returns 502 } -``` -**Updated `ConnectionTypeSelector` props wiring:** - -```tsx - { - setFormData(prev => ({ - ...prev, - connection_mode: mode, - connection_type: mode === 'direct' ? 'direct' : prev.connection_type, - hecate_tunnel_uuid: mode === 'provider' ? prev.hecate_tunnel_uuid : '', - orthrus_agent_uuid: mode === 'agent' ? prev.orthrus_agent_uuid : '', - device_id: '', - resolved_address: '', - })) - }} - selectedTunnelUUID={formData.hecate_tunnel_uuid || null} - selectedDeviceId={formData.device_id || null} - selectedAgentUUID={formData.orthrus_agent_uuid || null} - onTunnelSelect={(uuid, p) => - setFormData(prev => ({ - ...prev, - connection_type: p as ConnectionType, - hecate_tunnel_uuid: uuid, - device_id: '', - resolved_address: '', - })) - } - onDeviceSelect={(deviceId, resolvedAddress) => - setFormData(prev => ({ ...prev, device_id: deviceId, resolved_address: resolvedAddress })) - } - onAgentSelect={uuid => - setFormData(prev => ({ - ...prev, - connection_type: 'orthrus', - orthrus_agent_uuid: uuid, - hecate_tunnel_uuid: '', - device_id: '', - resolved_address: '', - })) - } - disabled={loading} -/> +s.sessions.Store(agent.UUID, session) ``` -**Remove from `RemoteServerForm`:** All `showTailscalePicker`, `showNetBirdPicker`, `showZeroTierPicker` state variables and their corresponding JSX (device picking is now internal to `ProviderDevicePicker`). +### 3.7 `server.go` — `watchHeartbeat` Changes -**Remove from `RemoteServerForm`:** `orthrus_ip_mode` state and all conditional rendering based on it. +**File**: `backend/internal/orthrus/server.go` -#### 3.3.4 Data Flow Diagram +Call `sess.Close()` when the yamux session is found dead to ensure the listener goroutine exits: +```go +if !sess.IsAlive() { + _ = sess.Close() // closes listener goroutine; idempotent + s.markOffline(agentUUID) + s.sessions.Delete(agentUUID) + return +} ``` -User selects radio → - 'direct' → host/port/username fields visible - → payload: host=formData.host - - 'agent' → Agent picker dropdown - → pick agent UUID - → if agent.resolved_address: host derived automatically - → if not: warning shown with link to Agent page - → payload: host=agent.resolved_address, connection_type='orthrus' - - 'provider' → Tunnel dropdown (grouped by provider type) - → pick tunnel UUID - → if non-Cloudflare: device picker opens - → onDeviceSelect(deviceId, resolvedAddress) - → payload: host=resolvedAddress, connection_type=provider, - hecate_tunnel_uuid=tunnelUUID -``` - ---- -## 4. i18n Keys to Add +### 3.8 `docker_handler.go` — New Interface and Field -All additions go in `frontend/src/locales/en/translation.json`: +**File**: `backend/internal/api/handlers/docker_handler.go` -**Under `"hecate.providers"`:** -```json -"editTunnel": "Edit {{name}}", -"noTunnels": "No tunnels configured." -``` +Add interface and field: -**Under `"hecate.agentManager"`:** -```json -"colProvider": "Provider", -"assignProvider": "Assign provider to {{name}}", -"assignProviderTitle": "Assign Provider — {{name}}", -"providerTunnel": "Provider Tunnel", -"deviceId": "Device ID", -"resolvedAddress": "Resolved Address", -"noProviderAssigned": "No provider assigned", -"saveProviderAssignment": "Save Assignment" -``` +```go +// orthrusProxyResolver resolves the local TCP proxy address for a connected Orthrus agent. +// The address is in "host:port" form, suitable for use as tcp://host:port with Docker. +type orthrusProxyResolver interface { + GetProxyAddr(agentUUID string) (string, bool) +} -**Under `"hecate.form.mode"`:** -```json -"providerLabel": "Provider", -"providerDescription": "Route via a configured network provider (Tailscale, NetBird, etc.)", -"selectAgent": "Select an agent", -"selectDevice": "Select device", -"agent": { - "noProviderWarning": "This agent has no provider assigned — connectivity address unavailable.", - "noProviderLink": "Assign one on the Agent page." +type DockerHandler struct { + dockerService dockerContainerLister + remoteServerService remoteServerGetter + orthrusResolver orthrusProxyResolver // nil when Orthrus subsystem is unavailable } ``` -**Under `"hecate.form.provider"`:** -```json -"cloudflareTunnelHostname": "Tunnel Hostname", -"cloudflareTunnelHint": "Enter the public hostname configured for this Cloudflare tunnel (e.g. app.example.com)" -``` +**`NewDockerHandler` signature is unchanged.** Add a setter following the existing `caddyManager.SetOrthrusServer` pattern: -**Under `"remoteServers"`:** -```json -"connectionTypeProvider": "Provider" +```go +// SetOrthrusResolver configures the Orthrus proxy resolver for Docker tunneling. +// If never called (or called with nil), requests for Orthrus-backed servers +// return 503 Service Unavailable. +func (h *DockerHandler) SetOrthrusResolver(r orthrusProxyResolver) { + h.orthrusResolver = r +} ``` ---- - -## 5. Implementation Plan - -### Phase 1 — Playwright Tests (Expected Behaviour First) - -**New test files** (should fail until implementation is complete): - -- `tests/hecate-providers-edit.spec.ts` - - Provider card shows inline tunnel list - - Edit button per tunnel opens form with tunnel name pre-filled - - Save calls update endpoint and refreshes list - -- `tests/hecate-agent-provider.spec.ts` - - Agent row has "Assign Provider" button - - Dialog shows tunnel dropdown grouped by provider - - Selecting non-Cloudflare tunnel shows device picker trigger - - After selection, resolved address field is populated - - Save updates agent and shows resolved address in table +### 3.9 `docker_handler.go` — `ListContainers` Logic -- `tests/remote-server-3modes.spec.ts` - - Connection mode has exactly 3 radios: Direct, Agent, Provider - - Selecting Agent shows only agent picker - - Selecting Provider shows only tunnel + device picker - - Agent with no resolved_address shows warning - - Tailscale is not a top-level radio option - - `ProviderDevicePicker` shows text input instead of device picker when Cloudflare provider is selected - - Cloudflare case: user selects Cloudflare tunnel → enters hostname → submits → `host` field equals the typed hostname +**File**: `backend/internal/api/handlers/docker_handler.go` -### Phase 2 — Backend (Problem 2) +Replace the single `host = fmt.Sprintf(...)` line in the `serverID != ""` branch with connection-type-aware logic: -**Files and changes:** +```go +if serverID != "" { + server, err := h.remoteServerService.GetByUUID(serverID) + if err != nil { + log.WithFields(map[string]any{"server_id": util.SanitizeForLog(serverID)}).Warn("remote server not found") + c.JSON(http.StatusNotFound, gin.H{"error": "Remote server not found"}) + return + } -| File | Change | -|---|---| -| `backend/internal/models/orthrus_agent.go` | Add `HecateTunnelUUID`, `DeviceID`, `ResolvedAddress` fields | -| `backend/internal/services/orthrus_service.go` | Replace `Rename` with `Patch` (map-based partial update) | -| `backend/internal/api/handlers/orthrus_handler.go` | Replace `renameRequest`/`Rename` with `patchAgentRequest`/`Patch` | + switch server.ConnectionType { + case models.ConnectionTypeOrthrus: + if h.orthrusResolver == nil { + c.JSON(http.StatusServiceUnavailable, gin.H{ + "error": "Orthrus subsystem unavailable", + "details": "The Orthrus agent tunnel service is not running. Ensure CHARON_ENCRYPTION_KEY is set.", + }) + return + } + if server.OrthrusAgentUUID == nil || *server.OrthrusAgentUUID == "" { + c.JSON(http.StatusBadRequest, gin.H{"error": "Remote server has no linked Orthrus agent"}) + return + } + addr, ok := h.orthrusResolver.GetProxyAddr(*server.OrthrusAgentUUID) + if !ok { + log.WithFields(map[string]any{"agent_uuid": util.SanitizeForLog(*server.OrthrusAgentUUID)}).Warn("orthrus agent not connected") + c.JSON(http.StatusBadGateway, gin.H{ + "error": "Orthrus agent is not currently connected", + "details": "The Orthrus agent for this server is offline. Please ensure the agent is running and connected to Charon.", + }) + return + } + host = "tcp://" + addr // e.g. tcp://127.0.0.1:54321 -**Validation:** -```bash -cd /projects/Charon/backend && go build ./... && go vet ./... -cd /projects/Charon && go test ./backend/... + default: + // Direct, Tailscale, NetBird, ZeroTier, Cloudflare — use explicit host/port + host = fmt.Sprintf("tcp://%s:%d", server.Host, server.Port) + } +} ``` -### Phase 3 — Frontend (Problems 1, 2, 3) +### 3.10 `routes.go` — Hoist `orthrusServer` and Wire Resolver -**File order (dependencies flow downward):** +**File**: `backend/internal/api/routes/routes.go` -1. `frontend/src/api/orthrus.ts` — extend `OrthrusAgent`, add `patchAgent` -2. `frontend/src/hooks/useOrthrus.ts` — add `usePatchAgent` -3. `frontend/src/pages/HecateProviders.tsx` — inline list + edit wiring (Problem 1) -4. `frontend/src/components/hecate/OrthrusAgentManager.tsx` — Provider column + AssignProviderDialog (Problem 2) -5. `frontend/src/components/hecate/ProviderDevicePicker.tsx` — **new file** (Problem 3 dependency) -6. `frontend/src/components/hecate/ConnectionTypeSelector.tsx` — 3-radio mode (Problem 3) -7. `frontend/src/components/RemoteServerForm.tsx` — simplified form (Problem 3) -8. `frontend/src/locales/en/translation.json` — all new i18n keys +1. Hoist the variable declaration before the encryption-key `if` block: -**Validation:** -```bash -cd /projects/Charon/frontend && node node_modules/.bin/vitest run --reporter=verbose +```go +// Declared here so dockerHandler (created below the if-block) can access it. +var orthrusServer *orthrus.OrthrusServer ``` -### Phase 4 — Integration and Testing +2. Remove the `var orthrusServer *orthrus.OrthrusServer` declaration inside the `if` block (it becomes an assignment only). -**Update existing tests:** -- `frontend/src/pages/__tests__/HecateProviders.test.tsx` — inline list + edit flow tests -- `frontend/src/pages/__tests__/HecateAgent.test.tsx` — Assign Provider dialog tests -- Any test that imports `ConnectionMode` type must add `'provider'` to its type assertions +3. After `dockerHandler` is created (outside the block), wire the resolver: -**New test files:** -- `frontend/src/components/hecate/__tests__/ConnectionTypeSelector.test.tsx` -- `frontend/src/components/hecate/__tests__/ProviderDevicePicker.test.tsx` - -**Run E2E:** -```bash -cd /projects/Charon && npx playwright test tests/hecate-providers-edit.spec.ts tests/hecate-agent-provider.spec.ts tests/remote-server-3modes.spec.ts --project=firefox -``` - -### Phase 5 — GORM Security Scan + DoD - -```bash -./scripts/scan-gorm-security.sh --check +```go +dockerService := services.NewDockerService() +dockerHandler := handlers.NewDockerHandler(dockerService, remoteServerService) +if orthrusServer != nil { + dockerHandler.SetOrthrusResolver(orthrusServer) +} +dockerHandler.RegisterRoutes(management) ``` -New `OrthrusAgent` fields (`HecateTunnelUUID`, `DeviceID`, `ResolvedAddress`) are not credentials — scan should pass cleanly. - -Run local patch coverage report after backend and frontend coverage tests. - ---- - -## 6. Acceptance Criteria - -### Problem 1 - -- [ ] Each provider card lists its tunnels inline with name + status badge -- [ ] Each tunnel row has an accessible Edit button (`aria-label` includes tunnel name) -- [ ] Clicking Edit opens `HecateTunnelForm` in edit mode with tunnel name pre-filled -- [ ] Editing and saving calls `PUT /hecate/tunnels/:uuid` and refreshes the list -- [ ] Provider cards with no tunnels show "No tunnels configured" text -- [ ] "Add Provider" still opens form in create mode - -### Problem 2 - -- [ ] `orthrus_agents` DB table gains `hecate_tunnel_uuid`, `device_id`, `resolved_address` columns after AutoMigrate -- [ ] `PATCH /orthrus/agents/:uuid` with only `{ "name": "foo" }` renames successfully -- [ ] `PATCH /orthrus/agents/:uuid` with only `{ "hecate_tunnel_uuid": "...", "device_id": "...", "resolved_address": "..." }` assigns provider without changing name -- [ ] Agent table has "Provider" column showing resolved address or "No provider assigned" -- [ ] Assign Provider button per row opens dialog -- [ ] Dialog tunnel dropdown groups tunnels by provider type (Cloudflare / Tailscale / NetBird / ZeroTier) -- [ ] Selecting Tailscale tunnel shows TailscaleDevicePicker trigger -- [ ] Selecting NetBird tunnel shows NetBirdPeerPicker trigger -- [ ] Selecting ZeroTier tunnel shows ZeroTierMemberPicker trigger -- [ ] Selecting Cloudflare tunnel shows no device picker (cloudflare = tunnel IS the endpoint) -- [ ] After picking a device, Resolved Address is auto-filled -- [ ] Saving updates the agent and table refreshes with new resolved address -- [ ] `useRenameAgent` inline rename in the table still works unchanged - -### Problem 3 - -- [ ] Connection mode selector shows exactly 3 radios: Direct, Agent, Provider -- [ ] Selecting Direct shows host/port/username fields; hides agent and tunnel pickers -- [ ] Selecting Agent shows agent picker dropdown; hides tunnel picker; hides host field -- [ ] Selecting Provider shows tunnel picker grouped by type + device picker; hides agent picker; hides host field -- [ ] Agent picker option labels indicate "(No provider assigned)" when `resolved_address` is empty -- [ ] Warning + link shown when selected agent has no `resolved_address` -- [ ] Saving in Agent mode sends `host = agent.resolved_address`, `connection_type = 'orthrus'` -- [ ] Saving in Provider mode sends `host = device.resolvedAddress`, `connection_type = provider`, `hecate_tunnel_uuid` -- [ ] Tailscale does NOT appear as a top-level radio — only under Provider > Tailscale group - --- -## 7. Commit Slicing Strategy - -### Decision - -Single PR #983 with 3 ordered commits. Commits 1 and 2 are independently compilable and have standalone test gates. Commit 3 has a TypeScript type dependency on Commit 2 (`OrthrusAgent.resolved_address` must be present in `api/orthrus.ts`) and must be developed and reviewed after Commit 2 is merged or cherry-picked. - -### Commit 1 — Frontend only (Problem 1) - -**Subject**: `feat(hecate/providers): add inline tunnel list with edit support` - -**Files:** -- `frontend/src/pages/HecateProviders.tsx` -- `frontend/src/locales/en/translation.json` (`hecate.providers.editTunnel`, `hecate.providers.noTunnels`) -- `frontend/src/pages/__tests__/HecateProviders.test.tsx` - -**Dependencies**: None — HecateTunnelForm edit mode already works -**Validation gate**: `node node_modules/.bin/vitest run src/pages/__tests__/HecateProviders.test.tsx` -**Risk**: Low — purely additive - -### Commit 2 — Backend + Frontend (Problem 2) - -**Subject**: `feat(orthrus): add generic provider assignment to agents` - -**Files:** -- `backend/internal/models/orthrus_agent.go` -- `backend/internal/services/orthrus_service.go` -- `backend/internal/api/handlers/orthrus_handler.go` -- `backend/internal/api/handlers/orthrus_handler_test.go` *(new test cases — see below)* -- `frontend/src/api/orthrus.ts` -- `frontend/src/api/__tests__/orthrus.test.ts` *(new test cases — see below)* -- `frontend/src/hooks/useOrthrus.ts` -- `frontend/src/components/hecate/OrthrusAgentManager.tsx` -- `frontend/src/locales/en/translation.json` (agentManager keys) -- `frontend/src/pages/__tests__/HecateAgent.test.tsx` - -**New backend test cases** (`orthrus_handler_test.go`): -- `TestOrthrusHandler_PatchAgent_NameOnly` — PATCH with only `{ "name": "new-name" }` → 200, name updated, tunnel fields unchanged -- `TestOrthrusHandler_PatchAgent_TunnelFields` — PATCH with only tunnel fields → 200, name unchanged, tunnel fields updated -- `TestOrthrusHandler_PatchAgent_EmptyBody` — PATCH with `{}` → 200, returns current agent unchanged -- `TestOrthrusHandler_PatchAgent_UnknownUUID` — PATCH with valid body but unknown UUID → 404 +## 4. Data Flow -**New frontend API test cases** (`orthrus.test.ts`): -- `renameAgent delegates to patchAgent with only name field` — verify `axios.patch` called with `{ name: 'foo' }` and no other fields -- `patchAgent with provider fields sends correct partial payload` — verify only the provided fields appear in the PATCH body +### 4.1 Happy Path — Orthrus Agent Connected -**Dependencies**: None (fully independent of Commit 1) -**Validation gate**: -```bash -cd /projects/Charon/backend && go build ./... && go vet ./... && go test ./backend/internal/api/handlers/... -cd /projects/Charon/frontend && node node_modules/.bin/vitest run src/pages/__tests__/HecateAgent.test.tsx src/api/__tests__/orthrus.test.ts ``` -**Risk**: Medium — AutoMigrate adds nullable columns (no data loss) -**Rollback note**: Orphaned DB columns are harmless if reverted - -### Commit 3 — Frontend only (Problem 3, gates on Commit 2 type) - -**Subject**: `feat(remote-servers): replace 2-radio connection mode with 3-radio model` - -**Files:** -- `frontend/src/components/hecate/ConnectionTypeSelector.tsx` -- `frontend/src/components/hecate/ProviderDevicePicker.tsx` (new) -- `frontend/src/components/RemoteServerForm.tsx` -- `frontend/src/locales/en/translation.json` (form.mode keys) -- `frontend/src/components/hecate/__tests__/ConnectionTypeSelector.test.tsx` (new) - -**Dependencies**: Commit 2 (`OrthrusAgent.resolved_address` must be in the type definition for the warning logic) -**Validation gate**: -```bash -cd /projects/Charon/frontend && node node_modules/.bin/vitest run --reporter=verbose +Client + → GET /api/v1/docker/containers?server_id= + → DockerHandler.ListContainers + → remoteServerService.GetByUUID(uuid) + → RemoteServer{connection_type: "orthrus", orthrus_agent_uuid: "agent-abc"} + → h.orthrusResolver.GetProxyAddr("agent-abc") + → AgentSession.proxyPort = 54321 + → returns ("127.0.0.1:54321", true) + → host = "tcp://127.0.0.1:54321" + → dockerService.ListContainers(ctx, "tcp://127.0.0.1:54321") + → Docker client dials 127.0.0.1:54321 + → TCP connection accepted by AgentSession.runProxyListener + → AgentSession.proxyConn: + → session.Open() → new yamux stream + → stream.Write([]byte{0x01}) + → io.Copy(stream, conn) / io.Copy(conn, stream) + → yamux stream arrives at agent + → agent.handleDockerStream: + → filter.ServeProxy("/var/run/docker.sock", stream, stream) + → HTTP request forwarded to local Docker API + → Docker API response returns through tunnel + → []DockerContainer returned + → 200 OK JSON ``` -**Risk**: Medium — breaks existing 2-radio pattern; existing tests for RemoteServerForm must be updated - ---- - -## 8. Edge Cases and Error Handling - -| Scenario | Handling | -|---|---| -| Agent selected but later deleted | `agents.find()` returns undefined → show "Agent not found" in picker; do not submit | -| Agent has no `resolved_address` | Show amber warning with link; allow save; host will be empty string (backend returns 400) | -| Cloudflare tunnel selected as provider | `ProviderDevicePicker` renders a freeform hostname text input (`cf-hostname`) in place of the device picker. `onDeviceSelect('', enteredHost)` is called on `onChange`. `resolved_address` stores the typed public hostname (e.g. `app.example.com`). Same pattern in `AssignProviderDialog`: Cloudflare block writes directly to `resolvedAddress` state and clears `deviceId`; the generic resolved address field is hidden for Cloudflare. | -| ZeroTier network has no members | ZeroTierMemberPicker renders empty state message | -| `PATCH /orthrus/agents/:uuid` with empty body `{}` | No DB write; returns current agent state | -| Dangling `hecate_tunnel_uuid` (tunnel deleted after assignment) | UI shows tunnel UUID with fallback "Tunnel not found" label; still submittable | -| `name` field sent as empty string | Service rejects with "name cannot be blank" → 500 → form shows error | -| Multiple tunnels of same provider type | `` shows all; user selects the correct one | - ---- - -## 9. Non-Goals - -- Automatic `resolved_address` refresh on agent heartbeat (future work) -- Cloudflare sub-device/hostname picker (Cloudflare tunnels route by hostname, not peer IP) -- Removing deprecated `connection_type` enum values from the database -- Changes to Caddy config generation (existing `hecate_tunnel_uuid` handling stays as-is) -- Support for assigning multiple providers to a single agent - ---- - -## Phase 4: UX Enhancements & Test Gap Closure - -**Scope**: Targeted follow-up to the Hecate provider/agent redesign. Fixes broken E2E tests caused by the `` implementation that was replaced by three radio buttons rendered by `ConnectionTypeSelector.tsx`. These tests will fail with "Element not found" or type-mismatch errors at runtime. - -#### A.2 Broken Tests Identified - -| Line range | Test title | Broken step | Broken selector/call | -|---|---|---|---| -| ~173–196 | "should open Add Server form when Add Server button is clicked" | "Verify Connection Type selector is present" | `page.locator('#connection-type').or(page.getByRole('combobox', { name: /connection type/i }))` — neither exists | -| ~207–232 | "should show orthrus agent section when orthrus connection type is selected" | "Change connection type to Orthrus Agent" | `page.locator('#connection-type').selectOption('orthrus')` — `selectOption` on a non-existent select | -| ~234–253 | "should show cloudflare wizard when cloudflare connection type is selected" | "Change connection type to Cloudflare Tunnel" | `page.locator('#connection-type').selectOption('cloudflare')` — same | -| ~255–281 | "Connection Type selector accessibility snapshot" | "Verify connection type selector accessibility" | `matchAriaSnapshot` asserting `combobox "Connection Type"` with options `Direct / Orthrus Agent / Cloudflare Tunnel` — no such combobox exists | - -#### A.3 New DOM Structure (from `ConnectionTypeSelector.tsx`) +### 4.2 Agent Disconnected — 502 ``` -
- Connection mode ← t('hecate.form.mode.label') - - - -
+GET /api/v1/docker/containers?server_id= +→ server.ConnectionType == "orthrus" +→ h.orthrusResolver.GetProxyAddr("agent-abc") → ("", false) +→ 502 Bad Gateway {"error": "Orthrus agent is not currently connected", ...} ``` -When `mode === 'agent'`: -``` - ← agent dropdown -``` +### 4.3 Orthrus Subsystem Unavailable (no encryption key) — 503 -When `mode === 'provider'`: ``` -
← ProviderDevicePicker (mocked in unit tests) +GET /api/v1/docker/containers?server_id= +→ server.ConnectionType == "orthrus" +→ h.orthrusResolver == nil +→ 503 Service Unavailable {"error": "Orthrus subsystem unavailable", ...} ``` -#### A.4 Replacement Spec - -**File**: `tests/hecate-tunnel-manager.spec.ts` - ---- - -**Test: "should open Add Server form — verify connection mode radios present"** - -Replace the broken step "Verify Connection Type selector is present" with: +### 4.4 Session Disconnect — Listener Cleanup -```typescript -await test.step('Verify connection mode radio group is present', async () => { - const directRadio = page.getByRole('radio', { name: /direct/i }); - await expect(directRadio).toBeVisible(); - await expect(page.getByRole('radio', { name: /agent/i })).toBeVisible(); - await expect(page.getByRole('radio', { name: /provider/i })).toBeVisible(); - // Direct mode is selected by default - await expect(directRadio).toBeChecked(); -}); ``` - ---- - -**Test: "should show agent section when Agent radio is selected"** - -Replace the entire `#connection-type.selectOption('orthrus')` flow: - -```typescript -test('should show agent dropdown when Agent radio is selected', async ({ page }) => { - await page.route(ORTHRUS_AGENTS_API, (route) => { - route.fulfill({ json: [] }); - }); - - await page.goto('/hecate/remote-servers'); - await waitForLoadingComplete(page); - - await test.step('Open Add Server form', async () => { - await page.getByRole('button', { name: /add server/i }).first().click(); - await expect(page.getByRole('heading', { name: /add remote server/i })).toBeVisible({ timeout: 5000 }); - }); - - await test.step('Select Agent radio', async () => { - await page.getByRole('radio', { name: /^agent$/i }).click(); - await expect(page.getByRole('radio', { name: /^agent$/i })).toBeChecked(); - }); - - await test.step('Verify agent select dropdown appears', async () => { - const agentSelect = page.locator('#cts-agent'); - await expect(agentSelect).toBeVisible({ timeout: 5000 }); - }); - - await test.step('Verify host/port fields are hidden for agent mode', async () => { - await expect(page.getByRole('textbox', { name: /^host$/i })).toHaveCount(0); - }); -}); +watchHeartbeat: sess.IsAlive() == false + → sess.Close() + → s.listener.Close() ← runProxyListener exits Accept() loop + → s.session.Close() + → markOffline(agentUUID) + → s.sessions.Delete(agentUUID) ``` ---- - -**Test: "should show provider picker when Provider radio is selected"** - -Replace the broken Cloudflare test. Note: in the new design there is no "cloudflare" radio — instead "Provider" mode loads `ProviderDevicePicker`. The test should verify the picker area appears and host/port are hidden: - -```typescript -test('should show provider picker when Provider radio is selected', async ({ page }) => { - await page.route(HECATE_TUNNELS_API, (route) => { - route.fulfill({ json: [] }); - }); - - await page.goto('/hecate/remote-servers'); - await waitForLoadingComplete(page); - - await test.step('Open Add Server form', async () => { - await page.getByRole('button', { name: /add server/i }).first().click(); - await expect(page.getByRole('heading', { name: /add remote server/i })).toBeVisible({ timeout: 5000 }); - }); - - await test.step('Select Provider radio', async () => { - await page.getByRole('radio', { name: /^provider$/i }).click(); - await expect(page.getByRole('radio', { name: /^provider$/i })).toBeChecked(); - }); - - await test.step('Verify host/port fields are hidden for provider mode', async () => { - await expect(page.getByRole('textbox', { name: /^host$/i })).toHaveCount(0); - await expect(page.getByRole('spinbutton', { name: /port/i })).toHaveCount(0); - }); -}); -``` +> **Async goroutine completion**: `proxyConn` goroutines launched from `runProxyListener` complete asynchronously after `Close()` returns. They are bounded by the time to flush in-flight `io.Copy` calls, which is safe because closing the yamux session terminates all streams, causing the in-flight `io.Copy` on the stream side to return promptly. --- -**Test: "Connection Type selector accessibility snapshot"** - -Replace the old combobox aria snapshot: - -```typescript -test('Connection mode radio group accessibility snapshot', async ({ page }) => { - await page.goto('/hecate/remote-servers'); - await waitForLoadingComplete(page); - - await test.step('Open Add Server form', async () => { - await page.getByRole('button', { name: /add server/i }).first().click(); - await expect(page.getByRole('heading', { name: /add remote server/i })).toBeVisible({ timeout: 5000 }); - }); - - await test.step('Verify connection mode fieldset accessibility', async () => { - const fieldset = page.locator('fieldset').filter({ has: page.getByRole('radio', { name: /direct/i }) }); - await expect(fieldset).toMatchAriaSnapshot(` - - group "Connection mode": - - radio "Direct" [checked] - - radio "Agent" - - radio "Provider" - `); - }); -}); -``` - -> **Note on aria snapshot text**: Radio labels include the description suffix appended by the `` next to each label (e.g. "— Connect via IP or hostname"). Run the test once with `--update-snapshots` to capture the exact text if descriptions appear in the snapshot. - -#### A.5 Tests That Are Unaffected +## 5. Error Handling and Edge Cases -The following tests do **not** touch `#connection-type` or `selectOption` and remain correct as-is: - -- All "Connection Column - Direct Server" tests -- All "Connection Column - Orthrus/Tunnel Server" tests (use `ORTHRUS_SERVER` fixture with `connection_type: 'orthrus'`, not the form) -- All "TunnelLogViewer" tests -- All "Page Accessibility" tests +| Scenario | Where Detected | Response | +|----------|---------------|----------| +| Agent not connected | `GetProxyAddr` returns `("", false)` | `ListContainers` → 502 Bad Gateway | +| Orthrus subsystem unavailable | `h.orthrusResolver == nil` | `ListContainers` → 503 Service Unavailable | +| `OrthrusAgentUUID` is nil/empty on the server record | `server.OrthrusAgentUUID == nil` check | `ListContainers` → 400 Bad Request | +| Port allocation failure on connect | `net.Listen` returns error | `HandleWebSocket` logs warning; session registered with `proxyPort == 0`; Docker returns 502 | +| Agent disconnects mid-request | `io.Copy` returns error | `proxyConn` exits; Docker client gets connection reset; `ListContainers` returns error → 503 | +| yamux stream open failure | `session.Open()` returns error | `proxyConn` returns; TCP conn closed; Docker client retries or fails | +| Concurrent `Close()` calls | `s.mu.Lock()` in `Close()`, `listener = nil` after close | Idempotent; second call is a no-op for the listener | +| `StartDockerProxy` called a second time | `s.listener != nil` guard under mutex | Returns error; caller logs and skips; no new listener | --- -### B. New Unit Test File: `OrthrusAgentManager` - -**File to create**: `frontend/src/components/hecate/__tests__/OrthrusAgentManager.test.tsx` - -#### B.1 Component Overview (`OrthrusAgentManager.tsx`) - -- Renders a `` with columns: **Name** (inline-editable), **UUID**, **Status**, **Provider**, **Last Seen**, **Actions** -- The **Provider** cell (`AgentRow`): - - If `agent.hecate_tunnel_uuid` is truthy: shows `agent.resolved_address ?? agent.device_id ?? '—'` in `font-mono text-content-primary` - - Otherwise: shows `t('hecate.agentManager.noProviderAssigned')` = `"No provider assigned"` in italic muted text -- The **Actions** cell contains two buttons per row: - - `Link2` icon button — `aria-label = t('hecate.agentManager.assignProvider', { name })` = `"Assign provider to {name}"`; calls `onAssignProvider(agent)` → sets `assignProviderAgent` state → mounts `` - - `Trash2` icon button — `aria-label = t('hecate.agentManager.deleteLabel', { name })` = `"Delete agent {name}"`; calls `handleDeleteRequest(uuid, name)` → sets `confirmDelete` state → mounts a confirm `` -- When `agents.length === 0`: renders empty-state paragraph with `t('hecate.agentManager.noAgents')` = `"No agents registered yet."` -- Inline name editing: click name text → Input appears → Enter/tick commits; Escape/X cancels - -#### B.2 Mock Setup Pattern - -Follow the pattern from `RemoteServerForm.test.tsx`: - -```typescript -import { QueryClient, QueryClientProvider } from '@tanstack/react-query' -import { render, screen, fireEvent, waitFor } from '@testing-library/react' -import { MemoryRouter } from 'react-router-dom' -import { describe, it, expect, vi, beforeEach } from 'vitest' - -import { OrthrusAgentManager } from '../OrthrusAgentManager' - -const mockDelete = vi.fn() -const mockRename = vi.fn() - -vi.mock('../../../hooks/useOrthrus', () => ({ - useDeleteAgent: () => ({ mutate: mockDelete, isPending: false }), - useRenameAgent: () => ({ mutate: mockRename, isPending: false }), -})) - -vi.mock('../AgentProviderAssignDialog', () => ({ - AgentProviderAssignDialog: ({ open, onClose, agent }: { - open: boolean; onClose: () => void; agent: { name: string } - }) => - open ? ( -
- -
- ) : null, -})) - -vi.mock('react-i18next', () => ({ - useTranslation: () => ({ - t: (key: string, opts?: Record) => - opts?.name ? `${key}:${opts.name}` : key, - }), -})) - -function renderManager(agents: Parameters[0]['agents']) { - const qc = new QueryClient({ defaultOptions: { queries: { retry: false } } }) - return render( - - - - - - ) -} -``` - -#### B.3 Test Cases - -**Fixture data**: - -```typescript -const agentWithProvider = { - uuid: 'agent-1', - name: 'Prod Agent', - status: 'online' as const, - capabilities: '["proxy"]', - hecate_tunnel_uuid: 'ts-uuid', - resolved_address: '100.72.3.4', - device_id: 'ts-device-1', - created_at: '2025-01-01T00:00:00Z', - updated_at: '2025-01-01T00:00:00Z', -} - -const agentWithoutProvider = { - uuid: 'agent-2', - name: 'Dev Agent', - status: 'offline' as const, - capabilities: '[]', - hecate_tunnel_uuid: undefined, - resolved_address: undefined, - device_id: undefined, - created_at: '2025-01-01T00:00:00Z', - updated_at: '2025-01-01T00:00:00Z', -} -``` +## 6. Files Changed -| # | Test description | Arrange | Act | Assert | -|---|---|---|---|---| -| 1 | Renders table with all column headers | `renderManager([agentWithProvider])` | — | `screen.getByRole('columnheader', { name: /hecate.agentManager.colName/i })` and same for UUID, Status, Provider, Last Seen | -| 2 | Shows `resolved_address` in Provider cell when agent has one | `renderManager([agentWithProvider])` | — | `screen.getByText('100.72.3.4')` is in the document | -| 3 | Shows fallback `device_id` when no `resolved_address` | agent with `hecate_tunnel_uuid` set but `resolved_address: undefined`, `device_id: 'abc'` | — | `screen.getByText('abc')` | -| 4 | Shows `—` when tunnel assigned but neither address nor device_id | agent with only `hecate_tunnel_uuid` set | — | `screen.getByText('—')` | -| 5 | Shows "No provider assigned" italic text when no tunnel | `renderManager([agentWithoutProvider])` | — | `screen.getByText('hecate.agentManager.noProviderAssigned')` | -| 6 | Clicking Link2 button opens AgentProviderAssignDialog | `renderManager([agentWithProvider])` | `fireEvent.click(screen.getByRole('button', { name: /hecate.agentManager.assignProvider:Prod Agent/i }))` | `screen.getByTestId('assign-dialog')` is in the document | -| 7 | Closing dialog via callback clears assignment state | same | click button then click CloseAssign | `screen.queryByTestId('assign-dialog')` is null | -| 8 | Clicking delete button opens confirm dialog | `renderManager([agentWithProvider])` | `fireEvent.click(screen.getByRole('button', { name: /hecate.agentManager.deleteLabel:Prod Agent/i }))` | `screen.getByRole('dialog')` is visible; contains agent name | -| 9 | Confirming delete calls `deleteAgent` mutation | same | open confirm dialog → click confirm button | `mockDelete` called with `'agent-1'` | -| 10 | Inline rename: clicking name opens input | `renderManager([agentWithProvider])` | `fireEvent.click(screen.getByRole('button', { name: /hecate.agentManager.editNameLabel:Prod Agent/i }))` | `screen.getByRole('textbox', { name: /hecate.agentManager.renameInputLabel:Prod Agent/i })` is visible | -| 11 | Inline rename: pressing Enter calls rename mutation | same → click name button → change input | `fireEvent.keyDown(input, { key: 'Enter' })` | `mockRename` called with `{ uuid: 'agent-1', name: 'New Name' }` | -| 12 | Inline rename: pressing Escape cancels without calling mutation | same → click name button | `fireEvent.keyDown(input, { key: 'Escape' })` | `mockRename` not called; input no longer in DOM | -| 13 | Empty state renders when no agents passed | `renderManager([])` | — | `screen.getByText('hecate.agentManager.noAgents')` is visible; `screen.queryByRole('table')` is null | +| File | Change Type | Summary | +|------|-------------|---------| +| `backend/internal/orthrus/session.go` | Modify | Add `streamTypeDocker` constant, `listener net.Listener` field, `StartDockerProxy()`, `runProxyListener()`, `proxyConn()`, update `Close()` | +| `backend/internal/orthrus/server.go` | Modify | Call `session.StartDockerProxy()` in `HandleWebSocket`; call `sess.Close()` in `watchHeartbeat` cleanup | +| `backend/internal/api/handlers/docker_handler.go` | Modify | Add `orthrusProxyResolver` interface, `orthrusResolver` field, `SetOrthrusResolver()`, update `ListContainers` switch on `ConnectionType` | +| `backend/internal/api/routes/routes.go` | Modify | Hoist `var orthrusServer` declaration; call `dockerHandler.SetOrthrusResolver(orthrusServer)` | +| `backend/internal/orthrus/session_test.go` | Modify | Add proxy lifecycle tests | +| `backend/internal/orthrus/server_test.go` | Modify | Add test for proxy start on session connect | +| `backend/internal/api/handlers/docker_handler_test.go` | Modify | Add orthrus resolver tests | +| `backend/internal/orthrus/proxy_integration_test.go` | Create | Integration test stub (`//go:build integration`) | --- -### C. Missing `AgentProviderAssignDialog` Test Cases - -**File**: `frontend/src/components/hecate/__tests__/AgentProviderAssignDialog.test.tsx` - -Append the following cases to the existing `describe('AgentProviderAssignDialog', ...)` block. The existing mock setup (`mockPatch`, `mockTunnels`, `vi.mock` calls) remains unchanged. +## 7. Implementation Plan -#### C.1 Cancel Button +### Phase 1 — Playwright Tests (UI/UX Specification) -```typescript -it('clicking Cancel closes dialog without calling patchAgent', () => { - const onClose = vi.fn() - render( - - ) +Write Playwright tests that define expected behavior before implementation. These will fail until the backend is wired up. - fireEvent.click(screen.getByRole('button', { name: /common.cancel/i })) +**File**: `tests/docker-orthrus-proxy.spec.ts` - expect(onClose).toHaveBeenCalledOnce() - expect(mockPatch).not.toHaveBeenCalled() -}) -``` - -#### C.2 Remove Provider Button (after D1 is implemented) - -This test is forward-declared and will be enabled once the Remove Provider button exists in `AgentProviderAssignDialog`: - -```typescript -it('clicking Remove Provider calls patchAgent with null fields', async () => { - const onClose = vi.fn() - const agentWithProvider = { - ...baseAgent, - hecate_tunnel_uuid: 'cf-uuid', - device_id: 'dev-1', - resolved_address: 'app.example.com', - } - - render( - - ) - - fireEvent.click( - screen.getByRole('button', { name: /hecate.agentManager.removeProviderAssignment/i }) - ) - - expect(mockPatch).toHaveBeenCalledWith( - { - uuid: 'agent-1', - req: { - hecate_tunnel_uuid: null, - device_id: null, - resolved_address: null, - }, - }, - expect.objectContaining({ onSuccess: expect.any(Function) }), - ) -}) -``` - -#### C.3 Pre-populated Values - -```typescript -it('opens with tunnel pre-selected when agent already has hecate_tunnel_uuid', () => { - const agentWithProvider = { - ...baseAgent, - hecate_tunnel_uuid: 'cf-uuid', - device_id: undefined, - resolved_address: 'app.example.com', - } - - render( - undefined} /> - ) - - const combobox = screen.getByRole('combobox') - expect(combobox).toHaveValue('cf-uuid') -}) - -it('opens with resolved address pre-filled when agent has resolved_address', async () => { - const agentWithProvider = { - ...baseAgent, - hecate_tunnel_uuid: 'cf-uuid', - resolved_address: 'app.example.com', - } - - render( - undefined} /> - ) - - fireEvent.change(screen.getByRole('combobox'), { target: { value: 'cf-uuid' } }) - - const hostnameInput = await screen.findByRole('textbox', { name: /cloudflareTunnelHostname/i }) - expect(hostnameInput).toHaveValue('app.example.com') -}) -``` - -#### C.4 Save Disabled State - -```typescript -it('Save button is disabled when no tunnel is selected', () => { - render( - undefined} /> - ) - - const saveButton = screen.getByRole('button', { name: /saveProviderAssignment/i }) - expect(saveButton).toBeDisabled() -}) +Tests: +1. **Agent offline** — with a remote server that has `connection_type: "orthrus"` and an agent that is offline, the Docker containers panel shows a "Orthrus agent is not currently connected" error state. +2. **No `server_id`** — local Docker containers still load normally (no regression). -it('Save button is enabled after a tunnel is selected', async () => { - render( - undefined} /> - ) +These tests validate the UI error handling introduced by the new 502/503 responses. - fireEvent.change(screen.getByRole('combobox'), { target: { value: 'cf-uuid' } }) +### Phase 2 — Backend: Session Proxy Listener - const saveButton = screen.getByRole('button', { name: /saveProviderAssignment/i }) - await waitFor(() => expect(saveButton).not.toBeDisabled()) -}) -``` - -#### C.5 Tailscale Device Picker Flow - -```typescript -it('shows TailscaleDevicePicker when Tailscale tunnel selected and Select device clicked', async () => { - render( - undefined} /> - ) - - fireEvent.change(screen.getByRole('combobox'), { target: { value: 'ts-uuid' } }) +**Files**: `session.go`, `server.go` - await waitFor(() => { - expect( - screen.getByRole('button', { name: /hecate.form.mode.selectDevice/i }) - ).toBeInTheDocument() - }) +- [ ] Add `streamTypeDocker = byte(0x01)` constant to `session.go` +- [ ] Add `listener net.Listener` field to `AgentSession` +- [ ] Implement `StartDockerProxy() error` +- [ ] Implement `runProxyListener(ln net.Listener)` +- [ ] Implement `proxyConn(conn net.Conn)` +- [ ] Update `Close()` to close `s.listener` (under mutex, set to nil) +- [ ] In `HandleWebSocket`: call `session.StartDockerProxy()`, log warning on failure (non-fatal) +- [ ] In `watchHeartbeat`: call `sess.Close()` before `markOffline` when session is dead +- [ ] Unit tests: + - `TestAgentSession_StartDockerProxy_SetsProxyAddr` — start proxy, assert `GetProxyAddr()` non-empty + - `TestAgentSession_StartDockerProxy_AcceptsConnection` — dial the proxy addr, verify type byte written to yamux stream + - `TestAgentSession_Close_StopsProxyListener` — start proxy, close session, verify listener no longer accepts + - `TestAgentSession_StartDockerProxy_CalledTwice` — call twice; second call returns error containing "already started"; `GetProxyAddr()` returns same address as first call; no additional listener port allocated + - `TestOrthrusServer_HandleWebSocket_StartsProxy` — full WebSocket handshake, assert session has non-empty proxy addr - fireEvent.click(screen.getByRole('button', { name: /hecate.form.mode.selectDevice/i })) +**Validation gate**: `go test -race ./backend/internal/orthrus/...` passes. - // TailscaleDevicePicker renders (mocked via useQuery returning []) - // Verify the picker container is present; actual content depends on TailscaleDevicePicker internals - // In CI, useQuery is mocked to return [] so the picker renders empty list - await waitFor(() => { - expect(screen.getByText(/hecate.tailscale.noDevices/i)).toBeInTheDocument() - }) -}) -``` +### Phase 3 — Backend: DockerHandler Integration -> **Note**: `useQuery` in the existing mock setup is `vi.fn().mockReturnValue({ data: [] })`. For Tailscale, `listTailscaleDevices` is called with `enabled: pickerOpen && provider === 'tailscale'`. After clicking "Select device", `pickerOpen` becomes `true`, triggering the query. The mock returns `[]`, so `TailscaleDevicePicker` receives an empty `devices` array and renders `t('hecate.tailscale.noDevices')`. - -#### C.6 ZeroTier Member Picker Flow - -```typescript -it('shows ZeroTierMemberPicker when ZeroTier tunnel selected and Select device clicked', async () => { - const mockTunnelsWithZT = [ - ...mockTunnels, - { uuid: 'zt-uuid', name: 'ZT Tunnel', provider: 'zerotier' }, - ] - // Re-render with extended mocks that include a ZeroTier tunnel - // (requires a local override of the useHecate mock for this test) - - render( - undefined} /> - ) - - // The ZeroTier tunnel must be in the mockTunnels list — add 'zt-uuid' to the - // top-level mockTunnels fixture or use a scoped override - fireEvent.change(screen.getByRole('combobox'), { target: { value: 'zt-uuid' } }) - - await waitFor(() => { - expect( - screen.getByRole('button', { name: /hecate.form.mode.selectMember/i }) - ).toBeInTheDocument() - }) - - fireEvent.click(screen.getByRole('button', { name: /hecate.form.mode.selectMember/i })) - - // ZeroTierMemberPicker should appear; verify its open state via a heading or landmark - // Exact aria depends on ZeroTierMemberPicker internals — at minimum dialog should be visible - await waitFor(() => { - expect(screen.getAllByRole('dialog')).toHaveLength(2) // outer AgentProviderAssignDialog + picker dialog - }) -}) -``` +**Files**: `docker_handler.go`, `routes.go`, `docker_handler_test.go` -> **Implementation note on ZT mock**: Add `{ uuid: 'zt-uuid', name: 'ZT Tunnel', provider: 'zerotier' }` to the top-level `mockTunnels` array so it's available across tests, or use `vi.mocked(useHecate).mockReturnValueOnce(...)` for this specific test. +- [ ] Add `orthrusProxyResolver` interface to `docker_handler.go` +- [ ] Add `orthrusResolver orthrusProxyResolver` field to `DockerHandler` +- [ ] Implement `SetOrthrusResolver(r orthrusProxyResolver)` +- [ ] Update `ListContainers`: replace single `fmt.Sprintf` with `switch server.ConnectionType` +- [ ] In `routes.go`: hoist `var orthrusServer` declaration; call `dockerHandler.SetOrthrusResolver(orthrusServer)` +- [ ] Unit tests: + - `TestDockerHandler_ListContainers_OrthrusAgentConnected` — resolver returns `("127.0.0.1:54321", true)`; verify `dockerSvc.host == "tcp://127.0.0.1:54321"` + - `TestDockerHandler_ListContainers_OrthrusAgentOffline` — resolver returns `("", false)`; verify 502 + - `TestDockerHandler_ListContainers_OrthrusSubsystemUnavailable` — `orthrusResolver == nil`; verify 503 + - `TestDockerHandler_ListContainers_OrthrusMissingAgentUUID` — server has `OrthrusAgentUUID == nil`; verify 400 + - `TestDockerHandler_SetOrthrusResolver_Nil` — explicit nil does not panic on request ---- +**Validation gate**: `go test -race ./backend/internal/api/...` passes; no regression in existing Docker handler tests. -### D. UX Enhancements +### Phase 4 — Integration Test Stub ---- +**File**: `backend/internal/orthrus/proxy_integration_test.go` -#### D1. "Remove Provider" Button in `AgentProviderAssignDialog` - -**File**: `frontend/src/components/hecate/AgentProviderAssignDialog.tsx` - -**Trigger**: shown only when `agent.hecate_tunnel_uuid` is non-null (agent already has a provider assigned). - -**What it does**: calls `patch` with all provider fields as `null`, then calls `onClose` on success. +```go +//go:build integration -**Where in the layout**: Add as the leftmost button inside ``, before Cancel. Use destructive styling to signal irreversibility. +package orthrus_test -**Implementation spec**: +import ( + "testing" +) -```tsx -// Add handler inside AgentProviderAssignDialog: -const handleRemove = () => { - patch( - { - uuid: agent.uuid, - req: { - hecate_tunnel_uuid: null, - device_id: null, - resolved_address: null, - }, - }, - { onSuccess: onClose }, - ) +// TestDockerProxyIntegration_FullTunnel exercises the complete path: +// TCP connection → local proxy listener → yamux stream → agent → Docker socket +// Requires a running Orthrus agent with Docker socket accessible. +func TestDockerProxyIntegration_FullTunnel(t *testing.T) { + t.Skip("requires running Orthrus agent with /var/run/docker.sock") } - -// In , before the Cancel button: -{agent.hecate_tunnel_uuid && ( - -)} ``` -**i18n key to add** in `frontend/src/locales/en/translation.json` under `hecate.agentManager`: +**Validation gate**: `go test -tags integration ./backend/internal/orthrus/...` — skips cleanly. -```json -"removeProviderAssignment": "Remove Provider" -``` +### Phase 5 — Documentation -**Accessibility**: -- `type="button"` — prevents accidental form submission -- `disabled={isPending}` — prevents double-fire while mutation is in flight -- `aria-label` matches the visible label (satisfies WCAG 2.5.3 Label in Name) -- Destructive border/text color ensures 3:1 contrast ratio against the surface background +- [ ] Update `ARCHITECTURE.md` component table: add row for "Orthrus Docker Proxy Listener" +- [ ] Confirm no OpenAPI spec changes needed (existing `GET /docker/containers` endpoint; response schema unchanged) --- -#### D2. Show Resolved Host Preview Before Save (`RemoteServerForm`) - -**File**: `frontend/src/components/RemoteServerForm.tsx` - -**Problem**: After the user selects a tunnel + device in Provider mode, `formData.resolved_address` is populated via `onDeviceSelect` callback (from `ProviderDevicePicker`), but nothing is displayed to the user before they click Create/Save. +## 8. Acceptance Criteria + +| # | Criterion | Verification | +|---|-----------|-------------| +| AC-1 | `AgentSession.GetProxyAddr()` returns a non-empty `127.0.0.1:PORT` address after `StartDockerProxy()` succeeds | Unit test `TestAgentSession_StartDockerProxy_SetsProxyAddr` | +| AC-2 | A TCP connection to the proxy address results in `0x01` being written to the agent's yamux stream | Unit test `TestAgentSession_StartDockerProxy_AcceptsConnection` | +| AC-3 | Closing an `AgentSession` causes `ln.Accept()` to return an error and `runProxyListener` to exit | Unit test `TestAgentSession_Close_StopsProxyListener` | +| AC-4 | `HandleWebSocket` registers a session with a non-zero `proxyPort` | Unit test `TestOrthrusServer_HandleWebSocket_StartsProxy` | +| AC-5 | `GET /docker/containers?server_id=` with a connected agent passes `tcp://127.0.0.1:PORT` to the Docker client | Unit test `TestDockerHandler_ListContainers_OrthrusAgentConnected` | +| AC-6 | Same request with a disconnected agent returns HTTP 502 with `"Orthrus agent is not currently connected"` | Unit test `TestDockerHandler_ListContainers_OrthrusAgentOffline` | +| AC-7 | Same request when Orthrus subsystem is unavailable returns HTTP 503 | Unit test `TestDockerHandler_ListContainers_OrthrusSubsystemUnavailable` | +| AC-8 | `RemoteServer` with `connection_type == "orthrus"` and nil `OrthrusAgentUUID` returns HTTP 400 | Unit test `TestDockerHandler_ListContainers_OrthrusMissingAgentUUID` | +| AC-9 | No regression in existing Docker handler tests for direct connection type | `go test ./backend/internal/api/handlers/...` | +| AC-10 | No regression in existing Orthrus session/server unit tests | `go test ./backend/internal/orthrus/...` | +| AC-11 | `go test -race ./backend/...` passes with no race conditions | CI | +| AC-12 | A second call to `StartDockerProxy()` on an already-started session returns a non-nil error containing `"already started"`. `GetProxyAddr()` still returns the address from the first successful call. No additional listener port is allocated. | `TestAgentSession_StartDockerProxy_CalledTwice` | -**Where to render**: Below the `` block, inside the same `
` wrapping the `