Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,21 @@ adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

### Added

- **Per-route scope enforcement on the management HTTP port.**
`WithMgmtControlAuth` is a new option that wraps the cluster-
mutating control endpoints (`POST /evict`, `POST /clear`,
`POST /trigger-expiration`) in a stricter auth gate than the
observability surface. The hypercache-server binary now wires
read-or-better on `/stats`/`/config`/`/cluster/*`/`/dist/*` and
admin-only on the control routes (see `cmd/hypercache-server/
main.go`). `/health` is intentionally NOT auth-wrapped — k8s
liveness probes don't carry credentials, and a probe failure
cascades into a pod-restart loop. Also new: `httpauth.Policy.Verify`,
the "block-with-error" sibling of `Middleware()` that adapters
(like `WithMgmtAuth`/`WithMgmtControlAuth`) use when they own
their own next-handler dispatch. Existing `Middleware()` is now
thin sugar over `Verify() + c.Next()` so the auth logic lives
in exactly one place.
- **`GET /v1/me` — resolved caller identity.** New scope-protected
(`read`) route that reads the resolved `httpauth.Identity` from
`c.Locals(httpauth.IdentityKey)` and returns
Expand Down
26 changes: 25 additions & 1 deletion cmd/hypercache-server/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -266,9 +266,33 @@ func buildHyperCache(ctx context.Context, cfg envConfig, logger *slog.Logger) (*
)
}

// Phase C2: light up scope enforcement on the management port.
// /health stays public (k8s liveness probes carry no creds).
// Read-or-better is required for the observability surface
// (/stats, /config, /dist/*, /cluster/*); admin scope is
// required for the cluster-mutating control routes (/evict,
// /clear, /trigger-expiration). Closes a long-standing gap
// where the mgmt port was fully unauthenticated server-side
// while the monitor's proxy carried the only check.
//
// Closure captures cfg.AuthPolicy by value — Policy is value-
// semantic and safe for concurrent use after construction;
// see pkg/httpauth/policy.go.
policy := cfg.AuthPolicy
mgmtReadAuth := func(fiberCtx fiber.Ctx) error {
return policy.Verify(fiberCtx, httpauth.ScopeRead)
}
mgmtAdminAuth := func(fiberCtx fiber.Ctx) error {
return policy.Verify(fiberCtx, httpauth.ScopeAdmin)
}

hcCfg.HyperCacheOptions = append(
hcCfg.HyperCacheOptions,
hypercache.WithManagementHTTP[backend.DistMemory](cfg.MgmtAddr),
hypercache.WithManagementHTTP[backend.DistMemory](
cfg.MgmtAddr,
hypercache.WithMgmtAuth(mgmtReadAuth),
hypercache.WithMgmtControlAuth(mgmtAdminAuth),
),
)

hc, err := hypercache.New(ctx, hypercache.GetDefaultManager(), hcCfg)
Expand Down
1 change: 1 addition & 0 deletions cspell.config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ words:
- distroless
- EDITMSG
- elif
- Equalf
- errcheck
- errp
- ewrap
Expand Down
78 changes: 65 additions & 13 deletions management_http.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,22 @@ type ManagementHTTPOption func(*ManagementHTTPServer)

// ManagementHTTPServer holds Fiber app and settings.
type ManagementHTTPServer struct {
addr string
app *fiber.App
readTimeout time.Duration
writeTimeout time.Duration
idleTimeout time.Duration
bodyLimit int
concurrency int
authFunc func(fiber.Ctx) error
addr string
app *fiber.App
readTimeout time.Duration
writeTimeout time.Duration
idleTimeout time.Duration
bodyLimit int
concurrency int
authFunc func(fiber.Ctx) error
// controlAuthFunc is an optional stricter auth gate applied
// only to the cluster-mutating control endpoints (/evict,
// /clear, /trigger-expiration). When set, it runs INSTEAD OF
// authFunc on those routes — typically configured to require
// admin scope while authFunc requires read. When nil, the
// control routes fall back to authFunc, preserving the
// pre-Phase-C2 single-gate behavior.
controlAuthFunc func(fiber.Ctx) error
ln net.Listener
started bool
listenerDeadline time.Duration
Expand All @@ -49,11 +57,28 @@ type ManagementHTTPServer struct {
serveErr atomic.Pointer[error]
}

// WithMgmtAuth installs the auth gate used by every protected route
// on the management port; the gate blocks a request by returning a
// non-nil error. /health is deliberately exempt so credential-less
// k8s liveness probes keep working.
//
// Combine with WithMgmtControlAuth to demand a stricter scope on the
// cluster-mutating endpoints (/evict, /clear, /trigger-expiration);
// when that option is absent, those routes use this gate as well.
func WithMgmtAuth(fn func(fiber.Ctx) error) ManagementHTTPOption {
	return func(srv *ManagementHTTPServer) {
		srv.authFunc = fn
	}
}

// WithMgmtControlAuth installs a stricter gate guarding only the
// cluster-mutating control endpoints (/evict, /clear,
// /trigger-expiration). Typically wired to
// httpauth.Policy.Verify(c, httpauth.ScopeAdmin) so a token holding
// only read or write scope cannot fire destructive operations via
// the management port. When unset, the control routes fall back to
// the WithMgmtAuth gate (the pre-Phase-C2 single-gate behavior).
func WithMgmtControlAuth(fn func(fiber.Ctx) error) ManagementHTTPOption {
	return func(srv *ManagementHTTPServer) {
		srv.controlAuthFunc = fn
	}
}

// WithMgmtReadTimeout sets read timeout.
func WithMgmtReadTimeout(d time.Duration) ManagementHTTPOption {
return func(s *ManagementHTTPServer) { s.readTimeout = d }
Expand Down Expand Up @@ -254,20 +279,42 @@ func (s *ManagementHTTPServer) Shutdown(ctx context.Context) error {
// mountRoutes registers every management endpoint, each routed
// through the appropriate auth wrapper. Registration is split into
// helpers to satisfy funlen.
func (s *ManagementHTTPServer) mountRoutes(hc managementCache) {
	gate := s.wrapAuth
	ctrlGate := s.wrapControlAuth

	s.registerBasic(gate, hc)
	s.registerDistributed(gate, hc)
	s.registerCluster(gate, hc)
	s.registerControl(ctrlGate, hc)
}

// wrapAuth returns an auth-wrapped handler if authFunc provided.
func (s *ManagementHTTPServer) wrapAuth(handler fiber.Handler) fiber.Handler {
if s.authFunc == nil {
return wrapWithGate(s.authFunc, handler)
}

// wrapControlAuth wraps handler with the stricter control-route gate
// when one is configured. Without a controlAuthFunc it defers to
// wrapAuth, preserving the pre-Phase-C2 single-gate behavior for
// operators who have not opted into admin-scope enforcement on the
// management port.
func (s *ManagementHTTPServer) wrapControlAuth(handler fiber.Handler) fiber.Handler {
	if s.controlAuthFunc == nil {
		return s.wrapAuth(handler)
	}

	return wrapWithGate(s.controlAuthFunc, handler)
}

// wrapWithGate applies an auth-gate function before invoking the
// underlying handler. Nil gate is a passthrough — same shape as
// before WithMgmtAuth was wired, used by deployments that haven't
// configured any auth on the mgmt port.
func wrapWithGate(gate func(fiber.Ctx) error, handler fiber.Handler) fiber.Handler {
if gate == nil {
return handler
}

return func(fiberCtx fiber.Ctx) error {
authErr := s.authFunc(fiberCtx)
authErr := gate(fiberCtx)
if authErr != nil {
return authErr
}
Expand All @@ -277,7 +324,12 @@ func (s *ManagementHTTPServer) wrapAuth(handler fiber.Handler) fiber.Handler {
}

func (s *ManagementHTTPServer) registerBasic(useAuth func(fiber.Handler) fiber.Handler, hc managementCache) {
s.app.Get("/health", useAuth(func(fiberCtx fiber.Ctx) error { return fiberCtx.SendString("ok") }))
// /health is intentionally NOT wrapped in useAuth — k8s
// liveness/readiness probes do not carry credentials, and
// a probe failure cascades into a pod-restart loop. Mirrors
// the client-API binary's `/healthz` exemption (see
// cmd/hypercache-server/main.go:registerClientRoutes).
s.app.Get("/health", func(fiberCtx fiber.Ctx) error { return fiberCtx.SendString("ok") })
s.app.Get("/stats", useAuth(func(fiberCtx fiber.Ctx) error { return fiberCtx.JSON(hc.GetStats()) }))
s.app.Get("/config", useAuth(func(fiberCtx fiber.Ctx) error {
cfg := map[string]any{
Expand Down
45 changes: 37 additions & 8 deletions pkg/httpauth/policy.go
Original file line number Diff line number Diff line change
Expand Up @@ -210,19 +210,48 @@ func (p Policy) Validate() error {
// that want any-authenticated-caller semantics.
func (p Policy) Middleware(required Scope) fiber.Handler {
return func(c fiber.Ctx) error {
identity, ok := p.resolve(c)
if !ok {
return c.SendStatus(fiber.StatusUnauthorized)
err := p.Verify(c, required)
if err != nil {
return err
}

if required != "" && !identity.HasScope(required) {
return c.SendStatus(fiber.StatusForbidden)
}
return c.Next()
}
}

c.Locals(IdentityKey, identity)
// Verify resolves the caller's credentials, checks the required
// scope, and on success stores the resolved Identity under
// c.Locals(IdentityKey) before returning nil. On failure it returns
// a *fiber.Error carrying 401 (no credentials matched) or 403
// (credentials matched but the scope is missing); Fiber's default
// error handler renders the canonical body for either status.
//
// Verify exists for adapters that own their own next-handler
// dispatch — e.g. ManagementHTTPServer's WithMgmtAuth and
// WithMgmtControlAuth gates, which short-circuit on any non-nil
// return and never invoke the wrapped handler. Middleware() is thin
// sugar over Verify() + c.Next(), so all auth decisions live here.
//
// CRITICAL: never swap the fiber.NewError returns for
// c.SendStatus(...). SendStatus reports nil on success, and a nil
// from the gate would let wrapWithGate-style adapters fall through
// to the wrapped handler, which would then write its own success
// status over the 401 body. Returning a *fiber.Error keeps both
// Middleware and the gate adapters fail-closed.
func (p Policy) Verify(c fiber.Ctx, required Scope) error {
	ident, resolved := p.resolve(c)

	switch {
	case !resolved:
		return fiber.NewError(fiber.StatusUnauthorized)
	case required != "" && !ident.HasScope(required):
		return fiber.NewError(fiber.StatusForbidden)
	}

	c.Locals(IdentityKey, ident)

	return nil
}

// resolve walks the credential resolution chain in priority order:
Expand Down
111 changes: 111 additions & 0 deletions pkg/httpauth/policy_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -366,3 +366,114 @@ func TestPolicy_HasScope(t *testing.T) {
t.Errorf("Write should match Write")
}
}

// TestPolicy_Verify covers the public Verify() entry point — the
// "block-with-error" sibling of Middleware used by adapters
// (ManagementHTTPServer.WithMgmtControlAuth and friends) that own
// their own next-handler dispatch. Semantics mirror Middleware:
// 401 without valid creds, 403 with the wrong scope, nil plus an
// Identity in Locals on success. Because both paths share
// resolve(), any drift between Middleware and Verify would be a
// security bug; this test pins their parity.
func TestPolicy_Verify(t *testing.T) {
	t.Parallel()

	policy := Policy{
		Tokens: []TokenIdentity{
			{ID: "ro", Token: "ro-token", Scopes: []Scope{ScopeRead}},
			{ID: "admin", Token: "admin-token", Scopes: []Scope{ScopeRead, ScopeWrite, ScopeAdmin}},
		},
	}

	tests := []struct {
		name   string
		header string
		scope  Scope
		want   int
	}{
		{"no creds → 401", "", ScopeRead, http.StatusUnauthorized},
		{"bad bearer → 401", "Bearer wrong", ScopeRead, http.StatusUnauthorized},
		{"read scope on read route → 200", "Bearer ro-token", ScopeRead, http.StatusOK},
		{"read scope on admin route → 403", "Bearer ro-token", ScopeAdmin, http.StatusForbidden},
		{"admin scope on admin route → 200", "Bearer admin-token", ScopeAdmin, http.StatusOK},
		{"empty scope (any-authenticated) → 200 with creds", "Bearer ro-token", "", http.StatusOK},
		{"empty scope still 401 without creds", "", "", http.StatusUnauthorized},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			t.Parallel()

			app := fiber.New()

			// Mount Verify exactly the way ManagementHTTPServer's
			// wrapAuth-style adapters do: run the gate first, then
			// invoke the handler only when it returns nil.
			requiredScope := tt.scope
			app.Get("/protected", func(c fiber.Ctx) error {
				if verifyErr := policy.Verify(c, requiredScope); verifyErr != nil {
					return verifyErr
				}

				return c.SendString("ok")
			})

			status := doStatus(t, app, tt.header)
			if status != tt.want {
				t.Fatalf("status: got %d, want %d", status, tt.want)
			}
		})
	}
}

// TestPolicy_Verify_StoresIdentityInLocals pins the side-effect
// contract: a successful Verify populates IdentityKey before
// returning. Adapters that read c.Locals(IdentityKey) — e.g. any
// future audit-attribution handler on the mgmt port — depend on
// it. Without this assertion a future refactor could regress
// Verify into "scope-check only" silently.
func TestPolicy_Verify_StoresIdentityInLocals(t *testing.T) {
	t.Parallel()

	p := Policy{
		Tokens: []TokenIdentity{
			{ID: "audit-target", Token: "tok", Scopes: []Scope{ScopeRead}},
		},
	}

	app := fiber.New()
	app.Get("/who", func(c fiber.Ctx) error {
		err := p.Verify(c, ScopeRead)
		if err != nil {
			return err
		}

		v := c.Locals(IdentityKey)

		id, ok := v.(Identity)
		if !ok {
			return c.Status(http.StatusInternalServerError).SendString("no identity")
		}

		return c.SendString(id.ID)
	})

	req := httptest.NewRequestWithContext(t.Context(), http.MethodGet, "/who", strings.NewReader(""))
	req.Header.Set("Authorization", "Bearer tok")

	resp, err := app.Test(req)
	if err != nil {
		t.Fatalf("app.Test: %v", err)
	}

	defer func() { _ = resp.Body.Close() }()

	// Drain the body to EOF instead of relying on a single Read
	// call: the io.Reader contract permits short reads, so a
	// one-shot Read into a fixed buffer could observe a partial
	// body and flake.
	var body strings.Builder

	buf := make([]byte, 64)

	for {
		n, readErr := resp.Body.Read(buf)
		body.Write(buf[:n])

		if readErr != nil {
			break
		}
	}

	got := body.String()
	if got != "audit-target" {
		t.Fatalf("locals identity ID = %q, want %q", got, "audit-target")
	}
}
Loading
Loading