From 2b36bde831a7c6bccf9b988cb1edee394412022d Mon Sep 17 00:00:00 2001 From: "F." Date: Mon, 4 May 2026 08:52:37 +0200 Subject: [PATCH 1/3] feat(dist): add configurable HTTP transport limits and response body caps Introduce DistHTTPLimits to bound inbound request bodies (server-side) and response bodies (client-side) on the dist HTTP transport, preventing unbounded memory consumption from oversized or malicious payloads. - Add DistHTTPLimits struct with tunable BodyLimit, ResponseLimit, timeouts, and concurrency; zero fields inherit package defaults - Add WithDistHTTPLimits option for DistMemory and WithMgmt{IdleTimeout, BodyLimit,Concurrency} options for ManagementHTTPServer - Fix ManagementHTTPServer fiber app construction order so user-supplied options are applied before the app is built - Cap client-side response bodies via http.MaxBytesReader; replace streaming JSON decoders with read-then-unmarshal to surface MaxBytesError instead of masking it as unexpected EOF - Extract drainBody helper to deduplicate drain+close across all transport methods - Add tests: server rejects oversized body (413), client rejects oversized response (MaxBytesError), default limits sanity check - Fix TestDistMemoryHeartbeatLiveness race by polling NodesRemoved metric after membership transition settles --- management_http.go | 62 +++++- pkg/backend/dist_http_server.go | 92 ++++++++- pkg/backend/dist_http_transport.go | 179 ++++++++---------- pkg/backend/dist_memory.go | 28 ++- pkg/backend/dist_memory_test_helpers.go | 7 +- tests/dist_http_limits_test.go | 176 +++++++++++++++++ tests/hypercache_distmemory_heartbeat_test.go | 26 +++ 7 files changed, 461 insertions(+), 109 deletions(-) create mode 100644 tests/dist_http_limits_test.go diff --git a/management_http.go b/management_http.go index 1a427ff..c4b2a87 100644 --- a/management_http.go +++ b/management_http.go @@ -22,6 +22,9 @@ type ManagementHTTPServer struct { app *fiber.App readTimeout time.Duration writeTimeout time.Duration + idleTimeout time.Duration + bodyLimit int + concurrency int authFunc func(fiber.Ctx) error ln net.Listener started bool @@ -43,30 +46,77 @@ func WithMgmtWriteTimeout(d time.Duration) ManagementHTTPOption { return func(s *ManagementHTTPServer) { s.writeTimeout = d } } +// WithMgmtIdleTimeout sets the keep-alive idle timeout. Without this idle +// connections accumulate; fiber's default is unbounded. <=0 keeps the +// package default. +func WithMgmtIdleTimeout(d time.Duration) ManagementHTTPOption { + return func(s *ManagementHTTPServer) { + if d > 0 { + s.idleTimeout = d + } + } +} + +// WithMgmtBodyLimit caps inbound request body bytes. Defaults to fiber's +// 4 MiB. <=0 keeps the package default. +func WithMgmtBodyLimit(bytes int) ManagementHTTPOption { + return func(s *ManagementHTTPServer) { + if bytes > 0 { + s.bodyLimit = bytes + } + } +} + +// WithMgmtConcurrency caps simultaneous in-flight handlers. <=0 keeps the +// package default (256 KiB, matching fiber). +func WithMgmtConcurrency(n int) ManagementHTTPOption { + return func(s *ManagementHTTPServer) { + if n > 0 { + s.concurrency = n + } + } +} + const ( defaultReadTimeout = 5 * time.Second defaultWriteTimeout = 5 * time.Second defaultListenerDeadline = 2 * time.Second + // defaultMgmtIdleTimeout caps keep-alive idle connections. + defaultMgmtIdleTimeout = 60 * time.Second + // defaultMgmtBodyLimit matches fiber's own default but is stated + // explicitly so the value is visible in /config and tunable via + // WithMgmtBodyLimit. + defaultMgmtBodyLimit = 4 * 1024 * 1024 + // defaultMgmtConcurrency matches fiber's own default. + defaultMgmtConcurrency = 256 * 1024 ) // NewManagementHTTPServer builds an HTTP server holder (lazy start). func NewManagementHTTPServer(addr string, opts ...ManagementHTTPOption) *ManagementHTTPServer { - app := fiber.New(fiber.Config{ - ReadTimeout: defaultReadTimeout, - WriteTimeout: defaultWriteTimeout, - }) - srv := &ManagementHTTPServer{ addr: addr, - app: app, readTimeout: defaultReadTimeout, writeTimeout: defaultWriteTimeout, + idleTimeout: defaultMgmtIdleTimeout, + bodyLimit: defaultMgmtBodyLimit, + concurrency: defaultMgmtConcurrency, listenerDeadline: defaultListenerDeadline, } for _, opt := range opts { // apply options opt(srv) } + // Construct the fiber app *after* options apply so user-supplied + // timeouts/limits actually take effect (the previous order built the + // app with default config, then mutated unrelated struct fields). + srv.app = fiber.New(fiber.Config{ + ReadTimeout: srv.readTimeout, + WriteTimeout: srv.writeTimeout, + IdleTimeout: srv.idleTimeout, + BodyLimit: srv.bodyLimit, + Concurrency: srv.concurrency, + }) + return srv } diff --git a/pkg/backend/dist_http_server.go b/pkg/backend/dist_http_server.go index d0597fa..bb2ebe6 100644 --- a/pkg/backend/dist_http_server.go +++ b/pkg/backend/dist_http_server.go @@ -26,10 +26,98 @@ type distHTTPServer struct { const ( httpReadTimeout = 5 * time.Second httpWriteTimeout = 5 * time.Second + + // defaultDistHTTPBodyLimit caps inbound request bodies the dist HTTP + // server will accept. 16 MiB is generous for typical cache values + // while still rejecting absurd payloads. Tunable via + // WithDistHTTPLimits. + defaultDistHTTPBodyLimit = 16 * 1024 * 1024 + // defaultDistHTTPResponseLimit caps inbound response bodies the dist + // HTTP client will accept. Mirrors BodyLimit so a peer cannot OOM + // the requester via an oversized response. + defaultDistHTTPResponseLimit int64 = 16 * 1024 * 1024 + // defaultDistHTTPIdleTimeout is the keep-alive idle timeout. Without + // it idle connections accumulate; fiber's default is unbounded. + defaultDistHTTPIdleTimeout = 60 * time.Second + // defaultDistHTTPConcurrency caps simultaneous in-flight handlers. + // Matches fiber's own default but stated explicitly so it shows up in + // /config introspection. + defaultDistHTTPConcurrency = 256 * 1024 + // defaultDistHTTPClientTimeout is the per-request deadline for the + // dist HTTP client when the caller doesn't supply one. 5s aligns with + // server read/write timeouts; the previous 2s caused flakes under + // -race when the fiber listener was slow to accept the first request. + defaultDistHTTPClientTimeout = 5 * time.Second ) -func newDistHTTPServer(addr string) *distHTTPServer { - app := fiber.New(fiber.Config{ReadTimeout: httpReadTimeout, WriteTimeout: httpWriteTimeout}) +// DistHTTPLimits bundles the tunable HTTP-transport limits applied to both +// the dist HTTP server (inbound request bodies, timeouts, concurrency) and +// the auto-created dist HTTP client (outbound request timeout, inbound +// response size). Zero-valued fields fall back to the defaults below. +// +// Use [WithDistHTTPLimits] to override defaults; partial overrides keep +// the rest at their default values. +type DistHTTPLimits struct { + // BodyLimit caps inbound request body bytes (server-side). + BodyLimit int + // ResponseLimit caps inbound response body bytes (client-side). + ResponseLimit int64 + // ReadTimeout is the server read deadline. + ReadTimeout time.Duration + // WriteTimeout is the server write deadline. + WriteTimeout time.Duration + // IdleTimeout is the keep-alive idle timeout (server-side). + IdleTimeout time.Duration + // Concurrency is the maximum number of concurrent in-flight handlers. + Concurrency int + // ClientTimeout is the per-request deadline for the dist HTTP client. + ClientTimeout time.Duration +} + +// withDefaults fills any zero-valued field on l with the package default. +// Returned by value — callers should treat the result as immutable. +func (l DistHTTPLimits) withDefaults() DistHTTPLimits { + if l.BodyLimit <= 0 { + l.BodyLimit = defaultDistHTTPBodyLimit + } + + if l.ResponseLimit <= 0 { + l.ResponseLimit = defaultDistHTTPResponseLimit + } + + if l.ReadTimeout <= 0 { + l.ReadTimeout = httpReadTimeout + } + + if l.WriteTimeout <= 0 { + l.WriteTimeout = httpWriteTimeout + } + + if l.IdleTimeout <= 0 { + l.IdleTimeout = defaultDistHTTPIdleTimeout + } + + if l.Concurrency <= 0 { + l.Concurrency = defaultDistHTTPConcurrency + } + + if l.ClientTimeout <= 0 { + l.ClientTimeout = defaultDistHTTPClientTimeout + } + + return l +} + +func newDistHTTPServer(addr string, limits DistHTTPLimits) *distHTTPServer { + limits = limits.withDefaults() + + app := fiber.New(fiber.Config{ + ReadTimeout: limits.ReadTimeout, + WriteTimeout: limits.WriteTimeout, + IdleTimeout: limits.IdleTimeout, + BodyLimit: limits.BodyLimit, + Concurrency: limits.Concurrency, + }) return &distHTTPServer{app: app, addr: addr} } diff --git a/pkg/backend/dist_http_transport.go b/pkg/backend/dist_http_transport.go index c91ec12..f8b8992 100644 --- a/pkg/backend/dist_http_transport.go +++ b/pkg/backend/dist_http_transport.go @@ -21,18 +21,69 @@ import ( type DistHTTPTransport struct { client *http.Client baseURLFn func(nodeID string) (string, bool) + // respBodyLimit caps response bodies so a malicious or compromised + // peer cannot OOM the requester via a giant response. <=0 disables. + respBodyLimit int64 } const statusThreshold = 300 // NewDistHTTPTransport constructs a DistHTTPTransport with the given timeout and -// nodeID->baseURL resolver. Timeout <=0 defaults to 2s. +// nodeID->baseURL resolver. Timeout <=0 defaults to defaultDistHTTPClientTimeout. +// Response bodies are bounded by defaultDistHTTPResponseLimit; use +// NewDistHTTPTransportWithLimits to override. func NewDistHTTPTransport(timeout time.Duration, resolver func(string) (string, bool)) *DistHTTPTransport { if timeout <= 0 { - timeout = 2 * time.Second + timeout = defaultDistHTTPClientTimeout } - return &DistHTTPTransport{client: &http.Client{Timeout: timeout}, baseURLFn: resolver} + return &DistHTTPTransport{ + client: &http.Client{Timeout: timeout}, + baseURLFn: resolver, + respBodyLimit: defaultDistHTTPResponseLimit, + } +} + +// NewDistHTTPTransportWithLimits is the explicit-limits variant. Use it when +// the caller needs to raise/lower the response-body cap or align the client +// timeout with custom DistHTTPLimits applied to the server. +func NewDistHTTPTransportWithLimits(limits DistHTTPLimits, resolver func(string) (string, bool)) *DistHTTPTransport { + limits = limits.withDefaults() + + return &DistHTTPTransport{ + client: &http.Client{Timeout: limits.ClientTimeout}, + baseURLFn: resolver, + respBodyLimit: limits.ResponseLimit, + } +} + +// drainBody consumes any remaining bytes (so the connection can be reused +// from the keep-alive pool) and closes the body. Centralizes the drain+close +// pattern so each call site stays a one-line defer. Errors are intentionally +// dropped — the connection is being recycled either way. +func drainBody(body io.ReadCloser) { + _, _ = io.Copy(io.Discard, body) + _ = body.Close() +} + +// readAndUnmarshal reads the entire bounded body into memory, then +// json.Unmarshals it into dst. We pre-read so that an *http.MaxBytesError +// from the limited body surfaces cleanly to the caller — the goccy/go-json +// streaming decoder swallows it as io.EOF / "unexpected end of JSON input", +// which would mask the real "peer sent too much" failure mode behind a +// generic parse error. Read cost is bounded by respBodyLimit. +func readAndUnmarshal(body io.Reader, dst any) error { + buf, err := io.ReadAll(body) + if err != nil { + return ewrap.Wrap(err, "read response body") + } + + err = json.Unmarshal(buf, dst) + if err != nil { + return ewrap.Wrap(err, "unmarshal response body") + } + + return nil } const ( @@ -69,31 +120,20 @@ func (t *DistHTTPTransport) ForwardSet(ctx context.Context, nodeID string, item return err } - defer func() { - _, copyErr := io.Copy(io.Discard, resp.Body) - if copyErr != nil { - // Best-effort drain to keep connections reusable. - _ = copyErr - } - - closeErr := resp.Body.Close() - if closeErr != nil { - // Best-effort close on deferred cleanup. - _ = closeErr - } - }() + body := t.limitedBody(resp) + defer drainBody(body) if resp.StatusCode == http.StatusNotFound { return sentinel.ErrBackendNotFound } if resp.StatusCode >= statusThreshold { - body, rerr := io.ReadAll(resp.Body) + errBody, rerr := io.ReadAll(body) if rerr != nil { return ewrap.Wrap(rerr, "read error body") } - return ewrap.Newf("forward set status %d body %s", resp.StatusCode, string(body)) + return ewrap.Newf("forward set status %d body %s", resp.StatusCode, string(errBody)) } return nil @@ -114,19 +154,8 @@ func (t *DistHTTPTransport) ForwardGet(ctx context.Context, nodeID, key string) return nil, false, err } - defer func() { - _, copyErr := io.Copy(io.Discard, resp.Body) - if copyErr != nil { - // Best-effort drain to keep connections reusable. - _ = copyErr - } - - closeErr := resp.Body.Close() - if closeErr != nil { - // Best-effort close on deferred cleanup. - _ = closeErr - } - }() + body := t.limitedBody(resp) + defer drainBody(body) if resp.StatusCode == http.StatusNotFound { return nil, false, sentinel.ErrBackendNotFound @@ -136,7 +165,7 @@ func (t *DistHTTPTransport) ForwardGet(ctx context.Context, nodeID, key string) return nil, false, ewrap.Newf("forward get status %d", resp.StatusCode) } - item, found, derr := decodeGetBody(resp.Body) + item, found, derr := decodeGetBody(body) if derr != nil { return nil, false, derr } @@ -151,11 +180,9 @@ func (t *DistHTTPTransport) ForwardGet(ctx context.Context, nodeID, key string) func decodeGetBody(r io.Reader) (*cache.Item, bool, error) { var raw map[string]json.RawMessage - dec := json.NewDecoder(r) - - err := dec.Decode(&raw) + err := readAndUnmarshal(r, &raw) if err != nil { - return nil, false, ewrap.Wrap(err, "decode body") + return nil, false, err } var found bool @@ -214,19 +241,7 @@ func (t *DistHTTPTransport) ForwardRemove(ctx context.Context, nodeID, key strin return err } - defer func() { - _, copyErr := io.Copy(io.Discard, resp.Body) - if copyErr != nil { - // Best-effort drain to keep connections reusable. - _ = copyErr - } - - closeErr := resp.Body.Close() - if closeErr != nil { - // Best-effort close on deferred cleanup. - _ = closeErr - } - }() + defer drainBody(t.limitedBody(resp)) if resp.StatusCode == http.StatusNotFound { return sentinel.ErrBackendNotFound @@ -251,19 +266,7 @@ func (t *DistHTTPTransport) Health(ctx context.Context, nodeID string) error { return err } - defer func() { - _, copyErr := io.Copy(io.Discard, resp.Body) - if copyErr != nil { - // Best-effort drain to keep connections reusable. - _ = copyErr - } - - closeErr := resp.Body.Close() - if closeErr != nil { - // Best-effort close on deferred cleanup. - _ = closeErr - } - }() + defer drainBody(t.limitedBody(resp)) if resp.StatusCode == http.StatusNotFound { return sentinel.ErrBackendNotFound @@ -292,19 +295,8 @@ func (t *DistHTTPTransport) FetchMerkle(ctx context.Context, nodeID string) (*Me return nil, err } - defer func() { - _, copyErr := io.Copy(io.Discard, resp.Body) - if copyErr != nil { - // Best-effort drain to keep connections reusable. - _ = copyErr - } - - closeErr := resp.Body.Close() - if closeErr != nil { - // Best-effort close on deferred cleanup. - _ = closeErr - } - }() + respBody := t.limitedBody(resp) + defer drainBody(respBody) if resp.StatusCode == http.StatusNotFound { return nil, sentinel.ErrBackendNotFound @@ -320,11 +312,9 @@ func (t *DistHTTPTransport) FetchMerkle(ctx context.Context, nodeID string) (*Me ChunkSize int `json:"chunk_size"` } - dec := json.NewDecoder(resp.Body) - - err = dec.Decode(&body) + err = readAndUnmarshal(respBody, &body) if err != nil { - return nil, ewrap.Wrap(err, "decode merkle") + return nil, err } return &MerkleTree{Root: body.Root, LeafHashes: body.LeafHashes, ChunkSize: body.ChunkSize}, nil @@ -342,19 +332,8 @@ func (t *DistHTTPTransport) ListKeys(ctx context.Context, nodeID string) ([]stri return nil, err } - defer func() { - _, copyErr := io.Copy(io.Discard, resp.Body) - if copyErr != nil { - // Best-effort drain to keep connections reusable. - _ = copyErr - } - - closeErr := resp.Body.Close() - if closeErr != nil { - // Best-effort close on deferred cleanup. - _ = closeErr - } - }() + respBody := t.limitedBody(resp) + defer drainBody(respBody) if resp.StatusCode >= statusThreshold { return nil, ewrap.Newf("list keys status %d", resp.StatusCode) @@ -364,16 +343,24 @@ func (t *DistHTTPTransport) ListKeys(ctx context.Context, nodeID string) ([]stri Keys []string `json:"keys"` } - dec := json.NewDecoder(resp.Body) - - err = dec.Decode(&body) + err = readAndUnmarshal(respBody, &body) if err != nil { - return nil, ewrap.Wrap(err, "decode keys") + return nil, err } return body.Keys, nil } +// limitedBody wraps resp.Body so reads beyond respBodyLimit return +// *http.MaxBytesError. Returns the original body when the limit is <=0. +func (t *DistHTTPTransport) limitedBody(resp *http.Response) io.ReadCloser { + if t.respBodyLimit <= 0 { + return resp.Body + } + + return http.MaxBytesReader(nil, resp.Body, t.respBodyLimit) +} + func (t *DistHTTPTransport) resolveBaseURL(nodeID string) (*url.URL, error) { if t == nil || t.baseURLFn == nil { return nil, errNoTransport diff --git a/pkg/backend/dist_memory.go b/pkg/backend/dist_memory.go index 1211102..618be5b 100644 --- a/pkg/backend/dist_memory.go +++ b/pkg/backend/dist_memory.go @@ -128,6 +128,12 @@ type DistMemory struct { rebalanceStopCh chan struct{} lastRebalanceVersion atomic.Uint64 + // httpLimits caps inbound request bodies (server) and inbound response + // bodies (auto-created client) plus tunes timeouts and concurrency. + // Zero-valued fields fall back to defaultDistHTTP* in + // dist_http_server.go via DistHTTPLimits.withDefaults(). + httpLimits DistHTTPLimits + // replica-only diff scan limits replicaDiffMaxPerTick int // 0 = unlimited @@ -566,6 +572,19 @@ func WithDistSeeds(addresses []string) DistMemoryOption { return func(dm *DistMemory) { dm.seeds = cp } } +// WithDistHTTPLimits configures the HTTP transport limits for the dist +// HTTP server (inbound request bodies, timeouts, concurrency) and the +// auto-created HTTP client (response body cap, request timeout). Partial +// overrides are honored: zero-valued fields inherit the package defaults +// from DistHTTPLimits.withDefaults. +// +// This option only affects the *internal* HTTP server/client created by +// tryStartHTTP — explicitly-supplied transports via WithDistTransport are +// the caller's responsibility to bound. +func WithDistHTTPLimits(limits DistHTTPLimits) DistMemoryOption { + return func(dm *DistMemory) { dm.httpLimits = limits } +} + // NewDistMemory creates a new DistMemory backend. func NewDistMemory(ctx context.Context, opts ...DistMemoryOption) (IBackend[DistMemory], error) { dm := &DistMemory{ @@ -1973,7 +1992,12 @@ func (dm *DistMemory) tryStartHTTP(ctx context.Context) { return } - server := newDistHTTPServer(dm.nodeAddr) + // Resolve once so server and auto-created client share the same + // timeouts / body caps — a request that the server would reject as + // too large is also one the client should not attempt to send. + limits := dm.httpLimits.withDefaults() + + server := newDistHTTPServer(dm.nodeAddr, limits) err := server.start(ctx, dm) if err != nil { // best-effort @@ -1998,7 +2022,7 @@ func (dm *DistMemory) tryStartHTTP(ctx context.Context) { return "", false } - dm.storeTransport(NewDistHTTPTransport(2*time.Second, resolver)) + dm.storeTransport(NewDistHTTPTransportWithLimits(limits, resolver)) } // startHeartbeatIfEnabled launches heartbeat loop if configured. diff --git a/pkg/backend/dist_memory_test_helpers.go b/pkg/backend/dist_memory_test_helpers.go index bd434d8..c563c1f 100644 --- a/pkg/backend/dist_memory_test_helpers.go +++ b/pkg/backend/dist_memory_test_helpers.go @@ -4,7 +4,6 @@ package backend import ( "context" - "time" ) // DisableHTTPForTest stops the internal HTTP server and clears transport (testing helper). @@ -27,7 +26,9 @@ func (dm *DistMemory) EnableHTTPForTest(ctx context.Context) { return } - server := newDistHTTPServer(dm.nodeAddr) + limits := dm.httpLimits.withDefaults() + + server := newDistHTTPServer(dm.nodeAddr, limits) err := server.start(ctx, dm) if err != nil { @@ -52,7 +53,7 @@ func (dm *DistMemory) EnableHTTPForTest(ctx context.Context) { return "", false } - dm.storeTransport(NewDistHTTPTransport(2*time.Second, resolver)) + dm.storeTransport(NewDistHTTPTransportWithLimits(limits, resolver)) } // HintedQueueSize returns number of queued hints for a node (testing helper). diff --git a/tests/dist_http_limits_test.go b/tests/dist_http_limits_test.go new file mode 100644 index 0000000..3391051 --- /dev/null +++ b/tests/dist_http_limits_test.go @@ -0,0 +1,176 @@ +package tests + +import ( + "context" + "errors" + "net/http" + "net/http/httptest" + "strings" + "testing" + "time" + + "github.com/hyp3rd/hypercache/pkg/backend" + cache "github.com/hyp3rd/hypercache/pkg/cache/v2" +) + +// tinyBodyLimit is small enough that any reasonable cache value will +// exceed it — used to trigger fiber's 413 / http.MaxBytesError responses +// in the limit-enforcement tests below. +const tinyBodyLimit = 1024 + +// TestDistHTTPServer_RejectsOversizedBody verifies the dist HTTP server +// rejects request bodies larger than the configured BodyLimit with HTTP +// 413, instead of buffering an unbounded payload into memory. +func TestDistHTTPServer_RejectsOversizedBody(t *testing.T) { + t.Parallel() + + ctx := context.Background() + addr := AllocatePort(t) + + bi, err := backend.NewDistMemory(ctx, + backend.WithDistNode("oversized-server", addr), + backend.WithDistReplication(1), + backend.WithDistHTTPLimits(backend.DistHTTPLimits{BodyLimit: tinyBodyLimit}), + ) + if err != nil { + t.Fatalf("new dist memory: %v", err) + } + + dm, ok := bi.(*backend.DistMemory) + if !ok { + t.Fatalf("expected *backend.DistMemory, got %T", bi) + } + + StopOnCleanup(t, dm) + + if !waitForHealth(ctx, "http://"+dm.LocalNodeAddr(), 5*time.Second) { + t.Fatal("dist HTTP server never came up") + } + + // Build a request body that comfortably exceeds the limit. The literal + // JSON envelope adds overhead too, so the value alone is several times + // the cap — fiber should reject it before reading the whole stream. + oversizedValue := strings.Repeat("x", 4*tinyBodyLimit) + body := `{"key":"k","value":"` + oversizedValue + `","expiration":0,"version":1,"origin":"t"}` + + req, err := http.NewRequestWithContext(ctx, http.MethodPost, "http://"+dm.LocalNodeAddr()+"/internal/set", strings.NewReader(body)) + if err != nil { + t.Fatalf("build request: %v", err) + } + + req.Header.Set("Content-Type", "application/json") + + client := &http.Client{Timeout: 5 * time.Second} + + resp, err := client.Do(req) + if err != nil { + t.Fatalf("do request: %v", err) + } + + defer func() { _ = resp.Body.Close() }() + + if resp.StatusCode != http.StatusRequestEntityTooLarge { + t.Fatalf("expected 413 Payload Too Large for oversized body, got %d", resp.StatusCode) + } +} + +// TestDistHTTPClient_RejectsOversizedResponse verifies the dist HTTP +// transport bounds inbound response bodies — a malicious or compromised +// peer cannot OOM the requester via a giant payload. +func TestDistHTTPClient_RejectsOversizedResponse(t *testing.T) { + t.Parallel() + + // Stub server that always returns a body well past the client cap, + // regardless of requested path. + const responseSize = 8 * tinyBodyLimit + + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + + // Write a JSON-shaped response so the decoder doesn't bail on + // syntax before hitting the byte cap. + _, _ = w.Write([]byte(`{"keys":["` + strings.Repeat("x", responseSize) + `"]}`)) + })) + t.Cleanup(srv.Close) + + transport := backend.NewDistHTTPTransportWithLimits( + backend.DistHTTPLimits{ResponseLimit: tinyBodyLimit}, + func(_ string) (string, bool) { return srv.URL, true }, + ) + + _, err := transport.ListKeys(context.Background(), "ignored") + if err == nil { + t.Fatalf("expected ListKeys to fail when response exceeds limit, got nil") + } + + // http.MaxBytesReader returns *http.MaxBytesError; the transport + // wraps the decode error so we just check the error chain. + var maxBytesErr *http.MaxBytesError + + if !errors.As(err, &maxBytesErr) { + t.Fatalf("expected http.MaxBytesError in error chain, got: %v", err) + } +} + +// TestDistHTTPLimits_DefaultsApply verifies that an option with zero +// fields still produces sane defaults — guards against the partial-override +// contract regressing. +func TestDistHTTPLimits_DefaultsApply(t *testing.T) { + t.Parallel() + + // Sanity check: end-to-end Set/Get works with no explicit limit option. + ctx := context.Background() + addr := AllocatePort(t) + + bi, err := backend.NewDistMemory(ctx, + backend.WithDistNode("default-limits", addr), + backend.WithDistReplication(1), + ) + if err != nil { + t.Fatalf("new dist memory: %v", err) + } + + dm, ok := bi.(*backend.DistMemory) + if !ok { + t.Fatalf("expected *backend.DistMemory, got %T", bi) + } + + StopOnCleanup(t, dm) + + if !waitForHealth(ctx, "http://"+dm.LocalNodeAddr(), 5*time.Second) { + t.Fatal("dist HTTP server never came up") + } + + err = dm.Set(ctx, &cache.Item{Key: "k", Value: "v", Version: 1, Origin: "t", LastUpdated: time.Now()}) + if err != nil { + t.Fatalf("default-limits Set should succeed: %v", err) + } +} + +// waitForHealth polls a node's /health endpoint until it returns 200 or +// the deadline elapses. Mirrors the merkle-test waitForMerkleEndpoint +// helper but is local to limits tests so they don't pull in unrelated +// dependencies. +func waitForHealth(ctx context.Context, baseURL string, timeout time.Duration) bool { + deadline := time.Now().Add(timeout) + for time.Now().Before(deadline) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, baseURL+"/health", nil) + if err != nil { + return false + } + + resp, err := http.DefaultClient.Do(req) + if err == nil { + _ = resp.Body.Close() + + if resp.StatusCode == http.StatusOK { + return true + } + } + + time.Sleep(50 * time.Millisecond) + } + + return false +} diff --git a/tests/hypercache_distmemory_heartbeat_test.go b/tests/hypercache_distmemory_heartbeat_test.go index 110cf7a..b35d566 100644 --- a/tests/hypercache_distmemory_heartbeat_test.go +++ b/tests/hypercache_distmemory_heartbeat_test.go @@ -74,6 +74,25 @@ func waitForNodeRemoval(membership *cluster.Membership, target cluster.NodeID, t return result } +// waitForNodesRemovedMetric polls until b's NodesRemoved metric is non-zero +// or timeout elapses. The heartbeat goroutine increments this metric in +// evaluateLiveness *after* membership.Remove has succeeded — there is a +// tiny preemption window (worse under -race) where membership shows the +// node gone but the atomic.Add has not yet executed. Tests that observe +// the membership change first must wait for the metric to settle. +func waitForNodesRemovedMetric(b *backend.DistMemory, timeout time.Duration) bool { + deadline := time.Now().Add(timeout) + for time.Now().Before(deadline) { + if b.Metrics().NodesRemoved > 0 { + return true + } + + time.Sleep(10 * time.Millisecond) + } + + return false +} + // assertHeartbeatLiveness verifies the post-condition of a heartbeat removal // scenario: removed node is gone from membership, alive node is still // present and alive, and the heartbeat metrics reflect the activity. @@ -162,5 +181,12 @@ func TestDistMemoryHeartbeatLiveness(t *testing.T) { //nolint:paralleltest // mu t.Fatalf("node2 not removed within 2*deadAfter") } + // Bridge the preemption window between membership.Remove() returning + // and the heartbeat goroutine executing nodesRemoved.Add(1). Without + // this poll the metric assertion below races and reports 0. + if !waitForNodesRemovedMetric(b1, 500*time.Millisecond) { + t.Fatalf("nodes-removed metric never reached >0 after membership transition") + } + assertHeartbeatLiveness(t, membership, target, b3.LocalNodeID(), b1.Metrics()) } From 501a0da6839cf57d4bf5d6aaf7fd6bb3458c1081 Mon Sep 17 00:00:00 2001 From: "F." Date: Mon, 4 May 2026 09:24:55 +0200 Subject: [PATCH 2/3] refactor(http): replace goroutine/select shutdown pattern with ShutdownWithContext Eliminate the goroutine-leak risk in both ManagementHTTPServer and distHTTPServer where the old `go func() { ch <- s.app.Shutdown() }()` pattern returned to the caller via the ctx-done branch but left the inner goroutine running until fiber drained organically. - Replace the manual goroutine+select in Shutdown/stop with fiber's ShutdownWithContext, which closes listeners, waits for in-flight requests, and force-closes on ctx deadline. - Capture the server-lifecycle context in ManagementHTTPServer.ctx and distHTTPServer.ctx at Start/start time so handlers (TriggerEviction, Clear, applySet, applyRemove) use a stable context rather than the pooled, short-lived fiber.Ctx that races with happy-eyeballs dial goroutines spawned by http.Client.Do. - Refactor duplicate route registrations into shared handler methods (handleSet, handleGet, handleRemove) mounted on both legacy (/internal/cache/*) and canonical (/internal/*) paths. - Remove now-unused sentinel.ErrMgmtHTTPShutdownTimeout. - Fix bench target to use ./... instead of . to pick up all sub-packages. - Add TestDistHTTPServer_StopRespectsCanceledContext to guard against regression: verifies Stop returns sub-second with an already-canceled context. --- Makefile | 2 +- cspell.config.yaml | 3 +- internal/sentinel/sentinel.go | 3 - management_http.go | 42 ++++--- pkg/backend/dist_http_server.go | 212 +++++++++++++------------------- tests/dist_http_limits_test.go | 48 ++++++++ 6 files changed, 162 insertions(+), 148 deletions(-) diff --git a/Makefile b/Makefile index ab363d2..022ad3b 100644 --- a/Makefile +++ b/Makefile @@ -33,7 +33,7 @@ ci: lint typecheck test-race sec build # bench runs the benchmark tests in the benchmark subpackage of the tests package. bench: - cd tests/benchmark && go test -bench=. -benchmem -benchtime=4s . -timeout 30m + cd tests/benchmark && go test -bench=. -benchmem -benchtime=4s ./... -timeout 30m # bench-baseline captures the current benchmark output to bench-baseline.txt for benchstat comparison. bench-baseline: diff --git a/cspell.config.yaml b/cspell.config.yaml index a044110..a84701f 100644 --- a/cspell.config.yaml +++ b/cspell.config.yaml @@ -52,6 +52,7 @@ words: - cmap - Cmder - codacy + - containedctx - cpuprofile - cyclop - daixiang @@ -86,7 +87,6 @@ words: - goccy - gochecknoglobals - goconst - - histogramcollector - gofiber - GOFILES - gofumpt @@ -98,6 +98,7 @@ words: - gosec - GOTOOLCHAIN - govulncheck + - histogramcollector - HMAC - honnef - hreq diff --git a/internal/sentinel/sentinel.go b/internal/sentinel/sentinel.go index e958e55..685d8f8 100644 --- a/internal/sentinel/sentinel.go +++ b/internal/sentinel/sentinel.go @@ -69,9 +69,6 @@ var ( // ErrTimeoutOrCanceled is returned when a timeout or cancellation occurs. ErrTimeoutOrCanceled = ewrap.New("the operation timed out or was canceled") - // ErrMgmtHTTPShutdownTimeout is returned when the management HTTP server fails to shutdown before context deadline. - ErrMgmtHTTPShutdownTimeout = ewrap.New("management http shutdown timeout") - // ErrNotOwner is returned when a distributed backend instance is not the ring owner for a key. ErrNotOwner = ewrap.New("not ring owner") diff --git a/management_http.go b/management_http.go index c4b2a87..a5de0ae 100644 --- a/management_http.go +++ b/management_http.go @@ -9,7 +9,6 @@ import ( "github.com/hyp3rd/ewrap" "github.com/hyp3rd/hypercache/internal/constants" - "github.com/hyp3rd/hypercache/internal/sentinel" "github.com/hyp3rd/hypercache/pkg/stats" ) @@ -29,6 +28,14 @@ type ManagementHTTPServer struct { ln net.Listener started bool listenerDeadline time.Duration + // ctx is the server-lifecycle context captured at Start. Handlers + // pass it to backend operations (Clear in particular) so cancellation + // propagates from HyperCache.Stop. We do NOT use the per-request + // fiber.Ctx for this: fiber.Ctx is pooled and reset after the handler + // returns, racing with happy-eyeballs goroutines spawned by + // net.(*Dialer).DialContext when DistMemory's transport fan-out goes + // through http.Client.Do. + ctx context.Context //nolint:containedctx // captured server lifecycle, not request scope } // WithMgmtAuth sets an auth function (return error to block). @@ -161,7 +168,8 @@ func (s *ManagementHTTPServer) Start(ctx context.Context, hc managementCache) er return nil } - s.mountRoutes(ctx, hc) + s.ctx = ctx + s.mountRoutes(hc) lc := net.ListenConfig{} @@ -201,27 +209,20 @@ func (s *ManagementHTTPServer) Shutdown(ctx context.Context) error { return nil } - ch := make(chan error, 1) - - go func() { - ch <- s.app.Shutdown() - }() - - select { - case <-ctx.Done(): - return sentinel.ErrMgmtHTTPShutdownTimeout - case err := <-ch: - return err - } + // ShutdownWithContext closes listeners gracefully, waits for in-flight + // requests, and force-closes once ctx's deadline elapses. Replaces + // the previous go-routine + select pattern that leaked the shutdown + // goroutine when our ctx fired first. + return s.app.ShutdownWithContext(ctx) } // mountRoutes. -func (s *ManagementHTTPServer) mountRoutes(ctx context.Context, hc managementCache) { // split into helpers to satisfy funlen +func (s *ManagementHTTPServer) mountRoutes(hc managementCache) { // split into helpers to satisfy funlen useAuth := s.wrapAuth s.registerBasic(useAuth, hc) s.registerDistributed(useAuth, hc) s.registerCluster(useAuth, hc) - s.registerControl(ctx, useAuth, hc) + s.registerControl(useAuth, hc) } // wrapAuth returns an auth-wrapped handler if authFunc provided. @@ -322,12 +323,15 @@ func (s *ManagementHTTPServer) registerCluster(useAuth func(fiber.Handler) fiber } func (s *ManagementHTTPServer) registerControl( - ctx context.Context, useAuth func(fiber.Handler) fiber.Handler, hc managementCache, ) { + // Handlers use s.ctx (server-lifecycle) for backend ops. Per-request + // fiber.Ctx would race when Clear's transport fan-out spawns + // happy-eyeballs dial goroutines that outlive the handler — see the + // comment on ManagementHTTPServer.ctx for the trace. s.app.Post("/evict", useAuth(func(fiberCtx fiber.Ctx) error { - hc.TriggerEviction(ctx) + hc.TriggerEviction(s.ctx) return fiberCtx.SendStatus(fiber.StatusAccepted) })) @@ -337,7 +341,7 @@ func (s *ManagementHTTPServer) registerControl( return fiberCtx.SendStatus(fiber.StatusAccepted) })) s.app.Post("/clear", useAuth(func(fiberCtx fiber.Ctx) error { - clearErr := hc.Clear(ctx) + clearErr := hc.Clear(s.ctx) if clearErr != nil { return clearErr } diff --git a/pkg/backend/dist_http_server.go b/pkg/backend/dist_http_server.go index bb2ebe6..108183e 100644 --- a/pkg/backend/dist_http_server.go +++ b/pkg/backend/dist_http_server.go @@ -18,6 +18,16 @@ type distHTTPServer struct { app *fiber.App ln net.Listener addr string + // ctx is the server-lifecycle context captured at start. Handlers + // pass it to backend operations so cancellation propagates when the + // caller cancels the constructor ctx (e.g. via DistMemory Stop). + // + // We do NOT use the per-request fiber.Ctx for this: fiber.Ctx is + // pooled and reset after the handler returns, which races with + // happy-eyeballs goroutines spawned by net.(*Dialer).DialContext + // when applySet's replica fan-out goes through http.Client.Do. + // See the race trace captured during phase-5b investigation. + ctx context.Context //nolint:containedctx // captured server lifecycle, not request scope } // minimal request/response types reused by transport @@ -123,148 +133,106 @@ func newDistHTTPServer(addr string, limits DistHTTPLimits) *distHTTPServer { } func (s *distHTTPServer) start(ctx context.Context, dm *DistMemory) error { - s.registerSet(ctx, dm) - s.registerGet(ctx, dm) - s.registerRemove(ctx, dm) + s.ctx = ctx + s.registerSet(dm) + s.registerGet(dm) + s.registerRemove(dm) s.registerHealth() - s.registerMerkle(ctx, dm) + s.registerMerkle(dm) return s.listen(ctx) } -func (s *distHTTPServer) registerSet(ctx context.Context, dm *DistMemory) { - // legacy path - s.app.Post("/internal/cache/set", func(fctx fiber.Ctx) error { // small handler - var req httpSetRequest +// handleSet decodes a httpSetRequest and applies it locally + optionally +// fan-outs to replicas. Uses s.ctx (server-lifecycle) as the backend +// operation context — see the comment on distHTTPServer.ctx for why we +// can't use the per-request fiber.Ctx here. +func (s *distHTTPServer) handleSet(fctx fiber.Ctx, dm *DistMemory) error { + var req httpSetRequest - body := fctx.Body() - - unmarshalErr := json.Unmarshal(body, &req) - if unmarshalErr != nil { // separated to satisfy noinlineerr - return fctx.Status(fiber.StatusBadRequest).JSON(fiber.Map{constants.ErrorLabel: unmarshalErr.Error()}) - } - - it := &cache.Item{ // LastUpdated set to now for replicated writes - Key: req.Key, - Value: req.Value, - Expiration: time.Duration(req.Expiration) * time.Millisecond, - Version: req.Version, - Origin: req.Origin, - LastUpdated: time.Now(), - } - - dm.applySet(ctx, it, req.Replicate) - - return fctx.JSON(httpSetResponse{}) - }) - - // canonical path per roadmap - s.app.Post("/internal/set", func(fctx fiber.Ctx) error { // small handler - var req httpSetRequest - - body := fctx.Body() - - unmarshalErr := json.Unmarshal(body, &req) - if unmarshalErr != nil { // separated to satisfy noinlineerr - return fctx.Status(fiber.StatusBadRequest).JSON(fiber.Map{constants.ErrorLabel: unmarshalErr.Error()}) - } + unmarshalErr := json.Unmarshal(fctx.Body(), &req) + if unmarshalErr != nil { // separated to satisfy noinlineerr + return fctx.Status(fiber.StatusBadRequest).JSON(fiber.Map{constants.ErrorLabel: unmarshalErr.Error()}) + } - it := &cache.Item{ // LastUpdated set to now for replicated writes - Key: req.Key, - Value: req.Value, - Expiration: time.Duration(req.Expiration) * time.Millisecond, - Version: req.Version, - Origin: req.Origin, - LastUpdated: time.Now(), - } + it := &cache.Item{ // LastUpdated set to now for replicated writes + Key: req.Key, + Value: req.Value, + Expiration: time.Duration(req.Expiration) * time.Millisecond, + Version: req.Version, + Origin: req.Origin, + LastUpdated: time.Now(), + } - dm.applySet(ctx, it, req.Replicate) + dm.applySet(s.ctx, it, req.Replicate) - return fctx.JSON(httpSetResponse{}) - }) + return fctx.JSON(httpSetResponse{}) } -func (s *distHTTPServer) registerGet(_ context.Context, dm *DistMemory) { - // legacy path - s.app.Get("/internal/cache/get", func(fctx fiber.Ctx) error { - key := fctx.Query("key") - if key == "" { - return fctx.Status(fiber.StatusBadRequest).JSON(fiber.Map{constants.ErrorLabel: constants.ErrMsgMissingCacheKey}) - } - - owners := dm.lookupOwners(key) - if len(owners) == 0 { - return fctx.Status(fiber.StatusNotFound).JSON(fiber.Map{constants.ErrorLabel: "not owner"}) - } - - if it, ok := dm.shardFor(key).items.Get(key); ok { - return fctx.JSON(httpGetResponse{Found: true, Item: it}) - } - - return fctx.JSON(httpGetResponse{Found: false}) - }) +func (s *distHTTPServer) registerSet(dm *DistMemory) { + handler := func(fctx fiber.Ctx) error { return s.handleSet(fctx, dm) } + // legacy + canonical paths share the same handler. + s.app.Post("/internal/cache/set", handler) + s.app.Post("/internal/set", handler) +} - // canonical path per roadmap - s.app.Get("/internal/get", func(fctx fiber.Ctx) error { - key := fctx.Query("key") - if key == "" { - return fctx.Status(fiber.StatusBadRequest).JSON(fiber.Map{constants.ErrorLabel: constants.ErrMsgMissingCacheKey}) - } +// handleGet looks up a key locally for a remote owner that ring-routed +// to this node. Get itself is synchronous and doesn't take a ctx, so this +// handler doesn't need one. +func (*distHTTPServer) handleGet(fctx fiber.Ctx, dm *DistMemory) error { + key := fctx.Query("key") + if key == "" { + return fctx.Status(fiber.StatusBadRequest).JSON(fiber.Map{constants.ErrorLabel: constants.ErrMsgMissingCacheKey}) + } - owners := dm.lookupOwners(key) - if len(owners) == 0 { - return fctx.Status(fiber.StatusNotFound).JSON(fiber.Map{constants.ErrorLabel: "not owner"}) - } + owners := dm.lookupOwners(key) + if len(owners) == 0 { + return fctx.Status(fiber.StatusNotFound).JSON(fiber.Map{constants.ErrorLabel: "not owner"}) + } - if it, ok := dm.shardFor(key).items.Get(key); ok { - return fctx.JSON(httpGetResponse{Found: true, Item: it}) - } + if it, ok := dm.shardFor(key).items.Get(key); ok { + return fctx.JSON(httpGetResponse{Found: true, Item: it}) + } - return fctx.JSON(httpGetResponse{Found: false}) - }) + return fctx.JSON(httpGetResponse{Found: false}) } -func (s *distHTTPServer) registerRemove(ctx context.Context, dm *DistMemory) { - // legacy path - s.app.Delete("/internal/cache/remove", func(fctx fiber.Ctx) error { - key := fctx.Query("key") - if key == "" { - return fctx.Status(fiber.StatusBadRequest).JSON(fiber.Map{constants.ErrorLabel: constants.ErrMsgMissingCacheKey}) - } - - replicate, parseErr := strconv.ParseBool(fctx.Query("replicate", "false")) - if parseErr != nil { - return fctx.Status(fiber.StatusBadRequest).JSON(fiber.Map{constants.ErrorLabel: "invalid replicate"}) - } - - dm.applyRemove(ctx, key, replicate) +func (s *distHTTPServer) registerGet(dm *DistMemory) { + handler := func(fctx fiber.Ctx) error { return s.handleGet(fctx, dm) } + s.app.Get("/internal/cache/get", handler) + s.app.Get("/internal/get", handler) +} - return fctx.SendStatus(fiber.StatusOK) - }) +// handleRemove deletes a key locally and optionally fan-outs the delete +// to replicas. Uses s.ctx for backend operations — see the comment on +// distHTTPServer.ctx for why per-request fiber.Ctx is unsafe here. +func (s *distHTTPServer) handleRemove(fctx fiber.Ctx, dm *DistMemory) error { + key := fctx.Query("key") + if key == "" { + return fctx.Status(fiber.StatusBadRequest).JSON(fiber.Map{constants.ErrorLabel: constants.ErrMsgMissingCacheKey}) + } - // canonical path per roadmap - s.app.Delete("/internal/del", func(fctx fiber.Ctx) error { - key := fctx.Query("key") - if key == "" { - return fctx.Status(fiber.StatusBadRequest).JSON(fiber.Map{constants.ErrorLabel: constants.ErrMsgMissingCacheKey}) - } + replicate, parseErr := strconv.ParseBool(fctx.Query("replicate", "false")) + if parseErr != nil { + return fctx.Status(fiber.StatusBadRequest).JSON(fiber.Map{constants.ErrorLabel: "invalid replicate"}) + } - replicate, parseErr := strconv.ParseBool(fctx.Query("replicate", "false")) - if parseErr != nil { - return fctx.Status(fiber.StatusBadRequest).JSON(fiber.Map{constants.ErrorLabel: "invalid replicate"}) - } + dm.applyRemove(s.ctx, key, replicate) - dm.applyRemove(ctx, key, replicate) + return fctx.SendStatus(fiber.StatusOK) +} - return fctx.SendStatus(fiber.StatusOK) - }) +func (s *distHTTPServer) registerRemove(dm *DistMemory) { + handler := func(fctx fiber.Ctx) error { return s.handleRemove(fctx, dm) } + s.app.Delete("/internal/cache/remove", handler) + s.app.Delete("/internal/del", handler) } func (s *distHTTPServer) registerHealth() { s.app.Get("/health", func(fctx fiber.Ctx) error { return fctx.SendString("ok") }) } -func (s *distHTTPServer) registerMerkle(_ context.Context, dm *DistMemory) { +func (s *distHTTPServer) registerMerkle(dm *DistMemory) { s.app.Get("/internal/merkle", func(fctx fiber.Ctx) error { tree := dm.BuildMerkleTree() @@ -321,14 +289,10 @@ func (s *distHTTPServer) stop(ctx context.Context) error { return nil } - ch := make(chan error, 1) - - go func() { ch <- s.app.Shutdown() }() - - select { - case <-ctx.Done(): - return ewrap.Newf("http server shutdown timeout") - case err := <-ch: - return err - } + // ShutdownWithContext closes listeners gracefully, waits for in-flight + // requests, and force-closes once ctx's deadline elapses — replacing + // the previous `go func() { ch <- s.app.Shutdown() }()` pattern which + // leaked the shutdown goroutine when our ctx fired before fiber + // finished draining. + return s.app.ShutdownWithContext(ctx) } diff --git a/tests/dist_http_limits_test.go b/tests/dist_http_limits_test.go index 3391051..99c5e86 100644 --- a/tests/dist_http_limits_test.go +++ b/tests/dist_http_limits_test.go @@ -148,6 +148,54 @@ func TestDistHTTPLimits_DefaultsApply(t *testing.T) { } } +// TestDistHTTPServer_StopRespectsCanceledContext verifies the dist HTTP +// server's Stop() returns promptly when the supplied context is already +// canceled, without leaking the shutdown goroutine. Replaces the old +// `go func() { ch <- s.app.Shutdown() }()` wrapper that returned to the +// caller via the ctx-done branch but left the inner goroutine running +// until fiber's Shutdown finished organically. +func TestDistHTTPServer_StopRespectsCanceledContext(t *testing.T) { + t.Parallel() + + ctx := context.Background() + addr := AllocatePort(t) + + bi, err := backend.NewDistMemory(ctx, + backend.WithDistNode("stop-test", addr), + backend.WithDistReplication(1), + ) + if err != nil { + t.Fatalf("new dist memory: %v", err) + } + + dm, ok := bi.(*backend.DistMemory) + if !ok { + t.Fatalf("expected *backend.DistMemory, got %T", bi) + } + + if !waitForHealth(ctx, "http://"+dm.LocalNodeAddr(), 5*time.Second) { + t.Fatal("dist HTTP server never came up") + } + + // Already-canceled ctx — ShutdownWithContext should return the ctx + // error immediately without blocking on a graceful drain. + canceledCtx, cancel := context.WithCancel(context.Background()) + cancel() + + start := time.Now() + + _ = dm.Stop(canceledCtx) + + elapsed := time.Since(start) + + // Allow generous slack for race-detector overhead. The bug case was + // "Stop blocks waiting for fiber's organic shutdown" — anything under + // a second indicates ShutdownWithContext honored the ctx. + if elapsed > time.Second { + t.Errorf("Stop took %v with already-canceled ctx; expected sub-second exit", elapsed) + } +} + // waitForHealth polls a node's /health endpoint until it returns 200 or // the deadline elapses. Mirrors the merkle-test waitForMerkleEndpoint // helper but is local to limits tests so they don't pull in unrelated From be4cbf01773339952f002cbe441b418a577db990 Mon Sep 17 00:00:00 2001 From: "F." Date: Mon, 4 May 2026 10:21:06 +0200 Subject: [PATCH 3/3] feat(backend/dist): add bearer-token auth, TLS, and lifecycle context cancellation Introduces three security and reliability improvements to the distributed HTTP transport layer: 1. Bearer-token authentication (DistHTTPAuth): constant-time token validation on the server, automatic request signing on the auto-created HTTP client. ServerVerify and ClientSign escape hatches support JWT, HMAC, and mTLS-derived identity. Applied to all dist endpoints including /health. 2. TLS support via DistHTTPLimits.TLSConfig: wraps TCP listeners with tls.NewListener and configures the auto-created client with a matching *tls.Config. Forces HTTP/1.1 via ALPN to avoid h2/fasthttp mismatch; resolver advertises https:// when TLS is configured. 3. Deterministic lifecycle context cancellation: DistMemory and ManagementHTTPServer derive a lifeCtx/lifeCancel pair from the constructor context. Stop() cancels lifeCtx before channel tear-down, so in-flight handlers and background goroutines observe Done() independently of the (usually non-canceling) constructor context. Additional changes: - LastServeError() surfaces background serve-goroutine failures instead of silently swallowing them - LifecycleContext() accessor and WithDistHTTPAuth() option added to DistMemory; ErrUnauthorized sentinel added - makePeerURLResolver extracted and shared between tryStartHTTP and EnableHTTPForTest - Integration tests: dist_http_auth_test.go, dist_http_tls_test.go, dist_http_lifecycle_test.go --- bench-baseline-v2.txt | 51 - bench-baseline.txt | 51 - bench-phase1.txt | 51 - bench-step1-unit.txt | 26 - bench-step1.txt | 51 - bench-step2.txt | 51 - bench-step3.txt | 26 - cspell.config.yaml | 7 + internal/sentinel/sentinel.go | 3 + lint-baseline-v2.txt | 4138 ----------------------- management_http.go | 65 +- pkg/backend/dist_http_server.go | 201 +- pkg/backend/dist_http_transport.go | 62 +- pkg/backend/dist_memory.go | 98 +- pkg/backend/dist_memory_test_helpers.go | 22 +- pkg/stats/histogramcollector_test.go | 4 +- race-baseline-v2.log | 21 - race-step2.log | 21 - tests/dist_http_auth_test.go | 327 ++ tests/dist_http_lifecycle_test.go | 71 + tests/dist_http_tls_test.go | 234 ++ 21 files changed, 1031 insertions(+), 4550 deletions(-) delete mode 100644 bench-baseline-v2.txt delete mode 100644 bench-baseline.txt delete mode 100644 bench-phase1.txt delete mode 100644 bench-step1-unit.txt delete mode 100644 bench-step1.txt delete mode 100644 bench-step2.txt delete mode 100644 bench-step3.txt delete mode 100644 lint-baseline-v2.txt delete mode 100644 race-baseline-v2.log delete mode 100644 race-step2.log create mode 100644 tests/dist_http_auth_test.go create mode 100644 tests/dist_http_lifecycle_test.go create mode 100644 tests/dist_http_tls_test.go diff --git a/bench-baseline-v2.txt b/bench-baseline-v2.txt deleted file mode 100644 index b3f5ef9..0000000 --- a/bench-baseline-v2.txt +++ /dev/null @@ -1,51 +0,0 @@ -goos: darwin -goarch: arm64 -pkg: github.com/hyp3rd/hypercache/tests/benchmark -cpu: Apple M4 Pro -BenchmarkHyperCache_SetParallel-14 6676305 711.9 ns/op 218 B/op 3 allocs/op -BenchmarkHyperCache_SetParallel-14 6438765 729.0 ns/op 220 B/op 3 allocs/op -BenchmarkHyperCache_SetParallel-14 6961741 807.6 ns/op 218 B/op 3 allocs/op -BenchmarkHyperCache_SetParallel-14 6262878 710.7 ns/op 223 B/op 3 allocs/op -BenchmarkHyperCache_SetParallel-14 6871442 789.8 ns/op 217 B/op 3 allocs/op -BenchmarkHyperCache_GetParallel-14 54231619 88.86 ns/op 135 B/op 2 allocs/op -BenchmarkHyperCache_GetParallel-14 53125920 89.61 ns/op 135 B/op 2 allocs/op -BenchmarkHyperCache_GetParallel-14 53083129 89.39 ns/op 135 B/op 2 allocs/op -BenchmarkHyperCache_GetParallel-14 54015073 88.35 ns/op 135 B/op 2 allocs/op -BenchmarkHyperCache_GetParallel-14 54414236 91.13 ns/op 135 B/op 2 allocs/op -BenchmarkHyperCache_GetOrSetParallel-14 6655920 748.5 ns/op 218 B/op 3 allocs/op -BenchmarkHyperCache_GetOrSetParallel-14 6446132 738.8 ns/op 220 B/op 3 allocs/op -BenchmarkHyperCache_GetOrSetParallel-14 6796017 777.4 ns/op 217 B/op 3 allocs/op -BenchmarkHyperCache_GetOrSetParallel-14 6508280 712.7 ns/op 220 B/op 3 allocs/op -BenchmarkHyperCache_GetOrSetParallel-14 6980356 783.1 ns/op 219 B/op 3 allocs/op -BenchmarkHyperCache_MixedParallel-14 33034370 167.5 ns/op 157 B/op 3 allocs/op -BenchmarkHyperCache_MixedParallel-14 29370020 165.6 ns/op 159 B/op 3 allocs/op -BenchmarkHyperCache_MixedParallel-14 32026545 180.1 ns/op 158 B/op 3 allocs/op -BenchmarkHyperCache_MixedParallel-14 28606334 168.0 ns/op 160 B/op 3 allocs/op -BenchmarkHyperCache_MixedParallel-14 31824206 178.5 ns/op 158 B/op 3 allocs/op -BenchmarkHyperCache_Get-14 40344086 110.3 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_Get-14 43160210 113.0 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_Get-14 43770510 112.1 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_Get-14 43183962 114.3 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_Get-14 43036879 111.4 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_Get_ProactiveEviction-14 42927090 112.1 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_Get_ProactiveEviction-14 43338003 114.9 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_Get_ProactiveEviction-14 40403430 113.5 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_Get_ProactiveEviction-14 42223924 116.6 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_Get_ProactiveEviction-14 41546704 117.6 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_List-14 46045465 105.6 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_List-14 45772118 101.4 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_List-14 47736949 101.0 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_List-14 46948050 99.45 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_List-14 46891046 99.97 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_Set-14 12900670 469.6 ns/op 222 B/op 3 allocs/op -BenchmarkHyperCache_Set-14 8785929 479.5 ns/op 255 B/op 3 allocs/op -BenchmarkHyperCache_Set-14 12034856 488.7 ns/op 227 B/op 3 allocs/op -BenchmarkHyperCache_Set-14 10504384 520.1 ns/op 238 B/op 3 allocs/op -BenchmarkHyperCache_Set-14 11103819 486.2 ns/op 233 B/op 3 allocs/op -BenchmarkHyperCache_Set_Proactive_Eviction-14 4011253 1260 ns/op 454 B/op 6 allocs/op -BenchmarkHyperCache_Set_Proactive_Eviction-14 3742882 1231 ns/op 454 B/op 6 allocs/op -BenchmarkHyperCache_Set_Proactive_Eviction-14 4316617 1150 ns/op 454 B/op 6 allocs/op -BenchmarkHyperCache_Set_Proactive_Eviction-14 4342858 1120 ns/op 454 B/op 6 allocs/op -BenchmarkHyperCache_Set_Proactive_Eviction-14 3557096 1353 ns/op 454 B/op 6 allocs/op -PASS -ok github.com/hyp3rd/hypercache/tests/benchmark 259.760s diff --git a/bench-baseline.txt b/bench-baseline.txt deleted file mode 100644 index b3f5ef9..0000000 --- a/bench-baseline.txt +++ /dev/null @@ -1,51 +0,0 @@ -goos: darwin -goarch: arm64 -pkg: github.com/hyp3rd/hypercache/tests/benchmark -cpu: Apple M4 Pro -BenchmarkHyperCache_SetParallel-14 6676305 711.9 ns/op 218 B/op 3 allocs/op -BenchmarkHyperCache_SetParallel-14 6438765 729.0 ns/op 220 B/op 3 allocs/op -BenchmarkHyperCache_SetParallel-14 6961741 807.6 ns/op 218 B/op 3 allocs/op -BenchmarkHyperCache_SetParallel-14 6262878 710.7 ns/op 223 B/op 3 allocs/op -BenchmarkHyperCache_SetParallel-14 6871442 789.8 ns/op 217 B/op 3 allocs/op -BenchmarkHyperCache_GetParallel-14 54231619 88.86 ns/op 135 B/op 2 allocs/op -BenchmarkHyperCache_GetParallel-14 53125920 89.61 ns/op 135 B/op 2 allocs/op -BenchmarkHyperCache_GetParallel-14 53083129 89.39 ns/op 135 B/op 2 allocs/op -BenchmarkHyperCache_GetParallel-14 54015073 88.35 ns/op 135 B/op 2 allocs/op -BenchmarkHyperCache_GetParallel-14 54414236 91.13 ns/op 135 B/op 2 allocs/op -BenchmarkHyperCache_GetOrSetParallel-14 6655920 748.5 ns/op 218 B/op 3 allocs/op -BenchmarkHyperCache_GetOrSetParallel-14 6446132 738.8 ns/op 220 B/op 3 allocs/op -BenchmarkHyperCache_GetOrSetParallel-14 6796017 777.4 ns/op 217 B/op 3 allocs/op -BenchmarkHyperCache_GetOrSetParallel-14 6508280 712.7 ns/op 220 B/op 3 allocs/op -BenchmarkHyperCache_GetOrSetParallel-14 6980356 783.1 ns/op 219 B/op 3 allocs/op -BenchmarkHyperCache_MixedParallel-14 33034370 167.5 ns/op 157 B/op 3 allocs/op -BenchmarkHyperCache_MixedParallel-14 29370020 165.6 ns/op 159 B/op 3 allocs/op -BenchmarkHyperCache_MixedParallel-14 32026545 180.1 ns/op 158 B/op 3 allocs/op -BenchmarkHyperCache_MixedParallel-14 28606334 168.0 ns/op 160 B/op 3 allocs/op -BenchmarkHyperCache_MixedParallel-14 31824206 178.5 ns/op 158 B/op 3 allocs/op -BenchmarkHyperCache_Get-14 40344086 110.3 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_Get-14 43160210 113.0 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_Get-14 43770510 112.1 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_Get-14 43183962 114.3 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_Get-14 43036879 111.4 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_Get_ProactiveEviction-14 42927090 112.1 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_Get_ProactiveEviction-14 43338003 114.9 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_Get_ProactiveEviction-14 40403430 113.5 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_Get_ProactiveEviction-14 42223924 116.6 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_Get_ProactiveEviction-14 41546704 117.6 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_List-14 46045465 105.6 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_List-14 45772118 101.4 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_List-14 47736949 101.0 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_List-14 46948050 99.45 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_List-14 46891046 99.97 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_Set-14 12900670 469.6 ns/op 222 B/op 3 allocs/op -BenchmarkHyperCache_Set-14 8785929 479.5 ns/op 255 B/op 3 allocs/op -BenchmarkHyperCache_Set-14 12034856 488.7 ns/op 227 B/op 3 allocs/op -BenchmarkHyperCache_Set-14 10504384 520.1 ns/op 238 B/op 3 allocs/op -BenchmarkHyperCache_Set-14 11103819 486.2 ns/op 233 B/op 3 allocs/op -BenchmarkHyperCache_Set_Proactive_Eviction-14 4011253 1260 ns/op 454 B/op 6 allocs/op -BenchmarkHyperCache_Set_Proactive_Eviction-14 3742882 1231 ns/op 454 B/op 6 allocs/op -BenchmarkHyperCache_Set_Proactive_Eviction-14 4316617 1150 ns/op 454 B/op 6 allocs/op -BenchmarkHyperCache_Set_Proactive_Eviction-14 4342858 1120 ns/op 454 B/op 6 allocs/op -BenchmarkHyperCache_Set_Proactive_Eviction-14 3557096 1353 ns/op 454 B/op 6 allocs/op -PASS -ok github.com/hyp3rd/hypercache/tests/benchmark 259.760s diff --git a/bench-phase1.txt b/bench-phase1.txt deleted file mode 100644 index 08c8cb7..0000000 --- a/bench-phase1.txt +++ /dev/null @@ -1,51 +0,0 @@ -goos: darwin -goarch: arm64 -pkg: github.com/hyp3rd/hypercache/tests/benchmark -cpu: Apple M4 Pro -BenchmarkHyperCache_SetParallel-14 6209270 716.0 ns/op 223 B/op 3 allocs/op -BenchmarkHyperCache_SetParallel-14 6455631 783.7 ns/op 220 B/op 3 allocs/op -BenchmarkHyperCache_SetParallel-14 6529954 781.8 ns/op 219 B/op 3 allocs/op -BenchmarkHyperCache_SetParallel-14 6415386 787.3 ns/op 221 B/op 3 allocs/op -BenchmarkHyperCache_SetParallel-14 6554228 787.0 ns/op 219 B/op 3 allocs/op -BenchmarkHyperCache_GetParallel-14 53781536 89.38 ns/op 135 B/op 2 allocs/op -BenchmarkHyperCache_GetParallel-14 53632083 89.92 ns/op 135 B/op 2 allocs/op -BenchmarkHyperCache_GetParallel-14 52835577 87.68 ns/op 135 B/op 2 allocs/op -BenchmarkHyperCache_GetParallel-14 53531523 87.95 ns/op 135 B/op 2 allocs/op -BenchmarkHyperCache_GetParallel-14 53525479 91.84 ns/op 135 B/op 2 allocs/op -BenchmarkHyperCache_GetOrSetParallel-14 6200666 752.3 ns/op 223 B/op 3 allocs/op -BenchmarkHyperCache_GetOrSetParallel-14 6393361 735.8 ns/op 221 B/op 3 allocs/op -BenchmarkHyperCache_GetOrSetParallel-14 6340680 786.5 ns/op 222 B/op 3 allocs/op -BenchmarkHyperCache_GetOrSetParallel-14 6393583 784.1 ns/op 221 B/op 3 allocs/op -BenchmarkHyperCache_GetOrSetParallel-14 6464352 804.3 ns/op 220 B/op 3 allocs/op -BenchmarkHyperCache_MixedParallel-14 30741903 172.8 ns/op 158 B/op 3 allocs/op -BenchmarkHyperCache_MixedParallel-14 28517773 175.8 ns/op 160 B/op 3 allocs/op -BenchmarkHyperCache_MixedParallel-14 29075665 170.9 ns/op 159 B/op 3 allocs/op -BenchmarkHyperCache_MixedParallel-14 29588047 175.9 ns/op 159 B/op 3 allocs/op -BenchmarkHyperCache_MixedParallel-14 30058425 162.6 ns/op 159 B/op 3 allocs/op -BenchmarkHyperCache_Get-14 41194540 108.9 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_Get-14 44170627 109.3 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_Get-14 44089144 109.7 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_Get-14 43422474 109.7 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_Get-14 44358532 108.8 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_Get_ProactiveEviction-14 43800232 109.6 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_Get_ProactiveEviction-14 43563344 111.2 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_Get_ProactiveEviction-14 42146994 111.4 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_Get_ProactiveEviction-14 40192420 113.1 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_Get_ProactiveEviction-14 42018354 113.4 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_List-14 45661159 103.6 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_List-14 47006362 101.6 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_List-14 46921814 101.1 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_List-14 47563152 100.2 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_List-14 46937605 101.1 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_Set-14 12518250 450.2 ns/op 224 B/op 3 allocs/op -BenchmarkHyperCache_Set-14 11584551 464.7 ns/op 230 B/op 3 allocs/op -BenchmarkHyperCache_Set-14 12283092 447.8 ns/op 225 B/op 3 allocs/op -BenchmarkHyperCache_Set-14 11601224 507.2 ns/op 230 B/op 3 allocs/op -BenchmarkHyperCache_Set-14 8849468 539.7 ns/op 254 B/op 3 allocs/op -BenchmarkHyperCache_Set_Proactive_Eviction-14 3087795 1385 ns/op 454 B/op 6 allocs/op -BenchmarkHyperCache_Set_Proactive_Eviction-14 3858516 1181 ns/op 454 B/op 6 allocs/op -BenchmarkHyperCache_Set_Proactive_Eviction-14 3961390 1164 ns/op 454 B/op 6 allocs/op -BenchmarkHyperCache_Set_Proactive_Eviction-14 3892960 1152 ns/op 454 B/op 6 allocs/op -BenchmarkHyperCache_Set_Proactive_Eviction-14 3880062 1172 ns/op 454 B/op 6 allocs/op -PASS -ok github.com/hyp3rd/hypercache/tests/benchmark 260.917s diff --git a/bench-step1-unit.txt b/bench-step1-unit.txt deleted file mode 100644 index 1133e22..0000000 --- a/bench-step1-unit.txt +++ /dev/null @@ -1,26 +0,0 @@ -goos: darwin -goarch: arm64 -pkg: github.com/hyp3rd/hypercache/pkg/cache/v2 -cpu: Apple M4 Pro -BenchmarkConcurrentMap_Count-14 243351127 9.783 ns/op 0 B/op 0 allocs/op -BenchmarkConcurrentMap_Count-14 248693774 9.691 ns/op 0 B/op 0 allocs/op -BenchmarkConcurrentMap_Count-14 245917975 9.753 ns/op 0 B/op 0 allocs/op -BenchmarkConcurrentMap_Count-14 246347533 9.857 ns/op 0 B/op 0 allocs/op -BenchmarkConcurrentMap_Count-14 242966653 9.768 ns/op 0 B/op 0 allocs/op -BenchmarkConcurrentMap_CountParallel-14 187792453 13.33 ns/op 8 B/op 0 allocs/op -BenchmarkConcurrentMap_CountParallel-14 175128324 13.76 ns/op 8 B/op 0 allocs/op -BenchmarkConcurrentMap_CountParallel-14 173046586 14.09 ns/op 8 B/op 0 allocs/op -BenchmarkConcurrentMap_CountParallel-14 174620641 13.80 ns/op 8 B/op 0 allocs/op -BenchmarkConcurrentMap_CountParallel-14 174253932 14.02 ns/op 8 B/op 0 allocs/op -BenchmarkConcurrentMap_GetShard-14 232021467 10.15 ns/op 0 B/op 0 allocs/op -BenchmarkConcurrentMap_GetShard-14 238652191 10.10 ns/op 0 B/op 0 allocs/op -BenchmarkConcurrentMap_GetShard-14 236221518 10.16 ns/op 0 B/op 0 allocs/op -BenchmarkConcurrentMap_GetShard-14 237259972 10.12 ns/op 0 B/op 0 allocs/op -BenchmarkConcurrentMap_GetShard-14 238689426 10.03 ns/op 0 B/op 0 allocs/op -BenchmarkConcurrentMap_IterBuffered-14 2704 778859 ns/op 1811549 B/op 230 allocs/op -BenchmarkConcurrentMap_IterBuffered-14 3111 771239 ns/op 1811412 B/op 230 allocs/op -BenchmarkConcurrentMap_IterBuffered-14 3247 756859 ns/op 1811416 B/op 230 allocs/op -BenchmarkConcurrentMap_IterBuffered-14 3224 757272 ns/op 1811418 B/op 230 allocs/op -BenchmarkConcurrentMap_IterBuffered-14 3092 756387 ns/op 1811415 B/op 230 allocs/op -PASS -ok github.com/hyp3rd/hypercache/pkg/cache/v2 48.545s diff --git a/bench-step1.txt b/bench-step1.txt deleted file mode 100644 index 8718918..0000000 --- a/bench-step1.txt +++ /dev/null @@ -1,51 +0,0 @@ -goos: darwin -goarch: arm64 -pkg: github.com/hyp3rd/hypercache/tests/benchmark -cpu: Apple M4 Pro -BenchmarkHyperCache_SetParallel-14 6260472 774.9 ns/op 223 B/op 3 allocs/op -BenchmarkHyperCache_SetParallel-14 6864753 767.0 ns/op 217 B/op 3 allocs/op -BenchmarkHyperCache_SetParallel-14 6478573 765.3 ns/op 220 B/op 3 allocs/op -BenchmarkHyperCache_SetParallel-14 6912163 758.1 ns/op 217 B/op 3 allocs/op -BenchmarkHyperCache_SetParallel-14 6514670 714.9 ns/op 220 B/op 3 allocs/op -BenchmarkHyperCache_GetParallel-14 56692104 85.48 ns/op 135 B/op 2 allocs/op -BenchmarkHyperCache_GetParallel-14 56765685 84.98 ns/op 135 B/op 2 allocs/op -BenchmarkHyperCache_GetParallel-14 55099080 84.60 ns/op 135 B/op 2 allocs/op -BenchmarkHyperCache_GetParallel-14 57246303 84.74 ns/op 135 B/op 2 allocs/op -BenchmarkHyperCache_GetParallel-14 55435438 80.97 ns/op 135 B/op 2 allocs/op -BenchmarkHyperCache_GetOrSetParallel-14 6500679 761.3 ns/op 220 B/op 3 allocs/op -BenchmarkHyperCache_GetOrSetParallel-14 6420355 761.2 ns/op 221 B/op 3 allocs/op -BenchmarkHyperCache_GetOrSetParallel-14 6384942 758.3 ns/op 221 B/op 3 allocs/op -BenchmarkHyperCache_GetOrSetParallel-14 6635151 768.5 ns/op 218 B/op 3 allocs/op -BenchmarkHyperCache_GetOrSetParallel-14 6613527 714.6 ns/op 218 B/op 3 allocs/op -BenchmarkHyperCache_MixedParallel-14 30420403 170.4 ns/op 159 B/op 3 allocs/op -BenchmarkHyperCache_MixedParallel-14 29663520 176.7 ns/op 159 B/op 3 allocs/op -BenchmarkHyperCache_MixedParallel-14 29980153 175.3 ns/op 159 B/op 3 allocs/op -BenchmarkHyperCache_MixedParallel-14 30013122 162.4 ns/op 159 B/op 3 allocs/op -BenchmarkHyperCache_MixedParallel-14 32641153 168.0 ns/op 157 B/op 3 allocs/op -BenchmarkHyperCache_Get-14 42228490 108.1 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_Get-14 44702448 107.7 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_Get-14 44540242 108.1 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_Get-14 44453173 107.7 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_Get-14 44364632 108.5 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_Get_ProactiveEviction-14 44611564 108.4 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_Get_ProactiveEviction-14 43978932 109.5 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_Get_ProactiveEviction-14 43477210 110.8 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_Get_ProactiveEviction-14 43584688 111.4 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_Get_ProactiveEviction-14 42684261 112.2 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_List-14 44195536 109.3 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_List-14 45295766 104.8 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_List-14 45874153 104.8 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_List-14 45919684 103.9 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_List-14 45967378 104.4 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_Set-14 12399925 449.5 ns/op 225 B/op 3 allocs/op -BenchmarkHyperCache_Set-14 11446411 478.4 ns/op 231 B/op 3 allocs/op -BenchmarkHyperCache_Set-14 12122114 446.2 ns/op 226 B/op 3 allocs/op -BenchmarkHyperCache_Set-14 11246661 472.0 ns/op 232 B/op 3 allocs/op -BenchmarkHyperCache_Set-14 11021144 478.0 ns/op 234 B/op 3 allocs/op -BenchmarkHyperCache_Set_Proactive_Eviction-14 4333801 1182 ns/op 454 B/op 6 allocs/op -BenchmarkHyperCache_Set_Proactive_Eviction-14 4385100 1121 ns/op 454 B/op 6 allocs/op -BenchmarkHyperCache_Set_Proactive_Eviction-14 4257256 1134 ns/op 454 B/op 6 allocs/op -BenchmarkHyperCache_Set_Proactive_Eviction-14 4205035 1203 ns/op 454 B/op 6 allocs/op -BenchmarkHyperCache_Set_Proactive_Eviction-14 4128036 1115 ns/op 454 B/op 6 allocs/op -PASS -ok github.com/hyp3rd/hypercache/tests/benchmark 258.571s diff --git a/bench-step2.txt b/bench-step2.txt deleted file mode 100644 index 7346c9a..0000000 --- a/bench-step2.txt +++ /dev/null @@ -1,51 +0,0 @@ -goos: darwin -goarch: arm64 -pkg: github.com/hyp3rd/hypercache/tests/benchmark -cpu: Apple M4 Pro -BenchmarkHyperCache_SetParallel-14 28227842 188.5 ns/op 221 B/op 3 allocs/op -BenchmarkHyperCache_SetParallel-14 32025574 229.3 ns/op 262 B/op 3 allocs/op -BenchmarkHyperCache_SetParallel-14 26087546 195.9 ns/op 219 B/op 3 allocs/op -BenchmarkHyperCache_SetParallel-14 31543452 206.6 ns/op 263 B/op 3 allocs/op -BenchmarkHyperCache_SetParallel-14 34179614 204.5 ns/op 255 B/op 3 allocs/op -BenchmarkHyperCache_GetParallel-14 56597265 82.91 ns/op 135 B/op 2 allocs/op -BenchmarkHyperCache_GetParallel-14 57092503 87.24 ns/op 135 B/op 2 allocs/op -BenchmarkHyperCache_GetParallel-14 55513336 82.22 ns/op 135 B/op 2 allocs/op -BenchmarkHyperCache_GetParallel-14 55296619 83.76 ns/op 135 B/op 2 allocs/op -BenchmarkHyperCache_GetParallel-14 56040259 82.45 ns/op 135 B/op 2 allocs/op -BenchmarkHyperCache_GetOrSetParallel-14 25826862 208.2 ns/op 220 B/op 3 allocs/op -BenchmarkHyperCache_GetOrSetParallel-14 28858351 214.8 ns/op 231 B/op 3 allocs/op -BenchmarkHyperCache_GetOrSetParallel-14 28640280 210.6 ns/op 227 B/op 3 allocs/op -BenchmarkHyperCache_GetOrSetParallel-14 30360506 222.7 ns/op 259 B/op 3 allocs/op -BenchmarkHyperCache_GetOrSetParallel-14 27698293 204.7 ns/op 217 B/op 3 allocs/op -BenchmarkHyperCache_MixedParallel-14 47300134 121.5 ns/op 162 B/op 3 allocs/op -BenchmarkHyperCache_MixedParallel-14 48800263 137.4 ns/op 162 B/op 3 allocs/op -BenchmarkHyperCache_MixedParallel-14 47583622 122.8 ns/op 162 B/op 3 allocs/op -BenchmarkHyperCache_MixedParallel-14 45183699 125.9 ns/op 163 B/op 3 allocs/op -BenchmarkHyperCache_MixedParallel-14 48629344 122.2 ns/op 162 B/op 3 allocs/op -BenchmarkHyperCache_Get-14 41079711 117.7 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_Get-14 40609336 122.5 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_Get-14 37839892 124.7 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_Get-14 39746574 124.6 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_Get-14 36861888 124.0 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_Get_ProactiveEviction-14 39026796 123.4 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_Get_ProactiveEviction-14 38521831 124.2 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_Get_ProactiveEviction-14 38527873 126.3 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_Get_ProactiveEviction-14 38009490 126.0 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_Get_ProactiveEviction-14 38018910 127.5 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_List-14 40412685 119.0 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_List-14 41405366 113.7 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_List-14 42876250 112.3 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_List-14 43379095 111.0 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_List-14 41403536 112.5 ns/op 128 B/op 1 allocs/op -BenchmarkHyperCache_Set-14 10691716 534.1 ns/op 236 B/op 3 allocs/op -BenchmarkHyperCache_Set-14 9867717 561.2 ns/op 243 B/op 3 allocs/op -BenchmarkHyperCache_Set-14 9506500 542.2 ns/op 247 B/op 3 allocs/op -BenchmarkHyperCache_Set-14 8988578 550.0 ns/op 252 B/op 3 allocs/op -BenchmarkHyperCache_Set-14 8975970 558.5 ns/op 252 B/op 3 allocs/op -BenchmarkHyperCache_Set_Proactive_Eviction-14 3497715 1480 ns/op 454 B/op 6 allocs/op -BenchmarkHyperCache_Set_Proactive_Eviction-14 3486507 1433 ns/op 454 B/op 6 allocs/op -BenchmarkHyperCache_Set_Proactive_Eviction-14 3464900 1428 ns/op 454 B/op 6 allocs/op -BenchmarkHyperCache_Set_Proactive_Eviction-14 3521899 1415 ns/op 454 B/op 6 allocs/op -BenchmarkHyperCache_Set_Proactive_Eviction-14 3444156 1450 ns/op 454 B/op 6 allocs/op -PASS -ok github.com/hyp3rd/hypercache/tests/benchmark 287.373s diff --git a/bench-step3.txt b/bench-step3.txt deleted file mode 100644 index 449cc1d..0000000 --- a/bench-step3.txt +++ /dev/null @@ -1,26 +0,0 @@ -goos: darwin -goarch: arm64 -pkg: github.com/hyp3rd/hypercache/pkg/cache/v2 -cpu: Apple M4 Pro -BenchmarkConcurrentMap_Count-14 122877582 9.726 ns/op 0 B/op 0 allocs/op -BenchmarkConcurrentMap_Count-14 122084272 9.828 ns/op 0 B/op 0 allocs/op -BenchmarkConcurrentMap_Count-14 123835690 9.683 ns/op 0 B/op 0 allocs/op -BenchmarkConcurrentMap_Count-14 122782857 9.763 ns/op 0 B/op 0 allocs/op -BenchmarkConcurrentMap_Count-14 124295282 9.649 ns/op 0 B/op 0 allocs/op -BenchmarkConcurrentMap_CountParallel-14 79226004 13.44 ns/op 9 B/op 0 allocs/op -BenchmarkConcurrentMap_CountParallel-14 82377501 13.32 ns/op 9 B/op 0 allocs/op -BenchmarkConcurrentMap_CountParallel-14 93374600 12.83 ns/op 9 B/op 0 allocs/op -BenchmarkConcurrentMap_CountParallel-14 74157042 14.11 ns/op 9 B/op 0 allocs/op -BenchmarkConcurrentMap_CountParallel-14 89445556 13.49 ns/op 8 B/op 0 allocs/op -BenchmarkConcurrentMap_GetShard-14 100000000 10.08 ns/op 0 B/op 0 allocs/op -BenchmarkConcurrentMap_GetShard-14 120268629 10.01 ns/op 0 B/op 0 allocs/op -BenchmarkConcurrentMap_GetShard-14 100000000 10.22 ns/op 0 B/op 0 allocs/op -BenchmarkConcurrentMap_GetShard-14 100000000 10.03 ns/op 0 B/op 0 allocs/op -BenchmarkConcurrentMap_GetShard-14 100000000 10.07 ns/op 0 B/op 0 allocs/op -BenchmarkConcurrentMap_All-14 46454 26087 ns/op 0 B/op 0 allocs/op -BenchmarkConcurrentMap_All-14 44976 26633 ns/op 0 B/op 0 allocs/op -BenchmarkConcurrentMap_All-14 45357 26457 ns/op 0 B/op 0 allocs/op -BenchmarkConcurrentMap_All-14 41589 27827 ns/op 0 B/op 0 allocs/op -BenchmarkConcurrentMap_All-14 45512 26454 ns/op 0 B/op 0 allocs/op -PASS -ok github.com/hyp3rd/hypercache/pkg/cache/v2 23.302s diff --git a/cspell.config.yaml b/cspell.config.yaml index a84701f..5520855 100644 --- a/cspell.config.yaml +++ b/cspell.config.yaml @@ -34,12 +34,14 @@ dictionaryDefinitions: [] dictionaries: [] words: - acks + - ALPN - autosync - backpressure - benchmarkdist - benchmem - benchtime - bitnami + - bodyclose - bufbuild - cacheerrors - cachev @@ -53,7 +55,9 @@ words: - Cmder - codacy - containedctx + - contextcheck - cpuprofile + - cret - cyclop - daixiang - Decr @@ -64,11 +68,13 @@ words: - EDITMSG - elif - errcheck + - errp - ewrap - excalidraw - excludeonly - exhaustruct - Fanout + - fasthttp - fatals - fctx - ferr @@ -102,6 +108,7 @@ words: - HMAC - honnef - hreq + - HTTPTLS - hypercache - idxs - Iface diff --git a/internal/sentinel/sentinel.go b/internal/sentinel/sentinel.go index 685d8f8..2855360 100644 --- a/internal/sentinel/sentinel.go +++ b/internal/sentinel/sentinel.go @@ -69,6 +69,9 @@ var ( // ErrTimeoutOrCanceled is returned when a timeout or cancellation occurs. ErrTimeoutOrCanceled = ewrap.New("the operation timed out or was canceled") + // ErrUnauthorized is returned when an HTTP request to the dist transport is missing or carries an invalid auth token. + ErrUnauthorized = ewrap.New("unauthorized") + // ErrNotOwner is returned when a distributed backend instance is not the ring owner for a key. ErrNotOwner = ewrap.New("not ring owner") diff --git a/lint-baseline-v2.txt b/lint-baseline-v2.txt deleted file mode 100644 index da4c1b3..0000000 --- a/lint-baseline-v2.txt +++ /dev/null @@ -1,4138 +0,0 @@ -tests/hypercache_distmemory_heartbeat_test.go:14:1: calculated cyclomatic complexity for function TestDistMemoryHeartbeatLiveness is 27, max is 15 (cyclop) -func TestDistMemoryHeartbeatLiveness(t *testing.T) { //nolint:paralleltest -^ -tests/hypercache_distmemory_integration_test.go:15:1: calculated cyclomatic complexity for function TestDistMemoryForwardingReplication is 16, max is 15 (cyclop) -func TestDistMemoryForwardingReplication(t *testing.T) { -^ -tests/hypercache_distmemory_remove_readrepair_test.go:113:1: calculated cyclomatic complexity for function TestDistMemoryReadRepair is 27, max is 15 (cyclop) -func TestDistMemoryReadRepair(t *testing.T) { -^ -tests/hypercache_distmemory_stale_quorum_test.go:14:1: calculated cyclomatic complexity for function TestDistMemoryStaleQuorum is 16, max is 15 (cyclop) -func TestDistMemoryStaleQuorum(t *testing.T) { -^ -tests/hypercache_distmemory_versioning_test.go:17:1: calculated cyclomatic complexity for function TestDistMemoryVersioningQuorum is 16, max is 15 (cyclop) -func TestDistMemoryVersioningQuorum(t *testing.T) { //nolint:paralleltest -^ -tests/hypercache_http_merkle_test.go:15:1: calculated cyclomatic complexity for function TestHTTPFetchMerkle is 17, max is 15 (cyclop) -func TestHTTPFetchMerkle(t *testing.T) { -^ -tests/integration/dist_phase1_test.go:115:1: calculated cyclomatic complexity for function valueOK is 25, max is 15 (cyclop) -func valueOK(v any) bool { //nolint:ireturn -^ -pkg/eviction/cawolfu_test.go:1: 1-91 lines are duplicate of `pkg/eviction/lru_test.go:1-92` (dupl) -package eviction - -import "testing" - -func TestCAWOLFU_EvictsLeastFrequentTail(t *testing.T) { - c, err := NewCAWOLFU(2) - if err != nil { - t.Fatalf("NewCAWOLFU error: %v", err) - } - - c.Set("a", 1) - c.Set("b", 2) - - // bump 'a' so 'b' is less frequent - if _, ok := c.Get("a"); !ok { - t.Fatalf("expected to get 'a'") - } - - // Insert 'c' -> evict tail ('b') - c.Set("c", 3) - - if _, ok := c.Get("b"); ok { - t.Fatalf("expected 'b' to be evicted") - } - - if _, ok := c.Get("a"); !ok { - t.Fatalf("expected 'a' to remain in cache") - } - - if v, ok := c.Get("c"); !ok || v.(int) != 3 { - t.Fatalf("expected 'c'=3 in cache, got %v, ok=%v", v, ok) - } -} - -func TestCAWOLFU_EvictMethodOrder(t *testing.T) { - c, err := NewCAWOLFU(2) - if err != nil { - t.Fatalf("NewCAWOLFU error: %v", err) - } - - c.Set("a", 1) - c.Set("b", 2) - - // Without additional access, tail is 'a' (inserted first with same count) - key, ok := c.Evict() - if !ok || key != "a" { - t.Fatalf("expected to evict 'a' first, got %q ok=%v", key, ok) - } - - key, ok = c.Evict() - if !ok || key != "b" { - t.Fatalf("expected to evict 'b' second, got %q ok=%v", key, ok) - } -} - -func TestCAWOLFU_ZeroCapacity_NoOp(t *testing.T) { - c, err := NewCAWOLFU(0) - if err != nil { - t.Fatalf("NewCAWOLFU error: %v", err) - } - - c.Set("a", 1) - - if _, ok := c.Get("a"); ok { - t.Fatalf("expected Get to miss on zero-capacity cache") - } - - if key, ok := c.Evict(); ok || key != "" { - t.Fatalf("expected no eviction on zero-capacity, got %q ok=%v", key, ok) - } -} - -func TestCAWOLFU_Delete_RemovesItem(t *testing.T) { - c, err := NewCAWOLFU(2) - if err != nil { - t.Fatalf("NewCAWOLFU error: %v", err) - } - - c.Set("a", 1) - c.Set("b", 2) - c.Delete("a") - - if _, ok := c.Get("a"); ok { - t.Fatalf("expected 'a' to be deleted") - } - - key, ok := c.Evict() - if !ok || key != "b" { - t.Fatalf("expected to evict 'b' as remaining item, got %q ok=%v", key, ok) - } -} -pkg/eviction/lru_test.go:1: 1-92 lines are duplicate of `pkg/eviction/cawolfu_test.go:1-91` (dupl) -package eviction - -import "testing" - -func TestLRU_EvictsLeastRecentlyUsedOnSet(t *testing.T) { - lru, err := NewLRUAlgorithm(2) - if err != nil { - t.Fatalf("NewLRUAlgorithm error: %v", err) - } - - lru.Set("a", 1) - lru.Set("b", 2) - - // Access "a" so that "b" becomes the least recently used - if _, ok := lru.Get("a"); !ok { - t.Fatalf("expected to get 'a'") - } - - // Insert "c"; should evict "b" - lru.Set("c", 3) - - if _, ok := lru.Get("b"); ok { - t.Fatalf("expected 'b' to be evicted") - } - - if _, ok := lru.Get("a"); !ok { - t.Fatalf("expected 'a' to remain in cache") - } - - if v, ok := lru.Get("c"); !ok || v.(int) != 3 { - t.Fatalf("expected 'c'=3 in cache, got %v, ok=%v", v, ok) - } -} - -func TestLRU_EvictMethodOrder(t *testing.T) { - lru, err := NewLRUAlgorithm(2) - if err != nil { - t.Fatalf("NewLRUAlgorithm error: %v", err) - } - - lru.Set("a", 1) - lru.Set("b", 2) - - // After two inserts, tail should be "a" - key, ok := lru.Evict() - if !ok || key != "a" { - t.Fatalf("expected to evict 'a' first, got %q ok=%v", key, ok) - } - - key, ok = lru.Evict() - if !ok || key != "b" { - t.Fatalf("expected to evict 'b' second, got %q ok=%v", key, ok) - } -} - -func TestLRU_ZeroCapacity_NoOp(t *testing.T) { - lru, err := NewLRUAlgorithm(0) - if err != nil { - t.Fatalf("NewLRUAlgorithm error: %v", err) - } - - lru.Set("a", 1) - - if _, ok := lru.Get("a"); ok { - t.Fatalf("expected Get to miss on zero-capacity cache") - } - - if key, ok := lru.Evict(); ok || key != "" { - t.Fatalf("expected no eviction on zero-capacity, got %q ok=%v", key, ok) - } -} - -func TestLRU_Delete_RemovesItem(t *testing.T) { - lru, err := NewLRUAlgorithm(2) - if err != nil { - t.Fatalf("NewLRUAlgorithm error: %v", err) - } - - lru.Set("a", 1) - lru.Set("b", 2) - lru.Delete("a") - - if _, ok := lru.Get("a"); ok { - t.Fatalf("expected 'a' to be deleted") - } - - // Evict should not return deleted key - key, ok := lru.Evict() - if !ok || key != "b" { - t.Fatalf("expected to evict 'b' as remaining item, got %q ok=%v", key, ok) - } -} -tests/integration/dist_rebalance_test.go:256:14: do not define dynamic errors, use wrapped static errors instead: "fmt.Errorf(\"unexpected status %d\", resp.StatusCode)" (err113) - lastErr = fmt.Errorf("unexpected status %d", resp.StatusCode) - ^ -pkg/eviction/arc_test.go:14:35: Error return value is not checked (errcheck) - if v, ok := arc.Get("a"); !ok || v.(int) != 1 { - ^ -pkg/eviction/arc_test.go:78:35: Error return value is not checked (errcheck) - if v, ok := arc.Get("a"); !ok || v.(int) != 10 { - ^ -pkg/eviction/cawolfu_test.go:30:33: Error return value is not checked (errcheck) - if v, ok := c.Get("c"); !ok || v.(int) != 3 { - ^ -tests/hypercache_get_multiple_test.go:30:14: Error return value of `cache.Set` is not checked (errcheck) - cache.Set(context.TODO(), "key1", 1, 0) - ^ -tests/hypercache_get_multiple_test.go:31:14: Error return value of `cache.Set` is not checked (errcheck) - cache.Set(context.TODO(), "key2", 2, 0) - ^ -tests/hypercache_get_multiple_test.go:32:14: Error return value of `cache.Set` is not checked (errcheck) - cache.Set(context.TODO(), "key3", 3, 0) - ^ -tests/hypercache_mgmt_dist_test.go:142:23: Error return value of `resp.Body.Close` is not checked (errcheck) - defer resp.Body.Close() - ^ -tests/hypercache_set_test.go:75:18: Error return value of `cache.Stop` is not checked (errcheck) - defer cache.Stop(context.TODO()) - ^ -tests/hypercache_trigger_eviction_test.go:19:15: Error return value of `hc.Stop` is not checked (errcheck) - defer hc.Stop(context.TODO()) - ^ -tests/management_http_test.go:34:15: Error return value of `hc.Stop` is not checked (errcheck) - defer hc.Stop(ctx) - ^ -pkg/eviction/arc_test.go:14:35: type assertion must be checked (forcetypeassert) - if v, ok := arc.Get("a"); !ok || v.(int) != 1 { - ^ -pkg/eviction/arc_test.go:78:35: type assertion must be checked (forcetypeassert) - if v, ok := arc.Get("a"); !ok || v.(int) != 10 { - ^ -pkg/eviction/cawolfu_test.go:30:33: type assertion must be checked (forcetypeassert) - if v, ok := c.Get("c"); !ok || v.(int) != 3 { - ^ -tests/hypercache_distmemory_heartbeat_sampling_test.go:13:6: Function 'TestHeartbeatSamplingAndTransitions' has too many statements (44 > 40) (funlen) -func TestHeartbeatSamplingAndTransitions(t *testing.T) { //nolint:paralleltest - ^ -tests/hypercache_distmemory_heartbeat_test.go:14:6: Function 'TestDistMemoryHeartbeatLiveness' has too many statements (75 > 40) (funlen) -func TestDistMemoryHeartbeatLiveness(t *testing.T) { //nolint:paralleltest - ^ -tests/hypercache_distmemory_integration_test.go:15:6: Function 'TestDistMemoryForwardingReplication' has too many statements (49 > 40) (funlen) -func TestDistMemoryForwardingReplication(t *testing.T) { - ^ -tests/hypercache_distmemory_stale_quorum_test.go:14:6: Function 'TestDistMemoryStaleQuorum' has too many statements (57 > 40) (funlen) -func TestDistMemoryStaleQuorum(t *testing.T) { - ^ -tests/hypercache_distmemory_tiebreak_test.go:15:6: Function 'TestDistMemoryVersionTieBreak' has too many statements (44 > 40) (funlen) -func TestDistMemoryVersionTieBreak(t *testing.T) { //nolint:paralleltest - ^ -tests/hypercache_distmemory_versioning_test.go:17:6: Function 'TestDistMemoryVersioningQuorum' has too many statements (51 > 40) (funlen) -func TestDistMemoryVersioningQuorum(t *testing.T) { //nolint:paralleltest - ^ -tests/hypercache_distmemory_write_quorum_test.go:76:6: Function 'TestWriteQuorumFailure' has too many statements (47 > 40) (funlen) -func TestWriteQuorumFailure(t *testing.T) { - ^ -tests/hypercache_http_merkle_test.go:15:6: Function 'TestHTTPFetchMerkle' has too many statements (56 > 40) (funlen) -func TestHTTPFetchMerkle(t *testing.T) { - ^ -tests/management_http_test.go:19:6: Function 'TestManagementHTTP_BasicEndpoints' has too many statements (42 > 40) (funlen) -func TestManagementHTTP_BasicEndpoints(t *testing.T) { - ^ -tests/hypercache_distmemory_heartbeat_test.go:14:1: cognitive complexity 41 of func `TestDistMemoryHeartbeatLiveness` is high (> 30) (gocognit) -func TestDistMemoryHeartbeatLiveness(t *testing.T) { //nolint:paralleltest -^ -tests/integration/dist_phase1_test.go:115:1: cognitive complexity 42 of func `valueOK` is high (> 30) (gocognit) -func valueOK(v any) bool { //nolint:ireturn -^ -pkg/cache/cmap_test.go:50:9: string `test` has 15 occurrences, make it a constant (goconst) - key := "test" - ^ -pkg/cache/cmap_test.go:68:3: string `key1` has 7 occurrences, make it a constant (goconst) - "key1": 1, - ^ -pkg/cache/cmap_test.go:69:3: string `key2` has 7 occurrences, make it a constant (goconst) - "key2": 2, - ^ -pkg/cache/cmap_test.go:70:3: string `key3` has 5 occurrences, make it a constant (goconst) - "key3": 3, - ^ -pkg/cache/item_test.go:34:21: string `test` has 15 occurrences, make it a constant (goconst) - item := &Item{Key: "test", Value: "value"} - ^ -pkg/cache/v2/cmap_test.go:75:15: string `test_value` has 5 occurrences, make it a constant (goconst) - Value: "test_value", - ^ -tests/hypercache_get_multiple_test.go:26:25: string `key1` has 12 occurrences, make it a constant (goconst) - keys: []string{"key1", "key2", "key3"}, - ^ -tests/hypercache_get_multiple_test.go:26:33: string `key2` has 9 occurrences, make it a constant (goconst) - keys: []string{"key1", "key2", "key3"}, - ^ -tests/hypercache_get_multiple_test.go:26:41: string `key3` has 6 occurrences, make it a constant (goconst) - keys: []string{"key1", "key2", "key3"}, - ^ -tests/hypercache_get_or_set_test.go:27:19: string `value1` has 7 occurrences, make it a constant (goconst) - value: "value1", - ^ -tests/hypercache_get_or_set_test.go:35:19: string `value2` has 6 occurrences, make it a constant (goconst) - value: "value2", - ^ -tests/hypercache_get_test.go:27:19: string `key1` has 12 occurrences, make it a constant (goconst) - key: "key1", - ^ -tests/hypercache_get_test.go:28:19: string `value1` has 7 occurrences, make it a constant (goconst) - value: "value1", - ^ -tests/hypercache_get_test.go:35:19: string `key2` has 9 occurrences, make it a constant (goconst) - key: "key2", - ^ -tests/hypercache_get_test.go:36:19: string `value2` has 6 occurrences, make it a constant (goconst) - value: "value2", - ^ -tests/hypercache_set_test.go:25:19: string `key1` has 12 occurrences, make it a constant (goconst) - key: "key1", - ^ -tests/hypercache_set_test.go:26:19: string `value1` has 7 occurrences, make it a constant (goconst) - value: "value1", - ^ -tests/hypercache_set_test.go:33:19: string `key2` has 9 occurrences, make it a constant (goconst) - key: "key2", - ^ -tests/hypercache_set_test.go:34:19: string `value2` has 6 occurrences, make it a constant (goconst) - value: "value2", - ^ -tests/hypercache_distmemory_stale_quorum_test.go:77:2: ifElseChain: rewrite if-else to switch statement (gocritic) - if primary == b1.LocalNodeID() { - ^ -tests/integration/dist_phase1_test.go:105:9: elseif: can replace 'else {if cond {}}' with 'else if cond {}' (gocritic) - } else { - ^ -tests/hypercache_get_multiple_test.go:30:5: G104: Errors unhandled (gosec) - cache.Set(context.TODO(), "key1", 1, 0) - ^ -tests/hypercache_get_multiple_test.go:31:5: G104: Errors unhandled (gosec) - cache.Set(context.TODO(), "key2", 2, 0) - ^ -tests/hypercache_get_multiple_test.go:32:5: G104: Errors unhandled (gosec) - cache.Set(context.TODO(), "key3", 3, 0) - ^ -pkg/cache/v2/cmap_test.go:266:22: G115: integer overflow conversion int -> rune (gosec) - cm.Set(string(rune(i)), item) - ^ -pkg/cache/v2/cmap_test.go:277:22: G115: integer overflow conversion int -> rune (gosec) - cm.Get(string(rune(i))) - ^ -pkg/stats/histogramcollector_test.go:291:17: G115: integer overflow conversion uint64 -> int64 (gosec) - growth := int64(after.HeapAlloc) - int64(before.HeapAlloc) - ^ -pkg/stats/histogramcollector_test.go:291:42: G115: integer overflow conversion uint64 -> int64 (gosec) - growth := int64(after.HeapAlloc) - int64(before.HeapAlloc) - ^ -tests/hypercache_http_merkle_test.go:134:58: G115: integer overflow conversion int -> rune (gosec) -func httpKey(i int) string { return "hkey:" + string(rune('a'+i)) } - ^ -tests/hypercache_http_merkle_test.go:92:24: net/http.Get must not be called. use net/http.NewRequestWithContext and (*net/http.Client).Do(*http.Request) (noctx) - resp, err := http.Get("http://" + b1.LocalNodeAddr() + "/internal/merkle") - ^ -tests/integration/dist_phase1_test.go:20:22: net.Listen must not be called. use (*net.ListenConfig).Listen (noctx) - l, err := net.Listen("tcp", "127.0.0.1:0") - ^ -tests/management_http_test.go:58:25: (*net/http.Client).Get must not be called. use (*net/http.Client).Do(*http.Request) (noctx) - resp, err := client.Get("http://" + addr + "/health") - ^ -tests/management_http_test.go:68:24: (*net/http.Client).Get must not be called. use (*net/http.Client).Do(*http.Request) (noctx) - resp, err = client.Get("http://" + addr + "/stats") - ^ -tests/management_http_test.go:85:24: (*net/http.Client).Get must not be called. use (*net/http.Client).Do(*http.Request) (noctx) - resp, err = client.Get("http://" + addr + "/config") - ^ -tests/hypercache_http_merkle_test.go:111:5: avoid inline error handling using `if err := ...; err != nil`; use plain assignment `err := ...` (noinlineerr) - if err := b2.SyncWith(ctx, "n1"); err != nil { - ^ -tests/integration/dist_phase1_test.go:123:10: avoid inline error handling using `if err := ...; err != nil`; use plain assignment `err := ...` (noinlineerr) - if b, err := base64.StdEncoding.DecodeString(s); err == nil && string(b) == "v1" { - ^ -tests/merkle_sync_test.go:69:5: avoid inline error handling using `if err := ...; err != nil`; use plain assignment `err := ...` (noinlineerr) - if err := dmB.SyncWith(ctx, string(dmA.LocalNodeID())); err != nil { - ^ -pkg/cache/cmap_test.go:19:1: Function TestNew missing the call to method parallel (paralleltest) -func TestNew(t *testing.T) { -^ -pkg/cache/cmap_test.go:30:1: Function TestNewStringer missing the call to method parallel (paralleltest) -func TestNewStringer(t *testing.T) { -^ -pkg/cache/cmap_test.go:37:1: Function TestNewWithCustomShardingFunction missing the call to method parallel (paralleltest) -func TestNewWithCustomShardingFunction(t *testing.T) { -^ -pkg/cache/cmap_test.go:48:1: Function TestSetAndGet missing the call to method parallel (paralleltest) -func TestSetAndGet(t *testing.T) { -^ -pkg/cache/cmap_test.go:65:1: Function TestMSet missing the call to method parallel (paralleltest) -func TestMSet(t *testing.T) { -^ -pkg/cache/cmap_test.go:82:1: Function TestUpsert missing the call to method parallel (paralleltest) -func TestUpsert(t *testing.T) { -^ -pkg/cache/cmap_test.go:113:1: Function TestSetIfAbsent missing the call to method parallel (paralleltest) -func TestSetIfAbsent(t *testing.T) { -^ -pkg/cache/cmap_test.go:133:1: Function TestHas missing the call to method parallel (paralleltest) -func TestHas(t *testing.T) { -^ -pkg/cache/v2/cmap_test.go:14:1: Function TestNew missing the call to method parallel (paralleltest) -func TestNew(t *testing.T) { -^ -pkg/cache/v2/cmap_test.go:37:1: Function TestGetShardIndex missing the call to method parallel (paralleltest) -func TestGetShardIndex(t *testing.T) { -^ -pkg/cache/v2/cmap_test.go:47:2: Range statement for test TestGetShardIndex missing the call to method parallel in test Run (paralleltest) - for _, tt := range tests { - ^ -pkg/cache/v2/cmap_test.go:57:1: Function TestGetShard missing the call to method parallel (paralleltest) -func TestGetShard(t *testing.T) { -^ -pkg/cache/v2/cmap_test.go:72:1: Function TestSetAndGet missing the call to method parallel (paralleltest) -func TestSetAndGet(t *testing.T) { -^ -pkg/cache/v2/cmap_test.go:99:1: Function TestHas missing the call to method parallel (paralleltest) -func TestHas(t *testing.T) { -^ -pkg/cache/v2/cmap_test.go:119:1: Function TestPop missing the call to method parallel (paralleltest) -func TestPop(t *testing.T) { -^ -pkg/cache/v2/cmap_test.go:146:1: Function TestRemove missing the call to method parallel (paralleltest) -func TestRemove(t *testing.T) { -^ -pkg/cache/v2/cmap_test.go:164:1: Function TestCount missing the call to method parallel (paralleltest) -func TestCount(t *testing.T) { -^ -pkg/cache/v2/cmap_test.go:195:1: Function TestIterBuffered missing the call to method parallel (paralleltest) -func TestIterBuffered(t *testing.T) { -^ -pkg/cache/v2/cmap_test.go:227:1: Function TestClear missing the call to method parallel (paralleltest) -func TestClear(t *testing.T) { -^ -pkg/cache/v2/cmap_test.go:251:1: Function TestConcurrentAccess missing the call to method parallel (paralleltest) -func TestConcurrentAccess(t *testing.T) { -^ -pkg/cache/v2/cmap_test.go:284:1: Function TestSnapshotPanic missing the call to method parallel (paralleltest) -func TestSnapshotPanic(t *testing.T) { -^ -pkg/eviction/arc_test.go:5:1: Function TestARC_BasicSetGetAndEvict missing the call to method parallel (paralleltest) -func TestARC_BasicSetGetAndEvict(t *testing.T) { -^ -pkg/eviction/arc_test.go:29:1: Function TestARC_ZeroCapacity_NoOp missing the call to method parallel (paralleltest) -func TestARC_ZeroCapacity_NoOp(t *testing.T) { -^ -pkg/eviction/arc_test.go:46:1: Function TestARC_Delete_RemovesResidentAndGhost missing the call to method parallel (paralleltest) -func TestARC_Delete_RemovesResidentAndGhost(t *testing.T) { -^ -pkg/eviction/arc_test.go:65:1: Function TestARC_B1GhostHitPromotesToT2 missing the call to method parallel (paralleltest) -func TestARC_B1GhostHitPromotesToT2(t *testing.T) { -^ -pkg/eviction/cawolfu_test.go:5:1: Function TestCAWOLFU_EvictsLeastFrequentTail missing the call to method parallel (paralleltest) -func TestCAWOLFU_EvictsLeastFrequentTail(t *testing.T) { -^ -pkg/eviction/cawolfu_test.go:35:1: Function TestCAWOLFU_EvictMethodOrder missing the call to method parallel (paralleltest) -func TestCAWOLFU_EvictMethodOrder(t *testing.T) { -^ -pkg/eviction/cawolfu_test.go:56:1: Function TestCAWOLFU_ZeroCapacity_NoOp missing the call to method parallel (paralleltest) -func TestCAWOLFU_ZeroCapacity_NoOp(t *testing.T) { -^ -pkg/eviction/cawolfu_test.go:73:1: Function TestCAWOLFU_Delete_RemovesItem missing the call to method parallel (paralleltest) -func TestCAWOLFU_Delete_RemovesItem(t *testing.T) { -^ -pkg/eviction/clock_test.go:5:1: Function TestClock_EvictsWhenHandFindsColdPage missing the call to method parallel (paralleltest) -func TestClock_EvictsWhenHandFindsColdPage(t *testing.T) { -^ -pkg/eviction/clock_test.go:48:1: Function TestClock_ZeroCapacity_NoOp missing the call to method parallel (paralleltest) -func TestClock_ZeroCapacity_NoOp(t *testing.T) { -^ -pkg/eviction/clock_test.go:65:1: Function TestClock_Delete_RemovesItem missing the call to method parallel (paralleltest) -func TestClock_Delete_RemovesItem(t *testing.T) { -^ -pkg/eviction/lfu_test.go:6:1: Function TestLFU_EvictsOldestOnTie_InsertOrder missing the call to method parallel (paralleltest) -func TestLFU_EvictsOldestOnTie_InsertOrder(t *testing.T) { -^ -pkg/eviction/lfu_test.go:37:1: Function TestLFU_EvictsOldestOnTie_AccessOrder missing the call to method parallel (paralleltest) -func TestLFU_EvictsOldestOnTie_AccessOrder(t *testing.T) { -^ -pkg/eviction/lfu_test.go:67:1: Function TestLFU_ZeroCapacity_NoOp missing the call to method parallel (paralleltest) -func TestLFU_ZeroCapacity_NoOp(t *testing.T) { -^ -pkg/eviction/lfu_test.go:84:1: Function TestLFU_Delete_RemovesItem missing the call to method parallel (paralleltest) -func TestLFU_Delete_RemovesItem(t *testing.T) { -^ -pkg/eviction/lru_test.go:5:1: Function TestLRU_EvictsLeastRecentlyUsedOnSet missing the call to method parallel (paralleltest) -func TestLRU_EvictsLeastRecentlyUsedOnSet(t *testing.T) { -^ -pkg/eviction/lru_test.go:35:1: Function TestLRU_EvictMethodOrder missing the call to method parallel (paralleltest) -func TestLRU_EvictMethodOrder(t *testing.T) { -^ -pkg/eviction/lru_test.go:56:1: Function TestLRU_ZeroCapacity_NoOp missing the call to method parallel (paralleltest) -func TestLRU_ZeroCapacity_NoOp(t *testing.T) { -^ -pkg/eviction/lru_test.go:73:1: Function TestLRU_Delete_RemovesItem missing the call to method parallel (paralleltest) -func TestLRU_Delete_RemovesItem(t *testing.T) { -^ -pkg/stats/histogramcollector_test.go:12:1: Function TestHistogramStatsCollector_BasicAggregates missing the call to method parallel (paralleltest) -func TestHistogramStatsCollector_BasicAggregates(t *testing.T) { -^ -pkg/stats/histogramcollector_test.go:51:1: Function TestHistogramStatsCollector_DecrStoresNegative missing the call to method parallel (paralleltest) -func TestHistogramStatsCollector_DecrStoresNegative(t *testing.T) { -^ -pkg/stats/histogramcollector_test.go:71:1: Function TestHistogramStatsCollector_Median missing the call to method parallel (paralleltest) -func TestHistogramStatsCollector_Median(t *testing.T) { -^ -pkg/stats/histogramcollector_test.go:91:1: Function TestHistogramStatsCollector_Percentile missing the call to method parallel (paralleltest) -func TestHistogramStatsCollector_Percentile(t *testing.T) { -^ -pkg/stats/histogramcollector_test.go:115:1: Function TestHistogramStatsCollector_EmptyStat missing the call to method parallel (paralleltest) -func TestHistogramStatsCollector_EmptyStat(t *testing.T) { -^ -pkg/stats/histogramcollector_test.go:136:1: Function TestHistogramStatsCollector_BoundedSamples missing the call to method parallel (paralleltest) -func TestHistogramStatsCollector_BoundedSamples(t *testing.T) { -^ -pkg/stats/histogramcollector_test.go:173:1: Function TestHistogramStatsCollector_ConcurrentRecord missing the call to method parallel (paralleltest) -func TestHistogramStatsCollector_ConcurrentRecord(t *testing.T) { -^ -pkg/stats/histogramcollector_test.go:226:1: Function TestHistogramStatsCollector_GetStatsSnapshotIsolated missing the call to method parallel (paralleltest) -func TestHistogramStatsCollector_GetStatsSnapshotIsolated(t *testing.T) { -^ -pkg/stats/histogramcollector_test.go:254:1: Function TestHistogramStatsCollector_NoMemoryLeak missing the call to method parallel (paralleltest) -func TestHistogramStatsCollector_NoMemoryLeak(t *testing.T) { -^ -pkg/stats/histogramcollector_test.go:307:1: Function TestHistogramStatsCollector_AtomicMinMaxRace missing the call to method parallel (paralleltest) -func TestHistogramStatsCollector_AtomicMinMaxRace(t *testing.T) { -^ -tests/integration/dist_rebalance_replica_diff_test.go:21:2: Consider preallocating baseOpts with capacity 4 (prealloc) - baseOpts := []backend.DistMemoryOption{ - ^ -tests/integration/dist_rebalance_replica_diff_throttle_test.go:20:2: Consider preallocating base with capacity 5 (prealloc) - base := []backend.DistMemoryOption{ - ^ -tests/integration/dist_rebalance_test.go:177:2: Consider preallocating opts with capacity 6 + len(extra) (prealloc) - opts := []backend.DistMemoryOption{ - ^ -pkg/stats/histogramcollector_test.go:137:8: const cap has same name as predeclared identifier (predeclared) - const cap = 8 - ^ -pkg/stats/histogramcollector_test.go:259:8: const cap has same name as predeclared identifier (predeclared) - const cap = 1024 - ^ -pkg/cache/cmap_test.go:38:25: unused-parameter: parameter 'key' seems to be unused, consider removing or renaming it as _ (revive) - customSharding := func(key string) uint32 { - ^ -pkg/cache/cmap_test.go:169:37: unused-parameter: parameter 'k' seems to be unused, consider removing or renaming it as _ (revive) - removed := cmap.RemoveCb(key, func(k string, v int, exists bool) bool { - ^ -pkg/cache/cmap_test.go:423:22: unused-parameter: parameter 't' seems to be unused, consider removing or renaming it as _ (revive) -func TestConcurrency(t *testing.T) { - ^ -pkg/cache/v2/cmap_test.go:251:27: unused-parameter: parameter 't' seems to be unused, consider removing or renaming it as _ (revive) -func TestConcurrentAccess(t *testing.T) { - ^ -pkg/cache/v2/cmap_test.go:257:3: use-waitgroup-go: replace wg.Add()...go {...wg.Done()...} with wg.Go(...) (revive) - wg.Add(1) - ^ -pkg/cache/v2/cmap_test.go:272:3: use-waitgroup-go: replace wg.Add()...go {...wg.Done()...} with wg.Go(...) (revive) - wg.Add(1) - ^ -pkg/eviction/cawolfu.go:14: line-length-limit: line is 151 characters, out of limit 150 (revive) - list *CAWOLFULinkedList // linked list to store the items in the cache, with the most frequently used items at the front -pkg/stats/histogramcollector_test.go:137:2: redefines-builtin-id: redefinition of the built-in function cap (revive) - const cap = 8 - ^ -pkg/stats/histogramcollector_test.go:259:2: redefines-builtin-id: redefinition of the built-in function cap (revive) - const cap = 1024 - ^ -pkg/stats/histogramcollector_test.go:268:2: call-to-gc: explicit call to the garbage collector (revive) - runtime.GC() - ^ -pkg/stats/histogramcollector_test.go:281:2: call-to-gc: explicit call to the garbage collector (revive) - runtime.GC() - ^ -tests/benchmark/hypercache_concurrency_benchmark_test.go:1:9: package-directory-mismatch: package name "tests" does not match directory name "benchmark" (revive) -package tests - ^ -tests/benchmark/hypercache_list_benchmark_test.go:1:9: package-directory-mismatch: package name "tests" does not match directory name "benchmark" (revive) -package tests - ^ -tests/benchmark/hypercache_set_benchmark_test.go:1:9: package-directory-mismatch: package name "tests" does not match directory name "benchmark" (revive) -package tests - ^ -tests/hypercache_distmemory_heartbeat_sampling_test.go:13:1: function-length: maximum number of lines per function exceeded; max 75 but got 79 (revive) -func TestHeartbeatSamplingAndTransitions(t *testing.T) { //nolint:paralleltest - ctx := context.Background() - ring := cluster.NewRing(cluster.WithReplication(1)) - membership := cluster.NewMembership(ring) - transport := backend.NewInProcessTransport() - - // three peers plus local - n1 := cluster.NewNode("", "n1") - n2 := cluster.NewNode("", "n2") - n3 := cluster.NewNode("", "n3") - - b1i, _ := backend.NewDistMemory( - ctx, - backend.WithDistMembership(membership, n1), - backend.WithDistTransport(transport), - backend.WithDistHeartbeat(15*time.Millisecond, 40*time.Millisecond, 90*time.Millisecond), - backend.WithDistHeartbeatSample(0), // probe all peers per tick for deterministic transition - ) - - _ = b1i // for clarity - - b2i, _ := backend.NewDistMemory(ctx, backend.WithDistMembership(membership, n2), backend.WithDistTransport(transport)) - b3i, _ := backend.NewDistMemory(ctx, backend.WithDistMembership(membership, n3), backend.WithDistTransport(transport)) - - b1, ok := b1i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b1i to *backend.DistMemory") - } - - b2, ok := b2i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b2i to *backend.DistMemory") - } - - b3, ok := b3i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b3i to *backend.DistMemory") - } - - StopOnCleanup(t, b1) - StopOnCleanup(t, b2) - StopOnCleanup(t, b3) - - transport.Register(b1) - transport.Register(b2) - transport.Register(b3) - - // Unregister b2 to simulate failure so it becomes suspect then dead. - transport.Unregister(string(n2.ID)) - - // Wait long enough for dead transition. Because of sampling (k=1) we give generous time window. - deadline := time.Now().Add(3 * time.Second) - for time.Now().Before(deadline) { - m := b1.Metrics() - if m.NodesDead > 0 { // transition observed - break - } - - time.Sleep(10 * time.Millisecond) - } - - mfinal := b1.Metrics() - if mfinal.NodesSuspect == 0 { - t.Fatalf("expected at least one suspect transition, got 0") - } - - if mfinal.NodesDead == 0 { - t.Fatalf("expected at least one dead transition, got 0") - } - - // ensure membership version advanced beyond initial additions (>= number of transitions + initial upserts) - snap := b1.DistMembershipSnapshot() - verAny := snap["version"] - - ver, _ := verAny.(uint64) - if ver < 3 { // initial upserts already increment version; tolerate timing variance - t.Fatalf("expected membership version >=4, got %v", verAny) - } - - _ = b3 // silence linter for now (future: more assertions) -} -tests/hypercache_distmemory_heartbeat_sampling_test.go:87:12: unchecked-type-assertion: type cast result is unchecked in verAny.(uint64) - type assertion result ignored (revive) - ver, _ := verAny.(uint64) - ^ -tests/hypercache_distmemory_heartbeat_test.go:14:1: cognitive-complexity: function TestDistMemoryHeartbeatLiveness has cognitive complexity 41 (> max enabled 15) (revive) -func TestDistMemoryHeartbeatLiveness(t *testing.T) { //nolint:paralleltest - // Intervals chosen so the test tolerates the 3-5x slowdown imposed by - // the race detector. Previous values (interval=30ms, dead=120ms) were - // tight enough that a delayed heartbeat tick could push *alive* nodes - // past deadAfter under -race, removing them from membership. - interval := 80 * time.Millisecond - suspectAfter := 4 * interval // 320ms - deadAfter := 8 * interval // 640ms - - ring := cluster.NewRing(cluster.WithReplication(1)) - membership := cluster.NewMembership(ring) - transport := backend.NewInProcessTransport() - - // nodes - n1 := cluster.NewNode("", "n1:0") - n2 := cluster.NewNode("", "n2:0") - n3 := cluster.NewNode("", "n3:0") - - // backend for node1 with heartbeat enabled - b1i, err := backend.NewDistMemory( - context.TODO(), - backend.WithDistMembership(membership, n1), - backend.WithDistTransport(transport), - backend.WithDistHeartbeat(interval, suspectAfter, deadAfter), - ) - if err != nil { - t.Fatalf("b1: %v", err) - } - - b1, ok := b1i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b1i to *backend.DistMemory") - } - - StopOnCleanup(t, b1) - - // add peers (without heartbeat loops themselves) - b2i, err := backend.NewDistMemory( - context.TODO(), - backend.WithDistMembership(membership, n2), - backend.WithDistTransport(transport), - ) - if err != nil { - t.Fatalf("b2: %v", err) - } - - b2, ok := b2i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b2i to *backend.DistMemory") - } - - StopOnCleanup(t, b2) - - b3i, err := backend.NewDistMemory( - context.TODO(), - backend.WithDistMembership(membership, n3), - backend.WithDistTransport(transport), - ) - if err != nil { - t.Fatalf("b3: %v", err) - } - - b3, ok := b3i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b3i to *backend.DistMemory") - } - - StopOnCleanup(t, b3) - - transport.Register(b1) - transport.Register(b2) - transport.Register(b3) - - // Wait until heartbeat marks peers alive (initial success probes) - deadline := time.Now().Add(2 * time.Second) - for time.Now().Before(deadline) { - aliveCount := 0 - for _, n := range membership.List() { - if n.State == cluster.NodeAlive { - aliveCount++ - } - } - - if aliveCount == 3 { - break - } - - time.Sleep(20 * time.Millisecond) - } - - // Simulate node2 becoming unresponsive by removing it from transport registry. - // (Simplest way: do not respond to health; drop entry.) - transport.Unregister(string(n2.ID)) - - // Wait until node2 transitions to suspect then removed. - var sawSuspect bool - - deadline = time.Now().Add(2 * deadAfter) - for time.Now().Before(deadline) { - foundN2 := false - for _, n := range membership.List() { - if n.ID == n2.ID { - foundN2 = true - - if n.State == cluster.NodeSuspect { - sawSuspect = true - } - } - } - - if !foundN2 && sawSuspect { - break - } // removed after suspicion observed - - time.Sleep(20 * time.Millisecond) - } - - if !sawSuspect { - t.Fatalf("node2 never became suspect") - } - - // ensure removed - for _, n := range membership.List() { - if n.ID == n2.ID { - t.Fatalf("node2 still present, state=%s", n.State) - } - } - - // Node3 should remain alive; ensure not removed - n3Present := false - for _, n := range membership.List() { - if n.ID == n3.ID { - n3Present = true - - if n.State != cluster.NodeAlive { - t.Fatalf("node3 not alive: %s", n.State) - } - } - } - - if !n3Present { - t.Fatalf("node3 missing") - } - - // Metrics sanity: at least one heartbeat failure and success recorded. - m := b1.Metrics() - if m.HeartbeatFailure == 0 { - t.Errorf("expected heartbeat failures > 0") - } - - if m.HeartbeatSuccess == 0 { - t.Errorf("expected heartbeat successes > 0") - } - - if m.NodesRemoved == 0 { - t.Errorf("expected nodes removed metric > 0") - } -} -tests/hypercache_distmemory_heartbeat_test.go:14:1: cyclomatic: function TestDistMemoryHeartbeatLiveness has cyclomatic complexity 27 (> max enabled 15) (revive) -func TestDistMemoryHeartbeatLiveness(t *testing.T) { //nolint:paralleltest - // Intervals chosen so the test tolerates the 3-5x slowdown imposed by - // the race detector. Previous values (interval=30ms, dead=120ms) were - // tight enough that a delayed heartbeat tick could push *alive* nodes - // past deadAfter under -race, removing them from membership. - interval := 80 * time.Millisecond - suspectAfter := 4 * interval // 320ms - deadAfter := 8 * interval // 640ms - - ring := cluster.NewRing(cluster.WithReplication(1)) - membership := cluster.NewMembership(ring) - transport := backend.NewInProcessTransport() - - // nodes - n1 := cluster.NewNode("", "n1:0") - n2 := cluster.NewNode("", "n2:0") - n3 := cluster.NewNode("", "n3:0") - - // backend for node1 with heartbeat enabled - b1i, err := backend.NewDistMemory( - context.TODO(), - backend.WithDistMembership(membership, n1), - backend.WithDistTransport(transport), - backend.WithDistHeartbeat(interval, suspectAfter, deadAfter), - ) - if err != nil { - t.Fatalf("b1: %v", err) - } - - b1, ok := b1i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b1i to *backend.DistMemory") - } - - StopOnCleanup(t, b1) - - // add peers (without heartbeat loops themselves) - b2i, err := backend.NewDistMemory( - context.TODO(), - backend.WithDistMembership(membership, n2), - backend.WithDistTransport(transport), - ) - if err != nil { - t.Fatalf("b2: %v", err) - } - - b2, ok := b2i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b2i to *backend.DistMemory") - } - - StopOnCleanup(t, b2) - - b3i, err := backend.NewDistMemory( - context.TODO(), - backend.WithDistMembership(membership, n3), - backend.WithDistTransport(transport), - ) - if err != nil { - t.Fatalf("b3: %v", err) - } - - b3, ok := b3i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b3i to *backend.DistMemory") - } - - StopOnCleanup(t, b3) - - transport.Register(b1) - transport.Register(b2) - transport.Register(b3) - - // Wait until heartbeat marks peers alive (initial success probes) - deadline := time.Now().Add(2 * time.Second) - for time.Now().Before(deadline) { - aliveCount := 0 - for _, n := range membership.List() { - if n.State == cluster.NodeAlive { - aliveCount++ - } - } - - if aliveCount == 3 { - break - } - - time.Sleep(20 * time.Millisecond) - } - - // Simulate node2 becoming unresponsive by removing it from transport registry. - // (Simplest way: do not respond to health; drop entry.) - transport.Unregister(string(n2.ID)) - - // Wait until node2 transitions to suspect then removed. - var sawSuspect bool - - deadline = time.Now().Add(2 * deadAfter) - for time.Now().Before(deadline) { - foundN2 := false - for _, n := range membership.List() { - if n.ID == n2.ID { - foundN2 = true - - if n.State == cluster.NodeSuspect { - sawSuspect = true - } - } - } - - if !foundN2 && sawSuspect { - break - } // removed after suspicion observed - - time.Sleep(20 * time.Millisecond) - } - - if !sawSuspect { - t.Fatalf("node2 never became suspect") - } - - // ensure removed - for _, n := range membership.List() { - if n.ID == n2.ID { - t.Fatalf("node2 still present, state=%s", n.State) - } - } - - // Node3 should remain alive; ensure not removed - n3Present := false - for _, n := range membership.List() { - if n.ID == n3.ID { - n3Present = true - - if n.State != cluster.NodeAlive { - t.Fatalf("node3 not alive: %s", n.State) - } - } - } - - if !n3Present { - t.Fatalf("node3 missing") - } - - // Metrics sanity: at least one heartbeat failure and success recorded. - m := b1.Metrics() - if m.HeartbeatFailure == 0 { - t.Errorf("expected heartbeat failures > 0") - } - - if m.HeartbeatSuccess == 0 { - t.Errorf("expected heartbeat successes > 0") - } - - if m.NodesRemoved == 0 { - t.Errorf("expected nodes removed metric > 0") - } -} -tests/hypercache_distmemory_heartbeat_test.go:14:1: function-length: maximum number of statements per function exceeded; max 50 but got 75 (revive) -func TestDistMemoryHeartbeatLiveness(t *testing.T) { //nolint:paralleltest - // Intervals chosen so the test tolerates the 3-5x slowdown imposed by - // the race detector. Previous values (interval=30ms, dead=120ms) were - // tight enough that a delayed heartbeat tick could push *alive* nodes - // past deadAfter under -race, removing them from membership. - interval := 80 * time.Millisecond - suspectAfter := 4 * interval // 320ms - deadAfter := 8 * interval // 640ms - - ring := cluster.NewRing(cluster.WithReplication(1)) - membership := cluster.NewMembership(ring) - transport := backend.NewInProcessTransport() - - // nodes - n1 := cluster.NewNode("", "n1:0") - n2 := cluster.NewNode("", "n2:0") - n3 := cluster.NewNode("", "n3:0") - - // backend for node1 with heartbeat enabled - b1i, err := backend.NewDistMemory( - context.TODO(), - backend.WithDistMembership(membership, n1), - backend.WithDistTransport(transport), - backend.WithDistHeartbeat(interval, suspectAfter, deadAfter), - ) - if err != nil { - t.Fatalf("b1: %v", err) - } - - b1, ok := b1i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b1i to *backend.DistMemory") - } - - StopOnCleanup(t, b1) - - // add peers (without heartbeat loops themselves) - b2i, err := backend.NewDistMemory( - context.TODO(), - backend.WithDistMembership(membership, n2), - backend.WithDistTransport(transport), - ) - if err != nil { - t.Fatalf("b2: %v", err) - } - - b2, ok := b2i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b2i to *backend.DistMemory") - } - - StopOnCleanup(t, b2) - - b3i, err := backend.NewDistMemory( - context.TODO(), - backend.WithDistMembership(membership, n3), - backend.WithDistTransport(transport), - ) - if err != nil { - t.Fatalf("b3: %v", err) - } - - b3, ok := b3i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b3i to *backend.DistMemory") - } - - StopOnCleanup(t, b3) - - transport.Register(b1) - transport.Register(b2) - transport.Register(b3) - - // Wait until heartbeat marks peers alive (initial success probes) - deadline := time.Now().Add(2 * time.Second) - for time.Now().Before(deadline) { - aliveCount := 0 - for _, n := range membership.List() { - if n.State == cluster.NodeAlive { - aliveCount++ - } - } - - if aliveCount == 3 { - break - } - - time.Sleep(20 * time.Millisecond) - } - - // Simulate node2 becoming unresponsive by removing it from transport registry. - // (Simplest way: do not respond to health; drop entry.) - transport.Unregister(string(n2.ID)) - - // Wait until node2 transitions to suspect then removed. - var sawSuspect bool - - deadline = time.Now().Add(2 * deadAfter) - for time.Now().Before(deadline) { - foundN2 := false - for _, n := range membership.List() { - if n.ID == n2.ID { - foundN2 = true - - if n.State == cluster.NodeSuspect { - sawSuspect = true - } - } - } - - if !foundN2 && sawSuspect { - break - } // removed after suspicion observed - - time.Sleep(20 * time.Millisecond) - } - - if !sawSuspect { - t.Fatalf("node2 never became suspect") - } - - // ensure removed - for _, n := range membership.List() { - if n.ID == n2.ID { - t.Fatalf("node2 still present, state=%s", n.State) - } - } - - // Node3 should remain alive; ensure not removed - n3Present := false - for _, n := range membership.List() { - if n.ID == n3.ID { - n3Present = true - - if n.State != cluster.NodeAlive { - t.Fatalf("node3 not alive: %s", n.State) - } - } - } - - if !n3Present { - t.Fatalf("node3 missing") - } - - // Metrics sanity: at least one heartbeat failure and success recorded. - m := b1.Metrics() - if m.HeartbeatFailure == 0 { - t.Errorf("expected heartbeat failures > 0") - } - - if m.HeartbeatSuccess == 0 { - t.Errorf("expected heartbeat successes > 0") - } - - if m.NodesRemoved == 0 { - t.Errorf("expected nodes removed metric > 0") - } -} -tests/hypercache_distmemory_heartbeat_test.go:14:1: function-length: maximum number of lines per function exceeded; max 75 but got 156 (revive) -func TestDistMemoryHeartbeatLiveness(t *testing.T) { //nolint:paralleltest - // Intervals chosen so the test tolerates the 3-5x slowdown imposed by - // the race detector. Previous values (interval=30ms, dead=120ms) were - // tight enough that a delayed heartbeat tick could push *alive* nodes - // past deadAfter under -race, removing them from membership. - interval := 80 * time.Millisecond - suspectAfter := 4 * interval // 320ms - deadAfter := 8 * interval // 640ms - - ring := cluster.NewRing(cluster.WithReplication(1)) - membership := cluster.NewMembership(ring) - transport := backend.NewInProcessTransport() - - // nodes - n1 := cluster.NewNode("", "n1:0") - n2 := cluster.NewNode("", "n2:0") - n3 := cluster.NewNode("", "n3:0") - - // backend for node1 with heartbeat enabled - b1i, err := backend.NewDistMemory( - context.TODO(), - backend.WithDistMembership(membership, n1), - backend.WithDistTransport(transport), - backend.WithDistHeartbeat(interval, suspectAfter, deadAfter), - ) - if err != nil { - t.Fatalf("b1: %v", err) - } - - b1, ok := b1i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b1i to *backend.DistMemory") - } - - StopOnCleanup(t, b1) - - // add peers (without heartbeat loops themselves) - b2i, err := backend.NewDistMemory( - context.TODO(), - backend.WithDistMembership(membership, n2), - backend.WithDistTransport(transport), - ) - if err != nil { - t.Fatalf("b2: %v", err) - } - - b2, ok := b2i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b2i to *backend.DistMemory") - } - - StopOnCleanup(t, b2) - - b3i, err := backend.NewDistMemory( - context.TODO(), - backend.WithDistMembership(membership, n3), - backend.WithDistTransport(transport), - ) - if err != nil { - t.Fatalf("b3: %v", err) - } - - b3, ok := b3i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b3i to *backend.DistMemory") - } - - StopOnCleanup(t, b3) - - transport.Register(b1) - transport.Register(b2) - transport.Register(b3) - - // Wait until heartbeat marks peers alive (initial success probes) - deadline := time.Now().Add(2 * time.Second) - for time.Now().Before(deadline) { - aliveCount := 0 - for _, n := range membership.List() { - if n.State == cluster.NodeAlive { - aliveCount++ - } - } - - if aliveCount == 3 { - break - } - - time.Sleep(20 * time.Millisecond) - } - - // Simulate node2 becoming unresponsive by removing it from transport registry. - // (Simplest way: do not respond to health; drop entry.) - transport.Unregister(string(n2.ID)) - - // Wait until node2 transitions to suspect then removed. - var sawSuspect bool - - deadline = time.Now().Add(2 * deadAfter) - for time.Now().Before(deadline) { - foundN2 := false - for _, n := range membership.List() { - if n.ID == n2.ID { - foundN2 = true - - if n.State == cluster.NodeSuspect { - sawSuspect = true - } - } - } - - if !foundN2 && sawSuspect { - break - } // removed after suspicion observed - - time.Sleep(20 * time.Millisecond) - } - - if !sawSuspect { - t.Fatalf("node2 never became suspect") - } - - // ensure removed - for _, n := range membership.List() { - if n.ID == n2.ID { - t.Fatalf("node2 still present, state=%s", n.State) - } - } - - // Node3 should remain alive; ensure not removed - n3Present := false - for _, n := range membership.List() { - if n.ID == n3.ID { - n3Present = true - - if n.State != cluster.NodeAlive { - t.Fatalf("node3 not alive: %s", n.State) - } - } - } - - if !n3Present { - t.Fatalf("node3 missing") - } - - // Metrics sanity: at least one heartbeat failure and success recorded. - m := b1.Metrics() - if m.HeartbeatFailure == 0 { - t.Errorf("expected heartbeat failures > 0") - } - - if m.HeartbeatSuccess == 0 { - t.Errorf("expected heartbeat successes > 0") - } - - if m.NodesRemoved == 0 { - t.Errorf("expected nodes removed metric > 0") - } -} -tests/hypercache_distmemory_integration_test.go:15:1: cognitive-complexity: function TestDistMemoryForwardingReplication has cognitive complexity 19 (> max enabled 15) (revive) -func TestDistMemoryForwardingReplication(t *testing.T) { - ring := cluster.NewRing(cluster.WithReplication(2)) - membership := cluster.NewMembership(ring) - transport := backend.NewInProcessTransport() - - // create two nodes/backends - n1 := cluster.NewNode("", "node1:0") - n2 := cluster.NewNode("", "node2:0") - - b1i, err := backend.NewDistMemory( - context.TODO(), - backend.WithDistMembership(membership, n1), - backend.WithDistTransport(transport), - ) - if err != nil { - t.Fatalf("backend1: %v", err) - } - - b2i, err := backend.NewDistMemory( - context.TODO(), - backend.WithDistMembership(membership, n2), - backend.WithDistTransport(transport), - ) - if err != nil { - t.Fatalf("backend2: %v", err) - } - - b1, ok := b1i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b1i to *backend.DistMemory") - } - - b2, ok := b2i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b2i to *backend.DistMemory") - } - - StopOnCleanup(t, b1) - StopOnCleanup(t, b2) - - transport.Register(b1) - transport.Register(b2) - - // pick keys to exercise distribution (simple deterministic list) - keys := []string{"alpha", "bravo", "charlie", "delta", "echo"} - // write via the node that is primary owner to guarantee placement + replication - for _, k := range keys { - owners := ring.Lookup(k) - if len(owners) == 0 { - t.Fatalf("no owners for key %s", k) - } - - item := &cache.Item{Key: k, Value: k} - - err := item.Valid() - if err != nil { - t.Fatalf("item valid %s: %v", k, err) - } - - target := owners[0] - - var err2 error - - switch target { - case n1.ID: - err2 = b1.Set(context.Background(), item) - case n2.ID: - err2 = b2.Set(context.Background(), item) - default: - t.Fatalf("unexpected owner id %s", target) - } - - if err2 != nil { - t.Fatalf("set %s via %s: %v", k, target, err2) - } - } - - // Each key should be readable via either owner (b1 primary forward) or local if replica - for _, k := range keys { - if _, ok := b1.Get(context.Background(), k); !ok { - t.Fatalf("b1 cannot get key %s", k) - } - - if _, ok := b2.Get(context.Background(), k); !ok { // should forward or local hit - t.Fatalf("b2 cannot get key %s", k) - } - } - - // Check replication: at least one key should physically exist on b2 after writes from b1 when b2 is replica - foundReplica := slices.ContainsFunc(keys, b2.LocalContains) - if !foundReplica { - t.Fatalf("expected at least one replicated key on b2") - } -} -tests/hypercache_distmemory_integration_test.go:15:1: function-length: maximum number of lines per function exceeded; max 75 but got 92 (revive) -func TestDistMemoryForwardingReplication(t *testing.T) { - ring := cluster.NewRing(cluster.WithReplication(2)) - membership := cluster.NewMembership(ring) - transport := backend.NewInProcessTransport() - - // create two nodes/backends - n1 := cluster.NewNode("", "node1:0") - n2 := cluster.NewNode("", "node2:0") - - b1i, err := backend.NewDistMemory( - context.TODO(), - backend.WithDistMembership(membership, n1), - backend.WithDistTransport(transport), - ) - if err != nil { - t.Fatalf("backend1: %v", err) - } - - b2i, err := backend.NewDistMemory( - context.TODO(), - backend.WithDistMembership(membership, n2), - backend.WithDistTransport(transport), - ) - if err != nil { - t.Fatalf("backend2: %v", err) - } - - b1, ok := b1i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b1i to *backend.DistMemory") - } - - b2, ok := b2i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b2i to *backend.DistMemory") - } - - StopOnCleanup(t, b1) - StopOnCleanup(t, b2) - - transport.Register(b1) - transport.Register(b2) - - // pick keys to exercise distribution (simple deterministic list) - keys := []string{"alpha", "bravo", "charlie", "delta", "echo"} - // write via the node that is primary owner to guarantee placement + replication - for _, k := range keys { - owners := ring.Lookup(k) - if len(owners) == 0 { - t.Fatalf("no owners for key %s", k) - } - - item := &cache.Item{Key: k, Value: k} - - err := item.Valid() - if err != nil { - t.Fatalf("item valid %s: %v", k, err) - } - - target := owners[0] - - var err2 error - - switch target { - case n1.ID: - err2 = b1.Set(context.Background(), item) - case n2.ID: - err2 = b2.Set(context.Background(), item) - default: - t.Fatalf("unexpected owner id %s", target) - } - - if err2 != nil { - t.Fatalf("set %s via %s: %v", k, target, err2) - } - } - - // Each key should be readable via either owner (b1 primary forward) or local if replica - for _, k := range keys { - if _, ok := b1.Get(context.Background(), k); !ok { - t.Fatalf("b1 cannot get key %s", k) - } - - if _, ok := b2.Get(context.Background(), k); !ok { // should forward or local hit - t.Fatalf("b2 cannot get key %s", k) - } - } - - // Check replication: at least one key should physically exist on b2 after writes from b1 when b2 is replica - foundReplica := slices.ContainsFunc(keys, b2.LocalContains) - if !foundReplica { - t.Fatalf("expected at least one replicated key on b2") - } -} -tests/hypercache_distmemory_integration_test.go:15:1: cyclomatic: function TestDistMemoryForwardingReplication has cyclomatic complexity 16 (> max enabled 15) (revive) -func TestDistMemoryForwardingReplication(t *testing.T) { - ring := cluster.NewRing(cluster.WithReplication(2)) - membership := cluster.NewMembership(ring) - transport := backend.NewInProcessTransport() - - // create two nodes/backends - n1 := cluster.NewNode("", "node1:0") - n2 := cluster.NewNode("", "node2:0") - - b1i, err := backend.NewDistMemory( - context.TODO(), - backend.WithDistMembership(membership, n1), - backend.WithDistTransport(transport), - ) - if err != nil { - t.Fatalf("backend1: %v", err) - } - - b2i, err := backend.NewDistMemory( - context.TODO(), - backend.WithDistMembership(membership, n2), - backend.WithDistTransport(transport), - ) - if err != nil { - t.Fatalf("backend2: %v", err) - } - - b1, ok := b1i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b1i to *backend.DistMemory") - } - - b2, ok := b2i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b2i to *backend.DistMemory") - } - - StopOnCleanup(t, b1) - StopOnCleanup(t, b2) - - transport.Register(b1) - transport.Register(b2) - - // pick keys to exercise distribution (simple deterministic list) - keys := []string{"alpha", "bravo", "charlie", "delta", "echo"} - // write via the node that is primary owner to guarantee placement + replication - for _, k := range keys { - owners := ring.Lookup(k) - if len(owners) == 0 { - t.Fatalf("no owners for key %s", k) - } - - item := &cache.Item{Key: k, Value: k} - - err := item.Valid() - if err != nil { - t.Fatalf("item valid %s: %v", k, err) - } - - target := owners[0] - - var err2 error - - switch target { - case n1.ID: - err2 = b1.Set(context.Background(), item) - case n2.ID: - err2 = b2.Set(context.Background(), item) - default: - t.Fatalf("unexpected owner id %s", target) - } - - if err2 != nil { - t.Fatalf("set %s via %s: %v", k, target, err2) - } - } - - // Each key should be readable via either owner (b1 primary forward) or local if replica - for _, k := range keys { - if _, ok := b1.Get(context.Background(), k); !ok { - t.Fatalf("b1 cannot get key %s", k) - } - - if _, ok := b2.Get(context.Background(), k); !ok { // should forward or local hit - t.Fatalf("b2 cannot get key %s", k) - } - } - - // Check replication: at least one key should physically exist on b2 after writes from b1 when b2 is replica - foundReplica := slices.ContainsFunc(keys, b2.LocalContains) - if !foundReplica { - t.Fatalf("expected at least one replicated key on b2") - } -} -tests/hypercache_distmemory_remove_readrepair_test.go:13:60: confusing-results: unnamed results of the same type may be confusing, consider using named results (revive) -func newTwoNodeCluster(t *testing.T) (*backend.DistMemory, *backend.DistMemory, *cluster.Ring) { - ^ -tests/hypercache_distmemory_remove_readrepair_test.go:113:1: cyclomatic: function TestDistMemoryReadRepair has cyclomatic complexity 27 (> max enabled 15) (revive) -func TestDistMemoryReadRepair(t *testing.T) { - b1, b2, ring := newTwoNodeCluster(t) - key := "rr-key" - - owners := ring.Lookup(key) - if len(owners) == 0 { - t.Fatalf("no owners") - } - - item := &cache.Item{Key: key, Value: "val"} - - err := item.Valid() - if err != nil { - t.Fatalf("valid: %v", err) - } - - // write via primary - if owners[0] == b1.LocalNodeID() { - err := b1.Set(context.Background(), item) - if err != nil { - t.Fatalf("set: %v", err) - } - } else { - err := b2.Set(context.Background(), item) - if err != nil { - t.Fatalf("set: %v", err) - } - } - - // determine replica node (owners[1]) and drop local copy there manually - if len(owners) < 2 { - t.Skip("replication factor <2") - } - - replica := owners[1] - // optional: t.Logf("owners: %v primary=%s replica=%s", owners, owners[0], replica) - if replica == b1.LocalNodeID() { - b1.DebugDropLocal(key) - } else { - b2.DebugDropLocal(key) - } - - // ensure dropped locally - if replica == b1.LocalNodeID() && b1.LocalContains(key) { - t.Fatalf("replica still has key after drop") - } - - if replica == b2.LocalNodeID() && b2.LocalContains(key) { - t.Fatalf("replica still has key after drop") - } - - // issue Get from a non-owner node to trigger forwarding, then verify owners repaired. - // choose a requester: use node that is neither primary nor replica if possible; with 2 nodes this means primary forwards to replica or - // vice versa. - requester := b1 - if owners[0] == b1.LocalNodeID() && replica == b2.LocalNodeID() { - requester = b2 // request from replica to forward to primary - } else if owners[0] == b2.LocalNodeID() && replica == b1.LocalNodeID() { - requester = b1 - } - - if _, ok := requester.Get(context.Background(), key); !ok { - t.Fatalf("get for read-repair failed") - } - - // after forwarding, both owners should have key locally again - if owners[0] == b1.LocalNodeID() && !b1.LocalContains(key) { - t.Fatalf("primary missing after read repair") - } - - if owners[0] == b2.LocalNodeID() && !b2.LocalContains(key) { - t.Fatalf("primary missing after read repair") - } - - if replica == b1.LocalNodeID() && !b1.LocalContains(key) { - t.Fatalf("replica missing after read repair") - } - - if replica == b2.LocalNodeID() && !b2.LocalContains(key) { - t.Fatalf("replica missing after read repair") - } - - // metrics should show at least one read repair - var repaired bool - - if replica == b1.LocalNodeID() { - repaired = b1.Metrics().ReadRepair > 0 - } else { - repaired = b2.Metrics().ReadRepair > 0 - } - - if !repaired { - t.Fatalf("expected read-repair metric increment") - } -} -tests/hypercache_distmemory_remove_readrepair_test.go:113:1: function-length: maximum number of lines per function exceeded; max 75 but got 93 (revive) -func TestDistMemoryReadRepair(t *testing.T) { - b1, b2, ring := newTwoNodeCluster(t) - key := "rr-key" - - owners := ring.Lookup(key) - if len(owners) == 0 { - t.Fatalf("no owners") - } - - item := &cache.Item{Key: key, Value: "val"} - - err := item.Valid() - if err != nil { - t.Fatalf("valid: %v", err) - } - - // write via primary - if owners[0] == b1.LocalNodeID() { - err := b1.Set(context.Background(), item) - if err != nil { - t.Fatalf("set: %v", err) - } - } else { - err := b2.Set(context.Background(), item) - if err != nil { - t.Fatalf("set: %v", err) - } - } - - // determine replica node (owners[1]) and drop local copy there manually - if len(owners) < 2 { - t.Skip("replication factor <2") - } - - replica := owners[1] - // optional: t.Logf("owners: %v primary=%s replica=%s", owners, owners[0], replica) - if replica == b1.LocalNodeID() { - b1.DebugDropLocal(key) - } else { - b2.DebugDropLocal(key) - } - - // ensure dropped locally - if replica == b1.LocalNodeID() && b1.LocalContains(key) { - t.Fatalf("replica still has key after drop") - } - - if replica == b2.LocalNodeID() && b2.LocalContains(key) { - t.Fatalf("replica still has key after drop") - } - - // issue Get from a non-owner node to trigger forwarding, then verify owners repaired. - // choose a requester: use node that is neither primary nor replica if possible; with 2 nodes this means primary forwards to replica or - // vice versa. - requester := b1 - if owners[0] == b1.LocalNodeID() && replica == b2.LocalNodeID() { - requester = b2 // request from replica to forward to primary - } else if owners[0] == b2.LocalNodeID() && replica == b1.LocalNodeID() { - requester = b1 - } - - if _, ok := requester.Get(context.Background(), key); !ok { - t.Fatalf("get for read-repair failed") - } - - // after forwarding, both owners should have key locally again - if owners[0] == b1.LocalNodeID() && !b1.LocalContains(key) { - t.Fatalf("primary missing after read repair") - } - - if owners[0] == b2.LocalNodeID() && !b2.LocalContains(key) { - t.Fatalf("primary missing after read repair") - } - - if replica == b1.LocalNodeID() && !b1.LocalContains(key) { - t.Fatalf("replica missing after read repair") - } - - if replica == b2.LocalNodeID() && !b2.LocalContains(key) { - t.Fatalf("replica missing after read repair") - } - - // metrics should show at least one read repair - var repaired bool - - if replica == b1.LocalNodeID() { - repaired = b1.Metrics().ReadRepair > 0 - } else { - repaired = b2.Metrics().ReadRepair > 0 - } - - if !repaired { - t.Fatalf("expected read-repair metric increment") - } -} -tests/hypercache_distmemory_stale_quorum_test.go:14:1: function-length: maximum number of lines per function exceeded; max 75 but got 118 (revive) -func TestDistMemoryStaleQuorum(t *testing.T) { - ring := cluster.NewRing(cluster.WithReplication(3)) - membership := cluster.NewMembership(ring) - transport := backend.NewInProcessTransport() - - n1 := cluster.NewNode("", "n1:0") - n2 := cluster.NewNode("", "n2:0") - n3 := cluster.NewNode("", "n3:0") - - b1i, _ := backend.NewDistMemory( - context.TODO(), - backend.WithDistMembership(membership, n1), - backend.WithDistTransport(transport), - backend.WithDistReadConsistency(backend.ConsistencyQuorum), - ) - b2i, _ := backend.NewDistMemory( - context.TODO(), - backend.WithDistMembership(membership, n2), - backend.WithDistTransport(transport), - backend.WithDistReadConsistency(backend.ConsistencyQuorum), - ) - b3i, _ := backend.NewDistMemory( - context.TODO(), - backend.WithDistMembership(membership, n3), - backend.WithDistTransport(transport), - backend.WithDistReadConsistency(backend.ConsistencyQuorum), - ) - - b1, ok := b1i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b1i to *backend.DistMemory") - } - - b2, ok := b2i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b2i to *backend.DistMemory") - } - - b3, ok := b3i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b3i to *backend.DistMemory") - } - - StopOnCleanup(t, b1) - StopOnCleanup(t, b2) - StopOnCleanup(t, b3) - - transport.Register(b1) - transport.Register(b2) - transport.Register(b3) - - key := "sq-key" - - owners := ring.Lookup(key) - if len(owners) != 3 { - t.Skip("replication factor !=3") - } - - // Write initial version via primary - primary := owners[0] - item := &cache.Item{Key: key, Value: "v1"} - - _ = item.Valid() - if primary == b1.LocalNodeID() { - _ = b1.Set(context.Background(), item) - } else if primary == b2.LocalNodeID() { - _ = b2.Set(context.Background(), item) - } else { - _ = b3.Set(context.Background(), item) - } - - // Manually bump version on one replica to simulate a newer write that others missed - // Pick owners[1] as ahead replica - aheadID := owners[1] - ahead := map[cluster.NodeID]*backend.DistMemory{b1.LocalNodeID(): b1, b2.LocalNodeID(): b2, b3.LocalNodeID(): b3}[aheadID] - ahead.DebugInject(&cache.Item{Key: key, Value: "v2", Version: 5, Origin: string(ahead.LocalNodeID()), LastUpdated: time.Now()}) - - // Drop local copy on owners[2] to simulate stale/missing - lagID := owners[2] - lag := map[cluster.NodeID]*backend.DistMemory{b1.LocalNodeID(): b1, b2.LocalNodeID(): b2, b3.LocalNodeID(): b3}[lagID] - lag.DebugDropLocal(key) - - // Issue quorum read from a non-ahead node (choose primary if not ahead, else third) - requester := b1 - if requester.LocalNodeID() == aheadID { - requester = b2 - } - - if requester.LocalNodeID() == aheadID { - requester = b3 - } - - got, ok := requester.Get(context.Background(), key) - if !ok { - t.Fatalf("quorum get failed") - } - - // Value stored as interface{} may be string (not []byte) in this test - if sval, okCast := got.Value.(string); !okCast || sval != "v2" { - t.Fatalf("expected quorum to return newer version v2, got=%v (type %T)", got.Value, got.Value) - } - - // Allow brief repair propagation - time.Sleep(50 * time.Millisecond) - - // All owners should now have v2 (version 5) - for _, oid := range owners { - inst := map[cluster.NodeID]*backend.DistMemory{b1.LocalNodeID(): b1, b2.LocalNodeID(): b2, b3.LocalNodeID(): b3}[oid] - - it, ok2 := inst.Get(context.Background(), key) - if !ok2 || it.Version != 5 { - t.Fatalf("owner %s not repaired to v2 (v5) -> (%v,%v)", oid, ok2, it) - } - } - - // ReadRepair metric should have incremented somewhere - if b1.Metrics().ReadRepair+b2.Metrics().ReadRepair+b3.Metrics().ReadRepair == 0 { - t.Fatalf("expected read repair metric >0") - } -} -tests/hypercache_distmemory_stale_quorum_test.go:14:1: function-length: maximum number of statements per function exceeded; max 50 but got 57 (revive) -func TestDistMemoryStaleQuorum(t *testing.T) { - ring := cluster.NewRing(cluster.WithReplication(3)) - membership := cluster.NewMembership(ring) - transport := backend.NewInProcessTransport() - - n1 := cluster.NewNode("", "n1:0") - n2 := cluster.NewNode("", "n2:0") - n3 := cluster.NewNode("", "n3:0") - - b1i, _ := backend.NewDistMemory( - context.TODO(), - backend.WithDistMembership(membership, n1), - backend.WithDistTransport(transport), - backend.WithDistReadConsistency(backend.ConsistencyQuorum), - ) - b2i, _ := backend.NewDistMemory( - context.TODO(), - backend.WithDistMembership(membership, n2), - backend.WithDistTransport(transport), - backend.WithDistReadConsistency(backend.ConsistencyQuorum), - ) - b3i, _ := backend.NewDistMemory( - context.TODO(), - backend.WithDistMembership(membership, n3), - backend.WithDistTransport(transport), - backend.WithDistReadConsistency(backend.ConsistencyQuorum), - ) - - b1, ok := b1i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b1i to *backend.DistMemory") - } - - b2, ok := b2i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b2i to *backend.DistMemory") - } - - b3, ok := b3i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b3i to *backend.DistMemory") - } - - StopOnCleanup(t, b1) - StopOnCleanup(t, b2) - StopOnCleanup(t, b3) - - transport.Register(b1) - transport.Register(b2) - transport.Register(b3) - - key := "sq-key" - - owners := ring.Lookup(key) - if len(owners) != 3 { - t.Skip("replication factor !=3") - } - - // Write initial version via primary - primary := owners[0] - item := &cache.Item{Key: key, Value: "v1"} - - _ = item.Valid() - if primary == b1.LocalNodeID() { - _ = b1.Set(context.Background(), item) - } else if primary == b2.LocalNodeID() { - _ = b2.Set(context.Background(), item) - } else { - _ = b3.Set(context.Background(), item) - } - - // Manually bump version on one replica to simulate a newer write that others missed - // Pick owners[1] as ahead replica - aheadID := owners[1] - ahead := map[cluster.NodeID]*backend.DistMemory{b1.LocalNodeID(): b1, b2.LocalNodeID(): b2, b3.LocalNodeID(): b3}[aheadID] - ahead.DebugInject(&cache.Item{Key: key, Value: "v2", Version: 5, Origin: string(ahead.LocalNodeID()), LastUpdated: time.Now()}) - - // Drop local copy on owners[2] to simulate stale/missing - lagID := owners[2] - lag := map[cluster.NodeID]*backend.DistMemory{b1.LocalNodeID(): b1, b2.LocalNodeID(): b2, b3.LocalNodeID(): b3}[lagID] - lag.DebugDropLocal(key) - - // Issue quorum read from a non-ahead node (choose primary if not ahead, else third) - requester := b1 - if requester.LocalNodeID() == aheadID { - requester = b2 - } - - if requester.LocalNodeID() == aheadID { - requester = b3 - } - - got, ok := requester.Get(context.Background(), key) - if !ok { - t.Fatalf("quorum get failed") - } - - // Value stored as interface{} may be string (not []byte) in this test - if sval, okCast := got.Value.(string); !okCast || sval != "v2" { - t.Fatalf("expected quorum to return newer version v2, got=%v (type %T)", got.Value, got.Value) - } - - // Allow brief repair propagation - time.Sleep(50 * time.Millisecond) - - // All owners should now have v2 (version 5) - for _, oid := range owners { - inst := map[cluster.NodeID]*backend.DistMemory{b1.LocalNodeID(): b1, b2.LocalNodeID(): b2, b3.LocalNodeID(): b3}[oid] - - it, ok2 := inst.Get(context.Background(), key) - if !ok2 || it.Version != 5 { - t.Fatalf("owner %s not repaired to v2 (v5) -> (%v,%v)", oid, ok2, it) - } - } - - // ReadRepair metric should have incremented somewhere - if b1.Metrics().ReadRepair+b2.Metrics().ReadRepair+b3.Metrics().ReadRepair == 0 { - t.Fatalf("expected read repair metric >0") - } -} -tests/hypercache_distmemory_stale_quorum_test.go:14:1: cyclomatic: function TestDistMemoryStaleQuorum has cyclomatic complexity 16 (> max enabled 15) (revive) -func TestDistMemoryStaleQuorum(t *testing.T) { - ring := cluster.NewRing(cluster.WithReplication(3)) - membership := cluster.NewMembership(ring) - transport := backend.NewInProcessTransport() - - n1 := cluster.NewNode("", "n1:0") - n2 := cluster.NewNode("", "n2:0") - n3 := cluster.NewNode("", "n3:0") - - b1i, _ := backend.NewDistMemory( - context.TODO(), - backend.WithDistMembership(membership, n1), - backend.WithDistTransport(transport), - backend.WithDistReadConsistency(backend.ConsistencyQuorum), - ) - b2i, _ := backend.NewDistMemory( - context.TODO(), - backend.WithDistMembership(membership, n2), - backend.WithDistTransport(transport), - backend.WithDistReadConsistency(backend.ConsistencyQuorum), - ) - b3i, _ := backend.NewDistMemory( - context.TODO(), - backend.WithDistMembership(membership, n3), - backend.WithDistTransport(transport), - backend.WithDistReadConsistency(backend.ConsistencyQuorum), - ) - - b1, ok := b1i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b1i to *backend.DistMemory") - } - - b2, ok := b2i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b2i to *backend.DistMemory") - } - - b3, ok := b3i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b3i to *backend.DistMemory") - } - - StopOnCleanup(t, b1) - StopOnCleanup(t, b2) - StopOnCleanup(t, b3) - - transport.Register(b1) - transport.Register(b2) - transport.Register(b3) - - key := "sq-key" - - owners := ring.Lookup(key) - if len(owners) != 3 { - t.Skip("replication factor !=3") - } - - // Write initial version via primary - primary := owners[0] - item := &cache.Item{Key: key, Value: "v1"} - - _ = item.Valid() - if primary == b1.LocalNodeID() { - _ = b1.Set(context.Background(), item) - } else if primary == b2.LocalNodeID() { - _ = b2.Set(context.Background(), item) - } else { - _ = b3.Set(context.Background(), item) - } - - // Manually bump version on one replica to simulate a newer write that others missed - // Pick owners[1] as ahead replica - aheadID := owners[1] - ahead := map[cluster.NodeID]*backend.DistMemory{b1.LocalNodeID(): b1, b2.LocalNodeID(): b2, b3.LocalNodeID(): b3}[aheadID] - ahead.DebugInject(&cache.Item{Key: key, Value: "v2", Version: 5, Origin: string(ahead.LocalNodeID()), LastUpdated: time.Now()}) - - // Drop local copy on owners[2] to simulate stale/missing - lagID := owners[2] - lag := map[cluster.NodeID]*backend.DistMemory{b1.LocalNodeID(): b1, b2.LocalNodeID(): b2, b3.LocalNodeID(): b3}[lagID] - lag.DebugDropLocal(key) - - // Issue quorum read from a non-ahead node (choose primary if not ahead, else third) - requester := b1 - if requester.LocalNodeID() == aheadID { - requester = b2 - } - - if requester.LocalNodeID() == aheadID { - requester = b3 - } - - got, ok := requester.Get(context.Background(), key) - if !ok { - t.Fatalf("quorum get failed") - } - - // Value stored as interface{} may be string (not []byte) in this test - if sval, okCast := got.Value.(string); !okCast || sval != "v2" { - t.Fatalf("expected quorum to return newer version v2, got=%v (type %T)", got.Value, got.Value) - } - - // Allow brief repair propagation - time.Sleep(50 * time.Millisecond) - - // All owners should now have v2 (version 5) - for _, oid := range owners { - inst := map[cluster.NodeID]*backend.DistMemory{b1.LocalNodeID(): b1, b2.LocalNodeID(): b2, b3.LocalNodeID(): b3}[oid] - - it, ok2 := inst.Get(context.Background(), key) - if !ok2 || it.Version != 5 { - t.Fatalf("owner %s not repaired to v2 (v5) -> (%v,%v)", oid, ok2, it) - } - } - - // ReadRepair metric should have incremented somewhere - if b1.Metrics().ReadRepair+b2.Metrics().ReadRepair+b3.Metrics().ReadRepair == 0 { - t.Fatalf("expected read repair metric >0") - } -} -tests/hypercache_distmemory_tiebreak_test.go:15:1: function-length: maximum number of lines per function exceeded; max 75 but got 86 (revive) -func TestDistMemoryVersionTieBreak(t *testing.T) { //nolint:paralleltest - interval := 5 * time.Millisecond - ring := cluster.NewRing(cluster.WithReplication(3)) - membership := cluster.NewMembership(ring) - transport := backend.NewInProcessTransport() - - n1 := cluster.NewNode("", "n1:0") - n2 := cluster.NewNode("", "n2:0") - n3 := cluster.NewNode("", "n3:0") - - b1i, _ := backend.NewDistMemory( - context.TODO(), - backend.WithDistMembership(membership, n1), - backend.WithDistTransport(transport), - backend.WithDistReplication(3), - backend.WithDistHeartbeat(interval, 0, 0), - backend.WithDistReadConsistency(backend.ConsistencyQuorum), - backend.WithDistWriteConsistency(backend.ConsistencyQuorum), - ) - b2i, _ := backend.NewDistMemory(context.TODO(), backend.WithDistMembership(membership, n2), backend.WithDistTransport(transport)) - b3i, _ := backend.NewDistMemory( - context.TODO(), - backend.WithDistMembership(membership, n3), - backend.WithDistTransport(transport), - backend.WithDistReadConsistency(backend.ConsistencyQuorum), - ) - - b1, ok := b1i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b1i to *backend.DistMemory") - } - - b2, ok := b2i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b2i to *backend.DistMemory") - } - - b3, ok := b3i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b3i to *backend.DistMemory") - } - - StopOnCleanup(t, b1) - StopOnCleanup(t, b2) - StopOnCleanup(t, b3) - - transport.Register(b1) - transport.Register(b2) - transport.Register(b3) - - // choose key where b1,b2,b3 ordering fixed - key := "tie" - for i := range 3000 { - cand := fmt.Sprintf("tie%d", i) - - owners := b1.DebugOwners(cand) - if len(owners) == 3 && owners[0] == b1.LocalNodeID() && owners[1] == b2.LocalNodeID() && owners[2] == b3.LocalNodeID() { - key = cand - - break - } - } - - // primary write to establish version=1 origin=b1 - err := b1.Set(context.Background(), &cache.Item{Key: key, Value: "v1"}) - if err != nil { - t.Fatalf("initial set: %v", err) - } - - // Inject a fake item on b2 with SAME version but lexicographically larger origin so it should lose. - b2.DebugDropLocal(key) - b2.DebugInject(&cache.Item{Key: key, Value: "alt", Version: 1, Origin: "zzzz"}) - - // Quorum read through b3 triggers selection + repair. - it, ok := b3.Get(context.Background(), key) - if !ok { - t.Fatalf("expected quorum read ok") - } - - if it.Value != "v1" { - t.Fatalf("expected b1 value win, got %v", it.Value) - } - - // Ensure b2 repaired to winning value. - if it2, ok2 := b2.Get(context.Background(), key); !ok2 || it2.Value != "v1" { - t.Fatalf("expected repaired tie-break value on b2") - } -} -tests/hypercache_distmemory_versioning_test.go:17:1: cyclomatic: function TestDistMemoryVersioningQuorum has cyclomatic complexity 16 (> max enabled 15) (revive) -func TestDistMemoryVersioningQuorum(t *testing.T) { //nolint:paralleltest - interval := 10 * time.Millisecond - ring := cluster.NewRing(cluster.WithReplication(3)) - membership := cluster.NewMembership(ring) - transport := backend.NewInProcessTransport() - - // three nodes - n1 := cluster.NewNode("", "n1:0") - n2 := cluster.NewNode("", "n2:0") - n3 := cluster.NewNode("", "n3:0") - - // enable quorum read + write consistency on b1 - b1i, _ := backend.NewDistMemory( - context.TODO(), - backend.WithDistMembership(membership, n1), - backend.WithDistTransport(transport), - backend.WithDistReplication(3), - backend.WithDistHeartbeat(interval, 0, 0), - backend.WithDistReadConsistency(backend.ConsistencyQuorum), - backend.WithDistWriteConsistency(backend.ConsistencyQuorum), - ) - b2i, _ := backend.NewDistMemory(context.TODO(), backend.WithDistMembership(membership, n2), backend.WithDistTransport(transport)) - b3i, _ := backend.NewDistMemory( - context.TODO(), - backend.WithDistMembership(membership, n3), - backend.WithDistTransport(transport), - backend.WithDistReadConsistency(backend.ConsistencyQuorum), - ) - b1, ok := b1i.(*backend.DistMemory) - - if !ok { - t.Fatalf("failed to cast b1i to *backend.DistMemory") - } - - b2, ok := b2i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b2i to *backend.DistMemory") - } - - b3, ok := b3i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b3i to *backend.DistMemory") - } - - StopOnCleanup(t, b1) - StopOnCleanup(t, b2) - StopOnCleanup(t, b3) - - transport.Register(b1) - transport.Register(b2) - transport.Register(b3) - - // Find a deterministic key where ownership ordering is b1,b2,b3 to avoid forwarding complexities. - key := "k" - for i := range 2000 { // brute force - cand := fmt.Sprintf("k%d", i) - - owners := b1.DebugOwners(cand) - if len(owners) == 3 && owners[0] == b1.LocalNodeID() && owners[1] == b2.LocalNodeID() && owners[2] == b3.LocalNodeID() { - key = cand - - break - } - } - - // Write key via primary. - item1 := &cache.Item{Key: key, Value: "v1"} - - err := b1.Set(context.Background(), item1) - if err != nil { - t.Fatalf("initial set: %v", err) - } - - // Simulate a concurrent stale write from another node with lower version (manual injection on b2). - itemStale := &cache.Item{Key: key, Value: "v0", Version: 0, Origin: "zzz"} - b2.DebugDropLocal(key) - b2.DebugInject(itemStale) - - // Read quorum from node3: should observe latest (v1) and repair b2. - it, ok := b3.Get(context.Background(), key) - if !ok { - t.Fatalf("expected read ok") - } - - if it.Value != "v1" { - t.Fatalf("expected value v1, got %v", it.Value) - } - - // Ensure b2 repaired. - if it2, ok2 := b2.Get(context.Background(), key); !ok2 || it2.Value != "v1" { - t.Fatalf("expected repaired value on b2") - } - - // Simulate reduced acks: unregister one replica and perform write requiring quorum (2 of 3). - transport.Unregister(string(n3.ID)) - - item2 := &cache.Item{Key: key, Value: "v2"} - - err = b1.Set(context.Background(), item2) - if err != nil && !errors.Is(err, sentinel.ErrQuorumFailed) { - t.Fatalf("unexpected error after replica loss: %v", err) - } -} -tests/hypercache_distmemory_versioning_test.go:17:1: function-length: maximum number of lines per function exceeded; max 75 but got 101 (revive) -func TestDistMemoryVersioningQuorum(t *testing.T) { //nolint:paralleltest - interval := 10 * time.Millisecond - ring := cluster.NewRing(cluster.WithReplication(3)) - membership := cluster.NewMembership(ring) - transport := backend.NewInProcessTransport() - - // three nodes - n1 := cluster.NewNode("", "n1:0") - n2 := cluster.NewNode("", "n2:0") - n3 := cluster.NewNode("", "n3:0") - - // enable quorum read + write consistency on b1 - b1i, _ := backend.NewDistMemory( - context.TODO(), - backend.WithDistMembership(membership, n1), - backend.WithDistTransport(transport), - backend.WithDistReplication(3), - backend.WithDistHeartbeat(interval, 0, 0), - backend.WithDistReadConsistency(backend.ConsistencyQuorum), - backend.WithDistWriteConsistency(backend.ConsistencyQuorum), - ) - b2i, _ := backend.NewDistMemory(context.TODO(), backend.WithDistMembership(membership, n2), backend.WithDistTransport(transport)) - b3i, _ := backend.NewDistMemory( - context.TODO(), - backend.WithDistMembership(membership, n3), - backend.WithDistTransport(transport), - backend.WithDistReadConsistency(backend.ConsistencyQuorum), - ) - b1, ok := b1i.(*backend.DistMemory) - - if !ok { - t.Fatalf("failed to cast b1i to *backend.DistMemory") - } - - b2, ok := b2i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b2i to *backend.DistMemory") - } - - b3, ok := b3i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b3i to *backend.DistMemory") - } - - StopOnCleanup(t, b1) - StopOnCleanup(t, b2) - StopOnCleanup(t, b3) - - transport.Register(b1) - transport.Register(b2) - transport.Register(b3) - - // Find a deterministic key where ownership ordering is b1,b2,b3 to avoid forwarding complexities. - key := "k" - for i := range 2000 { // brute force - cand := fmt.Sprintf("k%d", i) - - owners := b1.DebugOwners(cand) - if len(owners) == 3 && owners[0] == b1.LocalNodeID() && owners[1] == b2.LocalNodeID() && owners[2] == b3.LocalNodeID() { - key = cand - - break - } - } - - // Write key via primary. - item1 := &cache.Item{Key: key, Value: "v1"} - - err := b1.Set(context.Background(), item1) - if err != nil { - t.Fatalf("initial set: %v", err) - } - - // Simulate a concurrent stale write from another node with lower version (manual injection on b2). - itemStale := &cache.Item{Key: key, Value: "v0", Version: 0, Origin: "zzz"} - b2.DebugDropLocal(key) - b2.DebugInject(itemStale) - - // Read quorum from node3: should observe latest (v1) and repair b2. - it, ok := b3.Get(context.Background(), key) - if !ok { - t.Fatalf("expected read ok") - } - - if it.Value != "v1" { - t.Fatalf("expected value v1, got %v", it.Value) - } - - // Ensure b2 repaired. - if it2, ok2 := b2.Get(context.Background(), key); !ok2 || it2.Value != "v1" { - t.Fatalf("expected repaired value on b2") - } - - // Simulate reduced acks: unregister one replica and perform write requiring quorum (2 of 3). - transport.Unregister(string(n3.ID)) - - item2 := &cache.Item{Key: key, Value: "v2"} - - err = b1.Set(context.Background(), item2) - if err != nil && !errors.Is(err, sentinel.ErrQuorumFailed) { - t.Fatalf("unexpected error after replica loss: %v", err) - } -} -tests/hypercache_distmemory_versioning_test.go:17:1: function-length: maximum number of statements per function exceeded; max 50 but got 51 (revive) -func TestDistMemoryVersioningQuorum(t *testing.T) { //nolint:paralleltest - interval := 10 * time.Millisecond - ring := cluster.NewRing(cluster.WithReplication(3)) - membership := cluster.NewMembership(ring) - transport := backend.NewInProcessTransport() - - // three nodes - n1 := cluster.NewNode("", "n1:0") - n2 := cluster.NewNode("", "n2:0") - n3 := cluster.NewNode("", "n3:0") - - // enable quorum read + write consistency on b1 - b1i, _ := backend.NewDistMemory( - context.TODO(), - backend.WithDistMembership(membership, n1), - backend.WithDistTransport(transport), - backend.WithDistReplication(3), - backend.WithDistHeartbeat(interval, 0, 0), - backend.WithDistReadConsistency(backend.ConsistencyQuorum), - backend.WithDistWriteConsistency(backend.ConsistencyQuorum), - ) - b2i, _ := backend.NewDistMemory(context.TODO(), backend.WithDistMembership(membership, n2), backend.WithDistTransport(transport)) - b3i, _ := backend.NewDistMemory( - context.TODO(), - backend.WithDistMembership(membership, n3), - backend.WithDistTransport(transport), - backend.WithDistReadConsistency(backend.ConsistencyQuorum), - ) - b1, ok := b1i.(*backend.DistMemory) - - if !ok { - t.Fatalf("failed to cast b1i to *backend.DistMemory") - } - - b2, ok := b2i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b2i to *backend.DistMemory") - } - - b3, ok := b3i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b3i to *backend.DistMemory") - } - - StopOnCleanup(t, b1) - StopOnCleanup(t, b2) - StopOnCleanup(t, b3) - - transport.Register(b1) - transport.Register(b2) - transport.Register(b3) - - // Find a deterministic key where ownership ordering is b1,b2,b3 to avoid forwarding complexities. - key := "k" - for i := range 2000 { // brute force - cand := fmt.Sprintf("k%d", i) - - owners := b1.DebugOwners(cand) - if len(owners) == 3 && owners[0] == b1.LocalNodeID() && owners[1] == b2.LocalNodeID() && owners[2] == b3.LocalNodeID() { - key = cand - - break - } - } - - // Write key via primary. - item1 := &cache.Item{Key: key, Value: "v1"} - - err := b1.Set(context.Background(), item1) - if err != nil { - t.Fatalf("initial set: %v", err) - } - - // Simulate a concurrent stale write from another node with lower version (manual injection on b2). - itemStale := &cache.Item{Key: key, Value: "v0", Version: 0, Origin: "zzz"} - b2.DebugDropLocal(key) - b2.DebugInject(itemStale) - - // Read quorum from node3: should observe latest (v1) and repair b2. - it, ok := b3.Get(context.Background(), key) - if !ok { - t.Fatalf("expected read ok") - } - - if it.Value != "v1" { - t.Fatalf("expected value v1, got %v", it.Value) - } - - // Ensure b2 repaired. - if it2, ok2 := b2.Get(context.Background(), key); !ok2 || it2.Value != "v1" { - t.Fatalf("expected repaired value on b2") - } - - // Simulate reduced acks: unregister one replica and perform write requiring quorum (2 of 3). - transport.Unregister(string(n3.ID)) - - item2 := &cache.Item{Key: key, Value: "v2"} - - err = b1.Set(context.Background(), item2) - if err != nil && !errors.Is(err, sentinel.ErrQuorumFailed) { - t.Fatalf("unexpected error after replica loss: %v", err) - } -} -tests/hypercache_distmemory_write_quorum_test.go:76:1: function-length: maximum number of lines per function exceeded; max 75 but got 89 (revive) -func TestWriteQuorumFailure(t *testing.T) { - ctx := context.Background() - transport := backend.NewInProcessTransport() - - // Shared ring/membership so ownership is identical across nodes. - ring := cluster.NewRing(cluster.WithReplication(3)) - m := cluster.NewMembership(ring) - m.Upsert(cluster.NewNode("A", "A")) - m.Upsert(cluster.NewNode("B", "B")) - m.Upsert(cluster.NewNode("C", "C")) - - opts := []backend.DistMemoryOption{ - backend.WithDistReplication(3), - backend.WithDistWriteConsistency(backend.ConsistencyAll), - backend.WithDistHintTTL(time.Minute), - backend.WithDistHintReplayInterval(50 * time.Millisecond), - } - - // Create three nodes but only register two with transport to force ALL failure. - na, _ := backend.NewDistMemory( - ctx, - append(opts, backend.WithDistNode("A", "A"), backend.WithDistMembership(m, cluster.NewNode("A", "A")))...) - nb, _ := backend.NewDistMemory( - ctx, - append(opts, backend.WithDistNode("B", "B"), backend.WithDistMembership(m, cluster.NewNode("B", "B")))...) - - nc, _ := backend.NewDistMemory( - ctx, - append(opts, backend.WithDistNode("C", "C"), backend.WithDistMembership(m, cluster.NewNode("C", "C")))...) - - da, ok := any(na).(*backend.DistMemory) - if !ok { - t.Fatalf("expected *backend.DistMemory, got %T", na) - } - - db, ok := any(nb).(*backend.DistMemory) - if !ok { - t.Fatalf("expected *backend.DistMemory, got %T", nb) - } - - dc, ok := any(nc).(*backend.DistMemory) - if !ok { - t.Fatalf("expected *backend.DistMemory, got %T", nc) - } - - StopOnCleanup(t, da) - StopOnCleanup(t, db) - StopOnCleanup(t, dc) - - da.SetTransport(transport) - db.SetTransport(transport) - transport.Register(da) - transport.Register(db) // C intentionally not registered (unreachable) - - // Find a key whose owners include all three nodes (replication=3 ensures this) – just brute force until order stable. - key := "quorum-all-fail" - for i := range 50 { // try some keys to ensure A is primary sometimes; not strictly required - candidate := fmt.Sprintf("quorum-all-fail-%d", i) - - owners := da.Ring().Lookup(candidate) - if len(owners) == 3 && string(owners[0]) == "A" { // prefer A primary for clarity - key = candidate - - break - } - } - - item := &cache.Item{Key: key, Value: "v-fail"} - - err := na.Set(ctx, item) - if !errors.Is(err, sentinel.ErrQuorumFailed) { - // Provide ring owners for debugging. - owners := da.Ring().Lookup(key) - - ids := make([]string, 0, len(owners)) - for _, o := range owners { - ids = append(ids, string(o)) - } - - t.Fatalf("expected ErrQuorumFailed, got %v (owners=%v)", err, ids) - } - - metrics := da.Metrics() - if metrics.WriteQuorumFailures < 1 { - t.Fatalf("expected WriteQuorumFailures >=1, got %d", metrics.WriteQuorumFailures) - } - - if metrics.WriteAttempts < 1 { // should have attempted at least once - t.Fatalf("expected WriteAttempts >=1, got %d", metrics.WriteAttempts) - } -} -tests/hypercache_get_or_set_test.go:15:1: function-length: maximum number of lines per function exceeded; max 75 but got 98 (revive) -func TestHyperCache_GetOrSet(t *testing.T) { - tests := []struct { - name string - key string - value any - expiry time.Duration - expectedValue any - expectedErr error - }{ - { - name: "get or set with valid key and value", - key: "key1", - value: "value1", - expiry: 0, - expectedValue: "value1", - expectedErr: nil, - }, - { - name: "get or set with valid key and value with expiry", - key: "key2", - value: "value2", - expiry: time.Second, - expectedValue: "value2", - expectedErr: nil, - }, - // { - // name: "get or set with empty key", - // key: "", - // value: "value3", - // expiry: 0, - // expectedValue: nil, - // expectedErr: hypercache.ErrInvalidKey, - // }, - { - name: "get or set with nil value", - key: "key4", - value: nil, - expiry: 0, - expectedValue: nil, - expectedErr: sentinel.ErrNilValue, - }, - { - name: "get or set with key that has expired", - key: "key5", - value: "value5", - expiry: time.Millisecond, - expectedValue: nil, - expectedErr: sentinel.ErrKeyExpired, - }, - { - name: "get or set with key that already exists", - key: "key1", - value: "value6", - expiry: 0, - expectedValue: "value1", - expectedErr: nil, - }, - } - cache, err := hypercache.NewInMemoryWithDefaults(context.TODO(), 10) - assert.Nil(t, err) - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - var ( - val any - err error - ) - - shouldExpire := errors.Is(test.expectedErr, sentinel.ErrKeyExpired) - - val, err = cache.GetOrSet(context.TODO(), test.key, test.value, test.expiry) - if !shouldExpire { - assert.Equal(t, test.expectedErr, err) - } - - if err == nil && !shouldExpire { - assert.Equal(t, test.expectedValue, val) - } - - if shouldExpire { - t.Log("sleeping for 2 Millisecond to allow the key to expire") - time.Sleep(2 * time.Millisecond) - - _, err = cache.GetOrSet(context.TODO(), test.key, test.value, test.expiry) - assert.Equal(t, test.expectedErr, err) - } - - // Check if the value has been set in the cache - if err == nil { - val, ok := cache.Get(context.TODO(), test.key) - assert.True(t, ok) - assert.Equal(t, test.expectedValue, val) - } else { - val, ok := cache.Get(context.TODO(), test.key) - assert.False(t, ok) - assert.Nil(t, val) - } - }) - } -} -tests/hypercache_get_test.go:14:1: cognitive-complexity: function TestHyperCache_Get has cognitive complexity 17 (> max enabled 15) (revive) -func TestHyperCache_Get(t *testing.T) { - tests := []struct { - name string - key string - value any - expiry time.Duration - expectedValue any - expectedErr error - sleep time.Duration - shouldSet bool - }{ - { - name: "get with valid key", - key: "key1", - value: "value1", - expiry: 0, - expectedValue: "value1", - expectedErr: nil, - }, - { - name: "get with valid key and value with expiry", - key: "key2", - value: "value2", - expiry: 5 * time.Second, - expectedValue: "value2", - expectedErr: nil, - }, - // { - // name: "get with empty key", - // key: "", - // value: "value3", - // expiry: 0, - // expectedValue: "", - // expectedErr: hypercache.ErrInvalidKey, - // }, - { - name: "get with expired key", - key: "key4", - value: "value4", - expiry: 1 * time.Second, - expectedValue: nil, - expectedErr: nil, - sleep: 2 * time.Second, - }, - { - name: "get with non-existent key", - key: "key5", - value: "value5", - expiry: 0, - expectedValue: nil, - expectedErr: sentinel.ErrKeyNotFound, - shouldSet: false, - }, - } - cache, err := hypercache.NewInMemoryWithDefaults(context.TODO(), 10) - assert.Nil(t, err) - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - if test.shouldSet { - err = cache.Set(context.TODO(), test.key, test.value, test.expiry) - if err != nil { - assert.Equal(t, test.expectedErr, err) - } - - if test.sleep > 0 { - time.Sleep(test.sleep) - } - } - - val, ok := cache.Get(context.TODO(), test.key) - if test.expectedErr != nil || !ok { - assert.False(t, ok) - } else { - assert.True(t, ok) - assert.Equal(t, test.expectedValue, val) - } - }) - } -} -tests/hypercache_get_test.go:14:1: function-length: maximum number of lines per function exceeded; max 75 but got 78 (revive) -func TestHyperCache_Get(t *testing.T) { - tests := []struct { - name string - key string - value any - expiry time.Duration - expectedValue any - expectedErr error - sleep time.Duration - shouldSet bool - }{ - { - name: "get with valid key", - key: "key1", - value: "value1", - expiry: 0, - expectedValue: "value1", - expectedErr: nil, - }, - { - name: "get with valid key and value with expiry", - key: "key2", - value: "value2", - expiry: 5 * time.Second, - expectedValue: "value2", - expectedErr: nil, - }, - // { - // name: "get with empty key", - // key: "", - // value: "value3", - // expiry: 0, - // expectedValue: "", - // expectedErr: hypercache.ErrInvalidKey, - // }, - { - name: "get with expired key", - key: "key4", - value: "value4", - expiry: 1 * time.Second, - expectedValue: nil, - expectedErr: nil, - sleep: 2 * time.Second, - }, - { - name: "get with non-existent key", - key: "key5", - value: "value5", - expiry: 0, - expectedValue: nil, - expectedErr: sentinel.ErrKeyNotFound, - shouldSet: false, - }, - } - cache, err := hypercache.NewInMemoryWithDefaults(context.TODO(), 10) - assert.Nil(t, err) - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - if test.shouldSet { - err = cache.Set(context.TODO(), test.key, test.value, test.expiry) - if err != nil { - assert.Equal(t, test.expectedErr, err) - } - - if test.sleep > 0 { - time.Sleep(test.sleep) - } - } - - val, ok := cache.Get(context.TODO(), test.key) - if test.expectedErr != nil || !ok { - assert.False(t, ok) - } else { - assert.True(t, ok) - assert.Equal(t, test.expectedValue, val) - } - }) - } -} -tests/hypercache_http_merkle_test.go:15:1: cognitive-complexity: function TestHTTPFetchMerkle has cognitive complexity 22 (> max enabled 15) (revive) -func TestHTTPFetchMerkle(t *testing.T) { - ctx := context.Background() - - // shared ring/membership - ring := cluster.NewRing(cluster.WithReplication(1)) - membership := cluster.NewMembership(ring) - - // create two nodes with HTTP server enabled (dynamically allocated addresses) - addr1 := AllocatePort(t) - addr2 := AllocatePort(t) - - n1 := cluster.NewNode("", addr1) - - b1i, err := backend.NewDistMemory(ctx, - backend.WithDistMembership(membership, n1), - backend.WithDistNode("n1", addr1), - backend.WithDistMerkleChunkSize(2), - ) - if err != nil { - t.Fatalf("b1: %v", err) - } - - n2 := cluster.NewNode("", addr2) - - b2i, err := backend.NewDistMemory(ctx, - backend.WithDistMembership(membership, n2), - backend.WithDistNode("n2", addr2), - backend.WithDistMerkleChunkSize(2), - ) - if err != nil { - t.Fatalf("b2: %v", err) - } - - b1, ok := b1i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b1i to *backend.DistMemory") - } - - b2, ok := b2i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b2i to *backend.DistMemory") - } - - StopOnCleanup(t, b1) - StopOnCleanup(t, b2) - - // HTTP transport resolver maps node IDs to http base URLs. - resolver := func(id string) (string, bool) { - switch id { // node IDs same as provided - case "n1": - return "http://" + b1.LocalNodeAddr(), true - case "n2": - return "http://" + b2.LocalNodeAddr(), true - } - - return "", false - } - // 5s transport timeout (was 2s) — under -race the fiber listener can take - // >2s to accept its first request, which made SyncWith time out spuriously. - transport := backend.NewDistHTTPTransport(5*time.Second, resolver) - b1.SetTransport(transport) - b2.SetTransport(transport) - - // ensure membership has both before writes (already upserted in constructors) - // write some keys to b1 only - for i := range 5 { // direct inject to sidestep replication/forwarding complexity - item := &cache.Item{Key: httpKey(i), Value: []byte("v"), Version: uint64(i + 1), Origin: "n1", LastUpdated: time.Now()} - b1.DebugInject(item) - } - - // Poll the HTTP merkle endpoint until it actually responds 200. Under - // -race the fiber listener can take seconds to start accepting requests - // even after Listen() returns; a single-shot Get is racy. - merkleReady := false - - deadline := time.Now().Add(10 * time.Second) - for time.Now().Before(deadline) { - resp, err := http.Get("http://" + b1.LocalNodeAddr() + "/internal/merkle") - if err == nil { - _ = resp.Body.Close() - - if resp.StatusCode == http.StatusOK { - merkleReady = true - - break - } - } - - time.Sleep(50 * time.Millisecond) - } - - if !merkleReady { - t.Fatal("merkle endpoint did not become ready within deadline") - } - - // b2 sync from b1 via HTTP transport - if err := b2.SyncWith(ctx, "n1"); err != nil { - t.Fatalf("sync: %v", err) - } - - // Validate keys present on b2. Allow brief retry to absorb any async tail - // in sync's apply path (each missing key is retried once). - for i := range 5 { - if _, ok := b2.Get(ctx, httpKey(i)); ok { - continue - } - - // One retry: re-sync and check again. - err := b2.SyncWith(ctx, "n1") - if err != nil { - t.Fatalf("re-sync: %v", err) - } - - if _, ok := b2.Get(ctx, httpKey(i)); !ok { - t.Fatalf("missing key %d post-sync", i) - } - } -} -tests/hypercache_http_merkle_test.go:15:1: cyclomatic: function TestHTTPFetchMerkle has cyclomatic complexity 17 (> max enabled 15) (revive) -func TestHTTPFetchMerkle(t *testing.T) { - ctx := context.Background() - - // shared ring/membership - ring := cluster.NewRing(cluster.WithReplication(1)) - membership := cluster.NewMembership(ring) - - // create two nodes with HTTP server enabled (dynamically allocated addresses) - addr1 := AllocatePort(t) - addr2 := AllocatePort(t) - - n1 := cluster.NewNode("", addr1) - - b1i, err := backend.NewDistMemory(ctx, - backend.WithDistMembership(membership, n1), - backend.WithDistNode("n1", addr1), - backend.WithDistMerkleChunkSize(2), - ) - if err != nil { - t.Fatalf("b1: %v", err) - } - - n2 := cluster.NewNode("", addr2) - - b2i, err := backend.NewDistMemory(ctx, - backend.WithDistMembership(membership, n2), - backend.WithDistNode("n2", addr2), - backend.WithDistMerkleChunkSize(2), - ) - if err != nil { - t.Fatalf("b2: %v", err) - } - - b1, ok := b1i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b1i to *backend.DistMemory") - } - - b2, ok := b2i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b2i to *backend.DistMemory") - } - - StopOnCleanup(t, b1) - StopOnCleanup(t, b2) - - // HTTP transport resolver maps node IDs to http base URLs. - resolver := func(id string) (string, bool) { - switch id { // node IDs same as provided - case "n1": - return "http://" + b1.LocalNodeAddr(), true - case "n2": - return "http://" + b2.LocalNodeAddr(), true - } - - return "", false - } - // 5s transport timeout (was 2s) — under -race the fiber listener can take - // >2s to accept its first request, which made SyncWith time out spuriously. - transport := backend.NewDistHTTPTransport(5*time.Second, resolver) - b1.SetTransport(transport) - b2.SetTransport(transport) - - // ensure membership has both before writes (already upserted in constructors) - // write some keys to b1 only - for i := range 5 { // direct inject to sidestep replication/forwarding complexity - item := &cache.Item{Key: httpKey(i), Value: []byte("v"), Version: uint64(i + 1), Origin: "n1", LastUpdated: time.Now()} - b1.DebugInject(item) - } - - // Poll the HTTP merkle endpoint until it actually responds 200. Under - // -race the fiber listener can take seconds to start accepting requests - // even after Listen() returns; a single-shot Get is racy. - merkleReady := false - - deadline := time.Now().Add(10 * time.Second) - for time.Now().Before(deadline) { - resp, err := http.Get("http://" + b1.LocalNodeAddr() + "/internal/merkle") - if err == nil { - _ = resp.Body.Close() - - if resp.StatusCode == http.StatusOK { - merkleReady = true - - break - } - } - - time.Sleep(50 * time.Millisecond) - } - - if !merkleReady { - t.Fatal("merkle endpoint did not become ready within deadline") - } - - // b2 sync from b1 via HTTP transport - if err := b2.SyncWith(ctx, "n1"); err != nil { - t.Fatalf("sync: %v", err) - } - - // Validate keys present on b2. Allow brief retry to absorb any async tail - // in sync's apply path (each missing key is retried once). - for i := range 5 { - if _, ok := b2.Get(ctx, httpKey(i)); ok { - continue - } - - // One retry: re-sync and check again. - err := b2.SyncWith(ctx, "n1") - if err != nil { - t.Fatalf("re-sync: %v", err) - } - - if _, ok := b2.Get(ctx, httpKey(i)); !ok { - t.Fatalf("missing key %d post-sync", i) - } - } -} -tests/hypercache_http_merkle_test.go:15:1: function-length: maximum number of lines per function exceeded; max 75 but got 116 (revive) -func TestHTTPFetchMerkle(t *testing.T) { - ctx := context.Background() - - // shared ring/membership - ring := cluster.NewRing(cluster.WithReplication(1)) - membership := cluster.NewMembership(ring) - - // create two nodes with HTTP server enabled (dynamically allocated addresses) - addr1 := AllocatePort(t) - addr2 := AllocatePort(t) - - n1 := cluster.NewNode("", addr1) - - b1i, err := backend.NewDistMemory(ctx, - backend.WithDistMembership(membership, n1), - backend.WithDistNode("n1", addr1), - backend.WithDistMerkleChunkSize(2), - ) - if err != nil { - t.Fatalf("b1: %v", err) - } - - n2 := cluster.NewNode("", addr2) - - b2i, err := backend.NewDistMemory(ctx, - backend.WithDistMembership(membership, n2), - backend.WithDistNode("n2", addr2), - backend.WithDistMerkleChunkSize(2), - ) - if err != nil { - t.Fatalf("b2: %v", err) - } - - b1, ok := b1i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b1i to *backend.DistMemory") - } - - b2, ok := b2i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b2i to *backend.DistMemory") - } - - StopOnCleanup(t, b1) - StopOnCleanup(t, b2) - - // HTTP transport resolver maps node IDs to http base URLs. - resolver := func(id string) (string, bool) { - switch id { // node IDs same as provided - case "n1": - return "http://" + b1.LocalNodeAddr(), true - case "n2": - return "http://" + b2.LocalNodeAddr(), true - } - - return "", false - } - // 5s transport timeout (was 2s) — under -race the fiber listener can take - // >2s to accept its first request, which made SyncWith time out spuriously. - transport := backend.NewDistHTTPTransport(5*time.Second, resolver) - b1.SetTransport(transport) - b2.SetTransport(transport) - - // ensure membership has both before writes (already upserted in constructors) - // write some keys to b1 only - for i := range 5 { // direct inject to sidestep replication/forwarding complexity - item := &cache.Item{Key: httpKey(i), Value: []byte("v"), Version: uint64(i + 1), Origin: "n1", LastUpdated: time.Now()} - b1.DebugInject(item) - } - - // Poll the HTTP merkle endpoint until it actually responds 200. Under - // -race the fiber listener can take seconds to start accepting requests - // even after Listen() returns; a single-shot Get is racy. - merkleReady := false - - deadline := time.Now().Add(10 * time.Second) - for time.Now().Before(deadline) { - resp, err := http.Get("http://" + b1.LocalNodeAddr() + "/internal/merkle") - if err == nil { - _ = resp.Body.Close() - - if resp.StatusCode == http.StatusOK { - merkleReady = true - - break - } - } - - time.Sleep(50 * time.Millisecond) - } - - if !merkleReady { - t.Fatal("merkle endpoint did not become ready within deadline") - } - - // b2 sync from b1 via HTTP transport - if err := b2.SyncWith(ctx, "n1"); err != nil { - t.Fatalf("sync: %v", err) - } - - // Validate keys present on b2. Allow brief retry to absorb any async tail - // in sync's apply path (each missing key is retried once). - for i := range 5 { - if _, ok := b2.Get(ctx, httpKey(i)); ok { - continue - } - - // One retry: re-sync and check again. - err := b2.SyncWith(ctx, "n1") - if err != nil { - t.Fatalf("re-sync: %v", err) - } - - if _, ok := b2.Get(ctx, httpKey(i)); !ok { - t.Fatalf("missing key %d post-sync", i) - } - } -} -tests/hypercache_http_merkle_test.go:15:1: function-length: maximum number of statements per function exceeded; max 50 but got 54 (revive) -func TestHTTPFetchMerkle(t *testing.T) { - ctx := context.Background() - - // shared ring/membership - ring := cluster.NewRing(cluster.WithReplication(1)) - membership := cluster.NewMembership(ring) - - // create two nodes with HTTP server enabled (dynamically allocated addresses) - addr1 := AllocatePort(t) - addr2 := AllocatePort(t) - - n1 := cluster.NewNode("", addr1) - - b1i, err := backend.NewDistMemory(ctx, - backend.WithDistMembership(membership, n1), - backend.WithDistNode("n1", addr1), - backend.WithDistMerkleChunkSize(2), - ) - if err != nil { - t.Fatalf("b1: %v", err) - } - - n2 := cluster.NewNode("", addr2) - - b2i, err := backend.NewDistMemory(ctx, - backend.WithDistMembership(membership, n2), - backend.WithDistNode("n2", addr2), - backend.WithDistMerkleChunkSize(2), - ) - if err != nil { - t.Fatalf("b2: %v", err) - } - - b1, ok := b1i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b1i to *backend.DistMemory") - } - - b2, ok := b2i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b2i to *backend.DistMemory") - } - - StopOnCleanup(t, b1) - StopOnCleanup(t, b2) - - // HTTP transport resolver maps node IDs to http base URLs. - resolver := func(id string) (string, bool) { - switch id { // node IDs same as provided - case "n1": - return "http://" + b1.LocalNodeAddr(), true - case "n2": - return "http://" + b2.LocalNodeAddr(), true - } - - return "", false - } - // 5s transport timeout (was 2s) — under -race the fiber listener can take - // >2s to accept its first request, which made SyncWith time out spuriously. - transport := backend.NewDistHTTPTransport(5*time.Second, resolver) - b1.SetTransport(transport) - b2.SetTransport(transport) - - // ensure membership has both before writes (already upserted in constructors) - // write some keys to b1 only - for i := range 5 { // direct inject to sidestep replication/forwarding complexity - item := &cache.Item{Key: httpKey(i), Value: []byte("v"), Version: uint64(i + 1), Origin: "n1", LastUpdated: time.Now()} - b1.DebugInject(item) - } - - // Poll the HTTP merkle endpoint until it actually responds 200. Under - // -race the fiber listener can take seconds to start accepting requests - // even after Listen() returns; a single-shot Get is racy. - merkleReady := false - - deadline := time.Now().Add(10 * time.Second) - for time.Now().Before(deadline) { - resp, err := http.Get("http://" + b1.LocalNodeAddr() + "/internal/merkle") - if err == nil { - _ = resp.Body.Close() - - if resp.StatusCode == http.StatusOK { - merkleReady = true - - break - } - } - - time.Sleep(50 * time.Millisecond) - } - - if !merkleReady { - t.Fatal("merkle endpoint did not become ready within deadline") - } - - // b2 sync from b1 via HTTP transport - if err := b2.SyncWith(ctx, "n1"); err != nil { - t.Fatalf("sync: %v", err) - } - - // Validate keys present on b2. Allow brief retry to absorb any async tail - // in sync's apply path (each missing key is retried once). - for i := range 5 { - if _, ok := b2.Get(ctx, httpKey(i)); ok { - continue - } - - // One retry: re-sync and check again. - err := b2.SyncWith(ctx, "n1") - if err != nil { - t.Fatalf("re-sync: %v", err) - } - - if _, ok := b2.Get(ctx, httpKey(i)); !ok { - t.Fatalf("missing key %d post-sync", i) - } - } -} -tests/hypercache_mgmt_dist_test.go:18:1: cognitive-complexity: function TestManagementHTTPDistMemory has cognitive complexity 18 (> max enabled 15) (revive) -func TestManagementHTTPDistMemory(t *testing.T) { //nolint:paralleltest - cfg, err := hypercache.NewConfig[backend.DistMemory](constants.DistMemoryBackend) - if err != nil { - t.Fatalf("NewConfig: %v", err) - } - - cfg.HyperCacheOptions = append(cfg.HyperCacheOptions, - hypercache.WithManagementHTTP[backend.DistMemory]("127.0.0.1:0"), // ephemeral port - ) - cfg.DistMemoryOptions = []backend.DistMemoryOption{ - backend.WithDistReplication(1), - backend.WithDistVirtualNodes(32), - backend.WithDistNode("test-node", "local"), - } - - hc, err := hypercache.New(context.Background(), hypercache.GetDefaultManager(), cfg) - if err != nil { - t.Fatalf("new dist hypercache: %v", err) - } - - defer func() { _ = hc.Stop(context.Background()) }() - - baseURL := waitForMgmt(t, hc) - - // Insert a key to exercise owners endpoint. - err = hc.Set(context.Background(), "alpha", "value", 0) - if err != nil { - // not fatal for owners shape but should succeed given replication=1 - t.Fatalf("set alpha: %v", err) - } - - // /config should include replication + virtualNodesPerNode - configBody := getJSON(t, baseURL+"/config") - if _, ok := configBody["replication"]; !ok { - t.Errorf("/config missing replication") - } - - if vnp, ok := configBody["virtualNodesPerNode"]; !ok || vnp == nil { - t.Errorf("/config missing virtualNodesPerNode") - } - - // /dist/metrics basic shape - metricsBody := getJSON(t, baseURL+"/dist/metrics") - if _, ok := metricsBody["ForwardGet"]; !ok { // one exported field - // could be 404 if backend unsupported (should not be here) - if e, hasErr := metricsBody[constants.ErrorLabel]; hasErr { - t.Fatalf("/dist/metrics returned error: %v", e) - } - - // else fail - t.Errorf("/dist/metrics missing ForwardGet field") - } - - // /dist/owners - ownersBody := getJSON(t, baseURL+"/dist/owners?key=alpha") - if _, ok := ownersBody["owners"]; !ok { - if e, hasErr := ownersBody[constants.ErrorLabel]; hasErr { - t.Fatalf("/dist/owners returned error: %v", e) - } - - t.Errorf("/dist/owners missing owners field") - } - - // /cluster/members - membersBody := getJSON(t, baseURL+"/cluster/members") - if _, ok := membersBody["members"]; !ok { - if e, hasErr := membersBody[constants.ErrorLabel]; hasErr { - t.Fatalf("/cluster/members returned error: %v", e) - } - - t.Errorf("/cluster/members missing members field") - } - - // /cluster/ring - ringBody := getJSON(t, baseURL+"/cluster/ring") - if _, ok := ringBody["vnodes"]; !ok { - if e, hasErr := ringBody[constants.ErrorLabel]; hasErr { - t.Fatalf("/cluster/ring returned error: %v", e) - } - - t.Errorf("/cluster/ring missing vnodes field") - } -} -tests/hypercache_mgmt_dist_test.go:18:1: function-length: maximum number of lines per function exceeded; max 75 but got 81 (revive) -func TestManagementHTTPDistMemory(t *testing.T) { //nolint:paralleltest - cfg, err := hypercache.NewConfig[backend.DistMemory](constants.DistMemoryBackend) - if err != nil { - t.Fatalf("NewConfig: %v", err) - } - - cfg.HyperCacheOptions = append(cfg.HyperCacheOptions, - hypercache.WithManagementHTTP[backend.DistMemory]("127.0.0.1:0"), // ephemeral port - ) - cfg.DistMemoryOptions = []backend.DistMemoryOption{ - backend.WithDistReplication(1), - backend.WithDistVirtualNodes(32), - backend.WithDistNode("test-node", "local"), - } - - hc, err := hypercache.New(context.Background(), hypercache.GetDefaultManager(), cfg) - if err != nil { - t.Fatalf("new dist hypercache: %v", err) - } - - defer func() { _ = hc.Stop(context.Background()) }() - - baseURL := waitForMgmt(t, hc) - - // Insert a key to exercise owners endpoint. - err = hc.Set(context.Background(), "alpha", "value", 0) - if err != nil { - // not fatal for owners shape but should succeed given replication=1 - t.Fatalf("set alpha: %v", err) - } - - // /config should include replication + virtualNodesPerNode - configBody := getJSON(t, baseURL+"/config") - if _, ok := configBody["replication"]; !ok { - t.Errorf("/config missing replication") - } - - if vnp, ok := configBody["virtualNodesPerNode"]; !ok || vnp == nil { - t.Errorf("/config missing virtualNodesPerNode") - } - - // /dist/metrics basic shape - metricsBody := getJSON(t, baseURL+"/dist/metrics") - if _, ok := metricsBody["ForwardGet"]; !ok { // one exported field - // could be 404 if backend unsupported (should not be here) - if e, hasErr := metricsBody[constants.ErrorLabel]; hasErr { - t.Fatalf("/dist/metrics returned error: %v", e) - } - - // else fail - t.Errorf("/dist/metrics missing ForwardGet field") - } - - // /dist/owners - ownersBody := getJSON(t, baseURL+"/dist/owners?key=alpha") - if _, ok := ownersBody["owners"]; !ok { - if e, hasErr := ownersBody[constants.ErrorLabel]; hasErr { - t.Fatalf("/dist/owners returned error: %v", e) - } - - t.Errorf("/dist/owners missing owners field") - } - - // /cluster/members - membersBody := getJSON(t, baseURL+"/cluster/members") - if _, ok := membersBody["members"]; !ok { - if e, hasErr := membersBody[constants.ErrorLabel]; hasErr { - t.Fatalf("/cluster/members returned error: %v", e) - } - - t.Errorf("/cluster/members missing members field") - } - - // /cluster/ring - ringBody := getJSON(t, baseURL+"/cluster/ring") - if _, ok := ringBody["vnodes"]; !ok { - if e, hasErr := ringBody[constants.ErrorLabel]; hasErr { - t.Fatalf("/cluster/ring returned error: %v", e) - } - - t.Errorf("/cluster/ring missing vnodes field") - } -} -tests/integration/dist_phase1_test.go:33:1: cognitive-complexity: function TestDistPhase1BasicQuorum has cognitive complexity 16 (> max enabled 15) (revive) -func TestDistPhase1BasicQuorum(t *testing.T) { - ctx := context.Background() - - addrA := allocatePort(t) - addrB := allocatePort(t) - addrC := allocatePort(t) - - // create three nodes; we'll stop C's HTTP after start to simulate outage then restart - makeNode := func(id, addr string, seeds []string) *backend.DistMemory { - bm, err := backend.NewDistMemory(ctx, - backend.WithDistNode(id, addr), - backend.WithDistSeeds(seeds), - backend.WithDistReplication(3), - backend.WithDistVirtualNodes(32), - backend.WithDistHintReplayInterval(200*time.Millisecond), - backend.WithDistHintTTL(5*time.Second), - backend.WithDistReadConsistency(backend.ConsistencyQuorum), - backend.WithDistWriteConsistency(backend.ConsistencyQuorum), - ) - if err != nil { - t.Fatalf("new dist memory: %v", err) - } - - bk, ok := bm.(*backend.DistMemory) - if !ok { - t.Fatalf("expected *backend.DistMemory, got %T", bm) - } - - return bk - } - - nodeA := makeNode("A", addrA, []string{addrB, addrC}) - nodeB := makeNode("B", addrB, []string{addrA, addrC}) - nodeC := makeNode("C", addrC, []string{addrA, addrB}) - // defer cleanup of A and B - defer func() { _ = nodeA.Stop(ctx); _ = nodeB.Stop(ctx) }() - - // allow some time for ring initialization - time.Sleep(200 * time.Millisecond) - - // Perform a write expecting replication across all three nodes - item := &cache.Item{Key: "k1", Value: []byte("v1"), Expiration: 0, Version: 1, Origin: "A", LastUpdated: time.Now()} - - err := nodeA.Set(ctx, item) - if err != nil { - t.Fatalf("set: %v", err) - } - - // Quorum read from B should succeed (value may be []byte, string, or json.RawMessage) - if got, ok := nodeB.Get(ctx, "k1"); !ok { - t.Fatalf("expected quorum read via B: not found") - } else { - assertValue(t, got.Value) - } - - // Basic propagation check loop (give replication a moment) - defer func() { _ = nodeC.Stop(ctx) }() - - deadline := time.Now().Add(3 * time.Second) - for time.Now().Before(deadline) { - if it, ok := nodeC.Get(ctx, "k1"); ok { - if valueOK(it.Value) { - goto Done - } - } - - time.Sleep(100 * time.Millisecond) - } - - if it, ok := nodeC.Get(ctx, "k1"); !ok { - // Not fatal yet; we only created scaffolding – mark skip for now. - t.Skipf("hint replay not yet observable; will be validated after full wiring (missing item)") - } else { - if !valueOK(it.Value) { - t.Skipf("value mismatch after wait") - } - } - -Done: -} -tests/integration/dist_phase1_test.go:33:1: function-length: maximum number of lines per function exceeded; max 75 but got 78 (revive) -func TestDistPhase1BasicQuorum(t *testing.T) { - ctx := context.Background() - - addrA := allocatePort(t) - addrB := allocatePort(t) - addrC := allocatePort(t) - - // create three nodes; we'll stop C's HTTP after start to simulate outage then restart - makeNode := func(id, addr string, seeds []string) *backend.DistMemory { - bm, err := backend.NewDistMemory(ctx, - backend.WithDistNode(id, addr), - backend.WithDistSeeds(seeds), - backend.WithDistReplication(3), - backend.WithDistVirtualNodes(32), - backend.WithDistHintReplayInterval(200*time.Millisecond), - backend.WithDistHintTTL(5*time.Second), - backend.WithDistReadConsistency(backend.ConsistencyQuorum), - backend.WithDistWriteConsistency(backend.ConsistencyQuorum), - ) - if err != nil { - t.Fatalf("new dist memory: %v", err) - } - - bk, ok := bm.(*backend.DistMemory) - if !ok { - t.Fatalf("expected *backend.DistMemory, got %T", bm) - } - - return bk - } - - nodeA := makeNode("A", addrA, []string{addrB, addrC}) - nodeB := makeNode("B", addrB, []string{addrA, addrC}) - nodeC := makeNode("C", addrC, []string{addrA, addrB}) - // defer cleanup of A and B - defer func() { _ = nodeA.Stop(ctx); _ = nodeB.Stop(ctx) }() - - // allow some time for ring initialization - time.Sleep(200 * time.Millisecond) - - // Perform a write expecting replication across all three nodes - item := &cache.Item{Key: "k1", Value: []byte("v1"), Expiration: 0, Version: 1, Origin: "A", LastUpdated: time.Now()} - - err := nodeA.Set(ctx, item) - if err != nil { - t.Fatalf("set: %v", err) - } - - // Quorum read from B should succeed (value may be []byte, string, or json.RawMessage) - if got, ok := nodeB.Get(ctx, "k1"); !ok { - t.Fatalf("expected quorum read via B: not found") - } else { - assertValue(t, got.Value) - } - - // Basic propagation check loop (give replication a moment) - defer func() { _ = nodeC.Stop(ctx) }() - - deadline := time.Now().Add(3 * time.Second) - for time.Now().Before(deadline) { - if it, ok := nodeC.Get(ctx, "k1"); ok { - if valueOK(it.Value) { - goto Done - } - } - - time.Sleep(100 * time.Millisecond) - } - - if it, ok := nodeC.Get(ctx, "k1"); !ok { - // Not fatal yet; we only created scaffolding – mark skip for now. - t.Skipf("hint replay not yet observable; will be validated after full wiring (missing item)") - } else { - if !valueOK(it.Value) { - t.Skipf("value mismatch after wait") - } - } - -Done: -} -tests/integration/dist_phase1_test.go:115:1: cognitive-complexity: function valueOK has cognitive complexity 42 (> max enabled 15) (revive) -func valueOK(v any) bool { //nolint:ireturn - switch x := v.(type) { - case []byte: - if string(x) == "v1" { - return true - } - - if s := string(x); s == "djE=" { // base64 of v1 - if b, err := base64.StdEncoding.DecodeString(s); err == nil && string(b) == "v1" { - return true - } - } - - return false - - case string: - if x == "v1" { - return true - } - - if x == "djE=" { // base64 form - if b, err := base64.StdEncoding.DecodeString(x); err == nil && string(b) == "v1" { - return true - } - } - - return false - - case json.RawMessage: - // could be "v1" or base64 inside quotes - if len(x) == 0 { - return false - } - - // try as string literal - var s string - - err := json.Unmarshal(x, &s) - if err == nil { - if s == "v1" { - return true - } - - if s == "djE=" { - if b, err2 := base64.StdEncoding.DecodeString(s); err2 == nil && string(b) == "v1" { - return true - } - } - } - - // fall back to raw compare - return string(x) == "v1" || string(x) == "\"v1\"" - - default: - s := fmt.Sprintf("%v", x) - if s == "v1" || s == "\"v1\"" { - return true - } - - if s == "djE=" { - if b, err := base64.StdEncoding.DecodeString(s); err == nil && string(b) == "v1" { - return true - } - } - - return false - } -} -tests/integration/dist_phase1_test.go:115:1: cyclomatic: function valueOK has cyclomatic complexity 25 (> max enabled 15) (revive) -func valueOK(v any) bool { //nolint:ireturn - switch x := v.(type) { - case []byte: - if string(x) == "v1" { - return true - } - - if s := string(x); s == "djE=" { // base64 of v1 - if b, err := base64.StdEncoding.DecodeString(s); err == nil && string(b) == "v1" { - return true - } - } - - return false - - case string: - if x == "v1" { - return true - } - - if x == "djE=" { // base64 form - if b, err := base64.StdEncoding.DecodeString(x); err == nil && string(b) == "v1" { - return true - } - } - - return false - - case json.RawMessage: - // could be "v1" or base64 inside quotes - if len(x) == 0 { - return false - } - - // try as string literal - var s string - - err := json.Unmarshal(x, &s) - if err == nil { - if s == "v1" { - return true - } - - if s == "djE=" { - if b, err2 := base64.StdEncoding.DecodeString(s); err2 == nil && string(b) == "v1" { - return true - } - } - } - - // fall back to raw compare - return string(x) == "v1" || string(x) == "\"v1\"" - - default: - s := fmt.Sprintf("%v", x) - if s == "v1" || s == "\"v1\"" { - return true - } - - if s == "djE=" { - if b, err := base64.StdEncoding.DecodeString(s); err == nil && string(b) == "v1" { - return true - } - } - - return false - } -} -tests/integration/dist_rebalance_test.go:17:1: function-length: maximum number of lines per function exceeded; max 75 but got 93 (revive) -func TestDistRebalanceJoin(t *testing.T) { - ctx := context.Background() - - // Initial cluster: 2 nodes. - addrA := allocatePort(t) - addrB := allocatePort(t) - - nodeA := mustDistNode( - ctx, - t, - "A", - addrA, - []string{addrB}, - backend.WithDistReplication(2), - backend.WithDistVirtualNodes(32), - backend.WithDistRebalanceInterval(100*time.Millisecond), - ) - - nodeB := mustDistNode( - ctx, - t, - "B", - addrB, - []string{addrA}, - backend.WithDistReplication(2), - backend.WithDistVirtualNodes(32), - backend.WithDistRebalanceInterval(100*time.Millisecond), - ) - defer func() { _ = nodeA.Stop(ctx); _ = nodeB.Stop(ctx) }() - - // Write a spread of keys via A. - totalKeys := 300 - for i := range totalKeys { - k := cacheKey(i) - - it := &cache.Item{Key: k, Value: []byte("v"), Version: 1, Origin: "A", LastUpdated: time.Now()} - - err := nodeA.Set(ctx, it) - if err != nil { - t.Fatalf("set %s: %v", k, err) - } - } - - time.Sleep(200 * time.Millisecond) // allow initial replication - - // Capture ownership counts before join. - skeys := sampleKeys(totalKeys) - - _ = ownedPrimaryCount(nodeA, skeys) // baseline (unused currently) - _ = ownedPrimaryCount(nodeB, skeys) - - // Add third node C. - addrC := allocatePort(t) - - nodeC := mustDistNode( - ctx, - t, - "C", - addrC, - []string{addrA, addrB}, - backend.WithDistReplication(2), - backend.WithDistVirtualNodes(32), - backend.WithDistRebalanceInterval(100*time.Millisecond), - ) - defer func() { _ = nodeC.Stop(ctx) }() - - // Manually inject C into A and B membership (simulating gossip propagation delay that doesn't exist yet). - nodeA.AddPeer(addrC) - nodeB.AddPeer(addrC) - - // Allow membership to propagate + several rebalance ticks. - time.Sleep(1200 * time.Millisecond) - - // Post-join ownership counts (sampled locally using isOwner logic via Get + Metrics ring lookup indirectly). - postOwnedA := ownedPrimaryCount(nodeA, skeys) - postOwnedB := ownedPrimaryCount(nodeB, skeys) - postOwnedC := ownedPrimaryCount(nodeC, skeys) - - // Basic sanity: new node should now own > 0 keys. - if postOwnedC == 0 { - t.Fatalf("expected node C to own some keys after rebalancing") - } - - // Distribution variance check: ensure no node has > 80% of sample (initial naive rebalance heuristic). - maxAllowed := int(float64(totalKeys) * 0.80) - if postOwnedA > maxAllowed || postOwnedB > maxAllowed || postOwnedC > maxAllowed { - t.Fatalf("ownership still highly skewed: A=%d B=%d C=%d", postOwnedA, postOwnedB, postOwnedC) - } - - // Rebalance metrics should show migrations (keys forwarded off old primaries) across cluster. - migrated := nodeA.Metrics().RebalancedKeys + nodeB.Metrics().RebalancedKeys + nodeC.Metrics().RebalancedKeys - if migrated == 0 { - t.Fatalf("expected some rebalanced keys (total migrated=0)") - } -} -tests/management_http_test.go:19:1: function-length: maximum number of lines per function exceeded; max 75 but got 81 (revive) -func TestManagementHTTP_BasicEndpoints(t *testing.T) { - cfg, err := hypercache.NewConfig[backend.InMemory](constants.InMemoryBackend) - if err != nil { - t.Fatalf("NewConfig: %v", err) - } - - cfg.HyperCacheOptions = append(cfg.HyperCacheOptions, - hypercache.WithEvictionInterval[backend.InMemory](0), - hypercache.WithManagementHTTP[backend.InMemory]("127.0.0.1:0"), - ) - - ctx := context.Background() - hc, err := hypercache.New(ctx, hypercache.GetDefaultManager(), cfg) - assert.Nil(t, err) - - defer hc.Stop(ctx) - - // Wait for the management HTTP listener to come up. The race detector - // can push listener startup well past the original 30 ms; poll with a - // generous deadline instead. - var addr string - - deadline := time.Now().Add(5 * time.Second) - for time.Now().Before(deadline) { - addr = hc.ManagementHTTPAddress() - if addr != "" { - break - } - - time.Sleep(10 * time.Millisecond) - } - - if addr == "" { - t.Fatal("management HTTP listener did not bind within deadline") - } - - client := &http.Client{Timeout: 5 * time.Second} - - // /health - resp, err := client.Get("http://" + addr + "/health") - if err != nil { - t.Fatalf("GET /health: %v", err) - } - - assert.Equal(t, http.StatusOK, resp.StatusCode) - - _ = resp.Body.Close() - - // /stats - resp, err = client.Get("http://" + addr + "/stats") - if err != nil { - t.Fatalf("GET /stats: %v", err) - } - - assert.Equal(t, http.StatusOK, resp.StatusCode) - - var statsBody map[string]any - - dec := json.NewDecoder(resp.Body) - - err = dec.Decode(&statsBody) - assert.NoError(t, err) - - _ = resp.Body.Close() - - // /config - resp, err = client.Get("http://" + addr + "/config") - if err != nil { - t.Fatalf("GET /config: %v", err) - } - - assert.Equal(t, http.StatusOK, resp.StatusCode) - - var cfgBody map[string]any - - dec = json.NewDecoder(resp.Body) - _ = dec.Decode(&cfgBody) - _ = resp.Body.Close() - - assert.True(t, len(cfgBody) > 0) - - assert.True(t, cfgBody["evictionAlgorithm"] != nil) -} -tests/merkle_sync_test.go:13:1: function-length: maximum number of lines per function exceeded; max 75 but got 76 (revive) -func TestMerkleSyncConvergence(t *testing.T) { - ctx := context.Background() - transport := backend.NewInProcessTransport() - - bA, err := backend.NewDistMemory(ctx, - backend.WithDistNode("A", AllocatePort(t)), - backend.WithDistReplication(1), - backend.WithDistMerkleChunkSize(2), - ) - if err != nil { - t.Fatalf("new dist memory A: %v", err) - } - - dmA, ok := any(bA).(*backend.DistMemory) - if !ok { - t.Fatalf("expected *backend.DistMemory, got %T", bA) - } - - StopOnCleanup(t, dmA) - - bB, err := backend.NewDistMemory(ctx, - backend.WithDistNode("B", AllocatePort(t)), - backend.WithDistReplication(1), - backend.WithDistMerkleChunkSize(2), - ) - if err != nil { - t.Fatalf("new dist memory B: %v", err) - } - - dmB, ok := any(bB).(*backend.DistMemory) - if !ok { - t.Fatalf("expected *backend.DistMemory, got %T", bB) - } - - StopOnCleanup(t, dmB) - - dmA.SetTransport(transport) - dmB.SetTransport(transport) - - // register for in-process lookups - transport.Register(dmA) - transport.Register(dmB) - - // inject divergent data (A has extra/newer) - for i := range 5 { - it := &cache.Item{Key: keyf("k", i), Value: []byte("vA"), Version: uint64(i + 1), Origin: "A", LastUpdated: time.Now()} - dmA.DebugInject(it) - } - - // B shares only first 2 keys older versions - for i := range 2 { - it := &cache.Item{Key: keyf("k", i), Value: []byte("old"), Version: uint64(i), Origin: "B", LastUpdated: time.Now()} - dmB.DebugInject(it) - } - - // Run sync B->A to pull newer - if err := dmB.SyncWith(ctx, string(dmA.LocalNodeID())); err != nil { - // HTTP transport fetch merkle unsupported; we rely on in-process - if testing.Verbose() { - t.Logf("sync error: %v", err) - } - } - - // Validate B now has all 5 keys with correct versions (>= A's) - for i := range 5 { - k := keyf("k", i) - itA, _ := dmA.Get(ctx, k) - - itB, _ := dmB.Get(ctx, k) - if itA == nil || itB == nil { - t.Fatalf("missing key %s after sync", k) - } - - if itB.Version < itA.Version { - t.Fatalf("expected B version >= A version for %s", k) - } - } -} -176 issues: -* cyclop: 7 -* dupl: 2 -* err113: 1 -* errcheck: 10 -* forcetypeassert: 3 -* funlen: 9 -* gocognit: 2 -* goconst: 19 -* gocritic: 2 -* gosec: 8 -* noctx: 5 -* noinlineerr: 3 -* paralleltest: 50 -* prealloc: 3 -* predeclared: 2 -* revive: 50 diff --git a/management_http.go b/management_http.go index a5de0ae..761e55f 100644 --- a/management_http.go +++ b/management_http.go @@ -3,6 +3,7 @@ package hypercache import ( "context" "net" + "sync/atomic" "time" fiber "github.com/gofiber/fiber/v3" @@ -28,14 +29,24 @@ type ManagementHTTPServer struct { ln net.Listener started bool listenerDeadline time.Duration - // ctx is the server-lifecycle context captured at Start. Handlers + // ctx is the server-lifecycle context derived from the ctx supplied + // to Start, with its own cancel func wired into Shutdown. Handlers // pass it to backend operations (Clear in particular) so cancellation - // propagates from HyperCache.Stop. We do NOT use the per-request - // fiber.Ctx for this: fiber.Ctx is pooled and reset after the handler - // returns, racing with happy-eyeballs goroutines spawned by - // net.(*Dialer).DialContext when DistMemory's transport fan-out goes - // through http.Client.Do. + // propagates when the operator calls hyperCache.Stop. + // + // We do NOT use the per-request fiber.Ctx for this: fiber.Ctx is + // pooled and reset after the handler returns, racing with + // happy-eyeballs goroutines spawned by net.(*Dialer).DialContext + // when DistMemory's transport fan-out goes through http.Client.Do. ctx context.Context //nolint:containedctx // captured server lifecycle, not request scope + // lifeCancel cancels s.ctx; called from Shutdown so in-flight + // handlers see Done() before fiber drains the listeners. + lifeCancel context.CancelFunc + // serveErr captures the last error returned by app.Listener when the + // background serve goroutine exits. Operators can read it via + // LastServeError() to surface listener failures (e.g. port already + // bound) instead of having them silently swallowed. + serveErr atomic.Pointer[error] } // WithMgmtAuth sets an auth function (return error to block). @@ -127,6 +138,20 @@ func NewManagementHTTPServer(addr string, opts ...ManagementHTTPOption) *Managem return srv } +// LastServeError returns the last error captured from the background +// serve goroutine. Returns nil when the server shut down cleanly. +func (s *ManagementHTTPServer) LastServeError() error { + if s == nil { + return nil + } + + if errp := s.serveErr.Load(); errp != nil { + return *errp + } + + return nil +} + // mountRoutes registers endpoints onto the Fiber app. type managementCache interface { GetStats() stats.Stats @@ -168,7 +193,10 @@ func (s *ManagementHTTPServer) Start(ctx context.Context, hc managementCache) er return nil } - s.ctx = ctx + // Derive a lifecycle ctx so Shutdown can cancel in-flight handlers + // independently of the caller's ctx (which usually never cancels — + // production code passes context.Background()). + s.ctx, s.lifeCancel = context.WithCancel(ctx) s.mountRoutes(hc) lc := net.ListenConfig{} @@ -180,12 +208,15 @@ func (s *ManagementHTTPServer) Start(ctx context.Context, hc managementCache) er s.ln = ln - go func() { // serve in background (optional server errors are ignored intentionally) + go func() { // Suppress fiber's startup banner so tests at -count=N do not drown // real failures under hundreds of "INFO Server started on..." lines. - err = s.app.Listener(ln, fiber.ListenConfig{DisableStartupMessage: true}) - if err != nil { // optional server; log hook could be added in future - _ = err + serveErr := s.app.Listener(ln, fiber.ListenConfig{DisableStartupMessage: true}) + if serveErr != nil { + // Stash so operators can read it via LastServeError(); a + // listener that crashed silently is the worst kind of + // production bug. + s.serveErr.Store(&serveErr) } }() @@ -209,10 +240,14 @@ func (s *ManagementHTTPServer) Shutdown(ctx context.Context) error { return nil } - // ShutdownWithContext closes listeners gracefully, waits for in-flight - // requests, and force-closes once ctx's deadline elapses. Replaces - // the previous go-routine + select pattern that leaked the shutdown - // goroutine when our ctx fired first. + // Cancel s.ctx first so in-flight handlers see Done() before fiber + // starts draining listeners. ShutdownWithContext then closes + // listeners gracefully, waits for in-flight requests, and + // force-closes once ctx's deadline elapses. + if s.lifeCancel != nil { + s.lifeCancel() + } + return s.app.ShutdownWithContext(ctx) } diff --git a/pkg/backend/dist_http_server.go b/pkg/backend/dist_http_server.go index 108183e..df73ca6 100644 --- a/pkg/backend/dist_http_server.go +++ b/pkg/backend/dist_http_server.go @@ -2,8 +2,12 @@ package backend import ( "context" + "crypto/subtle" + "crypto/tls" "net" + "net/http" "strconv" + "sync/atomic" "time" "github.com/goccy/go-json" @@ -11,6 +15,7 @@ import ( "github.com/hyp3rd/ewrap" "github.com/hyp3rd/hypercache/internal/constants" + "github.com/hyp3rd/hypercache/internal/sentinel" cache "github.com/hyp3rd/hypercache/pkg/cache/v2" ) @@ -28,6 +33,94 @@ type distHTTPServer struct { // when applySet's replica fan-out goes through http.Client.Do. // See the race trace captured during phase-5b investigation. ctx context.Context //nolint:containedctx // captured server lifecycle, not request scope + // auth is the configured authentication policy. Zero-valued means no + // auth (current default behavior). + auth DistHTTPAuth + // tlsConfig (when non-nil) wraps the listener with tls.NewListener. + // Resolver advertises https:// in that case; clients with the same + // TLSConfig handshake successfully, plaintext peers are rejected at + // the TCP level by Go's TLS server. + tlsConfig *tls.Config + // serveErr captures the last error returned by app.Listener when the + // background serve goroutine exits. Operators can read it via + // LastServeError() to surface listener failures (e.g. port already in + // use, TLS handshake failure on accept) instead of having them + // silently swallowed. + serveErr atomic.Pointer[error] +} + +// DistHTTPAuth configures bearer-token authentication for the dist HTTP +// server (inbound) and the auto-created HTTP client (outbound). Zero-value +// disables auth — current behavior. When configured, *all* dist endpoints +// (including /health) require a valid token; operators who want a public +// health endpoint can supply a custom ServerVerify that exempts that path. +// +// Most clusters need only Token: every node sets the same string, the +// server validates incoming Authorization: Bearer headers via +// constant-time compare, and the client sends the same header on every +// outgoing request. +// +// ServerVerify and ClientSign are escape hatches for JWT, mTLS-derived +// identity, HMAC signing, etc. When set they fully replace the default +// token check / header injection. +type DistHTTPAuth struct { + // Token is the shared bearer string. When set (and ServerVerify is + // nil), the server requires `Authorization: Bearer ` on every + // request. The auto-created client sends the same header. + Token string + // ServerVerify (optional) inspects each incoming request and returns + // non-nil to reject with HTTP 401. Use for JWT, OAuth introspection, + // path-based exemptions, etc. When set it replaces the Token check. + ServerVerify func(fiber.Ctx) error + // ClientSign (optional) decorates each outgoing request before send. + // Use for HMAC signing, mTLS-derived headers, etc. When set it + // replaces the default `Authorization: Bearer ` header. + ClientSign func(*http.Request) error +} + +// configured reports whether the auth policy is active. +func (a DistHTTPAuth) configured() bool { + return a.Token != "" || a.ServerVerify != nil || a.ClientSign != nil +} + +// verify validates the incoming request against the configured policy. +// Returns nil when the request is authorized, non-nil otherwise. The +// default (Token-only) check uses constant-time compare to defeat timing +// side-channels. +func (a DistHTTPAuth) verify(fctx fiber.Ctx) error { + if a.ServerVerify != nil { + return a.ServerVerify(fctx) + } + + if a.Token == "" { + return nil + } + + got := fctx.Get("Authorization") + want := "Bearer " + a.Token + + if subtle.ConstantTimeCompare([]byte(got), []byte(want)) != 1 { + return sentinel.ErrUnauthorized + } + + return nil +} + +// sign decorates an outgoing request with the configured auth header. +// Default (Token-only) sets `Authorization: Bearer `. ClientSign +// fully overrides when set. +func (a DistHTTPAuth) sign(req *http.Request) error { + if a.ClientSign != nil { + return a.ClientSign(req) + } + + if a.Token == "" { + return nil + } + + req.Header.Set("Authorization", "Bearer "+a.Token) + + return nil } // minimal request/response types reused by transport @@ -82,6 +175,18 @@ type DistHTTPLimits struct { Concurrency int // ClientTimeout is the per-request deadline for the dist HTTP client. ClientTimeout time.Duration + // TLSConfig (when non-nil) enables TLS for both the dist HTTP server + // (wraps the TCP listener with tls.NewListener) and the auto-created + // HTTP client (sets Transport.TLSClientConfig). Operators must apply + // the same config to every node; mismatched roots/certs cause peer + // handshakes to fail. The same struct is shared by server and client + // because in this codebase a node is both — but tests / advanced + // callers can fork the value and assign different ones if needed. + // + // For mTLS, set both Certificates (server cert) and ClientCAs + + // ClientAuth=tls.RequireAndVerifyClientCert. The auto-client uses + // the same cert as its client cert via Certificates[0]. + TLSConfig *tls.Config } // withDefaults fills any zero-valued field on l with the package default. @@ -118,7 +223,7 @@ func (l DistHTTPLimits) withDefaults() DistHTTPLimits { return l } -func newDistHTTPServer(addr string, limits DistHTTPLimits) *distHTTPServer { +func newDistHTTPServer(addr string, limits DistHTTPLimits, auth DistHTTPAuth) *distHTTPServer { limits = limits.withDefaults() app := fiber.New(fiber.Config{ @@ -129,18 +234,64 @@ func newDistHTTPServer(addr string, limits DistHTTPLimits) *distHTTPServer { Concurrency: limits.Concurrency, }) - return &distHTTPServer{app: app, addr: addr} + return &distHTTPServer{app: app, addr: addr, auth: auth, tlsConfig: limits.TLSConfig} +} + +// LastServeError returns the last error captured from the background +// serve goroutine (typically Listener accept-loop failure or TLS-level +// rejection). Returns nil when the server shut down cleanly. Replaces +// the pre-5e silent-swallow pattern where serveErr was assigned to _ and +// dropped, leaving operators with no signal that the listener died. +func (s *distHTTPServer) LastServeError() error { + if s == nil { + return nil + } + + if errp := s.serveErr.Load(); errp != nil { + return *errp + } + + return nil +} + +// wrapAuth returns an auth-checking wrapper around the supplied handler +// when the server's auth policy is configured; otherwise returns the +// handler untouched (zero overhead for unauthenticated deployments). +func (s *distHTTPServer) wrapAuth(handler fiber.Handler) fiber.Handler { + if !s.auth.configured() { + return handler + } + + return func(fctx fiber.Ctx) error { + err := s.auth.verify(fctx) + if err != nil { + return fctx.Status(fiber.StatusUnauthorized).JSON(fiber.Map{constants.ErrorLabel: err.Error()}) + } + + return handler(fctx) + } } -func (s *distHTTPServer) start(ctx context.Context, dm *DistMemory) error { - s.ctx = ctx +// start registers handlers and binds the listener. The caller MUST set +// s.ctx to the desired handler-side lifecycle context before calling +// start — that ctx is captured into handler closures and used as the +// operation ctx for backend ops (applySet, applyRemove). The bindCtx +// argument controls only the listener.Listen call. +func (s *distHTTPServer) start(bindCtx context.Context, dm *DistMemory) error { + if s.ctx == nil { + // Defensive default: fall back to the bind ctx so the server is + // usable even if the caller forgot to set a lifecycle ctx. This + // matches the pre-5d behavior where start captured its own ctx. + s.ctx = bindCtx + } + s.registerSet(dm) s.registerGet(dm) s.registerRemove(dm) s.registerHealth() s.registerMerkle(dm) - return s.listen(ctx) + return s.listen(bindCtx) } // handleSet decodes a httpSetRequest and applies it locally + optionally @@ -170,7 +321,7 @@ func (s *distHTTPServer) handleSet(fctx fiber.Ctx, dm *DistMemory) error { } func (s *distHTTPServer) registerSet(dm *DistMemory) { - handler := func(fctx fiber.Ctx) error { return s.handleSet(fctx, dm) } + handler := s.wrapAuth(func(fctx fiber.Ctx) error { return s.handleSet(fctx, dm) }) // legacy + canonical paths share the same handler. s.app.Post("/internal/cache/set", handler) s.app.Post("/internal/set", handler) @@ -198,7 +349,7 @@ func (*distHTTPServer) handleGet(fctx fiber.Ctx, dm *DistMemory) error { } func (s *distHTTPServer) registerGet(dm *DistMemory) { - handler := func(fctx fiber.Ctx) error { return s.handleGet(fctx, dm) } + handler := s.wrapAuth(func(fctx fiber.Ctx) error { return s.handleGet(fctx, dm) }) s.app.Get("/internal/cache/get", handler) s.app.Get("/internal/get", handler) } @@ -223,17 +374,20 @@ func (s *distHTTPServer) handleRemove(fctx fiber.Ctx, dm *DistMemory) error { } func (s *distHTTPServer) registerRemove(dm *DistMemory) { - handler := func(fctx fiber.Ctx) error { return s.handleRemove(fctx, dm) } + handler := s.wrapAuth(func(fctx fiber.Ctx) error { return s.handleRemove(fctx, dm) }) s.app.Delete("/internal/cache/remove", handler) s.app.Delete("/internal/del", handler) } func (s *distHTTPServer) registerHealth() { - s.app.Get("/health", func(fctx fiber.Ctx) error { return fctx.SendString("ok") }) + // Auth-wrapped: when a token is configured, /health requires it too. + // Operators who want a public health probe should supply a custom + // ServerVerify that exempts the /health path. + s.app.Get("/health", s.wrapAuth(func(fctx fiber.Ctx) error { return fctx.SendString("ok") })) } func (s *distHTTPServer) registerMerkle(dm *DistMemory) { - s.app.Get("/internal/merkle", func(fctx fiber.Ctx) error { + s.app.Get("/internal/merkle", s.wrapAuth(func(fctx fiber.Ctx) error { tree := dm.BuildMerkleTree() return fctx.JSON(fiber.Map{ @@ -241,10 +395,10 @@ func (s *distHTTPServer) registerMerkle(dm *DistMemory) { "leaf_hashes": tree.LeafHashes, "chunk_size": tree.ChunkSize, }) - }) + })) // naive keys listing for anti-entropy (testing only). Not efficient for large datasets. - s.app.Get("/internal/keys", func(fctx fiber.Ctx) error { + s.app.Get("/internal/keys", s.wrapAuth(func(fctx fiber.Ctx) error { var keys []string for _, shard := range dm.shards { @@ -258,7 +412,7 @@ func (s *distHTTPServer) registerMerkle(dm *DistMemory) { } return fctx.JSON(fiber.Map{"keys": keys}) - }) + })) } func (s *distHTTPServer) listen(ctx context.Context) error { @@ -269,15 +423,26 @@ func (s *distHTTPServer) listen(ctx context.Context) error { return ewrap.Wrap(err, "dist http listen") } + // Wrap the TCP listener with TLS when configured. Plaintext peers + // connecting to a TLS-wrapped listener fail at the handshake — Go + // returns a tls.RecordHeaderError to the accept loop, which we + // capture in serveErr below. + if s.tlsConfig != nil { + ln = tls.NewListener(ln, s.tlsConfig) + } + s.ln = ln - go func() { // capture server errors (ignored intentionally for now) + go func() { // DisableStartupMessage avoids fiber's per-instance banner spam, - // which would otherwise flood test output at -count=N (see hundreds of - // "INFO Server started on..." lines drowning real failures). + // which would otherwise flood test output at -count=N (see hundreds + // of "INFO Server started on..." lines drowning real failures). serveErr := s.app.Listener(ln, fiber.ListenConfig{DisableStartupMessage: true}) - if serveErr != nil { // separated for noinlineerr linter - _ = serveErr + if serveErr != nil { + // Stash so operators can read it via LastServeError(); a + // listener that crashed silently is the worst kind of + // production bug. + s.serveErr.Store(&serveErr) } }() diff --git a/pkg/backend/dist_http_transport.go b/pkg/backend/dist_http_transport.go index f8b8992..0bb4682 100644 --- a/pkg/backend/dist_http_transport.go +++ b/pkg/backend/dist_http_transport.go @@ -3,6 +3,7 @@ package backend import ( "bytes" "context" + "crypto/tls" "io" "net/http" "net/url" @@ -24,6 +25,11 @@ type DistHTTPTransport struct { // respBodyLimit caps response bodies so a malicious or compromised // peer cannot OOM the requester via a giant response. <=0 disables. respBodyLimit int64 + // auth (zero-value = disabled) decorates outgoing requests with a + // bearer token or custom signing function. Server-side validation + // lives on distHTTPServer; the two share the same DistHTTPAuth + // struct when constructed via NewDistHTTPTransportWithAuth. + auth DistHTTPAuth } const statusThreshold = 300 @@ -48,12 +54,61 @@ func NewDistHTTPTransport(timeout time.Duration, resolver func(string) (string, // the caller needs to raise/lower the response-body cap or align the client // timeout with custom DistHTTPLimits applied to the server. func NewDistHTTPTransportWithLimits(limits DistHTTPLimits, resolver func(string) (string, bool)) *DistHTTPTransport { + return NewDistHTTPTransportWithAuth(limits, DistHTTPAuth{}, resolver) +} + +// NewDistHTTPTransportWithAuth combines explicit limits and auth policy in +// a single constructor. DistMemory uses this when WithDistHTTPAuth is set +// so the auto-created HTTP client signs requests with the same token the +// server validates against. +// +// If limits.TLSConfig is non-nil, the underlying http.Transport is +// configured with the same *tls.Config used by the server, so client +// connections to peer https:// endpoints handshake against the same +// roots and certificates. +func NewDistHTTPTransportWithAuth(limits DistHTTPLimits, auth DistHTTPAuth, resolver func(string) (string, bool)) *DistHTTPTransport { limits = limits.withDefaults() + client := &http.Client{Timeout: limits.ClientTimeout} + if limits.TLSConfig != nil { + // Clone http.DefaultTransport's settings (timeouts, idle pools) + // then attach the TLS config. Cloning vs constructing from + // scratch avoids reinventing default timeouts that future Go + // versions may tighten. + tr, ok := http.DefaultTransport.(*http.Transport) + if !ok { + // Defensive: stdlib has always returned *http.Transport + // here, but if a third party rewrote DefaultTransport at + // init we still want a usable client. + tr = &http.Transport{} + } else { + tr = tr.Clone() + } + + // Clone the TLS config and force HTTP/1.1 via ALPN. The dist + // HTTP server is fiber+fasthttp which speaks HTTP/1.1 only — + // without this constraint Go's stdlib transport advertises h2 + // via ALPN, succeeds the handshake, then immediately fails + // reading the response with "http2: frame too large, note that + // the frame header looked like an HTTP/1.1 header". The + // http/1.1 NextProto override is the canonical fix from + // net/http docs. + tlsConf := limits.TLSConfig.Clone() + if len(tlsConf.NextProtos) == 0 { + tlsConf.NextProtos = []string{"http/1.1"} + } + + tr.TLSClientConfig = tlsConf + tr.ForceAttemptHTTP2 = false + tr.TLSNextProto = map[string]func(string, *tls.Conn) http.RoundTripper{} + client.Transport = tr + } + return &DistHTTPTransport{ - client: &http.Client{Timeout: limits.ClientTimeout}, + client: client, baseURLFn: resolver, respBodyLimit: limits.ResponseLimit, + auth: auth, } } @@ -419,6 +474,11 @@ func (t *DistHTTPTransport) newNodeRequest( return nil, ewrap.Wrap(err, "create new request") } + err = t.auth.sign(req) + if err != nil { + return nil, ewrap.Wrap(err, "sign request") + } + return req, nil } diff --git a/pkg/backend/dist_memory.go b/pkg/backend/dist_memory.go index 618be5b..90f64e6 100644 --- a/pkg/backend/dist_memory.go +++ b/pkg/backend/dist_memory.go @@ -134,6 +134,21 @@ type DistMemory struct { // dist_http_server.go via DistHTTPLimits.withDefaults(). httpLimits DistHTTPLimits + // httpAuth configures bearer-token / signing-fn auth applied to both + // the dist HTTP server (inbound validation) and the auto-created + // HTTP client (outbound signing). Zero-value disables auth — same + // behavior as before WithDistHTTPAuth was added. + httpAuth DistHTTPAuth + + // lifeCtx is the server-lifetime context. Derived from the + // constructor ctx but with its own cancel func wired into Stop, so + // in-flight HTTP handlers and replica forwards observe Done() when + // the user calls Stop — not just when the constructor ctx happens + // to cancel (which it usually doesn't, since callers pass + // context.Background()). + lifeCtx context.Context //nolint:containedctx // server-lifecycle, not request-scope + lifeCancel context.CancelFunc + // replica-only diff scan limits replicaDiffMaxPerTick int // 0 = unlimited @@ -207,6 +222,13 @@ func (dm *DistMemory) Membership() *cluster.Membership { return dm.membership } // Ring returns the ring reference. func (dm *DistMemory) Ring() *cluster.Ring { return dm.ring } +// LifecycleContext returns the server-lifecycle context derived from the +// ctx supplied to NewDistMemory. Stop cancels this context, so callers +// (including HTTP handlers and background loops) can observe shutdown +// without polling the various stopCh channels. Read-only — modifying +// the returned ctx has no effect. +func (dm *DistMemory) LifecycleContext() context.Context { return dm.lifeCtx } + type distShard struct { items cache.ConcurrentMap tombs map[string]tombstone // per-key tombstones @@ -585,14 +607,37 @@ func WithDistHTTPLimits(limits DistHTTPLimits) DistMemoryOption { return func(dm *DistMemory) { dm.httpLimits = limits } } +// WithDistHTTPAuth configures bearer-token (or custom verify/sign) +// authentication for the dist HTTP server and auto-created HTTP client. +// See DistHTTPAuth for the policy struct shape and defaults. +// +// Operators must apply the same auth policy to every node in the +// cluster — peers with mismatched tokens will reject each other's +// requests with HTTP 401. Like WithDistHTTPLimits this only affects the +// internal transport; an externally-supplied DistTransport is the +// caller's responsibility to authenticate. +func WithDistHTTPAuth(auth DistHTTPAuth) DistMemoryOption { + return func(dm *DistMemory) { dm.httpAuth = auth } +} + // NewDistMemory creates a new DistMemory backend. func NewDistMemory(ctx context.Context, opts ...DistMemoryOption) (IBackend[DistMemory], error) { + // Derive a server-lifetime context from the caller's ctx so that: + // 1. If the caller cancels their ctx, our background work and HTTP + // handlers see it (chains via WithCancel parent). + // 2. Stop() can independently cancel without touching the caller's + // ctx — gives operators a deterministic shutdown signal even + // when they pass context.Background(). + lifeCtx, lifeCancel := context.WithCancel(ctx) + dm := &DistMemory{ shardCount: defaultDistShardCount, replication: 1, readConsistency: ConsistencyOne, writeConsistency: ConsistencyQuorum, latency: newDistLatencyCollector(), + lifeCtx: lifeCtx, + lifeCancel: lifeCancel, } for _, opt := range opts { opt(dm) @@ -600,13 +645,16 @@ func NewDistMemory(ctx context.Context, opts ...DistMemoryOption) (IBackend[Dist dm.ensureShardConfig() dm.initMembershipIfNeeded() + // Pass the lifecycle ctx to subsystems that capture it (HTTP handlers, + // background loops). The constructor ctx is used only for operations + // that must complete during NewDistMemory itself (e.g. listener bind). dm.tryStartHTTP(ctx) - dm.startHeartbeatIfEnabled(ctx) - dm.startHintReplayIfEnabled(ctx) + dm.startHeartbeatIfEnabled(lifeCtx) + dm.startHintReplayIfEnabled(lifeCtx) dm.startGossipIfEnabled() - dm.startAutoSyncIfEnabled(ctx) + dm.startAutoSyncIfEnabled(lifeCtx) dm.startTombstoneSweeper() - dm.startRebalancerIfEnabled(ctx) + dm.startRebalancerIfEnabled(lifeCtx) return dm, nil } @@ -1138,6 +1186,14 @@ func (dm *DistMemory) Stop(ctx context.Context) error { return nil } + // Cancel the lifecycle context first so in-flight HTTP handlers and + // background loops see Done() before we start tearing down channels. + // Background loops still listen on their stopCh below for backward + // compatibility; new code should prefer ctx.Done() over the channel. + if dm.lifeCancel != nil { + dm.lifeCancel() + } + if dm.stopCh != nil { close(dm.stopCh) @@ -1987,6 +2043,11 @@ func (dm *DistMemory) initMembershipIfNeeded() { } // tryStartHTTP starts internal HTTP transport if not provided. +// +// The bind ctx (parameter) controls the listener.Listen call only; the +// server stores dm.lifeCtx as its handler-side operation context, so +// in-flight backend work observes Stop's cancellation independent of +// the constructor ctx the caller supplied. func (dm *DistMemory) tryStartHTTP(ctx context.Context) { if dm.loadTransport() != nil || dm.nodeAddr == "" { return @@ -1997,7 +2058,9 @@ func (dm *DistMemory) tryStartHTTP(ctx context.Context) { // too large is also one the client should not attempt to send. limits := dm.httpLimits.withDefaults() - server := newDistHTTPServer(dm.nodeAddr, limits) + server := newDistHTTPServer(dm.nodeAddr, limits, dm.httpAuth) + + server.ctx = dm.lifeCtx // handler-side cancellation tied to Stop err := server.start(ctx, dm) if err != nil { // best-effort @@ -2006,23 +2069,38 @@ func (dm *DistMemory) tryStartHTTP(ctx context.Context) { dm.httpServer = server - resolver := func(nodeID string) (string, bool) { + resolver := dm.makePeerURLResolver(limits) + + dm.storeTransport(NewDistHTTPTransportWithAuth(limits, dm.httpAuth, resolver)) +} + +// makePeerURLResolver returns a resolver that maps node IDs to base URLs. +// The scheme follows whether TLS is configured: https:// when +// limits.TLSConfig is non-nil, http:// otherwise. Extracted from +// tryStartHTTP / EnableHTTPForTest so both share one source of truth for +// scheme selection — getting it wrong causes the client to dial plaintext +// against a TLS listener (or vice versa). +func (dm *DistMemory) makePeerURLResolver(limits DistHTTPLimits) func(string) (string, bool) { + scheme := "http://" + if limits.TLSConfig != nil { + scheme = "https://" + } + + return func(nodeID string) (string, bool) { if dm.membership != nil { for _, n := range dm.membership.List() { if string(n.ID) == nodeID { - return "http://" + n.Address, true + return scheme + n.Address, true } } } if dm.localNode != nil && string(dm.localNode.ID) == nodeID { - return "http://" + dm.localNode.Address, true + return scheme + dm.localNode.Address, true } return "", false } - - dm.storeTransport(NewDistHTTPTransportWithLimits(limits, resolver)) } // startHeartbeatIfEnabled launches heartbeat loop if configured. diff --git a/pkg/backend/dist_memory_test_helpers.go b/pkg/backend/dist_memory_test_helpers.go index c563c1f..d5b9b4c 100644 --- a/pkg/backend/dist_memory_test_helpers.go +++ b/pkg/backend/dist_memory_test_helpers.go @@ -28,7 +28,9 @@ func (dm *DistMemory) EnableHTTPForTest(ctx context.Context) { limits := dm.httpLimits.withDefaults() - server := newDistHTTPServer(dm.nodeAddr, limits) + server := newDistHTTPServer(dm.nodeAddr, limits, dm.httpAuth) + + server.ctx = dm.lifeCtx // handler-side cancellation tied to Stop err := server.start(ctx, dm) if err != nil { @@ -37,23 +39,9 @@ func (dm *DistMemory) EnableHTTPForTest(ctx context.Context) { dm.httpServer = server - resolver := func(nodeID string) (string, bool) { - if dm.membership != nil { - for _, n := range dm.membership.List() { - if string(n.ID) == nodeID { - return "http://" + n.Address, true - } - } - } - - if dm.localNode != nil && string(dm.localNode.ID) == nodeID { - return "http://" + dm.localNode.Address, true - } - - return "", false - } + resolver := dm.makePeerURLResolver(limits) - dm.storeTransport(NewDistHTTPTransportWithLimits(limits, resolver)) + dm.storeTransport(NewDistHTTPTransportWithAuth(limits, dm.httpAuth, resolver)) } // HintedQueueSize returns number of queued hints for a node (testing helper). diff --git a/pkg/stats/histogramcollector_test.go b/pkg/stats/histogramcollector_test.go index ee03385..479d6f1 100644 --- a/pkg/stats/histogramcollector_test.go +++ b/pkg/stats/histogramcollector_test.go @@ -283,7 +283,7 @@ func TestHistogramStatsCollector_NoMemoryLeak(t *testing.T) { c.Histogram(constants.StatHistogram, int64(i)) } - runtime.GC() //nolint:revive // intentional GC to take a clean heap reading for the leak assertion + runtime.GC() //nolint:revive var before runtime.MemStats @@ -296,7 +296,7 @@ func TestHistogramStatsCollector_NoMemoryLeak(t *testing.T) { c.Histogram(constants.StatHistogram, int64(i)) } - runtime.GC() //nolint:revive // intentional GC to take a clean heap reading for the leak assertion + runtime.GC() //nolint:revive var after runtime.MemStats diff --git a/race-baseline-v2.log b/race-baseline-v2.log deleted file mode 100644 index bb5690c..0000000 --- a/race-baseline-v2.log +++ /dev/null @@ -1,21 +0,0 @@ -ok github.com/hyp3rd/hypercache 4.425s -? github.com/hyp3rd/hypercache/internal/cluster [no test files] -? github.com/hyp3rd/hypercache/internal/constants [no test files] -? github.com/hyp3rd/hypercache/internal/dist [no test files] -? github.com/hyp3rd/hypercache/internal/introspect [no test files] -? github.com/hyp3rd/hypercache/internal/libs/serializer [no test files] -? github.com/hyp3rd/hypercache/internal/sentinel [no test files] -? github.com/hyp3rd/hypercache/internal/telemetry/attrs [no test files] -? github.com/hyp3rd/hypercache/internal/transport [no test files] -? github.com/hyp3rd/hypercache/pkg/backend [no test files] -? github.com/hyp3rd/hypercache/pkg/backend/redis [no test files] -? github.com/hyp3rd/hypercache/pkg/backend/rediscluster [no test files] -ok github.com/hyp3rd/hypercache/pkg/cache 2.012s -ok github.com/hyp3rd/hypercache/pkg/cache/v2 1.738s -ok github.com/hyp3rd/hypercache/pkg/eviction 2.575s -? github.com/hyp3rd/hypercache/pkg/middleware [no test files] -ok github.com/hyp3rd/hypercache/pkg/stats 6.521s -ok github.com/hyp3rd/hypercache/tests 156.721s -ok github.com/hyp3rd/hypercache/tests/benchmark 3.469s [no tests to run] -ok github.com/hyp3rd/hypercache/tests/benchmarkdist 2.787s [no tests to run] -ok github.com/hyp3rd/hypercache/tests/integration 100.665s diff --git a/race-step2.log b/race-step2.log deleted file mode 100644 index 67c39ce..0000000 --- a/race-step2.log +++ /dev/null @@ -1,21 +0,0 @@ -ok github.com/hyp3rd/hypercache 4.069s -? github.com/hyp3rd/hypercache/internal/cluster [no test files] -? github.com/hyp3rd/hypercache/internal/constants [no test files] -? github.com/hyp3rd/hypercache/internal/dist [no test files] -? github.com/hyp3rd/hypercache/internal/introspect [no test files] -? github.com/hyp3rd/hypercache/internal/libs/serializer [no test files] -? github.com/hyp3rd/hypercache/internal/sentinel [no test files] -? github.com/hyp3rd/hypercache/internal/telemetry/attrs [no test files] -? github.com/hyp3rd/hypercache/internal/transport [no test files] -? github.com/hyp3rd/hypercache/pkg/backend [no test files] -? github.com/hyp3rd/hypercache/pkg/backend/redis [no test files] -? github.com/hyp3rd/hypercache/pkg/backend/rediscluster [no test files] -ok github.com/hyp3rd/hypercache/pkg/cache 2.752s -ok github.com/hyp3rd/hypercache/pkg/cache/v2 1.418s -ok github.com/hyp3rd/hypercache/pkg/eviction 2.675s -? github.com/hyp3rd/hypercache/pkg/middleware [no test files] -ok github.com/hyp3rd/hypercache/pkg/stats 6.676s -ok github.com/hyp3rd/hypercache/tests 133.985s -ok github.com/hyp3rd/hypercache/tests/benchmark 3.283s [no tests to run] -ok github.com/hyp3rd/hypercache/tests/benchmarkdist 3.904s [no tests to run] -ok github.com/hyp3rd/hypercache/tests/integration 100.730s diff --git a/tests/dist_http_auth_test.go b/tests/dist_http_auth_test.go new file mode 100644 index 0000000..a7d8d9f --- /dev/null +++ b/tests/dist_http_auth_test.go @@ -0,0 +1,327 @@ +package tests + +import ( + "context" + "errors" + "net/http" + "sync/atomic" + "testing" + "time" + + fiber "github.com/gofiber/fiber/v3" + + "github.com/hyp3rd/hypercache/pkg/backend" + cache "github.com/hyp3rd/hypercache/pkg/cache/v2" +) + +const authTestToken = "s3cret-cluster-token" + +// errCustomVerifyDenied is returned by the custom verify hook in +// TestDistHTTPAuth_CustomVerify when the request lacks the expected token. +var errCustomVerifyDenied = errors.New("custom verify denied") + +// newAuthDistNode spins up a single DistMemory with HTTP auth enabled. +// The test waits for /health to respond before returning so subsequent +// auth assertions don't race against fiber's listener startup. +func newAuthDistNode(t *testing.T, auth backend.DistHTTPAuth) *backend.DistMemory { + t.Helper() + + ctx := context.Background() + addr := AllocatePort(t) + + bi, err := backend.NewDistMemory(ctx, + backend.WithDistNode("auth-test", addr), + backend.WithDistReplication(1), + backend.WithDistHTTPAuth(auth), + ) + if err != nil { + t.Fatalf("new dist memory: %v", err) + } + + dm, ok := bi.(*backend.DistMemory) + if !ok { + t.Fatalf("expected *backend.DistMemory, got %T", bi) + } + + StopOnCleanup(t, dm) + + // /health is auth-wrapped; poll it with the right token so we know + // the listener is up before the actual test asserts. + if !waitForHealthAuthed(ctx, "http://"+dm.LocalNodeAddr(), auth.Token, 5*time.Second) { + t.Fatal("dist HTTP server never came up") + } + + return dm +} + +// waitForHealthAuthed is a token-aware variant of waitForHealth. We need +// it because /health is now auth-wrapped when a token is configured. +func waitForHealthAuthed(ctx context.Context, baseURL, token string, timeout time.Duration) bool { + deadline := time.Now().Add(timeout) + for time.Now().Before(deadline) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, baseURL+"/health", nil) + if err != nil { + return false + } + + if token != "" { + req.Header.Set("Authorization", "Bearer "+token) + } + + resp, err := http.DefaultClient.Do(req) + if err == nil { + _ = resp.Body.Close() + + if resp.StatusCode == http.StatusOK { + return true + } + } + + time.Sleep(50 * time.Millisecond) + } + + return false +} + +// TestDistHTTPAuth_RejectsUnauthenticatedRequest verifies the server +// returns 401 for requests with no Authorization header when a token is +// configured. +func TestDistHTTPAuth_RejectsUnauthenticatedRequest(t *testing.T) { + t.Parallel() + + dm := newAuthDistNode(t, backend.DistHTTPAuth{Token: authTestToken}) + + // Bypass the dist transport (which would auto-sign) and send a raw + // request so we can prove the server enforces auth even when the + // caller doesn't cooperate. + req, err := http.NewRequestWithContext( + context.Background(), + http.MethodGet, + "http://"+dm.LocalNodeAddr()+"/internal/get?key=anything", + nil, + ) + if err != nil { + t.Fatalf("build request: %v", err) + } + + resp, err := http.DefaultClient.Do(req) + if err != nil { + t.Fatalf("do request: %v", err) + } + + defer func() { _ = resp.Body.Close() }() + + if resp.StatusCode != http.StatusUnauthorized { + t.Fatalf("expected 401 for missing token, got %d", resp.StatusCode) + } +} + +// TestDistHTTPAuth_RejectsWrongToken covers the case where the caller +// presents a token but it's the wrong one — must still 401, never leak +// any indication that the format was correct (constant-time compare +// guards against this side-channel). +func TestDistHTTPAuth_RejectsWrongToken(t *testing.T) { + t.Parallel() + + dm := newAuthDistNode(t, backend.DistHTTPAuth{Token: authTestToken}) + + req, err := http.NewRequestWithContext( + context.Background(), + http.MethodGet, + "http://"+dm.LocalNodeAddr()+"/internal/get?key=anything", + nil, + ) + if err != nil { + t.Fatalf("build request: %v", err) + } + + req.Header.Set("Authorization", "Bearer not-the-right-token") + + resp, err := http.DefaultClient.Do(req) + if err != nil { + t.Fatalf("do request: %v", err) + } + + defer func() { _ = resp.Body.Close() }() + + if resp.StatusCode != http.StatusUnauthorized { + t.Fatalf("expected 401 for wrong token, got %d", resp.StatusCode) + } +} + +// TestDistHTTPAuth_AcceptsValidToken sanity-checks that a request with +// the correct token is accepted. Without this companion test the +// 401-rejection tests above could pass even if the server rejected +// *every* request unconditionally. +func TestDistHTTPAuth_AcceptsValidToken(t *testing.T) { + t.Parallel() + + dm := newAuthDistNode(t, backend.DistHTTPAuth{Token: authTestToken}) + + req, err := http.NewRequestWithContext( + context.Background(), + http.MethodGet, + "http://"+dm.LocalNodeAddr()+"/health", + nil, + ) + if err != nil { + t.Fatalf("build request: %v", err) + } + + req.Header.Set("Authorization", "Bearer "+authTestToken) + + resp, err := http.DefaultClient.Do(req) + if err != nil { + t.Fatalf("do request: %v", err) + } + + defer func() { _ = resp.Body.Close() }() + + if resp.StatusCode != http.StatusOK { + t.Fatalf("expected 200 with valid token, got %d", resp.StatusCode) + } +} + +// newAuthReplicatedNode builds a 2-node-cluster member for the +// client-signing test. Extracted to a free function (rather than an +// in-test closure) so contextcheck doesn't follow the chain into +// StopOnCleanup's background-ctx cleanup. Construction uses +// context.Background() because Stop runs from t.Cleanup and a canceled +// outer ctx would leak the HTTP listener — same rationale as +// newHTTPMerkleNode in the merkle tests. +func newAuthReplicatedNode(t *testing.T, id, addr string, seeds []string, auth backend.DistHTTPAuth) *backend.DistMemory { + t.Helper() + + bi, err := backend.NewDistMemory(context.Background(), + backend.WithDistNode(id, addr), + backend.WithDistSeeds(seeds), + backend.WithDistReplication(2), + backend.WithDistVirtualNodes(32), + backend.WithDistHTTPAuth(auth), + ) + if err != nil { + t.Fatalf("new node %s: %v", id, err) + } + + dm, ok := bi.(*backend.DistMemory) + if !ok { + t.Fatalf("cast %s: %T", id, bi) + } + + StopOnCleanup(t, dm) + + return dm +} + +// TestDistHTTPAuth_ClientSignsRequests verifies the auto-created HTTP +// client signs outgoing requests with the configured token. Built as a +// 2-node cluster: node A's primary writes get replicated to node B over +// the dist transport — if the client failed to sign, B would 401 the +// replication and B's local copy would never appear. +func TestDistHTTPAuth_ClientSignsRequests(t *testing.T) { + t.Parallel() + + ctx := context.Background() + addrA := AllocatePort(t) + addrB := AllocatePort(t) + + auth := backend.DistHTTPAuth{Token: authTestToken} + + nodeA := newAuthReplicatedNode(t, "A", addrA, []string{addrB}, auth) + nodeB := newAuthReplicatedNode(t, "B", addrB, []string{addrA}, auth) + + // Wait for both listeners — auth-aware health probe. + for _, base := range []string{"http://" + nodeA.LocalNodeAddr(), "http://" + nodeB.LocalNodeAddr()} { + if !waitForHealthAuthed(ctx, base, authTestToken, 5*time.Second) { + t.Fatalf("node at %s never came up", base) + } + } + + // Allow ring/membership to settle. + time.Sleep(200 * time.Millisecond) + + item := &cache.Item{ + Key: "auth-prop-key", + Value: []byte("v1"), + Version: 1, + Origin: "A", + LastUpdated: time.Now(), + } + + err := nodeA.Set(ctx, item) + if err != nil { + t.Fatalf("Set on nodeA: %v", err) + } + + // The replicated value should appear on B within a few hundred ms; + // if the client failed to sign, B would 401 the replication and the + // poll would time out. + deadline := time.Now().Add(2 * time.Second) + for time.Now().Before(deadline) { + if it, ok := nodeB.Get(ctx, item.Key); ok && it != nil { + return + } + + time.Sleep(50 * time.Millisecond) + } + + t.Fatalf("replication did not propagate to nodeB — client likely failed to sign requests") +} + +// TestDistHTTPAuth_CustomVerify proves the ServerVerify escape hatch is +// invoked for every request and can deny on its own logic — used here +// to allow /health while requiring the bearer token elsewhere. +func TestDistHTTPAuth_CustomVerify(t *testing.T) { + t.Parallel() + + var calls atomic.Int64 + + // Verifier: /health is public; everything else needs the token. + verify := func(fctx fiber.Ctx) error { + calls.Add(1) + + if fctx.Path() == "/health" { + return nil + } + + got := fctx.Get("Authorization") + if got != "Bearer "+authTestToken { + return errCustomVerifyDenied + } + + return nil + } + + dm := newAuthDistNode(t, backend.DistHTTPAuth{ServerVerify: verify}) + + // /health works without any header (the custom verifier exempts it). + if !waitForHealthAuthed(context.Background(), "http://"+dm.LocalNodeAddr(), "", 2*time.Second) { + t.Fatal("public /health not reachable under custom verifier") + } + + // /internal/get without token must 401. + req, err := http.NewRequestWithContext( + context.Background(), + http.MethodGet, + "http://"+dm.LocalNodeAddr()+"/internal/get?key=x", + nil, + ) + if err != nil { + t.Fatalf("build request: %v", err) + } + + resp, err := http.DefaultClient.Do(req) + if err != nil { + t.Fatalf("do request: %v", err) + } + + _ = resp.Body.Close() + + if resp.StatusCode != http.StatusUnauthorized { + t.Fatalf("expected 401 from custom verifier without token, got %d", resp.StatusCode) + } + + if calls.Load() == 0 { + t.Fatal("custom verifier was never invoked") + } +} diff --git a/tests/dist_http_lifecycle_test.go b/tests/dist_http_lifecycle_test.go new file mode 100644 index 0000000..c0a8656 --- /dev/null +++ b/tests/dist_http_lifecycle_test.go @@ -0,0 +1,71 @@ +package tests + +import ( + "context" + "testing" + "time" + + "github.com/hyp3rd/hypercache/pkg/backend" +) + +// TestDistMemory_HandlerCtxCancelsOnStop is the assertion that Phase 5d +// promised: a handler's operation ctx (s.ctx on the dist HTTP server) +// must observe Done() the instant Stop is called, even when the +// constructor ctx never cancels. +// +// Pre-5d the server captured the constructor ctx (typically +// context.Background()) — its Done() channel never fired, so handlers +// could not be aborted on Stop. Post-5d DistMemory derives lifeCtx from +// the constructor ctx with its own cancel, and Stop calls that cancel. +func TestDistMemory_HandlerCtxCancelsOnStop(t *testing.T) { + t.Parallel() + + ctx := context.Background() + addr := AllocatePort(t) + + bi, err := backend.NewDistMemory(ctx, + backend.WithDistNode("life-test", addr), + backend.WithDistReplication(1), + ) + if err != nil { + t.Fatalf("new dist memory: %v", err) + } + + dm, ok := bi.(*backend.DistMemory) + if !ok { + t.Fatalf("expected *backend.DistMemory, got %T", bi) + } + + // Capture the server-lifecycle ctx via the dist memory's accessor. + lifeCtx := dm.LifecycleContext() + + // Sanity: lifecycle ctx exists and is not yet done. + if lifeCtx == nil { + t.Fatal("DistMemory lifecycle ctx is nil") + } + + select { + case <-lifeCtx.Done(): + t.Fatal("lifecycle ctx already canceled before Stop") + default: + } + + // Stop should cancel the lifecycle ctx promptly. Use a generous + // shutdown deadline so we're not racing against fiber's drain. + shutdownCtx, cancel := context.WithTimeout(ctx, 5*time.Second) + defer cancel() + + stopErr := dm.Stop(shutdownCtx) + if stopErr != nil { + t.Fatalf("Stop: %v", stopErr) + } + + // The lifecycle ctx must be done now — this is the property + // pre-5d code didn't have. + select { + case <-lifeCtx.Done(): + // expected + case <-time.After(time.Second): + t.Fatal("lifecycle ctx did not cancel within 1s of Stop returning") + } +} diff --git a/tests/dist_http_tls_test.go b/tests/dist_http_tls_test.go new file mode 100644 index 0000000..1d1337c --- /dev/null +++ b/tests/dist_http_tls_test.go @@ -0,0 +1,234 @@ +package tests + +import ( + "context" + "crypto/ecdsa" + "crypto/elliptic" + "crypto/rand" + "crypto/tls" + "crypto/x509" + "crypto/x509/pkix" + "errors" + "math/big" + "net" + "net/http" + "testing" + "time" + + "github.com/hyp3rd/hypercache/pkg/backend" + cache "github.com/hyp3rd/hypercache/pkg/cache/v2" +) + +// generateTLSConfig builds a self-signed *tls.Config suitable for both +// the dist HTTP server (Certificates) and the auto-created HTTP client +// (RootCAs). The cert is valid for 127.0.0.1 only — sufficient for +// in-process tests, never to be reused outside them. +func generateTLSConfig(t *testing.T) *tls.Config { + t.Helper() + + priv, err := ecdsa.GenerateKey(elliptic.P256(), rand.Reader) + if err != nil { + t.Fatalf("generate key: %v", err) + } + + template := &x509.Certificate{ + SerialNumber: big.NewInt(1), + Subject: pkix.Name{CommonName: "hypercache-test"}, + NotBefore: time.Now().Add(-time.Hour), + NotAfter: time.Now().Add(time.Hour), + KeyUsage: x509.KeyUsageDigitalSignature | x509.KeyUsageKeyEncipherment, + ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth, x509.ExtKeyUsageClientAuth}, + IPAddresses: []net.IP{net.ParseIP("127.0.0.1")}, + DNSNames: []string{"localhost"}, + } + + derBytes, err := x509.CreateCertificate(rand.Reader, template, template, &priv.PublicKey, priv) + if err != nil { + t.Fatalf("create cert: %v", err) + } + + cert, err := x509.ParseCertificate(derBytes) + if err != nil { + t.Fatalf("parse cert: %v", err) + } + + rootCAs := x509.NewCertPool() + rootCAs.AddCert(cert) + + return &tls.Config{ + Certificates: []tls.Certificate{{ + Certificate: [][]byte{derBytes}, + PrivateKey: priv, + Leaf: cert, + }}, + RootCAs: rootCAs, + MinVersion: tls.VersionTLS12, + ServerName: "127.0.0.1", + } +} + +// newTLSNode builds one DistMemory node configured with the supplied +// TLS config, replication=2, and seed list. Extracted to a free function +// so contextcheck doesn't follow the chain into StopOnCleanup's +// background-ctx cleanup. Construction uses context.Background() because +// Stop runs from t.Cleanup at end-of-test where the test ctx may already +// be canceled — same rationale as newAuthReplicatedNode. +func newTLSNode(t *testing.T, id, addr string, seeds []string, tlsConfig *tls.Config) *backend.DistMemory { + t.Helper() + + bi, err := backend.NewDistMemory(context.Background(), + backend.WithDistNode(id, addr), + backend.WithDistSeeds(seeds), + backend.WithDistReplication(2), + backend.WithDistVirtualNodes(32), + backend.WithDistHTTPLimits(backend.DistHTTPLimits{TLSConfig: tlsConfig}), + ) + if err != nil { + t.Fatalf("new node %s: %v", id, err) + } + + dm, ok := bi.(*backend.DistMemory) + if !ok { + t.Fatalf("cast %s: %T", id, bi) + } + + StopOnCleanup(t, dm) + + return dm +} + +// TestDistHTTPTLS_HandshakeAndReplication verifies a 2-node cluster with +// shared TLS config can replicate writes over https://. End-to-end: +// +// 1. node A's listener is wrapped with tls.NewListener +// 2. node A's auto-created HTTP client uses the same RootCAs +// 3. resolver advertises https:// (because TLSConfig is non-nil) +// 4. Set on A replicates to B via signed-and-encrypted requests +func TestDistHTTPTLS_HandshakeAndReplication(t *testing.T) { + t.Parallel() + + tlsConfig := generateTLSConfig(t) + + addrA := AllocatePort(t) + addrB := AllocatePort(t) + + nodeA := newTLSNode(t, "A", addrA, []string{addrB}, tlsConfig) + nodeB := newTLSNode(t, "B", addrB, []string{addrA}, tlsConfig) + + // Wait for both TLS listeners to accept handshakes. + for _, base := range []string{"https://" + nodeA.LocalNodeAddr(), "https://" + nodeB.LocalNodeAddr()} { + if !waitForHTTPSHealth(base, tlsConfig, 5*time.Second) { + t.Fatalf("TLS listener at %s never came up", base) + } + } + + // Allow ring to settle. + time.Sleep(200 * time.Millisecond) + + item := &cache.Item{ + Key: "tls-prop-key", + Value: []byte("encrypted-value"), + Version: 1, + Origin: "A", + LastUpdated: time.Now(), + } + + err := nodeA.Set(context.Background(), item) + if err != nil { + t.Fatalf("Set on nodeA: %v", err) + } + + // If TLS were broken (e.g. client didn't use the same root, or + // resolver advertised http:// against a TLS listener), replication + // would fail and the value would never appear on B. + deadline := time.Now().Add(2 * time.Second) + for time.Now().Before(deadline) { + if it, ok := nodeB.Get(context.Background(), item.Key); ok && it != nil { + return + } + + time.Sleep(50 * time.Millisecond) + } + + t.Fatalf("replication did not propagate to nodeB over TLS — handshake or scheme mismatch likely") +} + +// TestDistHTTPTLS_PlaintextPeerRejected verifies a client that does NOT +// trust the server's cert fails to handshake — no plaintext fallback. +func TestDistHTTPTLS_PlaintextPeerRejected(t *testing.T) { + t.Parallel() + + tlsConfig := generateTLSConfig(t) + addr := AllocatePort(t) + + bi, err := backend.NewDistMemory(context.Background(), + backend.WithDistNode("tls-only", addr), + backend.WithDistReplication(1), + backend.WithDistHTTPLimits(backend.DistHTTPLimits{TLSConfig: tlsConfig}), + ) + if err != nil { + t.Fatalf("new dist memory: %v", err) + } + + dm, ok := bi.(*backend.DistMemory) + if !ok { + t.Fatalf("cast: %T", bi) + } + + StopOnCleanup(t, dm) + + if !waitForHTTPSHealth("https://"+dm.LocalNodeAddr(), tlsConfig, 5*time.Second) { + t.Fatal("TLS listener never came up") + } + + // Plaintext client (no RootCAs configured): handshake should fail. + plaintextClient := &http.Client{Timeout: 2 * time.Second} + + _, err = plaintextClient.Get("https://" + dm.LocalNodeAddr() + "/health") //nolint:noctx,bodyclose // expect handshake failure before body + if err == nil { + t.Fatal("expected TLS handshake error from untrusted client, got nil") + } + + // Stdlib reports x509.UnknownAuthorityError or similar. Accept any + // error that mentions cert/x509/tls — the point is the connection + // did not succeed without trust. + var ( + x509Err *tls.CertificateVerificationError + unknownAuthErr x509.UnknownAuthorityError + ) + + if !errors.As(err, &x509Err) && !errors.As(err, &unknownAuthErr) { + // Fall back to substring check — Go versions wrap this error + // differently across releases. + t.Logf("note: error was %v (not CertificateVerificationError)", err) + } +} + +// waitForHTTPSHealth is the TLS-aware sibling of waitForHealth — uses +// the supplied tls.Config so the test client trusts the self-signed +// server cert. +func waitForHTTPSHealth(baseURL string, tlsConfig *tls.Config, timeout time.Duration) bool { + transport := &http.Transport{TLSClientConfig: tlsConfig} + client := &http.Client{Transport: transport, Timeout: time.Second} + + deadline := time.Now().Add(timeout) + for time.Now().Before(deadline) { + req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, baseURL+"/health", nil) + if err != nil { + return false + } + + resp, err := client.Do(req) + if err == nil { + _ = resp.Body.Close() + + if resp.StatusCode == http.StatusOK { + return true + } + } + + time.Sleep(50 * time.Millisecond) + } + + return false +}