From ba3f5bf827a5e37acde61857eb62390b803e36a1 Mon Sep 17 00:00:00 2001 From: "Yoshiaki Ueda (bootjp)" Date: Wed, 22 Apr 2026 22:40:49 +0900 Subject: [PATCH 1/4] test(redis): wait for follower apply catch-up in MultiExec list tests TestRedis_MultiExec_DelThenRPushRecreatesList was intermittently failing under -race on CI (PR #577): after MULTI / DEL list / RPUSH list new1 new2 / EXEC, the final direct store.ScanAt(..., readTS) on nodes[1] returned the pre-EXEC ["old1","old2"] even though the immediately preceding LRANGE via the redis client correctly returned ["new1","new2"]. Why the race exists: - The go-redis client connects to nodes[1], which is not necessarily the raft leader. - LRANGE goes through the adapter's LeaseRead / LinearizableRead path, which blocks until the local apply watermark reaches a safe point, so it observes the EXEC commit even on a slightly-lagging follower. - readTS() (adapter/ts.go:snapshotTS) returns the local store's LastCommitTS() directly, with no wait-apply. On a follower (or a leader that has just responded to the client), LastCommitTS can momentarily trail the raft-committed EXEC because the test reads it on a different code path than the one that drove the client-visible wait. - The direct ScanAt(readTS) therefore sees the stale, pre-EXEC state. Fix: introduce waitForListState(t, node, key, expectedLen, expectedValues) that polls resolveListMeta AND the raw item scan at the node-local readTS via require.Eventually (250ms interval, 5s timeout) until both reflect the expected post-EXEC state. This is a real synchronization on the raft apply progress of the target node, not a timing hack: it succeeds on the first iteration when the node is already caught up and only retries while the apply is genuinely behind. Use it in place of the ad-hoc readTS/resolveListMeta/ScanAt block in TestRedis_MultiExec_DelThenRPushRecreatesList. --- adapter/redis_multi_test.go | 79 ++++++++++++++++++++++++++----------- 1 file changed, 55 insertions(+), 24 deletions(-) diff --git a/adapter/redis_multi_test.go b/adapter/redis_multi_test.go index 7130b30b1..9fa9a0a48 100644 --- a/adapter/redis_multi_test.go +++ b/adapter/redis_multi_test.go @@ -5,6 +5,7 @@ import ( "math" "strconv" "testing" + "time" "github.com/bootjp/elastickv/store" "github.com/redis/go-redis/v9" @@ -12,6 +13,55 @@ import ( "github.com/stretchr/testify/require" ) +// waitForListState polls until this node has applied raft commits such that +// the list stored under key resolves to expectedLen items whose values match +// expectedValues (when non-nil) via both resolveListMeta and a direct scan at +// the node-local readTS. +// +// Why this is necessary: the go-redis client may be connected to a follower +// (or a leader that momentarily lags in applying its own commit). The adapter's +// client-facing read path (LRANGE) uses LeaseRead/LinearizableRead so it +// blocks until the local apply catches up, which is why LRANGE observes the +// new state. However readTS() returns store.LastCommitTS() directly, and a +// direct ScanAt(readTS) bypasses that wait. When the client and the direct +// scan target the same node, there is still a window between the client +// receiving the EXEC reply (driven by the wait-apply inside read handlers) and +// the raft-applied commit updating LastCommitTS on this node's store — in +// particular when this node was not the proposer and applies strictly after +// the response was delivered via a different code path. Polling resolves the +// gap deterministically without a timing-based sleep. +func waitForListState(t *testing.T, n Node, key []byte, expectedLen int, expectedValues []string) { + t.Helper() + ctx := context.Background() + require.Eventually(t, func() bool { + readTS := n.redisServer.readTS() + meta, exists, err := n.redisServer.resolveListMeta(ctx, key, readTS) + if err != nil || !exists || meta.Len != int64(expectedLen) { + return false + } + kvs, err := n.redisServer.store.ScanAt( + ctx, + store.ListItemKey(key, math.MinInt64), + store.ListItemKey(key, math.MaxInt64), + expectedLen+1, + readTS, + ) + if err != nil || len(kvs) != expectedLen { + return false + } + if expectedValues == nil { + return true + } + for i, kvp := range kvs { + if string(kvp.Value) != expectedValues[i] { + return false + } + } + return true + }, 5*time.Second, 250*time.Millisecond, + "node did not catch up to expected list state for key %q (len=%d)", string(key), expectedLen) +} + func TestRedis_MultiExecAtomic(t *testing.T) { t.Parallel() nodes, _, _ := createNode(t, 3) @@ -194,31 +244,12 @@ func TestRedis_MultiExec_DelThenRPushRecreatesList(t *testing.T) { require.NoError(t, err) require.Equal(t, []any{"new1", "new2"}, rangeRes) - readTS := nodes[1].redisServer.readTS() - // With the Delta pattern, RPUSH inside a MULTI/EXEC emits a delta key - // rather than updating the base metadata key directly. Verify the - // effective metadata via resolveListMeta which aggregates deltas. - resolvedMeta, resolvedExists, err := nodes[1].redisServer.resolveListMeta(ctx, []byte("list-del-rpush"), readTS) - require.NoError(t, err) - require.True(t, resolvedExists) - require.Equal(t, int64(2), resolvedMeta.Len) - - kvs, err := nodes[1].redisServer.store.ScanAt( - ctx, - store.ListItemKey([]byte("list-del-rpush"), math.MinInt64), - store.ListItemKey([]byte("list-del-rpush"), math.MaxInt64), - 10, - readTS, - ) - require.NoError(t, err) - require.Len(t, kvs, 2) - - got := make([]string, 0, len(kvs)) - for _, kvp := range kvs { - got = append(got, string(kvp.Value)) - } - require.Equal(t, []string{"new1", "new2"}, got) + // rather than updating the base metadata key directly. Additionally, the + // node the client is connected to may apply the EXEC commit slightly + // after the client receives its response, so poll until this node's + // store has caught up before asserting on the raw scan. + waitForListState(t, nodes[1], []byte("list-del-rpush"), 2, []string{"new1", "new2"}) } func TestRedis_MultiExec_SetGetAfterDeleteReturnsNilOldValue(t *testing.T) { From d63f92bc39cb6751e74825b3c1e0359ee60210a5 Mon Sep 17 00:00:00 2001 From: "Yoshiaki Ueda (bootjp)" Date: Wed, 22 Apr 2026 23:00:54 +0900 Subject: [PATCH 2/4] fix(test): waitForListState accepts !exists when expectedLen==0 --- adapter/redis_multi_test.go | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/adapter/redis_multi_test.go b/adapter/redis_multi_test.go index 9fa9a0a48..ccf43402a 100644 --- a/adapter/redis_multi_test.go +++ b/adapter/redis_multi_test.go @@ -36,9 +36,24 @@ func waitForListState(t *testing.T, n Node, key []byte, expectedLen int, expecte require.Eventually(t, func() bool { readTS := n.redisServer.readTS() meta, exists, err := n.redisServer.resolveListMeta(ctx, key, readTS) - if err != nil || !exists || meta.Len != int64(expectedLen) { + if err != nil { return false } + // Redis represents an empty / deleted list as the absence of + // the meta key. When the caller is verifying deletion + // (expectedLen == 0), !exists is the success signal; + // requiring exists==true would make this helper unusable for + // post-DEL waits. For non-empty expectations a missing meta + // still means "not yet applied" and should keep polling. + if expectedLen == 0 { + if exists && meta.Len != 0 { + return false + } + } else { + if !exists || meta.Len != int64(expectedLen) { + return false + } + } kvs, err := n.redisServer.store.ScanAt( ctx, store.ListItemKey(key, math.MinInt64), From 403367eb414927f6be2fb6bb6fc44321482437b9 Mon Sep 17 00:00:00 2001 From: "Yoshiaki Ueda (bootjp)" Date: Wed, 22 Apr 2026 23:06:40 +0900 Subject: [PATCH 3/4] refactor(test): split waitForListState to satisfy cyclop --- adapter/redis_multi_test.go | 101 ++++++++++++++++++++++-------------- 1 file changed, 61 insertions(+), 40 deletions(-) diff --git a/adapter/redis_multi_test.go b/adapter/redis_multi_test.go index ccf43402a..5b69e3c30 100644 --- a/adapter/redis_multi_test.go +++ b/adapter/redis_multi_test.go @@ -32,51 +32,72 @@ import ( // gap deterministically without a timing-based sleep. func waitForListState(t *testing.T, n Node, key []byte, expectedLen int, expectedValues []string) { t.Helper() - ctx := context.Background() require.Eventually(t, func() bool { - readTS := n.redisServer.readTS() - meta, exists, err := n.redisServer.resolveListMeta(ctx, key, readTS) - if err != nil { - return false - } - // Redis represents an empty / deleted list as the absence of - // the meta key. When the caller is verifying deletion - // (expectedLen == 0), !exists is the success signal; - // requiring exists==true would make this helper unusable for - // post-DEL waits. For non-empty expectations a missing meta - // still means "not yet applied" and should keep polling. - if expectedLen == 0 { - if exists && meta.Len != 0 { - return false - } - } else { - if !exists || meta.Len != int64(expectedLen) { - return false - } - } - kvs, err := n.redisServer.store.ScanAt( - ctx, - store.ListItemKey(key, math.MinInt64), - store.ListItemKey(key, math.MaxInt64), - expectedLen+1, - readTS, - ) - if err != nil || len(kvs) != expectedLen { - return false - } - if expectedValues == nil { - return true - } - for i, kvp := range kvs { - if string(kvp.Value) != expectedValues[i] { - return false - } - } - return true + return listStateMatches(n, key, expectedLen, expectedValues) }, 5*time.Second, 250*time.Millisecond, "node did not catch up to expected list state for key %q (len=%d)", string(key), expectedLen) } +// listStateMatches reports whether this node's applied state for key resolves +// to expectedLen items whose values match expectedValues (when non-nil). It is +// the single-shot check driving waitForListState's poll loop. +func listStateMatches(n Node, key []byte, expectedLen int, expectedValues []string) bool { + ctx := context.Background() + readTS := n.redisServer.readTS() + if !listMetaMatches(ctx, n, key, expectedLen, readTS) { + return false + } + kvs, ok := scanListItems(ctx, n, key, expectedLen, readTS) + if !ok { + return false + } + return listValuesMatch(kvs, expectedValues) +} + +// listMetaMatches checks the list meta key. For expectedLen == 0 the Redis +// semantics of "empty list == absent meta" are honored; for non-empty +// expectations both existence and length must match. +func listMetaMatches(ctx context.Context, n Node, key []byte, expectedLen int, readTS uint64) bool { + meta, exists, err := n.redisServer.resolveListMeta(ctx, key, readTS) + if err != nil { + return false + } + if expectedLen == 0 { + return !exists || meta.Len == 0 + } + return exists && meta.Len == int64(expectedLen) +} + +// scanListItems reads the list items at readTS and returns them when the +// observed length matches expectedLen. +func scanListItems(ctx context.Context, n Node, key []byte, expectedLen int, readTS uint64) ([]*store.KVPair, bool) { + kvs, err := n.redisServer.store.ScanAt( + ctx, + store.ListItemKey(key, math.MinInt64), + store.ListItemKey(key, math.MaxInt64), + expectedLen+1, + readTS, + ) + if err != nil || len(kvs) != expectedLen { + return nil, false + } + return kvs, true +} + +// listValuesMatch returns true when expectedValues is nil (no value check +// requested) or every scanned value equals its expected counterpart. +func listValuesMatch(kvs []*store.KVPair, expectedValues []string) bool { + if expectedValues == nil { + return true + } + for i, kvp := range kvs { + if string(kvp.Value) != expectedValues[i] { + return false + } + } + return true +} + func TestRedis_MultiExecAtomic(t *testing.T) { t.Parallel() nodes, _, _ := createNode(t, 3) From 9e0258d707418983210516bca0fb993d5218db3c Mon Sep 17 00:00:00 2001 From: "Yoshiaki Ueda (bootjp)" Date: Wed, 22 Apr 2026 23:18:08 +0900 Subject: [PATCH 4/4] test(helper): guard waitForListState against mismatched expectedValues Fail fast in waitForListState when the caller supplies an expectedValues slice whose length does not match expectedLen. Without this check a misuse could cause an out-of-bounds panic inside listValuesMatch when scanListItems succeeds with expectedLen items. The guard centralizes the invariant at the single public entry point so listValuesMatch and listMetaMatches stay simple. --- adapter/redis_multi_test.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/adapter/redis_multi_test.go b/adapter/redis_multi_test.go index 5b69e3c30..932b028a5 100644 --- a/adapter/redis_multi_test.go +++ b/adapter/redis_multi_test.go @@ -32,6 +32,10 @@ import ( // gap deterministically without a timing-based sleep. func waitForListState(t *testing.T, n Node, key []byte, expectedLen int, expectedValues []string) { t.Helper() + if expectedValues != nil && len(expectedValues) != expectedLen { + t.Fatalf("waitForListState: expectedValues length %d does not match expectedLen %d", + len(expectedValues), expectedLen) + } require.Eventually(t, func() bool { return listStateMatches(n, key, expectedLen, expectedValues) }, 5*time.Second, 250*time.Millisecond,