From f831c931a5b00965f5c65ac78f68eb50de22d5fd Mon Sep 17 00:00:00 2001 From: "Yoshiaki Ueda (bootjp)" Date: Sun, 24 May 2026 06:03:50 +0900 Subject: [PATCH 1/2] test(redis): widen TTL window on ExpiredKey_BecomesInvisible to fix CI flake MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 200ms TTL races against the SET → GET round-trip latency on a 3-node Raft cluster under -race on CI runners. SET goes through Raft propose → quorum apply, which can take 100-250ms under contention. By the time the visibility-before-expiry GET fires, the wall-clock-based TTL has already burned and the GET returns redis.Nil — failing the require.NoError at line 193. The flake recurred consistently today across PRs that share no code with the Redis adapter: - #813 (admin Dynamo HTTP, internal/admin only) - #814 (admin S3 HTTP, internal/admin only) - #815 (admin SPA Dynamo, web/admin only) - #816 (admin SPA S3, web/admin only) That cross-PR pattern locates the failure squarely in the adapter test, not in the PR under test. Fix: PX 200 -> PX 2000. 2s is comfortably past the worst observed SET-ack latency (~250ms on CI) while keeping the test under a 7s wall-clock budget (2s TTL + 5s Eventually deadline). Also widens the Eventually deadline from 1s to 5s so the expiry-detection loop has headroom past the longer TTL plus any Raft-replicated DEL on expiry. The test's intent — verify that an expired key becomes invisible to subsequent GETs — is preserved exactly. Only the timing constants change; the assertions are unchanged. Tested locally: go test -race -count=3 -timeout=180s -run TestRedis_ExpiredKey_BecomesInvisible ./adapter/ ok github.com/bootjp/elastickv/adapter 10.792s --- adapter/redis_ttl_compat_test.go | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/adapter/redis_ttl_compat_test.go b/adapter/redis_ttl_compat_test.go index 9846be158..ddab53c5c 100644 --- a/adapter/redis_ttl_compat_test.go +++ b/adapter/redis_ttl_compat_test.go @@ -186,15 +186,33 @@ func TestRedis_ExpiredKey_BecomesInvisible(t *testing.T) { rdb := redis.NewClient(&redis.Options{Addr: nodes[0].redisAddress}) defer func() { _ = rdb.Close() }() - // Set a key with a very short TTL. - require.NoError(t, rdb.Do(ctx, "SET", "expiry:short", "v", "PX", "200").Err()) + // PX=2000ms (was 200ms). The previous 200ms TTL races against + // the SET → GET round-trip latency on a 3-node Raft cluster + // under -race on CI runners: SET proposes through Raft, waits + // for quorum apply, and then returns OK, which can take + // 100–250ms on a slow runner. By the time the "visible before + // expiry" GET fires, the wall-clock-based TTL has already + // fired and the GET returns redis.Nil — a recurring flake + // observed across PRs #813/#814/#815/#816 today, all of which + // share no code with the Redis adapter. + // + // 2s is comfortably past the worst observed SET-ack latency + // (~250ms on CI) while still letting the "must be gone after + // expiry" loop's 5s deadline (raised below) catch the expiry + // well before the test deadline. + const initialTTL = 2 * time.Second + require.NoError(t, rdb.Do(ctx, "SET", "expiry:short", "v", "PX", + initialTTL.Milliseconds()).Err()) got, err := rdb.Get(ctx, "expiry:short").Result() require.NoError(t, err) require.Equal(t, "v", got, "key must be visible before expiry") + // 5s deadline (was 1s) so a slow CI runner has headroom past + // the 2s TTL + the Eventually poll cadence (25ms tick) + + // any additional latency from Raft-replicated DEL on expiry. require.Eventually(t, func() bool { _, e := rdb.Get(ctx, "expiry:short").Result() return errors.Is(e, redis.Nil) - }, time.Second, 25*time.Millisecond, "key must be gone after expiry") + }, 5*time.Second, 25*time.Millisecond, "key must be gone after expiry") } From 4b4a650ac7496b62c79478335e6c64a6da1d0855 Mon Sep 17 00:00:00 2001 From: "Yoshiaki Ueda (bootjp)" Date: Sun, 24 May 2026 06:12:20 +0900 Subject: [PATCH 2/2] test(redis): trim PR refs from comments + derive deadline from TTL (PR #818 r1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two style notes from Gemini medium x2 / Claude bot on PR #818. 1) CLAUDE.md violation: the inline comment referenced PRs #813/#814/#815/#816 and 'today' — transient context that rots and doesn't belong in source comments. Per CLAUDE.md: 'Don't reference the current task, fix, or callers ... those belong in the PR description'. The commit message and PR description still carry the cross- PR diagnostic context for git archeology. 2) Eventually deadline now derives from initialTTL (initialTTL + 3*time.Second) rather than a hardcoded 5s. If a future TTL adjustment lands, the assertion window stays valid without a parallel update. Trimmed comment focuses on the technical justification: - WHY 2s TTL (SET-ack latency on 3-node Raft under -race) - WHY ttl+3s deadline (poll cadence + Raft-replicated DEL latency on expiry) No behavior change to the test. initialTTL=2s + deadline=5s (2+3) matches the previous 2s + 5s. Tested locally with -race -count=2 (7.4s wall) — passes both runs. go test -race -count=2 -timeout=120s ./adapter/... in scope: passes golangci-lint: clean --- adapter/redis_ttl_compat_test.go | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/adapter/redis_ttl_compat_test.go b/adapter/redis_ttl_compat_test.go index ddab53c5c..0b8c4fb6b 100644 --- a/adapter/redis_ttl_compat_test.go +++ b/adapter/redis_ttl_compat_test.go @@ -186,20 +186,13 @@ func TestRedis_ExpiredKey_BecomesInvisible(t *testing.T) { rdb := redis.NewClient(&redis.Options{Addr: nodes[0].redisAddress}) defer func() { _ = rdb.Close() }() - // PX=2000ms (was 200ms). The previous 200ms TTL races against - // the SET → GET round-trip latency on a 3-node Raft cluster - // under -race on CI runners: SET proposes through Raft, waits - // for quorum apply, and then returns OK, which can take - // 100–250ms on a slow runner. By the time the "visible before - // expiry" GET fires, the wall-clock-based TTL has already - // fired and the GET returns redis.Nil — a recurring flake - // observed across PRs #813/#814/#815/#816 today, all of which - // share no code with the Redis adapter. - // - // 2s is comfortably past the worst observed SET-ack latency - // (~250ms on CI) while still letting the "must be gone after - // expiry" loop's 5s deadline (raised below) catch the expiry - // well before the test deadline. + // initialTTL must comfortably outlast the SET → first-GET + // round-trip. SET on a 3-node Raft cluster must reach quorum + // before returning OK, which can take 100–250ms under -race on + // CI runners. The original 200ms TTL raced that window: by the + // time the "visible before expiry" GET fired, the wall-clock + // TTL had already burned and the GET returned redis.Nil. 2s + // is comfortably past the worst observed SET-ack latency. const initialTTL = 2 * time.Second require.NoError(t, rdb.Do(ctx, "SET", "expiry:short", "v", "PX", initialTTL.Milliseconds()).Err()) @@ -208,11 +201,12 @@ func TestRedis_ExpiredKey_BecomesInvisible(t *testing.T) { require.NoError(t, err) require.Equal(t, "v", got, "key must be visible before expiry") - // 5s deadline (was 1s) so a slow CI runner has headroom past - // the 2s TTL + the Eventually poll cadence (25ms tick) + - // any additional latency from Raft-replicated DEL on expiry. + // Deadline derives from initialTTL so a future TTL adjustment + // keeps the assertion window valid: ttl + 3s gives headroom + // for the 25ms poll cadence plus Raft-replicated DEL latency + // on expiry. require.Eventually(t, func() bool { _, e := rdb.Get(ctx, "expiry:short").Result() return errors.Is(e, redis.Nil) - }, 5*time.Second, 25*time.Millisecond, "key must be gone after expiry") + }, initialTTL+3*time.Second, 25*time.Millisecond, "key must be gone after expiry") }