Skip to content

Commit 38aba7f

Browse files
committed
db/state: clip merge windows that straddle existing files
When the natural merge start (endTxNum minus the largest power-of-two step span) falls strictly inside an existing visible file, bump it up to that file's endTxNum so the candidate window cannot straddle. Without this clip, non-power-of-2 step layouts — such as those produced by a step-size rebase that turns one step into a non-power-of-2 number of tx-nums — let the algorithm propose merge windows whose `from` lies inside a pre-existing file. Downstream staticFilesInRange would then silently drop that file and emit a merged segment whose name lies about its coverage, leaving an overlap with the surviving straddler. Unskip partial_overlap_must_not_be_selected (which now passes), and add step_rebase_swallow covering the headline case where a single-step file arriving at N=2_048_000 has natural span 16384 / natural start 2031616 that straddles [2016000, 2032000). After the clip the safe window {2032000, 2048000} cleanly absorbs 15 trailing files.
1 parent 3761dc3 commit 38aba7f

2 files changed

Lines changed: 87 additions & 14 deletions

File tree

db/state/merge.go

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,13 @@ func calculateMergeStartTxNum(endTxNum, stepSize, maxSpan uint64) uint64 {
136136
// Accounts/Storage/Commitment domain frontiers aligned. Files past it aren't yet
137137
// merge candidates — going further would let one entity drift ahead of the others.
138138
//
139+
// When the natural start (endTxNum minus the largest power-of-two step span)
140+
// falls strictly inside an existing visible file, the window is clipped so its
141+
// from aligns with that file's endTxNum boundary. Without this clip, on
142+
// non-power-of-2 step layouts (e.g. after a step-size rebase) the algorithm
143+
// would propose windows straddling an existing file, producing misnamed merged
144+
// files and/or overlapping coverage. See #20878.
145+
//
139146
// When superSetCheck is true (history & inverted index), a file matching the
140147
// candidate's from and reaching at least its to replaces the candidate's bounds
141148
// but resets needMerge to false, letting later smaller-but-still-mergeable files
@@ -146,7 +153,7 @@ func findMergeRangeInFiles(files visibleFiles, stepSize, maxEndTxNum, maxSpan ui
146153
if item.endTxNum > maxEndTxNum {
147154
continue
148155
}
149-
start := calculateMergeStartTxNum(item.endTxNum, stepSize, maxSpan)
156+
start := clipMergeStartToFileBoundary(files, calculateMergeStartTxNum(item.endTxNum, stepSize, maxSpan))
150157
if superSetCheck && r.from == item.startTxNum && item.endTxNum >= r.to {
151158
r.needMerge = false
152159
r.from = start
@@ -166,6 +173,24 @@ func findMergeRangeInFiles(files visibleFiles, stepSize, maxEndTxNum, maxSpan ui
166173
return r
167174
}
168175

176+
// clipMergeStartToFileBoundary bumps start up to the endTxNum of any visible
177+
// file that straddles it (startTxNum < start < endTxNum). visibleFiles are
178+
// non-overlapping, so at most one straddling file can exist; the loop returns
179+
// as soon as it is found. Files are sorted by endTxNum ascending, so the first
180+
// file with endTxNum > start is the only straddler candidate.
181+
func clipMergeStartToFileBoundary(files visibleFiles, start uint64) uint64 {
182+
for _, f := range files {
183+
if f.endTxNum <= start {
184+
continue
185+
}
186+
if f.startTxNum < start {
187+
return f.endTxNum
188+
}
189+
return start
190+
}
191+
return start
192+
}
193+
169194
// findMergeRange
170195
// make merge determenistic across nodes: even if Node has much small files - do earliest-first merges
171196
// As any other methods of DomainRoTx - it can't see any files overlaps or garbage

db/state/merge_test.go

Lines changed: 61 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -869,19 +869,11 @@ func TestFindMergeRangeInFiles(t *testing.T) {
869869
t.Run("partial_overlap_must_not_be_selected", func(t *testing.T) {
870870
// [0, 1024), [1024, 1536), [1536, 1600), [1600, 1618), [1618, 1620), [1620, 1621), [1621, 1622) ->
871871
// [0, 1024), [1024, 1536), [1536, 1600), [1600, 1618), [1618, 1620), [1620, 1622)
872-
// CORRECT BEHAVIOR: the {1616, 1620} window proposed by [1618, 1620) straddles the
873-
// pre-existing [1600, 1618) (its startTxNum 1600 < window's from 1616). Selecting that
874-
// window would either drop coverage of [1616, 1618) or create an overlap once
875-
// staticFilesInRange filters [1600, 1618) out. The algorithm should skip this unsafe
876-
// window and instead pick the trailing pair's safe window {1620, 1622}.
877-
//
878-
// TODO(#20878): findMergeRangeInFiles currently picks {1616, 1620} regardless.
879-
// The "Flexible merge ranges" proposal in https://github.com/erigontech/erigon/issues/20878
880-
// fixes this by clipping the candidate window's `from` to the smallest visible
881-
// startTxNum >= the natural from, so a window can never straddle an existing file.
882-
// Once that lands, unskip this test.
883-
t.Skip("known pathology: findMergeRangeInFiles selects a window straddling an existing file; see #20878")
884-
872+
// The {1616, 1620} window proposed by [1618, 1620) would straddle the pre-existing
873+
// [1600, 1618) (its startTxNum 1600 < window's from 1616). clipMergeStartToFileBoundary
874+
// bumps the start up to 1618; the resulting [1618, 1620) is then rejected as
875+
// already-aligned, and the next iteration picks the safe trailing pair {1620, 1622}.
876+
// See #20878.
885877
files := visibleFiles{
886878
f(0, 1024),
887879
f(1024, 1536),
@@ -898,6 +890,62 @@ func TestFindMergeRangeInFiles(t *testing.T) {
898890
assert.Equal(t, uint64(1622), mr.to)
899891
})
900892

893+
t.Run("step_rebase_swallow", func(t *testing.T) {
894+
// File boundaries here are not power-of-2 aligned: they're the original canonical
895+
// boundaries (1024, 1536, ..., 2046, 2047) multiplied by 1000, as would result from
896+
// a step-size rebase that changes one step into 1000 tx-nums. None of the resulting
897+
// endTxNums (1024000, 1536000, ..., 2047000) sit on a power-of-2 boundary, so the
898+
// merge algorithm's "largest aligned span ending at endTxNum" can fall strictly
899+
// inside an existing file.
900+
//
901+
// Past 2047000, single-step files [N-1, N) accumulate. They merge cleanly among
902+
// themselves whenever their endTxNum has a rightmost-set-bit span that fits inside
903+
// the post-2047000 region — that gives the binary tree of files below.
904+
//
905+
// The interesting moment is N=2048000. Its natural span is 16384 (2048000 = 125 *
906+
// 2^14, rightmost bit 2^14), giving natural start 2031616. That start lands inside
907+
// [2016000, 2032000): selecting {2031616, 2048000} as a merge window would either
908+
// drop coverage of [2016000, 2031616) or produce a file that overlaps [2016000,
909+
// 2032000). The clip bumps start up to 2032000 — the next safe boundary — producing
910+
// {2032000, 2048000}. That window cleanly absorbs the four trailing rebased files
911+
// at and above 2032000, the eight post-2047000 files, and the three latest
912+
// singletons (15 files into 1) without touching the still non-canonical
913+
// [2016000, 2032000) on its left.
914+
const maxSpan = uint64(1 << 20) // 1048576, large enough for the natural 16384 span
915+
files := visibleFiles{
916+
// Rebased canonical-step boundaries (×1000).
917+
f(0, 1024000),
918+
f(1024000, 1536000),
919+
f(1536000, 1792000),
920+
f(1792000, 1920000),
921+
f(1920000, 1984000),
922+
f(1984000, 2016000),
923+
f(2016000, 2032000),
924+
f(2032000, 2040000),
925+
f(2040000, 2044000),
926+
f(2044000, 2046000),
927+
f(2046000, 2047000),
928+
// Binary expansion of the post-2047000 region by N=2047996.
929+
f(2047000, 2047488),
930+
f(2047488, 2047744),
931+
f(2047744, 2047872),
932+
f(2047872, 2047936),
933+
f(2047936, 2047968),
934+
f(2047968, 2047984),
935+
f(2047984, 2047992),
936+
f(2047992, 2047996),
937+
// Latest singletons added at N=2047997, 2047998, 2047999, 2048000.
938+
f(2047996, 2047998),
939+
f(2047998, 2047999),
940+
f(2047999, 2048000),
941+
}
942+
mr := findMergeRangeInFiles(files, stepSize, 2048000, maxSpan, false)
943+
assert.True(t, mr.needMerge)
944+
assert.Equal(t, uint64(2032000), mr.from,
945+
"natural start 2031616 must be clipped up to 2032000 to avoid straddling [2016000, 2032000)")
946+
assert.Equal(t, uint64(2048000), mr.to)
947+
})
948+
901949
// --- superSetCheck (history / inverted-index branch) ---
902950

903951
t.Run("superSetCheck_resets_when_existing_file_covers_candidate", func(t *testing.T) {

0 commit comments

Comments
 (0)