Skip to content

Commit 1ba33f5

Browse files
committed
sstable: reimplement lazy loading of the index block in the two-level iterator
This commit reimplements lazy loading for two-level sstable iterators, fixing critical issues from the previous attempt (fe43e7b), which was reverted due to data corruption in stress tests. Benchmark tests are added to validate lazy-loading behavior under various scenarios. Implements #3248.
1 parent bbfc14b commit 1ba33f5

File tree

2 files changed

+449
-18
lines changed

2 files changed

+449
-18
lines changed

sstable/reader_iter_two_lvl.go

Lines changed: 83 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,10 @@ type twoLevelIterator[I any, PI indexBlockIterator[I], D any, PD dataBlockIterat
3232
// false - any filtering happens at the top level.
3333
useFilterBlock bool
3434
lastBloomFilterMatched bool
35+
36+
// topLevelIndexLoaded is set to true if the top-level index block load
37+
// operation completed successfully.
38+
topLevelIndexLoaded bool
3539
}
3640

3741
var _ Iterator = (*twoLevelIteratorRowBlocks)(nil)
@@ -45,6 +49,7 @@ func (i *twoLevelIterator[I, PI, D, PD]) loadSecondLevelIndexBlock(dir int8) loa
4549
// the index fails.
4650
PD(&i.secondLevel.data).Invalidate()
4751
PI(&i.secondLevel.index).Invalidate()
52+
4853
if !PI(&i.topLevelIndex).Valid() {
4954
return loadBlockFailed
5055
}
@@ -87,6 +92,10 @@ func (i *twoLevelIterator[I, PI, D, PD]) loadSecondLevelIndexBlock(dir int8) loa
8792
// appropriate bound, depending on the iteration direction, and returns either
8893
// `blockIntersects` or `blockExcluded`.
8994
func (i *twoLevelIterator[I, PI, D, PD]) resolveMaybeExcluded(dir int8) intersectsResult {
95+
if invariants.Enabled && !i.topLevelIndexLoaded {
96+
panic("pebble: resolveMaybeExcluded called without loaded top-level index")
97+
}
98+
9099
// This iterator is configured with a bound-limited block property filter.
91100
// The bpf determined this entire index block could be excluded from
92101
// iteration based on the property encoded in the block handle. However, we
@@ -162,6 +171,7 @@ func newColumnBlockTwoLevelIterator(
162171
}
163172
i := twoLevelIterColumnBlockPool.Get().(*twoLevelIteratorColumnBlocks)
164173
i.secondLevel.init(ctx, r, opts)
174+
i.secondLevel.indexLoaded = true
165175
// Only check the bloom filter at the top level.
166176
i.useFilterBlock = i.secondLevel.useFilterBlock
167177
i.secondLevel.useFilterBlock = false
@@ -182,14 +192,7 @@ func newColumnBlockTwoLevelIterator(
182192
objstorage.NoReadBefore, &i.secondLevel.vbRHPrealloc)
183193
}
184194
i.secondLevel.data.InitOnce(r.keySchema, r.Comparer, &i.secondLevel.internalValueConstructor)
185-
topLevelIndexH, err := r.readTopLevelIndexBlock(ctx, i.secondLevel.readEnv.Block, i.secondLevel.indexFilterRH)
186-
if err == nil {
187-
err = i.topLevelIndex.InitHandle(r.Comparer, topLevelIndexH, opts.Transforms)
188-
}
189-
if err != nil {
190-
_ = i.Close()
191-
return nil, err
192-
}
195+
193196
return i, nil
194197
}
195198

@@ -211,6 +214,7 @@ func newRowBlockTwoLevelIterator(
211214
}
212215
i := twoLevelIterRowBlockPool.Get().(*twoLevelIteratorRowBlocks)
213216
i.secondLevel.init(ctx, r, opts)
217+
i.secondLevel.indexLoaded = true
214218
// Only check the bloom filter at the top level.
215219
i.useFilterBlock = i.secondLevel.useFilterBlock
216220
i.secondLevel.useFilterBlock = false
@@ -237,14 +241,6 @@ func newRowBlockTwoLevelIterator(
237241
i.secondLevel.data.SetHasValuePrefix(true)
238242
}
239243

240-
topLevelIndexH, err := r.readTopLevelIndexBlock(ctx, i.secondLevel.readEnv.Block, i.secondLevel.indexFilterRH)
241-
if err == nil {
242-
err = i.topLevelIndex.InitHandle(r.Comparer, topLevelIndexH, opts.Transforms)
243-
}
244-
if err != nil {
245-
_ = i.Close()
246-
return nil, err
247-
}
248244
return i, nil
249245
}
250246

@@ -277,6 +273,10 @@ func (i *twoLevelIterator[I, PI, D, PD]) SeekGE(
277273
err := i.secondLevel.err
278274
i.secondLevel.err = nil // clear cached iteration error
279275

276+
if !i.ensureTopLevelIndexLoaded() {
277+
return nil
278+
}
279+
280280
// The twoLevelIterator could be already exhausted. Utilize that when
281281
// trySeekUsingNext is true. See the comment about data-exhausted, PGDE, and
282282
// bounds-exhausted near the top of the file.
@@ -419,6 +419,10 @@ func (i *twoLevelIterator[I, PI, D, PD]) SeekPrefixGE(
419419
err := i.secondLevel.err
420420
i.secondLevel.err = nil // clear cached iteration error
421421

422+
if !i.ensureTopLevelIndexLoaded() {
423+
return nil
424+
}
425+
422426
// The twoLevelIterator could be already exhausted. Utilize that when
423427
// trySeekUsingNext is true. See the comment about data-exhausted, PGDE, and
424428
// bounds-exhausted near the top of the file.
@@ -586,6 +590,11 @@ func (i *twoLevelIterator[I, PI, D, PD]) virtualLastSeekLE() *base.InternalKV {
586590
panic("unexpected virtualLastSeekLE with exclusive upper bounds")
587591
}
588592
key := i.secondLevel.upper
593+
594+
if !i.ensureTopLevelIndexLoaded() {
595+
return nil
596+
}
597+
589598
// Need to position the topLevelIndex.
590599
//
591600
// The previous exhausted state of singleLevelIterator is no longer
@@ -643,6 +652,10 @@ func (i *twoLevelIterator[I, PI, D, PD]) SeekLT(
643652
// Seek optimization only applies until iterator is first positioned after SetBounds.
644653
i.secondLevel.boundsCmp = 0
645654

655+
if !i.ensureTopLevelIndexLoaded() {
656+
return nil
657+
}
658+
646659
var result loadBlockResult
647660
// NB: Unlike SeekGE, we don't have a fast-path here since we don't know
648661
// whether the topLevelIndex is positioned after the position that would
@@ -716,6 +729,10 @@ func (i *twoLevelIterator[I, PI, D, PD]) First() *base.InternalKV {
716729
// Seek optimization only applies until iterator is first positioned after SetBounds.
717730
i.secondLevel.boundsCmp = 0
718731

732+
if !i.ensureTopLevelIndexLoaded() {
733+
return nil
734+
}
735+
719736
if !PI(&i.topLevelIndex).First() {
720737
return nil
721738
}
@@ -765,6 +782,10 @@ func (i *twoLevelIterator[I, PI, D, PD]) Last() *base.InternalKV {
765782
// Seek optimization only applies until iterator is first positioned after SetBounds.
766783
i.secondLevel.boundsCmp = 0
767784

785+
if !i.ensureTopLevelIndexLoaded() {
786+
return nil
787+
}
788+
768789
if !PI(&i.topLevelIndex).Last() {
769790
return nil
770791
}
@@ -832,6 +853,11 @@ func (i *twoLevelIterator[I, PI, D, PD]) NextPrefix(succKey []byte) *base.Intern
832853

833854
// Did not find prefix in the existing second-level index block. This is the
834855
// slow-path where we seek the iterator.
856+
857+
if !i.ensureTopLevelIndexLoaded() {
858+
return nil
859+
}
860+
835861
if !PI(&i.topLevelIndex).SeekGE(succKey) {
836862
PD(&i.secondLevel.data).Invalidate()
837863
PI(&i.secondLevel.index).Invalidate()
@@ -879,6 +905,10 @@ func (i *twoLevelIterator[I, PI, D, PD]) skipForward() *base.InternalKV {
879905
return nil
880906
}
881907

908+
if !i.ensureTopLevelIndexLoaded() {
909+
return nil
910+
}
911+
882912
// It is possible that skipBackward went too far and the virtual table lower
883913
// bound is after the first key in the block we are about to load, in which
884914
// case we must use SeekGE below. The keys in the block we are about to load
@@ -956,6 +986,11 @@ func (i *twoLevelIterator[I, PI, D, PD]) skipBackward() *base.InternalKV {
956986
if i.secondLevel.err != nil || i.secondLevel.exhaustedBounds < 0 {
957987
return nil
958988
}
989+
990+
if !i.ensureTopLevelIndexLoaded() {
991+
return nil
992+
}
993+
959994
i.secondLevel.exhaustedBounds = 0
960995
if !PI(&i.topLevelIndex).Prev() {
961996
PD(&i.secondLevel.data).Invalidate()
@@ -1009,8 +1044,37 @@ func (i *twoLevelIterator[I, PI, D, PD]) SetupForCompaction() {
10091044
i.secondLevel.SetupForCompaction()
10101045
}
10111046

1012-
// Close implements internalIterator.Close, as documented in the pebble
1013-
// package.
1047+
// ensureTopLevelIndexLoaded loads and initializes the top-level index block
// the first time it is needed, instead of at iterator construction.
//
// It returns true if the top-level index was already loaded or if the load
// completes successfully. On failure it stores the error in
// i.secondLevel.err and returns false; topLevelIndexLoaded is left unset in
// that case, so a subsequent call attempts the load again.
func (i *twoLevelIterator[I, PI, D, PD]) ensureTopLevelIndexLoaded() bool {
1048+
// Fast path: a previous call already loaded the index.
if i.topLevelIndexLoaded {
1049+
return true
1050+
}
1051+
1052+
// Perform the deferred top-level index load: first read the block via the
// reader, then initialize the top-level index iterator from the handle.
1053+
topLevelIndexH, err := i.secondLevel.reader.readTopLevelIndexBlock(
1054+
i.secondLevel.ctx,
1055+
i.secondLevel.readEnv.Block,
1056+
i.secondLevel.indexFilterRH,
1057+
)
1058+
if err != nil {
1059+
// Record the read error on the second-level iterator; the bool result
// only signals that the load failed.
i.secondLevel.err = err
1060+
return false
1061+
}
1062+
1063+
err = PI(&i.topLevelIndex).InitHandle(
1064+
i.secondLevel.reader.Comparer,
1065+
topLevelIndexH,
1066+
i.secondLevel.transforms,
1067+
)
1068+
if err != nil {
1069+
// Same error reporting as the read failure above.
i.secondLevel.err = err
1070+
return false
1071+
}
1072+
1073+
// Mark the index as loaded only after both the read and InitHandle
// succeeded.
i.topLevelIndexLoaded = true
1074+
return true
1075+
}
1076+
1077+
// Close implements internalIterator.Close, as documented in the pebble package.
10141078
func (i *twoLevelIterator[I, PI, D, PD]) Close() error {
10151079
if invariants.Enabled && i.secondLevel.pool != nil {
10161080
panic("twoLevelIterator's singleLevelIterator has its own non-nil pool")
@@ -1021,6 +1085,7 @@ func (i *twoLevelIterator[I, PI, D, PD]) Close() error {
10211085
err = firstError(err, PI(&i.topLevelIndex).Close())
10221086
i.useFilterBlock = false
10231087
i.lastBloomFilterMatched = false
1088+
i.topLevelIndexLoaded = false
10241089
if pool != nil {
10251090
pool.Put(i)
10261091
}

0 commit comments

Comments
 (0)