Skip to content

Commit c27a208

Browse files
committed
sstable: do not invalidate data block if bloomfilter returns false
Fixes #4788
1 parent d4618d5 commit c27a208

File tree

3 files changed

+486
-13
lines changed

3 files changed

+486
-13
lines changed

sstable/reader_iter_single_lvl.go

Lines changed: 40 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,10 @@ type singleLevelIterator[I any, PI indexBlockIterator[I], D any, PD dataBlockIte
164164
// a match is high).
165165
useFilterBlock bool
166166
lastBloomFilterMatched bool
167+
// lastOpWasSeekPrefixGE tracks if the previous operation was SeekPrefixGE
168+
// that returned nil due to bloom filter miss. Used for invariant checking
169+
// in Next() to ensure the block is not invalidated when it doesn't have to be.
170+
lastOpWasSeekPrefixGE invariants.Value[bool]
167171

168172
// indexLoaded is set to true if the index block load operation completed
169173
// successfully.
@@ -646,6 +650,8 @@ func (i *singleLevelIterator[I, PI, D, PD]) trySeekLTUsingPrevWithinBlock(
646650
func (i *singleLevelIterator[I, PI, D, PD]) SeekGE(
647651
key []byte, flags base.SeekGEFlags,
648652
) *base.InternalKV {
653+
// Clear the tracking flag since this is a new absolute positioning operation
654+
i.lastOpWasSeekPrefixGE.Set(false)
649655
// The synthetic key is no longer relevant and must be cleared.
650656
i.synthetic.atSyntheticKey = false
651657

@@ -890,6 +896,8 @@ func (i *singleLevelIterator[I, PI, D, PD]) seekPrefixGE(
890896
// this method. Hence, we can use the existing iterator position if the last
891897
// SeekPrefixGE did not fail bloom filter matching.
892898

899+
// Clear the tracking flag initially; will be set later if bloom filter returns false
900+
i.lastOpWasSeekPrefixGE.Set(false)
893901
err := i.err
894902
i.err = nil // clear cached iteration error
895903
if i.useFilterBlock {
@@ -901,15 +909,17 @@ func (i *singleLevelIterator[I, PI, D, PD]) seekPrefixGE(
901909
// Check prefix bloom filter.
902910
var mayContain bool
903911
mayContain, i.err = i.bloomFilterMayContain(prefix)
904-
if i.err != nil || !mayContain {
905-
// In the i.err == nil case, this invalidation may not be necessary for
906-
// correctness, and may be a place to optimize later by reusing the
907-
// already loaded block. It was necessary in earlier versions of the code
908-
// since the caller was allowed to call Next when SeekPrefixGE returned
909-
// nil. This is no longer allowed.
912+
if i.err != nil {
910913
PD(&i.data).Invalidate()
911914
return nil
912915
}
916+
if !mayContain {
917+
// In the no-error bloom filter miss case, the key is definitely not in table.
918+
// We can avoid invalidating the already loaded block since the caller is
919+
// not allowed to call Next when SeekPrefixGE returns nil.
920+
i.lastOpWasSeekPrefixGE.Set(true)
921+
return nil
922+
}
913923
i.lastBloomFilterMatched = true
914924
}
915925
if flags.TrySeekUsingNext() {
@@ -1077,8 +1087,11 @@ func (i *singleLevelIterator[I, PI, D, PD]) virtualLastSeekLE() *base.InternalKV
10771087
func (i *singleLevelIterator[I, PI, D, PD]) SeekLT(
10781088
key []byte, flags base.SeekLTFlags,
10791089
) *base.InternalKV {
1090+
// Clear the tracking flag since this is a new absolute positioning operation
1091+
i.lastOpWasSeekPrefixGE.Set(false)
10801092
// The synthetic key is no longer relevant and must be cleared.
10811093
i.synthetic.atSyntheticKey = false
1094+
10821095
if i.readEnv.Virtual != nil {
10831096
// Might have to fix upper bound since virtual sstable bounds are not
10841097
// known to callers of SeekLT.
@@ -1189,8 +1202,11 @@ func (i *singleLevelIterator[I, PI, D, PD]) SeekLT(
11891202
// to ensure that key is greater than or equal to the lower bound (e.g. via a
11901203
// call to SeekGE(lower)).
11911204
func (i *singleLevelIterator[I, PI, D, PD]) First() *base.InternalKV {
1205+
// Clear the tracking flag since this is a new absolute positioning operation
1206+
i.lastOpWasSeekPrefixGE.Set(false)
11921207
// The synthetic key is no longer relevant and must be cleared.
11931208
i.synthetic.atSyntheticKey = false
1209+
11941210
// If we have a lower bound, use SeekGE. Note that in general this is not
11951211
// supported usage, except when the lower bound is there because the table is
11961212
// virtual.
@@ -1260,8 +1276,11 @@ func (i *singleLevelIterator[I, PI, D, PD]) firstInternal() *base.InternalKV {
12601276
// to ensure that key is less than the upper bound (e.g. via a call to
12611277
// SeekLT(upper))
12621278
func (i *singleLevelIterator[I, PI, D, PD]) Last() *base.InternalKV {
1279+
// Clear the tracking flag since this is a new absolute positioning operation
1280+
i.lastOpWasSeekPrefixGE.Set(false)
12631281
// The synthetic key is no longer relevant and must be cleared.
12641282
i.synthetic.atSyntheticKey = false
1283+
12651284
if i.readEnv.Virtual != nil {
12661285
return i.maybeVerifyKey(i.virtualLast())
12671286
}
@@ -1324,6 +1343,16 @@ func (i *singleLevelIterator[I, PI, D, PD]) lastInternal() *base.InternalKV {
13241343
// Note: compactionIterator.Next mirrors the implementation of Iterator.Next
13251344
// due to performance. Keep the two in sync.
13261345
func (i *singleLevelIterator[I, PI, D, PD]) Next() *base.InternalKV {
1346+
if invariants.Enabled && i.lastOpWasSeekPrefixGE.Get() {
1347+
// If the previous operation was SeekPrefixGE that returned nil due to bloom
1348+
// filter miss, the data block should not have been invalidated. This assertion
1349+
// ensures the optimization to preserve loaded blocks is working correctly.
1350+
if PD(&i.data).IsDataInvalidated() {
1351+
panic("pebble: data block was invalidated after SeekPrefixGE returned nil due to bloom filter miss")
1352+
}
1353+
}
1354+
// Clear the tracking flag since this is no longer the next operation after SeekPrefixGE
1355+
i.lastOpWasSeekPrefixGE.Set(false)
13271356

13281357
// The SeekPrefixGE might have returned a synthetic key with latest suffix
13291358
// contained in the sstable. If the caller is calling Next(), that means
@@ -1335,6 +1364,7 @@ func (i *singleLevelIterator[I, PI, D, PD]) Next() *base.InternalKV {
13351364
i.synthetic.atSyntheticKey = false
13361365
return i.seekPrefixGE(i.reader.Comparer.Split.Prefix(i.synthetic.seekKey), i.synthetic.seekKey, base.SeekGEFlagsNone)
13371366
}
1367+
13381368
if i.exhaustedBounds == +1 {
13391369
panic("Next called even though exhausted upper bound")
13401370
}
@@ -1362,6 +1392,8 @@ func (i *singleLevelIterator[I, PI, D, PD]) Next() *base.InternalKV {
13621392

13631393
// NextPrefix implements (base.InternalIterator).NextPrefix.
13641394
func (i *singleLevelIterator[I, PI, D, PD]) NextPrefix(succKey []byte) *base.InternalKV {
1395+
// Clear the tracking flag since this is a relative positioning operation
1396+
i.lastOpWasSeekPrefixGE.Set(false)
13651397
if i.exhaustedBounds == +1 {
13661398
panic("NextPrefix called even though exhausted upper bound")
13671399
}
@@ -1439,6 +1471,8 @@ func (i *singleLevelIterator[I, PI, D, PD]) NextPrefix(succKey []byte) *base.Int
14391471
// Prev implements internalIterator.Prev, as documented in the pebble
14401472
// package.
14411473
func (i *singleLevelIterator[I, PI, D, PD]) Prev() *base.InternalKV {
1474+
// Clear the tracking flag since this is a relative positioning operation
1475+
i.lastOpWasSeekPrefixGE.Set(false)
14421476
if i.exhaustedBounds == -1 {
14431477
panic("Prev called even though exhausted lower bound")
14441478
}

0 commit comments

Comments
 (0)