Skip to content

Commit e52aa89

Browse files
committed
arenaskl: use pointer comparison to avoid key comparison
Update the arenaskl.Iterator type to use a pointer comparison to avoid a key comparison when possible. This is the same optimization as an earlier commit, but this time applied to the read path.

```
goos: darwin
goarch: arm64
pkg: github.com/cockroachdb/pebble/internal/arenaskl
cpu: Apple M1 Pro
                                                     │   old.txt   │               new.txt                │
                                                     │   sec/op    │   sec/op     vs base                 │
CockroachKeysSeekPrefixGE/skip=1/use-next=false-10     276.5n ± 1%   241.8n ± 2%  -12.55% (p=0.000 n=10)
CockroachKeysSeekPrefixGE/skip=1/use-next=true-10      35.05n ± 1%   34.20n ± 1%   -2.40% (p=0.000 n=10)
CockroachKeysSeekPrefixGE/skip=2/use-next=false-10     287.5n ± 0%   254.4n ± 2%  -11.51% (p=0.000 n=10)
CockroachKeysSeekPrefixGE/skip=2/use-next=true-10      53.64n ± 2%   52.73n ± 1%   -1.70% (p=0.000 n=10)
CockroachKeysSeekPrefixGE/skip=4/use-next=false-10     308.2n ± 1%   269.9n ± 1%  -12.44% (p=0.000 n=10)
CockroachKeysSeekPrefixGE/skip=4/use-next=true-10      76.76n ± 0%   74.53n ± 0%   -2.90% (p=0.000 n=10)
CockroachKeysSeekPrefixGE/skip=8/use-next=false-10     310.6n ± 0%   281.1n ± 2%   -9.48% (p=0.000 n=10)
CockroachKeysSeekPrefixGE/skip=8/use-next=true-10      368.3n ± 0%   336.0n ± 0%   -8.77% (p=0.000 n=10)
CockroachKeysSeekPrefixGE/skip=16/use-next=false-10    431.6n ± 1%   401.7n ± 0%   -6.93% (p=0.000 n=10)
CockroachKeysSeekPrefixGE/skip=16/use-next=true-10     486.2n ± 0%   465.6n ± 1%   -4.26% (p=0.000 n=10)
geomean                                                196.3n        181.8n        -7.38%
```
1 parent 926e93b commit e52aa89

File tree

1 file changed

+50
-30
lines changed

1 file changed

+50
-30
lines changed

internal/arenaskl/iterator.go

Lines changed: 50 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ func (it *Iterator) SeekGE(key []byte, flags base.SeekGEFlags) *base.InternalKV
114114
return kv
115115
}
116116
}
117-
_, it.nd, _ = it.seekForBaseSplice(key)
117+
_, it.nd = it.seekForBaseSplice(key)
118118
if it.nd == it.list.tail || it.nd == it.upperNode {
119119
return nil
120120
}
@@ -142,7 +142,7 @@ func (it *Iterator) SeekPrefixGE(prefix, key []byte, flags base.SeekGEFlags) *ba
142142
func (it *Iterator) SeekLT(key []byte, flags base.SeekLTFlags) *base.InternalKV {
143143
// NB: the top-level Iterator has already adjusted key based on
144144
// the upper-bound.
145-
it.nd, _, _ = it.seekForBaseSplice(key)
145+
it.nd, _ = it.seekForBaseSplice(key)
146146
if it.nd == it.list.head || it.nd == it.lowerNode {
147147
return nil
148148
}
@@ -259,53 +259,73 @@ func (it *Iterator) decodeKey() {
259259
it.kv.K.Trailer = it.nd.keyTrailer
260260
}
261261

262-
func (it *Iterator) seekForBaseSplice(key []byte) (prev, next *node, found bool) {
263-
ikey := base.MakeSearchKey(key)
264-
262+
func (it *Iterator) seekForBaseSplice(key []byte) (prev, next *node) {
265263
prev = it.list.head
266264
for level := int(it.list.Height() - 1); level >= 0; level-- {
267265

268266
// Search this level for the key.
267+
prevLevelNext := next
269268
for {
270269
// Assume prev.key < key.
271270
next = it.list.getNext(prev, level)
271+
272+
// Before performing a key comparison, check if the next pointer
273+
// equals prevLevelNext. The pointer comparison is significantly
274+
// cheaper than a key comparison.
275+
//
276+
// It's not unlikely for consecutive levels to have the same next
277+
// pointer. We use [maxHeight]=20 levels, and with each higher
278+
// height the probability a node extends one more rung of the tower
279+
// is 1/e.
280+
//
281+
// The skiplist may contain nodes with keys between the (prev,next)
282+
// pair of nodes that make up the previous level's splice. Let's
283+
// divide these nodes into the L nodes with keys < key and the R
284+
// nodes with keys ≥ key. Only a subset of these nodes may have
285+
// towers that reach [level].
286+
//
287+
// Of the nodes in R that reach [level], we only care about the one
288+
// with the smallest key. If there are no nodes in R that reach
289+
// [level], then this level's splice's next pointer will be the same
290+
// as the level above's splice's next pointer. We can perform a
291+
// cheap pointer comparison of [next] and [prevLevelNext] to
292+
// determine this.
293+
//
294+
// (Note that we must still skip over any of the nodes in L that are
295+
// high enough to reach [level], and each of these nodes will
296+
// require a key comparison.)
297+
//
298+
//   (< key)                              (≥ key)
299+
//    prev                            prevLevelNext
300+
// +---------+                          +---------+
301+
// |         |                          |         |
302+
// | level+1 |------------------------> |         |
303+
// |         |                          |         |
304+
// |         |          next            |         |
305+
// |         |       +--------+         |         |
306+
// |  level  |--...--|        |--...--> |         |
307+
// |         |       |        |         |         |
308+
// |         |       |        |         |         |
309+
// +---------+       +--------+         +---------+
310+
if next == prevLevelNext {
311+
break
312+
}
272313
if next == it.list.tail {
273314
// Tail node, so done.
274315
break
275316
}
276317

277318
offset, size := next.keyOffset, next.keySize
278319
nextKey := it.list.arena.buf[offset : offset+size]
279-
cmp := it.list.cmp(ikey.UserKey, nextKey)
280-
if cmp < 0 {
281-
// We are done for this level, since prev.key < key < next.key.
320+
cmp := it.list.cmp(key, nextKey)
321+
if cmp <= 0 {
322+
// We are done for this level, since prev.key < key <= next.key.
282323
break
283324
}
284-
if cmp == 0 {
285-
// User-key equality.
286-
if ikey.Trailer == next.keyTrailer {
287-
// Internal key equality.
288-
found = true
289-
break
290-
}
291-
if ikey.Trailer > next.keyTrailer {
292-
// We are done for this level, since prev.key < key < next.key.
293-
break
294-
}
295-
}
296325
// Keep moving right on this level.
297326
prev = next
298327
}
299-
300-
if found {
301-
if level != 0 {
302-
// next is pointing at the target node, but we need to find previous on
303-
// the bottom level.
304-
prev = it.list.getPrev(next, 0)
305-
}
306-
break
307-
}
308328
}
309329

310-
return prev, next, found
330+
return prev, next
311331
}

0 commit comments

Comments
 (0)