Skip to content

Commit 1916ebf

Browse files
committed
db: zero out iterAlloc fields individually
When closing an iterator and returning the containing iterAlloc to the pool, zero each field of the iterAlloc individually (instead of wholesale with *alloc = iterAlloc{}). Additionally only zero out the iteratorBatchState if the iterator was constructed to iterate over an indexed batch. ``` goos: darwin goarch: arm64 pkg: github.com/cockroachdb/pebble cpu: Apple M1 Pro │ entire-iteralloc.txt │ piecemeal.txt │ │ sec/op │ sec/op vs base │ NewIterClose/1-10 1.213µ ± 1% 1.169µ ± 4% -3.67% (p=0.000 n=10) NewIterClose/10-10 6.385µ ± 0% 6.316µ ± 1% -1.08% (p=0.000 n=10) NewIterClose/100-10 92.41µ ± 1% 91.80µ ± 1% -0.66% (p=0.011 n=10) geomean 8.945µ 8.783µ -1.81% ```
1 parent d2d78bb commit 1916ebf

File tree

2 files changed

+43
-49
lines changed

2 files changed

+43
-49
lines changed

db.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -905,12 +905,12 @@ func (d *DB) commitWrite(b *Batch, syncWG *sync.WaitGroup, syncErr *error) (*mem
905905
}
906906

907907
type iterAlloc struct {
908-
dbi Iterator
909908
keyBuf []byte
910909
boundsBuf [2][]byte
911910
prefixOrFullSeekKey []byte
912-
merging mergingIter
913911
batchState iteratorBatchState
912+
dbi Iterator
913+
merging mergingIter
914914
mlevels [3 + numLevels]mergingIterLevel
915915
levels [3 + numLevels]levelIter
916916
levelsPositioned [3 + numLevels]bool

iterator.go

Lines changed: 41 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -2416,56 +2416,50 @@ func (i *Iterator) Close() error {
24162416
iterRangeKeyStateAllocPool.Put(i.rangeKey)
24172417
i.rangeKey = nil
24182418
}
2419-
if alloc := i.alloc; alloc != nil {
2420-
var (
2421-
keyBuf []byte
2422-
boundsBuf [2][]byte
2423-
prefixOrFullSeekKey []byte
2424-
mergingIterHeapItems []mergingIterHeapItem
2425-
)
24262419

2427-
// Avoid caching the key buf if it is overly large. The constant is fairly
2428-
// arbitrary.
2429-
if cap(i.keyBuf) < maxKeyBufCacheSize {
2430-
keyBuf = i.keyBuf
2431-
}
2432-
if cap(i.prefixOrFullSeekKey) < maxKeyBufCacheSize {
2433-
prefixOrFullSeekKey = i.prefixOrFullSeekKey
2434-
}
2435-
for j := range i.boundsBuf {
2436-
if cap(i.boundsBuf[j]) < maxKeyBufCacheSize {
2437-
boundsBuf[j] = i.boundsBuf[j]
2438-
}
2420+
alloc := i.alloc
2421+
if alloc == nil {
2422+
// NB: When the Iterator is used as a part of a Get(), Close() is called by
2423+
// getIterAlloc.Close which handles recycling the appropriate structure and
2424+
// fields.
2425+
return err
2426+
}
2427+
2428+
// Reset the alloc struct, retaining any buffers within their maximum
2429+
// bounds, and return the iterAlloc struct to the pool.
2430+
2431+
// Avoid caching the key buf if it is overly large. The constant is fairly
2432+
// arbitrary.
2433+
if cap(i.keyBuf) < maxKeyBufCacheSize {
2434+
alloc.keyBuf = i.keyBuf
2435+
}
2436+
if cap(i.prefixOrFullSeekKey) < maxKeyBufCacheSize {
2437+
alloc.prefixOrFullSeekKey = i.prefixOrFullSeekKey
2438+
}
2439+
for j := range i.boundsBuf {
2440+
if cap(i.boundsBuf[j]) < maxKeyBufCacheSize {
2441+
alloc.boundsBuf[j] = i.boundsBuf[j]
24392442
}
2440-
mergingIterHeapItems = alloc.merging.heap.items
2443+
}
2444+
mergingIterHeapItems := alloc.merging.heap.items
24412445

2442-
// Reset the alloc struct, re-assign the fields that are being recycled, and
2443-
// then return it to the pool. Splitting the first two steps performs better
2444-
// than doing them in a single step (e.g. *alloc = iterAlloc{...}) because
2445-
// the compiler can avoid the use of a stack allocated autotmp iterAlloc
2446-
// variable (~12KB, as of Dec 2024), which must first be zeroed out, then
2447-
// assigned into, then copied over into the heap-allocated alloc. Instead,
2448-
// the two-step process allows the compiler to quickly zero out the heap
2449-
// allocated object and then assign the few fields we want to preserve.
2450-
//
2451-
// TODO(nvanbenschoten): even with this optimization, zeroing out the alloc
2452-
// struct still shows up in profiles because it is such a large struct. Can
2453-
// we do something better here? We are hanging 22 separated iterators off of
2454-
// the alloc struct (or more, depending on how you count), many of which are
2455-
// only used in a few cases. Can those iterators be responsible for zeroing
2456-
// out their own memory on Close, allowing us to assume that most of the
2457-
// alloc struct is already zeroed out by this point?
2458-
*alloc = iterAlloc{}
2459-
alloc.keyBuf = keyBuf
2460-
alloc.boundsBuf = boundsBuf
2461-
alloc.prefixOrFullSeekKey = prefixOrFullSeekKey
2462-
alloc.merging.heap.items = mergingIterHeapItems
2463-
2464-
iterAllocPool.Put(alloc)
2465-
}
2466-
// NB: When the Iterator is used as a part of a Get(), Close() is called by
2467-
// getIterAlloc.Close which handles recycling the appropriate structure and
2468-
// fields.
2446+
// We zero each field piecemeal in part to avoid the use of a stack
2447+
// allocated autotmp iterAlloc variable, and in part to make it easier
2448+
// to make zeroing conditional on whether the field was actually used.
2449+
//
2450+
// TODO(jackson,nvanbenschoten): Iterator.Close continues to appear in
2451+
// profiles in part because of this zeroing.
2452+
if i.batch != nil {
2453+
alloc.batchState = iteratorBatchState{}
2454+
}
2455+
alloc.dbi = Iterator{}
2456+
alloc.merging = mergingIter{}
2457+
alloc.merging.heap.items = mergingIterHeapItems
2458+
clear(alloc.mlevels[:])
2459+
clear(alloc.levels[:])
2460+
clear(alloc.levelsPositioned[:])
2461+
2462+
iterAllocPool.Put(alloc)
24692463
return err
24702464
}
24712465

0 commit comments

Comments
 (0)