Skip to content

Commit 5d1bab4

Browse files
authored
Fix(builder): Too many small tables when compression is enabled (#1549)
This fixes the issue of too many very small SST files being created when compression is enabled. The builder now accounts for the actual size of each block after compression, and the table is considered to have reached capacity once the sum of the actual block buffer sizes exceeds 90% of the table capacity.
1 parent 68fb85d commit 5d1bab4

File tree

3 files changed

+14
-8
lines changed

3 files changed

+14
-8
lines changed

levels.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -586,7 +586,7 @@ nextTable:
586586
}
587587

588588
if !y.SameKey(it.Key(), lastKey) {
589-
if builder.ReachedCapacity(s.kv.opt.MaxTableSize) {
589+
if builder.ReachedCapacity(uint64(float64(s.kv.opt.MaxTableSize) * 0.9)) {
590590
// Only break if we are on a different key, and have reached capacity. We want
591591
// to ensure that all versions of the key are stored in the same sstable, and
592592
// not divided across multiple tables at the same level.

stream_writer.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -358,7 +358,7 @@ func (w *sortedWriter) Add(key []byte, vs y.ValueStruct) error {
358358

359359
sameKey := y.SameKey(key, w.lastKey)
360360
// Same keys should go into the same SSTable.
361-
if !sameKey && w.builder.ReachedCapacity(w.db.opt.MaxTableSize) {
361+
if !sameKey && w.builder.ReachedCapacity(uint64(float64(w.db.opt.MaxTableSize)*0.9)) {
362362
if err := w.send(false); err != nil {
363363
return err
364364
}

table/builder.go

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"math"
2222
"runtime"
2323
"sync"
24+
"sync/atomic"
2425
"unsafe"
2526

2627
"github.com/dgryski/go-farm"
@@ -74,9 +75,10 @@ type bblock struct {
7475
// Builder is used in building a table.
7576
type Builder struct {
7677
// Typically tens or hundreds of meg. This is for one single file.
77-
buf []byte
78-
sz uint32
79-
bufLock sync.Mutex // This lock guards the buf. We acquire lock when we resize the buf.
78+
buf []byte
79+
sz uint32
80+
bufLock sync.Mutex // This lock guards the buf. We acquire lock when we resize the buf.
81+
actualSize uint32 // Used to store the sum of sizes of blocks after compression/encryption.
8082

8183
baseKey []byte // Base key for the current block.
8284
baseOffset uint32 // Offset for the current block.
@@ -152,6 +154,9 @@ func (b *Builder) handleBlock() {
152154
copy(b.buf[item.start:], blockBuf)
153155
b.bufLock.Unlock()
154156

157+
// Add the actual size of current block.
158+
atomic.AddUint32(&b.actualSize, uint32(len(blockBuf)))
159+
155160
// Fix the boundary of the block.
156161
item.end = item.start + uint32(len(blockBuf))
157162

@@ -273,6 +278,7 @@ func (b *Builder) finishBlock() {
273278
// If compression/encryption is disabled, no need to send the block to the blockChan.
274279
// There's nothing to be done.
275280
if b.blockChan == nil {
281+
atomic.StoreUint32(&b.actualSize, b.sz)
276282
b.addBlockToIndex()
277283
return
278284
}
@@ -346,8 +352,8 @@ func (b *Builder) Add(key []byte, value y.ValueStruct, valueLen uint32) {
346352
// at the end. The diff can vary.
347353

348354
// ReachedCapacity returns true if the estimated size of the table built so far exceeds the given capacity.
349-
func (b *Builder) ReachedCapacity(cap int64) bool {
350-
blocksSize := b.sz + // length of current buffer
355+
func (b *Builder) ReachedCapacity(capacity uint64) bool {
356+
blocksSize := atomic.LoadUint32(&b.actualSize) + // actual length of current buffer
351357
uint32(len(b.entryOffsets)*4) + // all entry offsets size
352358
4 + // count of all entry offsets
353359
8 + // checksum bytes
@@ -356,7 +362,7 @@ func (b *Builder) ReachedCapacity(cap int64) bool {
356362
4 + // Index length
357363
5*(uint32(len(b.tableIndex.Offsets))) // approximate index size
358364

359-
return int64(estimateSz) > cap
365+
return uint64(estimateSz) > capacity
360366
}
361367

362368
// Finish finishes the table by appending the index.

0 commit comments

Comments
 (0)