Skip to content

Commit 7d7e2f4

Browse files
committed
sstable: set up proper iterator transforms during EstimateDiskUsage
Previously, we did not set up any iterator transforms during EstimateDiskUsage, which led to an inaccurate disk usage estimate that was eventually used in compaction heuristics. This patch fixes the bug by taking the IterTransforms from the table being read and applying them during disk usage estimation. Fixes: #4597
1 parent 20b0451 commit 7d7e2f4

File tree

7 files changed

+49
-16
lines changed

7 files changed

+49
-16
lines changed

file_cache.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -328,7 +328,7 @@ func (h *fileCacheHandle) estimateSize(
328328
meta *manifest.TableMetadata, lower, upper []byte,
329329
) (size uint64, err error) {
330330
err = h.withReader(context.TODO(), block.NoReadEnv, meta, func(r *sstable.Reader, env sstable.ReadEnv) error {
331-
size, err = r.EstimateDiskUsage(lower, upper, env)
331+
size, err = r.EstimateDiskUsage(lower, upper, env, meta.IterTransforms())
332332
return err
333333
})
334334
return size, err

ingest.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1172,7 +1172,7 @@ type ExternalFile struct {
11721172
// ingestion.
11731173
HasPointKey, HasRangeKey bool
11741174

1175-
// SyntheticPrefix will prepend this suffix to all keys in the file during
1175+
// SyntheticPrefix will prepend this prefix to all keys in the file during
11761176
// iteration. Note that the backing file itself is not modified.
11771177
//
11781178
// SyntheticPrefix must be a prefix of both Bounds.Start and Bounds.End.

sstable/reader.go

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -796,19 +796,20 @@ func (r *Reader) ValidateBlockChecksums() error {
796796
// TODO(ajkr): account for metablock space usage. Perhaps look at the fraction of
797797
// data blocks overlapped and add that same fraction of the metadata blocks to the
798798
// estimate.
799-
func (r *Reader) EstimateDiskUsage(start []byte, end []byte, env ReadEnv) (uint64, error) {
799+
func (r *Reader) EstimateDiskUsage(
800+
start []byte, end []byte, env ReadEnv, transforms IterTransforms,
801+
) (uint64, error) {
800802
if env.Virtual != nil {
801803
_, start, end = env.Virtual.ConstrainBounds(start, end, false, r.Comparer.Compare)
802804
}
803-
804805
if !r.tableFormat.BlockColumnar() {
805-
return estimateDiskUsage[rowblk.IndexIter, *rowblk.IndexIter](r, start, end)
806+
return estimateDiskUsage[rowblk.IndexIter, *rowblk.IndexIter](r, start, end, transforms)
806807
}
807-
return estimateDiskUsage[colblk.IndexIter, *colblk.IndexIter](r, start, end)
808+
return estimateDiskUsage[colblk.IndexIter, *colblk.IndexIter](r, start, end, transforms)
808809
}
809810

810811
func estimateDiskUsage[I any, PI indexBlockIterator[I]](
811-
r *Reader, start, end []byte,
812+
r *Reader, start, end []byte, transforms IterTransforms,
812813
) (uint64, error) {
813814
if r.err != nil {
814815
return 0, r.err
@@ -830,13 +831,13 @@ func estimateDiskUsage[I any, PI indexBlockIterator[I]](
830831
var startIdxIter, endIdxIter PI
831832
if !r.Attributes.Has(AttributeTwoLevelIndex) {
832833
startIdxIter = new(I)
833-
if err := startIdxIter.InitHandle(r.Comparer, indexH, NoTransforms); err != nil {
834+
if err := startIdxIter.InitHandle(r.Comparer, indexH, transforms); err != nil {
834835
return 0, err
835836
}
836837
endIdxIter = startIdxIter
837838
} else {
838839
var topIter PI = new(I)
839-
if err := topIter.InitHandle(r.Comparer, indexH, NoTransforms); err != nil {
840+
if err := topIter.InitHandle(r.Comparer, indexH, transforms); err != nil {
840841
return 0, err
841842
}
842843
if !topIter.SeekGE(start) {
@@ -853,7 +854,7 @@ func estimateDiskUsage[I any, PI indexBlockIterator[I]](
853854
}
854855
defer startIdxBlock.Release()
855856
startIdxIter = new(I)
856-
err = startIdxIter.InitHandle(r.Comparer, startIdxBlock, NoTransforms)
857+
err = startIdxIter.InitHandle(r.Comparer, startIdxBlock, transforms)
857858
if err != nil {
858859
return 0, err
859860
}
@@ -869,7 +870,7 @@ func estimateDiskUsage[I any, PI indexBlockIterator[I]](
869870
}
870871
defer endIdxBlock.Release()
871872
endIdxIter = new(I)
872-
err = endIdxIter.InitHandle(r.Comparer, endIdxBlock, NoTransforms)
873+
err = endIdxIter.InitHandle(r.Comparer, endIdxBlock, transforms)
873874
if err != nil {
874875
return 0, err
875876
}

sstable/reader_test.go

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,13 @@ func runVirtualReaderTest(t *testing.T, path string, blockSize, indexBlockSize i
199199

200200
showProps := td.HasArg("show-props")
201201

202+
var syntheticPrefix []byte
203+
if td.HasArg("prefix") {
204+
var synthPrefixStr string
205+
td.ScanArgs(t, "prefix", &synthPrefixStr)
206+
syntheticPrefix = []byte(synthPrefixStr)
207+
}
208+
202209
syntheticSuffix = nil
203210
if td.HasArg("suffix") {
204211
var synthSuffixStr string
@@ -210,7 +217,10 @@ func runVirtualReaderTest(t *testing.T, path string, blockSize, indexBlockSize i
210217
env.Virtual = &params
211218

212219
var err error
213-
tableSize, err := r.EstimateDiskUsage(params.Lower.UserKey, params.Upper.UserKey, env)
220+
transforms := IterTransforms{
221+
SyntheticPrefixAndSuffix: block.MakeSyntheticPrefixAndSuffix(syntheticPrefix, syntheticSuffix),
222+
}
223+
tableSize, err := r.EstimateDiskUsage(params.Lower.UserKey, params.Upper.UserKey, env, transforms)
214224
if err != nil {
215225
return err.Error()
216226
}
@@ -590,7 +600,7 @@ func TestInjectedErrors(t *testing.T) {
590600
}
591601
defer func() { reterr = firstError(reterr, r.Close()) }()
592602

593-
_, err = r.EstimateDiskUsage([]byte("borrower"), []byte("lender"), NoReadEnv)
603+
_, err = r.EstimateDiskUsage([]byte("borrower"), []byte("lender"), NoReadEnv, NoTransforms)
594604
if err != nil {
595605
return err
596606
}

sstable/testdata/virtual_reader_props

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,3 +345,25 @@ props:
345345
rocksdb.num.data.blocks: 1
346346
rocksdb.compression: Snappy
347347
rocksdb.compression_options: window_bits=-14; level=32767; strategy=0; max_dict_bytes=0; zstd_max_train_bytes=0; enabled=0;
348+
349+
# Test virtual sstable with a synthetic prefix.
350+
build
351+
a.SET.1:a
352+
b.SET.1:b
353+
c.SET.1:c
354+
d.SET.1:d
355+
----
356+
point: [a#1,SET-d#1,SET]
357+
seqnums: [1-1]
358+
359+
virtualize lower=poi-b.SET.1 upper=poi-c.SET.1 prefix=poi- show-props
360+
----
361+
bounds: [poi-b#1,SET-poi-c#1,SET]
362+
filenum: 000010
363+
props:
364+
rocksdb.num.entries: 1
365+
rocksdb.raw.key.size: 4
366+
rocksdb.raw.value.size: 1
367+
rocksdb.num.data.blocks: 1
368+
rocksdb.compression: Snappy
369+
rocksdb.compression_options: window_bits=-14; level=32767; strategy=0; max_dict_bytes=0; zstd_max_train_bytes=0; enabled=0;

table_stats.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -440,7 +440,7 @@ func (d *DB) loadTableRangeDelStats(
440440
// the size of the range key block relative to the overall size of the
441441
// table is expected to be small.
442442
if level == numLevels-1 && meta.SmallestSeqNum < maxRangeDeleteSeqNum {
443-
size, err := r.EstimateDiskUsage(start, end, env)
443+
size, err := r.EstimateDiskUsage(start, end, env, meta.IterTransforms())
444444
if err != nil {
445445
return nil, err
446446
}
@@ -649,7 +649,7 @@ func (d *DB) estimateReclaimedSizeBeneath(
649649
var size uint64
650650
err := d.fileCache.withReader(ctx, block.NoReadEnv, file,
651651
func(r *sstable.Reader, env sstable.ReadEnv) (err error) {
652-
size, err = r.EstimateDiskUsage(start, end, env)
652+
size, err = r.EstimateDiskUsage(start, end, env, file.IterTransforms())
653653
return err
654654
})
655655
if err != nil {

tool/sstable.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -542,7 +542,7 @@ func (s *sstableT) runScan(cmd *cobra.Command, args []string) {
542542
func (s *sstableT) runSpace(cmd *cobra.Command, args []string) {
543543
stdout, stderr := cmd.OutOrStdout(), cmd.OutOrStderr()
544544
s.foreachSstable(stderr, args, func(path string, r *sstable.Reader, props sstable.Properties) {
545-
bytes, err := r.EstimateDiskUsage(s.start, s.end, sstable.NoReadEnv)
545+
bytes, err := r.EstimateDiskUsage(s.start, s.end, sstable.NoReadEnv, sstable.NoTransforms)
546546
if err != nil {
547547
fmt.Fprintf(stderr, "%s\n", err)
548548
return

0 commit comments

Comments
 (0)