Skip to content

Commit df7e1fc

Browse files
committed
db, valsep: do not separate mvcc values if DisableSeparationBySuffix is true
Move this field into the ValueStoragePolicy struct and make sure it's being read at the time of value separation.
1 parent 1c9b3f3 commit df7e1fc

File tree

7 files changed

+111
-43
lines changed

7 files changed

+111
-43
lines changed

compaction.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3438,7 +3438,7 @@ func (d *DB) compactAndWrite(
34383438
spanPolicyValid = true
34393439
}
34403440
writerOpts := d.makeWriterOptions(c.outputLevel.level)
3441-
if spanPolicy.DisableValueSeparationBySuffix {
3441+
if spanPolicy.ValueStoragePolicy.DisableSeparationBySuffix {
34423442
writerOpts.DisableValueBlocks = true
34433443
}
34443444
if spanPolicy.PreferFastCompression && writerOpts.Compression != block.NoCompression {
@@ -3454,7 +3454,8 @@ func (d *DB) compactAndWrite(
34543454
// This span of keyspace is more tolerant of latency, so set a more
34553455
// aggressive value separation policy for this output.
34563456
vSep.SetNextOutputConfig(valsep.ValueSeparationOutputConfig{
3457-
MinimumSize: spanPolicy.ValueStoragePolicy.MinimumSize,
3457+
MinimumSize: spanPolicy.ValueStoragePolicy.MinimumSize,
3458+
DisableValueSeparationBySuffix: spanPolicy.ValueStoragePolicy.DisableSeparationBySuffix,
34583459
})
34593460
}
34603461
objMeta, tw, err := d.newCompactionOutputTable(jobID, c, writerOpts)

compaction_test.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1526,6 +1526,15 @@ func runCompactionTest(
15261526
for _, arg := range args {
15271527
parts := strings.Split(arg, "=")
15281528
switch parts[0] {
1529+
case "disable-separation-by-suffix":
1530+
if len(parts) != 1 {
1531+
td.Fatalf(t, "expected disable-separation-by-suffix with no value, got: %s", arg)
1532+
}
1533+
if policy.ValueStoragePolicy.PolicyAdjustment == NoValueSeparation {
1534+
td.Fatalf(t, "conflicting value storage policies for span: %s", line)
1535+
}
1536+
policy.ValueStoragePolicy.PolicyAdjustment = OverrideValueStorage
1537+
policy.ValueStoragePolicy.DisableSeparationBySuffix = true
15291538
case "val-sep-minimum-size":
15301539
if len(parts) != 2 {
15311540
td.Fatalf(t, "expected val-sep-minimum-size=<size>, got: %s", arg)
@@ -1540,6 +1549,8 @@ func runCompactionTest(
15401549
} else if int(size) != d.opts.Experimental.ValueSeparationPolicy().MinimumSize {
15411550
policy.ValueStoragePolicy.PolicyAdjustment = OverrideValueStorage
15421551
}
1552+
default:
1553+
td.Fatalf(t, "unknown span policy arg: %s", arg)
15431554
}
15441555
}
15451556
spanPolicies = append(spanPolicies, SpanAndPolicy{

data_test.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1783,8 +1783,7 @@ func parseDBOptionsArgs(opts *Options, args []datadriven.CmdArg) error {
17831783
End: []byte(cmdArg.Vals[1]),
17841784
}
17851785
policy := SpanPolicy{
1786-
DisableValueSeparationBySuffix: true,
1787-
ValueStoragePolicy: ValueStorageLowReadLatency,
1786+
ValueStoragePolicy: ValueStorageLowReadLatency,
17881787
}
17891788
spanPolicies = append(spanPolicies, SpanAndPolicy{
17901789
KeyRange: span,

options.go

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1283,15 +1283,6 @@ type SpanPolicy struct {
12831283
// amount to a significant amount of space.
12841284
PreferFastCompression bool
12851285

1286-
// DisableValueSeparationBySuffix disables discriminating KVs depending on
1287-
// suffix.
1288-
//
1289-
// Among a set of keys with the same prefix, Pebble's default heuristics
1290-
// optimize access to the KV with the smallest suffix. This is useful for MVCC
1291-
// keys (where the smallest suffix is the latest version), but should be
1292-
// disabled for keys where the suffix does not correspond to a version.
1293-
DisableValueSeparationBySuffix bool
1294-
12951286
// ValueStoragePolicy is a hint used to determine where to store the values
12961287
// for KVs.
12971288
ValueStoragePolicy ValueStoragePolicyAdjustment
@@ -1303,7 +1294,7 @@ func (p SpanPolicy) String() string {
13031294
if p.PreferFastCompression {
13041295
sb.WriteString("fast-compression,")
13051296
}
1306-
if p.DisableValueSeparationBySuffix {
1297+
if p.ValueStoragePolicy.DisableSeparationBySuffix {
13071298
sb.WriteString("disable-value-separation-by-suffix,")
13081299
}
13091300
switch p.ValueStoragePolicy.PolicyAdjustment {
@@ -1324,6 +1315,18 @@ type ValueStoragePolicyAdjustment struct {
13241315

13251316
// Remaining fields are ignored, unless the PolicyAdjustment is OverrideValueStorage.
13261317

1318+
// DisableSeparationBySuffix disables discriminating KVs depending on
1319+
// suffix.
1320+
//
1321+
// Among a set of keys with the same prefix, Pebble's default heuristics
1322+
// optimize access to the KV with the smallest suffix. This is useful for MVCC
1323+
// keys (where the smallest suffix is the latest version), but should be
1324+
// disabled for keys where the suffix does not correspond to a version.
1325+
//
1326+
// TODO(xinhaoz): Persist this setting when writing sstables so we can
1327+
// compare against current span policy configs during compaction to determine
1328+
// whether we can preserve blob references.
1329+
DisableSeparationBySuffix bool
13271330
// MinimumSize is the minimum size of the value.
13281331
MinimumSize int
13291332
}

testdata/compaction/mvcc_garbage_blob

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,3 +63,40 @@ Blob files:
6363
flush-log
6464
----
6565
[JOB 1] flushed 1 memtable (100B) to L0 [000010] (837B) blob [000011 (MVCCGarbage: 17%)] (102B), in 1.0s (1.0s total), output rate 837B/s
66+
67+
68+
# Another test with the same value separation config, but this time we set a span
69+
# policy for the span y-zz that disables value separation by suffix. This should
70+
# prevent MVCC garbage from being written to blob files.
71+
72+
define value-separation=(enabled, min-size=5, max-ref-depth=3, garbage-ratios=1.0:1.0)
73+
----
74+
75+
76+
# Unspecified minimum value size should result in using the global min size of 5.
77+
set-span-policies
78+
y,zz disable-separation-by-suffix
79+
----
80+
81+
82+
batch
83+
set apple@3 a
84+
set apple@2 ab
85+
set yay@3 a
86+
set yay@2 ab
87+
set zoo@4 b
88+
set zoo@3 ba
89+
del zoo@2
90+
set zoo@2 bag
91+
set zoo@1 bah
92+
----
93+
94+
# We expect 1 blob file to be created for the MVCC garbage values from apple but not
95+
# for the values from y-z.
96+
flush
97+
----
98+
L0.0:
99+
000005:[apple@3#10,SET-apple@2#11,SET] seqnums:[10-11] points:[apple@3#10,SET-apple@2#11,SET] size:809 blobrefs:[(B000006: 2); depth:1]
100+
000007:[yay@3#12,SET-zoo@1#18,SET] seqnums:[12-18] points:[yay@3#12,SET-zoo@1#18,SET] size:768
101+
Blob files:
102+
B000006 physical:{000006 size:[91 (91B)] vals:[2 (2B)]}

valsep/value_separation.go

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,15 @@ import (
1515
// ValueSeparationOutputConfig is used to configure value separation for an
1616
// individual compaction output.
1717
type ValueSeparationOutputConfig struct {
18-
// MinimumSize is the minimum size of a value that will be separated into a
19-
// blob file.
18+
// MinimumSize imposes a lower bound on the size of values that can be
19+
// separated into a blob file. Values smaller than this are always written
20+
// to the sstable (but may still be written to a value block within the
21+
// sstable).
2022
MinimumSize int
23+
// DisableValueSeparationBySuffix indicates whether we should consider the
24+
// KV suffix when separating values (e.g. for MVCC garbage). See
25+
// pebble.ValueStoragePolicyAdjustment for details.
26+
DisableValueSeparationBySuffix bool
2127
}
2228

2329
// ValueSeparation defines an interface for writing some values to separate blob

valsep/value_separator.go

Lines changed: 38 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,11 @@ const (
3333
rewriteAllHotBlobReferences
3434
)
3535

36-
// ValueSeparator can function in any of the valueSeparationModes,
37-
// It will be extended in the future to support the writing of hot and cold
36+
// ValueSeparator can function in any of the valueSeparationModes to write
37+
// new or preserve blob references when writing an sstable. FinishOutput
38+
// should be called when the output sstable is complete. The ValueSeparator
39+
// can then be reused for the next output sstable.
40+
// This will be extended in the future to support the writing of hot and cold
3841
// blob files. All blob references are currently written to the hot tier.
3942
type ValueSeparator struct {
4043
mode valueSeparationMode
@@ -48,24 +51,21 @@ type ValueSeparator struct {
4851
shortAttrExtractor base.ShortAttributeExtractor
4952
// writerOpts is used to configure all constructed blob writers.
5053
writerOpts blob.FileWriterOptions
51-
// minimumSize imposes a lower bound on the size of values that can be
52-
// separated into a blob file. Values smaller than this are always written
53-
// to the sstable (but may still be written to a value block within the
54-
// sstable).
5554
//
56-
// minimumSize is set to globalMinimumSize by default and on every call to
57-
// FinishOutput. It may be overriden by SetNextOutputConfig (i.e, if a
58-
// SpanPolicy dictates a different minimum size for a span of the keyspace).
59-
minimumSize int
60-
// globalMinimumSize is the size threshold for separating values into blob
61-
// files globally across the keyspace.
62-
globalMinimumSize int
55+
// currentConfig holds the configuration for the current output sstable.
56+
// It is set to globalConfig by default and on every call to FinishOutput.
57+
// It may be overridden by SetNextOutputConfig (i.e, if a SpanPolicy
58+
// dictates a different minimum size for a span of the keyspace).
59+
currentConfig ValueSeparationOutputConfig
60+
// globalConfig holds settings that are applied globally across all outputs.
61+
globalConfig ValueSeparationOutputConfig
6362
// invalidValueCallback is called when a value is encountered for which the
6463
// short attribute extractor returns an error.
6564
invalidValueCallback func(userKey []byte, value []byte, err error)
6665

6766
// state.
6867
buf []byte
68+
6969
// currPendingReferences holds the pending references that have been referenced by
7070
// the current output sstable. The index of a reference with a given blob
7171
// file ID is the value of the base.BlobReferenceID used by its value handles
@@ -109,22 +109,26 @@ func NewPreserveAllHotBlobReferences(
109109
outputBlobReferenceDepth manifest.BlobReferenceDepth,
110110
globalMinimumSize int,
111111
) *ValueSeparator {
112+
config := ValueSeparationOutputConfig{
113+
MinimumSize: globalMinimumSize,
114+
}
112115
return &ValueSeparator{
113116
mode: preserveAllHotBlobReferences,
114117
inputBlobPhysicalFiles: inputBlobPhysicalFiles,
115118
outputBlobReferenceDepth: outputBlobReferenceDepth,
116-
minimumSize: globalMinimumSize,
117-
globalMinimumSize: globalMinimumSize,
119+
globalConfig: config,
120+
currentConfig: config,
118121
}
119122
}
120123

121124
type WriteNewBlobFilesOptions struct {
122125
// InputBlobPhysicalFiles holds the *PhysicalBlobFile for every unique blob
123126
// file referenced by input sstables. This may be nil if there are no input
124127
// blob files to preserve.
125-
InputBlobPhysicalFiles map[base.BlobFileID]*manifest.PhysicalBlobFile
126-
ShortAttrExtractor base.ShortAttributeExtractor
127-
InvalidValueCallback func(userKey []byte, value []byte, err error)
128+
InputBlobPhysicalFiles map[base.BlobFileID]*manifest.PhysicalBlobFile
129+
ShortAttrExtractor base.ShortAttributeExtractor
130+
InvalidValueCallback func(userKey []byte, value []byte, err error)
131+
DisableValueSeparationBySuffix bool
128132
}
129133

130134
func NewWriteNewBlobFiles(
@@ -138,6 +142,10 @@ func NewWriteNewBlobFiles(
138142
if inputBlobPhysicalFiles == nil {
139143
inputBlobPhysicalFiles = make(map[base.BlobFileID]*manifest.PhysicalBlobFile)
140144
}
145+
config := ValueSeparationOutputConfig{
146+
MinimumSize: globalMinimumSize,
147+
DisableValueSeparationBySuffix: opts.DisableValueSeparationBySuffix,
148+
}
141149
return &ValueSeparator{
142150
mode: rewriteAllHotBlobReferences,
143151
inputBlobPhysicalFiles: inputBlobPhysicalFiles,
@@ -146,26 +154,31 @@ func NewWriteNewBlobFiles(
146154
newBlobObject: newBlobObject,
147155
shortAttrExtractor: opts.ShortAttrExtractor,
148156
writerOpts: writerOpts,
149-
minimumSize: globalMinimumSize,
150-
globalMinimumSize: globalMinimumSize,
157+
globalConfig: config,
158+
currentConfig: config,
151159
invalidValueCallback: opts.InvalidValueCallback,
152160
}
153161
}
154162

155163
// SetNextOutputConfig implements the ValueSeparation interface.
156164
func (vs *ValueSeparator) SetNextOutputConfig(config ValueSeparationOutputConfig) {
157-
vs.minimumSize = config.MinimumSize
165+
if config.MinimumSize == 0 {
166+
// This indicates that MinimumSize was unset, so fall back
167+
// to the global minimum size.
168+
config.MinimumSize = vs.globalConfig.MinimumSize
169+
}
170+
vs.currentConfig = config
158171
}
159172

160173
func (vs *ValueSeparator) Kind() sstable.ValueSeparationKind {
161-
if vs.minimumSize != vs.globalMinimumSize {
174+
if vs.currentConfig != vs.globalConfig {
162175
return sstable.ValueSeparationSpanPolicy
163176
}
164177
return sstable.ValueSeparationDefault
165178
}
166179

167180
func (vs *ValueSeparator) MinimumSize() int {
168-
return vs.minimumSize
181+
return vs.currentConfig.MinimumSize
169182
}
170183

171184
// EstimatedFileSize returns an estimate of the disk space consumed by the current
@@ -241,9 +254,7 @@ func (vs *ValueSeparator) Add(
241254

242255
// Values that are too small are never separated; however, MVCC keys are
243256
// separated if they are a SET key kind, as long as the value is not empty.
244-
// TODO(xinhaoz): Handle the case where DisableValueSeparationBySuffix=true,
245-
// for which do not want to separate MVCC garbage values.
246-
if len(v) < vs.minimumSize && !isLikelyMVCCGarbage {
257+
if len(v) < vs.currentConfig.MinimumSize && (vs.currentConfig.DisableValueSeparationBySuffix || !isLikelyMVCCGarbage) {
247258
return tw.Add(kv.K, v, forceObsolete)
248259
}
249260

@@ -462,7 +473,7 @@ func (vs *ValueSeparator) FinishOutput() (ValueSeparationMetadata, error) {
462473
referenceSize := vs.blobTiers[base.HotTier].totalPreservedValueSize + newReferencedValueSize
463474

464475
// Reset the minimum size for the next output.
465-
vs.minimumSize = vs.globalMinimumSize
476+
vs.currentConfig = vs.globalConfig
466477
// Reset the remaining state.
467478
vs.currPendingReferences = vs.currPendingReferences[:0]
468479
for i := range vs.blobTiers {

0 commit comments

Comments
 (0)