@@ -33,8 +33,11 @@ const (
3333 rewriteAllHotBlobReferences
3434)
3535
36- // ValueSeparator can function in any of the valueSeparationModes,
37- // It will be extended in the future to support the writing of hot and cold
36+ // ValueSeparator can function in any of the valueSeparationModes to write
37+ // new blob references or preserve existing ones when writing an sstable.
38+ // FinishOutput should be called when the output sstable is complete. The
39+ // ValueSeparator can then be reused for the next output sstable.
40+ // This will be extended in the future to support the writing of hot and cold
3841// blob files. All blob references are currently written to the hot tier.
3942type ValueSeparator struct {
4043 mode valueSeparationMode
@@ -48,24 +51,21 @@ type ValueSeparator struct {
4851 shortAttrExtractor base.ShortAttributeExtractor
4952 // writerOpts is used to configure all constructed blob writers.
5053 writerOpts blob.FileWriterOptions
51- // minimumSize imposes a lower bound on the size of values that can be
52- // separated into a blob file. Values smaller than this are always written
53- // to the sstable (but may still be written to a value block within the
54- // sstable).
5554 //
56- // minimumSize is set to globalMinimumSize by default and on every call to
57- // FinishOutput. It may be overriden by SetNextOutputConfig (i.e, if a
58- // SpanPolicy dictates a different minimum size for a span of the keyspace).
59- minimumSize int
60- // globalMinimumSize is the size threshold for separating values into blob
61- // files globally across the keyspace .
62- globalMinimumSize int
55+ // currentConfig holds the configuration for the current output sstable.
56+ // It is set to globalConfig by default and on every call to FinishOutput.
57+ // It may be overridden by SetNextOutputConfig (e.g., if a SpanPolicy
58+ // dictates a different minimum size for a span of the keyspace).
59+ currentConfig ValueSeparationOutputConfig
60+ // globalConfig holds settings that are applied globally across all outputs.
61+ globalConfig ValueSeparationOutputConfig
6362 // invalidValueCallback is called when a value is encountered for which the
6463 // short attribute extractor returns an error.
6564 invalidValueCallback func (userKey []byte , value []byte , err error )
6665
6766 // state.
6867 buf []byte
68+
6969 // currPendingReferences holds the pending references that have been referenced by
7070 // the current output sstable. The index of a reference with a given blob
7171 // file ID is the value of the base.BlobReferenceID used by its value handles
@@ -109,22 +109,26 @@ func NewPreserveAllHotBlobReferences(
109109 outputBlobReferenceDepth manifest.BlobReferenceDepth ,
110110 globalMinimumSize int ,
111111) * ValueSeparator {
112+ config := ValueSeparationOutputConfig {
113+ MinimumSize : globalMinimumSize ,
114+ }
112115 return & ValueSeparator {
113116 mode : preserveAllHotBlobReferences ,
114117 inputBlobPhysicalFiles : inputBlobPhysicalFiles ,
115118 outputBlobReferenceDepth : outputBlobReferenceDepth ,
116- minimumSize : globalMinimumSize ,
117- globalMinimumSize : globalMinimumSize ,
119+ globalConfig : config ,
120+ currentConfig : config ,
118121 }
119122}
120123
121124type WriteNewBlobFilesOptions struct {
122125 // InputBlobPhysicalFiles holds the *PhysicalBlobFile for every unique blob
123126 // file referenced by input sstables. This may be nil if there are no input
124127 // blob files to preserve.
125- InputBlobPhysicalFiles map [base.BlobFileID ]* manifest.PhysicalBlobFile
126- ShortAttrExtractor base.ShortAttributeExtractor
127- InvalidValueCallback func (userKey []byte , value []byte , err error )
128+ InputBlobPhysicalFiles map [base.BlobFileID ]* manifest.PhysicalBlobFile
129+ ShortAttrExtractor base.ShortAttributeExtractor
130+ InvalidValueCallback func (userKey []byte , value []byte , err error )
131+ DisableValueSeparationBySuffix bool
128132}
129133
130134func NewWriteNewBlobFiles (
@@ -138,6 +142,10 @@ func NewWriteNewBlobFiles(
138142 if inputBlobPhysicalFiles == nil {
139143 inputBlobPhysicalFiles = make (map [base.BlobFileID ]* manifest.PhysicalBlobFile )
140144 }
145+ config := ValueSeparationOutputConfig {
146+ MinimumSize : globalMinimumSize ,
147+ DisableValueSeparationBySuffix : opts .DisableValueSeparationBySuffix ,
148+ }
141149 return & ValueSeparator {
142150 mode : rewriteAllHotBlobReferences ,
143151 inputBlobPhysicalFiles : inputBlobPhysicalFiles ,
@@ -146,26 +154,31 @@ func NewWriteNewBlobFiles(
146154 newBlobObject : newBlobObject ,
147155 shortAttrExtractor : opts .ShortAttrExtractor ,
148156 writerOpts : writerOpts ,
149- minimumSize : globalMinimumSize ,
150- globalMinimumSize : globalMinimumSize ,
157+ globalConfig : config ,
158+ currentConfig : config ,
151159 invalidValueCallback : opts .InvalidValueCallback ,
152160 }
153161}
154162
155163// SetNextOutputConfig implements the ValueSeparation interface.
156164func (vs * ValueSeparator ) SetNextOutputConfig (config ValueSeparationOutputConfig ) {
157- vs .minimumSize = config .MinimumSize
165+ if config .MinimumSize == 0 {
166+ // This indicates that MinimumSize was unset, so fall back
167+ // to the global minimum size.
168+ config .MinimumSize = vs .globalConfig .MinimumSize
169+ }
170+ vs .currentConfig = config
158171}
159172
160173func (vs * ValueSeparator ) Kind () sstable.ValueSeparationKind {
161- if vs .minimumSize != vs .globalMinimumSize {
174+ if vs .currentConfig != vs .globalConfig {
162175 return sstable .ValueSeparationSpanPolicy
163176 }
164177 return sstable .ValueSeparationDefault
165178}
166179
167180func (vs * ValueSeparator ) MinimumSize () int {
168- return vs .minimumSize
181+ return vs .currentConfig . MinimumSize
169182}
170183
171184// EstimatedFileSize returns an estimate of the disk space consumed by the current
@@ -241,9 +254,7 @@ func (vs *ValueSeparator) Add(
241254
242255 // Values that are too small are never separated; however, MVCC keys are separated
243256 // if they are a SET key kind with a non-empty value and DisableValueSeparationBySuffix is unset.
244- // TODO(xinhaoz): Handle the case where DisableValueSeparationBySuffix=true,
245- // for which do not want to separate MVCC garbage values.
246- if len (v ) < vs .minimumSize && ! isLikelyMVCCGarbage {
257+ if len (v ) < vs .currentConfig .MinimumSize && (vs .currentConfig .DisableValueSeparationBySuffix || ! isLikelyMVCCGarbage ) {
247258 return tw .Add (kv .K , v , forceObsolete )
248259 }
249260
@@ -462,7 +473,7 @@ func (vs *ValueSeparator) FinishOutput() (ValueSeparationMetadata, error) {
462473 referenceSize := vs .blobTiers [base .HotTier ].totalPreservedValueSize + newReferencedValueSize
463474
464475 // Reset the per-output configuration (including the minimum size) for the next output.
465- vs .minimumSize = vs .globalMinimumSize
476+ vs .currentConfig = vs .globalConfig
466477 // Reset the remaining state.
467478 vs .currPendingReferences = vs .currPendingReferences [:0 ]
468479 for i := range vs .blobTiers {
0 commit comments