Skip to content

Commit 4a06644

Browse files
committed
db: add fallback behavior if ShortAttributeExtractor errors
When separating a value, we call a user-provided 'ShortAttributeExtractor' to extract a few bits of data from the value. These bits, known as the 'short attribute', are stored in-place with the sstable key for fast retrieval. Previously, if the user-provided ShortAttributeExtractor errored, the calling flush or compaction would abort and bubble the error up. If this was during a flush, the engine was unable to make progress, repeatedly attempting to flush a key that cannot be flushed. This commit updates the logic to fallback to storing a value in-place if the short attribute extractor is unable to parse a value. The error is still surfaced through the PossibleAPIMisuse event.
1 parent ff44e8a commit 4a06644

File tree

5 files changed

+86
-7
lines changed

5 files changed

+86
-7
lines changed

compaction.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ import (
3232
"github.com/cockroachdb/pebble/sstable/blob"
3333
"github.com/cockroachdb/pebble/sstable/block"
3434
"github.com/cockroachdb/pebble/vfs"
35+
"github.com/cockroachdb/redact"
3536
)
3637

3738
var errEmptyTable = errors.New("pebble: empty table")
@@ -3258,9 +3259,10 @@ func (d *DB) compactAndWrite(
32583259
},
32593260
MissizedDeleteCallback: func(userKey []byte, elidedSize, expectedSize uint64) {
32603261
d.opts.EventListener.PossibleAPIMisuse(PossibleAPIMisuseInfo{
3261-
Kind: MissizedDelete,
3262-
UserKey: slices.Clone(userKey),
3263-
ExtraInfo: fmt.Sprintf("elidedSize=%d,expectedSize=%d", elidedSize, expectedSize),
3262+
Kind: MissizedDelete,
3263+
UserKey: slices.Clone(userKey),
3264+
ExtraInfo: redact.Sprintf("elidedSize=%d,expectedSize=%d",
3265+
redact.SafeUint(elidedSize), redact.SafeUint(expectedSize)),
32643266
})
32653267
},
32663268
}

event.go

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -646,12 +646,14 @@ type PossibleAPIMisuseInfo struct {
646646
// UserKey is set for the following kinds:
647647
// - IneffectualSingleDelete,
648648
// - NondeterministicSingleDelete,
649-
// - MissizedDelete.
649+
// - MissizedDelete,
650+
// - InvalidValue.
650651
UserKey []byte
651652

652653
// ExtraInfo is set for the following kinds:
653654
// - MissizedDelete: contains "elidedSize=<size>,expectedSize=<size>"
654-
ExtraInfo string
655+
// - InvalidValue: contains "callback=<callbackName>,value=<value>,err=<err>"
656+
ExtraInfo redact.RedactableString
655657
}
656658

657659
func (i PossibleAPIMisuseInfo) String() string {
@@ -664,7 +666,9 @@ func (i PossibleAPIMisuseInfo) SafeFormat(w redact.SafePrinter, _ rune) {
664666
case IneffectualSingleDelete, NondeterministicSingleDelete:
665667
w.Printf("possible API misuse: %s (key=%q)", redact.Safe(i.Kind), i.UserKey)
666668
case MissizedDelete:
667-
w.Printf("possible API misuse: %s (key=%q, %s)", redact.Safe(i.Kind), i.UserKey, redact.Safe(i.ExtraInfo))
669+
w.Printf("possible API misuse: %s (key=%q, %s)", redact.Safe(i.Kind), i.UserKey, i.ExtraInfo)
670+
case InvalidValue:
671+
w.Printf("possible API misuse: %s (key=%q, %s)", redact.Safe(i.Kind), i.UserKey, i.ExtraInfo)
668672
default:
669673
if invariants.Enabled {
670674
panic("invalid API misuse event")
@@ -759,6 +763,12 @@ const (
759763
// not accurately record the size of the value it deleted. This can lead to
760764
// incorrect behavior in compactions.
761765
MissizedDelete
766+
767+
// InvalidValue is emitted when a user-implemented callback (such as
768+
// ShortAttributeExtractor) returns an error for a committed value. This
769+
// suggests that either the callback is not implemented for all possible
770+
// values or a malformed value was committed to the DB.
771+
InvalidValue
762772
)
763773

764774
func (k APIMisuseKind) String() string {
@@ -769,6 +779,8 @@ func (k APIMisuseKind) String() string {
769779
return "nondeterministic SINGLEDEL"
770780
case MissizedDelete:
771781
return "missized DELSIZED"
782+
case InvalidValue:
783+
return "invalid value"
772784
default:
773785
return "unknown"
774786
}

testdata/value_separation_policy

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,3 +155,27 @@ no blob file created
155155
blobrefs:[
156156
0: B000003 5
157157
]
158+
159+
# Test handling of a short attribute extractor that errors when we attempt to
160+
# extract the short attribute. We should fall back to writing the value inplace
161+
# within the output sstable.
162+
163+
init write-new-blob-files minimum-size=2 short-attr-extractor=error
164+
----
165+
166+
add
167+
bar#201,SET:poi
168+
bax#202,SET:blob{value=yaya}
169+
----
170+
# create: 000007.sst
171+
# invalid value for key "bar", value: "poi": short attribute extractor error
172+
RawWriter.Add("bar#201,SET", "poi", false)
173+
# invalid value for key "bax", value: "yaya": short attribute extractor error
174+
RawWriter.Add("bax#202,SET", "yaya", false)
175+
176+
close-output
177+
----
178+
# sync-data: 000007.sst
179+
# close: 000007.sst
180+
no blob file created
181+
blobrefs:[]

value_separation.go

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import (
1616
"github.com/cockroachdb/pebble/objstorage"
1717
"github.com/cockroachdb/pebble/sstable"
1818
"github.com/cockroachdb/pebble/sstable/blob"
19+
"github.com/cockroachdb/redact"
1920
)
2021

2122
var neverSeparateValues getValueSeparation = func(JobID, *compaction, sstable.TableFormat) compact.ValueSeparation {
@@ -56,6 +57,16 @@ func (d *DB) determineCompactionValueSeparation(
5657
shortAttrExtractor: d.opts.Experimental.ShortAttributeExtractor,
5758
writerOpts: d.opts.MakeBlobWriterOptions(c.outputLevel.level),
5859
minimumSize: policy.MinimumSize,
60+
invalidValueCallback: func(userKey []byte, value []byte, err error) {
61+
// The value may not be safe, so it will be redacted when redaction
62+
// is enabled.
63+
d.opts.EventListener.PossibleAPIMisuse(PossibleAPIMisuseInfo{
64+
Kind: InvalidValue,
65+
UserKey: userKey,
66+
ExtraInfo: redact.Sprintf("callback=ShortAttributeExtractor,value=%x,err=%q",
67+
value, err),
68+
})
69+
},
5970
}
6071
}
6172

@@ -167,6 +178,9 @@ type writeNewBlobFiles struct {
167178
// to the sstable (but may still be written to a value block within the
168179
// sstable).
169180
minimumSize int
181+
// invalidValueCallback is called when a value is encountered for which the
182+
// short attribute extractor returns an error.
183+
invalidValueCallback func(userKey []byte, value []byte, err error)
170184

171185
// Current blob writer state
172186
writer *blob.FileWriter
@@ -230,7 +244,15 @@ func (vs *writeNewBlobFiles) Add(
230244
keyPrefixLen := vs.comparer.Split(kv.K.UserKey)
231245
shortAttr, err = vs.shortAttrExtractor(kv.K.UserKey, keyPrefixLen, v)
232246
if err != nil {
233-
return err
247+
// Report that there was a value for which the short attribute
248+
// extractor was unable to extract a short attribute.
249+
vs.invalidValueCallback(kv.K.UserKey, v, err)
250+
251+
// Rather than erroring out and aborting the flush or compaction, we
252+
// fallback to writing the value verbatim to the sstable. Otherwise
253+
// a flush could busy loop, repeatedly attempting to write the same
254+
// memtable and repeatedly unable to extract a key's short attribute.
255+
return tw.Add(kv.K, v, forceObsolete)
234256
}
235257
}
236258

value_separation_test.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import (
1515

1616
"github.com/cockroachdb/crlib/crstrings"
1717
"github.com/cockroachdb/datadriven"
18+
"github.com/cockroachdb/errors"
1819
"github.com/cockroachdb/pebble/internal/base"
1920
"github.com/cockroachdb/pebble/internal/blobtest"
2021
"github.com/cockroachdb/pebble/internal/compact"
@@ -94,8 +95,19 @@ func TestValueSeparationPolicy(t *testing.T) {
9495
fn++
9596
return objStore.Create(ctx, base.FileTypeBlob, fn, objstorage.CreateOptions{})
9697
},
98+
invalidValueCallback: func(userKey []byte, value []byte, err error) {
99+
fmt.Fprintf(&buf, "# invalid value for key %q, value: %q: %s\n", userKey, value, err)
100+
},
97101
}
98102
d.MaybeScanArgs(t, "minimum-size", &newSep.minimumSize)
103+
if arg, ok := d.Arg("short-attr-extractor"); ok {
104+
switch arg.SingleVal(t) {
105+
case "error":
106+
newSep.shortAttrExtractor = errShortAttrExtractor
107+
default:
108+
t.Fatalf("unknown short attribute extractor: %s", arg.String())
109+
}
110+
}
99111
vs = newSep
100112
default:
101113
t.Fatalf("unknown value separation policy: %s", x)
@@ -153,6 +165,13 @@ func TestValueSeparationPolicy(t *testing.T) {
153165
})
154166
}
155167

168+
func errShortAttrExtractor(key []byte, keyPrefixLen int, value []byte) (ShortAttribute, error) {
169+
return 0, errors.New("short attribute extractor error")
170+
}
171+
172+
// Assert that errShortAttrExtractor implements the ShortAttributeExtractor
173+
var _ ShortAttributeExtractor = errShortAttrExtractor
174+
156175
// loggingRawWriter wraps a sstable.RawWriter and logs calls to Add and
157176
// AddWithBlobHandle to provide observability into the separation of values into
158177
// blob files.

0 commit comments

Comments
 (0)