Skip to content

Commit 8bf6345

Browse files
committed
db: prohibit ingestion or external iteration of sstables with blob references
This commit adds validation to NewExternalIter and the ingestion pathways prohibiting the external iteration or ingestion of sstables containing blob references. This could conceivably be relaxed in the future if we provide a mechanism to provide the referenced blob files too, but for now this is an error condition. Additionally the most recent experimental format major version that introduces the TableFormatPebblev6 is renamed to capture the fact that it most significantly will enable use of blob value separation (in addition to the introduction of a checksum covering sstable footer data).
1 parent 791dda0 commit 8bf6345

File tree

11 files changed

+280
-72
lines changed

11 files changed

+280
-72
lines changed

compaction_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1225,8 +1225,8 @@ func TestManualCompaction(t *testing.T) {
12251225
},
12261226
{
12271227
testData: "testdata/manual_compaction_set_with_del_sstable_Pebblev6",
1228-
minVersion: formatChecksumFooter,
1229-
maxVersion: formatChecksumFooter,
1228+
minVersion: formatTableFormatV6,
1229+
maxVersion: formatTableFormatV6,
12301230
},
12311231
}
12321232

data_test.go

Lines changed: 104 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,9 @@ import (
1818
"strings"
1919
"testing"
2020
"time"
21+
"unicode"
2122

23+
"github.com/cockroachdb/crlib/crstrings"
2224
"github.com/cockroachdb/datadriven"
2325
"github.com/cockroachdb/errors"
2426
"github.com/cockroachdb/pebble/bloom"
@@ -28,6 +30,7 @@ import (
2830
"github.com/cockroachdb/pebble/internal/rangekey"
2931
"github.com/cockroachdb/pebble/internal/sstableinternal"
3032
"github.com/cockroachdb/pebble/internal/testkeys"
33+
"github.com/cockroachdb/pebble/internal/testutils"
3134
"github.com/cockroachdb/pebble/objstorage/objstorageprovider"
3235
"github.com/cockroachdb/pebble/objstorage/remote"
3336
"github.com/cockroachdb/pebble/sstable"
@@ -428,26 +431,51 @@ func parseValue(s string) []byte {
428431
return []byte(s)
429432
}
430433

434+
func splitFields(line string, n int) ([]string, error) {
435+
return splitFieldsRange(line, n, n)
436+
}
437+
438+
func splitFieldsRange(line string, minmum, maximum int) ([]string, error) {
439+
fields := strings.Fields(line)
440+
if len(fields) < minmum {
441+
return nil, errors.Errorf("require at least %d fields, got %d", minmum, len(fields))
442+
}
443+
if len(fields) > maximum {
444+
fields[maximum-1] = strings.Join(fields[maximum-1:], " ")
445+
fields = fields[:maximum]
446+
}
447+
for i := range fields {
448+
if fields[i] == `<nil>` {
449+
fields[i] = ""
450+
}
451+
}
452+
return fields, nil
453+
}
454+
431455
func runBatchDefineCmd(d *datadriven.TestData, b *Batch) error {
432-
for _, line := range strings.Split(d.Input, "\n") {
433-
parts := strings.Fields(line)
434-
if len(parts) == 0 {
456+
for _, line := range crstrings.Lines(d.Input) {
457+
i := strings.IndexFunc(line, unicode.IsSpace)
458+
cmd := line
459+
if i > 0 {
460+
cmd = line[:i]
461+
} else if cmd == "" {
435462
continue
436463
}
437-
if parts[1] == `<nil>` {
438-
parts[1] = ""
439-
}
464+
465+
var parts []string
440466
var err error
441-
switch parts[0] {
467+
switch cmd {
442468
case "set":
443-
if len(parts) != 3 {
444-
return errors.Errorf("%s expects 2 arguments", parts[0])
469+
parts, err = splitFields(line, 3)
470+
if err != nil {
471+
return err
445472
}
446473
err = b.Set([]byte(parts[1]), parseValue(parts[2]), nil)
447474

448475
case "set-multiple":
449-
if len(parts) != 3 {
450-
return errors.Errorf("%s expects 2 arguments (n and prefix)", parts[0])
476+
parts, err = splitFields(line, 3)
477+
if err != nil {
478+
return err
451479
}
452480
n, err := strconv.ParseUint(parts[1], 10, 32)
453481
if err != nil {
@@ -462,13 +490,15 @@ func runBatchDefineCmd(d *datadriven.TestData, b *Batch) error {
462490
}
463491

464492
case "del":
465-
if len(parts) != 2 {
466-
return errors.Errorf("%s expects 1 argument", parts[0])
493+
parts, err = splitFields(line, 2)
494+
if err != nil {
495+
return err
467496
}
468497
err = b.Delete([]byte(parts[1]), nil)
469498
case "del-sized":
470-
if len(parts) != 3 {
471-
return errors.Errorf("%s expects 2 arguments", parts[0])
499+
parts, err = splitFields(line, 3)
500+
if err != nil {
501+
return err
472502
}
473503
var valSize uint64
474504
valSize, err = strconv.ParseUint(parts[2], 10, 32)
@@ -477,23 +507,27 @@ func runBatchDefineCmd(d *datadriven.TestData, b *Batch) error {
477507
}
478508
err = b.DeleteSized([]byte(parts[1]), uint32(valSize), nil)
479509
case "singledel":
480-
if len(parts) != 2 {
481-
return errors.Errorf("%s expects 1 argument", parts[0])
510+
parts, err = splitFields(line, 2)
511+
if err != nil {
512+
return err
482513
}
483514
err = b.SingleDelete([]byte(parts[1]), nil)
484515
case "del-range":
485-
if len(parts) != 3 {
486-
return errors.Errorf("%s expects 2 arguments", parts[0])
516+
parts, err = splitFields(line, 3)
517+
if err != nil {
518+
return err
487519
}
488520
err = b.DeleteRange([]byte(parts[1]), []byte(parts[2]), nil)
489521
case "merge":
490-
if len(parts) != 3 {
491-
return errors.Errorf("%s expects 2 arguments", parts[0])
522+
parts, err = splitFields(line, 3)
523+
if err != nil {
524+
return err
492525
}
493526
err = b.Merge([]byte(parts[1]), parseValue(parts[2]), nil)
494527
case "range-key-set":
495-
if len(parts) < 4 || len(parts) > 5 {
496-
return errors.Errorf("%s expects 3 or 4 arguments", parts[0])
528+
parts, err = splitFieldsRange(line, 4, 5)
529+
if err != nil {
530+
return err
497531
}
498532
var val []byte
499533
if len(parts) == 5 {
@@ -506,24 +540,26 @@ func runBatchDefineCmd(d *datadriven.TestData, b *Batch) error {
506540
val,
507541
nil)
508542
case "range-key-unset":
509-
if len(parts) != 4 {
510-
return errors.Errorf("%s expects 3 arguments", parts[0])
543+
parts, err = splitFields(line, 4)
544+
if err != nil {
545+
return err
511546
}
512547
err = b.RangeKeyUnset(
513548
[]byte(parts[1]),
514549
[]byte(parts[2]),
515550
[]byte(parts[3]),
516551
nil)
517552
case "range-key-del":
518-
if len(parts) != 3 {
519-
return errors.Errorf("%s expects 2 arguments", parts[0])
553+
parts, err = splitFields(line, 3)
554+
if err != nil {
555+
return err
520556
}
521557
err = b.RangeKeyDelete(
522558
[]byte(parts[1]),
523559
[]byte(parts[2]),
524560
nil)
525561
default:
526-
return errors.Errorf("unknown op: %s", parts[0])
562+
return errors.Errorf("unknown op: %s", cmd)
527563
}
528564
if err != nil {
529565
return err
@@ -635,7 +671,27 @@ func runBuildRemoteCmd(td *datadriven.TestData, d *DB, storage remote.Storage) e
635671
return w.Close()
636672
}
637673

638-
func runBuildCmd(td *datadriven.TestData, d *DB, fs vfs.FS) error {
674+
type dataDrivenCmdOptions struct {
675+
blobValues *testutils.BlobValues
676+
}
677+
678+
func withBlobValues(bv *testutils.BlobValues) func(*dataDrivenCmdOptions) {
679+
return func(o *dataDrivenCmdOptions) { o.blobValues = bv }
680+
}
681+
682+
func combineDataDrivenOpts(opts ...func(*dataDrivenCmdOptions)) dataDrivenCmdOptions {
683+
combined := dataDrivenCmdOptions{}
684+
for _, opt := range opts {
685+
opt(&combined)
686+
}
687+
return combined
688+
}
689+
690+
func runBuildCmd(
691+
td *datadriven.TestData, d *DB, fs vfs.FS, opts ...func(*dataDrivenCmdOptions),
692+
) error {
693+
ddOpts := combineDataDrivenOpts(opts...)
694+
639695
b := newIndexedBatch(nil, d.opts.Comparer)
640696
if err := runBatchDefineCmd(td, b); err != nil {
641697
return err
@@ -660,14 +716,16 @@ func runBuildCmd(td *datadriven.TestData, d *DB, fs vfs.FS) error {
660716
tableFormat = sstable.TableFormatPebblev3
661717
case "pebblev4":
662718
tableFormat = sstable.TableFormatPebblev4
719+
case "pebblev5":
720+
tableFormat = sstable.TableFormatPebblev5
663721
default:
664722
return errors.Errorf("unknown format string %s", cmdArg.Vals[0])
665723
}
666724
}
667725
}
668726

669727
writeOpts := d.opts.MakeWriterOptions(0 /* level */, tableFormat)
670-
728+
var blobReferences testutils.BlobReferences
671729
f, err := fs.Create(path, vfs.WriteCategoryUnspecified)
672730
if err != nil {
673731
return err
@@ -677,7 +735,22 @@ func runBuildCmd(td *datadriven.TestData, d *DB, fs vfs.FS) error {
677735
for kv := iter.First(); kv != nil; kv = iter.Next() {
678736
tmp := kv.K
679737
tmp.SetSeqNum(0)
680-
if err := w.Raw().Add(tmp, kv.InPlaceValue(), false); err != nil {
738+
739+
v := kv.InPlaceValue()
740+
// If the value looks like it's a debug blob handle, parse it and add it
741+
// to the sstable as a blob handle.
742+
if ddOpts.blobValues != nil && ddOpts.blobValues.IsBlobHandle(string(v)) {
743+
handle, err := ddOpts.blobValues.ParseInlineHandle(string(v), &blobReferences)
744+
if err != nil {
745+
return err
746+
}
747+
if err := w.Raw().AddWithBlobHandle(tmp, handle, base.ShortAttribute(0), false); err != nil {
748+
return err
749+
}
750+
continue
751+
}
752+
// Otherwise add it as an ordinary value.
753+
if err := w.Raw().Add(tmp, v, false); err != nil {
681754
return err
682755
}
683756
}

external_iterator.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@ import (
2626
// sorted in internal key order, where lower index files contain keys that sort
2727
// left of files with higher indexes.
2828
//
29-
// Input sstables must only contain keys with the zero sequence number.
29+
// Input sstables must only contain keys with the zero sequence number and must
30+
// not contain references to values in external blob files.
3031
//
3132
// Iterators constructed through NewExternalIter do not support all iterator
3233
// options, including block-property and table filters. NewExternalIter errors
@@ -302,6 +303,9 @@ func openExternalTables(
302303
if err != nil {
303304
return readers, err
304305
}
306+
if r.Properties.NumValuesInBlobFiles > 0 {
307+
return readers, errors.Newf("pebble: NewExternalIter does not support blob references")
308+
}
305309
readers = append(readers, r)
306310
}
307311
return readers, err

external_iterator_test.go

Lines changed: 32 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import (
1414
"github.com/cockroachdb/datadriven"
1515
"github.com/cockroachdb/errors"
1616
"github.com/cockroachdb/pebble/internal/testkeys"
17+
"github.com/cockroachdb/pebble/internal/testutils"
1718
"github.com/cockroachdb/pebble/objstorage/objstorageprovider"
1819
"github.com/cockroachdb/pebble/sstable"
1920
"github.com/cockroachdb/pebble/vfs"
@@ -33,39 +34,49 @@ func TestExternalIterator(t *testing.T) {
3334
d, err := Open("", o)
3435
require.NoError(t, err)
3536
defer func() { require.NoError(t, d.Close()) }()
37+
var bv testutils.BlobValues
38+
39+
getOptsAndFiles := func(td *datadriven.TestData) (opts IterOptions, files [][]sstable.ReadableFile) {
40+
opts = IterOptions{KeyTypes: IterKeyTypePointsAndRanges}
41+
for _, arg := range td.CmdArgs {
42+
switch arg.Key {
43+
case "mask-suffix":
44+
opts.RangeKeyMasking.Suffix = []byte(arg.Vals[0])
45+
case "lower":
46+
opts.LowerBound = []byte(arg.Vals[0])
47+
case "upper":
48+
opts.UpperBound = []byte(arg.Vals[0])
49+
case "files":
50+
for _, v := range arg.Vals {
51+
f, err := mem.Open(v)
52+
require.NoError(t, err)
53+
files = append(files, []sstable.ReadableFile{f})
54+
}
55+
}
56+
}
57+
return opts, files
58+
}
3659

3760
datadriven.RunTest(t, "testdata/external_iterator", func(t *testing.T, td *datadriven.TestData) string {
3861
switch td.Cmd {
3962
case "reset":
4063
mem = vfs.NewMem()
4164
return ""
4265
case "build":
43-
if err := runBuildCmd(td, d, mem); err != nil {
66+
if err := runBuildCmd(td, d, mem, withBlobValues(&bv)); err != nil {
4467
return err.Error()
4568
}
4669
return ""
47-
case "iter":
48-
opts := IterOptions{KeyTypes: IterKeyTypePointsAndRanges}
49-
var files [][]sstable.ReadableFile
50-
for _, arg := range td.CmdArgs {
51-
switch arg.Key {
52-
case "mask-suffix":
53-
opts.RangeKeyMasking.Suffix = []byte(arg.Vals[0])
54-
case "lower":
55-
opts.LowerBound = []byte(arg.Vals[0])
56-
case "upper":
57-
opts.UpperBound = []byte(arg.Vals[0])
58-
case "files":
59-
for _, v := range arg.Vals {
60-
f, err := mem.Open(v)
61-
require.NoError(t, err)
62-
files = append(files, []sstable.ReadableFile{f})
63-
}
64-
}
65-
}
70+
case "iter-init-error":
71+
opts, files := getOptsAndFiles(td)
6672
testExternalIteratorInitError(t, o, &opts, files)
73+
return ""
74+
case "iter":
75+
opts, files := getOptsAndFiles(td)
6776
it, err := NewExternalIter(o, &opts, files)
68-
require.NoError(t, err)
77+
if err != nil {
78+
return fmt.Sprintf("error: %s", err.Error())
79+
}
6980
return runIterCmd(td, it, true /* close iter */)
7081
default:
7182
return fmt.Sprintf("unknown command: %s", td.Cmd)

format_major_version.go

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -214,10 +214,15 @@ const (
214214

215215
// -- Add experimental versions here --
216216

217-
// formatChecksumFooter is a format major version enabling use of the
218-
// TableFormatPebblev6 table format. It is a format allowing for the checksum
219-
// of sstable footers.
220-
formatChecksumFooter
217+
// formatTableFormatV6 is a format major version enabling the sstable table
218+
// format TableFormatPebblev6.
219+
//
220+
// The TableFormatPebblev6 sstable format introduces a checksum within the
221+
// sstable footer, and allows inclusion of blob handle references within the
222+
// value column of a sstable block.
223+
//
224+
// This format major version does not yet enable use of value separation.
225+
formatTableFormatV6
221226

222227
// internalFormatNewest is the most recent, possibly experimental format major
223228
// version.
@@ -249,7 +254,7 @@ func (v FormatMajorVersion) MaxTableFormat() sstable.TableFormat {
249254
return sstable.TableFormatPebblev4
250255
case FormatColumnarBlocks, FormatWALSyncChunks:
251256
return sstable.TableFormatPebblev5
252-
case formatChecksumFooter:
257+
case formatTableFormatV6:
253258
return sstable.TableFormatPebblev6
254259
default:
255260
panic(fmt.Sprintf("pebble: unsupported format major version: %s", v))
@@ -263,7 +268,7 @@ func (v FormatMajorVersion) MinTableFormat() sstable.TableFormat {
263268
case FormatDefault, FormatFlushableIngest, FormatPrePebblev1MarkedCompacted,
264269
FormatDeleteSizedAndObsolete, FormatVirtualSSTables, FormatSyntheticPrefixSuffix,
265270
FormatFlushableIngestExcises, FormatColumnarBlocks, FormatWALSyncChunks,
266-
formatChecksumFooter:
271+
formatTableFormatV6:
267272
return sstable.TableFormatPebblev1
268273
default:
269274
panic(fmt.Sprintf("pebble: unsupported format major version: %s", v))
@@ -309,8 +314,8 @@ var formatMajorVersionMigrations = map[FormatMajorVersion]func(*DB) error{
309314
FormatWALSyncChunks: func(d *DB) error {
310315
return d.finalizeFormatVersUpgrade(FormatWALSyncChunks)
311316
},
312-
formatChecksumFooter: func(d *DB) error {
313-
return d.finalizeFormatVersUpgrade(formatChecksumFooter)
317+
formatTableFormatV6: func(d *DB) error {
318+
return d.finalizeFormatVersUpgrade(formatTableFormatV6)
314319
},
315320
}
316321

0 commit comments

Comments
 (0)