Skip to content

Commit 95d16a9

Browse files
committed
sstable: export functionality to parse SST contents
We export `ParseTestKVsAndSpans`, which parses multi-line SST contents, and `ParseTestSST`, which additionally writes the parsed contents to a RawWriter.
1 parent 34d4e7f commit 95d16a9

File tree

9 files changed

+196
-115
lines changed

9 files changed

+196
-115
lines changed

sstable/blob/handle.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ func (h InlineHandle) String() string {
8989

9090
// SafeFormat implements redact.SafeFormatter.
9191
func (h InlineHandle) SafeFormat(w redact.SafePrinter, _ rune) {
92-
w.Printf("(f%d,blk%d[%d:%d])",
92+
w.Printf("(%d, blk%d[%d:%d])",
9393
h.ReferenceID, h.BlockNum, h.OffsetInBlock, h.OffsetInBlock+h.ValueLen)
9494
}
9595

sstable/data_test.go

Lines changed: 1 addition & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,10 @@ package sstable
77
import (
88
"bytes"
99
"context"
10-
"encoding/hex"
1110
"fmt"
1211
"io"
1312
"strconv"
1413
"strings"
15-
"unicode"
1614

1715
"github.com/cockroachdb/crlib/crstrings"
1816
"github.com/cockroachdb/datadriven"
@@ -25,7 +23,6 @@ import (
2523
"github.com/cockroachdb/pebble/internal/testkeys"
2624
"github.com/cockroachdb/pebble/objstorage"
2725
"github.com/cockroachdb/pebble/objstorage/objstorageprovider"
28-
"github.com/cockroachdb/pebble/sstable/blob"
2926
"github.com/cockroachdb/pebble/sstable/colblk"
3027
"github.com/cockroachdb/pebble/vfs"
3128
)
@@ -106,7 +103,7 @@ func runBuildMemObjCmd(
106103
_ = w.Close()
107104
}
108105
}()
109-
if err := writeKVs(w, td.Input); err != nil {
106+
if err := ParseTestSST(w, td.Input); err != nil {
110107
return nil, nil, err
111108
}
112109
if err := w.Close(); err != nil {
@@ -120,100 +117,6 @@ func runBuildMemObjCmd(
120117
return meta, obj, nil
121118
}
122119

123-
func writeKVs(w RawWriter, input string) (err error) {
124-
defer func() {
125-
if r := recover(); r != nil {
126-
err = errors.Errorf("%v", r)
127-
}
128-
}()
129-
for _, data := range strings.Split(input, "\n") {
130-
switch {
131-
case strings.HasPrefix(data, "Span:"):
132-
err = w.EncodeSpan(keyspan.ParseSpan(strings.TrimPrefix(data, "Span:")))
133-
default:
134-
forceObsolete := strings.HasPrefix(data, "force-obsolete:")
135-
if forceObsolete {
136-
data = strings.TrimSpace(strings.TrimPrefix(data, "force-obsolete:"))
137-
}
138-
j := strings.Index(data, ":")
139-
key := base.ParseInternalKey(data[:j])
140-
value := []byte(data[j+1:])
141-
142-
switch key.Kind() {
143-
case InternalKeyKindRangeDelete:
144-
if forceObsolete {
145-
return errors.Errorf("force-obsolete is not allowed for RANGEDEL")
146-
}
147-
err = w.Add(key, value, false /* forceObsolete */)
148-
default:
149-
if bytes.HasPrefix(value, []byte("blobInlineHandle(")) {
150-
var handle blob.InlineHandle
151-
var attr base.ShortAttribute
152-
handle, attr, err = decodeBlobInlineHandleAndAttribute(string(value))
153-
if err != nil {
154-
return err
155-
}
156-
err = w.AddWithBlobHandle(key, handle, attr, forceObsolete)
157-
} else {
158-
err = w.Add(key, value, forceObsolete)
159-
}
160-
}
161-
if err != nil {
162-
return err
163-
}
164-
}
165-
if err != nil {
166-
return err
167-
}
168-
}
169-
return err
170-
}
171-
172-
// decodeBlobInlineHandleAndAttribute decodes a blob handle (in its inline form)
173-
// and its short attribute from a debug string. It expects a value of the form:
174-
// blobInlineHandle(<refIndex>, blk<blocknum>, <offset>, <valLen>, <attr>). For example:
175-
//
176-
// blobInlineHandle(24, blk255, 10, 9235, 0x07)
177-
func decodeBlobInlineHandleAndAttribute(
178-
ref string,
179-
) (blob.InlineHandle, base.ShortAttribute, error) {
180-
fields := strings.FieldsFunc(strings.TrimSuffix(strings.TrimPrefix(ref, "blobInlineHandle("), ")"),
181-
func(r rune) bool { return r == ',' || unicode.IsSpace(r) })
182-
if len(fields) != 5 {
183-
return blob.InlineHandle{}, base.ShortAttribute(0), errors.New("expected 5 fields")
184-
}
185-
refIdx, err := strconv.ParseUint(fields[0], 10, 32)
186-
if err != nil {
187-
return blob.InlineHandle{}, base.ShortAttribute(0), errors.Wrap(err, "failed to parse file offset")
188-
}
189-
blockNum, err := strconv.ParseUint(strings.TrimPrefix(fields[1], "blk"), 10, 32)
190-
if err != nil {
191-
return blob.InlineHandle{}, base.ShortAttribute(0), errors.Wrap(err, "failed to parse block number")
192-
}
193-
off, err := strconv.ParseUint(fields[2], 10, 32)
194-
if err != nil {
195-
return blob.InlineHandle{}, base.ShortAttribute(0), errors.Wrap(err, "failed to parse offset")
196-
}
197-
valLen, err := strconv.ParseUint(fields[3], 10, 32)
198-
if err != nil {
199-
return blob.InlineHandle{}, base.ShortAttribute(0), errors.Wrap(err, "failed to parse value length")
200-
}
201-
attr, err := hex.DecodeString(strings.TrimPrefix(fields[4], "0x"))
202-
if err != nil {
203-
return blob.InlineHandle{}, base.ShortAttribute(0), errors.Wrap(err, "failed to parse attribute")
204-
}
205-
return blob.InlineHandle{
206-
InlineHandlePreface: blob.InlineHandlePreface{
207-
ReferenceID: blob.ReferenceID(refIdx),
208-
ValueLen: uint32(valLen),
209-
},
210-
HandleSuffix: blob.HandleSuffix{
211-
BlockNum: uint32(blockNum),
212-
OffsetInBlock: uint32(off),
213-
},
214-
}, base.ShortAttribute(attr[0]), nil
215-
}
216-
217120
func runBuildCmd(
218121
td *datadriven.TestData, writerOpts *WriterOptions, cacheHandle *cache.Handle,
219122
) (*WriterMetadata, *Reader, error) {

sstable/test_utils.go

Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,19 @@ package sstable
66

77
import (
88
"context"
9+
"encoding/hex"
10+
"fmt"
11+
"runtime/debug"
12+
"strconv"
13+
"strings"
14+
"unicode"
915

16+
"github.com/cockroachdb/crlib/crstrings"
17+
"github.com/cockroachdb/errors"
1018
"github.com/cockroachdb/pebble/internal/base"
1119
"github.com/cockroachdb/pebble/internal/keyspan"
1220
"github.com/cockroachdb/pebble/objstorage"
21+
"github.com/cockroachdb/pebble/sstable/blob"
1322
"github.com/cockroachdb/pebble/sstable/block"
1423
)
1524

@@ -80,3 +89,172 @@ func ReadAll(
8089
}
8190
return points, rangeDels, rangeKeys, nil
8291
}
92+
93+
// ParsedKVOrSpan represents a KV or a key span produced by ParseTestKVsAndSpans.
94+
//
95+
// There are three possibilities:
96+
// - key span: only the Span field is set.
97+
// - KV without blob value: only the Key and Value fields are set (and
98+
// optionally ForceObsolete).
99+
// - KV with blob value: only the Key, BlobHandle, and Attr fields are set
100+
// (and optionally ForceObsolete).
101+
type ParsedKVOrSpan struct {
102+
// If Span is not nil, the rest of the fields are unset.
103+
Span *keyspan.Span
104+
Key base.InternalKey
105+
ForceObsolete bool
106+
// Either Value is set, or BlobHandle and Attr are set.
107+
Value []byte
108+
BlobHandle blob.InlineHandle
109+
Attr base.ShortAttribute
110+
}
111+
112+
func (kv ParsedKVOrSpan) IsKeySpan() bool {
113+
return kv.Span != nil
114+
}
115+
116+
func (kv ParsedKVOrSpan) HasBlobValue() bool {
117+
return kv.Span == nil && kv.Value == nil
118+
}
119+
120+
func (kv ParsedKVOrSpan) String() string {
121+
if kv.IsKeySpan() {
122+
return fmt.Sprintf("Span: %s", kv.Span)
123+
}
124+
prefix := crstrings.If(kv.ForceObsolete, "force-obsolete: ")
125+
if !kv.HasBlobValue() {
126+
return fmt.Sprintf("%s%s = %s", prefix, kv.Key, kv.Value)
127+
}
128+
return fmt.Sprintf("%s%s = blobInlineHandle(%d, blk%d, %d, %d, 0x%02x)", prefix, kv.Key,
129+
kv.BlobHandle.ReferenceID, kv.BlobHandle.BlockNum, kv.BlobHandle.OffsetInBlock, kv.BlobHandle.ValueLen, kv.Attr,
130+
)
131+
}
132+
133+
// ParseTestKVsAndSpans parses a multi-line string that defines SSTable contents.
134+
// The lines can be either key-value pairs or key spans.
135+
// Sample input showing the format:
136+
//
137+
// a#1,SET = a
138+
// force-obsolete: d#2,SET = d
139+
// f#3,SET = blobInlineHandle(0, blk1, 10, 100, 0x07)
140+
// Span: d-e:{(#4,RANGEDEL)}
141+
// Span: a-d:{(#11,RANGEKEYSET,@10,foo)}
142+
// Span: g-l:{(#5,RANGEDEL)}
143+
// Span: y-z:{(#12,RANGEKEYSET,@11,foo)}
144+
//
145+
// Note that the older KV format "<user-key>.<kind>.<seq-num> : <value>" is also supported
146+
// (for now).
147+
func ParseTestKVsAndSpans(input string) (_ []ParsedKVOrSpan, err error) {
148+
defer func() {
149+
if r := recover(); r != nil {
150+
err = errors.Newf("%v\n%s", r, debug.Stack())
151+
}
152+
}()
153+
var result []ParsedKVOrSpan
154+
for _, line := range crstrings.Lines(input) {
155+
if strings.HasPrefix(line, "Span:") {
156+
span := keyspan.ParseSpan(strings.TrimPrefix(line, "Span:"))
157+
result = append(result, ParsedKVOrSpan{Span: &span})
158+
continue
159+
}
160+
161+
var kv ParsedKVOrSpan
162+
line, kv.ForceObsolete = strings.CutPrefix(line, "force-obsolete:")
163+
// There should be exactly one "=" or ":" in the remaining line.
164+
keyStr, valStr, ok := strings.Cut(line, "=")
165+
if !ok {
166+
keyStr, valStr, ok = strings.Cut(line, ":")
167+
}
168+
if !ok {
169+
return nil, errors.Newf("KV format is [force-obsolete:] <key>=<value> (or <key>:<value>): %q", line)
170+
}
171+
kv.Key = base.ParseInternalKey(strings.TrimSpace(keyStr))
172+
valStr = strings.TrimSpace(valStr)
173+
174+
if kv.ForceObsolete && kv.Key.Kind() == InternalKeyKindRangeDelete {
175+
return nil, errors.Errorf("force-obsolete is not allowed for RANGEDEL")
176+
}
177+
178+
if strings.HasPrefix(valStr, "blobInlineHandle(") {
179+
handle, attr, err := decodeBlobInlineHandleAndAttribute(valStr)
180+
if err != nil {
181+
return nil, err
182+
}
183+
kv.BlobHandle = handle
184+
kv.Attr = attr
185+
} else {
186+
kv.Value = []byte(valStr)
187+
}
188+
result = append(result, kv)
189+
}
190+
return result, nil
191+
}
192+
193+
// ParseTestSST parses the KVs and spans in the input (see ParseTestKVAndSpans)
194+
// and writes them to an sstable.
195+
func ParseTestSST(w RawWriter, input string) error {
196+
kvs, err := ParseTestKVsAndSpans(input)
197+
if err != nil {
198+
return err
199+
}
200+
for _, kv := range kvs {
201+
var err error
202+
switch {
203+
case kv.IsKeySpan():
204+
err = w.EncodeSpan(*kv.Span)
205+
case kv.HasBlobValue():
206+
err = w.AddWithBlobHandle(kv.Key, kv.BlobHandle, kv.Attr, kv.ForceObsolete)
207+
default:
208+
err = w.Add(kv.Key, kv.Value, kv.ForceObsolete)
209+
}
210+
if err != nil {
211+
return errors.Wrapf(err, "failed to write %s", kv)
212+
}
213+
}
214+
return nil
215+
}
216+
217+
// decodeBlobInlineHandleAndAttribute decodes a blob handle (in its inline form)
218+
// and its short attribute from a debug string. It expects a value of the form:
219+
// blobInlineHandle(<refIndex>, blk<blocknum>, <offset>, <valLen>, <attr>). For example:
220+
//
221+
// blobInlineHandle(24, blk255, 10, 9235, 0x07)
222+
func decodeBlobInlineHandleAndAttribute(
223+
ref string,
224+
) (blob.InlineHandle, base.ShortAttribute, error) {
225+
fields := strings.FieldsFunc(strings.TrimSuffix(strings.TrimPrefix(ref, "blobInlineHandle("), ")"),
226+
func(r rune) bool { return r == ',' || unicode.IsSpace(r) })
227+
if len(fields) != 5 {
228+
return blob.InlineHandle{}, base.ShortAttribute(0), errors.New("expected 5 fields")
229+
}
230+
refIdx, err := strconv.ParseUint(fields[0], 10, 32)
231+
if err != nil {
232+
return blob.InlineHandle{}, base.ShortAttribute(0), errors.Wrap(err, "failed to parse file offset")
233+
}
234+
blockNum, err := strconv.ParseUint(strings.TrimPrefix(fields[1], "blk"), 10, 32)
235+
if err != nil {
236+
return blob.InlineHandle{}, base.ShortAttribute(0), errors.Wrap(err, "failed to parse block number")
237+
}
238+
off, err := strconv.ParseUint(fields[2], 10, 32)
239+
if err != nil {
240+
return blob.InlineHandle{}, base.ShortAttribute(0), errors.Wrap(err, "failed to parse offset")
241+
}
242+
valLen, err := strconv.ParseUint(fields[3], 10, 32)
243+
if err != nil {
244+
return blob.InlineHandle{}, base.ShortAttribute(0), errors.Wrap(err, "failed to parse value length")
245+
}
246+
attr, err := hex.DecodeString(strings.TrimPrefix(fields[4], "0x"))
247+
if err != nil {
248+
return blob.InlineHandle{}, base.ShortAttribute(0), errors.Wrap(err, "failed to parse attribute")
249+
}
250+
return blob.InlineHandle{
251+
InlineHandlePreface: blob.InlineHandlePreface{
252+
ReferenceID: blob.ReferenceID(refIdx),
253+
ValueLen: uint32(valLen),
254+
},
255+
HandleSuffix: blob.HandleSuffix{
256+
BlockNum: uint32(blockNum),
257+
OffsetInBlock: uint32(off),
258+
},
259+
}, base.ShortAttribute(attr[0]), nil
260+
}

sstable/testdata/reader_hide_obsolete/iter

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -315,7 +315,7 @@ build is-strict-obsolete
315315
d.SINGLEDEL.40:
316316
d.MERGE.30:D30
317317
----
318-
MERGE not supported in a strict-obsolete sstable
318+
failed to write d#30,MERGE = D30: MERGE not supported in a strict-obsolete sstable
319319

320320
# Regression test for #3761. If an entire block contains obsolete points,
321321
# skipBackward should still skip to blocks earlier in the sstable.

sstable/testdata/writer

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -147,19 +147,19 @@ build
147147
a.SET.1:b
148148
a.SET.2:c
149149
----
150-
pebble: keys must be added in strictly increasing order: a#1,SET, a#2,SET
150+
failed to write a#2,SET = c: pebble: keys must be added in strictly increasing order: a#1,SET, a#2,SET
151151

152152
build
153153
b.SET.1:a
154154
a.SET.2:b
155155
----
156-
pebble: keys must be added in strictly increasing order: b#1,SET, a#2,SET
156+
failed to write a#2,SET = b: pebble: keys must be added in strictly increasing order: b#1,SET, a#2,SET
157157

158158
build
159159
b.RANGEDEL.1:c
160160
a.RANGEDEL.2:b
161161
----
162-
pebble: keys must be added in order: b#1,RANGEDEL, a#2,RANGEDEL
162+
failed to write a#2,RANGEDEL = b: pebble: keys must be added in order: b#1,RANGEDEL, a#2,RANGEDEL
163163

164164
build-raw
165165
.RANGEDEL.1:b

sstable/testdata/writer_v3

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -121,19 +121,19 @@ build
121121
a.SET.1:b
122122
a.SET.2:c
123123
----
124-
pebble: keys must be added in strictly increasing order: a#1,SET, a#2,SET
124+
failed to write a#2,SET = c: pebble: keys must be added in strictly increasing order: a#1,SET, a#2,SET
125125

126126
build
127127
b.SET.1:a
128128
a.SET.2:b
129129
----
130-
pebble: keys must be added in strictly increasing order: b#1,SET, a#2,SET
130+
failed to write a#2,SET = b: pebble: keys must be added in strictly increasing order: b#1,SET, a#2,SET
131131

132132
build
133133
b.RANGEDEL.1:c
134134
a.RANGEDEL.2:b
135135
----
136-
pebble: keys must be added in order: b#1,RANGEDEL, a#2,RANGEDEL
136+
failed to write a#2,RANGEDEL = b: pebble: keys must be added in order: b#1,RANGEDEL, a#2,RANGEDEL
137137

138138
build-raw
139139
.RANGEDEL.1:b

0 commit comments

Comments
 (0)