Skip to content

Commit 1b8f1ef

Browse files
committed
tool: add support for loading blob values during sstable scan
This patch adds support for loading a blob value for the `sstable scan` tool. Fixes: #4448
1 parent 6c8d161 commit 1b8f1ef

File tree

2 files changed

+116
-5
lines changed

2 files changed

+116
-5
lines changed

tool/sstable.go

Lines changed: 99 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,18 @@ import (
1515
"strings"
1616
"text/tabwriter"
1717

18+
"github.com/cockroachdb/errors"
1819
"github.com/cockroachdb/pebble"
1920
"github.com/cockroachdb/pebble/internal/base"
2021
"github.com/cockroachdb/pebble/internal/cache"
2122
"github.com/cockroachdb/pebble/internal/humanize"
2223
"github.com/cockroachdb/pebble/internal/keyspan"
24+
"github.com/cockroachdb/pebble/internal/manifest"
2325
"github.com/cockroachdb/pebble/internal/rangedel"
2426
"github.com/cockroachdb/pebble/internal/sstableinternal"
27+
"github.com/cockroachdb/pebble/record"
2528
"github.com/cockroachdb/pebble/sstable"
29+
"github.com/cockroachdb/pebble/sstable/blob"
2630
"github.com/cockroachdb/pebble/vfs"
2731
"github.com/spf13/cobra"
2832
)
@@ -96,12 +100,15 @@ properties are pretty-printed or displayed in a verbose/raw format.
96100
Run: s.runProperties,
97101
}
98102
s.Scan = &cobra.Command{
99-
Use: "scan <sstables>",
103+
Use: "scan <sstables> [manifest-dir]",
100104
Short: "print sstable records",
101105
Long: `
102106
Print the records in the sstables. The sstables are scanned in command line
103107
order which means the records will be printed in that order. Raw range
104108
tombstones are displayed interleaved with point records.
109+
110+
When --blob-mode=load is specified, the path to a directory containing a
111+
manifest and blob file must be provided as the last argument.
105112
`,
106113
Args: cobra.MinimumNArgs(1),
107114
Run: s.runScan,
@@ -320,6 +327,19 @@ func (s *sstableT) runProperties(cmd *cobra.Command, args []string) {
320327

321328
func (s *sstableT) runScan(cmd *cobra.Command, args []string) {
322329
stdout, stderr := cmd.OutOrStdout(), cmd.OutOrStderr()
330+
// If in blob load mode, the last argument is the path to our directory
331+
// containing the manifest(s) and blob file(s).
332+
blobMode := ConvertToBlobRefMode(s.blobMode)
333+
var blobDir string
334+
if blobMode == BlobRefModeLoad {
335+
if len(args) < 2 {
336+
fmt.Fprintf(stderr, "when --blob-mode=load is specified, the path to a "+
337+
"directory containing a manifest and blob file must be provided as the last argument")
338+
return
339+
}
340+
blobDir = args[len(args)-1]
341+
args = args[:len(args)-1]
342+
}
323343
s.foreachSstable(stderr, args, func(path string, r *sstable.Reader) {
324344
// Update the internal formatter if this comparator has one specified.
325345
s.fmtKey.setForComparer(r.Properties.ComparerName, s.comparers)
@@ -335,15 +355,26 @@ func (s *sstableT) runScan(cmd *cobra.Command, args []string) {
335355
}
336356

337357
var blobContext sstable.TableBlobContext
338-
switch ConvertToBlobRefMode(s.blobMode) {
358+
switch blobMode {
339359
case BlobRefModePrint:
340360
blobContext = sstable.DebugHandlesBlobContext
361+
case BlobRefModeLoad:
362+
blobRefs, err := findAndReadManifests(stderr, s.opts.FS, blobDir)
363+
if err != nil {
364+
fmt.Fprintf(stderr, "%s\n", err)
365+
return
366+
}
367+
provider := debugReaderProvider{
368+
fs: s.opts.FS,
369+
dir: blobDir,
370+
}
371+
s.fmtValue.mustSet("[%s]")
372+
var vf *blob.ValueFetcher
373+
vf, blobContext = sstable.LoadValBlobContext(&provider, blobRefs)
374+
defer func() { _ = vf.Close() }()
341375
default:
342376
blobContext = sstable.AssertNoBlobHandles
343377
}
344-
// TODO(annie): Adjust to support two modes: one that surfaces the raw
345-
// blob value handles, and one that fetches the blob values from blob
346-
// files uncovered by scanning the directory entries. See #4448.
347378
iter, err := r.NewIter(sstable.NoTransforms, nil, s.end, blobContext)
348379
if err != nil {
349380
fmt.Fprintf(stderr, "%s%s\n", prefix, err)
@@ -556,3 +587,66 @@ func (s *sstableT) foreachSstable(
556587
})
557588
}
558589
}
590+
591+
// findAndReadManifests finds and reads all manifests in the specified
592+
// directory to gather blob references.
593+
func findAndReadManifests(
594+
stderr io.Writer, fs vfs.FS, dir string,
595+
) (manifest.BlobReferences, error) {
596+
var manifests []fileLoc
597+
walk(stderr, fs, dir, func(path string) {
598+
ft, fileNum, ok := base.ParseFilename(fs, path)
599+
if !ok {
600+
return
601+
}
602+
fl := fileLoc{DiskFileNum: fileNum, path: path}
603+
switch ft {
604+
case base.FileTypeManifest:
605+
manifests = append(manifests, fl)
606+
}
607+
})
608+
if len(manifests) == 0 {
609+
return nil, errors.New("no MANIFEST files found in the given path")
610+
}
611+
blobMetas := make(map[base.DiskFileNum]struct{})
612+
for _, fl := range manifests {
613+
func() {
614+
mf, err := fs.Open(fl.path)
615+
if err != nil {
616+
fmt.Fprintf(stderr, "%s\n", err)
617+
return
618+
}
619+
defer mf.Close()
620+
621+
rr := record.NewReader(mf, 0 /* logNum */)
622+
for {
623+
r, err := rr.Next()
624+
if err != nil {
625+
if err != io.EOF {
626+
fmt.Fprintf(stderr, "%s: %s\n", fl.path, err)
627+
}
628+
break
629+
}
630+
var ve manifest.VersionEdit
631+
if err = ve.Decode(r); err != nil {
632+
fmt.Fprintf(stderr, "%s: %s\n", fl.path, err)
633+
break
634+
}
635+
for _, bf := range ve.NewBlobFiles {
636+
if _, ok := blobMetas[bf.FileNum]; !ok {
637+
blobMetas[bf.FileNum] = struct{}{}
638+
}
639+
}
640+
}
641+
}()
642+
}
643+
blobRefs := make(manifest.BlobReferences, len(blobMetas))
644+
i := 0
645+
for fn := range blobMetas {
646+
blobRefs[i] = manifest.BlobReference{
647+
FileNum: fn,
648+
}
649+
i++
650+
}
651+
return blobRefs, nil
652+
}

tool/testdata/sstable_scan

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -438,3 +438,20 @@ sstable scan
438438
../sstable/testdata/h.sst
439439
----
440440
h.sst: armed#0,SET [32]
441+
442+
sstable scan
443+
./testdata/find-db-val-sep/000005.sst
444+
--blob-mode=load
445+
----
446+
when --blob-mode=load is specified, the path to a directory containing a manifest and blob file must be provided as the last argument
447+
448+
sstable scan
449+
./testdata/find-db-val-sep/000005.sst
450+
./testdata/find-db-val-sep
451+
--blob-mode=load
452+
----
453+
000005.sst
454+
aaa\x00#10,SET [yuumi]
455+
bbb\x00#11,SET [mai]
456+
ccc\x00#12,SET [poiandyaya]
457+
ddd\x00#13,SET [6]

0 commit comments

Comments
 (0)