Skip to content

Commit 3b55640

Browse files
committed
tool: add tool for inspecting blob files
This patch adds tool support for inspecting the layout of blob files, dumping the layout indicated by the index block. Fixes: #4521
1 parent c726afe commit 3b55640

23 files changed

+1718
-84
lines changed

blob_rewrite.go

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -401,8 +401,7 @@ func newBlobFileRewriter(
401401

402402
// generateHeap populates rw.blkHeap with the blob reference liveness encodings
403403
// for each referencing sstable, rw.sstables.
404-
func (rw *blobFileRewriter) generateHeap() error {
405-
ctx := context.TODO()
404+
func (rw *blobFileRewriter) generateHeap(ctx context.Context) error {
406405
heap.Init(&rw.blkHeap)
407406

408407
var decoder colblk.ReferenceLivenessBlockDecoder
@@ -440,7 +439,7 @@ func (rw *blobFileRewriter) generateHeap() error {
440439
}
441440

442441
func (rw *blobFileRewriter) Rewrite(ctx context.Context) (blob.FileWriterStats, error) {
443-
if err := rw.generateHeap(); err != nil {
442+
if err := rw.generateHeap(ctx); err != nil {
444443
return blob.FileWriterStats{}, err
445444
}
446445
if rw.blkHeap.Len() == 0 {

sstable/blob/blob.go

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -477,3 +477,49 @@ func (r *FileReader) ReadIndexBlock(
477477
func (r *FileReader) IndexHandle() block.Handle {
478478
return r.footer.indexHandle
479479
}
480+
481+
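// Layout returns a human-readable description of the blob file's block organization: the virtual-to-physical block mapping (when the index block has virtual blocks), followed by each physical block's offset, length, value count and formatted contents.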
482+
func (r *FileReader) Layout() (string, error) {
483+
ctx := context.TODO()
484+
485+
indexH, err := r.ReadIndexBlock(ctx, block.NoReadEnv, nil /* rh */)
486+
if err != nil {
487+
return "", err
488+
}
489+
defer indexH.Release()
490+
491+
var buf bytes.Buffer
492+
indexDecoder := indexBlockDecoder{}
493+
indexDecoder.Init(indexH.BlockData())
494+
495+
if indexDecoder.virtualBlockCount > 0 {
496+
fmt.Fprintf(&buf, "virtual blocks mapping:\n")
497+
for i := range indexDecoder.virtualBlockCount {
498+
blockIndex, valueIDOffset := indexDecoder.RemapVirtualBlockID(BlockID(i))
499+
fmt.Fprintf(&buf, "virtual block %d -> physical block %d (valueID offset: %d)\n",
500+
i, blockIndex, valueIDOffset)
501+
}
502+
fmt.Fprintf(&buf, "\n")
503+
}
504+
505+
fmt.Fprintf(&buf, "physical blocks:\n")
506+
for i := range indexDecoder.BlockCount() {
507+
handle := indexDecoder.BlockHandle(i)
508+
fmt.Fprintf(&buf, "block %d: offset=%d length=%d\n", i, handle.Offset, handle.Length)
509+
510+
valueBlockH, err := r.ReadValueBlock(ctx, block.NoReadEnv, nil /* rh */, handle)
511+
if err != nil {
512+
return "", err
513+
}
514+
515+
valueDecoder := blobValueBlockDecoder{}
516+
valueDecoder.Init(valueBlockH.BlockData())
517+
518+
fmt.Fprintf(&buf, "values: %d\n", valueDecoder.bd.Rows())
519+
fmt.Fprintf(&buf, "%s", valueDecoder.bd.FormattedString())
520+
521+
valueBlockH.Release()
522+
}
523+
524+
return buf.String(), nil
525+
}

tool/blob.go

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
// Copyright 2025 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2+
// of this source code is governed by a BSD-style license that can be found in
3+
// the LICENSE file.
4+
5+
package tool
6+
7+
import (
8+
"context"
9+
"fmt"
10+
"io"
11+
12+
"github.com/cockroachdb/errors"
13+
"github.com/cockroachdb/pebble"
14+
"github.com/cockroachdb/pebble/internal/base"
15+
"github.com/cockroachdb/pebble/internal/cache"
16+
"github.com/cockroachdb/pebble/internal/sstableinternal"
17+
"github.com/cockroachdb/pebble/sstable"
18+
"github.com/cockroachdb/pebble/sstable/blob"
19+
"github.com/cockroachdb/pebble/vfs"
20+
"github.com/spf13/cobra"
21+
)
22+
23+
// blobT implements the blob introspection tool.
24+
type blobT struct {
25+
Root *cobra.Command
26+
Layout *cobra.Command
27+
28+
// Configuration and state.
29+
opts *pebble.Options
30+
}
31+
32+
func newBlob(opts *pebble.Options) *blobT {
33+
s := &blobT{
34+
opts: opts,
35+
}
36+
37+
s.Root = &cobra.Command{
38+
Use: "blob",
39+
Short: "blob introspection tools",
40+
}
41+
s.Layout = &cobra.Command{
42+
Use: "layout <blob files>",
43+
Short: "print the layout of a blob file",
44+
Long: `
45+
Print the layout for the given blob files.
46+
`,
47+
Args: cobra.MinimumNArgs(1),
48+
Run: s.runLayout,
49+
}
50+
s.Root.AddCommand(s.Layout)
51+
return s
52+
}
53+
54+
func (b *blobT) newReader(
55+
f vfs.File, cacheHandle *cache.Handle, fn string,
56+
) (*blob.FileReader, error) {
57+
readable, err := sstable.NewSimpleReadable(f)
58+
if err != nil {
59+
return nil, err
60+
}
61+
o := b.opts.MakeReaderOptions()
62+
o.CacheOpts = sstableinternal.CacheOptions{CacheHandle: cacheHandle}
63+
if cacheHandle != nil {
64+
_, fileNum, ok := base.ParseFilename(b.opts.FS, fn)
65+
if ok {
66+
o.CacheOpts.FileNum = fileNum
67+
}
68+
}
69+
reader, err := blob.NewFileReader(context.TODO(), readable, blob.FileReaderOptions{
70+
ReaderOptions: o.ReaderOptions,
71+
})
72+
if err != nil {
73+
return nil, errors.CombineErrors(err, readable.Close())
74+
}
75+
return reader, nil
76+
}
77+
78+
// foreachBlob opens each blob file specified in the args (if an arg is a
79+
// directory, it is walked for blob files) and calls the given function.
80+
func (b *blobT) foreachBlob(
81+
stderr io.Writer, args []string, fn func(path string, r *blob.FileReader),
82+
) {
83+
processFileFn := func(path string, r *blob.FileReader) error {
84+
fn(path, r)
85+
return nil
86+
}
87+
closeReaderFn := func(r *blob.FileReader) error {
88+
return r.Close()
89+
}
90+
processFiles(stderr, b.opts.FS, args, []string{".blob"}, b.newReader, closeReaderFn, processFileFn)
91+
}
92+
93+
func (b *blobT) runLayout(cmd *cobra.Command, args []string) {
94+
stdout, stderr := cmd.OutOrStdout(), cmd.OutOrStderr()
95+
b.foreachBlob(stderr, args, func(path string, r *blob.FileReader) {
96+
fmt.Fprintf(stdout, "%s\n", path)
97+
98+
l, err := r.Layout()
99+
if err != nil {
100+
fmt.Fprintf(stderr, "%s\n", err)
101+
return
102+
}
103+
fmt.Fprintf(stdout, "%s\n", l)
104+
})
105+
}

tool/blob_test.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
package tool
2+
3+
import "testing"
4+
5+
func TestBlob(t *testing.T) {
6+
runTests(t, "testdata/blob_*")
7+
}

tool/make_test_find_db_val_sep.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
package main
99

1010
import (
11+
"fmt"
1112
"log"
1213

1314
"github.com/cockroachdb/pebble"
@@ -58,6 +59,10 @@ func main() {
5859
},
5960
FormatMajorVersion: pebble.FormatValueSeparation,
6061
}
62+
// Set the block size for every level.
63+
for i := range opts.Levels {
64+
opts.Levels[i].BlockSize = 100
65+
}
6166
opts.Experimental.ValueSeparationPolicy = func() pebble.ValueSeparationPolicy {
6267
return pebble.ValueSeparationPolicy{
6368
Enabled: true,
@@ -83,4 +88,9 @@ func main() {
8388
tdb.set("eee", "pigeon")
8489
tdb.set("fff", "chicken")
8590
tdb.flush()
91+
92+
for i := range 30 {
93+
tdb.set(fmt.Sprintf("ggg%d", i), fmt.Sprintf("chicken%d", i))
94+
}
95+
tdb.flush()
8696
}

tool/sstable.go

Lines changed: 6 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ import (
1010
"fmt"
1111
"io"
1212
"os"
13-
"path/filepath"
1413
"slices"
1514
"strings"
1615
"text/tabwriter"
@@ -559,41 +558,16 @@ func (s *sstableT) foreachSstable(
559558
args []string,
560559
fn func(path string, r *sstable.Reader, props sstable.Properties),
561560
) {
562-
pathFn := func(path string) {
563-
f, err := s.opts.FS.Open(path)
564-
if err != nil {
565-
fmt.Fprintf(stderr, "%s\n", err)
566-
return
567-
}
568-
569-
// TODO(annie): Use a BufferPool.
570-
c := pebble.NewCache(128 << 20 /* 128 MB */)
571-
defer c.Unref()
572-
ch := c.NewHandle()
573-
defer ch.Close()
574-
575-
r, err := s.newReader(f, ch, path)
576-
if err != nil {
577-
fmt.Fprintf(stderr, "%s: %s\n", path, err)
578-
return
579-
}
580-
defer func() { _ = r.Close() }()
581-
561+
processFileFn := func(path string, r *sstable.Reader) error {
582562
props, err := r.ReadPropertiesBlock(context.Background(), nil /* buffer pool */)
583563
if err != nil {
584-
fmt.Fprintf(stderr, "%s\n", err)
585-
return
564+
return err
586565
}
587566
fn(path, r, props)
567+
return nil
588568
}
589-
590-
// listed and fn is invoked on any file with an .sst or .ldb suffix.
591-
for _, arg := range args {
592-
walk(stderr, s.opts.FS, arg, func(path string) {
593-
switch filepath.Ext(path) {
594-
case ".sst", ".ldb":
595-
pathFn(path)
596-
}
597-
})
569+
closeReaderFn := func(r *sstable.Reader) error {
570+
return r.Close()
598571
}
572+
processFiles(stderr, s.opts.FS, args, []string{".sst", ".ldb"}, s.newReader, closeReaderFn, processFileFn)
599573
}

tool/testdata/blob_layout

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
blob layout
2+
./testdata/find-val-sep-db/000012.blob
3+
----
4+
----
5+
000012.blob
6+
physical blocks:
7+
block 0: offset=0 length=62
8+
values: 8
9+
block
10+
├── columnar block header
11+
│ ├── 00-01: x 01 # version 1
12+
│ ├── 01-03: x 0100 # 1 columns
13+
│ ├── 03-07: x 08000000 # 8 rows
14+
│ ├── 07-08: b 00000011 # col 0: bytes
15+
│ └── 08-12: x 0c000000 # col 0: page start 12
16+
├── data for column 0 (bytes)
17+
│ ├── offsets table
18+
│ │ ├── 12-13: x 01 # encoding: 1b
19+
│ │ ├── 13-14: x 00 # data[0] = 0 [22 overall]
20+
│ │ ├── 14-15: x 08 # data[1] = 8 [30 overall]
21+
│ │ ├── 15-16: x 10 # data[2] = 16 [38 overall]
22+
│ │ ├── 16-17: x 19 # data[3] = 25 [47 overall]
23+
│ │ ├── 17-18: x 22 # data[4] = 34 [56 overall]
24+
│ │ ├── 18-19: x 2b # data[5] = 43 [65 overall]
25+
│ │ ├── 19-20: x 34 # data[6] = 52 [74 overall]
26+
│ │ ├── 20-21: x 3d # data[7] = 61 [83 overall]
27+
│ │ └── 21-22: x 46 # data[8] = 70 [92 overall]
28+
│ └── data
29+
│ ├── 22-30: x 636869636b656e30 # data[0]: chicken0
30+
│ ├── 30-38: x 636869636b656e31 # data[1]: chicken1
31+
│ ├── 38-47: x 636869636b656e3130 # data[2]: chicken10
32+
│ ├── 47-56: x 636869636b656e3131 # data[3]: chicken11
33+
│ ├── 56-65: x 636869636b656e3132 # data[4]: chicken12
34+
│ ├── 65-74: x 636869636b656e3133 # data[5]: chicken13
35+
│ ├── 74-83: x 636869636b656e3134 # data[6]: chicken14
36+
│ └── 83-92: x 636869636b656e3135 # data[7]: chicken15
37+
└── 92-93: x 00 # block trailer padding
38+
block 1: offset=67 length=67
39+
values: 8
40+
block
41+
├── columnar block header
42+
│ ├── 00-01: x 01 # version 1
43+
│ ├── 01-03: x 0100 # 1 columns
44+
│ ├── 03-07: x 08000000 # 8 rows
45+
│ ├── 07-08: b 00000011 # col 0: bytes
46+
│ └── 08-12: x 0c000000 # col 0: page start 12
47+
├── data for column 0 (bytes)
48+
│ ├── offsets table
49+
│ │ ├── 12-13: x 01 # encoding: 1b
50+
│ │ ├── 13-14: x 00 # data[0] = 0 [22 overall]
51+
│ │ ├── 14-15: x 09 # data[1] = 9 [31 overall]
52+
│ │ ├── 15-16: x 12 # data[2] = 18 [40 overall]
53+
│ │ ├── 16-17: x 1b # data[3] = 27 [49 overall]
54+
│ │ ├── 17-18: x 24 # data[4] = 36 [58 overall]
55+
│ │ ├── 18-19: x 2c # data[5] = 44 [66 overall]
56+
│ │ ├── 19-20: x 35 # data[6] = 53 [75 overall]
57+
│ │ ├── 20-21: x 3e # data[7] = 62 [84 overall]
58+
│ │ └── 21-22: x 47 # data[8] = 71 [93 overall]
59+
│ └── data
60+
│ ├── 22-31: x 636869636b656e3136 # data[0]: chicken16
61+
│ ├── 31-40: x 636869636b656e3137 # data[1]: chicken17
62+
│ ├── 40-49: x 636869636b656e3138 # data[2]: chicken18
63+
│ ├── 49-58: x 636869636b656e3139 # data[3]: chicken19
64+
│ ├── 58-66: x 636869636b656e32 # data[4]: chicken2
65+
│ ├── 66-75: x 636869636b656e3230 # data[5]: chicken20
66+
│ ├── 75-84: x 636869636b656e3231 # data[6]: chicken21
67+
│ └── 84-93: x 636869636b656e3232 # data[7]: chicken22
68+
└── 93-94: x 00 # block trailer padding
69+
block 2: offset=139 length=66
70+
values: 8
71+
block
72+
├── columnar block header
73+
│ ├── 00-01: x 01 # version 1
74+
│ ├── 01-03: x 0100 # 1 columns
75+
│ ├── 03-07: x 08000000 # 8 rows
76+
│ ├── 07-08: b 00000011 # col 0: bytes
77+
│ └── 08-12: x 0c000000 # col 0: page start 12
78+
├── data for column 0 (bytes)
79+
│ ├── offsets table
80+
│ │ ├── 12-13: x 01 # encoding: 1b
81+
│ │ ├── 13-14: x 00 # data[0] = 0 [22 overall]
82+
│ │ ├── 14-15: x 09 # data[1] = 9 [31 overall]
83+
│ │ ├── 15-16: x 12 # data[2] = 18 [40 overall]
84+
│ │ ├── 16-17: x 1b # data[3] = 27 [49 overall]
85+
│ │ ├── 17-18: x 24 # data[4] = 36 [58 overall]
86+
│ │ ├── 18-19: x 2d # data[5] = 45 [67 overall]
87+
│ │ ├── 19-20: x 36 # data[6] = 54 [76 overall]
88+
│ │ ├── 20-21: x 3f # data[7] = 63 [85 overall]
89+
│ │ └── 21-22: x 47 # data[8] = 71 [93 overall]
90+
│ └── data
91+
│ ├── 22-31: x 636869636b656e3233 # data[0]: chicken23
92+
│ ├── 31-40: x 636869636b656e3234 # data[1]: chicken24
93+
│ ├── 40-49: x 636869636b656e3235 # data[2]: chicken25
94+
│ ├── 49-58: x 636869636b656e3236 # data[3]: chicken26
95+
│ ├── 58-67: x 636869636b656e3237 # data[4]: chicken27
96+
│ ├── 67-76: x 636869636b656e3238 # data[5]: chicken28
97+
│ ├── 76-85: x 636869636b656e3239 # data[6]: chicken29
98+
│ └── 85-93: x 636869636b656e33 # data[7]: chicken3
99+
└── 93-94: x 00 # block trailer padding
100+
block 3: offset=210 length=55
101+
values: 6
102+
block
103+
├── columnar block header
104+
│ ├── 00-01: x 01 # version 1
105+
│ ├── 01-03: x 0100 # 1 columns
106+
│ ├── 03-07: x 06000000 # 6 rows
107+
│ ├── 07-08: b 00000011 # col 0: bytes
108+
│ └── 08-12: x 0c000000 # col 0: page start 12
109+
├── data for column 0 (bytes)
110+
│ ├── offsets table
111+
│ │ ├── 12-13: x 01 # encoding: 1b
112+
│ │ ├── 13-14: x 00 # data[0] = 0 [20 overall]
113+
│ │ ├── 14-15: x 08 # data[1] = 8 [28 overall]
114+
│ │ ├── 15-16: x 10 # data[2] = 16 [36 overall]
115+
│ │ ├── 16-17: x 18 # data[3] = 24 [44 overall]
116+
│ │ ├── 17-18: x 20 # data[4] = 32 [52 overall]
117+
│ │ ├── 18-19: x 28 # data[5] = 40 [60 overall]
118+
│ │ └── 19-20: x 30 # data[6] = 48 [68 overall]
119+
│ └── data
120+
│ ├── 20-28: x 636869636b656e34 # data[0]: chicken4
121+
│ ├── 28-36: x 636869636b656e35 # data[1]: chicken5
122+
│ ├── 36-44: x 636869636b656e36 # data[2]: chicken6
123+
│ ├── 44-52: x 636869636b656e37 # data[3]: chicken7
124+
│ ├── 52-60: x 636869636b656e38 # data[4]: chicken8
125+
│ └── 60-68: x 636869636b656e39 # data[5]: chicken9
126+
└── 68-69: x 00 # block trailer padding
127+
128+
----
129+
----

0 commit comments

Comments
 (0)