Skip to content

Commit 6c8d161

Browse files
committed
tool: add support for loading blob values for find
This patch adds support for loading a blob value for the `find` tool. Informs: #4448
1 parent 0175c8c commit 6c8d161

18 files changed

+305
-23
lines changed

sstable/values.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,20 @@ var DebugHandlesBlobContext = TableBlobContext{
3939
},
4040
}
4141

42+
// LoadValBlobContext returns a TableBlobContext that configures a
43+
// sstable iterator to fetch the value stored in a blob file. It is the
44+
// caller's responsibility to close the ValueFetcher returned.
45+
func LoadValBlobContext(
46+
rp blob.ReaderProvider, blobRefs BlobReferences,
47+
) (*blob.ValueFetcher, TableBlobContext) {
48+
vf := &blob.ValueFetcher{}
49+
vf.Init(rp, block.ReadEnv{})
50+
return vf, TableBlobContext{
51+
ValueFetcher: vf,
52+
References: blobRefs,
53+
}
54+
}
55+
4256
// BlobReferences provides a mapping from an index to a file number for a
4357
// sstable's blob references. In practice, this is implemented by
4458
// manifest.BlobReferences.

tool/find.go

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
"github.com/cockroachdb/pebble/internal/sstableinternal"
2323
"github.com/cockroachdb/pebble/record"
2424
"github.com/cockroachdb/pebble/sstable"
25+
"github.com/cockroachdb/pebble/sstable/blob"
2526
"github.com/cockroachdb/pebble/wal"
2627
"github.com/spf13/cobra"
2728
)
@@ -71,6 +72,8 @@ type findT struct {
7172
tableRefs map[base.FileNum]bool
7273
// Map from file num to table metadata.
7374
tableMeta map[base.FileNum]*manifest.TableMetadata
75+
// References to the blob files listed in the manifests' version edits.
76+
blobRefs manifest.BlobReferences
7477
// List of error messages for SSTables that could not be decoded.
7578
errors []string
7679
}
@@ -179,6 +182,7 @@ func (f *findT) findFiles(stdout, stderr io.Writer, dir string) error {
179182
f.manifests = nil
180183
f.tables = nil
181184
f.tableMeta = make(map[base.FileNum]*manifest.TableMetadata)
185+
f.blobRefs = nil
182186

183187
if _, err := f.opts.FS.Stat(dir); err != nil {
184188
return err
@@ -227,6 +231,7 @@ func (f *findT) findFiles(stdout, stderr io.Writer, dir string) error {
227231
// Read the manifests and populate the editRefs map which is used to determine
228232
// the provenance and metadata of tables.
229233
func (f *findT) readManifests(stdout io.Writer) {
234+
blobMetas := make(map[base.DiskFileNum]struct{})
230235
for _, fl := range f.manifests {
231236
func() {
232237
mf, err := f.opts.FS.Open(fl.path)
@@ -281,9 +286,22 @@ func (f *findT) readManifests(stdout io.Writer) {
281286
f.tableMeta[nf.Meta.FileNum] = nf.Meta
282287
}
283288
}
289+
for _, bf := range ve.NewBlobFiles {
290+
if _, ok := blobMetas[bf.FileNum]; !ok {
291+
blobMetas[bf.FileNum] = struct{}{}
292+
}
293+
}
284294
}
285295
}()
286296
}
297+
f.blobRefs = make(manifest.BlobReferences, len(blobMetas))
298+
i := 0
299+
for fn := range blobMetas {
300+
f.blobRefs[i] = manifest.BlobReference{
301+
FileNum: fn,
302+
}
303+
i++
304+
}
287305

288306
if f.verbose {
289307
fmt.Fprintf(stdout, "%5d %s\n", len(f.edits), makePlural("edit", int64(len(f.edits))))
@@ -476,12 +494,18 @@ func (f *findT) searchTables(stdout io.Writer, searchKey []byte, refs []findRef)
476494
switch ConvertToBlobRefMode(f.blobMode) {
477495
case BlobRefModePrint:
478496
blobContext = sstable.DebugHandlesBlobContext
497+
case BlobRefModeLoad:
498+
provider := debugReaderProvider{
499+
fs: f.opts.FS,
500+
dir: f.Root.Flags().Arg(0),
501+
}
502+
f.fmtValue.mustSet("[%s]")
503+
var vf *blob.ValueFetcher
504+
vf, blobContext = sstable.LoadValBlobContext(&provider, f.blobRefs)
505+
defer func() { _ = vf.Close() }()
479506
default:
480507
blobContext = sstable.AssertNoBlobHandles
481508
}
482-
// TODO(annie): Adjust to support two modes: one that surfaces the
483-
// raw blob value handles, and one that fetches the blob values from
484-
// blob files uncovered by scanning the directory entries. See #4448.
485509
iter, err := r.NewIter(transforms, nil, nil, blobContext)
486510
if err != nil {
487511
return err

tool/make_test_find_db_val_sep.go

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
// Copyright 2025 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2+
// of this source code is governed by a BSD-style license that can be found in
3+
// the LICENSE file.
4+
5+
//go:build make_test_find_db
6+
7+
// Run using: go run -tags make_test_find_db ./tool/make_test_find_db_val_sep.go
8+
package main
9+
10+
import (
11+
"log"
12+
13+
"github.com/cockroachdb/pebble"
14+
"github.com/cockroachdb/pebble/cockroachkvs"
15+
"github.com/cockroachdb/pebble/internal/blobtest"
16+
"github.com/cockroachdb/pebble/sstable"
17+
"github.com/cockroachdb/pebble/vfs"
18+
)
19+
20+
const minSizeForValSep = 3
21+
22+
type db struct {
23+
db *pebble.DB
24+
bv blobtest.Values
25+
}
26+
27+
func (d *db) set(key, value string) {
28+
encodedKey := cockroachkvs.ParseFormattedKey(key)
29+
if err := d.db.Set(encodedKey, []byte(value), nil); err != nil {
30+
log.Fatal(err)
31+
}
32+
}
33+
34+
func (d *db) flush() {
35+
if err := d.db.Flush(); err != nil {
36+
log.Fatal(err)
37+
}
38+
}
39+
40+
func (d *db) close() {
41+
if err := d.db.Close(); err != nil {
42+
log.Fatal(err)
43+
}
44+
}
45+
46+
func main() {
47+
const dir = "tool/testdata/find-db-val-sep"
48+
49+
fs := vfs.Default
50+
if err := fs.RemoveAll(dir); err != nil {
51+
log.Fatal(err)
52+
}
53+
opts := pebble.Options{
54+
FS: fs,
55+
KeySchema: cockroachkvs.KeySchema.Name,
56+
KeySchemas: sstable.KeySchemas{
57+
cockroachkvs.KeySchema.Name: &cockroachkvs.KeySchema,
58+
},
59+
FormatMajorVersion: pebble.FormatExperimentalValueSeparation,
60+
}
61+
opts.Experimental.ValueSeparationPolicy = func() pebble.ValueSeparationPolicy {
62+
return pebble.ValueSeparationPolicy{
63+
Enabled: true,
64+
MinimumSize: minSizeForValSep,
65+
MaxBlobReferenceDepth: 10,
66+
}
67+
}
68+
d, err := pebble.Open(dir, &opts)
69+
if err != nil {
70+
return
71+
}
72+
tdb := &db{
73+
db: d,
74+
}
75+
defer tdb.close()
76+
77+
tdb.set("aaa", "yuumi")
78+
tdb.set("bbb", "mai")
79+
tdb.set("ccc", "poiandyaya")
80+
tdb.set("ddd", "6")
81+
tdb.flush()
82+
}

tool/sstable.go

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -122,8 +122,6 @@ inclusive-inclusive range specified by --start and --end.
122122

123123
s.Check.Flags().Var(
124124
&s.fmtKey, "key", "key formatter")
125-
s.Check.Flags().StringVar(
126-
&s.blobMode, "blob-mode", "none", "blob value formatter")
127125
s.Layout.Flags().Var(
128126
&s.fmtKey, "key", "key formatter")
129127
s.Layout.Flags().Var(
@@ -168,17 +166,7 @@ func (s *sstableT) runCheck(cmd *cobra.Command, args []string) {
168166
s.fmtKey.setForComparer(r.Properties.ComparerName, s.comparers)
169167
s.fmtValue.setForComparer(r.Properties.ComparerName, s.comparers)
170168

171-
var blobContext sstable.TableBlobContext
172-
switch ConvertToBlobRefMode(s.blobMode) {
173-
case BlobRefModePrint:
174-
blobContext = sstable.DebugHandlesBlobContext
175-
default:
176-
blobContext = sstable.AssertNoBlobHandles
177-
}
178-
// TODO(annie): Adjust to support two modes: one that surfaces the raw
179-
// blob value handles, and one that fetches the blob values from blob
180-
// files uncovered by scanning the directory entries. See #4448.
181-
iter, err := r.NewIter(sstable.NoTransforms, nil, nil, blobContext)
169+
iter, err := r.NewIter(sstable.NoTransforms, nil, nil, sstable.AssertNoBlobHandles)
182170
if err != nil {
183171
fmt.Fprintf(stderr, "%s\n", err)
184172
return

tool/testdata/find

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,3 +138,55 @@ hex:636363
138138
000002.sst
139139
test formatter: ccc#14,MERGE
140140
Unable to decode sstable find-mixed/000001.sst, pebble/table: invalid table 000001 (file size is too small)
141+
142+
find
143+
testdata/find-db
144+
eee
145+
--blob-mode=print
146+
----
147+
000004.log
148+
bbb-eee#19,RANGEDEL
149+
150+
find
151+
testdata/find-db-val-sep
152+
crdb:aaa
153+
--blob-mode=print
154+
----
155+
000002.log
156+
aaa\x00#10,SET [7975756d69]
157+
000005.sst [aaa\x00#10,SET-ddd\x00#13,SET]
158+
(flushed to L0)
159+
aaa\x00#10,SET [2866302c626c6b305b303a355d29]
160+
161+
find
162+
testdata/find-db-val-sep
163+
crdb:ddd
164+
--blob-mode=print
165+
----
166+
000002.log
167+
ddd\x00#13,SET [36]
168+
000005.sst [aaa\x00#10,SET-ddd\x00#13,SET]
169+
(flushed to L0)
170+
ddd\x00#13,SET [36]
171+
172+
find
173+
testdata/find-db-val-sep
174+
crdb:aaa
175+
--blob-mode=load
176+
----
177+
000002.log
178+
aaa\x00#10,SET [yuumi]
179+
000005.sst [aaa\x00#10,SET-ddd\x00#13,SET]
180+
(flushed to L0)
181+
aaa\x00#10,SET [yuumi]
182+
183+
find
184+
testdata/find-db-val-sep
185+
crdb:ddd
186+
--blob-mode=load
187+
----
188+
000002.log
189+
ddd\x00#13,SET [6]
190+
000005.sst [aaa\x00#10,SET-ddd\x00#13,SET]
191+
(flushed to L0)
192+
ddd\x00#13,SET [6]
150 Bytes
Binary file not shown.
11 Bytes
Binary file not shown.
814 Bytes
Binary file not shown.
64 Bytes
Binary file not shown.

tool/testdata/find-db-val-sep/LOCK

Whitespace-only changes.

0 commit comments

Comments
 (0)