Skip to content

Commit a266829

Browse files
committed
tool: refactor blob file mappings
Refactor the tool package's interfaces surrounding blob references in preparation for blob file replacement.
1 parent 7cd1b7e commit a266829

File tree

5 files changed

+139
-90
lines changed

5 files changed

+139
-90
lines changed

tool/blob_files.go

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
// Copyright 2025 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2+
// of this source code is governed by a BSD-style license that can be found in
3+
// the LICENSE file.
4+
5+
package tool
6+
7+
import (
8+
"fmt"
9+
"io"
10+
11+
"github.com/cockroachdb/errors"
12+
"github.com/cockroachdb/pebble/internal/base"
13+
"github.com/cockroachdb/pebble/internal/manifest"
14+
"github.com/cockroachdb/pebble/objstorage/objstorageprovider"
15+
"github.com/cockroachdb/pebble/record"
16+
"github.com/cockroachdb/pebble/sstable"
17+
"github.com/cockroachdb/pebble/sstable/blob"
18+
"github.com/cockroachdb/pebble/sstable/block"
19+
"github.com/cockroachdb/pebble/vfs"
20+
)
21+
22+
// blobFileMappings contains state for retrieving a value separated into a blob
23+
// file.
24+
//
25+
// An sstable alone does not contain sufficient information to read a value from
26+
// a blob file. It requires the manifest to map the ReferenceIDs encoded within
27+
// the sstable to physical blob files. The blobFileMappings type contains
28+
// sufficient information to perform this mapping.
29+
type blobFileMappings struct {
30+
references map[base.TableNum]*manifest.BlobReferences
31+
fetcher blob.ValueFetcher
32+
provider debugReaderProvider
33+
}
34+
35+
// LoadValueBlobContext returns a TableBlobContext that configures a sstable
36+
// iterator to fetch the value stored in a blob file, using the mappings in this
37+
// blobFileMappings.
38+
func (m *blobFileMappings) LoadValueBlobContext(tableNum base.TableNum) sstable.TableBlobContext {
39+
return sstable.TableBlobContext{
40+
ValueFetcher: &m.fetcher,
41+
References: m.references[tableNum],
42+
}
43+
}
44+
45+
// Close releases any resources held by the blobFileMappings.
46+
func (m *blobFileMappings) Close() error {
47+
return errors.CombineErrors(m.fetcher.Close(), m.provider.objProvider.Close())
48+
}
49+
50+
// newBlobFileMappings builds a blobFileMappings from a list of manifests.
51+
//
52+
// Possibly benign errors are printed to stderr. For example, if a process exits
53+
// during a manifest rotation, we may encounter an error reading a manifest, but
54+
// we'll still have sufficient information to read separated values from all
55+
// tables in the current version of the LSM.
56+
//
57+
// The returned blobFileMappings must be closed to release resources.
58+
func newBlobFileMappings(
59+
stderr io.Writer, fs vfs.FS, dir string, manifests []fileLoc,
60+
) (*blobFileMappings, error) {
61+
settings := objstorageprovider.DefaultSettings(fs, dir)
62+
provider, err := objstorageprovider.Open(settings)
63+
if err != nil {
64+
return nil, err
65+
}
66+
mappings := blobFileMappings{
67+
references: make(map[base.TableNum]*manifest.BlobReferences),
68+
provider: debugReaderProvider{objProvider: provider},
69+
}
70+
mappings.fetcher.Init(&mappings.provider, block.ReadEnv{})
71+
for _, fl := range manifests {
72+
err := func() error {
73+
mf, err := fs.Open(fl.path)
74+
if err != nil {
75+
return err
76+
}
77+
defer mf.Close()
78+
79+
rr := record.NewReader(mf, fl.DiskFileNum)
80+
for {
81+
r, err := rr.Next()
82+
if err != nil {
83+
if err != io.EOF {
84+
return err
85+
}
86+
break
87+
}
88+
var ve manifest.VersionEdit
89+
if err = ve.Decode(r); err != nil {
90+
return err
91+
}
92+
for _, nf := range ve.NewTables {
93+
mappings.references[nf.Meta.TableNum] = &nf.Meta.BlobReferences
94+
}
95+
}
96+
return nil
97+
}()
98+
if err != nil {
99+
fmt.Fprintf(stderr, "error reading manifest %s: %v\n", fl.path, err)
100+
}
101+
}
102+
return &mappings, nil
103+
}

tool/find.go

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ import (
2323
"github.com/cockroachdb/pebble/internal/sstableinternal"
2424
"github.com/cockroachdb/pebble/record"
2525
"github.com/cockroachdb/pebble/sstable"
26-
"github.com/cockroachdb/pebble/sstable/blob"
2726
"github.com/cockroachdb/pebble/wal"
2827
"github.com/spf13/cobra"
2928
)
@@ -73,8 +72,8 @@ type findT struct {
7372
tableRefs map[base.FileNum]bool
7473
// Map from file num to table metadata.
7574
tableMeta map[base.FileNum]*manifest.TableMetadata
76-
// Map from sstable file num to slice of blob references.
77-
blobRefsMap map[base.FileNum]*manifest.BlobReferences
75+
// Blob file mappings.
76+
blobMappings *blobFileMappings
7877
// List of error messages for SSTables that could not be decoded.
7978
errors []string
8079
}
@@ -140,6 +139,13 @@ func (f *findT) run(cmd *cobra.Command, args []string) {
140139
return
141140
}
142141
f.readManifests(stdout)
142+
var err error
143+
f.blobMappings, err = newBlobFileMappings(stderr, f.opts.FS, args[0], f.manifests)
144+
if err != nil {
145+
fmt.Fprintf(stderr, "%s\n", err)
146+
return
147+
}
148+
defer func() { _ = f.blobMappings.Close() }()
143149

144150
f.opts.Comparer = f.comparers[f.comparerName]
145151
if f.opts.Comparer == nil {
@@ -183,7 +189,6 @@ func (f *findT) findFiles(stdout, stderr io.Writer, dir string) error {
183189
f.manifests = nil
184190
f.tables = nil
185191
f.tableMeta = make(map[base.FileNum]*manifest.TableMetadata)
186-
f.blobRefsMap = make(map[base.FileNum]*manifest.BlobReferences)
187192

188193
if _, err := f.opts.FS.Stat(dir); err != nil {
189194
return err
@@ -285,7 +290,6 @@ func (f *findT) readManifests(stdout io.Writer) {
285290
if _, ok := f.tableMeta[nf.Meta.TableNum]; !ok {
286291
f.tableMeta[nf.Meta.TableNum] = nf.Meta
287292
}
288-
f.blobRefsMap[nf.Meta.TableNum] = &nf.Meta.BlobReferences
289293
}
290294
}
291295
}()
@@ -485,15 +489,8 @@ func (f *findT) searchTables(stdout io.Writer, searchKey []byte, refs []findRef)
485489
f.fmtValue.mustSet("[%s]")
486490
blobContext = sstable.DebugHandlesBlobContext
487491
case BlobRefModeLoad:
488-
provider := debugReaderProvider{
489-
fs: f.opts.FS,
490-
dir: f.Root.Flags().Arg(0),
491-
}
492492
f.fmtValue.mustSet("[%s]")
493-
var vf *blob.ValueFetcher
494-
blobRefs := f.blobRefsMap[base.PhysicalTableFileNum(fl.DiskFileNum)]
495-
vf, blobContext = sstable.LoadValBlobContext(&provider, blobRefs)
496-
defer func() { _ = vf.Close() }()
493+
blobContext = f.blobMappings.LoadValueBlobContext(base.PhysicalTableFileNum(fl.DiskFileNum))
497494
default:
498495
blobContext = sstable.AssertNoBlobHandles
499496
}

tool/manifest.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"bytes"
99
"fmt"
1010
"io"
11+
"slices"
1112
"time"
1213

1314
"github.com/HdrHistogram/hdrhistogram-go"
@@ -18,6 +19,7 @@ import (
1819
"github.com/cockroachdb/pebble/internal/manifest"
1920
"github.com/cockroachdb/pebble/record"
2021
"github.com/cockroachdb/pebble/sstable"
22+
"github.com/cockroachdb/pebble/vfs"
2123
"github.com/spf13/cobra"
2224
)
2325

@@ -577,3 +579,16 @@ func (m *manifestT) runCheck(cmd *cobra.Command, args []string) {
577579
fmt.Fprintf(stdout, "OK\n")
578580
}
579581
}
582+
583+
func findManifests(stderr io.Writer, fs vfs.FS, dir string) ([]fileLoc, error) {
584+
var manifests []fileLoc
585+
walk(stderr, fs, dir, func(path string) {
586+
ft, fileNum, ok := base.ParseFilename(fs, path)
587+
if !ok || ft != base.FileTypeManifest {
588+
return
589+
}
590+
manifests = append(manifests, fileLoc{DiskFileNum: fileNum, path: path})
591+
})
592+
slices.SortFunc(manifests, cmpFileLoc)
593+
return manifests, nil
594+
}

tool/sstable.go

Lines changed: 9 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,9 @@ import (
2121
"github.com/cockroachdb/pebble/internal/cache"
2222
"github.com/cockroachdb/pebble/internal/humanize"
2323
"github.com/cockroachdb/pebble/internal/keyspan"
24-
"github.com/cockroachdb/pebble/internal/manifest"
2524
"github.com/cockroachdb/pebble/internal/rangedel"
2625
"github.com/cockroachdb/pebble/internal/sstableinternal"
27-
"github.com/cockroachdb/pebble/record"
2826
"github.com/cockroachdb/pebble/sstable"
29-
"github.com/cockroachdb/pebble/sstable/blob"
3027
"github.com/cockroachdb/pebble/vfs"
3128
"github.com/spf13/cobra"
3229
)
@@ -346,7 +343,7 @@ func (s *sstableT) runScan(cmd *cobra.Command, args []string) {
346343
// containing the manifest(s) and blob file(s).
347344
blobMode := ConvertToBlobRefMode(s.blobMode)
348345
var blobDir string
349-
var blobRefsMap map[base.FileNum]*manifest.BlobReferences
346+
var blobMappings *blobFileMappings
350347
if blobMode == BlobRefModeLoad {
351348
if len(args) < 2 {
352349
fmt.Fprintf(stderr, "when --blob-mode=load is specified, the path to a "+
@@ -355,12 +352,17 @@ func (s *sstableT) runScan(cmd *cobra.Command, args []string) {
355352
}
356353
blobDir = args[len(args)-1]
357354
args = args[:len(args)-1]
358-
var err error
359-
blobRefsMap, err = findAndReadManifests(stderr, s.opts.FS, blobDir)
355+
manifests, err := findManifests(stderr, s.opts.FS, blobDir)
360356
if err != nil {
361357
fmt.Fprintf(stderr, "%s\n", err)
362358
return
363359
}
360+
blobMappings, err = newBlobFileMappings(stderr, s.opts.FS, blobDir, manifests)
361+
if err != nil {
362+
fmt.Fprintf(stderr, "%s\n", err)
363+
return
364+
}
365+
defer func() { _ = blobMappings.Close() }()
364366
}
365367

366368
s.foreachSstable(stderr, args, func(path string, r *sstable.Reader, props sstable.Properties) {
@@ -389,15 +391,8 @@ func (s *sstableT) runScan(cmd *cobra.Command, args []string) {
389391
fmt.Fprintf(stderr, "unset file in path %s\n", path)
390392
return
391393
}
392-
provider := debugReaderProvider{
393-
fs: s.opts.FS,
394-
dir: blobDir,
395-
}
396394
s.fmtValue.mustSet("[%s]")
397-
var vf *blob.ValueFetcher
398-
blobRefs := blobRefsMap[base.PhysicalTableFileNum(r.BlockReader().FileNum())]
399-
vf, blobContext = sstable.LoadValBlobContext(&provider, blobRefs)
400-
defer func() { _ = vf.Close() }()
395+
blobContext = blobMappings.LoadValueBlobContext(base.PhysicalTableFileNum(r.BlockReader().FileNum()))
401396
default:
402397
blobContext = sstable.AssertNoBlobHandles
403398
}
@@ -622,57 +617,3 @@ func (s *sstableT) foreachSstable(
622617
})
623618
}
624619
}
625-
626-
// findAndReadManifests finds and reads all manifests in the specified
627-
// directory to gather blob references for each sstable.
628-
func findAndReadManifests(
629-
stderr io.Writer, fs vfs.FS, dir string,
630-
) (map[base.FileNum]*manifest.BlobReferences, error) {
631-
blobRefsMap := make(map[base.FileNum]*manifest.BlobReferences)
632-
var manifests []fileLoc
633-
walk(stderr, fs, dir, func(path string) {
634-
ft, fileNum, ok := base.ParseFilename(fs, path)
635-
if !ok {
636-
return
637-
}
638-
fl := fileLoc{DiskFileNum: fileNum, path: path}
639-
switch ft {
640-
case base.FileTypeManifest:
641-
manifests = append(manifests, fl)
642-
}
643-
})
644-
if len(manifests) == 0 {
645-
return nil, errors.New("no MANIFEST files found in the given path")
646-
}
647-
slices.SortFunc(manifests, cmpFileLoc)
648-
for _, fl := range manifests {
649-
func() {
650-
mf, err := fs.Open(fl.path)
651-
if err != nil {
652-
fmt.Fprintf(stderr, "%s\n", err)
653-
return
654-
}
655-
defer mf.Close()
656-
657-
rr := record.NewReader(mf, 0 /* logNum */)
658-
for {
659-
r, err := rr.Next()
660-
if err != nil {
661-
if err != io.EOF {
662-
fmt.Fprintf(stderr, "%s: %s\n", fl.path, err)
663-
}
664-
break
665-
}
666-
var ve manifest.VersionEdit
667-
if err = ve.Decode(r); err != nil {
668-
fmt.Fprintf(stderr, "%s: %s\n", fl.path, err)
669-
break
670-
}
671-
for _, nf := range ve.NewTables {
672-
blobRefsMap[nf.Meta.TableNum] = &nf.Meta.BlobReferences
673-
}
674-
}
675-
}()
676-
}
677-
return blobRefsMap, nil
678-
}

tool/tool.go

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ import (
1111
"github.com/cockroachdb/pebble/bloom"
1212
"github.com/cockroachdb/pebble/internal/base"
1313
"github.com/cockroachdb/pebble/objstorage"
14-
"github.com/cockroachdb/pebble/objstorage/objstorageprovider"
1514
"github.com/cockroachdb/pebble/objstorage/remote"
1615
"github.com/cockroachdb/pebble/sstable"
1716
"github.com/cockroachdb/pebble/sstable/blob"
@@ -228,8 +227,7 @@ func ConvertToBlobRefMode(s string) BlobRefMode {
228227
// debugReaderProvider is a cache-less ReaderProvider meant for debugging blob
229228
// files.
230229
type debugReaderProvider struct {
231-
fs vfs.FS
232-
dir string
230+
objProvider objstorage.Provider
233231
}
234232

235233
// Assert that *debugReaderProvider implements blob.ReaderProvider.
@@ -240,12 +238,7 @@ var _ blob.ReaderProvider = (*debugReaderProvider)(nil)
240238
func (p *debugReaderProvider) GetValueReader(
241239
ctx context.Context, fileNum base.DiskFileNum,
242240
) (blob.ValueReader, func(), error) {
243-
settings := objstorageprovider.DefaultSettings(p.fs, p.dir)
244-
provider, err := objstorageprovider.Open(settings)
245-
if err != nil {
246-
return nil, nil, err
247-
}
248-
readable, err := provider.OpenForReading(ctx, base.FileTypeBlob, fileNum, objstorage.OpenOptions{})
241+
readable, err := p.objProvider.OpenForReading(ctx, base.FileTypeBlob, fileNum, objstorage.OpenOptions{})
249242
if err != nil {
250243
return nil, nil, err
251244
}

0 commit comments

Comments
 (0)