Skip to content

Commit 98ed96c

Browse files
committed
*: add BlobFileID
Add a BlobFileID type for identifying blob files. When we introduce rewriting and replacing blob files, a BlobReference's file identifier may not translate directly to a file on disk, so we should not use DiskFileNum. Informs #112.
1 parent d26c655 commit 98ed96c

26 files changed

+226
-183
lines changed

checkpoint.go

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import (
1414
"github.com/cockroachdb/pebble/internal/base"
1515
"github.com/cockroachdb/pebble/internal/manifest"
1616
"github.com/cockroachdb/pebble/record"
17+
"github.com/cockroachdb/pebble/sstable/blob"
1718
"github.com/cockroachdb/pebble/vfs"
1819
"github.com/cockroachdb/pebble/vfs/atomicfs"
1920
)
@@ -265,7 +266,7 @@ func (d *DB) Checkpoint(
265266
}
266267

267268
var excludedTables map[manifest.DeletedTableEntry]*manifest.TableMetadata
268-
var includedBlobFiles map[base.DiskFileNum]struct{}
269+
var includedBlobFiles map[base.BlobFileID]struct{}
269270
var remoteFiles []base.DiskFileNum
270271
// Set of TableBacking.DiskFileNum which will be required by virtual sstables
271272
// in the checkpoint.
@@ -309,12 +310,17 @@ func (d *DB) Checkpoint(
309310
// Copy any referenced blob files that have not already been copied.
310311
if len(f.BlobReferences) > 0 {
311312
if includedBlobFiles == nil {
312-
includedBlobFiles = make(map[base.DiskFileNum]struct{})
313+
includedBlobFiles = make(map[base.BlobFileID]struct{})
313314
}
314315
for _, ref := range f.BlobReferences {
315-
if _, ok := includedBlobFiles[ref.FileNum]; !ok {
316-
includedBlobFiles[ref.FileNum] = struct{}{}
317-
ckErr = copyFile(base.FileTypeBlob, ref.FileNum)
316+
if _, ok := includedBlobFiles[ref.FileID]; !ok {
317+
includedBlobFiles[ref.FileID] = struct{}{}
318+
319+
// TODO(jackson): Perform a translation to the
320+
// appropriate disk file number once we support blob
321+
// file replacement.
322+
diskFileNum := blob.DiskFileNumTODO(ref.FileID)
323+
ckErr = copyFile(base.FileTypeBlob, diskFileNum)
318324
if ckErr != nil {
319325
return ckErr
320326
}
@@ -352,8 +358,9 @@ func (d *DB) Checkpoint(
352358
if len(includedBlobFiles) < len(versionBlobFiles) {
353359
excludedBlobFiles = make(map[base.DiskFileNum]*manifest.BlobFileMetadata, len(versionBlobFiles)-len(includedBlobFiles))
354360
for _, blobFile := range versionBlobFiles {
355-
if _, ok := includedBlobFiles[blobFile.FileNum]; !ok {
356-
excludedBlobFiles[blobFile.FileNum] = blobFile
361+
if _, ok := includedBlobFiles[blobFile.FileID]; !ok {
362+
diskFileNum := blob.DiskFileNumTODO(blobFile.FileID)
363+
excludedBlobFiles[diskFileNum] = blobFile
357364
}
358365
}
359366
}

compaction.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2488,10 +2488,10 @@ func (d *DB) cleanupVersionEdit(ve *manifest.VersionEdit) {
24882488
obsoleteFiles.AddBlob(ve.NewBlobFiles[i])
24892489
d.mu.versions.zombieBlobs.Add(objectInfo{
24902490
fileInfo: fileInfo{
2491-
FileNum: ve.NewBlobFiles[i].FileNum,
2491+
FileNum: base.DiskFileNum(ve.NewBlobFiles[i].FileID),
24922492
FileSize: ve.NewBlobFiles[i].Size,
24932493
},
2494-
isLocal: objstorage.IsLocalBlobFile(d.objProvider, ve.NewBlobFiles[i].FileNum),
2494+
isLocal: objstorage.IsLocalBlobFile(d.objProvider, base.DiskFileNum(ve.NewBlobFiles[i].FileID)),
24952495
})
24962496
}
24972497
for i := range ve.NewTables {

data_test.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -927,7 +927,7 @@ func runDBDefineCmdReuseFS(td *datadriven.TestData, opts *Options) (*DB, error)
927927
// them to the final version edit.
928928
valueSeparator := &defineDBValueSeparator{
929929
pbr: &preserveBlobReferences{},
930-
metas: make(map[base.DiskFileNum]*manifest.BlobFileMetadata),
930+
metas: make(map[base.BlobFileID]*manifest.BlobFileMetadata),
931931
}
932932

933933
var mem *memTable
@@ -1173,8 +1173,8 @@ func runDBDefineCmdReuseFS(td *datadriven.TestData, opts *Options) (*DB, error)
11731173
return nil, err
11741174
}
11751175
for f, stats := range fileStats {
1176-
valueSeparator.metas[f].Size = stats.FileLen
1177-
valueSeparator.metas[f].ValueSize = stats.UncompressedValueBytes
1176+
valueSeparator.metas[base.BlobFileID(f)].Size = stats.FileLen
1177+
valueSeparator.metas[base.BlobFileID(f)].ValueSize = stats.UncompressedValueBytes
11781178
}
11791179

11801180
ve.NewBlobFiles = slices.Collect(maps.Values(valueSeparator.metas))
@@ -1640,7 +1640,7 @@ func describeLSM(d *DB, verbose bool) string {
16401640
if blobFileMetas := d.mu.versions.blobFiles.Metadatas(); len(blobFileMetas) > 0 {
16411641
buf.WriteString("Blob files:\n")
16421642
for _, meta := range blobFileMetas {
1643-
fmt.Fprintf(&buf, " %s: %d physical bytes, %d value bytes\n", meta.FileNum, meta.Size, meta.ValueSize)
1643+
fmt.Fprintf(&buf, " %s: %d physical bytes, %d value bytes\n", meta.FileID, meta.Size, meta.ValueSize)
16441644
}
16451645
}
16461646
return buf.String()

event.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import (
1919
"github.com/cockroachdb/pebble/internal/manifest"
2020
"github.com/cockroachdb/pebble/objstorage"
2121
"github.com/cockroachdb/pebble/objstorage/remote"
22+
"github.com/cockroachdb/pebble/sstable/blob"
2223
"github.com/cockroachdb/pebble/vfs"
2324
"github.com/cockroachdb/redact"
2425
)
@@ -1234,8 +1235,9 @@ func (d *DB) reportCorruption(meta any, err error) error {
12341235
case *manifest.TableMetadata:
12351236
return d.reportFileCorruption(base.FileTypeTable, meta.TableBacking.DiskFileNum, meta.UserKeyBounds(), err)
12361237
case *manifest.BlobFileMetadata:
1238+
diskFileNum := blob.DiskFileNumTODO(meta.FileID)
12371239
// TODO(jackson): Add bounds for blob files.
1238-
return d.reportFileCorruption(base.FileTypeBlob, meta.FileNum, base.UserKeyBounds{}, err)
1240+
return d.reportFileCorruption(base.FileTypeBlob, diskFileNum, base.UserKeyBounds{}, err)
12391241
default:
12401242
panic(fmt.Sprintf("unknown metadata type: %T", meta))
12411243
}

internal/base/filenames.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,21 @@ func PhysicalTableFileNum(f DiskFileNum) TableNum {
4444
return TableNum(f)
4545
}
4646

47+
// BlobFileID is an internal identifier for a blob file.
48+
//
49+
// Initially there exists a physical blob file with a DiskFileNum that equals
50+
// the value of the BlobFileID. However, if the blob file is replaced, the
51+
// manifest.Version may re-map the BlobFileID to a new DiskFileNum.
52+
type BlobFileID uint64
53+
54+
// String returns a string representation of the blob file ID.
55+
func (id BlobFileID) String() string { return fmt.Sprintf("%06d", id) }
56+
57+
// SafeFormat implements redact.SafeFormatter.
58+
func (id BlobFileID) SafeFormat(w redact.SafePrinter, _ rune) {
59+
w.Printf("%06d", redact.SafeUint(id))
60+
}
61+
4762
// A DiskFileNum identifies a file or object with exists on disk.
4863
type DiskFileNum uint64
4964

internal/base/lazy_value.go

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -163,9 +163,9 @@ type LazyFetcher struct {
163163
Fetcher ValueFetcher
164164
// Attribute includes the short attribute and value length.
165165
Attribute AttributeAndLen
166-
// BlobFileNum identifies the blob file containing the value. It is only
166+
// BlobFileID identifies the blob file containing the value. It is only
167167
// populated if the value is stored in a blob file.
168-
BlobFileNum DiskFileNum
168+
BlobFileID BlobFileID
169169
}
170170

171171
// ValueFetcher is an interface for fetching a value.
@@ -182,7 +182,7 @@ type ValueFetcher interface {
182182
// will allocate a new slice for the value. In either case it will set
183183
// callerOwned to true.
184184
Fetch(
185-
ctx context.Context, handle []byte, blobFileNum DiskFileNum, valLen uint32, buf []byte,
185+
ctx context.Context, handle []byte, blobFileID BlobFileID, valLen uint32, buf []byte,
186186
) (val []byte, callerOwned bool, err error)
187187
}
188188

@@ -193,7 +193,7 @@ func (lv *LazyValue) Value(buf []byte) (val []byte, callerOwned bool, err error)
193193
return lv.ValueOrHandle, false, nil
194194
}
195195
return f.Fetcher.Fetch(context.TODO(),
196-
lv.ValueOrHandle, f.BlobFileNum, f.Attribute.ValueLen, buf)
196+
lv.ValueOrHandle, f.BlobFileID, f.Attribute.ValueLen, buf)
197197
}
198198

199199
// Len returns the length of the value.
@@ -237,9 +237,9 @@ func (lv *LazyValue) Clone(buf []byte, fetcher *LazyFetcher) (LazyValue, []byte)
237237
var lvCopy LazyValue
238238
if lv.Fetcher != nil {
239239
*fetcher = LazyFetcher{
240-
Fetcher: lv.Fetcher.Fetcher,
241-
Attribute: lv.Fetcher.Attribute,
242-
BlobFileNum: lv.Fetcher.BlobFileNum,
240+
Fetcher: lv.Fetcher.Fetcher,
241+
Attribute: lv.Fetcher.Attribute,
242+
BlobFileID: lv.Fetcher.BlobFileID,
243243
// Not copying anything that has been extracted.
244244
}
245245
lvCopy.Fetcher = fetcher
@@ -266,8 +266,8 @@ var _ ValueFetcher = errValueFetcher{}
266266

267267
// Fetch implements base.ValueFetcher.
268268
func (e errValueFetcher) Fetch(
269-
_ context.Context, _ []byte, blobFileNum DiskFileNum, valLen uint32, _ []byte,
269+
_ context.Context, _ []byte, blobFileID BlobFileID, valLen uint32, _ []byte,
270270
) (val []byte, callerOwned bool, err error) {
271-
err = AssertionFailedf("unexpected blob value: %d-byte from %s", valLen, blobFileNum)
271+
err = AssertionFailedf("unexpected blob value: %d-byte from %s", valLen, blobFileID)
272272
return nil, false, err
273273
}

internal/base/lazy_value_test.go

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,12 @@ import (
1414
)
1515

1616
type valueFetcherFunc func(
17-
handle []byte, blobFileNum DiskFileNum, valLen uint32, buf []byte) (val []byte, callerOwned bool, err error)
17+
handle []byte, blobFileID BlobFileID, valLen uint32, buf []byte) (val []byte, callerOwned bool, err error)
1818

1919
func (v valueFetcherFunc) Fetch(
20-
ctx context.Context, handle []byte, blobFileNum DiskFileNum, valLen uint32, buf []byte,
20+
ctx context.Context, handle []byte, blobFileID BlobFileID, valLen uint32, buf []byte,
2121
) (val []byte, callerOwned bool, err error) {
22-
return v(handle, blobFileNum, valLen, buf)
22+
return v(handle, blobFileID, valLen, buf)
2323
}
2424

2525
func TestLazyValue(t *testing.T) {
@@ -54,15 +54,15 @@ func TestLazyValue(t *testing.T) {
5454
ValueOrHandle: []byte("foo-handle"),
5555
Fetcher: &LazyFetcher{
5656
Fetcher: valueFetcherFunc(
57-
func(handle []byte, blobFileNum DiskFileNum, valLen uint32, buf []byte) ([]byte, bool, error) {
57+
func(handle []byte, blobFileID BlobFileID, valLen uint32, buf []byte) ([]byte, bool, error) {
5858
numCalls++
5959
require.Equal(t, []byte("foo-handle"), handle)
6060
require.Equal(t, uint32(3), valLen)
61-
require.Equal(t, DiskFileNum(90), blobFileNum)
61+
require.Equal(t, BlobFileID(90), blobFileID)
6262
return fooBytes1, callerOwned, nil
6363
}),
64-
Attribute: AttributeAndLen{ValueLen: 3, ShortAttribute: 7},
65-
BlobFileNum: 90,
64+
Attribute: AttributeAndLen{ValueLen: 3, ShortAttribute: 7},
65+
BlobFileID: 90,
6666
},
6767
}
6868
require.Equal(t, []byte("foo"), getValue(fooLV3, callerOwned))

internal/base/value.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ func MakeInPlaceValue(val []byte) InternalValue {
3131
// to a value stored externally in a blob file.
3232
func (v *InternalValue) IsBlobValueHandle() bool {
3333
f := v.lazyValue.Fetcher
34-
return f != nil && f.BlobFileNum > 0
34+
return f != nil && f.BlobFileID > 0
3535
}
3636

3737
// IsInPlaceValue returns true iff the value was stored in-place and does not

internal/blobtest/handles.go

Lines changed: 29 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ import (
2626
type Values struct {
2727
References References
2828

29-
mostRecentFileNum base.DiskFileNum
30-
mostRecentHandles map[base.DiskFileNum]blob.Handle
29+
mostRecentBlobFileID base.BlobFileID
30+
mostRecentHandles map[base.BlobFileID]blob.Handle
3131
// trackedHandles maps from a blob handle to its value. The value may be nil
3232
// if the value was not specified (in which case Fetch will
3333
// deterministically derive a random value from the handle itself.)
@@ -36,18 +36,18 @@ type Values struct {
3636

3737
// Fetch returns the value corresponding to the given handle.
3838
func (bv *Values) Fetch(
39-
ctx context.Context, handleSuffix []byte, blobFileNum base.DiskFileNum, valLen uint32, _ []byte,
39+
ctx context.Context, handleSuffix []byte, blobFileID base.BlobFileID, valLen uint32, _ []byte,
4040
) (val []byte, callerOwned bool, err error) {
4141
if bv.trackedHandles == nil {
4242
return nil, false, errors.New("no tracked handles")
4343
}
4444

4545
decodedHandleSuffix := blob.DecodeHandleSuffix(handleSuffix)
4646
decodedHandle := blob.Handle{
47-
FileNum: blobFileNum,
48-
ValueLen: valLen,
49-
BlockID: decodedHandleSuffix.BlockID,
50-
ValueID: decodedHandleSuffix.ValueID,
47+
BlobFileID: blobFileID,
48+
ValueLen: valLen,
49+
BlockID: decodedHandleSuffix.BlockID,
50+
ValueID: decodedHandleSuffix.ValueID,
5151
}
5252

5353
value, ok := bv.trackedHandles[decodedHandle]
@@ -64,7 +64,7 @@ func (bv *Values) Fetch(
6464
}
6565

6666
func deriveValueFromHandle(handle blob.Handle) []byte {
67-
rng := rand.New(rand.NewPCG((uint64(handle.FileNum)<<32)|uint64(handle.BlockID), uint64(handle.ValueID)))
67+
rng := rand.New(rand.NewPCG((uint64(handle.BlobFileID)<<32)|uint64(handle.BlockID), uint64(handle.ValueID)))
6868
b := make([]byte, handle.ValueLen)
6969
for i := range b {
7070
b[i] = 'a' + byte(rng.IntN(26))
@@ -97,7 +97,7 @@ func (bv *Values) ParseInternalValue(input string) (base.InternalValue, error) {
9797
// TODO(jackson): Support user-specified short attributes.
9898
ShortAttribute: base.ShortAttribute(h.ValueLen & 0x07),
9999
},
100-
BlobFileNum: h.FileNum,
100+
BlobFileID: h.BlobFileID,
101101
},
102102
}), nil
103103
}
@@ -113,7 +113,7 @@ func IsBlobHandle(input string) bool {
113113
func (bv *Values) Parse(input string) (h blob.Handle, remaining string, err error) {
114114
if bv.trackedHandles == nil {
115115
bv.trackedHandles = make(map[blob.Handle]string)
116-
bv.mostRecentHandles = make(map[base.DiskFileNum]blob.Handle)
116+
bv.mostRecentHandles = make(map[base.BlobFileID]blob.Handle)
117117
}
118118

119119
defer func() {
@@ -136,7 +136,7 @@ func (bv *Values) Parse(input string) (h blob.Handle, remaining string, err erro
136136
done = true
137137
case "fileNum":
138138
p.Expect("=")
139-
h.FileNum = p.DiskFileNum()
139+
h.BlobFileID = base.BlobFileID(p.Uint64())
140140
fileNumSet = true
141141
case "blockID":
142142
p.Expect("=")
@@ -162,13 +162,13 @@ func (bv *Values) Parse(input string) (h blob.Handle, remaining string, err erro
162162
}
163163

164164
if !fileNumSet {
165-
h.FileNum = bv.mostRecentFileNum
165+
h.BlobFileID = bv.mostRecentBlobFileID
166166
}
167167
if !blockIDSet {
168-
h.BlockID = bv.mostRecentHandles[h.FileNum].BlockID
168+
h.BlockID = bv.mostRecentHandles[h.BlobFileID].BlockID
169169
}
170170
if !valueIDSet {
171-
if recentHandle, ok := bv.mostRecentHandles[h.FileNum]; ok {
171+
if recentHandle, ok := bv.mostRecentHandles[h.BlobFileID]; ok {
172172
h.ValueID = recentHandle.ValueID + 1
173173
} else {
174174
h.ValueID = 0
@@ -181,8 +181,8 @@ func (bv *Values) Parse(input string) (h blob.Handle, remaining string, err erro
181181
h.ValueLen = 12
182182
}
183183
}
184-
bv.mostRecentFileNum = h.FileNum
185-
bv.mostRecentHandles[h.FileNum] = h
184+
bv.mostRecentBlobFileID = h.BlobFileID
185+
bv.mostRecentHandles[h.BlobFileID] = h
186186
bv.trackedHandles[h] = value
187187
return h, p.Remaining(), nil
188188
}
@@ -201,7 +201,7 @@ func (bv *Values) ParseInlineHandle(
201201
}
202202
h = blob.InlineHandle{
203203
InlineHandlePreface: blob.InlineHandlePreface{
204-
ReferenceID: bv.References.MapToReferenceID(fullHandle.FileNum),
204+
ReferenceID: bv.References.MapToReferenceID(fullHandle.BlobFileID),
205205
ValueLen: fullHandle.ValueLen,
206206
},
207207
HandleSuffix: blob.HandleSuffix{
@@ -223,7 +223,8 @@ func (bv *Values) WriteFiles(
223223
// Organize the handles by file number.
224224
files := make(map[base.DiskFileNum][]blob.Handle)
225225
for handle := range bv.trackedHandles {
226-
files[handle.FileNum] = append(files[handle.FileNum], handle)
226+
diskFileNum := base.DiskFileNum(handle.BlobFileID)
227+
files[diskFileNum] = append(files[diskFileNum], handle)
227228
}
228229

229230
stats := make(map[base.DiskFileNum]blob.FileWriterStats)
@@ -251,10 +252,10 @@ func (bv *Values) WriteFiles(
251252
prevID++
252253
for prevID < int(handle.ValueID) {
253254
writer.AddValue(deriveValueFromHandle(blob.Handle{
254-
FileNum: fileNum,
255-
BlockID: handle.BlockID,
256-
ValueID: blob.BlockValueID(prevID),
257-
ValueLen: 12,
255+
BlobFileID: base.BlobFileID(fileNum),
256+
BlockID: handle.BlockID,
257+
ValueID: blob.BlockValueID(prevID),
258+
ValueLen: 12,
258259
}))
259260
prevID++
260261
}
@@ -287,17 +288,17 @@ func errFromPanic(r any) error {
287288
// each file number to a reference index (encoded within the
288289
// blob.InlineHandlePreface).
289290
type References struct {
290-
fileNums []base.DiskFileNum
291+
fileIDs []base.BlobFileID
291292
}
292293

293294
// MapToReferenceID maps the given file number to a reference ID.
294-
func (b *References) MapToReferenceID(fileNum base.DiskFileNum) blob.ReferenceID {
295-
for i, fn := range b.fileNums {
296-
if fn == fileNum {
295+
func (b *References) MapToReferenceID(fileID base.BlobFileID) blob.ReferenceID {
296+
for i, fn := range b.fileIDs {
297+
if fn == fileID {
297298
return blob.ReferenceID(i)
298299
}
299300
}
300-
i := uint32(len(b.fileNums))
301-
b.fileNums = append(b.fileNums, fileNum)
301+
i := uint32(len(b.fileIDs))
302+
b.fileIDs = append(b.fileIDs, fileID)
302303
return blob.ReferenceID(i)
303304
}

0 commit comments

Comments
 (0)