Skip to content

Commit d7ce913

Browse files
committed
objstorage: support cold tier
This change adds objstorage provider support for a cold tier. `CreateOptions.UseColdTier` can be used to create cold objects; the tier becomes part of the `ObjectMetadata`.
1 parent 4e65970 commit d7ce913

File tree

7 files changed

+294
-81
lines changed

7 files changed

+294
-81
lines changed

internal/base/internal.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -774,5 +774,6 @@ type StorageTier uint8
774774

775775
const (
776776
HotTier StorageTier = iota
777+
ColdTier
777778
NumStorageTiers
778779
)

objstorage/objstorage.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,11 @@ type ObjectMetadata struct {
120120
DiskFileNum base.DiskFileNum
121121
FileType base.FileType
122122

123+
// The fields below are only set if the object is on local storage.
124+
Local struct {
125+
Tier base.StorageTier
126+
}
127+
123128
// The fields below are only set if the object is on remote storage.
124129
Remote struct {
125130
// CreatorID identifies the DB instance that originally created the object.
@@ -243,6 +248,11 @@ type CreateOptions struct {
243248
// The default (zero) value is SharedRefTracking.
244249
SharedCleanupMethod SharedCleanupMethod
245250

251+
// Tier is the storage tier for this object. If Tier is ColdTier and the
252+
// provider has a local cold tier configured (and PreferSharedStorage is
253+
// false), the object will be created on the local cold tier.
254+
Tier base.StorageTier
255+
246256
// WriteCategory is used for the object when it is created on local storage
247257
// to collect aggregated write metrics for each write source.
248258
WriteCategory vfs.DiskWriteCategory

objstorage/objstorageprovider/provider.go

Lines changed: 43 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ import (
1919
"github.com/cockroachdb/pebble/internal/invariants"
2020
"github.com/cockroachdb/pebble/objstorage"
2121
"github.com/cockroachdb/pebble/objstorage/objstorageprovider/objiotracing"
22-
"github.com/cockroachdb/pebble/objstorage/objstorageprovider/remoteobjcat"
2322
"github.com/cockroachdb/pebble/objstorage/objstorageprovider/sharedcache"
2423
"github.com/cockroachdb/pebble/objstorage/remote"
2524
"github.com/cockroachdb/pebble/vfs"
@@ -61,6 +60,12 @@ type Settings struct {
6160
FS vfs.FS
6261
FSDirName string
6362

63+
// ColdTier is set only when a secondary "cold" storage tier is to be used.
64+
ColdTier struct {
65+
FS vfs.FS
66+
FSDirName string
67+
}
68+
6469
// FSDirInitialListing is a listing of FSDirName at the time of calling Open.
6570
//
6671
// This is an optional optimization to avoid double listing on Open when the
@@ -275,7 +280,7 @@ func (p *provider) OpenForReading(
275280

276281
var r objstorage.Readable
277282
if !meta.IsRemote() {
278-
r, err = p.localOpenForReading(ctx, fileType, fileNum, opts)
283+
r, err = p.localOpenForReading(ctx, fileType, fileNum, meta.Local.Tier, opts)
279284
} else {
280285
r, err = p.remoteOpenForReading(ctx, meta, opts)
281286
if err != nil && p.isNotExistError(meta, err) {
@@ -312,7 +317,11 @@ func (p *provider) Create(
312317
} else {
313318
category = vfs.WriteCategoryUnspecified
314319
}
315-
w, meta, err = p.vfsCreate(ctx, fileType, fileNum, category)
320+
tier := opts.Tier
321+
if tier == base.ColdTier && p.st.Local.ColdTier.FS == nil {
322+
tier = base.HotTier
323+
}
324+
w, meta, err = p.vfsCreate(ctx, fileType, fileNum, tier, category)
316325
}
317326
if err != nil {
318327
err = errors.Wrapf(err, "creating object %s", fileNum)
@@ -339,7 +348,7 @@ func (p *provider) Remove(fileType base.FileType, fileNum base.DiskFileNum) erro
339348
}
340349

341350
if !meta.IsRemote() {
342-
err = p.localRemove(fileType, fileNum)
351+
err = p.localRemove(fileType, fileNum, meta.Local.Tier)
343352
} else {
344353
// TODO(radu): implement remote object removal (i.e. deref).
345354
err = p.sharedUnref(meta)
@@ -398,25 +407,32 @@ func (p *provider) LinkOrCopyFromLocal(
398407
dstFileNum base.DiskFileNum,
399408
opts objstorage.CreateOptions,
400409
) (objstorage.ObjectMetadata, error) {
410+
tier := opts.Tier
411+
if tier == base.ColdTier && p.st.Local.ColdTier.FS == nil {
412+
tier = base.HotTier
413+
}
401414
shared := opts.PreferSharedStorage && p.st.Remote.CreateOnShared != remote.CreateOnSharedNone
402-
if !shared && srcFS == p.st.Local.FS {
403-
// Wrap the normal filesystem with one which wraps newly created files with
404-
// vfs.NewSyncingFile.
405-
fs := vfs.NewSyncingFS(p.st.Local.FS, vfs.SyncingFileOptions{
406-
NoSyncOnClose: p.st.Local.NoSyncOnClose,
407-
BytesPerSync: p.st.Local.BytesPerSync,
408-
})
409-
dstPath := p.localPath(dstFileType, dstFileNum)
410-
if err := vfs.LinkOrCopy(fs, srcFilePath, dstPath); err != nil {
411-
return objstorage.ObjectMetadata{}, err
412-
}
415+
if !shared && (srcFS == p.st.Local.FS || srcFS == p.st.Local.ColdTier.FS) {
416+
fs, dstPath := p.localPath(dstFileType, dstFileNum, tier)
417+
if srcFS == fs {
418+
// Wrap the normal filesystem with one which wraps newly created files with
419+
// vfs.NewSyncingFile.
420+
fs = vfs.NewSyncingFS(fs, vfs.SyncingFileOptions{
421+
NoSyncOnClose: p.st.Local.NoSyncOnClose,
422+
BytesPerSync: p.st.Local.BytesPerSync,
423+
})
424+
if err := vfs.LinkOrCopy(fs, srcFilePath, dstPath); err != nil {
425+
return objstorage.ObjectMetadata{}, err
426+
}
413427

414-
meta := objstorage.ObjectMetadata{
415-
DiskFileNum: dstFileNum,
416-
FileType: dstFileType,
428+
meta := objstorage.ObjectMetadata{
429+
DiskFileNum: dstFileNum,
430+
FileType: dstFileType,
431+
}
432+
meta.Local.Tier = tier
433+
p.addMetadata(meta)
434+
return meta, nil
417435
}
418-
p.addMetadata(meta)
419-
return meta, nil
420436
}
421437
// Create the object and copy the data.
422438
w, meta, err := p.Create(ctx, dstFileType, dstFileNum, opts)
@@ -479,15 +495,16 @@ func (p *provider) Lookup(
479495
// Path is part of the objstorage.Provider interface.
480496
func (p *provider) Path(meta objstorage.ObjectMetadata) string {
481497
if !meta.IsRemote() {
482-
return p.localPath(meta.FileType, meta.DiskFileNum)
498+
_, path := p.localPath(meta.FileType, meta.DiskFileNum, meta.Local.Tier)
499+
return path
483500
}
484501
return p.remotePath(meta)
485502
}
486503

487504
// Size returns the size of the object.
488505
func (p *provider) Size(meta objstorage.ObjectMetadata) (int64, error) {
489506
if !meta.IsRemote() {
490-
return p.localSize(meta.FileType, meta.DiskFileNum)
507+
return p.localSize(meta.FileType, meta.DiskFileNum, meta.Local.Tier)
491508
}
492509
return p.remoteSize(meta)
493510
}
@@ -548,20 +565,9 @@ func (p *provider) addMetadataLocked(meta objstorage.ObjectMetadata) {
548565
}
549566
p.mu.knownObjects[meta.DiskFileNum] = meta
550567
if meta.IsRemote() {
551-
p.mu.remote.catalogBatch.AddObject(remoteobjcat.RemoteObjectMetadata{
552-
FileNum: meta.DiskFileNum,
553-
FileType: meta.FileType,
554-
CreatorID: meta.Remote.CreatorID,
555-
CreatorFileNum: meta.Remote.CreatorFileNum,
556-
Locator: meta.Remote.Locator,
557-
CleanupMethod: meta.Remote.CleanupMethod,
558-
CustomObjectName: meta.Remote.CustomObjectName,
559-
})
560-
if meta.IsExternal() {
561-
p.mu.remote.addExternalObject(meta)
562-
}
568+
p.mu.remote.addObject(meta)
563569
} else {
564-
p.mu.local.objChangeCounter++
570+
p.mu.local.objChanged(meta)
565571
}
566572
}
567573

@@ -574,13 +580,10 @@ func (p *provider) removeMetadata(fileNum base.DiskFileNum) {
574580
return
575581
}
576582
delete(p.mu.knownObjects, fileNum)
577-
if meta.IsExternal() {
578-
p.mu.remote.removeExternalObject(meta)
579-
}
580583
if meta.IsRemote() {
581-
p.mu.remote.catalogBatch.DeleteObject(fileNum)
584+
p.mu.remote.removeObject(meta)
582585
} else {
583-
p.mu.local.objChangeCounter++
586+
p.mu.local.objChanged(meta)
584587
}
585588
}
586589

objstorage/objstorageprovider/provider_test.go

Lines changed: 27 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ func TestProvider(t *testing.T) {
6363
log.Reset()
6464
switch d.Cmd {
6565
case "open":
66-
var fsDir string
66+
var fsDir, coldDir string
6767
var creatorID objstorage.CreatorID
6868
d.CmdArgs = slices.DeleteFunc(d.CmdArgs, func(arg datadriven.CmdArg) bool {
6969
switch arg.Key {
@@ -72,19 +72,29 @@ func TestProvider(t *testing.T) {
7272
arg.Scan(t, 0, &id)
7373
creatorID = objstorage.CreatorID(id)
7474
return true
75+
case "cold-tier":
76+
coldDir = arg.SingleVal(t)
77+
return true
7578
}
7679
return false
7780
})
7881
scanArgs("<fs-dir> [creator-id=X]", &fsDir)
7982

83+
require.NoError(t, fs.MkdirAll(fsDir, 0755))
8084
st := DefaultSettings(fs, fsDir)
85+
if coldDir != "" {
86+
st.Local.ColdTier.FS = fs
87+
st.Local.ColdTier.FSDirName = coldDir
88+
require.NoError(t, fs.MkdirAll(coldDir, 0755))
89+
}
8190
if creatorID != 0 {
8291
st.Remote.StorageFactory = sharedFactory
8392
st.Remote.CreateOnShared = remote.CreateOnSharedAll
8493
st.Remote.CreateOnSharedLocator = ""
8594
}
8695
st.Local.ReadaheadConfig = readaheadConfig
87-
require.NoError(t, fs.MkdirAll(fsDir, 0755))
96+
if coldDir != "" {
97+
}
8898
p, err := Open(st)
8999
require.NoError(t, err)
90100
if creatorID != 0 {
@@ -122,18 +132,24 @@ func TestProvider(t *testing.T) {
122132
SharedCleanupMethod: objstorage.SharedRefTracking,
123133
}
124134
ft := base.FileTypeTable
125-
if len(d.CmdArgs) > 0 && d.CmdArgs[0].Key == "file-type" {
126-
ft = base.FileTypeFromName(d.CmdArgs[0].FirstVal(t))
127-
d.CmdArgs = d.CmdArgs[1:]
128-
}
129-
if len(d.CmdArgs) == 5 && d.CmdArgs[4].Key == "no-ref-tracking" {
130-
d.CmdArgs = d.CmdArgs[:4]
131-
opts.SharedCleanupMethod = objstorage.SharedNoCleanup
132-
}
135+
d.CmdArgs = slices.DeleteFunc(d.CmdArgs, func(arg datadriven.CmdArg) bool {
136+
switch arg.Key {
137+
case "file-type":
138+
ft = base.FileTypeFromName(d.CmdArgs[0].FirstVal(t))
139+
return true
140+
case "no-ref-tracking":
141+
opts.SharedCleanupMethod = objstorage.SharedNoCleanup
142+
return true
143+
case "cold-tier":
144+
opts.Tier = base.ColdTier
145+
return true
146+
}
147+
return false
148+
})
133149
var fileNum base.DiskFileNum
134150
var typ string
135151
var salt, size int
136-
scanArgs("[file-type=sstable|blob] <file-num> <local|shared> <salt> <size> [no-ref-tracking]", &fileNum, &typ, &salt, &size)
152+
scanArgs("[file-type=sstable|blob] <file-num> <local|shared> <salt> <size> [no-ref-tracking] [cold-tier]", &fileNum, &typ, &salt, &size)
137153
switch typ {
138154
case "local":
139155
case "shared":

objstorage/objstorageprovider/remote.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,28 @@ type remoteLockedState struct {
5656
externalObjects map[remote.ObjectKey][]base.DiskFileNum
5757
}
5858

59+
func (rs *remoteLockedState) addObject(meta objstorage.ObjectMetadata) {
60+
rs.catalogBatch.AddObject(remoteobjcat.RemoteObjectMetadata{
61+
FileNum: meta.DiskFileNum,
62+
FileType: meta.FileType,
63+
CreatorID: meta.Remote.CreatorID,
64+
CreatorFileNum: meta.Remote.CreatorFileNum,
65+
Locator: meta.Remote.Locator,
66+
CleanupMethod: meta.Remote.CleanupMethod,
67+
CustomObjectName: meta.Remote.CustomObjectName,
68+
})
69+
if meta.IsExternal() {
70+
rs.addExternalObject(meta)
71+
}
72+
}
73+
74+
func (rs *remoteLockedState) removeObject(meta objstorage.ObjectMetadata) {
75+
if meta.IsExternal() {
76+
rs.removeExternalObject(meta)
77+
}
78+
rs.catalogBatch.DeleteObject(meta.DiskFileNum)
79+
}
80+
5981
func (rs *remoteLockedState) addExternalObject(meta objstorage.ObjectMetadata) {
6082
if rs.externalObjects == nil {
6183
rs.externalObjects = make(map[remote.ObjectKey][]base.DiskFileNum)
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
open p1 cold-tier=cold1
2+
----
3+
<local fs> mkdir-all: p1 0755
4+
<local fs> mkdir-all: cold1 0755
5+
<local fs> open-dir: p1
6+
<local fs> open-dir: cold1
7+
8+
# Create a cold file.
9+
create file-type=blob 1 local 1 1024 cold-tier
10+
----
11+
<local fs> create: cold1/000001.blob
12+
<local fs> sync-data: cold1/000001.blob
13+
<local fs> close: cold1/000001.blob
14+
15+
read file-type=blob 1
16+
0 500
17+
512 1024
18+
----
19+
<local fs> open: cold1/000001.blob (options: *vfs.randomReadsOption)
20+
size: 1024
21+
<local fs> read-at(0, 500): cold1/000001.blob
22+
0 500: ok (salt 1)
23+
<local fs> read-at(512, 1024): cold1/000001.blob
24+
512 1024: EOF
25+
<local fs> close: cold1/000001.blob
26+
27+
create 2 local 2 1024
28+
----
29+
<local fs> create: p1/000002.sst
30+
<local fs> sync-data: p1/000002.sst
31+
<local fs> close: p1/000002.sst
32+
33+
read 2
34+
0 500
35+
512 1024
36+
----
37+
<local fs> open: p1/000002.sst (options: *vfs.randomReadsOption)
38+
size: 1024
39+
<local fs> read-at(0, 500): p1/000002.sst
40+
0 500: ok (salt 2)
41+
<local fs> read-at(512, 1024): p1/000002.sst
42+
512 1024: EOF
43+
<local fs> close: p1/000002.sst
44+
45+
list
46+
----
47+
000001 -> cold1/000001.blob
48+
000002 -> p1/000002.sst
49+
50+
remove file-type=blob 1
51+
----
52+
<local fs> remove: cold1/000001.blob
53+
54+
remove 2
55+
----
56+
<local fs> remove: p1/000002.sst
57+
58+
# Verify that we can request cold tier even if it is not configured.
59+
open p2
60+
----
61+
<local fs> mkdir-all: p2 0755
62+
<local fs> open-dir: p2
63+
64+
create file-type=blob 3 local 3 1024 cold-tier
65+
----
66+
<local fs> create: p2/000003.blob
67+
<local fs> sync-data: p2/000003.blob
68+
<local fs> close: p2/000003.blob
69+
70+
read file-type=blob 3
71+
0 500
72+
512 1024
73+
----
74+
<local fs> open: p2/000003.blob (options: *vfs.randomReadsOption)
75+
size: 1024
76+
<local fs> read-at(0, 500): p2/000003.blob
77+
0 500: ok (salt 3)
78+
<local fs> read-at(512, 1024): p2/000003.blob
79+
512 1024: EOF
80+
<local fs> close: p2/000003.blob
81+
82+
remove file-type=blob 3
83+
----
84+
<local fs> remove: p2/000003.blob

0 commit comments

Comments
 (0)