Skip to content

Commit bc68d6c

Browse files
committed
tool: analyze-data: separate vfs-specific code behind interface
1 parent e24d2aa commit bc68d6c

File tree

4 files changed

+89
-23
lines changed

4 files changed

+89
-23
lines changed

tool/db.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ type dbT struct {
5555
openErrEnhancer func(error) error
5656
openOptions []OpenOption
5757
exciseSpanFn DBExciseSpanFn
58+
remoteStorageFn DBRemoteStorageFn
5859

5960
// Flags.
6061
comparerName string

tool/db_analyze_data.go

Lines changed: 73 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import (
1818
"github.com/cockroachdb/errors"
1919
"github.com/cockroachdb/pebble/internal/base"
2020
"github.com/cockroachdb/pebble/internal/humanize"
21+
"github.com/cockroachdb/pebble/objstorage"
2122
"github.com/cockroachdb/pebble/objstorage/objstorageprovider"
2223
"github.com/cockroachdb/pebble/sstable/compressionanalyzer"
2324
"github.com/cockroachdb/pebble/vfs"
@@ -38,7 +39,8 @@ func (d *dbT) runAnalyzeData(cmd *cobra.Command, args []string) {
3839
return
3940
}
4041
rng := rand.New(rand.NewPCG(rand.Uint64(), rand.Uint64()))
41-
files, err := makeFileSet(d.opts.FS, dir, rng)
42+
dbStorage := newVFSStorage(d.opts.FS, dir)
43+
files, err := makeFileSet(dbStorage, rng)
4244
if err != nil {
4345
fmt.Fprintf(stderr, "error loading file list: %s\n", err)
4446
return
@@ -121,7 +123,8 @@ func (d *dbT) runAnalyzeData(cmd *cobra.Command, args []string) {
121123
lastReportTime = time.Now()
122124
}
123125
// Sample a file and analyze it.
124-
path, size := files.Sample()
126+
filename, size := files.Sample()
127+
path := d.opts.FS.PathJoin(dir, filename)
125128
if err := d.analyzeSSTable(analyzer, path); err != nil {
126129
// We silently ignore errors from files that are deleted from under us.
127130
if !errors.Is(err, os.ErrNotExist) {
@@ -153,15 +156,59 @@ func analyzeSaveCSVFile(a *compressionanalyzer.FileAnalyzer, path string) error
153156
return os.WriteFile(path, []byte(csv), 0o666)
154157
}
155158

159+
type vfsStorage struct {
160+
fs vfs.FS
161+
dir string
162+
}
163+
164+
func newVFSStorage(fs vfs.FS, dir string) *vfsStorage {
165+
return &vfsStorage{
166+
fs: fs,
167+
dir: dir,
168+
}
169+
}
170+
171+
var _ dbStorage = (*vfsStorage)(nil)
172+
173+
func (l *vfsStorage) List() ([]string, error) {
174+
return l.fs.List(l.dir)
175+
}
176+
177+
func (l *vfsStorage) Size(name string) int64 {
178+
fileInfo, err := l.fs.Stat(l.fs.PathJoin(l.dir, name))
179+
if err != nil {
180+
return 0
181+
}
182+
// We ignore files that are less than 15 seconds old. This is to avoid trying
183+
// to read a file that is still being written.
184+
if time.Since(fileInfo.ModTime()) < 15*time.Second {
185+
return 0
186+
}
187+
return fileInfo.Size()
188+
}
189+
190+
func (l *vfsStorage) Open(name string) (objstorage.Readable, error) {
191+
path := l.fs.PathJoin(l.dir, name)
192+
file, err := l.fs.Open(path)
193+
if err != nil {
194+
return nil, err
195+
}
196+
readable, err := objstorageprovider.NewFileReadable(file, l.fs, objstorageprovider.NewReadaheadConfig(), path)
197+
if err != nil {
198+
_ = file.Close()
199+
return nil, err
200+
}
201+
return readable, nil
202+
}
203+
156204
// We avoid files that are very large to prevent excessive memory usage. Note
157205
// that we have seen cases where large files contain a giant top index block, so
158206
// even getting the block layout of the file would use a lot of memory.
159207
const analyzeMaxFileSize = 512 * 1024 * 1024
160208

161209
type fileSet struct {
162-
fs vfs.FS
163-
dir string
164-
rng *rand.Rand
210+
dbStorage dbStorage
211+
rng *rand.Rand
165212

166213
files []fileInSet
167214
sampleIdx []int
@@ -175,11 +222,21 @@ type fileInSet struct {
175222
sampled bool
176223
}
177224

178-
func makeFileSet(fs vfs.FS, dir string, rng *rand.Rand) (fileSet, error) {
225+
type dbStorage interface {
226+
// List files or objects.
227+
List() ([]string, error)
228+
// Size returns the size of a file or object, or 0 if the file no longer
229+
// exists (or some other error was encountered).
230+
Size(name string) int64
231+
232+
// Open returns a Readable for the file or object with the given name.
233+
Open(name string) (objstorage.Readable, error)
234+
}
235+
236+
func makeFileSet(dbStorage dbStorage, rng *rand.Rand) (fileSet, error) {
179237
s := fileSet{
180-
fs: fs,
181-
dir: dir,
182-
rng: rng,
238+
dbStorage: dbStorage,
239+
rng: rng,
183240
}
184241
return s, s.Refresh()
185242
}
@@ -192,32 +249,26 @@ func samplingKey(rng *rand.Rand, size int64) float64 {
192249
}
193250

194251
func (s *fileSet) Refresh() error {
195-
filenames, err := s.fs.List(s.dir)
252+
filenames, err := s.dbStorage.List()
196253
if err != nil {
197254
return err
198255
}
199256
slices.Sort(filenames)
200257
oldFiles := slices.Clone(s.files)
201258
s.files = s.files[:0]
202-
now := time.Now()
203259

204260
newFile := func(filename string) {
205-
// New file.
206-
fileType, _, ok := base.ParseFilename(s.fs, filename)
261+
// Note that vfs.Default is only used to call BaseName which should be a
262+
// no-op.
263+
fileType, _, ok := base.ParseFilename(vfs.Default, filename)
207264
if !ok || fileType != base.FileTypeTable {
208265
return
209266
}
210-
fileInfo, err := s.fs.Stat(s.fs.PathJoin(s.dir, filename))
267+
size := s.dbStorage.Size(filename)
211268
if err != nil {
212269
// Files can get deleted from under us, so we tolerate errors.
213270
return
214271
}
215-
// We ignore files that are less than 15 seconds old. This is to avoid
216-
// trying to read a file that is still being written.
217-
if now.Sub(fileInfo.ModTime()) < 15*time.Second {
218-
return
219-
}
220-
size := fileInfo.Size()
221272
if size == 0 || size > analyzeMaxFileSize {
222273
return
223274
}
@@ -273,11 +324,11 @@ func (s *fileSet) Done() bool {
273324

274325
// Sample returns a random file from the set (which was not previously sampled),
275326
// weighted by size.
276-
func (s *fileSet) Sample() (path string, size int64) {
327+
func (s *fileSet) Sample() (filename string, size int64) {
277328
idx := s.sampleIdx[0]
278329
s.sampleIdx = s.sampleIdx[1:]
279330
s.files[idx].sampled = true
280-
return s.fs.PathJoin(s.dir, s.files[idx].filename), s.files[idx].size
331+
return s.files[idx].filename, s.files[idx].size
281332
}
282333

283334
func isTTY(out io.Writer) bool {

tool/db_analyze_data_test.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,8 @@ func TestFileSetSampling(t *testing.T) {
4848
rng := rand.New(rand.NewPCG(rand.Uint64(), rand.Uint64()))
4949
smallFileChosen := 0
5050
for i := 0; i < iterations; i++ {
51-
fs, err := makeFileSet(fsWrapper{memFS}, "", rng)
51+
dbStorage := newVFSStorage(fsWrapper{memFS}, "")
52+
fs, err := makeFileSet(dbStorage, rng)
5253
require.NoError(t, err)
5354
file, _ := fs.Sample()
5455
if file != largeFileName {

tool/tool.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ type T struct {
4545
openErrEnhancer func(error) error
4646
openOptions []OpenOption
4747
exciseSpanFn DBExciseSpanFn
48+
remoteStorageFn DBRemoteStorageFn
4849
}
4950

5051
// A Option configures the Pebble introspection tool.
@@ -148,6 +149,18 @@ func WithDBExciseSpanFn(fn DBExciseSpanFn) Option {
148149
}
149150
}
150151

152+
// DBRemoteStorageFn is used for certain commands which support cloud URIs (like
153+
// gs://foo/bar).
154+
type DBRemoteStorageFn func(uri string) (remote.Storage, error)
155+
156+
// WithDBRemoteStorageFn specifies a function that resolves a remote storage
157+
// URI (like gs://foo/bar) for commands that support cloud object storage.
158+
func WithDBRemoteStorageFn(fn DBRemoteStorageFn) Option {
159+
return func(t *T) {
160+
t.remoteStorageFn = fn
161+
}
162+
}
163+
151164
// New creates a new introspection tool.
152165
func New(opts ...Option) *T {
153166
t := &T{

0 commit comments

Comments
 (0)