@@ -18,6 +18,7 @@ import (
18
18
"github.com/cockroachdb/errors"
19
19
"github.com/cockroachdb/pebble/internal/base"
20
20
"github.com/cockroachdb/pebble/internal/humanize"
21
+ "github.com/cockroachdb/pebble/objstorage"
21
22
"github.com/cockroachdb/pebble/objstorage/objstorageprovider"
22
23
"github.com/cockroachdb/pebble/sstable/compressionanalyzer"
23
24
"github.com/cockroachdb/pebble/vfs"
@@ -38,7 +39,8 @@ func (d *dbT) runAnalyzeData(cmd *cobra.Command, args []string) {
38
39
return
39
40
}
40
41
rng := rand .New (rand .NewPCG (rand .Uint64 (), rand .Uint64 ()))
41
- files , err := makeFileSet (d .opts .FS , dir , rng )
42
+ dbStorage := newVFSStorage (d .opts .FS , dir )
43
+ files , err := makeFileSet (dbStorage , rng )
42
44
if err != nil {
43
45
fmt .Fprintf (stderr , "error loading file list: %s\n " , err )
44
46
return
@@ -121,7 +123,8 @@ func (d *dbT) runAnalyzeData(cmd *cobra.Command, args []string) {
121
123
lastReportTime = time .Now ()
122
124
}
123
125
// Sample a file and analyze it.
124
- path , size := files .Sample ()
126
+ filename , size := files .Sample ()
127
+ path := d .opts .FS .PathJoin (dir , filename )
125
128
if err := d .analyzeSSTable (analyzer , path ); err != nil {
126
129
// We silently ignore errors from files that are deleted from under us.
127
130
if ! errors .Is (err , os .ErrNotExist ) {
@@ -153,15 +156,59 @@ func analyzeSaveCSVFile(a *compressionanalyzer.FileAnalyzer, path string) error
153
156
return os .WriteFile (path , []byte (csv ), 0o666 )
154
157
}
155
158
159
+ type vfsStorage struct {
160
+ fs vfs.FS
161
+ dir string
162
+ }
163
+
164
+ func newVFSStorage (fs vfs.FS , dir string ) * vfsStorage {
165
+ return & vfsStorage {
166
+ fs : fs ,
167
+ dir : dir ,
168
+ }
169
+ }
170
+
171
+ var _ dbStorage = (* vfsStorage )(nil )
172
+
173
+ func (l * vfsStorage ) List () ([]string , error ) {
174
+ return l .fs .List (l .dir )
175
+ }
176
+
177
+ func (l * vfsStorage ) Size (name string ) int64 {
178
+ fileInfo , err := l .fs .Stat (l .fs .PathJoin (l .dir , name ))
179
+ if err != nil {
180
+ return 0
181
+ }
182
+ // We ignore files that are less than 15 seconds old. This is to avoid trying
183
+ // to read a file that is still being written.
184
+ if time .Since (fileInfo .ModTime ()) < 15 * time .Second {
185
+ return 0
186
+ }
187
+ return fileInfo .Size ()
188
+ }
189
+
190
+ func (l * vfsStorage ) Open (name string ) (objstorage.Readable , error ) {
191
+ path := l .fs .PathJoin (l .dir , name )
192
+ file , err := l .fs .Open (path )
193
+ if err != nil {
194
+ return nil , err
195
+ }
196
+ readable , err := objstorageprovider .NewFileReadable (file , l .fs , objstorageprovider .NewReadaheadConfig (), path )
197
+ if err != nil {
198
+ _ = file .Close ()
199
+ return nil , err
200
+ }
201
+ return readable , nil
202
+ }
203
+
156
204
// We avoid files that are very large to prevent excessive memory usage. Note
157
205
// that we have seen cases where large files contain a giant top index block, so
158
206
// even getting the block layout of the file would use a lot of memory.
159
207
const analyzeMaxFileSize = 512 * 1024 * 1024
160
208
161
209
type fileSet struct {
162
- fs vfs.FS
163
- dir string
164
- rng * rand.Rand
210
+ dbStorage dbStorage
211
+ rng * rand.Rand
165
212
166
213
files []fileInSet
167
214
sampleIdx []int
@@ -175,11 +222,21 @@ type fileInSet struct {
175
222
sampled bool
176
223
}
177
224
178
- func makeFileSet (fs vfs.FS , dir string , rng * rand.Rand ) (fileSet , error ) {
225
+ type dbStorage interface {
226
+ // List files or objects.
227
+ List () ([]string , error )
228
+ // Size returns the size of a file or object, or 0 if the file no longer
229
+ // exists (or some other error was encountered).
230
+ Size (name string ) int64
231
+
232
+ // Open returns a Readable for the file or object with the given name.
233
+ Open (name string ) (objstorage.Readable , error )
234
+ }
235
+
236
+ func makeFileSet (dbStorage dbStorage , rng * rand.Rand ) (fileSet , error ) {
179
237
s := fileSet {
180
- fs : fs ,
181
- dir : dir ,
182
- rng : rng ,
238
+ dbStorage : dbStorage ,
239
+ rng : rng ,
183
240
}
184
241
return s , s .Refresh ()
185
242
}
@@ -192,32 +249,26 @@ func samplingKey(rng *rand.Rand, size int64) float64 {
192
249
}
193
250
194
251
func (s * fileSet ) Refresh () error {
195
- filenames , err := s .fs .List (s . dir )
252
+ filenames , err := s .dbStorage .List ()
196
253
if err != nil {
197
254
return err
198
255
}
199
256
slices .Sort (filenames )
200
257
oldFiles := slices .Clone (s .files )
201
258
s .files = s .files [:0 ]
202
- now := time .Now ()
203
259
204
260
newFile := func (filename string ) {
205
- // New file.
206
- fileType , _ , ok := base .ParseFilename (s .fs , filename )
261
+ // Note that vfs.Default is only used to call BaseName which should be a
262
+ // no-op.
263
+ fileType , _ , ok := base .ParseFilename (vfs .Default , filename )
207
264
if ! ok || fileType != base .FileTypeTable {
208
265
return
209
266
}
210
- fileInfo , err := s .fs . Stat ( s . fs . PathJoin ( s . dir , filename ) )
267
+ size := s .dbStorage . Size ( filename )
211
268
if err != nil {
212
269
// Files can get deleted from under us, so we tolerate errors.
213
270
return
214
271
}
215
- // We ignore files that are less than 15 seconds old. This is to avoid
216
- // trying to read a file that is still being written.
217
- if now .Sub (fileInfo .ModTime ()) < 15 * time .Second {
218
- return
219
- }
220
- size := fileInfo .Size ()
221
272
if size == 0 || size > analyzeMaxFileSize {
222
273
return
223
274
}
@@ -273,11 +324,11 @@ func (s *fileSet) Done() bool {
273
324
274
325
// Sample returns a random file from the set (which was not previously sampled),
275
326
// weighted by size.
276
- func (s * fileSet ) Sample () (path string , size int64 ) {
327
+ func (s * fileSet ) Sample () (filename string , size int64 ) {
277
328
idx := s .sampleIdx [0 ]
278
329
s .sampleIdx = s .sampleIdx [1 :]
279
330
s .files [idx ].sampled = true
280
- return s .fs . PathJoin ( s . dir , s . files [idx ].filename ) , s .files [idx ].size
331
+ return s .files [idx ].filename , s .files [idx ].size
281
332
}
282
333
283
334
func isTTY (out io.Writer ) bool {
0 commit comments