@@ -99,7 +99,6 @@ func (d *DB) collectTableStats() bool {
99
99
d .mu .tableStats .pending = nil
100
100
d .mu .tableStats .loading = true
101
101
jobID := d .newJobIDLocked ()
102
- loadedInitial := d .mu .tableStats .loadedInitial
103
102
// Drop DB.mu before performing IO.
104
103
d .mu .Unlock ()
105
104
@@ -111,22 +110,29 @@ func (d *DB) collectTableStats() bool {
111
110
rs := d .loadReadState ()
112
111
var collected []collectedStats
113
112
var hints []deleteCompactionHint
113
+ initialLoadCompleted := false
114
114
if len (pending ) > 0 {
115
115
collected , hints = d .loadNewFileStats (ctx , rs , pending )
116
116
} else {
117
117
var moreRemain bool
118
118
var buf [maxTableStatsPerScan ]collectedStats
119
- collected , hints , moreRemain = d .scanReadStateTableStats (ctx , rs , buf [:0 ])
120
- loadedInitial = ! moreRemain
119
+ collected , hints , moreRemain = d .scanReadStateTableStats (ctx , rs .current , buf [:0 ])
120
+ if ! moreRemain {
121
+ // Once we're done with table stats, load blob file properties.
122
+ moreRemain = d .scanBlobFileProperties (ctx , rs .current , maxTableStatsPerScan - len (collected ))
123
+ if ! moreRemain {
124
+ initialLoadCompleted = true
125
+ }
126
+ }
121
127
}
122
128
rs .unref ()
123
129
124
130
// Update the TableMetadata with the loaded stats while holding d.mu.
125
131
d .mu .Lock ()
126
132
defer d .mu .Unlock ()
127
133
d .mu .tableStats .loading = false
128
- if loadedInitial && ! d .mu .tableStats .loadedInitial {
129
- d .mu .tableStats .loadedInitial = loadedInitial
134
+ if initialLoadCompleted && ! d .mu .tableStats .loadedInitial {
135
+ d .mu .tableStats .loadedInitial = true
130
136
d .opts .EventListener .TableStatsLoaded (TableStatsInfo {
131
137
JobID : int (jobID ),
132
138
})
@@ -216,12 +222,12 @@ func (d *DB) loadNewFileStats(
216
222
// are no pending new files, but there might be files that existed at Open for
217
223
// which we haven't loaded table stats.
218
224
func (d * DB ) scanReadStateTableStats (
219
- ctx context.Context , rs * readState , fill []collectedStats ,
220
- ) ([]collectedStats , []deleteCompactionHint , bool ) {
221
- moreRemain := false
225
+ ctx context.Context , version * manifest.Version , fill []collectedStats ,
226
+ ) (_ []collectedStats , _ []deleteCompactionHint , moreRemain bool ) {
222
227
var hints []deleteCompactionHint
223
228
sizesChecked := make (map [base.DiskFileNum ]struct {})
224
- for l , levelMetadata := range rs .current .Levels {
229
+ // TODO(radu): an O(#tables) scan every time could be problematic.
230
+ for l , levelMetadata := range version .Levels {
225
231
for f := range levelMetadata .All () {
226
232
// NB: Only the active stats collection job updates f.Stats for active
227
233
// files, and we ensure only one goroutine runs it at a time through
@@ -278,7 +284,7 @@ func (d *DB) scanReadStateTableStats(
278
284
sizesChecked [f .TableBacking .DiskFileNum ] = struct {}{}
279
285
}
280
286
281
- stats , newHints , err := d .loadTableStats (ctx , rs . current , l , f )
287
+ stats , newHints , err := d .loadTableStats (ctx , version , l , f )
282
288
if err != nil {
283
289
// Set `moreRemain` so we'll try again.
284
290
moreRemain = true
@@ -295,6 +301,43 @@ func (d *DB) scanReadStateTableStats(
295
301
return fill , hints , moreRemain
296
302
}
297
303
304
+ // populateBlobFileProperties reads at most maxNum blob file properties for blob
305
+ // files that don't have them populated. Returns false once all properties have
306
+ // been populated.
307
+ func (d * DB ) scanBlobFileProperties (
308
+ ctx context.Context , version * manifest.Version , maxNum int ,
309
+ ) (moreRemain bool ) {
310
+ // TODO(radu): an O(#files) scan every time could be problematic.
311
+ // We could remember the last blob file ID and scan from there.
312
+ for f := range version .BlobFiles .All () {
313
+ if _ , propsValid := f .Physical .Properties (); propsValid {
314
+ // Properties are already populated.
315
+ continue
316
+ }
317
+ if maxNum == 0 {
318
+ return moreRemain
319
+ }
320
+ v , err := d .fileCache .findOrCreateBlob (ctx , f .Physical , block.InitFileReadStats {})
321
+ if err != nil {
322
+ moreRemain = true
323
+ continue
324
+ }
325
+ blobReader := v .Value ().mustBlob ()
326
+ blobProps , err := blobReader .ReadProperties (ctx )
327
+ v .Unref ()
328
+ if err != nil {
329
+ moreRemain = true
330
+ continue
331
+ }
332
+ // It is ok to call PopulateProperties here because this function runs as
333
+ // part of a table statistics job, and at most one goroutine runs this at a
334
+ // time (see d.mu.tableStats.loading).
335
+ f .Physical .PopulateProperties (& blobProps )
336
+ maxNum --
337
+ }
338
+ return moreRemain
339
+ }
340
+
298
341
func (d * DB ) loadTableStats (
299
342
ctx context.Context , v * manifest.Version , level int , meta * manifest.TableMetadata ,
300
343
) (manifest.TableStats , []deleteCompactionHint , error ) {
@@ -525,6 +568,9 @@ func (d *DB) estimateSizesBeneath(
525
568
if err != nil {
526
569
return err
527
570
}
571
+ // It is ok to call PopulateProperties here because this function runs as part of
572
+ // a table statistics job, and at most one goroutine runs this at a
573
+ // time (see d.mu.tableStats.loading).
528
574
backingProps = tableBeneath .TableBacking .PopulateProperties (& loadedProps )
529
575
return nil
530
576
})
0 commit comments