Skip to content

Commit f81f12c

Browse files
committed
db: add metrics for problem spans, failed compactions
1 parent 1b8f1ef commit f81f12c

File tree

11 files changed

+154
-46
lines changed

11 files changed

+154
-46
lines changed

compaction.go

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1589,7 +1589,7 @@ func (d *DB) flush1() (bytesFlushed uint64, err error) {
15891589

15901590
d.clearCompactingState(c, err != nil)
15911591
delete(d.mu.compact.inProgress, c)
1592-
d.mu.versions.incrementCompactions(c.kind, c.extraLevels, c.pickerMetrics)
1592+
d.mu.versions.incrementCompactions(c.kind, c.extraLevels, c.pickerMetrics, c.bytesWritten, err)
15931593

15941594
var flushed flushableList
15951595
if err == nil {
@@ -2492,9 +2492,6 @@ func (d *DB) compact1(c *compaction, errChannel chan error) (err error) {
24922492
// the manifest lock, we don't expect this bool to change its value
24932493
// as only the holder of the manifest lock will ever write to it.
24942494
if c.cancel.Load() {
2495-
d.mu.versions.metrics.Compact.CancelledCount++
2496-
d.mu.versions.metrics.Compact.CancelledBytes += c.bytesWritten
2497-
24982495
err = firstError(err, ErrCancelledCompaction)
24992496
// This is the first time we've seen a cancellation during the
25002497
// life of this compaction (or the original condition on err == nil
@@ -2530,11 +2527,7 @@ func (d *DB) compact1(c *compaction, errChannel chan error) (err error) {
25302527
// NB: clearing compacting state must occur before updating the read state;
25312528
// L0Sublevels initialization depends on it.
25322529
d.clearCompactingState(c, err != nil)
2533-
if err != nil && errors.Is(err, ErrCancelledCompaction) {
2534-
d.mu.versions.metrics.Compact.CancelledCount++
2535-
d.mu.versions.metrics.Compact.CancelledBytes += c.bytesWritten
2536-
}
2537-
d.mu.versions.incrementCompactions(c.kind, c.extraLevels, c.pickerMetrics)
2530+
d.mu.versions.incrementCompactions(c.kind, c.extraLevels, c.pickerMetrics, c.bytesWritten, err)
25382531
d.mu.versions.incrementCompactionBytes(-c.bytesWritten)
25392532

25402533
info.TotalDuration = d.timeNow().Sub(c.beganAt)

db.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2016,6 +2016,7 @@ func (d *DB) Metrics() *Metrics {
20162016
metrics.Compact.Duration += d.timeNow().Sub(c.beganAt)
20172017
}
20182018
}
2019+
metrics.Compact.NumProblemSpans = d.problemSpans.Len()
20192020

20202021
for _, m := range d.mu.mem.queue {
20212022
metrics.MemTable.Size += m.totalBytes()

internal/problemspans/by_level.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,22 @@ func (bl *ByLevel) Excise(bounds base.UserKeyBounds) {
9696
}
9797
}
9898

99+
// Len returns the number of non-overlapping spans that have not expired. Two
100+
// spans that touch are both counted if they have different expiration times.
101+
func (bl *ByLevel) Len() int {
102+
if bl.empty.Load() {
103+
// Fast path.
104+
return 0
105+
}
106+
bl.mu.Lock()
107+
defer bl.mu.Unlock()
108+
n := 0
109+
for i := range bl.levels {
110+
n += bl.levels[i].Len()
111+
}
112+
return n
113+
}
114+
99115
// String prints all active (non-expired) span fragments.
100116
func (bl *ByLevel) String() string {
101117
bl.mu.Lock()

internal/problemspans/set.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,18 @@ func (s *Set) IsEmpty() bool {
101101
return s.rt.IsEmpty()
102102
}
103103

104+
// Len returns the number of non-overlapping spans that have not expired. Two
105+
// spans that touch are both counted if they have different expiration times.
106+
func (s *Set) Len() int {
107+
s.now = s.nowFn()
108+
n := 0
109+
s.rt.EnumerateAll(func(start, end axisds.Endpoint[[]byte], prop expirationTime) bool {
110+
n++
111+
return true
112+
})
113+
return n
114+
}
115+
104116
// String prints all active (non-expired) span fragments.
105117
func (s *Set) String() string {
106118
var buf strings.Builder

metrics.go

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,11 @@ type Metrics struct {
197197
// CancelledBytes is the number of bytes written by compactions that were
198198
// cancelled.
199199
CancelledBytes int64
200+
// FailedCount is the total number of compactions that hit an error.
201+
FailedCount int64
202+
// NumProblemSpans is the current (instantaneous) count of "problem spans"
203+
// which temporarily block compactions.
204+
NumProblemSpans int
200205
// MarkedFiles is a count of files that are marked for
201206
// compaction. Such files are compacted in a rewrite compaction
202207
// when no other compactions are picked.
@@ -687,11 +692,16 @@ func (m *Metrics) SafeFormat(w redact.SafePrinter, _ rune) {
687692

688693
w.Printf("Flushes: %d\n", redact.Safe(m.Flush.Count))
689694

690-
w.Printf("Compactions: %d estimated debt: %s in progress: %d (%s)\n",
695+
w.Printf("Compactions: %d estimated debt: %s in progress: %d (%s) canceled: %d (%s) failed: %d problem spans: %d\n",
691696
redact.Safe(m.Compact.Count),
692697
humanize.Bytes.Uint64(m.Compact.EstimatedDebt),
693698
redact.Safe(m.Compact.NumInProgress),
694-
humanize.Bytes.Int64(m.Compact.InProgressBytes))
699+
humanize.Bytes.Int64(m.Compact.InProgressBytes),
700+
redact.Safe(m.Compact.CancelledCount),
701+
humanize.Bytes.Int64(m.Compact.CancelledBytes),
702+
redact.Safe(m.Compact.FailedCount),
703+
redact.Safe(m.Compact.NumProblemSpans),
704+
)
695705

696706
w.Printf(" default: %d delete: %d elision: %d move: %d read: %d tombstone-density: %d rewrite: %d copy: %d multi-level: %d\n",
697707
redact.Safe(m.Compact.DefaultCount),
@@ -702,7 +712,8 @@ func (m *Metrics) SafeFormat(w redact.SafePrinter, _ rune) {
702712
redact.Safe(m.Compact.TombstoneDensityCount),
703713
redact.Safe(m.Compact.RewriteCount),
704714
redact.Safe(m.Compact.CopyCount),
705-
redact.Safe(m.Compact.MultiLevelCount))
715+
redact.Safe(m.Compact.MultiLevelCount),
716+
)
706717

707718
w.Printf("MemTables: %d (%s) zombie: %d (%s)\n",
708719
redact.Safe(m.MemTable.Count),

metrics_test.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,12 @@ import (
1515
"testing"
1616
"time"
1717

18+
"github.com/cockroachdb/crlib/crstrings"
1819
"github.com/cockroachdb/datadriven"
20+
"github.com/cockroachdb/pebble/internal/base"
1921
"github.com/cockroachdb/pebble/internal/cache"
2022
"github.com/cockroachdb/pebble/internal/humanize"
23+
"github.com/cockroachdb/pebble/internal/manifest"
2124
"github.com/cockroachdb/pebble/internal/manual"
2225
"github.com/cockroachdb/pebble/internal/testkeys"
2326
"github.com/cockroachdb/pebble/objstorage/remote"
@@ -47,6 +50,11 @@ func exampleMetrics() Metrics {
4750
m.Compact.EstimatedDebt = 6
4851
m.Compact.InProgressBytes = 7
4952
m.Compact.NumInProgress = 2
53+
m.Compact.CounterLevelCount = 10
54+
m.Compact.CancelledCount = 3
55+
m.Compact.CancelledBytes = 3 * 1024
56+
m.Compact.FailedCount = 5
57+
m.Compact.NumProblemSpans = 2
5058
m.Flush.Count = 8
5159
m.Flush.AsIngestBytes = 34
5260
m.Flush.AsIngestTableCount = 35
@@ -440,6 +448,22 @@ func TestMetrics(t *testing.T) {
440448
}
441449
return b.String()
442450

451+
case "problem-spans":
452+
d.mu.Lock()
453+
defer d.mu.Unlock()
454+
d.problemSpans.Init(manifest.NumLevels, d.cmp)
455+
for _, line := range crstrings.Lines(td.Input) {
456+
var level int
457+
var span1, span2 string
458+
n, err := fmt.Sscanf(line, "L%d %s %s", &level, &span1, &span2)
459+
if err != nil || n != 3 {
460+
td.Fatalf(t, "malformed problem span %q", line)
461+
}
462+
bounds := base.ParseUserKeyBounds(span1 + " " + span2)
463+
d.problemSpans.Add(level, bounds, time.Hour*10)
464+
}
465+
return ""
466+
443467
default:
444468
return fmt.Sprintf("unknown command: %s", td.Cmd)
445469
}

testdata/event_listener

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,7 @@ total | 3 2.2KB 0B 0 | - - - | 843B | 1 746B |
239239
----------------------------------------------------------------------------------------------------------------------------
240240
WAL: 1 files (0B) in: 48B written: 97B (102% overhead)
241241
Flushes: 3
242-
Compactions: 1 estimated debt: 2.2KB in progress: 0 (0B)
242+
Compactions: 1 estimated debt: 2.2KB in progress: 0 (0B) canceled: 0 (0B) failed: 0 problem spans: 0
243243
default: 1 delete: 0 elision: 0 move: 0 read: 0 tombstone-density: 0 rewrite: 0 copy: 0 multi-level: 0
244244
MemTables: 1 (256KB) zombie: 1 (256KB)
245245
Zombie tables: 0 (0B, local: 0B)
@@ -342,7 +342,7 @@ total | 6 4.4KB 0B 0 | - - - | 2.3KB | 3 2.2KB |
342342
----------------------------------------------------------------------------------------------------------------------------
343343
WAL: 1 files (0B) in: 82B written: 132B (61% overhead)
344344
Flushes: 6
345-
Compactions: 1 estimated debt: 4.4KB in progress: 0 (0B)
345+
Compactions: 1 estimated debt: 4.4KB in progress: 0 (0B) canceled: 0 (0B) failed: 0 problem spans: 0
346346
default: 1 delete: 0 elision: 0 move: 0 read: 0 tombstone-density: 0 rewrite: 0 copy: 0 multi-level: 0
347347
MemTables: 1 (512KB) zombie: 1 (512KB)
348348
Zombie tables: 0 (0B, local: 0B)

testdata/ingest

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ total | 1 569B 0B 0 | - - - | 569B | 1 569B |
4545
----------------------------------------------------------------------------------------------------------------------------
4646
WAL: 1 files (0B) in: 0B written: 0B (0% overhead)
4747
Flushes: 0
48-
Compactions: 0 estimated debt: 0B in progress: 0 (0B)
48+
Compactions: 0 estimated debt: 0B in progress: 0 (0B) canceled: 0 (0B) failed: 0 problem spans: 0
4949
default: 0 delete: 0 elision: 0 move: 0 read: 0 tombstone-density: 0 rewrite: 0 copy: 0 multi-level: 0
5050
MemTables: 1 (256KB) zombie: 0 (0B)
5151
Zombie tables: 0 (0B, local: 0B)

0 commit comments

Comments
 (0)