From 39d2395afae41b2a9fa32dab04fe3e4cdfb3bae2 Mon Sep 17 00:00:00 2001 From: Jackson Owens Date: Wed, 10 Apr 2024 16:22:43 -0400 Subject: [PATCH] kvserver: add storage.sstable.zombie.bytes metric Add a new timeseries metric that provides visibility into the volume of data that exists in sstables that are not part of the most recent version of the LSM. Epic: none Informs #121935. Informs #122139. Informs cockroachdb/pebble#3500. Close #122110. Release note (ops change): Adds a new timeseries metric storage.sstable.zombie.bytes. --- docs/generated/metrics/metrics.html | 1 + pkg/kv/kvserver/metrics.go | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/docs/generated/metrics/metrics.html b/docs/generated/metrics/metrics.html index 1544f3fbd57f..91bdaac39d11 100644 --- a/docs/generated/metrics/metrics.html +++ b/docs/generated/metrics/metrics.html @@ -701,6 +701,7 @@ STORAGEstorage.shared-storage.writeBytes written to external storageBytesGAUGEBYTESAVGNONE STORAGEstorage.single-delete.ineffectualNumber of SingleDeletes that were ineffectualEventsGAUGECOUNTAVGNONE STORAGEstorage.single-delete.invariant-violationNumber of SingleDelete invariant violationsEventsGAUGECOUNTAVGNONE +STORAGEstorage.sstable.zombie.bytesBytes in SSTables that have been logically deleted, but can't yet be physically deleted because an open iterator may be reading them.BytesGAUGEBYTESAVGNONE STORAGEstorage.wal.bytes_inThe number of logical bytes the storage engine has written to the WALEventsGAUGECOUNTAVGNONE STORAGEstorage.wal.bytes_writtenThe number of bytes the storage engine has written to the WALEventsGAUGECOUNTAVGNONE STORAGEstorage.wal.failover.primary.durationCumulative time spent writing to the primary WAL directory. Only populated when WAL failover is configuredNanosecondsGAUGENANOSECONDSAVGNONE diff --git a/pkg/kv/kvserver/metrics.go b/pkg/kv/kvserver/metrics.go index 31cd6d97dac6..c8da71dc2fbc 100644 --- a/pkg/kv/kvserver/metrics.go +++ b/pkg/kv/kvserver/metrics.go @@ -942,6 +942,14 @@ bytes preserved during flushes and compactions over the lifetime of the process. Measurement: "Nanoseconds", Unit: metric.Unit_NANOSECONDS, } + metaSSTableZombieBytes = metric.Metadata{ + Name: "storage.sstable.zombie.bytes", + Help: "Bytes in SSTables that have been logically deleted, " + + "but can't yet be physically deleted because an " + + "open iterator may be reading them.", + Measurement: "Bytes", + Unit: metric.Unit_BYTES, + } ) var ( @@ -2590,6 +2598,7 @@ type StoreMetrics struct { BatchCommitL0StallDuration *metric.Gauge BatchCommitWALRotWaitDuration *metric.Gauge BatchCommitCommitWaitDuration *metric.Gauge + SSTableZombieBytes *metric.Gauge categoryIterMetrics pebbleCategoryIterMetricsContainer categoryDiskWriteMetrics pebbleCategoryDiskWriteMetricsContainer WALBytesWritten *metric.Gauge @@ -3293,6 +3302,7 @@ func newStoreMetrics(histogramWindow time.Duration) *StoreMetrics { BatchCommitL0StallDuration: metric.NewGauge(metaBatchCommitL0StallDuration), BatchCommitWALRotWaitDuration: metric.NewGauge(metaBatchCommitWALRotDuration), BatchCommitCommitWaitDuration: metric.NewGauge(metaBatchCommitCommitWaitDuration), + SSTableZombieBytes: metric.NewGauge(metaSSTableZombieBytes), categoryIterMetrics: pebbleCategoryIterMetricsContainer{ registry: storeRegistry, }, @@ -3723,6 +3733,7 @@ func (sm *StoreMetrics) updateEngineMetrics(m storage.Metrics) { sm.BatchCommitL0StallDuration.Update(int64(m.BatchCommitStats.L0ReadAmpWriteStallDuration)) sm.BatchCommitWALRotWaitDuration.Update(int64(m.BatchCommitStats.WALRotationDuration)) sm.BatchCommitCommitWaitDuration.Update(int64(m.BatchCommitStats.CommitWaitDuration)) + sm.SSTableZombieBytes.Update(int64(m.Table.ZombieSize)) sm.categoryIterMetrics.update(m.CategoryStats) sm.categoryDiskWriteMetrics.update(m.DiskWriteStats)