Skip to content

Commit

Permalink
go_metrics.go: use histogram buckets instead of summary for Go runtime histogram
Browse files Browse the repository at this point in the history

It is unclear how and when to reset a summary, since it is smoothed over the whole uptime of the Go app.
Histogram buckets, on the other hand, can be wrapped into increase() or rate() in order
to calculate the histogram distribution over an arbitrary time range.

Limit the number of buckets per Go runtime histogram to 30 in order to prevent high cardinality issues.
  • Loading branch information
valyala committed Nov 29, 2023
1 parent 5b58446 commit 2ec1497
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 32 deletions.
42 changes: 30 additions & 12 deletions go_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"math"
"runtime"
runtimemetrics "runtime/metrics"
"strings"

"github.com/valyala/histogram"
)
Expand Down Expand Up @@ -104,23 +105,40 @@ func writeRuntimeMetric(w io.Writer, name string, sample *runtimemetrics.Sample)
}

// writeRuntimeHistogramMetric writes the Go runtime histogram h to w as
// cumulative histogram buckets in the form
//
//	<name>_bucket{le="<upper bound>"} <cumulative count>
//
// followed by a final <name>_bucket{le="+Inf"} line carrying the total count.
//
// Two measures keep the number of exposed series low:
//
//   - For metrics whose name ends with "_seconds", trailing buckets whose upper
//     bound exceeds 1 are not written individually; their counts are only
//     reflected in the le="+Inf" bucket.
//   - Buckets are sampled with a step of len(buckets)/maxRuntimeHistogramBuckets,
//     so roughly maxRuntimeHistogramBuckets finite buckets are written at most.
//     Counts of skipped buckets are still accumulated into the following ones.
//
// It panics if len(h.Buckets) != len(h.Counts)+1.
func writeRuntimeHistogramMetric(w io.Writer, name string, h *runtimemetrics.Float64Histogram) {
	buckets := h.Buckets
	counts := h.Counts
	if len(buckets) != len(counts)+1 {
		panic(fmt.Errorf("the number of buckets must be bigger than the number of counts by 1 in histogram %s; got buckets=%d, counts=%d", name, len(buckets), len(counts)))
	}

	// tailCount accumulates the counts of the trailing buckets dropped below.
	tailCount := uint64(0)
	if strings.HasSuffix(name, "_seconds") {
		// Limit the maximum bucket to 1 second, since Go runtime exposes buckets
		// with 10K seconds, which make little sense. At the same time such
		// buckets may lead to high cardinality issues at the scraper side.
		//
		// The loop is guarded by len(counts) rather than len(buckets): buckets
		// always holds one more item than counts, so guarding on buckets alone
		// would index an empty counts slice when every boundary exceeds 1.
		for len(counts) > 0 && buckets[len(buckets)-1] > 1 {
			buckets = buckets[:len(buckets)-1]
			tailCount += counts[len(counts)-1]
			counts = counts[:len(counts)-1]
		}
	}

	// iStep is below 1 when there are fewer buckets than the limit,
	// in which case every bucket is written.
	iStep := float64(len(buckets)) / maxRuntimeHistogramBuckets

	totalCount := uint64(0)
	iNext := 0.0
	for i, count := range counts {
		totalCount += count
		if float64(i) < iNext {
			// This bucket is skipped by sampling; its count stays in totalCount.
			continue
		}
		iNext += iStep
		le := buckets[i+1]
		if !math.IsInf(le, 1) {
			// The +Inf bucket is written unconditionally after the loop.
			fmt.Fprintf(w, `%s_bucket{le="%g"} %d`+"\n", name, le, totalCount)
		}
	}
	totalCount += tailCount
	fmt.Fprintf(w, `%s_bucket{le="+Inf"} %d`+"\n", name, totalCount)
}

// maxRuntimeHistogramBuckets limits the number of buckets written per Go runtime
// histogram in order to prevent high cardinality issues at the scraper side.
const maxRuntimeHistogramBuckets = 30
41 changes: 21 additions & 20 deletions go_metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,40 +22,41 @@ func TestWriteRuntimeHistogramMetricOk(t *testing.T) {
f(&runtimemetrics.Float64Histogram{
Counts: []uint64{1, 2, 3},
Buckets: []float64{1, 2, 3, 4},
}, `foo{quantile="0.5"} 3
foo{quantile="0.9"} 4
foo{quantile="0.97"} 4
foo{quantile="0.99"} 4
foo{quantile="1"} 4
}, `foo_bucket{le="2"} 1
foo_bucket{le="3"} 3
foo_bucket{le="4"} 6
foo_bucket{le="+Inf"} 6
`)

f(&runtimemetrics.Float64Histogram{
Counts: []uint64{0, 25, 1, 0},
Buckets: []float64{1, 2, 3, 4, math.Inf(1)},
}, `foo{quantile="0.5"} 3
foo{quantile="0.9"} 3
foo{quantile="0.97"} 4
foo{quantile="0.99"} 4
foo{quantile="1"} 4
}, `foo_bucket{le="2"} 0
foo_bucket{le="3"} 25
foo_bucket{le="4"} 26
foo_bucket{le="+Inf"} 26
`)

f(&runtimemetrics.Float64Histogram{
Counts: []uint64{0, 25, 1, 3, 0, 44, 15, 132, 10, 0},
Buckets: []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, math.Inf(1)},
}, `foo{quantile="0.5"} 9
foo{quantile="0.9"} 9
foo{quantile="0.97"} 10
foo{quantile="0.99"} 10
foo{quantile="1"} 10
}, `foo_bucket{le="2"} 0
foo_bucket{le="3"} 25
foo_bucket{le="4"} 26
foo_bucket{le="5"} 29
foo_bucket{le="6"} 29
foo_bucket{le="7"} 73
foo_bucket{le="8"} 88
foo_bucket{le="9"} 220
foo_bucket{le="10"} 230
foo_bucket{le="+Inf"} 230
`)

f(&runtimemetrics.Float64Histogram{
Counts: []uint64{1, 5, 0},
Buckets: []float64{math.Inf(-1), 4, 5, math.Inf(1)},
}, `foo{quantile="0.5"} 5
foo{quantile="0.9"} 5
foo{quantile="0.97"} 5
foo{quantile="0.99"} 5
foo{quantile="1"} 5
}, `foo_bucket{le="4"} 1
foo_bucket{le="5"} 6
foo_bucket{le="+Inf"} 6
`)
}

0 comments on commit 2ec1497

Please sign in to comment.