From 0232686cac93d2333bf95e07f1fcbee12b0dac0c Mon Sep 17 00:00:00 2001 From: Daniel Tripp <38776199+thedtripp@users.noreply.github.com> Date: Sat, 11 May 2024 00:56:07 +0000 Subject: [PATCH] etcdserver: add server range duration metrics Signed-off-by: Daniel Tripp <38776199+thedtripp@users.noreply.github.com> Co-authored-by: Ravi Hari --- CHANGELOG/CHANGELOG-3.6.md | 1 + server/etcdserver/txn/metrics.go | 16 +++++++ server/etcdserver/txn/metrics_test.go | 62 +++++++++++++++++++++++++++ server/etcdserver/txn/txn.go | 5 +++ tests/integration/metrics_test.go | 34 +++++++++++++++ 5 files changed, 118 insertions(+) create mode 100644 server/etcdserver/txn/metrics_test.go diff --git a/CHANGELOG/CHANGELOG-3.6.md b/CHANGELOG/CHANGELOG-3.6.md index 8cd06c85381..078d2b75455 100644 --- a/CHANGELOG/CHANGELOG-3.6.md +++ b/CHANGELOG/CHANGELOG-3.6.md @@ -91,6 +91,7 @@ See [List of metrics](https://etcd.io/docs/latest/metrics/) for all metrics per - Add [`etcd_disk_defrag_inflight`](https://github.com/etcd-io/etcd/pull/13371). - Add [`etcd_debugging_server_alarms`](https://github.com/etcd-io/etcd/pull/14276). +- Add [`etcd_server_range_duration_seconds`](https://github.com/etcd-io/etcd/pull/17983). ### Go - Require [Go 1.22+](https://github.com/etcd-io/etcd/pull/16594). 
diff --git a/server/etcdserver/txn/metrics.go b/server/etcdserver/txn/metrics.go
index 1e7a6f19712..e66254b1c60 100644
--- a/server/etcdserver/txn/metrics.go
+++ b/server/etcdserver/txn/metrics.go
@@ -39,13 +39,29 @@ var (
 		Buckets: prometheus.ExponentialBuckets(0.0001, 2, 20),
 	},
 		[]string{"version", "op", "success"})
+	rangeSec = prometheus.NewHistogramVec(prometheus.HistogramOpts{
+		Namespace: "etcd",
+		Subsystem: "server",
+		Name:      "range_duration_seconds",
+		Help:      "The latency distributions of txn.Range",
+
+		// 20 exponential buckets: the lowest upper bound is 0.0001 sec (0.1 ms), factor 2,
+		// so the highest finite upper bound is 0.0001 sec * 2^19 == 52.4288 sec
+		Buckets: prometheus.ExponentialBuckets(0.0001, 2, 20),
+	},
+		[]string{"success"})
 )
 
 func ApplySecObserve(version, op string, success bool, latency time.Duration) {
 	applySec.WithLabelValues(version, op, strconv.FormatBool(success)).Observe(float64(latency.Microseconds()) / 1000000.0)
 }
 
+func RangeSecObserve(success bool, latency time.Duration) {
+	rangeSec.WithLabelValues(strconv.FormatBool(success)).Observe(float64(latency.Microseconds()) / 1000000.0)
+}
+
 func init() {
 	prometheus.MustRegister(applySec)
+	prometheus.MustRegister(rangeSec)
 	prometheus.MustRegister(slowApplies)
 }
diff --git a/server/etcdserver/txn/metrics_test.go b/server/etcdserver/txn/metrics_test.go
new file mode 100644
index 00000000000..45aa68ebde1
--- /dev/null
+++ b/server/etcdserver/txn/metrics_test.go
@@ -0,0 +1,62 @@
+// Copyright 2022 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package txn
+
+import (
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/prometheus/client_golang/prometheus/testutil"
+	"github.com/stretchr/testify/require"
+)
+
+func TestRangeSecObserve(t *testing.T) {
+	// Drop earlier observations (other tests, -count=N reruns), then record one 500 ms range.
+	rangeSec.Reset()
+	RangeSecObserve(true, 500*time.Millisecond)
+
+	// Use testutil to collect the results and check against expected value
+	expected := `
+# HELP etcd_server_range_duration_seconds The latency distributions of txn.Range
+# TYPE etcd_server_range_duration_seconds histogram
+etcd_server_range_duration_seconds_bucket{success="true",le="0.0001"} 0
+etcd_server_range_duration_seconds_bucket{success="true",le="0.0002"} 0
+etcd_server_range_duration_seconds_bucket{success="true",le="0.0004"} 0
+etcd_server_range_duration_seconds_bucket{success="true",le="0.0008"} 0
+etcd_server_range_duration_seconds_bucket{success="true",le="0.0016"} 0
+etcd_server_range_duration_seconds_bucket{success="true",le="0.0032"} 0
+etcd_server_range_duration_seconds_bucket{success="true",le="0.0064"} 0
+etcd_server_range_duration_seconds_bucket{success="true",le="0.0128"} 0
+etcd_server_range_duration_seconds_bucket{success="true",le="0.0256"} 0
+etcd_server_range_duration_seconds_bucket{success="true",le="0.0512"} 0
+etcd_server_range_duration_seconds_bucket{success="true",le="0.1024"} 0
+etcd_server_range_duration_seconds_bucket{success="true",le="0.2048"} 0
+etcd_server_range_duration_seconds_bucket{success="true",le="0.4096"} 0
+etcd_server_range_duration_seconds_bucket{success="true",le="0.8192"} 1
+etcd_server_range_duration_seconds_bucket{success="true",le="1.6384"} 1
+etcd_server_range_duration_seconds_bucket{success="true",le="3.2768"} 1
+etcd_server_range_duration_seconds_bucket{success="true",le="6.5536"} 1
+etcd_server_range_duration_seconds_bucket{success="true",le="13.1072"} 1
+etcd_server_range_duration_seconds_bucket{success="true",le="26.2144"} 1
+etcd_server_range_duration_seconds_bucket{success="true",le="52.4288"} 1
+etcd_server_range_duration_seconds_bucket{success="true",le="+Inf"} 1
+etcd_server_range_duration_seconds_sum{success="true"} 0.5
+etcd_server_range_duration_seconds_count{success="true"} 1
+`
+
+	err := testutil.CollectAndCompare(rangeSec, strings.NewReader(expected))
+	require.NoError(t, err, "Collected metrics did not match expected metrics: %v", err)
+}
diff --git a/server/etcdserver/txn/txn.go b/server/etcdserver/txn/txn.go
index de23d5db504..8f0e6c4b4a7 100644
--- a/server/etcdserver/txn/txn.go
+++ b/server/etcdserver/txn/txn.go
@@ -19,6 +19,7 @@ import (
 	"context"
 	"fmt"
 	"sort"
+	"time"
 
 	"go.uber.org/zap"
 
@@ -138,6 +139,10 @@ func Range(ctx context.Context, lg *zap.Logger, kv mvcc.KV, r *pb.RangeRequest)
 		trace = traceutil.New("range", lg)
 		ctx = context.WithValue(ctx, traceutil.TraceKey{}, trace)
 	}
+	defer func(start time.Time) {
+		success := err == nil
+		RangeSecObserve(success, time.Since(start))
+	}(time.Now())
 	txnRead := kv.Read(mvcc.ConcurrentReadTxMode, trace)
 	defer txnRead.End()
 	resp, err = executeRange(ctx, lg, txnRead, r)
diff --git a/tests/integration/metrics_test.go b/tests/integration/metrics_test.go
index 79189efecdf..836b070b14d 100644
--- a/tests/integration/metrics_test.go
+++ b/tests/integration/metrics_test.go
@@ -22,10 +22,14 @@ import (
 	"testing"
 	"time"
 
+	"github.com/stretchr/testify/require"
+
 	pb "go.etcd.io/etcd/api/v3/etcdserverpb"
 	"go.etcd.io/etcd/client/pkg/v3/transport"
 	"go.etcd.io/etcd/server/v3/storage"
 	"go.etcd.io/etcd/tests/v3/framework/integration"
+
+	clientv3 "go.etcd.io/etcd/client/v3"
 )
 
 // TestMetricDbSizeBoot checks that the db size metric is set on boot.
@@ -211,3 +215,33 @@ func TestMetricsHealth(t *testing.T) {
 		t.Fatalf("expected '0' from etcd_server_health_failures, got %q", hv)
 	}
 }
+
+func TestMetricsRangeDurationSeconds(t *testing.T) {
+	integration.BeforeTest(t)
+	clus := integration.NewCluster(t, &integration.ClusterConfig{Size: 1})
+	defer clus.Terminate(t)
+
+	client := clus.RandClient()
+
+	keys := []string{
+		"my-namespace/foobar", "my-namespace/foobar1", "namespace/foobar1"}
+	for _, key := range keys {
+		_, err := client.Put(context.Background(), key, "data")
+		require.NoError(t, err)
+	}
+
+	_, err := client.Get(context.Background(), "", clientv3.WithFromKey())
+	require.NoError(t, err)
+
+	rangeDurationSeconds, err := clus.Members[0].Metric("etcd_server_range_duration_seconds_sum")
+	require.NoError(t, err)
+
+	require.NotEmpty(t, rangeDurationSeconds, "expected a number from etcd_server_range_duration_seconds_sum")
+
+	rangeDuration, err := strconv.ParseFloat(rangeDurationSeconds, 64)
+	require.NoError(t, err, "failed to parse duration: %s", err)
+
+	maxRangeDuration := 600.0
+	require.GreaterOrEqual(t, rangeDuration, 0.0, "expected etcd_server_range_duration_seconds_sum to be between 0 and %f, got %f", maxRangeDuration, rangeDuration)
+	require.LessOrEqual(t, rangeDuration, maxRangeDuration, "expected etcd_server_range_duration_seconds_sum to be between 0 and %f, got %f", maxRangeDuration, rangeDuration)
+}