diff --git a/agreement/gossip/network.go b/agreement/gossip/network.go
index e6b7d77aad..43eefd2db5 100644
--- a/agreement/gossip/network.go
+++ b/agreement/gossip/network.go
@@ -32,9 +32,11 @@ import (
)
var messagesHandledTotal = metrics.MakeCounter(metrics.AgreementMessagesHandled)
-var messagesHandledByType = metrics.NewTagCounter("algod_agreement_handled_{TAG}", "Number of agreement messages handled per type")
+var messagesHandledByType = metrics.NewTagCounter("algod_agreement_handled_{TAG}", "Number of agreement {TAG} messages handled",
+ agreementVoteMessageType, agreementProposalMessageType, agreementBundleMessageType)
var messagesDroppedTotal = metrics.MakeCounter(metrics.AgreementMessagesDropped)
-var messagesDroppedByType = metrics.NewTagCounter("algod_agreement_dropped_{TAG}", "Number of agreement messages handled per type")
+var messagesDroppedByType = metrics.NewTagCounter("algod_agreement_dropped_{TAG}", "Number of agreement {TAG} messages dropped",
+ agreementVoteMessageType, agreementProposalMessageType, agreementBundleMessageType)
const (
agreementVoteMessageType = "vote"
diff --git a/agreement/pseudonode.go b/agreement/pseudonode.go
index bdaa2f359d..06a91b210e 100644
--- a/agreement/pseudonode.go
+++ b/agreement/pseudonode.go
@@ -44,8 +44,8 @@ var errPseudonodeVerifierClosedChannel = errors.New("crypto verifier closed the
var errPseudonodeNoVotes = errors.New("no valid participation keys to generate votes for given round")
var errPseudonodeNoProposals = errors.New("no valid participation keys to generate proposals for given round")
-var pseudonodeBacklogFullByType = metrics.NewTagCounter("algod_agreement_pseudonode_tasks_dropped_{TAG}", "Number of pseudonode tasks dropped per type")
-var pseudonodeResultTimeoutsByType = metrics.NewTagCounter("algod_agreement_pseudonode_tasks_timeouts_{TAG}", "Number of pseudonode task result timeouts per type")
+var pseudonodeBacklogFullByType = metrics.NewTagCounter("algod_agreement_pseudonode_tasks_dropped_{TAG}", "Number of pseudonode {TAG} tasks dropped", "proposal", "vote")
+var pseudonodeResultTimeoutsByType = metrics.NewTagCounter("algod_agreement_pseudonode_tasks_timeouts_{TAG}", "Number of pseudonode {TAG} task result timeouts", "vote", "pvote", "ppayload")
// A pseudonode creates proposals and votes with a KeyManager which holds participation keys.
//
diff --git a/network/wsPeer.go b/network/wsPeer.go
index 337dae07b3..272e39c294 100644
--- a/network/wsPeer.go
+++ b/network/wsPeer.go
@@ -48,14 +48,14 @@ const averageMessageLength = 2 * 1024 // Most of the messages are smaller tha
const msgsInReadBufferPerPeer = 10
var networkSentBytesTotal = metrics.MakeCounter(metrics.NetworkSentBytesTotal)
-var networkSentBytesByTag = metrics.NewTagCounter("algod_network_sent_bytes_{TAG}", "Number of bytes that were sent over the network per message tag")
+var networkSentBytesByTag = metrics.NewTagCounter("algod_network_sent_bytes_{TAG}", "Number of bytes that were sent over the network for {TAG} messages")
var networkReceivedBytesTotal = metrics.MakeCounter(metrics.NetworkReceivedBytesTotal)
-var networkReceivedBytesByTag = metrics.NewTagCounter("algod_network_received_bytes_{TAG}", "Number of bytes that were received from the network per message tag")
+var networkReceivedBytesByTag = metrics.NewTagCounter("algod_network_received_bytes_{TAG}", "Number of bytes that were received from the network for {TAG} messages")
var networkMessageReceivedTotal = metrics.MakeCounter(metrics.NetworkMessageReceivedTotal)
-var networkMessageReceivedByTag = metrics.NewTagCounter("algod_network_message_received_{TAG}", "Number of complete messages that were received from the network per message tag")
+var networkMessageReceivedByTag = metrics.NewTagCounter("algod_network_message_received_{TAG}", "Number of complete messages that were received from the network for {TAG} messages")
var networkMessageSentTotal = metrics.MakeCounter(metrics.NetworkMessageSentTotal)
-var networkMessageSentByTag = metrics.NewTagCounter("algod_network_message_sent_{TAG}", "Number of complete messages that were sent to the network per message tag")
+var networkMessageSentByTag = metrics.NewTagCounter("algod_network_message_sent_{TAG}", "Number of complete messages that were sent to the network for {TAG} messages")
var networkConnectionsDroppedTotal = metrics.MakeCounter(metrics.NetworkConnectionsDroppedTotal)
var networkMessageQueueMicrosTotal = metrics.MakeCounter(metrics.MetricName{Name: "algod_network_message_sent_queue_micros_total", Description: "Total microseconds message spent waiting in queue to be sent"})
diff --git a/util/metrics/counter.go b/util/metrics/counter.go
index 73debb3e91..06ea4b0c44 100644
--- a/util/metrics/counter.go
+++ b/util/metrics/counter.go
@@ -155,9 +155,6 @@ func (counter *Counter) WriteMetric(buf *strings.Builder, parentLabels string) {
counter.Lock()
defer counter.Unlock()
- if len(counter.values) < 1 {
- return
- }
buf.WriteString("# HELP ")
buf.WriteString(counter.name)
buf.WriteString(" ")
@@ -165,6 +162,17 @@ func (counter *Counter) WriteMetric(buf *strings.Builder, parentLabels string) {
buf.WriteString("\n# TYPE ")
buf.WriteString(counter.name)
buf.WriteString(" counter\n")
+ // if counter is zero, report 0 using parentLabels and no tags
+ if len(counter.values) == 0 {
+ buf.WriteString(counter.name)
+ if len(parentLabels) > 0 {
+ buf.WriteString("{" + parentLabels + "}")
+ }
+ buf.WriteString(" 0")
+ buf.WriteString("\n")
+ return
+ }
+ // otherwise iterate through values and write one line per label
for _, l := range counter.values {
buf.WriteString(counter.name)
buf.WriteString("{")
diff --git a/util/metrics/counter_test.go b/util/metrics/counter_test.go
index 43ef0c27aa..75512b80e0 100644
--- a/util/metrics/counter_test.go
+++ b/util/metrics/counter_test.go
@@ -19,6 +19,7 @@ package metrics
import (
"context"
"fmt"
+ "strings"
"testing"
"time"
@@ -67,7 +68,7 @@ func TestMetricCounter(t *testing.T) {
defer test.Unlock()
// the the loop above we've created a single metric name with five different labels set ( host0, host1 .. host 4)
// let's see if we received all the 5 different labels.
- require.Equal(t, 5, len(test.metrics), "Missing metric counts were reported.")
+ require.Equal(t, 5, len(test.metrics), "Missing metric counts were reported: %+v", test.metrics)
for k, v := range test.metrics {
// we have increased each one of the labels exactly 4 times. See that the counter was counting correctly.
@@ -114,7 +115,7 @@ func TestMetricCounterFastInts(t *testing.T) {
defer test.Unlock()
// the the loop above we've created a single metric name with five different labels set ( host0, host1 .. host 4)
// let's see if we received all the 5 different labels.
- require.Equal(t, 1, len(test.metrics), "Missing metric counts were reported.")
+ require.Equal(t, 1, len(test.metrics), "Missing metric counts were reported: %+v", test.metrics)
for k, v := range test.metrics {
// we have increased each one of the labels exactly 4 times. See that the counter was counting correctly.
@@ -163,7 +164,7 @@ func TestMetricCounterMixed(t *testing.T) {
defer test.Unlock()
// the the loop above we've created a single metric name with five different labels set ( host0, host1 .. host 4)
// let's see if we received all the 5 different labels.
- require.Equal(t, 1, len(test.metrics), "Missing metric counts were reported.")
+ require.Equal(t, 1, len(test.metrics), "Missing metric counts were reported: %+v", test.metrics)
for k, v := range test.metrics {
// we have increased each one of the labels exactly 4 times. See that the counter was counting correctly.
@@ -171,3 +172,29 @@ func TestMetricCounterMixed(t *testing.T) {
require.Equal(t, "35.5", v, fmt.Sprintf("The metric '%s' reached value '%s'", k, v))
}
}
+
+func TestCounterWriteMetric(t *testing.T) {
+ partitiontest.PartitionTest(t)
+
+ c := MakeCounter(MetricName{Name: "testname", Description: "testhelp"})
+ c.Deregister(nil)
+
+ // ensure 0 counters are still logged
+ sbOut := strings.Builder{}
+ c.WriteMetric(&sbOut, `host="myhost"`)
+ expected := `# HELP testname testhelp
+# TYPE testname counter
+testname{host="myhost"} 0
+`
+ require.Equal(t, expected, sbOut.String())
+
+ c.Add(2.3, nil)
+ // ensure non-zero counters are logged
+ sbOut = strings.Builder{}
+ c.WriteMetric(&sbOut, `host="myhost"`)
+ expected = `# HELP testname testhelp
+# TYPE testname counter
+testname{host="myhost"} 2.3
+`
+ require.Equal(t, expected, sbOut.String())
+}
diff --git a/util/metrics/gauge_test.go b/util/metrics/gauge_test.go
index 301ae22a74..9a228c0670 100644
--- a/util/metrics/gauge_test.go
+++ b/util/metrics/gauge_test.go
@@ -68,7 +68,7 @@ func TestMetricGauge(t *testing.T) {
// the the loop above we've created a single metric name with five different labels set ( host0, host1 .. host 4)
// let's see if we received all the 5 different labels.
- require.Equal(t, 5, len(test.metrics), "Missing metric counts were reported.")
+ require.Equal(t, 5, len(test.metrics), "Missing metric counts were reported: %+v", test.metrics)
// iterate through the metrics and check the each of the metrics reached it's correct count.
for k, v := range test.metrics {
diff --git a/util/metrics/registry_test.go b/util/metrics/registry_test.go
index aa48516309..2256993f20 100644
--- a/util/metrics/registry_test.go
+++ b/util/metrics/registry_test.go
@@ -37,7 +37,7 @@ func TestWriteAdd(t *testing.T) {
results := make(map[string]float64)
DefaultRegistry().AddMetrics(results)
- require.Equal(t, 2, len(results))
+ require.Equal(t, 2, len(results), "results", results)
require.Contains(t, results, "gauge-name")
require.InDelta(t, 12.34, results["gauge-name"], 0.01)
require.Contains(t, results, "label-counter_label__a_label_value_")
diff --git a/util/metrics/segment.go b/util/metrics/segment.go
deleted file mode 100644
index 21db004f3a..0000000000
--- a/util/metrics/segment.go
+++ /dev/null
@@ -1,90 +0,0 @@
-// Copyright (C) 2019-2022 Algorand, Inc.
-// This file is part of go-algorand
-//
-// go-algorand is free software: you can redistribute it and/or modify
-// it under the terms of the GNU Affero General Public License as
-// published by the Free Software Foundation, either version 3 of the
-// License, or (at your option) any later version.
-//
-// go-algorand is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU Affero General Public License for more details.
-//
-// You should have received a copy of the GNU Affero General Public License
-// along with go-algorand. If not, see .
-
-package metrics
-
-import (
- "sync/atomic"
- "time"
-)
-
-// Segment represent a single segment variable.
-type Segment struct {
- duration *Gauge
- totalDuration *Counter
- counter *Counter
- concurrentInstances *Gauge
- concurrentCounter uint32
-}
-
-// SegmentInstance is generated once a segments starts.
-type SegmentInstance struct {
- segment *Segment
- start time.Time
- labels map[string]string
-}
-
-// MakeSegment create a new segment with the provided name and description.
-func MakeSegment(metric *MetricName) *Segment {
- c := &Segment{
- duration: MakeGauge(MetricName{Name: metric.Name + "_sec", Description: metric.Description + "(duration)"}),
- totalDuration: MakeCounter(MetricName{Name: metric.Name + "_sec_total", Description: metric.Description + "(total duration)"}),
- counter: MakeCounter(MetricName{Name: metric.Name + "_total", Description: metric.Description + "(total count)"}),
- concurrentInstances: MakeGauge(MetricName{Name: metric.Name + "_concurrent", Description: metric.Description + "(concurrent instances)"}),
- }
- return c
-}
-
-// EnterSegment is called when a segment is entered.
-func (segment *Segment) EnterSegment(labels map[string]string) (*SegmentInstance, error) {
- segment.counter.Inc(labels)
- concurrentCounter := atomic.AddUint32(&segment.concurrentCounter, uint32(1))
- segment.concurrentInstances.Set(float64(concurrentCounter), labels)
- return &SegmentInstance{
- segment: segment,
- start: time.Now(),
- labels: labels,
- }, nil
-}
-
-// Register registers the counter with the default/specific registry
-func (segment *Segment) Register(reg *Registry) {
- segment.duration.Register(reg)
- segment.totalDuration.Register(reg)
- segment.counter.Register(reg)
- segment.concurrentInstances.Register(reg)
-}
-
-// Deregister deregisters the counter with the default/specific registry
-func (segment *Segment) Deregister(reg *Registry) {
- segment.duration.Deregister(reg)
- segment.totalDuration.Deregister(reg)
- segment.counter.Deregister(reg)
- segment.concurrentInstances.Deregister(reg)
-}
-
-// LeaveSegment is expected to be called via a "defer" statement.
-func (segInstance *SegmentInstance) LeaveSegment() error {
- if segInstance == nil {
- return nil
- }
- concurrentCounter := atomic.AddUint32(&segInstance.segment.concurrentCounter, ^uint32(0))
- seconds := time.Since(segInstance.start).Seconds()
- segInstance.segment.duration.Set(seconds, segInstance.labels)
- segInstance.segment.totalDuration.Add(seconds, segInstance.labels)
- segInstance.segment.concurrentInstances.Set(float64(concurrentCounter), segInstance.labels)
- return nil
-}
diff --git a/util/metrics/segment_test.go b/util/metrics/segment_test.go
deleted file mode 100644
index c5cae321d9..0000000000
--- a/util/metrics/segment_test.go
+++ /dev/null
@@ -1,119 +0,0 @@
-// Copyright (C) 2019-2022 Algorand, Inc.
-// This file is part of go-algorand
-//
-// go-algorand is free software: you can redistribute it and/or modify
-// it under the terms of the GNU Affero General Public License as
-// published by the Free Software Foundation, either version 3 of the
-// License, or (at your option) any later version.
-//
-// go-algorand is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU Affero General Public License for more details.
-//
-// You should have received a copy of the GNU Affero General Public License
-// along with go-algorand. If not, see .
-
-package metrics
-
-import (
- "context"
- "fmt"
- "strconv"
- "strings"
- "testing"
- "time"
-
- "github.com/algorand/go-algorand/test/partitiontest"
- "github.com/stretchr/testify/require"
-)
-
-type SegmentTest struct {
- MetricTest
-}
-
-func TestMetricSegment(t *testing.T) {
- partitiontest.PartitionTest(t)
-
- const initialSleepDuration = 10 * time.Millisecond
- const maxSleepDuration = 4 * time.Second
- done := false
- for sleepDuration := initialSleepDuration; sleepDuration <= maxSleepDuration; sleepDuration *= 2 {
- done = testMetricSegmentHelper(t, sleepDuration)
- if done {
- break
- }
- }
- if !done {
- require.Fail(t, "test failed")
- }
-}
-
-func testMetricSegmentHelper(t *testing.T, functionTime time.Duration) bool {
-
- test := &SegmentTest{
- MetricTest: NewMetricTest(),
- }
- // create a http listener.
- port := test.createListener(":0")
-
- metricService := MakeMetricService(&ServiceConfig{
- NodeExporterListenAddress: fmt.Sprintf("localhost:%d", port),
- Labels: map[string]string{
- "host_name": "host_one",
- "session_id": "AFX-229"},
- })
- metricService.Start(context.Background())
-
- acceptedFunctionThreshold := 1.1 // 10 percent.
- segment := MakeSegment(&MetricName{Name: "test_segment_name1", Description: "this is the metric test for segment object"})
- segmentTest := func() {
- inst, _ := segment.EnterSegment(map[string]string{"pid": "123"})
- defer inst.LeaveSegment()
- time.Sleep(functionTime)
- }
- segmentTest()
- segmentTest()
- // wait two reporting cycles to ensure we received all the messages.
- time.Sleep(test.sampleRate * 2)
-
- metricService.Shutdown()
-
- segment.Deregister(nil)
-
- test.Lock()
- defer test.Unlock()
-
- // test the metrics values. see if we received all the 4 metrics back correctly.
- // we expect the get 4 metrics : test_segment_name1_sec, test_segment_name1_sec_total, test_segment_name1_total and test_segment_name1_concurrent
- // ( we don't know in which order they would appear, but the total count should be 4 )
- require.Equal(t, 4, len(test.metrics), "Missing metric counts were reported.")
-
- for k, v := range test.metrics {
- if strings.Contains(k, "test_segment_name1_sec{") {
- // should be around 400 milliseconds.
- if elapsedTime, err := strconv.ParseFloat(v, 64); err != nil {
- t.Fatalf("The metric '%s' has unexpected value of '%s'", k, v)
- } else {
- if elapsedTime < functionTime.Seconds() || elapsedTime > functionTime.Seconds()*acceptedFunctionThreshold {
- return false
- }
- }
- }
- if strings.Contains(k, "test_segment_name1_sec_total{") {
- // should be around 800 milliseconds.
- if elapsedTime, err := strconv.ParseFloat(v, 64); err != nil {
- t.Fatalf("The metric '%s' has unexpected value of '%s'", k, v)
- } else {
- if elapsedTime < 2*functionTime.Seconds() || elapsedTime > 2*functionTime.Seconds()*acceptedFunctionThreshold {
- return false
- }
- }
- }
- if strings.Contains(k, "test_segment_name1_total{") {
- // should be 2, since we had 2 calls.
- require.Equal(t, "2", v, "The metric '%s' has unexpected value of '%s'", k, v)
- }
- }
- return true
-}
diff --git a/util/metrics/tagcounter.go b/util/metrics/tagcounter.go
index 53cce7ba69..d110b8d4f0 100644
--- a/util/metrics/tagcounter.go
+++ b/util/metrics/tagcounter.go
@@ -26,8 +26,12 @@ import (
// NewTagCounter makes a set of metrics under rootName for tagged counting.
// "{TAG}" in rootName is replaced by the tag, otherwise "_{TAG}" is appended.
-func NewTagCounter(rootName, desc string) *TagCounter {
+// Optionally provided declaredTags counters for these names up front (making them easier to discover).
+func NewTagCounter(rootName, desc string, declaredTags ...string) *TagCounter {
tc := &TagCounter{Name: rootName, Description: desc}
+ for _, tag := range declaredTags {
+ tc.Add(tag, 0)
+ }
DefaultRegistry().Register(tc)
return tc
}
@@ -98,32 +102,34 @@ func (tc *TagCounter) WriteMetric(buf *strings.Builder, parentLabels string) {
// no values, nothing to say.
return
}
- // TODO: what to do with "parentLabels"? obsolete part of interface?
- buf.WriteString("# ")
- buf.WriteString(tc.Name)
- buf.WriteString(" ")
- buf.WriteString(tc.Description)
- buf.WriteString("\n")
isTemplate := strings.Contains(tc.Name, "{TAG}")
tags := tagptr.(map[string]*uint64)
for tag, tagcount := range tags {
if tagcount == nil {
continue
}
+ var name string
if isTemplate {
- name := strings.ReplaceAll(tc.Name, "{TAG}", tag)
- buf.WriteString(name)
- buf.WriteRune(' ')
- buf.WriteString(strconv.FormatUint(*tagcount, 10))
- buf.WriteRune('\n')
+ name = strings.ReplaceAll(tc.Name, "{TAG}", tag)
} else {
- buf.WriteString(tc.Name)
- buf.WriteRune('_')
- buf.WriteString(tag)
- buf.WriteRune(' ')
- buf.WriteString(strconv.FormatUint(*tagcount, 10))
- buf.WriteRune('\n')
+ name = tc.Name + "_" + tag
+ }
+ buf.WriteString("# HELP ")
+ buf.WriteString(name)
+ buf.WriteRune(' ')
+ buf.WriteString(strings.ReplaceAll(tc.Description, "{TAG}", tag))
+ buf.WriteString("\n# TYPE ")
+ buf.WriteString(name)
+ buf.WriteString(" counter\n")
+ buf.WriteString(name)
+ if len(parentLabels) > 0 {
+ buf.WriteRune('{')
+ buf.WriteString(parentLabels)
+ buf.WriteRune('}')
}
+ buf.WriteRune(' ')
+ buf.WriteString(strconv.FormatUint(*tagcount, 10))
+ buf.WriteRune('\n')
}
}
diff --git a/util/metrics/tagcounter_test.go b/util/metrics/tagcounter_test.go
index b76202c537..feb464a35c 100644
--- a/util/metrics/tagcounter_test.go
+++ b/util/metrics/tagcounter_test.go
@@ -40,6 +40,7 @@ func TestTagCounter(t *testing.T) {
}
tc := NewTagCounter("tc", "wat")
+ DefaultRegistry().Deregister(tc)
// check that empty TagCounter cleanly returns no results
var sb strings.Builder
@@ -80,6 +81,39 @@ func TestTagCounter(t *testing.T) {
}
}
+func TestTagCounterWriteMetric(t *testing.T) {
+ partitiontest.PartitionTest(t)
+
+ tc := NewTagCounter("count_msgs_{TAG}", "number of {TAG} messages")
+ DefaultRegistry().Deregister(tc)
+
+ tc.Add("TX", 100)
+ tc.Add("TX", 1)
+ tc.Add("RX", 0)
+
+ var sbOut strings.Builder
+ tc.WriteMetric(&sbOut, `host="myhost"`)
+ txExpected := `# HELP count_msgs_TX number of TX messages
+# TYPE count_msgs_TX counter
+count_msgs_TX{host="myhost"} 101
+`
+ rxExpected := `# HELP count_msgs_RX number of RX messages
+# TYPE count_msgs_RX counter
+count_msgs_RX{host="myhost"} 0
+`
+ expfmt := sbOut.String()
+ require.True(t, expfmt == txExpected+rxExpected || expfmt == rxExpected+txExpected, "bad fmt: %s", expfmt)
+
+ tc2 := NewTagCounter("declared", "number of {TAG}s", "A", "B")
+ DefaultRegistry().Deregister(tc2)
+ aExpected := "# HELP declared_A number of As\n# TYPE declared_A counter\ndeclared_A{host=\"h\"} 0\n"
+ bExpected := "# HELP declared_B number of Bs\n# TYPE declared_B counter\ndeclared_B{host=\"h\"} 0\n"
+ sbOut = strings.Builder{}
+ tc2.WriteMetric(&sbOut, `host="h"`)
+ expfmt = sbOut.String()
+ require.True(t, expfmt == aExpected+bExpected || expfmt == bExpected+aExpected, "bad fmt: %s", expfmt)
+}
+
func BenchmarkTagCounter(b *testing.B) {
b.Logf("b.N = %d", b.N)
t := b