diff --git a/agreement/gossip/network.go b/agreement/gossip/network.go index e6b7d77aad..43eefd2db5 100644 --- a/agreement/gossip/network.go +++ b/agreement/gossip/network.go @@ -32,9 +32,11 @@ import ( ) var messagesHandledTotal = metrics.MakeCounter(metrics.AgreementMessagesHandled) -var messagesHandledByType = metrics.NewTagCounter("algod_agreement_handled_{TAG}", "Number of agreement messages handled per type") +var messagesHandledByType = metrics.NewTagCounter("algod_agreement_handled_{TAG}", "Number of agreement {TAG} messages handled", + agreementVoteMessageType, agreementProposalMessageType, agreementBundleMessageType) var messagesDroppedTotal = metrics.MakeCounter(metrics.AgreementMessagesDropped) -var messagesDroppedByType = metrics.NewTagCounter("algod_agreement_dropped_{TAG}", "Number of agreement messages handled per type") +var messagesDroppedByType = metrics.NewTagCounter("algod_agreement_dropped_{TAG}", "Number of agreement {TAG} messages dropped", + agreementVoteMessageType, agreementProposalMessageType, agreementBundleMessageType) const ( agreementVoteMessageType = "vote" diff --git a/agreement/pseudonode.go b/agreement/pseudonode.go index bdaa2f359d..06a91b210e 100644 --- a/agreement/pseudonode.go +++ b/agreement/pseudonode.go @@ -44,8 +44,8 @@ var errPseudonodeVerifierClosedChannel = errors.New("crypto verifier closed the var errPseudonodeNoVotes = errors.New("no valid participation keys to generate votes for given round") var errPseudonodeNoProposals = errors.New("no valid participation keys to generate proposals for given round") -var pseudonodeBacklogFullByType = metrics.NewTagCounter("algod_agreement_pseudonode_tasks_dropped_{TAG}", "Number of pseudonode tasks dropped per type") -var pseudonodeResultTimeoutsByType = metrics.NewTagCounter("algod_agreement_pseudonode_tasks_timeouts_{TAG}", "Number of pseudonode task result timeouts per type") +var pseudonodeBacklogFullByType = metrics.NewTagCounter("algod_agreement_pseudonode_tasks_dropped_{TAG}", "Number of pseudonode {TAG} tasks dropped", "proposal", "vote") +var pseudonodeResultTimeoutsByType = metrics.NewTagCounter("algod_agreement_pseudonode_tasks_timeouts_{TAG}", "Number of pseudonode {TAG} task result timeouts", "vote", "pvote", "ppayload") // A pseudonode creates proposals and votes with a KeyManager which holds participation keys. // diff --git a/network/wsPeer.go b/network/wsPeer.go index 337dae07b3..272e39c294 100644 --- a/network/wsPeer.go +++ b/network/wsPeer.go @@ -48,14 +48,14 @@ const averageMessageLength = 2 * 1024 // Most of the messages are smaller tha const msgsInReadBufferPerPeer = 10 var networkSentBytesTotal = metrics.MakeCounter(metrics.NetworkSentBytesTotal) -var networkSentBytesByTag = metrics.NewTagCounter("algod_network_sent_bytes_{TAG}", "Number of bytes that were sent over the network per message tag") +var networkSentBytesByTag = metrics.NewTagCounter("algod_network_sent_bytes_{TAG}", "Number of bytes that were sent over the network for {TAG} messages") var networkReceivedBytesTotal = metrics.MakeCounter(metrics.NetworkReceivedBytesTotal) -var networkReceivedBytesByTag = metrics.NewTagCounter("algod_network_received_bytes_{TAG}", "Number of bytes that were received from the network per message tag") +var networkReceivedBytesByTag = metrics.NewTagCounter("algod_network_received_bytes_{TAG}", "Number of bytes that were received from the network for {TAG} messages") var networkMessageReceivedTotal = metrics.MakeCounter(metrics.NetworkMessageReceivedTotal) -var networkMessageReceivedByTag = metrics.NewTagCounter("algod_network_message_received_{TAG}", "Number of complete messages that were received from the network per message tag") +var networkMessageReceivedByTag = metrics.NewTagCounter("algod_network_message_received_{TAG}", "Number of complete messages that were received from the network for {TAG} messages") var networkMessageSentTotal = metrics.MakeCounter(metrics.NetworkMessageSentTotal) -var networkMessageSentByTag = metrics.NewTagCounter("algod_network_message_sent_{TAG}", "Number of complete messages that were sent to the network per message tag") +var networkMessageSentByTag = metrics.NewTagCounter("algod_network_message_sent_{TAG}", "Number of complete messages that were sent to the network for {TAG} messages") var networkConnectionsDroppedTotal = metrics.MakeCounter(metrics.NetworkConnectionsDroppedTotal) var networkMessageQueueMicrosTotal = metrics.MakeCounter(metrics.MetricName{Name: "algod_network_message_sent_queue_micros_total", Description: "Total microseconds message spent waiting in queue to be sent"}) diff --git a/util/metrics/counter.go b/util/metrics/counter.go index 73debb3e91..06ea4b0c44 100644 --- a/util/metrics/counter.go +++ b/util/metrics/counter.go @@ -155,9 +155,6 @@ func (counter *Counter) WriteMetric(buf *strings.Builder, parentLabels string) { counter.Lock() defer counter.Unlock() - if len(counter.values) < 1 { - return - } buf.WriteString("# HELP ") buf.WriteString(counter.name) buf.WriteString(" ") @@ -165,6 +162,17 @@ func (counter *Counter) WriteMetric(buf *strings.Builder, parentLabels string) { buf.WriteString("\n# TYPE ") buf.WriteString(counter.name) buf.WriteString(" counter\n") + // if counter is zero, report 0 using parentLabels and no tags + if len(counter.values) == 0 { + buf.WriteString(counter.name) + if len(parentLabels) > 0 { + buf.WriteString("{" + parentLabels + "}") + } + buf.WriteString(" 0") + buf.WriteString("\n") + return + } + // otherwise iterate through values and write one line per label for _, l := range counter.values { buf.WriteString(counter.name) buf.WriteString("{") diff --git a/util/metrics/counter_test.go b/util/metrics/counter_test.go index 43ef0c27aa..75512b80e0 100644 --- a/util/metrics/counter_test.go +++ b/util/metrics/counter_test.go @@ -19,6 +19,7 @@ package metrics import ( "context" "fmt" + "strings" "testing" "time" @@ -67,7 +68,7 @@ func TestMetricCounter(t *testing.T) { defer test.Unlock() // the the loop above we've created a single metric name with five different labels set ( host0, host1 .. host 4) // let's see if we received all the 5 different labels. - require.Equal(t, 5, len(test.metrics), "Missing metric counts were reported.") + require.Equal(t, 5, len(test.metrics), "Missing metric counts were reported: %+v", test.metrics) for k, v := range test.metrics { // we have increased each one of the labels exactly 4 times. See that the counter was counting correctly. @@ -114,7 +115,7 @@ func TestMetricCounterFastInts(t *testing.T) { defer test.Unlock() // the the loop above we've created a single metric name with five different labels set ( host0, host1 .. host 4) // let's see if we received all the 5 different labels. - require.Equal(t, 1, len(test.metrics), "Missing metric counts were reported.") + require.Equal(t, 1, len(test.metrics), "Missing metric counts were reported: %+v", test.metrics) for k, v := range test.metrics { // we have increased each one of the labels exactly 4 times. See that the counter was counting correctly. @@ -163,7 +164,7 @@ func TestMetricCounterMixed(t *testing.T) { defer test.Unlock() // the the loop above we've created a single metric name with five different labels set ( host0, host1 .. host 4) // let's see if we received all the 5 different labels. - require.Equal(t, 1, len(test.metrics), "Missing metric counts were reported.") + require.Equal(t, 1, len(test.metrics), "Missing metric counts were reported: %+v", test.metrics) for k, v := range test.metrics { // we have increased each one of the labels exactly 4 times. See that the counter was counting correctly. @@ -171,3 +172,29 @@ func TestMetricCounterMixed(t *testing.T) { require.Equal(t, "35.5", v, fmt.Sprintf("The metric '%s' reached value '%s'", k, v)) } } + +func TestCounterWriteMetric(t *testing.T) { + partitiontest.PartitionTest(t) + + c := MakeCounter(MetricName{Name: "testname", Description: "testhelp"}) + c.Deregister(nil) + + // ensure 0 counters are still logged + sbOut := strings.Builder{} + c.WriteMetric(&sbOut, `host="myhost"`) + expected := `# HELP testname testhelp +# TYPE testname counter +testname{host="myhost"} 0 +` + require.Equal(t, expected, sbOut.String()) + + c.Add(2.3, nil) + // ensure non-zero counters are logged + sbOut = strings.Builder{} + c.WriteMetric(&sbOut, `host="myhost"`) + expected = `# HELP testname testhelp +# TYPE testname counter +testname{host="myhost"} 2.3 +` + require.Equal(t, expected, sbOut.String()) +} diff --git a/util/metrics/gauge_test.go b/util/metrics/gauge_test.go index 301ae22a74..9a228c0670 100644 --- a/util/metrics/gauge_test.go +++ b/util/metrics/gauge_test.go @@ -68,7 +68,7 @@ func TestMetricGauge(t *testing.T) { // the the loop above we've created a single metric name with five different labels set ( host0, host1 .. host 4) // let's see if we received all the 5 different labels. - require.Equal(t, 5, len(test.metrics), "Missing metric counts were reported.") + require.Equal(t, 5, len(test.metrics), "Missing metric counts were reported: %+v", test.metrics) // iterate through the metrics and check the each of the metrics reached it's correct count. for k, v := range test.metrics { diff --git a/util/metrics/registry_test.go b/util/metrics/registry_test.go index aa48516309..2256993f20 100644 --- a/util/metrics/registry_test.go +++ b/util/metrics/registry_test.go @@ -37,7 +37,7 @@ func TestWriteAdd(t *testing.T) { results := make(map[string]float64) DefaultRegistry().AddMetrics(results) - require.Equal(t, 2, len(results)) + require.Equal(t, 2, len(results), "results", results) require.Contains(t, results, "gauge-name") require.InDelta(t, 12.34, results["gauge-name"], 0.01) require.Contains(t, results, "label-counter_label__a_label_value_") diff --git a/util/metrics/segment.go b/util/metrics/segment.go deleted file mode 100644 index 21db004f3a..0000000000 --- a/util/metrics/segment.go +++ /dev/null @@ -1,90 +0,0 @@ -// Copyright (C) 2019-2022 Algorand, Inc. -// This file is part of go-algorand -// -// go-algorand is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as -// published by the Free Software Foundation, either version 3 of the -// License, or (at your option) any later version. -// -// go-algorand is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with go-algorand. If not, see . - -package metrics - -import ( - "sync/atomic" - "time" -) - -// Segment represent a single segment variable. -type Segment struct { - duration *Gauge - totalDuration *Counter - counter *Counter - concurrentInstances *Gauge - concurrentCounter uint32 -} - -// SegmentInstance is generated once a segments starts. -type SegmentInstance struct { - segment *Segment - start time.Time - labels map[string]string -} - -// MakeSegment create a new segment with the provided name and description. -func MakeSegment(metric *MetricName) *Segment { - c := &Segment{ - duration: MakeGauge(MetricName{Name: metric.Name + "_sec", Description: metric.Description + "(duration)"}), - totalDuration: MakeCounter(MetricName{Name: metric.Name + "_sec_total", Description: metric.Description + "(total duration)"}), - counter: MakeCounter(MetricName{Name: metric.Name + "_total", Description: metric.Description + "(total count)"}), - concurrentInstances: MakeGauge(MetricName{Name: metric.Name + "_concurrent", Description: metric.Description + "(concurrent instances)"}), - } - return c -} - -// EnterSegment is called when a segment is entered. -func (segment *Segment) EnterSegment(labels map[string]string) (*SegmentInstance, error) { - segment.counter.Inc(labels) - concurrentCounter := atomic.AddUint32(&segment.concurrentCounter, uint32(1)) - segment.concurrentInstances.Set(float64(concurrentCounter), labels) - return &SegmentInstance{ - segment: segment, - start: time.Now(), - labels: labels, - }, nil -} - -// Register registers the counter with the default/specific registry -func (segment *Segment) Register(reg *Registry) { - segment.duration.Register(reg) - segment.totalDuration.Register(reg) - segment.counter.Register(reg) - segment.concurrentInstances.Register(reg) -} - -// Deregister deregisters the counter with the default/specific registry -func (segment *Segment) Deregister(reg *Registry) { - segment.duration.Deregister(reg) - segment.totalDuration.Deregister(reg) - segment.counter.Deregister(reg) - segment.concurrentInstances.Deregister(reg) -} - -// LeaveSegment is expected to be called via a "defer" statement. -func (segInstance *SegmentInstance) LeaveSegment() error { - if segInstance == nil { - return nil - } - concurrentCounter := atomic.AddUint32(&segInstance.segment.concurrentCounter, ^uint32(0)) - seconds := time.Since(segInstance.start).Seconds() - segInstance.segment.duration.Set(seconds, segInstance.labels) - segInstance.segment.totalDuration.Add(seconds, segInstance.labels) - segInstance.segment.concurrentInstances.Set(float64(concurrentCounter), segInstance.labels) - return nil -} diff --git a/util/metrics/segment_test.go b/util/metrics/segment_test.go deleted file mode 100644 index c5cae321d9..0000000000 --- a/util/metrics/segment_test.go +++ /dev/null @@ -1,119 +0,0 @@ -// Copyright (C) 2019-2022 Algorand, Inc. -// This file is part of go-algorand -// -// go-algorand is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as -// published by the Free Software Foundation, either version 3 of the -// License, or (at your option) any later version. -// -// go-algorand is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with go-algorand. If not, see . - -package metrics - -import ( - "context" - "fmt" - "strconv" - "strings" - "testing" - "time" - - "github.com/algorand/go-algorand/test/partitiontest" - "github.com/stretchr/testify/require" -) - -type SegmentTest struct { - MetricTest -} - -func TestMetricSegment(t *testing.T) { - partitiontest.PartitionTest(t) - - const initialSleepDuration = 10 * time.Millisecond - const maxSleepDuration = 4 * time.Second - done := false - for sleepDuration := initialSleepDuration; sleepDuration <= maxSleepDuration; sleepDuration *= 2 { - done = testMetricSegmentHelper(t, sleepDuration) - if done { - break - } - } - if !done { - require.Fail(t, "test failed") - } -} - -func testMetricSegmentHelper(t *testing.T, functionTime time.Duration) bool { - - test := &SegmentTest{ - MetricTest: NewMetricTest(), - } - // create a http listener. - port := test.createListener(":0") - - metricService := MakeMetricService(&ServiceConfig{ - NodeExporterListenAddress: fmt.Sprintf("localhost:%d", port), - Labels: map[string]string{ - "host_name": "host_one", - "session_id": "AFX-229"}, - }) - metricService.Start(context.Background()) - - acceptedFunctionThreshold := 1.1 // 10 percent. - segment := MakeSegment(&MetricName{Name: "test_segment_name1", Description: "this is the metric test for segment object"}) - segmentTest := func() { - inst, _ := segment.EnterSegment(map[string]string{"pid": "123"}) - defer inst.LeaveSegment() - time.Sleep(functionTime) - } - segmentTest() - segmentTest() - // wait two reporting cycles to ensure we received all the messages. - time.Sleep(test.sampleRate * 2) - - metricService.Shutdown() - - segment.Deregister(nil) - - test.Lock() - defer test.Unlock() - - // test the metrics values. see if we received all the 4 metrics back correctly. - // we expect the get 4 metrics : test_segment_name1_sec, test_segment_name1_sec_total, test_segment_name1_total and test_segment_name1_concurrent - // ( we don't know in which order they would appear, but the total count should be 4 ) - require.Equal(t, 4, len(test.metrics), "Missing metric counts were reported.") - - for k, v := range test.metrics { - if strings.Contains(k, "test_segment_name1_sec{") { - // should be around 400 milliseconds. - if elapsedTime, err := strconv.ParseFloat(v, 64); err != nil { - t.Fatalf("The metric '%s' has unexpected value of '%s'", k, v) - } else { - if elapsedTime < functionTime.Seconds() || elapsedTime > functionTime.Seconds()*acceptedFunctionThreshold { - return false - } - } - } - if strings.Contains(k, "test_segment_name1_sec_total{") { - // should be around 800 milliseconds. - if elapsedTime, err := strconv.ParseFloat(v, 64); err != nil { - t.Fatalf("The metric '%s' has unexpected value of '%s'", k, v) - } else { - if elapsedTime < 2*functionTime.Seconds() || elapsedTime > 2*functionTime.Seconds()*acceptedFunctionThreshold { - return false - } - } - } - if strings.Contains(k, "test_segment_name1_total{") { - // should be 2, since we had 2 calls. - require.Equal(t, "2", v, "The metric '%s' has unexpected value of '%s'", k, v) - } - } - return true -} diff --git a/util/metrics/tagcounter.go b/util/metrics/tagcounter.go index 53cce7ba69..d110b8d4f0 100644 --- a/util/metrics/tagcounter.go +++ b/util/metrics/tagcounter.go @@ -26,8 +26,12 @@ import ( // NewTagCounter makes a set of metrics under rootName for tagged counting. // "{TAG}" in rootName is replaced by the tag, otherwise "_{TAG}" is appended. -func NewTagCounter(rootName, desc string) *TagCounter { +// Optionally provided declaredTags counters for these names up front (making them easier to discover). +func NewTagCounter(rootName, desc string, declaredTags ...string) *TagCounter { tc := &TagCounter{Name: rootName, Description: desc} + for _, tag := range declaredTags { + tc.Add(tag, 0) + } DefaultRegistry().Register(tc) return tc } @@ -98,32 +102,34 @@ func (tc *TagCounter) WriteMetric(buf *strings.Builder, parentLabels string) { // no values, nothing to say. return } - // TODO: what to do with "parentLabels"? obsolete part of interface? - buf.WriteString("# ") - buf.WriteString(tc.Name) - buf.WriteString(" ") - buf.WriteString(tc.Description) - buf.WriteString("\n") isTemplate := strings.Contains(tc.Name, "{TAG}") tags := tagptr.(map[string]*uint64) for tag, tagcount := range tags { if tagcount == nil { continue } + var name string if isTemplate { - name := strings.ReplaceAll(tc.Name, "{TAG}", tag) - buf.WriteString(name) - buf.WriteRune(' ') - buf.WriteString(strconv.FormatUint(*tagcount, 10)) - buf.WriteRune('\n') + name = strings.ReplaceAll(tc.Name, "{TAG}", tag) } else { - buf.WriteString(tc.Name) - buf.WriteRune('_') - buf.WriteString(tag) - buf.WriteRune(' ') - buf.WriteString(strconv.FormatUint(*tagcount, 10)) - buf.WriteRune('\n') + name = tc.Name + "_" + tag + } + buf.WriteString("# HELP ") + buf.WriteString(name) + buf.WriteRune(' ') + buf.WriteString(strings.ReplaceAll(tc.Description, "{TAG}", tag)) + buf.WriteString("\n# TYPE ") + buf.WriteString(name) + buf.WriteString(" counter\n") + buf.WriteString(name) + if len(parentLabels) > 0 { + buf.WriteRune('{') + buf.WriteString(parentLabels) + buf.WriteRune('}') } + buf.WriteRune(' ') + buf.WriteString(strconv.FormatUint(*tagcount, 10)) + buf.WriteRune('\n') } } diff --git a/util/metrics/tagcounter_test.go b/util/metrics/tagcounter_test.go index b76202c537..feb464a35c 100644 --- a/util/metrics/tagcounter_test.go +++ b/util/metrics/tagcounter_test.go @@ -40,6 +40,7 @@ func TestTagCounter(t *testing.T) { } tc := NewTagCounter("tc", "wat") + DefaultRegistry().Deregister(tc) // check that empty TagCounter cleanly returns no results var sb strings.Builder @@ -80,6 +81,39 @@ func TestTagCounter(t *testing.T) { } } +func TestTagCounterWriteMetric(t *testing.T) { + partitiontest.PartitionTest(t) + + tc := NewTagCounter("count_msgs_{TAG}", "number of {TAG} messages") + DefaultRegistry().Deregister(tc) + + tc.Add("TX", 100) + tc.Add("TX", 1) + tc.Add("RX", 0) + + var sbOut strings.Builder + tc.WriteMetric(&sbOut, `host="myhost"`) + txExpected := `# HELP count_msgs_TX number of TX messages +# TYPE count_msgs_TX counter +count_msgs_TX{host="myhost"} 101 +` + rxExpected := `# HELP count_msgs_RX number of RX messages +# TYPE count_msgs_RX counter +count_msgs_RX{host="myhost"} 0 +` + expfmt := sbOut.String() + require.True(t, expfmt == txExpected+rxExpected || expfmt == rxExpected+txExpected, "bad fmt: %s", expfmt) + + tc2 := NewTagCounter("declared", "number of {TAG}s", "A", "B") + DefaultRegistry().Deregister(tc2) + aExpected := "# HELP declared_A number of As\n# TYPE declared_A counter\ndeclared_A{host=\"h\"} 0\n" + bExpected := "# HELP declared_B number of Bs\n# TYPE declared_B counter\ndeclared_B{host=\"h\"} 0\n" + sbOut = strings.Builder{} + tc2.WriteMetric(&sbOut, `host="h"`) + expfmt = sbOut.String() + require.True(t, expfmt == aExpected+bExpected || expfmt == bExpected+aExpected, "bad fmt: %s", expfmt) +} + func BenchmarkTagCounter(b *testing.B) { b.Logf("b.N = %d", b.N) t := b