Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

metrics: make metrics easier to use with prometheus #4020

Merged
merged 10 commits into from
May 24, 2022
6 changes: 4 additions & 2 deletions agreement/gossip/network.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,11 @@ import (
)

var messagesHandledTotal = metrics.MakeCounter(metrics.AgreementMessagesHandled)
var messagesHandledByType = metrics.NewTagCounter("algod_agreement_handled_{TAG}", "Number of agreement messages handled per type")
var messagesHandledByType = metrics.NewTagCounter("algod_agreement_handled_{TAG}", "Number of agreement {TAG} messages handled",
agreementVoteMessageType, agreementProposalMessageType, agreementBundleMessageType)
var messagesDroppedTotal = metrics.MakeCounter(metrics.AgreementMessagesDropped)
var messagesDroppedByType = metrics.NewTagCounter("algod_agreement_dropped_{TAG}", "Number of agreement messages handled per type")
var messagesDroppedByType = metrics.NewTagCounter("algod_agreement_dropped_{TAG}", "Number of agreement {TAG} messages dropped",
agreementVoteMessageType, agreementProposalMessageType, agreementBundleMessageType)

const (
agreementVoteMessageType = "vote"
Expand Down
4 changes: 2 additions & 2 deletions agreement/pseudonode.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ var errPseudonodeVerifierClosedChannel = errors.New("crypto verifier closed the
var errPseudonodeNoVotes = errors.New("no valid participation keys to generate votes for given round")
var errPseudonodeNoProposals = errors.New("no valid participation keys to generate proposals for given round")

var pseudonodeBacklogFullByType = metrics.NewTagCounter("algod_agreement_pseudonode_tasks_dropped_{TAG}", "Number of pseudonode tasks dropped per type")
var pseudonodeResultTimeoutsByType = metrics.NewTagCounter("algod_agreement_pseudonode_tasks_timeouts_{TAG}", "Number of pseudonode task result timeouts per type")
var pseudonodeBacklogFullByType = metrics.NewTagCounter("algod_agreement_pseudonode_tasks_dropped_{TAG}", "Number of pseudonode {TAG} tasks dropped", "proposal", "vote")
var pseudonodeResultTimeoutsByType = metrics.NewTagCounter("algod_agreement_pseudonode_tasks_timeouts_{TAG}", "Number of pseudonode {TAG} task result timeouts", "vote", "pvote", "ppayload")

// A pseudonode creates proposals and votes with a KeyManager which holds participation keys.
//
Expand Down
8 changes: 4 additions & 4 deletions network/wsPeer.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,14 @@ const averageMessageLength = 2 * 1024 // Most of the messages are smaller tha
const msgsInReadBufferPerPeer = 10

var networkSentBytesTotal = metrics.MakeCounter(metrics.NetworkSentBytesTotal)
var networkSentBytesByTag = metrics.NewTagCounter("algod_network_sent_bytes_{TAG}", "Number of bytes that were sent over the network per message tag")
var networkSentBytesByTag = metrics.NewTagCounter("algod_network_sent_bytes_{TAG}", "Number of bytes that were sent over the network for {TAG} messages")
var networkReceivedBytesTotal = metrics.MakeCounter(metrics.NetworkReceivedBytesTotal)
var networkReceivedBytesByTag = metrics.NewTagCounter("algod_network_received_bytes_{TAG}", "Number of bytes that were received from the network per message tag")
var networkReceivedBytesByTag = metrics.NewTagCounter("algod_network_received_bytes_{TAG}", "Number of bytes that were received from the network for {TAG} messages")

var networkMessageReceivedTotal = metrics.MakeCounter(metrics.NetworkMessageReceivedTotal)
var networkMessageReceivedByTag = metrics.NewTagCounter("algod_network_message_received_{TAG}", "Number of complete messages that were received from the network per message tag")
var networkMessageReceivedByTag = metrics.NewTagCounter("algod_network_message_received_{TAG}", "Number of complete messages that were received from the network for {TAG} messages")
var networkMessageSentTotal = metrics.MakeCounter(metrics.NetworkMessageSentTotal)
var networkMessageSentByTag = metrics.NewTagCounter("algod_network_message_sent_{TAG}", "Number of complete messages that were sent to the network per message tag")
var networkMessageSentByTag = metrics.NewTagCounter("algod_network_message_sent_{TAG}", "Number of complete messages that were sent to the network for {TAG} messages")

var networkConnectionsDroppedTotal = metrics.MakeCounter(metrics.NetworkConnectionsDroppedTotal)
var networkMessageQueueMicrosTotal = metrics.MakeCounter(metrics.MetricName{Name: "algod_network_message_sent_queue_micros_total", Description: "Total microseconds message spent waiting in queue to be sent"})
Expand Down
14 changes: 11 additions & 3 deletions util/metrics/counter.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,16 +155,24 @@ func (counter *Counter) WriteMetric(buf *strings.Builder, parentLabels string) {
counter.Lock()
defer counter.Unlock()

if len(counter.values) < 1 {
return
}
buf.WriteString("# HELP ")
buf.WriteString(counter.name)
buf.WriteString(" ")
buf.WriteString(counter.description)
buf.WriteString("\n# TYPE ")
buf.WriteString(counter.name)
buf.WriteString(" counter\n")
// if counter is zero, report 0 using parentLabels and no tags
if len(counter.values) == 0 {
buf.WriteString(counter.name)
if len(parentLabels) > 0 {
buf.WriteString("{" + parentLabels + "}")
}
buf.WriteString(" 0")
buf.WriteString("\n")
cce marked this conversation as resolved.
Show resolved Hide resolved
return
}
// otherwise iterate through values and write one line per label
for _, l := range counter.values {
buf.WriteString(counter.name)
buf.WriteString("{")
Expand Down
33 changes: 30 additions & 3 deletions util/metrics/counter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package metrics
import (
"context"
"fmt"
"strings"
"testing"
"time"

Expand Down Expand Up @@ -67,7 +68,7 @@ func TestMetricCounter(t *testing.T) {
defer test.Unlock()
// in the loop above we've created a single metric name with five different labels set ( host0, host1 .. host 4)
// let's see if we received all the 5 different labels.
require.Equal(t, 5, len(test.metrics), "Missing metric counts were reported.")
require.Equal(t, 5, len(test.metrics), "Missing metric counts were reported: %+v", test.metrics)

for k, v := range test.metrics {
// we have increased each one of the labels exactly 4 times. See that the counter was counting correctly.
Expand Down Expand Up @@ -114,7 +115,7 @@ func TestMetricCounterFastInts(t *testing.T) {
defer test.Unlock()
// in the loop above we've created a single metric name with five different labels set ( host0, host1 .. host 4)
// let's see if we received all the 5 different labels.
require.Equal(t, 1, len(test.metrics), "Missing metric counts were reported.")
require.Equal(t, 1, len(test.metrics), "Missing metric counts were reported: %+v", test.metrics)

for k, v := range test.metrics {
// we have increased each one of the labels exactly 4 times. See that the counter was counting correctly.
Expand Down Expand Up @@ -163,11 +164,37 @@ func TestMetricCounterMixed(t *testing.T) {
defer test.Unlock()
// in the loop above we've created a single metric name with five different labels set ( host0, host1 .. host 4)
// let's see if we received all the 5 different labels.
require.Equal(t, 1, len(test.metrics), "Missing metric counts were reported.")
require.Equal(t, 1, len(test.metrics), "Missing metric counts were reported: %+v", test.metrics)

for k, v := range test.metrics {
// we have increased each one of the labels exactly 4 times. See that the counter was counting correctly.
// ( counters start at zero )
require.Equal(t, "35.5", v, fmt.Sprintf("The metric '%s' reached value '%s'", k, v))
}
}

// TestCounterWriteMetric checks the exact text produced by Counter.WriteMetric,
// first for a counter that has never been added to and then after one Add.
func TestCounterWriteMetric(t *testing.T) {
	partitiontest.PartitionTest(t)

	counter := MakeCounter(MetricName{Name: "testname", Description: "testhelp"})
	counter.Deregister(nil)

	// render writes the counter into a fresh builder and returns the text.
	render := func() string {
		var out strings.Builder
		counter.WriteMetric(&out, `host="myhost"`)
		return out.String()
	}

	// a counter with no recorded values must still be reported, with value 0
	wantZero := `# HELP testname testhelp
# TYPE testname counter
testname{host="myhost"} 0
`
	require.Equal(t, wantZero, render())

	// once a value has been added, that value is what gets reported
	counter.Add(2.3, nil)
	wantNonZero := `# HELP testname testhelp
# TYPE testname counter
testname{host="myhost"} 2.3
`
	require.Equal(t, wantNonZero, render())
}
2 changes: 1 addition & 1 deletion util/metrics/gauge_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ func TestMetricGauge(t *testing.T) {

// in the loop above we've created a single metric name with five different labels set ( host0, host1 .. host 4)
// let's see if we received all the 5 different labels.
require.Equal(t, 5, len(test.metrics), "Missing metric counts were reported.")
require.Equal(t, 5, len(test.metrics), "Missing metric counts were reported: %+v", test.metrics)

// iterate through the metrics and check that each of the metrics reached its correct count.
for k, v := range test.metrics {
Expand Down
2 changes: 1 addition & 1 deletion util/metrics/registry_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ func TestWriteAdd(t *testing.T) {
results := make(map[string]float64)
DefaultRegistry().AddMetrics(results)

require.Equal(t, 2, len(results))
require.Equal(t, 2, len(results), "results", results)
require.Contains(t, results, "gauge-name")
require.InDelta(t, 12.34, results["gauge-name"], 0.01)
require.Contains(t, results, "label-counter_label__a_label_value_")
Expand Down
2 changes: 1 addition & 1 deletion util/metrics/segment_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ func testMetricSegmentHelper(t *testing.T, functionTime time.Duration) bool {
// test the metrics values. see if we received all the 4 metrics back correctly.
// we expect to get 4 metrics : test_segment_name1_sec, test_segment_name1_sec_total, test_segment_name1_total and test_segment_name1_concurrent
// ( we don't know in which order they would appear, but the total count should be 4 )
require.Equal(t, 4, len(test.metrics), "Missing metric counts were reported.")
require.Equal(t, 4, len(test.metrics), "Missing metric counts were reported: %+v", test.metrics)

for k, v := range test.metrics {
if strings.Contains(k, "test_segment_name1_sec{") {
Expand Down
42 changes: 24 additions & 18 deletions util/metrics/tagcounter.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,12 @@ import (

// NewTagCounter makes a set of metrics under rootName for tagged counting.
// "{TAG}" in rootName is replaced by the tag, otherwise "_{TAG}" is appended.
func NewTagCounter(rootName, desc string) *TagCounter {
// Optionally provided declaredTags create counters for these names up front (making them easier to discover).
func NewTagCounter(rootName, desc string, declaredTags ...string) *TagCounter {
tc := &TagCounter{Name: rootName, Description: desc}
for _, tag := range declaredTags {
tc.Add(tag, 0)
}
DefaultRegistry().Register(tc)
return tc
}
Expand Down Expand Up @@ -98,32 +102,34 @@ func (tc *TagCounter) WriteMetric(buf *strings.Builder, parentLabels string) {
// no values, nothing to say.
return
}
// TODO: what to do with "parentLabels"? obsolete part of interface?
buf.WriteString("# ")
buf.WriteString(tc.Name)
buf.WriteString(" ")
buf.WriteString(tc.Description)
buf.WriteString("\n")
isTemplate := strings.Contains(tc.Name, "{TAG}")
tags := tagptr.(map[string]*uint64)
for tag, tagcount := range tags {
if tagcount == nil {
continue
}
var name string
if isTemplate {
name := strings.ReplaceAll(tc.Name, "{TAG}", tag)
buf.WriteString(name)
buf.WriteRune(' ')
buf.WriteString(strconv.FormatUint(*tagcount, 10))
buf.WriteRune('\n')
name = strings.ReplaceAll(tc.Name, "{TAG}", tag)
} else {
buf.WriteString(tc.Name)
buf.WriteRune('_')
buf.WriteString(tag)
buf.WriteRune(' ')
buf.WriteString(strconv.FormatUint(*tagcount, 10))
buf.WriteRune('\n')
name = tc.Name + "_" + tag
}
buf.WriteString("# HELP ")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the value of adding HELP here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the HELP line provides a description of the metric for some UIs & backends use ... for example Grafana 6.6+ shows it alongside the metric name when you are browsing metrics https://grafana.com/blog/2020/06/15/how-we-made-working-with-prometheus-easier-with-metric-metadata-in-grafanas-explore-view/

buf.WriteString(name)
buf.WriteRune(' ')
buf.WriteString(strings.ReplaceAll(tc.Description, "{TAG}", tag))
buf.WriteString("\n# TYPE ")
buf.WriteString(name)
buf.WriteString(" counter\n")
buf.WriteString(name)
if len(parentLabels) > 0 {
buf.WriteRune('{')
buf.WriteString(parentLabels)
buf.WriteRune('}')
}
buf.WriteRune(' ')
buf.WriteString(strconv.FormatUint(*tagcount, 10))
buf.WriteRune('\n')
}
}

Expand Down
34 changes: 34 additions & 0 deletions util/metrics/tagcounter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ func TestTagCounter(t *testing.T) {
}

tc := NewTagCounter("tc", "wat")
DefaultRegistry().Deregister(tc)

// check that empty TagCounter cleanly returns no results
var sb strings.Builder
Expand Down Expand Up @@ -80,6 +81,39 @@ func TestTagCounter(t *testing.T) {
}
}

// TestTagCounterWriteMetric checks the HELP/TYPE/value text that
// TagCounter.WriteMetric emits per tag, for tags created through Add and for
// tags declared when the counter is constructed.
func TestTagCounterWriteMetric(t *testing.T) {
	partitiontest.PartitionTest(t)

	// render writes the given counter into a fresh builder and returns the text.
	render := func(c *TagCounter, parentLabels string) string {
		var out strings.Builder
		c.WriteMetric(&out, parentLabels)
		return out.String()
	}
	// inEitherOrder reports whether got is the two sections concatenated in
	// one order or the other; the test accepts both.
	inEitherOrder := func(got, first, second string) bool {
		return got == first+second || got == second+first
	}

	added := NewTagCounter("count_msgs_{TAG}", "number of {TAG} messages")
	DefaultRegistry().Deregister(added)

	added.Add("TX", 100)
	added.Add("TX", 1)
	added.Add("RX", 0)

	txExpected := `# HELP count_msgs_TX number of TX messages
# TYPE count_msgs_TX counter
count_msgs_TX{host="myhost"} 101
`
	rxExpected := `# HELP count_msgs_RX number of RX messages
# TYPE count_msgs_RX counter
count_msgs_RX{host="myhost"} 0
`
	expfmt := render(added, `host="myhost"`)
	require.True(t, inEitherOrder(expfmt, txExpected, rxExpected), "bad fmt: %s", expfmt)

	// tags passed to the constructor are expected to be written with value 0
	declared := NewTagCounter("declared", "number of {TAG}s", "A", "B")
	DefaultRegistry().Deregister(declared)

	aExpected := "# HELP declared_A number of As\n# TYPE declared_A counter\ndeclared_A{host=\"h\"} 0\n"
	bExpected := "# HELP declared_B number of Bs\n# TYPE declared_B counter\ndeclared_B{host=\"h\"} 0\n"
	expfmt = render(declared, `host="h"`)
	require.True(t, inEitherOrder(expfmt, aExpected, bExpected), "bad fmt: %s", expfmt)
}

func BenchmarkTagCounter(b *testing.B) {
b.Logf("b.N = %d", b.N)
t := b
Expand Down