Rename RPC metrics and use more tags (#121)

Remove "_ms" suffix from request latency metric name In practice, the precision of the recorded timing values depends on the metrics backend and the client library. Since we're using time.Duration based API, we cannot claim the unit of measure in the metric name. Rename all metrics to "request" and "http_request" and use tags for other dimensions Success/failure are distinguished by error=true|false tag. HTTP Requests are distinguished by status_code=2xx|3xx|... tag
jaegertracing · Mar 21, 2017 · 1669a1b · 1669a1b
1 parent bcc8018
commit 1669a1b
Show file tree

Hide file tree

Showing 4 changed files with 37 additions and 44 deletions.
diff --git a/rpcmetrics/metrics.go b/rpcmetrics/metrics.go
@@ -34,29 +34,26 @@ const (
 // Metrics is a collection of metrics for an endpoint describing
 // throughput, success, errors, and performance.
 type Metrics struct {
-	// Requests is a counter of the total number of successes + failures.
-	Requests metrics.Counter `metric:"requests"`
-
 	// Success is a counter of the total number of successes.
-	Success metrics.Counter `metric:"success"`
+	Success metrics.Counter `metric:"requests" tags:"error=false"`
 
 	// Failures is a counter of the number of times any failure has been observed.
-	Failures metrics.Counter `metric:"failures"`
+	Failures metrics.Counter `metric:"requests" tags:"error=true"`
 
-	// RequestLatencyMs is a histogram of the latency of requests in milliseconds.
-	RequestLatencyMs metrics.Timer `metric:"request_latency_ms"`
+	// RequestLatency is a histogram of the latency of requests.
+	RequestLatency metrics.Timer `metric:"request_latency"`
 
 	// HTTPStatusCode2xx is a counter of the total number of requests with HTTP status code 200-299
-	HTTPStatusCode2xx metrics.Counter `metric:"http_status_code_2xx"`
+	HTTPStatusCode2xx metrics.Counter `metric:"http_requests" tags:"status_code=2xx"`
 
 	// HTTPStatusCode3xx is a counter of the total number of requests with HTTP status code 300-399
-	HTTPStatusCode3xx metrics.Counter `metric:"http_status_code_3xx"`
+	HTTPStatusCode3xx metrics.Counter `metric:"http_requests" tags:"status_code=3xx"`
 
 	// HTTPStatusCode4xx is a counter of the total number of requests with HTTP status code 400-499
-	HTTPStatusCode4xx metrics.Counter `metric:"http_status_code_4xx"`
+	HTTPStatusCode4xx metrics.Counter `metric:"http_requests" tags:"status_code=4xx"`
 
 	// HTTPStatusCode5xx is a counter of the total number of requests with HTTP status code 500-599
-	HTTPStatusCode5xx metrics.Counter `metric:"http_status_code_5xx"`
+	HTTPStatusCode5xx metrics.Counter `metric:"http_requests" tags:"status_code=5xx"`
 }
 
 func (m *Metrics) recordHTTPStatusCode(statusCode uint16) {

diff --git a/rpcmetrics/metrics_test.go b/rpcmetrics/metrics_test.go
@@ -28,10 +28,17 @@ import (
 	"github.com/uber/jaeger-lib/metrics/testutils"
 )
 
-func endpointTags(endpoint string) map[string]string {
-	return map[string]string{
-		"endpoint": endpoint,
+// E.g. tags("key", "value", "key", "value")
+func tags(kv ...string) map[string]string {
+	m := make(map[string]string)
+	for i := 0; i < len(kv)-1; i += 2 {
+		m[kv[i]] = kv[i+1]
 	}
+	return m
+}
+
+func endpointTags(endpoint string, kv ...string) map[string]string {
+	return tags(append([]string{"endpoint", endpoint}, kv...)...)
 }
 
 func TestMetricsByEndpoint(t *testing.T) {
@@ -49,12 +56,12 @@ func TestMetricsByEndpoint(t *testing.T) {
 	m5 := mbe.get("overflow2")
 
 	for _, m := range []*Metrics{m1, m2, m2a, m3, m4, m5} {
-		m.Requests.Inc(1)
+		m.Success.Inc(1)
 	}
 
 	testutils.AssertCounterMetrics(t, met,
-		testutils.ExpectedMetric{Name: "requests", Tags: endpointTags("abc1"), Value: 3},
-		testutils.ExpectedMetric{Name: "requests", Tags: endpointTags("abc3"), Value: 1},
-		testutils.ExpectedMetric{Name: "requests", Tags: endpointTags("other"), Value: 2},
+		testutils.ExpectedMetric{Name: "requests", Tags: endpointTags("abc1", "error", "false"), Value: 3},
+		testutils.ExpectedMetric{Name: "requests", Tags: endpointTags("abc3", "error", "false"), Value: 1},
+		testutils.ExpectedMetric{Name: "requests", Tags: endpointTags("other", "error", "false"), Value: 2},
 	)
 }
diff --git a/rpcmetrics/observer.go b/rpcmetrics/observer.go
@@ -151,13 +151,12 @@ func (so *SpanObserver) OnFinish(options opentracing.FinishOptions) {
 	}
 
 	mets := so.metricsByEndpoint.get(so.operationName)
-	mets.Requests.Inc(1)
 	if so.err {
 		mets.Failures.Inc(1)
 	} else {
 		mets.Success.Inc(1)
 	}
-	mets.RequestLatencyMs.Record(options.FinishTime.Sub(so.startTime))
+	mets.RequestLatency.Record(options.FinishTime.Sub(so.startTime))
 	mets.recordHTTPStatusCode(so.httpStatusCode)
 }
 

diff --git a/rpcmetrics/observer_test.go b/rpcmetrics/observer_test.go
@@ -52,13 +52,11 @@ func ExampleObserver() {
 	span.Finish()
 
 	c, _ := metricsFactory.Snapshot()
-	fmt.Printf("requests: %d\n", c["requests|endpoint=test"])
-	fmt.Printf("success: %d\n", c["success|endpoint=test"])
-	fmt.Printf("errors: %d\n", c["errors|endpoint=test"])
+	fmt.Printf("requests (success): %d\n", c["requests|endpoint=test|error=false"])
+	fmt.Printf("requests (failure): %d\n", c["requests|endpoint=test|error=true"])
 	// Output:
-	// requests: 1
-	// success: 1
-	// errors: 0
+	// requests (success): 1
+	// requests (failure): 0
 }
 
 type testTracer struct {
@@ -115,19 +113,15 @@ func TestObserver(t *testing.T) {
 
 		u.AssertCounterMetrics(t,
 			testTracer.metrics,
-			u.ExpectedMetric{Name: "requests", Tags: endpointTags("local-span"), Value: 0},
-			u.ExpectedMetric{Name: "success", Tags: endpointTags("local-span"), Value: 0},
-			u.ExpectedMetric{Name: "requests", Tags: endpointTags("get-user"), Value: 1},
-			u.ExpectedMetric{Name: "success", Tags: endpointTags("get-user"), Value: 1},
-			u.ExpectedMetric{Name: "requests", Tags: endpointTags("get-user-override"), Value: 1},
-			u.ExpectedMetric{Name: "success", Tags: endpointTags("get-user-override"), Value: 1},
-			u.ExpectedMetric{Name: "requests", Tags: endpointTags("get-user-client"), Value: 0},
-			u.ExpectedMetric{Name: "success", Tags: endpointTags("get-user-client"), Value: 0},
+			u.ExpectedMetric{Name: "requests", Tags: endpointTags("local-span", "error", "false"), Value: 0},
+			u.ExpectedMetric{Name: "requests", Tags: endpointTags("get-user", "error", "false"), Value: 1},
+			u.ExpectedMetric{Name: "requests", Tags: endpointTags("get-user-override", "error", "false"), Value: 1},
+			u.ExpectedMetric{Name: "requests", Tags: endpointTags("get-user-client", "error", "false"), Value: 0},
 		)
 		// TODO something wrong with string generation, .P99 should not be appended to the tag
 		// as a result we cannot use u.AssertGaugeMetrics
 		_, g := testTracer.metrics.Snapshot()
-		assert.EqualValues(t, 51, g["request_latency_ms|endpoint=get-user.P99"])
+		assert.EqualValues(t, 51, g["request_latency|endpoint=get-user.P99"])
 	})
 }
 
@@ -140,16 +134,13 @@ func TestTags(t *testing.T) {
 
 	testCases := []tagTestCase{
 		{key: "something", value: 42, metrics: []u.ExpectedMetric{
-			{Name: "success", Value: 1},
-			{Name: "requests", Value: 1},
+			{Name: "requests", Value: 1, Tags: tags("error", "false")},
 		}},
 		{key: "error", value: true, metrics: []u.ExpectedMetric{
-			{Name: "failures", Value: 1},
-			{Name: "requests", Value: 1},
+			{Name: "requests", Value: 1, Tags: tags("error", "true")},
 		}},
 		{key: "error", value: "true", metrics: []u.ExpectedMetric{
-			{Name: "failures", Value: 1},
-			{Name: "requests", Value: 1},
+			{Name: "requests", Value: 1, Tags: tags("error", "true")},
 		}},
 	}
 
@@ -162,8 +153,7 @@ func TestTags(t *testing.T) {
 		for _, v := range values {
 			testCases = append(testCases, tagTestCase{
 				key: "http.status_code", value: v, metrics: []u.ExpectedMetric{
-					{Name: "requests", Value: 1},
-					{Name: fmt.Sprintf("http_status_code_%dxx", i), Value: 1},
+					{Name: "http_requests", Value: 1, Tags: tags("status_code", fmt.Sprintf("%dxx", i))},
 				},
 			})
 		}
@@ -172,7 +162,7 @@ func TestTags(t *testing.T) {
 	for _, tc := range testCases {
 		testCase := tc // capture loop var
 		for i := range testCase.metrics {
-			testCase.metrics[i].Tags = endpointTags("span")
+			testCase.metrics[i].Tags["endpoint"] = "span"
 		}
 		t.Run(fmt.Sprintf("%s-%v", testCase.key, testCase.value), func(t *testing.T) {
 			withTestTracer(func(testTracer *testTracer) {