Skip to content

Commit 80f2062

Browse files
authored
feat(metrics): add modern Micrometer metrics PFP-1613 (#14661)
- Add micrometer metrics in RequestContext - Add a smoke test that verifies metrics via the prometheus endpoint
1 parent 8b194cd commit 80f2062

File tree

6 files changed

+229
-9
lines changed

6 files changed

+229
-9
lines changed

metadata-operation-context/src/main/java/io/datahubproject/metadata/context/RequestContext.java

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -268,13 +268,19 @@ private static void captureAPIMetrics(MetricUtils metricUtils, RequestContext re
268268
}
269269

270270
if (requestContext.getRequestAPI() != RequestAPI.TEST && metricUtils != null) {
271+
String agentClass = requestContext.getAgentClass().toLowerCase().replaceAll("\\s+", "");
272+
String requestAPI = requestContext.getRequestAPI().toString().toLowerCase();
271273
metricUtils.increment(
272-
String.format(
273-
"requestContext_%s_%s_%s",
274-
userCategory,
275-
requestContext.getAgentClass().toLowerCase().replaceAll("\\s+", ""),
276-
requestContext.getRequestAPI().toString().toLowerCase()),
277-
1);
274+
String.format("requestContext_%s_%s_%s", userCategory, agentClass, requestAPI), 1);
275+
metricUtils.incrementMicrometer(
276+
MetricUtils.DATAHUB_REQUEST_COUNT,
277+
1,
278+
"user.category",
279+
userCategory,
280+
"agent.class",
281+
agentClass,
282+
"request.api",
283+
requestAPI);
278284
}
279285
}
280286

metadata-operation-context/src/test/java/io/datahubproject/metadata/context/RequestContextTest.java

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -272,9 +272,20 @@ public void testCaptureAPIMetricsForSystemUser() {
272272
.metricUtils(mockMetricUtils)
273273
.build();
274274

275-
// Verify that the counter was incremented
275+
// Verify that both legacy and new metrics are recorded
276276
verify(mockMetricUtils, atLeastOnce())
277277
.increment(eq("requestContext_system_unknown_restli"), eq(1.0d));
278+
279+
verify(mockMetricUtils, atLeastOnce())
280+
.incrementMicrometer(
281+
eq("datahub.request.count"),
282+
eq(1.0d),
283+
eq("user.category"),
284+
eq("system"),
285+
eq("agent.class"),
286+
eq("unknown"),
287+
eq("request.api"),
288+
eq("restli"));
278289
}
279290

280291
@Test
@@ -286,9 +297,20 @@ public void testCaptureAPIMetricsForDatahubUser() {
286297
.metricUtils(mockMetricUtils)
287298
.build();
288299

289-
// Verify that the counter was incremented
300+
// Verify that both legacy and new metrics are recorded
290301
verify(mockMetricUtils, atLeastOnce())
291302
.increment(eq("requestContext_admin_unknown_restli"), eq(1.0d));
303+
304+
verify(mockMetricUtils, atLeastOnce())
305+
.incrementMicrometer(
306+
eq("datahub.request.count"),
307+
eq(1.0d),
308+
eq("user.category"),
309+
eq("admin"),
310+
eq("agent.class"),
311+
eq("unknown"),
312+
eq("request.api"),
313+
eq("restli"));
292314
}
293315

294316
@Test
@@ -300,9 +322,20 @@ public void testCaptureAPIMetricsForRegularUser() {
300322
.metricUtils(mockMetricUtils)
301323
.build();
302324

303-
// Verify that the counter was incremented
325+
// Verify that both legacy and new metrics are recorded
304326
verify(mockMetricUtils, atLeastOnce())
305327
.increment(eq("requestContext_regular_unknown_restli"), eq(1.0d));
328+
329+
verify(mockMetricUtils, atLeastOnce())
330+
.incrementMicrometer(
331+
eq("datahub.request.count"),
332+
eq(1.0d),
333+
eq("user.category"),
334+
eq("regular"),
335+
eq("agent.class"),
336+
eq("unknown"),
337+
eq("request.api"),
338+
eq("restli"));
306339
}
307340

308341
@Test

metadata-utils/src/main/java/com/linkedin/metadata/utils/metrics/MetricUtils.java

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ public class MetricUtils {
2424
/* Micrometer */
2525
public static final String KAFKA_MESSAGE_QUEUE_TIME = "kafka.message.queue.time";
2626
public static final String DATAHUB_REQUEST_HOOK_QUEUE_TIME = "datahub.request.hook.queue.time";
27+
public static final String DATAHUB_REQUEST_COUNT = "datahub.request.count";
2728

2829
/* OpenTelemetry */
2930
public static final String CACHE_HIT_ATTR = "cache.hit";
@@ -47,6 +48,7 @@ public class MetricUtils {
4748
private static final Map<String, DistributionSummary> legacyHistogramCache =
4849
new ConcurrentHashMap<>();
4950
private static final Map<String, Gauge> legacyGaugeCache = new ConcurrentHashMap<>();
51+
private static final Map<String, Counter> micrometerCounterCache = new ConcurrentHashMap<>();
5052
// For state-based gauges (like throttled state)
5153
private static final Map<String, AtomicDouble> gaugeStates = new ConcurrentHashMap<>();
5254

@@ -102,6 +104,57 @@ public void increment(String metricName, double increment) {
102104
});
103105
}
104106

107+
/**
108+
* Increment a counter using Micrometer metrics library.
109+
*
110+
* @param metricName The name of the metric
111+
* @param increment The value to increment by
112+
* @param tags The tags to associate with the metric (can be empty)
113+
*/
114+
public void incrementMicrometer(String metricName, double increment, String... tags) {
115+
getRegistry()
116+
.ifPresent(
117+
meterRegistry -> {
118+
// Create a cache key that includes both metric name and tags
119+
String cacheKey = createCacheKey(metricName, tags);
120+
Counter counter =
121+
micrometerCounterCache.computeIfAbsent(
122+
cacheKey, key -> meterRegistry.counter(metricName, tags));
123+
counter.increment(increment);
124+
});
125+
}
126+
127+
/**
128+
* Creates a cache key for a metric with its tags.
129+
*
130+
* <p>Examples:
131+
*
132+
* <ul>
133+
* <li>No tags: {@code createCacheKey("datahub.request.count")} returns {@code
134+
* "datahub.request.count"}
135+
* <li>With tags: {@code createCacheKey("datahub.request.count", "user_category", "regular",
136+
* "agent_class", "browser")} returns {@code
137+
* "datahub.request.count|user_category=regular|agent_class=browser"}
138+
* </ul>
139+
*
140+
* @param metricName the name of the metric
141+
* @param tags the tags to associate with the metric (key-value pairs)
142+
* @return a string key that uniquely identifies this metric+tags combination
143+
*/
144+
private String createCacheKey(String metricName, String... tags) {
145+
if (tags.length == 0) {
146+
return metricName;
147+
}
148+
149+
StringBuilder keyBuilder = new StringBuilder(metricName);
150+
for (int i = 0; i < tags.length; i += 2) {
151+
if (i + 1 < tags.length) {
152+
keyBuilder.append("|").append(tags[i]).append("=").append(tags[i + 1]);
153+
}
154+
}
155+
return keyBuilder.toString();
156+
}
157+
105158
/**
106159
* Set a state-based gauge value (e.g., for binary states like throttled/not throttled). This is
107160
* more efficient than repeatedly calling gauge() with different suppliers.

metadata-utils/src/test/java/com/linkedin/metadata/utils/metrics/MetricUtilsTest.java

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -400,4 +400,72 @@ public void testParseSLOSecondsWithNegativeValues() {
400400
assertEquals(result[1], 500.0);
401401
assertEquals(result[2], 1000.0);
402402
}
403+
404+
@Test
405+
public void testIncrementMicrometerBasicFunctionality() {
406+
String metricName = "test.micrometer.counter";
407+
double incrementValue = 2.5;
408+
409+
metricUtils.incrementMicrometer(metricName, incrementValue);
410+
411+
Counter counter = meterRegistry.counter(metricName);
412+
assertNotNull(counter);
413+
assertEquals(counter.count(), incrementValue);
414+
}
415+
416+
@Test
417+
public void testIncrementMicrometerWithTags() {
418+
String metricName = "test.micrometer.tagged";
419+
double incrementValue = 1.0;
420+
421+
metricUtils.incrementMicrometer(metricName, incrementValue, "env", "prod", "service", "api");
422+
423+
Counter counter = meterRegistry.counter(metricName, "env", "prod", "service", "api");
424+
assertNotNull(counter);
425+
assertEquals(counter.count(), incrementValue);
426+
}
427+
428+
@Test
429+
public void testIncrementMicrometerCachingBehavior() {
430+
String metricName = "test.cache.counter";
431+
432+
// First call should create the counter
433+
metricUtils.incrementMicrometer(metricName, 1.0);
434+
Counter counter1 = meterRegistry.counter(metricName);
435+
assertEquals(counter1.count(), 1.0);
436+
437+
// Second call should reuse the same counter
438+
metricUtils.incrementMicrometer(metricName, 2.0);
439+
Counter counter2 = meterRegistry.counter(metricName);
440+
assertSame(counter1, counter2); // Should be the exact same object due to caching
441+
assertEquals(counter2.count(), 3.0); // 1.0 + 2.0
442+
}
443+
444+
@Test
445+
public void testIncrementMicrometerDifferentTagsCacheSeparately() {
446+
String metricName = "test.cache.tags";
447+
448+
// Create counters with different tags
449+
metricUtils.incrementMicrometer(metricName, 1.0, "env", "prod");
450+
metricUtils.incrementMicrometer(metricName, 2.0, "env", "dev");
451+
452+
Counter prodCounter = meterRegistry.counter(metricName, "env", "prod");
453+
Counter devCounter = meterRegistry.counter(metricName, "env", "dev");
454+
455+
assertNotSame(prodCounter, devCounter); // Different cache entries
456+
assertEquals(prodCounter.count(), 1.0);
457+
assertEquals(devCounter.count(), 2.0);
458+
}
459+
460+
@Test
461+
public void testIncrementMicrometerMultipleIncrementsOnSameCounter() {
462+
String metricName = "test.multiple.increments";
463+
464+
metricUtils.incrementMicrometer(metricName, 1.0, "type", "request");
465+
metricUtils.incrementMicrometer(metricName, 3.0, "type", "request");
466+
metricUtils.incrementMicrometer(metricName, 2.0, "type", "request");
467+
468+
Counter counter = meterRegistry.counter(metricName, "type", "request");
469+
assertEquals(counter.count(), 6.0); // 1.0 + 3.0 + 2.0
470+
}
403471
}
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# Metrics smoke tests package
2+
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
"""
2+
Smoke test for DataHub-specific metrics.
3+
4+
This test verifies that:
5+
1. DataHub custom metrics are being generated
6+
2. The new incrementMicrometer metrics are working
7+
3. Request context metrics are being recorded
8+
"""
9+
10+
import logging
11+
12+
import requests
13+
14+
from tests.utils import get_gms_url
15+
16+
logger = logging.getLogger(__name__)
17+
18+
19+
def test_datahub_request_count_metric_present():
20+
"""Test that the new datahub_request_count metric is present in Prometheus output."""
21+
gms_url = get_gms_url()
22+
prometheus_url = f"{gms_url}/actuator/prometheus"
23+
24+
# Service initialization should already induce requests that will generate
25+
# metrics. So we don't need to trigger any requests as part of test setup.
26+
response = requests.get(prometheus_url)
27+
content = response.text
28+
29+
# Look specifically for the datahub_request_count metric
30+
metric_lines = []
31+
for line in content.split("\n"):
32+
line = line.strip()
33+
if line and not line.startswith("#"):
34+
if "datahub_request_count" in line:
35+
metric_lines.append(line)
36+
37+
logger.info(f"✅ Found {len(metric_lines)} datahub_request_count metric lines")
38+
for line in metric_lines:
39+
logger.info(f" - {line}")
40+
41+
# The metric should be present
42+
assert len(metric_lines) > 0, (
43+
"datahub_request_count metric not found in Prometheus output"
44+
)
45+
46+
# Verify that the metric has the expected tags
47+
expected_tags = ["user_category", "agent_class", "request_api"]
48+
logger.info(f"🔍 Checking for expected tags: {expected_tags}")
49+
50+
for metric_line in metric_lines:
51+
# Check if the metric line contains the expected tags
52+
has_expected_tags = all(tag in metric_line for tag in expected_tags)
53+
assert has_expected_tags, (
54+
f"Metric line missing expected tags. Line: {metric_line}, Expected tags: {expected_tags}"
55+
)
56+
logger.info(f"✅ Metric line has all expected tags: {metric_line}")
57+
58+
logger.info("🎉 All datahub_request_count metrics have the expected tags!")

0 commit comments

Comments
 (0)