Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
8cd2278
Add jvm gc monitor and alerter
Pengzna Jul 20, 2023
6fc2282
Fix concurrent GC duration calculation
Pengzna Jul 20, 2023
dc61e20
Merge remote-tracking branch 'origin/master'
Pengzna Jul 20, 2023
6f17647
Reformat codes with IoTDB's standard.
Jul 20, 2023
7b5247e
Reformat gc_pause_{cause} naming style.
Jul 20, 2023
a8772b1
Merge branch 'apache:master' into master
Pengzna Jul 20, 2023
d8af94d
adding license.
Jul 20, 2023
1b4a6f5
add metric remove and some comments
Jul 21, 2023
b841b55
remove end-of-line comment
Jul 21, 2023
9281355
Merge remote-tracking branch 'base/master'
Jul 21, 2023
6042b27
merge base iotdb
Jul 21, 2023
3428dd4
Merge branch 'apache:master' into master
Pengzna Jul 24, 2023
771d35d
add thread name
Jul 25, 2023
db909e1
Merge remote-tracking branch 'base/master'
Jul 25, 2023
4dff86d
spotless apply
Jul 25, 2023
c71517f
rename and add some comments
Jul 25, 2023
ae79618
add some detailed comments
Jul 26, 2023
26d21a2
Merge branch 'apache:master' into master
Pengzna Jul 26, 2023
c477c80
fix global previousTotal var
Jul 26, 2023
bc6f0bf
fix review of ThreadName
Jul 26, 2023
fbb3562
remove duplicate metrics `throughout`
Jul 26, 2023
84a78a3
fix review 1-4
Jul 27, 2023
2fc18db
remove duplicate
Jul 27, 2023
6cfae46
remove duplicate fields and add accumulated gc time within obsWindows
Jul 28, 2023
15eaf43
adjust max GC time threshold
Jul 28, 2023
5109116
Merge branch 'apache:master' into master
Pengzna Jul 28, 2023
276f662
Merge remote-tracking branch 'origin/master'
Jul 28, 2023
17f60e7
fix review
Jul 28, 2023
df8caa5
fix threshold
Jul 28, 2023
d5786f6
Merge branch 'apache:master' into master
Pengzna Jul 28, 2023
9800c54
Merge branch 'apache:master' into master
Pengzna Jul 28, 2023
a625d7b
Merge branch 'apache:master' into master
Pengzna Jul 28, 2023
79da71f
Merge branch 'apache:master' into master
Pengzna Jul 30, 2023
e45a8ed
remove unnecessary metric and fix young pool and old pool allocated b…
Jul 30, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -54,16 +54,16 @@
public class JvmGcMetrics implements IMetricSet, AutoCloseable {
private static final Logger logger = LoggerFactory.getLogger(JvmGcMetrics.class);
private final List<Runnable> notificationListenerCleanUpRunnables = new CopyOnWriteArrayList<>();
private String youngGenPoolName;
private String firstYoungGenPoolName;
private String oldGenPoolName;
private String nonGenerationalMemoryPool;
private final Map<String, AtomicLong> lastGcTotalDurationMap = new ConcurrentHashMap<>();

public JvmGcMetrics() {
for (MemoryPoolMXBean mbean : ManagementFactory.getMemoryPoolMXBeans()) {
String name = mbean.getName();
if (isYoungGenPool(name)) {
youngGenPoolName = name;
if (isFirstYoungGenPool(name)) {
firstYoungGenPoolName = name;
} else if (isOldGenPool(name)) {
oldGenPoolName = name;
} else if (isNonGenerationalHeapPool(name)) {
Expand Down Expand Up @@ -98,12 +98,8 @@ private static boolean isPartiallyConcurrentGC(GarbageCollectorMXBean gc) {
}
}

/**
 * Tells whether a GC notification should be treated as a concurrent phase.
 *
 * @param cause the GC cause string taken from the notification
 * @param name the name of the collector that emitted the notification
 * @return {@code true} when the cause equals "No GC" or the collector name equals
 *     "Shenandoah Cycles"; {@code false} otherwise (including when both are {@code null})
 */
private static boolean isConcurrentPhase(String cause, String name) {
  // Literal-first equals keeps both comparisons null-safe.
  if ("No GC".equals(cause)) {
    return true;
  }
  return "Shenandoah Cycles".equals(name);
}

private static boolean isYoungGenPool(String name) {
return name != null && (name.endsWith("Eden Space") || name.endsWith("Survivor Space"));
private static boolean isFirstYoungGenPool(String name) {
return name != null && name.endsWith("Eden Space");
}

private static boolean isOldGenPool(String name) {
Expand Down Expand Up @@ -142,26 +138,35 @@ public void bindTo(AbstractMetricService metricService) {
liveDataSize,
AtomicLong::get);

AtomicLong heapMemUsedPercentage = new AtomicLong(calculateMemoryUsagePercentage());
metricService.createAutoGauge(
SystemMetric.JVM_GC_MEMORY_USED_PERCENT.toString(),
MetricLevel.CORE,
heapMemUsedPercentage,
AtomicLong::get);

Counter allocatedBytes =
metricService.getOrCreateCounter(
SystemMetric.JVM_GC_MEMORY_ALLOCATED_BYTES.toString(), MetricLevel.CORE);

Counter promotedBytes =
(oldGenPoolName == null)
? null
: metricService.getOrCreateCounter(
SystemMetric.JVM_GC_MEMORY_PROMOTED_BYTES.toString(), MetricLevel.CORE);

// start watching for GC notifications
final AtomicLong heapPoolSizeAfterGc = new AtomicLong();
Counter nonGenAllocatedBytes =
(nonGenerationalMemoryPool == null)
? null
: metricService.getOrCreateCounter(
SystemMetric.JVM_GC_NON_GEN_MEMORY_ALLOCATED_BYTES.toString(), MetricLevel.CORE);

Counter oldGenAllocatedBytes =
(oldGenPoolName == null)
? null
: metricService.getOrCreateCounter(
SystemMetric.JVM_GC_OLD_MEMORY_ALLOCATED_BYTES.toString(), MetricLevel.CORE);

Counter youngGenAllocatedBytes =
(firstYoungGenPoolName == null)
? null
: metricService.getOrCreateCounter(
SystemMetric.JVM_GC_YOUNG_MEMORY_ALLOCATED_BYTES.toString(), MetricLevel.CORE);

final AtomicLong firstYoungHeapPoolSizeAfterGc = new AtomicLong();
// The long-lived heap pool covers the old-gen heap pool and the non-generational heap pool.
final AtomicLong longLivedHeapPoolSizeAfterGc = new AtomicLong();

// start watching for GC notifications
for (GarbageCollectorMXBean mbean : ManagementFactory.getGarbageCollectorMXBeans()) {
if (!(mbean instanceof NotificationEmitter)) {
continue;
Expand Down Expand Up @@ -194,16 +199,15 @@ public void bindTo(AbstractMetricService metricService) {
previousTotal.set(total);
}

String timerName;
if (isConcurrentPhase(gcCause, notificationInfo.getGcName())) {
timerName = "jvm_gc_concurrent_phase_time";
} else {
timerName = "jvm_gc_pause";
}
// create a timer with tags named by gcCause, which binds gcCause with gcDuration
Timer timer =
metricService.getOrCreateTimer(
timerName, MetricLevel.CORE, "action", gcAction, "cause", gcCause);
SystemMetric.JVM_GC_PAUSE.toString(),
MetricLevel.CORE,
"action",
gcAction,
"cause",
gcCause);
timer.update(duration, TimeUnit.MILLISECONDS);

// add support for ZGC
Expand All @@ -219,25 +223,6 @@ public void bindTo(AbstractMetricService metricService) {
pausesCount.inc();
}

// Monitor old/young GC counts, which helps users locate GC anomalies.
// Unfortunately, JMX does not seem to provide an API for monitoring mixed GCs in G1.
// In fact, JMX may report mixed GCs as minor GCs.
if (GcGenerationAge.fromName(notificationInfo.getGcName()) == GcGenerationAge.OLD) {
Counter oldGcCounter =
metricService.getOrCreateCounter(
SystemMetric.JVM_GC_YOUNG_GC_COUNT.toString(), MetricLevel.CORE);
oldGcCounter.inc();
} else if (GcGenerationAge.fromName(notificationInfo.getGcName())
== GcGenerationAge.YOUNG) {
Counter youngGcCounter =
metricService.getOrCreateCounter(
SystemMetric.JVM_GC_OLD_GC_COUNT.toString(), MetricLevel.CORE);
youngGcCounter.inc();
}

// update memory usage percentage
heapMemUsedPercentage.set(calculateMemoryUsagePercentage());

// Update promotion and allocation counters
final Map<String, MemoryUsage> before = gcInfo.getMemoryUsageBeforeGc();
final Map<String, MemoryUsage> after = gcInfo.getMemoryUsageAfterGc();
Expand All @@ -246,8 +231,8 @@ public void bindTo(AbstractMetricService metricService) {
countPoolSizeDelta(
gcInfo.getMemoryUsageBeforeGc(),
gcInfo.getMemoryUsageAfterGc(),
allocatedBytes,
heapPoolSizeAfterGc,
nonGenAllocatedBytes,
longLivedHeapPoolSizeAfterGc,
nonGenerationalMemoryPool);
if (after.get(nonGenerationalMemoryPool).getUsed()
< before.get(nonGenerationalMemoryPool).getUsed()) {
Expand All @@ -272,24 +257,29 @@ public void bindTo(AbstractMetricService metricService) {
// GC (since in JMX, a minor GC of G1 may actually represent mixed GC, which collect
// some obj in old gen region). To track the
// live data size we record the value if we see a reduction in the old gen heap size
// or
// after a major GC.
// or after a major GC.
if (oldAfter < oldBefore
|| GcGenerationAge.fromName(notificationInfo.getGcName())
== GcGenerationAge.OLD) {
liveDataSize.set(oldAfter);
final long oldMaxAfter = after.get(oldGenPoolName).getMax();
maxDataSize.set(oldMaxAfter);
}
countPoolSizeDelta(
gcInfo.getMemoryUsageBeforeGc(),
gcInfo.getMemoryUsageAfterGc(),
oldGenAllocatedBytes,
longLivedHeapPoolSizeAfterGc,
oldGenPoolName);
}

if (youngGenPoolName != null) {
if (firstYoungGenPoolName != null) {
countPoolSizeDelta(
gcInfo.getMemoryUsageBeforeGc(),
gcInfo.getMemoryUsageAfterGc(),
allocatedBytes,
heapPoolSizeAfterGc,
youngGenPoolName);
youngGenAllocatedBytes,
firstYoungHeapPoolSizeAfterGc,
firstYoungGenPoolName);
}
}
};
Expand Down Expand Up @@ -321,12 +311,22 @@ public void unbindFrom(AbstractMetricService metricService) {
metricService.remove(MetricType.AUTO_GAUGE, SystemMetric.JVM_GC_MAX_DATA_SIZE_BYTES.toString());
metricService.remove(
MetricType.AUTO_GAUGE, SystemMetric.JVM_GC_LIVE_DATA_SIZE_BYTES.toString());
metricService.remove(MetricType.COUNTER, SystemMetric.JVM_GC_MEMORY_ALLOCATED_BYTES.toString());
metricService.remove(MetricType.AUTO_GAUGE, SystemMetric.JVM_GC_MEMORY_USED_PERCENT.toString());

if (oldGenPoolName != null) {
if (nonGenerationalMemoryPool != null) {
metricService.remove(
MetricType.COUNTER, SystemMetric.JVM_GC_MEMORY_PROMOTED_BYTES.toString());
MetricType.COUNTER, SystemMetric.JVM_GC_NON_GEN_MEMORY_ALLOCATED_BYTES.toString());
} else {
if (oldGenPoolName != null) {
metricService.remove(
MetricType.COUNTER, SystemMetric.JVM_GC_MEMORY_PROMOTED_BYTES.toString());
metricService.remove(
MetricType.COUNTER, SystemMetric.JVM_GC_OLD_MEMORY_ALLOCATED_BYTES.toString());
}

if (firstYoungGenPoolName != null) {
metricService.remove(
MetricType.COUNTER, SystemMetric.JVM_GC_YOUNG_MEMORY_ALLOCATED_BYTES.toString());
}
}

// start watching for GC notifications
Expand All @@ -342,27 +342,21 @@ public void unbindFrom(AbstractMetricService metricService) {

String gcCause = notificationInfo.getGcCause();
String gcAction = notificationInfo.getGcAction();
String timerName;
if (isConcurrentPhase(gcCause, notificationInfo.getGcName())) {
timerName = "jvm_gc_concurrent_phase_time";
} else {
timerName = "jvm_gc_pause";
}
metricService.remove(MetricType.TIMER, timerName, "action", gcAction, "cause", gcCause);
metricService.remove(
MetricType.TIMER,
SystemMetric.JVM_GC_PAUSE.toString(),
"action",
gcAction,
"cause",
gcCause);

if (mbean.getName().equals("ZGC Cycles")) {
metricService.remove(
MetricType.COUNTER, SystemMetric.JVM_ZGC_CYCLES_COUNT.toString());
} else if (mbean.getName().equals("ZGC Pauses")) {
metricService.remove(
MetricType.COUNTER, SystemMetric.JVM_ZGC_PAUSES_COUNT.toString());
}
if (GcGenerationAge.fromName(notificationInfo.getGcName()) == GcGenerationAge.OLD) {
metricService.remove(
MetricType.COUNTER, SystemMetric.JVM_GC_YOUNG_GC_COUNT.toString());
} else if (GcGenerationAge.fromName(notificationInfo.getGcName())
== GcGenerationAge.YOUNG) {
metricService.remove(MetricType.COUNTER, SystemMetric.JVM_GC_OLD_GC_COUNT.toString());
}
};
NotificationEmitter notificationEmitter = (NotificationEmitter) mbean;
notificationEmitter.addNotificationListener(
Expand Down Expand Up @@ -426,12 +420,6 @@ public void close() {
notificationListenerCleanUpRunnables.forEach(Runnable::run);
}

/**
 * Computes current heap usage as a whole-number percentage of the JVM's maximum memory.
 *
 * <p>The used-heap figure is read from the memory MXBean and divided by
 * {@code Runtime.getRuntime().maxMemory()} using integer arithmetic, so the result is
 * truncated toward zero.
 *
 * @return used heap bytes multiplied by 100 and divided by the runtime's max memory
 */
public long calculateMemoryUsagePercentage() {
  final long usedHeapBytes = ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getUsed();
  final long maxHeapBytes = Runtime.getRuntime().maxMemory();
  // Multiply before dividing so the integer division does not collapse to 0.
  return usedHeapBytes * 100 / maxHeapBytes;
}

enum GcGenerationAge {
OLD,
YOUNG,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,13 +79,13 @@ public enum SystemMetric {
JVM_THREADS_STATUS_THREADS("jvm_threads_states_threads"),
JVM_GC_MAX_DATA_SIZE_BYTES("jvm_gc_max_data_size_bytes"),
JVM_GC_LIVE_DATA_SIZE_BYTES("jvm_gc_live_data_size_bytes"),
JVM_GC_MEMORY_ALLOCATED_BYTES("jvm_gc_memory_allocated_bytes"),
JVM_GC_YOUNG_MEMORY_ALLOCATED_BYTES("jvm_gc_young_memory_allocated_bytes"),
JVM_GC_OLD_MEMORY_ALLOCATED_BYTES("jvm_gc_old_memory_allocated_bytes"),
JVM_GC_NON_GEN_MEMORY_ALLOCATED_BYTES("jvm_gc_non_gen_memory_allocated_bytes"),
JVM_GC_MEMORY_PROMOTED_BYTES("jvm_gc_memory_promoted_bytes"),
JVM_GC_MEMORY_USED_PERCENT("jvm_gc_memory_used_percent"),
JVM_GC_PAUSE("jvm_gc_pause"),
JVM_ZGC_CYCLES_COUNT("jvm_zgc_cycles_count"),
JVM_ZGC_PAUSES_COUNT("jvm_zgc_pauses_count"),
JVM_GC_YOUNG_GC_COUNT("jvm_gc_old_gc_count"),
JVM_GC_OLD_GC_COUNT("jvm_gc_young_gc_count"),
JVM_GC_ACCUMULATED_TIME_PERCENTAGE("jvm_gc_accumulated_time_percentage"),
// net related
RECEIVED_BYTES("received_bytes"),
Expand Down