Skip to content

Commit

Permalink
[#1598] fix(server) Fix inaccurate used_direct_memory_size metric (#1599)
Browse files Browse the repository at this point in the history

### What changes were proposed in this pull request?

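Fix the inaccurate `used_direct_memory_size` metric. Previously it reported only the direct memory used by Netty and ignored the direct memory used by gRPC's shaded Netty; it now reports the sum of the two.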
In addition, the `used_direct_memory_size_by_netty` and `used_direct_memory_size_by_grpc_netty` metrics are added to provide a more detailed breakdown, making it easier to locate and analyze direct memory issues in production.

### Why are the changes needed?

Fix #1598.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Tested in our environment.
  • Loading branch information
rickyma committed Mar 25, 2024
1 parent 220d9cb commit 3a1b4d2
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,20 @@ public void start() {
service.scheduleAtFixedRate(
() -> {
try {
long usedDirectMemory = PlatformDependent.usedDirectMemory();
long usedDirectMemoryByNetty = PlatformDependent.usedDirectMemory();
long usedDirectMemoryByGrpcNetty =
io.grpc.netty.shaded.io.netty.util.internal.PlatformDependent.usedDirectMemory();
if (LOG.isDebugEnabled()) {
LOG.debug("Current usedDirectMemory:{}", usedDirectMemory);
LOG.debug(
"Current usedDirectMemoryByNetty:{}, usedDirectMemoryByGrpcNetty:{}",
usedDirectMemoryByNetty,
usedDirectMemoryByGrpcNetty);
}
ShuffleServerMetrics.gaugeUsedDirectMemorySize.set(usedDirectMemory);
ShuffleServerMetrics.gaugeUsedDirectMemorySizeByNetty.set(usedDirectMemoryByNetty);
ShuffleServerMetrics.gaugeUsedDirectMemorySizeByGrpcNetty.set(
usedDirectMemoryByGrpcNetty);
ShuffleServerMetrics.gaugeUsedDirectMemorySize.set(
usedDirectMemoryByNetty + usedDirectMemoryByGrpcNetty);
} catch (Throwable t) {
LOG.error("Failed to report direct memory.", t);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,9 @@ public class ShuffleServerMetrics {
private static final String USED_BUFFER_SIZE = "used_buffer_size";
private static final String READ_USED_BUFFER_SIZE = "read_used_buffer_size";
private static final String USED_DIRECT_MEMORY_SIZE = "used_direct_memory_size";
private static final String ALLOCATED_DIRECT_MEMORY_SIZE = "allocated_direct_memory_size";
private static final String PINNED_DIRECT_MEMORY_SIZE = "pinned_direct_memory_size";
private static final String USED_DIRECT_MEMORY_SIZE_BY_NETTY = "used_direct_memory_size_by_netty";
private static final String USED_DIRECT_MEMORY_SIZE_BY_GRPC_NETTY =
"used_direct_memory_size_by_grpc_netty";
private static final String TOTAL_FAILED_WRITTEN_EVENT_NUM = "total_failed_written_event_num";
private static final String TOTAL_DROPPED_EVENT_NUM = "total_dropped_event_num";
private static final String TOTAL_HADOOP_WRITE_DATA = "total_hadoop_write_data";
Expand Down Expand Up @@ -186,6 +187,8 @@ public class ShuffleServerMetrics {
public static Gauge.Child gaugeUsedBufferSize;
public static Gauge.Child gaugeReadBufferUsedSize;
public static Gauge.Child gaugeUsedDirectMemorySize;
public static Gauge.Child gaugeUsedDirectMemorySizeByNetty;
public static Gauge.Child gaugeUsedDirectMemorySizeByGrpcNetty;
public static Gauge.Child gaugeWriteHandler;
public static Gauge.Child gaugeEventQueueSize;
public static Gauge.Child gaugeHadoopFlushThreadPoolQueueSize;
Expand Down Expand Up @@ -382,6 +385,10 @@ private static void setUpMetrics() {
gaugeUsedBufferSize = metricsManager.addLabeledGauge(USED_BUFFER_SIZE);
gaugeReadBufferUsedSize = metricsManager.addLabeledGauge(READ_USED_BUFFER_SIZE);
gaugeUsedDirectMemorySize = metricsManager.addLabeledGauge(USED_DIRECT_MEMORY_SIZE);
gaugeUsedDirectMemorySizeByNetty =
metricsManager.addLabeledGauge(USED_DIRECT_MEMORY_SIZE_BY_NETTY);
gaugeUsedDirectMemorySizeByGrpcNetty =
metricsManager.addLabeledGauge(USED_DIRECT_MEMORY_SIZE_BY_GRPC_NETTY);
gaugeWriteHandler = metricsManager.addLabeledGauge(TOTAL_WRITE_HANDLER);
gaugeEventQueueSize = metricsManager.addLabeledGauge(EVENT_QUEUE_SIZE);
gaugeHadoopFlushThreadPoolQueueSize =
Expand Down

0 comments on commit 3a1b4d2

Please sign in to comment.