Skip to content

Commit

Permalink
feat: introduce hdfs host as the total_hadoop_write_data metric label
Browse files Browse the repository at this point in the history
  • Loading branch information
zuston committed Aug 7, 2023
1 parent ef7f392 commit 45f0554
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ public class ShuffleServerMetrics {
// NOTE(review): presumably metric names for local-storage write outcomes; their
// registration is not visible in this excerpt -- confirm.
private static final String STORAGE_FAILED_WRITE_LOCAL = "storage_failed_write_local";
private static final String STORAGE_SUCCESS_WRITE_LOCAL = "storage_success_write_local";
// Label name: passed as the second label name when the TOTAL_HADOOP_WRITE_DATA
// counter is registered in setUpMetrics().
private static final String STORAGE_HOST_LABEL = "storage_host";
// Label value of the aggregate series: incHadoopStorageWriteDataSize() adds every
// write to this value in addition to the concrete storage host.
public static final String STORAGE_HOST_LABEL_ALL = "ALL";
// NOTE(review): presumably metric names for remote-storage write outcomes; their
// registration is not visible in this excerpt -- confirm.
public static final String STORAGE_TOTAL_WRITE_REMOTE = "storage_total_write_remote";
public static final String STORAGE_RETRY_WRITE_REMOTE = "storage_retry_write_remote";
public static final String STORAGE_FAILED_WRITE_REMOTE = "storage_failed_write_remote";
Expand Down Expand Up @@ -125,7 +126,7 @@ public class ShuffleServerMetrics {
public static Counter.Child counterTotalReadTime;
public static Counter.Child counterTotalFailedWrittenEventNum;
public static Counter.Child counterTotalDroppedEventNum;
// Held as the parent Counter rather than a pre-bound Counter.Child: the child is
// selected per call via labels(tags, storageHost) in incHadoopStorageWriteDataSize().
// Declared exactly once; a second declaration with the old Counter.Child type would
// be a duplicate field and would not compile.
public static Counter counterTotalHadoopWriteDataSize;
public static Counter.Child counterTotalLocalFileWriteDataSize;
public static Counter.Child counterTotalRequireBufferFailed;
public static Counter.Child counterTotalRequireBufferFailedForHugePartition;
Expand Down Expand Up @@ -231,6 +232,14 @@ public static void incStorageFailedCounter(String storageHost) {
}
}

/**
 * Adds {@code size} to the Hadoop write-size counter for one storage host.
 *
 * <p>The amount is recorded twice: once under the given host and once under
 * {@link #STORAGE_HOST_LABEL_ALL}, so the latter accumulates the writes of every host.
 * The call does nothing when {@code storageHost} is null or empty.
 *
 * @param storageHost value used for the storage-host label of the per-host series
 * @param size amount to add to both series
 */
public static void incHadoopStorageWriteDataSize(String storageHost, long size) {
  if (!StringUtils.isEmpty(storageHost)) {
    // Per-host series first, then the aggregate one; both share the first label value.
    for (String hostLabel : new String[] {storageHost, STORAGE_HOST_LABEL_ALL}) {
      counterTotalHadoopWriteDataSize.labels(tags, hostLabel).inc(size);
    }
  }
}

private static void setUpMetrics() {
counterTotalReceivedDataSize = metricsManager.addLabeledCounter(TOTAL_RECEIVED_DATA);
counterTotalWriteDataSize = metricsManager.addLabeledCounter(TOTAL_WRITE_DATA);
Expand All @@ -253,7 +262,9 @@ private static void setUpMetrics() {
counterTotalDroppedEventNum = metricsManager.addLabeledCounter(TOTAL_DROPPED_EVENT_NUM);
counterTotalFailedWrittenEventNum =
metricsManager.addLabeledCounter(TOTAL_FAILED_WRITTEN_EVENT_NUM);
counterTotalHadoopWriteDataSize = metricsManager.addLabeledCounter(TOTAL_HADOOP_WRITE_DATA);
counterTotalHadoopWriteDataSize =
metricsManager.addCounter(
TOTAL_HADOOP_WRITE_DATA, Constants.METRICS_TAG_LABEL_NAME, STORAGE_HOST_LABEL);
counterTotalLocalFileWriteDataSize =
metricsManager.addLabeledCounter(TOTAL_LOCALFILE_WRITE_DATA);
counterTotalRequireBufferFailed = metricsManager.addLabeledCounter(TOTAL_REQUIRE_BUFFER_FAILED);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,12 @@ public class HadoopStorageManager extends SingleStorageManager {
/**
 * Records write metrics for a flush event.
 *
 * <p>Delegates to the superclass first, then adds the event size to the Hadoop
 * write-size counter under the host of the storage the event was written to. When the
 * event carries no storage, a warning is logged and the Hadoop write size is not
 * recorded.
 *
 * @param event the flush event whose size and underlying storage are read
 * @param writeTime passed through unchanged to the superclass implementation
 */
@Override
public void updateWriteMetrics(ShuffleDataFlushEvent event, long writeTime) {
  super.updateWriteMetrics(event, writeTime);
  Storage storage = event.getUnderStorage();
  if (storage == null) {
    LOG.warn("The storage owned by event: {} is null, this should not happen", event);
    return;
  }
  // The counter is registered with label names, so it is only updated through a
  // labeled child. The former unlabeled inc(event.getSize()) call is dropped: it has
  // no unlabeled child to update and would also record the same write a second time.
  ShuffleServerMetrics.incHadoopStorageWriteDataSize(storage.getStorageHost(), event.getSize());
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,42 @@ private void validateMetrics(
assertTrue(bingo);
}

/**
 * Checks that {@code incHadoopStorageWriteDataSize} accumulates per storage host and
 * that the {@code STORAGE_HOST_LABEL_ALL} series holds the sum over all hosts.
 */
@Test
public void testHadoopStorageWriteDataSize() {
  final String firstHost = "hadoop-cluster01";
  final String secondHost = "hadoop-cluster2";

  // First write to a host starts its series at the written size.
  ShuffleServerMetrics.incHadoopStorageWriteDataSize(firstHost, 1000);
  double firstHostAfterOneWrite =
      ShuffleServerMetrics.counterTotalHadoopWriteDataSize
          .labels(Constants.SHUFFLE_SERVER_VERSION, firstHost)
          .get();
  assertEquals(1000.0, firstHostAfterOneWrite);

  // A second write to the same host adds to the existing value.
  ShuffleServerMetrics.incHadoopStorageWriteDataSize(firstHost, 500);
  double firstHostAfterTwoWrites =
      ShuffleServerMetrics.counterTotalHadoopWriteDataSize
          .labels(Constants.SHUFFLE_SERVER_VERSION, firstHost)
          .get();
  assertEquals(1500.0, firstHostAfterTwoWrites);

  // A different host gets its own, independent series.
  ShuffleServerMetrics.incHadoopStorageWriteDataSize(secondHost, 2000);
  double secondHostTotal =
      ShuffleServerMetrics.counterTotalHadoopWriteDataSize
          .labels(Constants.SHUFFLE_SERVER_VERSION, secondHost)
          .get();
  assertEquals(2000.0, secondHostTotal);

  // The aggregate series equals the sum of all three writes.
  double allHostsTotal =
      ShuffleServerMetrics.counterTotalHadoopWriteDataSize
          .labels(Constants.SHUFFLE_SERVER_VERSION, ShuffleServerMetrics.STORAGE_HOST_LABEL_ALL)
          .get();
  assertEquals(3500.0, allHostsTotal);
}

@Test
public void testStorageCounter() {
// test for local storage
Expand Down

0 comments on commit 45f0554

Please sign in to comment.