diff --git a/server/src/main/java/org/apache/uniffle/server/LocalStorageChecker.java b/server/src/main/java/org/apache/uniffle/server/LocalStorageChecker.java index cbd9afd41f..c4151fa3af 100644 --- a/server/src/main/java/org/apache/uniffle/server/LocalStorageChecker.java +++ b/server/src/main/java/org/apache/uniffle/server/LocalStorageChecker.java @@ -78,7 +78,8 @@ public LocalStorageChecker(ShuffleServerConf conf, List storages) public boolean checkIsHealthy() { AtomicInteger num = new AtomicInteger(0); AtomicLong totalSpace = new AtomicLong(0L); - AtomicLong usedSpace = new AtomicLong(0L); + AtomicLong wholeDiskUsedSpace = new AtomicLong(0L); + AtomicLong serviceUsedSpace = new AtomicLong(0L); AtomicInteger corruptedDirs = new AtomicInteger(0); CountDownLatch cdl = new CountDownLatch(storageInfos.size()); storageInfos @@ -93,7 +94,8 @@ public boolean checkIsHealthy() { } totalSpace.addAndGet(getTotalSpace(storageInfo.storageDir)); - usedSpace.addAndGet(getUsedSpace(storageInfo.storageDir)); + wholeDiskUsedSpace.addAndGet(getWholeDiskUsedSpace(storageInfo.storageDir)); + serviceUsedSpace.addAndGet(getServiceUsedSpace(storageInfo.storageDir)); if (storageInfo.checkIsSpaceEnough()) { num.incrementAndGet(); @@ -106,11 +108,12 @@ public boolean checkIsHealthy() { LOG.error("Failed to check local storage!"); } ShuffleServerMetrics.gaugeLocalStorageTotalSpace.set(totalSpace.get()); - ShuffleServerMetrics.gaugeLocalStorageUsedSpace.set(usedSpace.get()); + ShuffleServerMetrics.gaugeLocalStorageWholeDiskUsedSpace.set(wholeDiskUsedSpace.get()); + ShuffleServerMetrics.gaugeLocalStorageServiceUsedSpace.set(serviceUsedSpace.get()); ShuffleServerMetrics.gaugeLocalStorageTotalDirsNum.set(storageInfos.size()); ShuffleServerMetrics.gaugeLocalStorageCorruptedDirsNum.set(corruptedDirs.get()); ShuffleServerMetrics.gaugeLocalStorageUsedSpaceRatio.set( - usedSpace.get() * 1.0 / totalSpace.get()); + wholeDiskUsedSpace.get() * 1.0 / totalSpace.get()); if (storageInfos.isEmpty()) { if (isHealthy) { @@ -143,10 +146,36 @@ long getTotalSpace(File file) { // Only for testing @VisibleForTesting - long getUsedSpace(File file) { + long getWholeDiskUsedSpace(File file) { return file.getTotalSpace() - file.getUsableSpace(); } + protected static long getServiceUsedSpace(File storageDir) { + if (storageDir == null || !storageDir.exists()) { + return 0; + } + + if (storageDir.isFile()) { + return storageDir.length(); + } + + File[] files = storageDir.listFiles(); + if (files == null) { + return 0; + } + + long totalUsage = 0; + for (File file : files) { + if (file.isFile()) { + totalUsage += file.length(); + } else { + totalUsage += getServiceUsedSpace(file); + } + } + + return totalUsage; + } + // todo: This function will be integrated to MultiStorageManager, currently we only support disk // check. class StorageInfo { @@ -167,7 +196,7 @@ boolean checkIsSpaceEnough() { this.isHealthy = false; return false; } - double usagePercent = getUsedSpace(storageDir) * 100.0 / getTotalSpace(storageDir); + double usagePercent = getWholeDiskUsedSpace(storageDir) * 100.0 / getTotalSpace(storageDir); if (isHealthy) { if (Double.compare(usagePercent, diskMaxUsagePercentage) >= 0) { isHealthy = false; diff --git a/server/src/main/java/org/apache/uniffle/server/ShuffleServerMetrics.java b/server/src/main/java/org/apache/uniffle/server/ShuffleServerMetrics.java index 0a8b5431fe..f7334b75ca 100644 --- a/server/src/main/java/org/apache/uniffle/server/ShuffleServerMetrics.java +++ b/server/src/main/java/org/apache/uniffle/server/ShuffleServerMetrics.java @@ -61,7 +61,9 @@ public class ShuffleServerMetrics { private static final String LOCAL_STORAGE_TOTAL_DIRS_NUM = "local_storage_total_dirs_num"; private static final String LOCAL_STORAGE_CORRUPTED_DIRS_NUM = "local_storage_corrupted_dirs_num"; private static final String LOCAL_STORAGE_TOTAL_SPACE = "local_storage_total_space"; - private static final String LOCAL_STORAGE_USED_SPACE = "local_storage_used_space"; + private static final String LOCAL_STORAGE_WHOLE_DISK_USED_SPACE = + "local_storage_whole_disk_used_space"; + private static final String LOCAL_STORAGE_SERVICE_USED_SPACE = "local_storage_service_used_space"; private static final String LOCAL_STORAGE_USED_SPACE_RATIO = "local_storage_used_space_ratio"; private static final String IS_HEALTHY = "is_healthy"; @@ -143,7 +145,8 @@ public class ShuffleServerMetrics { public static Gauge.Child gaugeLocalStorageTotalDirsNum; public static Gauge.Child gaugeLocalStorageCorruptedDirsNum; public static Gauge.Child gaugeLocalStorageTotalSpace; - public static Gauge.Child gaugeLocalStorageUsedSpace; + public static Gauge.Child gaugeLocalStorageWholeDiskUsedSpace; + public static Gauge.Child gaugeLocalStorageServiceUsedSpace; public static Gauge.Child gaugeLocalStorageUsedSpaceRatio; public static Gauge.Child gaugeIsHealthy; @@ -290,7 +293,10 @@ private static void setUpMetrics() { gaugeLocalStorageCorruptedDirsNum = metricsManager.addLabeledGauge(LOCAL_STORAGE_CORRUPTED_DIRS_NUM); gaugeLocalStorageTotalSpace = metricsManager.addLabeledGauge(LOCAL_STORAGE_TOTAL_SPACE); - gaugeLocalStorageUsedSpace = metricsManager.addLabeledGauge(LOCAL_STORAGE_USED_SPACE); + gaugeLocalStorageWholeDiskUsedSpace = + metricsManager.addLabeledGauge(LOCAL_STORAGE_WHOLE_DISK_USED_SPACE); + gaugeLocalStorageServiceUsedSpace = + metricsManager.addLabeledGauge(LOCAL_STORAGE_SERVICE_USED_SPACE); gaugeLocalStorageUsedSpaceRatio = metricsManager.addLabeledGauge(LOCAL_STORAGE_USED_SPACE_RATIO); diff --git a/server/src/test/java/org/apache/uniffle/server/LocalStorageCheckerTest.java b/server/src/test/java/org/apache/uniffle/server/LocalStorageCheckerTest.java new file mode 100644 index 0000000000..e5eaba11d6 --- /dev/null +++ b/server/src/test/java/org/apache/uniffle/server/LocalStorageCheckerTest.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.uniffle.server; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +public class LocalStorageCheckerTest { + + @Test + public void testGetUniffleUsedSpace(@TempDir File tempDir) throws IOException { + File file1 = createTempFile(tempDir, "file1.txt", 1000); + File file2 = createTempFile(tempDir, "file2.txt", 2000); + File subdir1 = createTempSubDirectory(tempDir, "subdir1"); + File file3 = createTempFile(subdir1, "file3.txt", 500); + File subdir2 = createTempSubDirectory(subdir1, "subdir2"); + File file4 = createTempFile(subdir2, "file4.txt", 1500); + + // Call the method to calculate disk usage + long calculatedUsage = LocalStorageChecker.getServiceUsedSpace(tempDir); + + // The expected total usage should be the sum of file1 + file2 + file3 + file4 + long expectedUsage = file1.length() + file2.length() + file3.length() + file4.length(); + + // Assert that the calculated result matches the expected value + Assertions.assertEquals(expectedUsage, calculatedUsage); + } + + private File createTempFile(File directory, String fileName, long fileSize) throws IOException { + File file = new File(directory, fileName); + Files.write(file.toPath(), new byte[(int) fileSize]); + return file; + } + + private File createTempSubDirectory(File parentDirectory, String directoryName) { + File subDir = new File(parentDirectory, directoryName); + subDir.mkdirs(); + return subDir; + } +} diff --git a/server/src/test/java/org/apache/uniffle/server/StorageCheckerTest.java b/server/src/test/java/org/apache/uniffle/server/StorageCheckerTest.java index 632a8abc5b..71f97ebbaa 100644 --- a/server/src/test/java/org/apache/uniffle/server/StorageCheckerTest.java +++ b/server/src/test/java/org/apache/uniffle/server/StorageCheckerTest.java @@ -69,7 +69,7 @@ public void checkTest(@TempDir File baseDir) throws Exception { assertTrue(checker.checkIsHealthy()); assertEquals(3000, ShuffleServerMetrics.gaugeLocalStorageTotalSpace.get()); - assertEquals(600, ShuffleServerMetrics.gaugeLocalStorageUsedSpace.get()); + assertEquals(600, ShuffleServerMetrics.gaugeLocalStorageWholeDiskUsedSpace.get()); assertEquals(0.2, ShuffleServerMetrics.gaugeLocalStorageUsedSpaceRatio.get()); assertEquals(3, ShuffleServerMetrics.gaugeLocalStorageTotalDirsNum.get()); assertEquals(0, ShuffleServerMetrics.gaugeLocalStorageCorruptedDirsNum.get()); @@ -77,14 +77,14 @@ public void checkTest(@TempDir File baseDir) throws Exception { callTimes++; assertTrue(checker.checkIsHealthy()); assertEquals(3000, ShuffleServerMetrics.gaugeLocalStorageTotalSpace.get()); - assertEquals(1400, ShuffleServerMetrics.gaugeLocalStorageUsedSpace.get()); + assertEquals(1400, ShuffleServerMetrics.gaugeLocalStorageWholeDiskUsedSpace.get()); assertEquals(3, ShuffleServerMetrics.gaugeLocalStorageTotalDirsNum.get()); assertEquals(0, ShuffleServerMetrics.gaugeLocalStorageCorruptedDirsNum.get()); callTimes++; assertFalse(checker.checkIsHealthy()); assertEquals(3000, ShuffleServerMetrics.gaugeLocalStorageTotalSpace.get()); - assertEquals(2100, ShuffleServerMetrics.gaugeLocalStorageUsedSpace.get()); + assertEquals(2100, ShuffleServerMetrics.gaugeLocalStorageWholeDiskUsedSpace.get()); assertEquals(3, ShuffleServerMetrics.gaugeLocalStorageTotalDirsNum.get()); assertEquals(0, ShuffleServerMetrics.gaugeLocalStorageCorruptedDirsNum.get()); @@ -94,7 +94,7 @@ public void checkTest(@TempDir File baseDir) throws Exception { checker = new MockStorageChecker(conf, storages); assertFalse(checker.checkIsHealthy()); assertEquals(3000, ShuffleServerMetrics.gaugeLocalStorageTotalSpace.get()); - assertEquals(1600, ShuffleServerMetrics.gaugeLocalStorageUsedSpace.get()); + assertEquals(1600, ShuffleServerMetrics.gaugeLocalStorageWholeDiskUsedSpace.get()); assertEquals(3, ShuffleServerMetrics.gaugeLocalStorageTotalDirsNum.get()); assertEquals(0, ShuffleServerMetrics.gaugeLocalStorageCorruptedDirsNum.get()); @@ -102,7 +102,7 @@ public void checkTest(@TempDir File baseDir) throws Exception { checker.checkIsHealthy(); assertTrue(checker.checkIsHealthy()); assertEquals(3000, ShuffleServerMetrics.gaugeLocalStorageTotalSpace.get()); - assertEquals(250, ShuffleServerMetrics.gaugeLocalStorageUsedSpace.get()); + assertEquals(250, ShuffleServerMetrics.gaugeLocalStorageWholeDiskUsedSpace.get()); assertEquals(3, ShuffleServerMetrics.gaugeLocalStorageTotalDirsNum.get()); assertEquals(0, ShuffleServerMetrics.gaugeLocalStorageCorruptedDirsNum.get()); @@ -133,7 +133,7 @@ long getTotalSpace(File file) { // we mock this method, and will return different values according // to call times. @Override - long getUsedSpace(File file) { + long getWholeDiskUsedSpace(File file) { long result = 0; switch (file.getName()) { case "st1":