Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[#1074] feat: Introduce the metric of local_storage_uniffle_used_space #1075

Merged
merged 4 commits into from
Aug 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,8 @@ public LocalStorageChecker(ShuffleServerConf conf, List<LocalStorage> storages)
public boolean checkIsHealthy() {
AtomicInteger num = new AtomicInteger(0);
AtomicLong totalSpace = new AtomicLong(0L);
AtomicLong usedSpace = new AtomicLong(0L);
AtomicLong wholeDiskUsedSpace = new AtomicLong(0L);
AtomicLong serviceUsedSpace = new AtomicLong(0L);
AtomicInteger corruptedDirs = new AtomicInteger(0);
CountDownLatch cdl = new CountDownLatch(storageInfos.size());
storageInfos
Expand All @@ -93,7 +94,8 @@ public boolean checkIsHealthy() {
}

totalSpace.addAndGet(getTotalSpace(storageInfo.storageDir));
usedSpace.addAndGet(getUsedSpace(storageInfo.storageDir));
wholeDiskUsedSpace.addAndGet(getWholeDiskUsedSpace(storageInfo.storageDir));
serviceUsedSpace.addAndGet(getServiceUsedSpace(storageInfo.storageDir));

if (storageInfo.checkIsSpaceEnough()) {
num.incrementAndGet();
Expand All @@ -106,11 +108,12 @@ public boolean checkIsHealthy() {
LOG.error("Failed to check local storage!");
}
ShuffleServerMetrics.gaugeLocalStorageTotalSpace.set(totalSpace.get());
ShuffleServerMetrics.gaugeLocalStorageUsedSpace.set(usedSpace.get());
ShuffleServerMetrics.gaugeLocalStorageWholeDiskUsedSpace.set(wholeDiskUsedSpace.get());
ShuffleServerMetrics.gaugeLocalStorageServiceUsedSpace.set(serviceUsedSpace.get());
ShuffleServerMetrics.gaugeLocalStorageTotalDirsNum.set(storageInfos.size());
ShuffleServerMetrics.gaugeLocalStorageCorruptedDirsNum.set(corruptedDirs.get());
ShuffleServerMetrics.gaugeLocalStorageUsedSpaceRatio.set(
usedSpace.get() * 1.0 / totalSpace.get());
wholeDiskUsedSpace.get() * 1.0 / totalSpace.get());

if (storageInfos.isEmpty()) {
if (isHealthy) {
Expand Down Expand Up @@ -143,10 +146,36 @@ long getTotalSpace(File file) {

// Only for testing
@VisibleForTesting
long getUsedSpace(File file) {
long getWholeDiskUsedSpace(File file) {
  // Used space of the whole partition that holds `file`: partition capacity minus
  // the bytes still usable by this JVM. This counts everything on the partition,
  // not only the data stored under `file`.
  final long partitionCapacity = file.getTotalSpace();
  final long stillUsable = file.getUsableSpace();
  return partitionCapacity - stillUsable;
}

/**
 * Computes the number of bytes taken up by the regular files located at or below the
 * given path.
 *
 * <p>A regular file yields its own length; a directory yields the recursive sum of the
 * lengths of every file beneath it.
 *
 * @param storageDir the file or directory to measure; may be {@code null}
 * @return the accumulated file length in bytes, or {@code 0} when the path is {@code null},
 *     does not exist, or its entries cannot be listed
 */
protected static long getServiceUsedSpace(File storageDir) {
  // Nothing to measure for an absent path.
  if (storageDir == null || !storageDir.exists()) {
    return 0L;
  }

  // A plain file contributes just its own length.
  if (storageDir.isFile()) {
    return storageDir.length();
  }

  final File[] entries = storageDir.listFiles();
  // listFiles() returns null when the directory cannot be read.
  if (entries == null) {
    return 0L;
  }

  long usedBytes = 0L;
  for (int i = 0; i < entries.length; i++) {
    final File entry = entries[i];
    // Files are added directly; anything else is measured recursively.
    usedBytes += entry.isFile() ? entry.length() : getServiceUsedSpace(entry);
  }
  return usedBytes;
}

// TODO: This function will be integrated into MultiStorageManager; currently only the disk
// check is supported.
class StorageInfo {
Expand All @@ -167,7 +196,7 @@ boolean checkIsSpaceEnough() {
this.isHealthy = false;
return false;
}
double usagePercent = getUsedSpace(storageDir) * 100.0 / getTotalSpace(storageDir);
double usagePercent = getWholeDiskUsedSpace(storageDir) * 100.0 / getTotalSpace(storageDir);
if (isHealthy) {
if (Double.compare(usagePercent, diskMaxUsagePercentage) >= 0) {
isHealthy = false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,9 @@ public class ShuffleServerMetrics {
private static final String LOCAL_STORAGE_TOTAL_DIRS_NUM = "local_storage_total_dirs_num";
private static final String LOCAL_STORAGE_CORRUPTED_DIRS_NUM = "local_storage_corrupted_dirs_num";
private static final String LOCAL_STORAGE_TOTAL_SPACE = "local_storage_total_space";
private static final String LOCAL_STORAGE_USED_SPACE = "local_storage_used_space";
private static final String LOCAL_STORAGE_WHOLE_DISK_USED_SPACE =
"local_storage_whole_disk_used_space";
private static final String LOCAL_STORAGE_SERVICE_USED_SPACE = "local_storage_service_used_space";
private static final String LOCAL_STORAGE_USED_SPACE_RATIO = "local_storage_used_space_ratio";

private static final String IS_HEALTHY = "is_healthy";
Expand Down Expand Up @@ -143,7 +145,8 @@ public class ShuffleServerMetrics {
public static Gauge.Child gaugeLocalStorageTotalDirsNum;
public static Gauge.Child gaugeLocalStorageCorruptedDirsNum;
public static Gauge.Child gaugeLocalStorageTotalSpace;
public static Gauge.Child gaugeLocalStorageUsedSpace;
public static Gauge.Child gaugeLocalStorageWholeDiskUsedSpace;
public static Gauge.Child gaugeLocalStorageServiceUsedSpace;
public static Gauge.Child gaugeLocalStorageUsedSpaceRatio;

public static Gauge.Child gaugeIsHealthy;
Expand Down Expand Up @@ -290,7 +293,10 @@ private static void setUpMetrics() {
gaugeLocalStorageCorruptedDirsNum =
metricsManager.addLabeledGauge(LOCAL_STORAGE_CORRUPTED_DIRS_NUM);
gaugeLocalStorageTotalSpace = metricsManager.addLabeledGauge(LOCAL_STORAGE_TOTAL_SPACE);
gaugeLocalStorageUsedSpace = metricsManager.addLabeledGauge(LOCAL_STORAGE_USED_SPACE);
gaugeLocalStorageWholeDiskUsedSpace =
metricsManager.addLabeledGauge(LOCAL_STORAGE_WHOLE_DISK_USED_SPACE);
gaugeLocalStorageServiceUsedSpace =
metricsManager.addLabeledGauge(LOCAL_STORAGE_SERVICE_USED_SPACE);
gaugeLocalStorageUsedSpaceRatio =
metricsManager.addLabeledGauge(LOCAL_STORAGE_USED_SPACE_RATIO);

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.uniffle.server;

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;

import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;

/** Unit tests for the static helpers of {@link LocalStorageChecker}. */
public class LocalStorageCheckerTest {

  /**
   * Verifies that {@code LocalStorageChecker.getServiceUsedSpace} returns the sum of the
   * lengths of all files under a directory, including files in nested subdirectories.
   */
  @Test
  public void testGetUniffleUsedSpace(@TempDir File tempDir) throws IOException {
    File file1 = createTempFile(tempDir, "file1.txt", 1000);
    File file2 = createTempFile(tempDir, "file2.txt", 2000);
    File subdir1 = createTempSubDirectory(tempDir, "subdir1");
    File file3 = createTempFile(subdir1, "file3.txt", 500);
    File subdir2 = createTempSubDirectory(subdir1, "subdir2");
    File file4 = createTempFile(subdir2, "file4.txt", 1500);

    // Call the method to calculate disk usage
    long calculatedUsage = LocalStorageChecker.getServiceUsedSpace(tempDir);

    // The expected total usage should be the sum of file1 + file2 + file3 + file4
    long expectedUsage = file1.length() + file2.length() + file3.length() + file4.length();

    // Guard against a broken fixture: the files must really have the sizes requested above,
    // otherwise the comparison below could pass trivially (e.g. 0 == 0).
    Assertions.assertEquals(1000L + 2000L + 500L + 1500L, expectedUsage);

    // Assert that the calculated result matches the expected value
    Assertions.assertEquals(expectedUsage, calculatedUsage);
  }

  /**
   * Creates a zero-filled file of the requested size.
   *
   * @param directory the directory in which the file is created
   * @param fileName the name of the file to create
   * @param fileSize the number of bytes to write; must fit in an {@code int}
   * @return the created file
   * @throws IOException if the file cannot be written
   * @throws ArithmeticException if {@code fileSize} does not fit in an {@code int}
   */
  private File createTempFile(File directory, String fileName, long fileSize) throws IOException {
    File file = new File(directory, fileName);
    // toIntExact fails loudly instead of silently truncating an oversized length.
    Files.write(file.toPath(), new byte[Math.toIntExact(fileSize)]);
    return file;
  }

  /**
   * Creates a subdirectory and fails the test if it cannot be created.
   *
   * @param parentDirectory the directory that will contain the new subdirectory
   * @param directoryName the name of the subdirectory
   * @return the created (or already existing) directory
   */
  private File createTempSubDirectory(File parentDirectory, String directoryName) {
    File subDir = new File(parentDirectory, directoryName);
    // mkdirs() returns false both on failure and when the directory already exists,
    // so accept an existing directory but surface a real failure immediately.
    Assertions.assertTrue(
        subDir.mkdirs() || subDir.isDirectory(), "Failed to create directory: " + subDir);
    return subDir;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -69,22 +69,22 @@ public void checkTest(@TempDir File baseDir) throws Exception {

assertTrue(checker.checkIsHealthy());
assertEquals(3000, ShuffleServerMetrics.gaugeLocalStorageTotalSpace.get());
assertEquals(600, ShuffleServerMetrics.gaugeLocalStorageUsedSpace.get());
assertEquals(600, ShuffleServerMetrics.gaugeLocalStorageWholeDiskUsedSpace.get());
assertEquals(0.2, ShuffleServerMetrics.gaugeLocalStorageUsedSpaceRatio.get());
assertEquals(3, ShuffleServerMetrics.gaugeLocalStorageTotalDirsNum.get());
assertEquals(0, ShuffleServerMetrics.gaugeLocalStorageCorruptedDirsNum.get());

callTimes++;
assertTrue(checker.checkIsHealthy());
assertEquals(3000, ShuffleServerMetrics.gaugeLocalStorageTotalSpace.get());
assertEquals(1400, ShuffleServerMetrics.gaugeLocalStorageUsedSpace.get());
assertEquals(1400, ShuffleServerMetrics.gaugeLocalStorageWholeDiskUsedSpace.get());
assertEquals(3, ShuffleServerMetrics.gaugeLocalStorageTotalDirsNum.get());
assertEquals(0, ShuffleServerMetrics.gaugeLocalStorageCorruptedDirsNum.get());

callTimes++;
assertFalse(checker.checkIsHealthy());
assertEquals(3000, ShuffleServerMetrics.gaugeLocalStorageTotalSpace.get());
assertEquals(2100, ShuffleServerMetrics.gaugeLocalStorageUsedSpace.get());
assertEquals(2100, ShuffleServerMetrics.gaugeLocalStorageWholeDiskUsedSpace.get());
assertEquals(3, ShuffleServerMetrics.gaugeLocalStorageTotalDirsNum.get());
assertEquals(0, ShuffleServerMetrics.gaugeLocalStorageCorruptedDirsNum.get());

Expand All @@ -94,15 +94,15 @@ public void checkTest(@TempDir File baseDir) throws Exception {
checker = new MockStorageChecker(conf, storages);
assertFalse(checker.checkIsHealthy());
assertEquals(3000, ShuffleServerMetrics.gaugeLocalStorageTotalSpace.get());
assertEquals(1600, ShuffleServerMetrics.gaugeLocalStorageUsedSpace.get());
assertEquals(1600, ShuffleServerMetrics.gaugeLocalStorageWholeDiskUsedSpace.get());
assertEquals(3, ShuffleServerMetrics.gaugeLocalStorageTotalDirsNum.get());
assertEquals(0, ShuffleServerMetrics.gaugeLocalStorageCorruptedDirsNum.get());

callTimes++;
checker.checkIsHealthy();
assertTrue(checker.checkIsHealthy());
assertEquals(3000, ShuffleServerMetrics.gaugeLocalStorageTotalSpace.get());
assertEquals(250, ShuffleServerMetrics.gaugeLocalStorageUsedSpace.get());
assertEquals(250, ShuffleServerMetrics.gaugeLocalStorageWholeDiskUsedSpace.get());
assertEquals(3, ShuffleServerMetrics.gaugeLocalStorageTotalDirsNum.get());
assertEquals(0, ShuffleServerMetrics.gaugeLocalStorageCorruptedDirsNum.get());

Expand Down Expand Up @@ -133,7 +133,7 @@ long getTotalSpace(File file) {
// This method is mocked so that it returns different values depending on
// how many times the check has been called.
@Override
long getUsedSpace(File file) {
long getWholeDiskUsedSpace(File file) {
long result = 0;
switch (file.getName()) {
case "st1":
Expand Down