Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
import javax.management.ObjectName;
import javax.management.StandardMBean;

import org.apache.commons.math3.stat.descriptive.moment.StandardDeviation;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
Expand Down Expand Up @@ -113,6 +114,8 @@ public class RBFMetrics implements RouterMBean, FederationMBean {
/** Timeout that prevents the page from being held up too long while loading. */
private final long timeOut;

/** Whether getNodeUsage fetches the live datanode report (enabled) or skips it (disabled). */
private boolean enableGetDNUsage;

/** Router interface. */
private final Router router;
Expand Down Expand Up @@ -175,6 +178,8 @@ public RBFMetrics(Router router) throws IOException {
Configuration conf = router.getConfig();
this.timeOut = conf.getTimeDuration(RBFConfigKeys.DN_REPORT_TIME_OUT,
RBFConfigKeys.DN_REPORT_TIME_OUT_MS_DEFAULT, TimeUnit.MILLISECONDS);
this.enableGetDNUsage = conf.getBoolean(RBFConfigKeys.DFS_ROUTER_ENABLE_GET_DN_USAGE_KEY,
RBFConfigKeys.DFS_ROUTER_ENABLE_GET_DN_USAGE_DEFAULT);
this.topTokenRealOwners = conf.getInt(
RBFConfigKeys.DFS_ROUTER_METRICS_TOP_NUM_TOKEN_OWNERS_KEY,
RBFConfigKeys.DFS_ROUTER_METRICS_TOP_NUM_TOKEN_OWNERS_KEY_DEFAULT);
Expand All @@ -184,6 +189,11 @@ public RBFMetrics(Router router) throws IOException {
ms.register(RBFMetrics.class.getName(), "RBFActivity Metrics", this);
}

/**
 * Override the switch that getNodeUsage checks before requesting the live
 * datanode report. The initial value is read from the configuration in the
 * constructor; this setter lets tests flip it afterwards.
 *
 * @param enableGetDNUsage true to let getNodeUsage request the live
 *                         datanode report, false to skip that request.
 */
@VisibleForTesting
public void setEnableGetDNUsage(boolean enableGetDNUsage) {
this.enableGetDNUsage = enableGetDNUsage;
}

/**
* Unregister the JMX beans.
*/
Expand Down Expand Up @@ -537,35 +547,34 @@ public int getNumEnteringMaintenanceDataNodes() {

@Override // NameNodeMXBean
public String getNodeUsage() {
float median = 0;
float max = 0;
float min = 0;
float dev = 0;
double median = 0;
double max = 0;
double min = 0;
double dev = 0;

final Map<String, Map<String, Object>> info = new HashMap<>();
try {
RouterRpcServer rpcServer = this.router.getRpcServer();
DatanodeInfo[] live = rpcServer.getDatanodeReport(
DatanodeReportType.LIVE, false, timeOut);
DatanodeInfo[] live = null;
if (this.enableGetDNUsage) {
RouterRpcServer rpcServer = this.router.getRpcServer();
live = rpcServer.getDatanodeReport(DatanodeReportType.LIVE, false, timeOut);
} else {
LOG.debug("Getting node usage is disabled.");
}

if (live.length > 0) {
float totalDfsUsed = 0;
float[] usages = new float[live.length];
if (live != null && live.length > 0) {
double[] usages = new double[live.length];
int i = 0;
for (DatanodeInfo dn : live) {
usages[i++] = dn.getDfsUsedPercent();
totalDfsUsed += dn.getDfsUsedPercent();
}
totalDfsUsed /= live.length;
Arrays.sort(usages);
median = usages[usages.length / 2];
max = usages[usages.length - 1];
min = usages[0];

for (i = 0; i < usages.length; i++) {
dev += (usages[i] - totalDfsUsed) * (usages[i] - totalDfsUsed);
}
dev = (float) Math.sqrt(dev / usages.length);
StandardDeviation deviation = new StandardDeviation();
dev = deviation.evaluate(usages);
}
} catch (IOException e) {
LOG.error("Cannot get the live nodes: {}", e.getMessage());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I feel it would be better this way.

LOG.error("Cannot get the live nodes.", e).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I feel it would be better this way.

LOG.error("Cannot get the live nodes.", e).

Do we want to have the full stack trace? I think it is pretty clear what the error is here without it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @slfan1989 @goiri for your review. I think e.getMessage() is enough. @slfan1989 Do you have some cases that need the full stack?

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,9 @@ public class RBFConfigKeys extends CommonConfigurationKeysPublic {
FEDERATION_ROUTER_PREFIX + "dn-report.cache-expire";
public static final long DN_REPORT_CACHE_EXPIRE_MS_DEFAULT =
TimeUnit.SECONDS.toMillis(10);
// Switch read by RBFMetrics to decide whether getNodeUsage requests the live
// datanode report; the default keeps the request enabled.
public static final String DFS_ROUTER_ENABLE_GET_DN_USAGE_KEY =
FEDERATION_ROUTER_PREFIX + "enable.get.dn.usage";
public static final boolean DFS_ROUTER_ENABLE_GET_DN_USAGE_DEFAULT = true;

// HDFS Router-based federation quota
public static final String DFS_ROUTER_QUOTA_ENABLE =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,16 @@
</description>
</property>

<property>
<name>dfs.federation.router.enable.get.dn.usage</name>
<value>true</value>
<description>
If true, the getNodeUsage method in RBFMetrics will return an up-to-date
result collected from the downstream nameservices, but this can take a long
time and tie up thread resources. If false, it will return a mock result with all values set to 0.
</description>
</property>

<property>
<name>dfs.federation.router.metrics.class</name>
<value>org.apache.hadoop.hdfs.server.federation.metrics.FederationRPCPerformanceMonitor</value>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@
import org.apache.hadoop.service.Service.STATE;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.test.LambdaTestUtils;
import org.codehaus.jettison.json.JSONException;
import org.codehaus.jettison.json.JSONObject;
import org.junit.AfterClass;
import org.junit.Before;
Expand Down Expand Up @@ -2144,4 +2145,34 @@ public void testContentSummaryWithSnapshot() throws Exception {
routerDFS.delete(dirPath, true);
}
}

/**
 * Check that the getNodeUsage switch in RBFMetrics controls whether the
 * datanode report is proxied: proxy ops increase while it is enabled, stay
 * flat while it is disabled (with an all-zero result), and increase again
 * once it is re-enabled.
 */
@Test
public void testDisableNodeUsageInRBFMetrics() throws JSONException {
  RBFMetrics rbfMetrics = router.getRouter().getMetrics();
  FederationRPCMetrics federationRPCMetrics = router.getRouter().getRpcServer().getRPCMetrics();

  // Enabled: the call is proxied downstream. The expected delta of 2 is
  // presumably one proxied call per nameservice in this test cluster --
  // TODO confirm against the cluster setup.
  long proxyOpBefore = federationRPCMetrics.getProxyOps();
  String nodeUsageEnable = rbfMetrics.getNodeUsage();
  assertNotNull(nodeUsageEnable);
  long proxyOpAfterWithEnable = federationRPCMetrics.getProxyOps();
  assertEquals(proxyOpBefore + 2, proxyOpAfterWithEnable);

  long proxyOpAfterWithDisable;
  rbfMetrics.setEnableGetDNUsage(false);
  try {
    // Disabled: no proxy op is issued and every statistic is reported as 0.
    String nodeUsageDisable = rbfMetrics.getNodeUsage();
    assertNotNull(nodeUsageDisable);
    proxyOpAfterWithDisable = federationRPCMetrics.getProxyOps();
    assertEquals(proxyOpAfterWithEnable, proxyOpAfterWithDisable);
    JSONObject jsonObject = new JSONObject(nodeUsageDisable);
    JSONObject json = jsonObject.getJSONObject("nodeUsage");
    assertEquals("0.00%", json.get("min"));
    assertEquals("0.00%", json.get("median"));
    assertEquals("0.00%", json.get("max"));
    assertEquals("0.00%", json.get("stdDev"));
  } finally {
    // Always restore the switch so a failed assertion above cannot leave
    // the metrics object disabled for whatever runs after this test.
    rbfMetrics.setEnableGetDNUsage(true);
  }

  // Re-enabled: the call is proxied downstream again.
  String nodeUsageWithReEnable = rbfMetrics.getNodeUsage();
  assertNotNull(nodeUsageWithReEnable);
  long proxyOpAfterWithReEnable = federationRPCMetrics.getProxyOps();
  assertEquals(proxyOpAfterWithDisable + 2, proxyOpAfterWithReEnable);
}
}