From 004e3db22040a15868c8d8d3dc0b41c61c768efc Mon Sep 17 00:00:00 2001 From: tom lee Date: Thu, 9 Dec 2021 23:50:44 +0800 Subject: [PATCH 1/5] HDFS-16376. Expose metrics of NodeNotChosenReason to JMX --- .../blockmanagement/BlockPlacementMXBean.java | 32 ++++++++ .../BlockPlacementPolicyDefault.java | 73 ++++++++++++++++++- ...TestReplicationPolicyExcludeSlowNodes.java | 15 ++++ 3 files changed, 117 insertions(+), 3 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementMXBean.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementMXBean.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementMXBean.java new file mode 100644 index 0000000000000..7c9d074e37fc9 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementMXBean.java @@ -0,0 +1,32 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.blockmanagement; + +/** + * This is an interface used to retrieve statistic information related to + * block placement policy. + */ +public interface BlockPlacementMXBean { + + /** + * The statistics of why the target nodes are not chosen. + * + * @return Get the number of reasons why the target nodes are not chosen. + */ + BlockPlacementPolicyDefault.NodeNotChosenReasonMetrics getNumberOfEachNotChosenReason(); +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java index dec98d85b52df..56201a6bb090b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java @@ -24,8 +24,11 @@ import static org.apache.hadoop.util.Time.monotonicNow; import java.util.*; +import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.LongAdder; +import org.apache.hadoop.metrics2.util.MBeans; import org.apache.hadoop.util.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; @@ -41,6 +44,8 @@ import org.apache.hadoop.classification.VisibleForTesting; +import javax.management.ObjectName; + /** * The class is responsible for choosing the desired number of targets * for placing block replicas. @@ -56,7 +61,8 @@ * rack as the second replica. */ @InterfaceAudience.Private -public class BlockPlacementPolicyDefault extends BlockPlacementPolicy { +public class BlockPlacementPolicyDefault extends BlockPlacementPolicy + implements BlockPlacementMXBean { private static final String enableDebugLogging = "For more information, please enable DEBUG log level on " @@ -78,7 +84,10 @@ protected StringBuilder initialValue() { private static final BlockPlacementStatus ONE_RACK_PLACEMENT = new BlockPlacementStatusDefault(1, 1, 1); - private enum NodeNotChosenReason { + private static final ConcurrentHashMap notChosenReasonMap = + new ConcurrentHashMap<>(); + + public enum NodeNotChosenReason { NOT_IN_SERVICE("the node is not in service"), NODE_STALE("the node is stale"), NODE_TOO_BUSY("the node is too busy"), @@ -109,7 +118,12 @@ private String getText() { private FSClusterStats stats; protected long heartbeatInterval; // interval for DataNode heartbeats private long staleInterval; // interval used to identify stale DataNodes - + private ObjectName mxBeanName; + + public ObjectName getMxBeanName() { + return mxBeanName; + } + /** * A miss of that many heartbeats is tolerated for replica deletion policy. */ @@ -155,6 +169,9 @@ public void initialize(Configuration conf, FSClusterStats stats, this.excludeSlowNodesEnabled = conf.getBoolean( DFS_NAMENODE_BLOCKPLACEMENTPOLICY_EXCLUDE_SLOW_NODES_ENABLED_KEY, DFS_NAMENODE_BLOCKPLACEMENTPOLICY_EXCLUDE_SLOW_NODES_ENABLED_DEFAULT); + if (mxBeanName == null) { + mxBeanName = MBeans.register("NameNode", "BlockPlacementStats", this); + } } @Override @@ -988,6 +1005,13 @@ private static void logNodeIsNotChosen(DatanodeDescriptor node, base = 0; } reasonMap.put(reason, base + 1); + // To calculate the metrics of NodeNotChosenReason. + incrNotChosenReasonNum(reason); + } + + private static void incrNotChosenReasonNum(NodeNotChosenReason reason) { + notChosenReasonMap.computeIfAbsent(reason, k -> new LongAdder()) + .increment(); } /** @@ -1369,5 +1393,48 @@ public void setExcludeSlowNodesEnabled(boolean enable) { public boolean getExcludeSlowNodesEnabled() { return excludeSlowNodesEnabled; } + + @Override + public NodeNotChosenReasonMetrics getNumberOfEachNotChosenReason() { + return new NodeNotChosenReasonMetrics(); + } + + public class NodeNotChosenReasonMetrics { + + public long getNotInService() { + return notChosenReasonMap + .getOrDefault(NodeNotChosenReason.NOT_IN_SERVICE, new LongAdder()).longValue(); + } + + public long getNodeStale() { + return notChosenReasonMap + .getOrDefault(NodeNotChosenReason.NODE_STALE, new LongAdder()).longValue(); + } + + public long getNodeTooBusy() { + return notChosenReasonMap + .getOrDefault(NodeNotChosenReason.NODE_TOO_BUSY, new LongAdder()).longValue(); + } + + public long getTooManyNodesOnRack() { + return notChosenReasonMap + .getOrDefault(NodeNotChosenReason.TOO_MANY_NODES_ON_RACK, new LongAdder()).longValue(); + } + + public long getNotEnoughStorageSpace() { + return notChosenReasonMap + .getOrDefault(NodeNotChosenReason.NOT_ENOUGH_STORAGE_SPACE, new LongAdder()).longValue(); + } + + public long getNoRequiredStorageType() { + return notChosenReasonMap + .getOrDefault(NodeNotChosenReason.NO_REQUIRED_STORAGE_TYPE, new LongAdder()).longValue(); + } + + public long getNodeSlow() { + return notChosenReasonMap + .getOrDefault(NodeNotChosenReason.NODE_SLOW, new LongAdder()).longValue(); + } + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicyExcludeSlowNodes.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicyExcludeSlowNodes.java index f2c24a646b84a..e0ef803767104 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicyExcludeSlowNodes.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicyExcludeSlowNodes.java @@ -26,6 +26,10 @@ import org.junit.runner.RunWith; import org.junit.runners.Parameterized; +import javax.management.MBeanServer; +import javax.management.ObjectName; +import javax.management.openmbean.CompositeDataSupport; +import java.lang.management.ManagementFactory; import java.util.ArrayList; import java.util.Arrays; import java.util.Set; @@ -122,6 +126,17 @@ public void testChooseTargetExcludeSlowNodes() throws Exception { assertTrue(!slowPeers.contains(targets[i].getDatanodeDescriptor() .getDatanodeUuid())); } + + // Fetch metrics. + MBeanServer mbs = ManagementFactory.getPlatformMBeanServer(); + ObjectName mxbeanNameFs = + new ObjectName("Hadoop:service=NameNode,name=BlockPlacementStats"); + CompositeDataSupport metrics = + (CompositeDataSupport) mbs.getAttribute(mxbeanNameFs, + "NumberOfEachNotChosenReason"); + + // Assert NodeSlow. + assertTrue((long) metrics.get("nodeSlow") > 0); } finally { namenode.getNamesystem().writeUnlock(); } From 07e7e2126e8b8209549761ea9154e885a2f55d6e Mon Sep 17 00:00:00 2001 From: tom lee Date: Fri, 10 Dec 2021 08:07:59 +0800 Subject: [PATCH 2/5] fix spotbug --- .../BlockPlacementPolicyDefault.java | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java index 56201a6bb090b..efee87330a0b0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java @@ -84,7 +84,7 @@ protected StringBuilder initialValue() { private static final BlockPlacementStatus ONE_RACK_PLACEMENT = new BlockPlacementStatusDefault(1, 1, 1); - private static final ConcurrentHashMap notChosenReasonMap = + private static final ConcurrentHashMap NOT_CHOSEN_REASON_MAP = new ConcurrentHashMap<>(); public enum NodeNotChosenReason { @@ -1010,7 +1010,7 @@ private static void logNodeIsNotChosen(DatanodeDescriptor node, } private static void incrNotChosenReasonNum(NodeNotChosenReason reason) { - notChosenReasonMap.computeIfAbsent(reason, k -> new LongAdder()) + NOT_CHOSEN_REASON_MAP.computeIfAbsent(reason, k -> new LongAdder()) .increment(); } @@ -1399,40 +1399,40 @@ public NodeNotChosenReasonMetrics getNumberOfEachNotChosenReason() { return new NodeNotChosenReasonMetrics(); } - public class NodeNotChosenReasonMetrics { + public static class NodeNotChosenReasonMetrics { public long getNotInService() { - return notChosenReasonMap + return NOT_CHOSEN_REASON_MAP .getOrDefault(NodeNotChosenReason.NOT_IN_SERVICE, new LongAdder()).longValue(); } public long getNodeStale() { - return notChosenReasonMap + return NOT_CHOSEN_REASON_MAP .getOrDefault(NodeNotChosenReason.NODE_STALE, new LongAdder()).longValue(); } public long getNodeTooBusy() { - return notChosenReasonMap + return NOT_CHOSEN_REASON_MAP .getOrDefault(NodeNotChosenReason.NODE_TOO_BUSY, new LongAdder()).longValue(); } public long getTooManyNodesOnRack() { - return notChosenReasonMap + return NOT_CHOSEN_REASON_MAP .getOrDefault(NodeNotChosenReason.TOO_MANY_NODES_ON_RACK, new LongAdder()).longValue(); } public long getNotEnoughStorageSpace() { - return notChosenReasonMap + return NOT_CHOSEN_REASON_MAP .getOrDefault(NodeNotChosenReason.NOT_ENOUGH_STORAGE_SPACE, new LongAdder()).longValue(); } public long getNoRequiredStorageType() { - return notChosenReasonMap + return NOT_CHOSEN_REASON_MAP .getOrDefault(NodeNotChosenReason.NO_REQUIRED_STORAGE_TYPE, new LongAdder()).longValue(); } public long getNodeSlow() { - return notChosenReasonMap + return NOT_CHOSEN_REASON_MAP .getOrDefault(NodeNotChosenReason.NODE_SLOW, new LongAdder()).longValue(); } } From 064ad3d719312b027051ca0c696692e35f80a5fe Mon Sep 17 00:00:00 2001 From: tom lee Date: Fri, 10 Dec 2021 08:54:29 +0800 Subject: [PATCH 3/5] fix unit test --- .../hdfs/server/blockmanagement/BlockManager.java | 10 ++++++++++ .../server/blockmanagement/BlockPlacementPolicy.java | 5 +++++ .../blockmanagement/BlockPlacementPolicyDefault.java | 8 +++++++- 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java index 9ec9f9bd47224..624717d887b13 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java @@ -5057,6 +5057,16 @@ public void shutdown() { blocksMap.close(); MBeans.unregister(mxBeanName); mxBeanName = null; + BlockPlacementPolicy replicationPolicy = + placementPolicies.getPolicy(CONTIGUOUS); + if (replicationPolicy != null) { + replicationPolicy.clear(); + } + BlockPlacementPolicy ecPolicy = + placementPolicies.getPolicy(STRIPED); + if (ecPolicy != null) { + ecPolicy.clear(); + } } public void clear() { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicy.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicy.java index 68b3bcd5ad6da..cf686719a1937 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicy.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicy.java @@ -273,4 +273,9 @@ public void splitNodesWithRack( public abstract void setExcludeSlowNodesEnabled(boolean enable); public abstract boolean getExcludeSlowNodesEnabled(); + + /** + * Clean up resources, such as MxBeans. + */ + public abstract void clear(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java index efee87330a0b0..5cfc97fa09f31 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java @@ -118,7 +118,7 @@ private String getText() { private FSClusterStats stats; protected long heartbeatInterval; // interval for DataNode heartbeats private long staleInterval; // interval used to identify stale DataNodes - private ObjectName mxBeanName; + private static ObjectName mxBeanName; public ObjectName getMxBeanName() { return mxBeanName; @@ -1399,6 +1399,12 @@ public NodeNotChosenReasonMetrics getNumberOfEachNotChosenReason() { return new NodeNotChosenReasonMetrics(); } + @Override + public void clear() { + MBeans.unregister(mxBeanName); + mxBeanName = null; + } + public static class NodeNotChosenReasonMetrics { public long getNotInService() { From aabc700f8b8dc3bfdde0fc2cc9273227e64316ff Mon Sep 17 00:00:00 2001 From: tom lee Date: Wed, 15 Dec 2021 21:58:30 +0800 Subject: [PATCH 4/5] fix spot bug --- .../server/blockmanagement/BlockPlacementPolicyDefault.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java index 5cfc97fa09f31..63a602a9d0d4b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java @@ -118,7 +118,7 @@ private String getText() { private FSClusterStats stats; protected long heartbeatInterval; // interval for DataNode heartbeats private long staleInterval; // interval used to identify stale DataNodes - private static ObjectName mxBeanName; + private volatile static ObjectName mxBeanName; public ObjectName getMxBeanName() { return mxBeanName; From 199a7118f3e6841c6137a6366be641dad1fec75e Mon Sep 17 00:00:00 2001 From: tom lee Date: Thu, 16 Dec 2021 09:14:59 +0800 Subject: [PATCH 5/5] fix bug --- .../blockmanagement/BlockPlacementPolicyDefault.java | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java index 63a602a9d0d4b..f3d5afc9439b6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java @@ -170,7 +170,7 @@ public void initialize(Configuration conf, FSClusterStats stats, DFS_NAMENODE_BLOCKPLACEMENTPOLICY_EXCLUDE_SLOW_NODES_ENABLED_KEY, DFS_NAMENODE_BLOCKPLACEMENTPOLICY_EXCLUDE_SLOW_NODES_ENABLED_DEFAULT); if (mxBeanName == null) { - mxBeanName = MBeans.register("NameNode", "BlockPlacementStats", this); + registerMxBeans(this); } } @@ -1401,6 +1401,14 @@ public NodeNotChosenReasonMetrics getNumberOfEachNotChosenReason() { @Override public void clear() { + unregisterMxBeans(); + } + + private synchronized static void registerMxBeans(Object obj) { + mxBeanName = MBeans.register("NameNode", "BlockPlacementStats", obj); + } + + private synchronized static void unregisterMxBeans() { MBeans.unregister(mxBeanName); mxBeanName = null; }