From 8c376005d3397428727eacc696b772ca097390b1 Mon Sep 17 00:00:00 2001 From: "Haiyang.Hu" Date: Sat, 9 Sep 2023 18:43:34 +0800 Subject: [PATCH 1/6] HADOOP-18885. Add rpcRejectedByObserverCalls metric to quantify the number of rejected RPCs by Observer NameNode --- .../java/org/apache/hadoop/ipc/Server.java | 3 +++ .../apache/hadoop/ipc/metrics/RpcMetrics.java | 18 ++++++++++++++ .../namenode/ha/TestMultiObserverNode.java | 24 +++++++++++++++++-- 3 files changed, 43 insertions(+), 2 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java index a594d2be01ccb..82c132ed94c10 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java @@ -2945,6 +2945,9 @@ private void processRpcRequest(RpcRequestHeaderProto header, } } } catch (IOException ioe) { + if (ioe instanceof RetriableException) { + rpcMetrics.incrRcRejectedByObserverCalls(); + } throw new RpcServerException("Processing RPC request caught ", ioe); } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/metrics/RpcMetrics.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/metrics/RpcMetrics.java index b9be973204d21..3992bec7b504f 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/metrics/RpcMetrics.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/metrics/RpcMetrics.java @@ -147,6 +147,8 @@ public static RpcMetrics create(Server server, Configuration conf) { MutableCounterLong rpcRequeueCalls; @Metric("Number of successful RPC calls") MutableCounterLong rpcCallSuccesses; + @Metric("Number of observer namenode rejected RPC calls") + MutableCounterLong rpcRejectedByObserverCalls; @Metric("Number of open connections") public 
int numOpenConnections() { return server.getNumOpenConnections(); @@ -363,6 +365,13 @@ public void incrRpcCallSuccesses() { rpcCallSuccesses.incr(); } + /** + * Increments the Observer NameNode rejected RPC Calls Counter. + */ + public void incrRcRejectedByObserverCalls() { + rpcRejectedByObserverCalls.incr(); + } + /** * Returns a MutableRate Counter. * @return Mutable Rate @@ -412,6 +421,15 @@ public long getRpcRequeueCalls() { return rpcRequeueCalls.value(); } + /** + * Returns the number of observer namenode rejected RPC calls. + * @return long + */ + @VisibleForTesting + public long getRpcRejectedByObserverCalls() { + return rpcRejectedByObserverCalls.value(); + } + public MutableRate getDeferredRpcProcessingTime() { return deferredRpcProcessingTime; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestMultiObserverNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestMultiObserverNode.java index a0913e4c5e447..fc179e845bf75 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestMultiObserverNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestMultiObserverNode.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hdfs.server.namenode.ha; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_STATE_CONTEXT_ENABLED_KEY; +import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import java.io.IOException; @@ -28,6 +29,9 @@ import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.qjournal.MiniQJMHACluster; +import org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer; +import org.apache.hadoop.ipc.RPC; +import org.apache.hadoop.ipc.metrics.RpcMetrics; import org.junit.After; import org.junit.AfterClass; import org.junit.BeforeClass; @@ -145,13 
+149,29 @@ public void testMultiObserver() throws Exception { public void testObserverFallBehind() throws Exception { dfs.mkdir(testPath, FsPermission.getDefault()); assertSentTo(0); + RPC.Server clientRpcServer2 = ((NameNodeRpcServer)dfsCluster + .getNameNodeRpc(2)).getClientRpcServer(); + RpcMetrics rpcMetrics2 = clientRpcServer2.getRpcMetrics(); + assertEquals(0, rpcMetrics2.getRpcRejectedByObserverCalls()); + RPC.Server clientRpcServer3 = ((NameNodeRpcServer)dfsCluster + .getNameNodeRpc(3)).getClientRpcServer(); + RpcMetrics rpcMetrics3 = clientRpcServer3.getRpcMetrics(); + assertEquals(0, rpcMetrics3.getRpcRejectedByObserverCalls()); - // Set large state Id on the client + dfsCluster.rollEditLogAndTail(0); + dfs.getFileStatus(testPath); + assertSentTo(2, 3); + + // Set large state Id on the client. long realStateId = HATestUtil.setACStateId(dfs, 500000); dfs.getFileStatus(testPath); - // Should end up on ANN + // Should end up on ANN. assertSentTo(0); HATestUtil.setACStateId(dfs, realStateId); + + // Validate rpcRejectedByObserverCalls metric. + assertEquals(1, rpcMetrics2.getRpcRejectedByObserverCalls()); + assertEquals(1, rpcMetrics3.getRpcRejectedByObserverCalls()); } private void assertSentTo(int... nnIndices) throws IOException { From 9a78a52d4d4683fcd393593cd7f98a4863e3e9c6 Mon Sep 17 00:00:00 2001 From: "Haiyang.Hu" Date: Sun, 10 Sep 2023 10:23:04 +0800 Subject: [PATCH 2/6] HADOOP-18885. 
Add metrics.md --- hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md | 1 + 1 file changed, 1 insertion(+) diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md b/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md index 01d89b81356e4..f8c5c3e7fc89c 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md @@ -90,6 +90,7 @@ The default timeunit used for RPC metrics is milliseconds (as per the below desc | `RpcSlowCalls` | Total number of slow RPC calls | | `RpcRequeueCalls` | Total number of requeue RPC calls | | `RpcCallsSuccesses` | Total number of RPC calls that are successfully processed | +| `RpcRejectedByObserverCalls` | Total number of RPC calls that are observer namenode rejected | | `NumOpenConnections` | Current number of open connections | | `NumInProcessHandler` | Current number of handlers on working | | `CallQueueLength` | Current length of the call queue | From 22aa648245fbb03498cb0c3845eea4ce8f4b0f4e Mon Sep 17 00:00:00 2001 From: "Haiyang.Hu" Date: Fri, 22 Sep 2023 13:16:51 +0800 Subject: [PATCH 3/6] HADOOP-18885. 
Modify code based on comments --- .../src/main/java/org/apache/hadoop/ipc/Server.java | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java index 82c132ed94c10..6d099c8f27c5b 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java @@ -2937,17 +2937,18 @@ private void processRpcRequest(RpcRequestHeaderProto header, if (alignmentContext.isCoordinatedCall(protoName, methodName)) { call.markCallCoordinated(true); long stateId; - stateId = alignmentContext.receiveRequestState( - header, getMaxIdleTime()); + try { + stateId = alignmentContext.receiveRequestState(header, getMaxIdleTime()); + } catch (RetriableException re) { + rpcMetrics.incrRcRejectedByObserverCalls(); + throw re; + } call.setClientStateId(stateId); if (header.hasRouterFederatedState()) { call.setFederatedNamespaceState(header.getRouterFederatedState()); } } } catch (IOException ioe) { - if (ioe instanceof RetriableException) { - rpcMetrics.incrRcRejectedByObserverCalls(); - } throw new RpcServerException("Processing RPC request caught ", ioe); } } From fd03eba2dd390e5449bbc2e23d854f1a2df18c2e Mon Sep 17 00:00:00 2001 From: "Haiyang.Hu" Date: Fri, 22 Sep 2023 14:14:05 +0800 Subject: [PATCH 4/6] HADOOP-18885. 
Modify code based on comments --- .../src/main/java/org/apache/hadoop/ipc/Server.java | 2 +- .../java/org/apache/hadoop/ipc/metrics/RpcMetrics.java | 10 +++++----- .../hadoop-common/src/site/markdown/Metrics.md | 2 +- .../hdfs/server/namenode/ha/TestMultiObserverNode.java | 10 +++++----- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java index 6d099c8f27c5b..5dba40f6ff5c0 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java @@ -2940,7 +2940,7 @@ private void processRpcRequest(RpcRequestHeaderProto header, try { stateId = alignmentContext.receiveRequestState(header, getMaxIdleTime()); } catch (RetriableException re) { - rpcMetrics.incrRcRejectedByObserverCalls(); + rpcMetrics.incrRpcCallsRejectedByObserver(); throw re; } call.setClientStateId(stateId); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/metrics/RpcMetrics.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/metrics/RpcMetrics.java index 3992bec7b504f..44ae4ff8cd11c 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/metrics/RpcMetrics.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/metrics/RpcMetrics.java @@ -148,7 +148,7 @@ public static RpcMetrics create(Server server, Configuration conf) { @Metric("Number of successful RPC calls") MutableCounterLong rpcCallSuccesses; @Metric("Number of observer namenode rejected RPC calls") - MutableCounterLong rpcRejectedByObserverCalls; + MutableCounterLong rpcCallsRejectedByObserver; @Metric("Number of open connections") public int numOpenConnections() { return server.getNumOpenConnections(); @@ -368,8 +368,8 @@ public void 
incrRpcCallSuccesses() { /** * Increments the Observer NameNode rejected RPC Calls Counter. */ - public void incrRcRejectedByObserverCalls() { - rpcRejectedByObserverCalls.incr(); + public void incrRpcCallsRejectedByObserver() { + rpcCallsRejectedByObserver.incr(); } /** @@ -426,8 +426,8 @@ public long getRpcRequeueCalls() { * @return long */ @VisibleForTesting - public long getRpcRejectedByObserverCalls() { - return rpcRejectedByObserverCalls.value(); + public long getRpcCallsRejectedByObserver() { + return rpcCallsRejectedByObserver.value(); } public MutableRate getDeferredRpcProcessingTime() { diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md b/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md index f8c5c3e7fc89c..01f35c3507624 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md @@ -90,7 +90,7 @@ The default timeunit used for RPC metrics is milliseconds (as per the below desc | `RpcSlowCalls` | Total number of slow RPC calls | | `RpcRequeueCalls` | Total number of requeue RPC calls | | `RpcCallsSuccesses` | Total number of RPC calls that are successfully processed | -| `RpcRejectedByObserverCalls` | Total number of RPC calls that are observer namenode rejected | +| `RpcCallsRejectedByObserver` | Total number of RPC calls rejected by the Observer NameNode | | `NumOpenConnections` | Current number of open connections | | `NumInProcessHandler` | Current number of handlers on working | | `CallQueueLength` | Current length of the call queue | diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestMultiObserverNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestMultiObserverNode.java index fc179e845bf75..cb6ffd60f5b1e 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestMultiObserverNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestMultiObserverNode.java @@ -152,11 +152,11 @@ public void testObserverFallBehind() throws Exception { RPC.Server clientRpcServer2 = ((NameNodeRpcServer)dfsCluster .getNameNodeRpc(2)).getClientRpcServer(); RpcMetrics rpcMetrics2 = clientRpcServer2.getRpcMetrics(); - assertEquals(0, rpcMetrics2.getRpcRejectedByObserverCalls()); + assertEquals(0, rpcMetrics2.getRpcCallsRejectedByObserver()); RPC.Server clientRpcServer3 = ((NameNodeRpcServer)dfsCluster .getNameNodeRpc(3)).getClientRpcServer(); RpcMetrics rpcMetrics3 = clientRpcServer3.getRpcMetrics(); - assertEquals(0, rpcMetrics3.getRpcRejectedByObserverCalls()); + assertEquals(0, rpcMetrics3.getRpcCallsRejectedByObserver()); dfsCluster.rollEditLogAndTail(0); dfs.getFileStatus(testPath); @@ -169,9 +169,9 @@ public void testObserverFallBehind() throws Exception { assertSentTo(0); HATestUtil.setACStateId(dfs, realStateId); - // Validate rpcRejectedByObserverCalls metric. - assertEquals(1, rpcMetrics2.getRpcRejectedByObserverCalls()); - assertEquals(1, rpcMetrics3.getRpcRejectedByObserverCalls()); + // Validate rpcCallsRejectedByObserver metric. + assertEquals(1, rpcMetrics2.getRpcCallsRejectedByObserver()); + assertEquals(1, rpcMetrics3.getRpcCallsRejectedByObserver()); } private void assertSentTo(int... nnIndices) throws IOException { From 33130edecc7ea4c8c2b129ee74eecc373716efa3 Mon Sep 17 00:00:00 2001 From: "Haiyang.Hu" Date: Fri, 22 Sep 2023 16:13:08 +0800 Subject: [PATCH 5/6] trigger ci From a4349102c73234f1100de188e869c063518489bd Mon Sep 17 00:00:00 2001 From: "Haiyang.Hu" Date: Sat, 23 Sep 2023 12:35:09 +0800 Subject: [PATCH 6/6] trigger ci