From 64ec927303574e9dc08644d4e71af519f46f4103 Mon Sep 17 00:00:00 2001 From: Abhishek Dixit Date: Thu, 20 Jul 2023 09:11:43 +0000 Subject: [PATCH 1/6] [YARN-11421] Graceful Decommission ignores launched containers and gets deactivated before timeout During Graceful Decommission, a Node gets deactivated before timeout even though there are launched containers on that node. We have observed cases when graceful decommission signal is sent to node and Containers are launched at NodeManager and at the same time, in such cases ResourceManager moves the node from Decommissioning to Decommissioned state because launced containers are not checked in DeactivateNodeTransition. We will suggest waiting for yarn.resourcemanager.decommissioning-nodes-watcher.delay-ms to complete before marking node ready to be decommissioned. No delay if set to 0. Expire interval should not be configured more than RM_AM_EXPIRY_INTERVAL_MS. Open Source JIRA: https://issues.apache.org/jira/browse/YARN-11421 --- .../hadoop/yarn/conf/YarnConfiguration.java | 10 +++ .../src/main/resources/yarn-default.xml | 9 +++ .../DecommissioningNodesWatcher.java | 18 ++++- .../resourcemanager/rmnode/RMNodeImpl.java | 7 +- .../TestDecommissioningNodesWatcher.java | 74 +++++++++++++++++++ .../TestResourceTrackerService.java | 59 +++++++++++++++ .../src/site/markdown/GracefulDecommission.md | 13 ++-- 7 files changed, 182 insertions(+), 8 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index faa5c82d7e9a2..fdeaf4ced1bc9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -1280,6 +1280,16 @@ public static boolean isAclEnabled(Configuration conf) { public static final int DEFAULT_RM_DECOMMISSIONING_NODES_WATCHER_POLL_INTERVAL = 20; + /** + * Allow Delayed Removal of Decommissioning Nodes to account for scheduled AM containers + * on that node. No delay if set to 0. + */ + public static final String RM_DECOMMISSIONING_NODES_WATCHER_DELAY_MS = + RM_PREFIX + "decommissioning-nodes-watcher.delay-ms"; + + public static final int + DEFAULT_RM_DECOMMISSIONING_NODES_WATCHER_DELAY_MS = 0; + //////////////////////////////// // Node Manager Configs //////////////////////////////// diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml index b643bd8d08d49..e4dae71288748 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml @@ -3635,6 +3635,15 @@ 20 + + + Allow Delayed Removal of Decommissioning Nodes to account for scheduled AM containers on that node. + No delay if set to 0. Expire interval should not be configured more than RM_AM_EXPIRY_INTERVAL_MS. + + yarn.resourcemanager.decommissioning-nodes-watcher.delay-ms + 0 + + Used to specify custom web services for Resourcemanager. Value can be diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/DecommissioningNodesWatcher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/DecommissioningNodesWatcher.java index 61691690df878..51cd06e480005 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/DecommissioningNodesWatcher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/DecommissioningNodesWatcher.java @@ -114,6 +114,7 @@ void updateTimeout(int timeoutSec) { private Timer pollTimer; private MonotonicClock mclock; + private int expireIntvl; public DecommissioningNodesWatcher(RMContext rmContext) { this.rmContext = rmContext; @@ -126,6 +127,11 @@ public void init(Configuration conf) { YarnConfiguration.RM_DECOMMISSIONING_NODES_WATCHER_POLL_INTERVAL, YarnConfiguration .DEFAULT_RM_DECOMMISSIONING_NODES_WATCHER_POLL_INTERVAL); + // expire interval should not be configured more than RM_AM_EXPIRY_INTERVAL_MS + this.expireIntvl = Math.min(conf.getInt(YarnConfiguration.RM_AM_EXPIRY_INTERVAL_MS, + YarnConfiguration.DEFAULT_RM_AM_EXPIRY_INTERVAL_MS), + conf.getInt(YarnConfiguration.RM_DECOMMISSIONING_NODES_WATCHER_DELAY_MS, + YarnConfiguration.DEFAULT_RM_DECOMMISSIONING_NODES_WATCHER_DELAY_MS)); pollTimer.schedule(new PollTimerTask(rmContext), 0, (1000L * v)); } @@ -221,10 +227,14 @@ public enum DecommissioningNodeStatus { // The node has already been decommissioned DECOMMISSIONED, + + // wait for possibility of scheduled AM containers + WAIT_SCHEDULED_APPS } public boolean checkReadyToBeDecommissioned(NodeId nodeId) { DecommissioningNodeStatus s = checkDecommissioningStatus(nodeId); + LOG.debug("checkReadyToBeDecommissioned " + nodeId + " status " + s); return (s == DecommissioningNodeStatus.READY || s == DecommissioningNodeStatus.TIMEOUT); } @@ -247,7 +257,13 @@ public DecommissioningNodeStatus checkDecommissioningStatus(NodeId nodeId) { } if (context.appIds.size() == 0) { - return DecommissioningNodeStatus.READY; + // wait for am expire interval or decommission timeout whichever is smaller + // if decommission timeout is negative, use am expire interval + long effectiveTimeout = context.timeoutMs > 0 ? Math.min(context.timeoutMs, expireIntvl) : expireIntvl; + LOG.debug("checkReadyToBeDecommissioned " + nodeId + ", context.timeoutMs=" + context.timeoutMs + + ", expireIntvl=" + expireIntvl + ", waitTime=" + waitTime + ", effectiveTimeout=" + effectiveTimeout); + return waitTime >= effectiveTimeout? + DecommissioningNodeStatus.READY : DecommissioningNodeStatus.WAIT_SCHEDULED_APPS; } else { return (context.timeoutMs < 0 || waitTime < context.timeoutMs)? DecommissioningNodeStatus.WAIT_APP : diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java index 81a9ed8efc1fe..458ef416626d1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java @@ -1439,10 +1439,15 @@ public NodeState transition(RMNodeImpl rmNode, RMNodeEvent event) { * @return true if node has any AM scheduled on it. */ private boolean hasScheduledAMContainers(RMNodeImpl rmNode) { - return rmNode.context.getScheduler() + boolean hasScheduledAMContainers = rmNode.context.getScheduler() .getSchedulerNode(rmNode.getNodeID()) .getCopiedListOfRunningContainers() .stream().anyMatch(RMContainer::isAMContainer); + if (hasScheduledAMContainers) { + LOG.info("Node " + rmNode.nodeId + " has AM containers scheduled on it." + + " Will not deactivate it."); + } + return hasScheduledAMContainers; } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestDecommissioningNodesWatcher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestDecommissioningNodesWatcher.java index c662b55e8db4a..a6c07aff21cf0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestDecommissioningNodesWatcher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestDecommissioningNodesWatcher.java @@ -162,6 +162,80 @@ public void testDecommissioningNodesWatcherWithPreviousRunningApps() watcher.checkDecommissioningStatus(id1)); } + @Test + public void testDecommissioningNodesWatcherWithScheduledAMContainers() + throws Exception { + Configuration conf = new Configuration(); + // decommission timeout is 10 min + conf.set(YarnConfiguration.RM_NODE_GRACEFUL_DECOMMISSION_TIMEOUT, "600"); + // watcher delay is 5min + conf.set(YarnConfiguration.RM_DECOMMISSIONING_NODES_WATCHER_DELAY_MS, "300000"); + + rm = new MockRM(conf); + rm.start(); + + DecommissioningNodesWatcher watcher = + new DecommissioningNodesWatcher(rm.getRMContext()); + watcher.init(conf); + + MockNM nm1 = rm.registerNode("host1:1234", 10240); + RMNodeImpl node1 = + (RMNodeImpl) rm.getRMContext().getRMNodes().get(nm1.getNodeId()); + NodeId id1 = nm1.getNodeId(); + + rm.waitForState(id1, NodeState.RUNNING); + + // just submit the app + RMApp app = MockRMAppSubmitter.submit(rm, + MockRMAppSubmissionData.Builder.createWithMemory(2000, rm).build()); + MockAM am = MockRM.launchAndRegisterAM(app, rm, nm1); + + // rmnode still thinks there are 0 apps because it hasn't received updated node status + Assert.assertEquals(0, node1.getRunningApps().size()); + + // Setup nm1 as DECOMMISSIONING for DecommissioningNodesWatcher. Right now + // there is no container running on the node. + rm.sendNodeGracefulDecommission(nm1, + YarnConfiguration.DEFAULT_RM_NODE_GRACEFUL_DECOMMISSION_TIMEOUT); + rm.waitForState(id1, NodeState.DECOMMISSIONING); + + // we should still get WAIT_SCHEDULED_APPS as expiry time is not over + NodeHealthStatus status = NodeHealthStatus.newInstance(true, "", + System.currentTimeMillis() - 1000); + NodeStatus nodeStatus = NodeStatus.newInstance(id1, 0, + new ArrayList<>(), Collections.emptyList(), status, null, null, null); + watcher.update(node1, nodeStatus); + + Assert.assertFalse(watcher.checkReadyToBeDecommissioned(id1)); + Assert.assertEquals(DecommissioningNodeStatus.WAIT_SCHEDULED_APPS, + watcher.checkDecommissioningStatus(id1)); + + // update node with 3 running containers + nodeStatus = createNodeStatus(id1, app, 3); + node1.handle(new RMNodeStatusEvent(nm1.getNodeId(), nodeStatus)); + watcher.update(node1, nodeStatus); + Assert.assertEquals(1, node1.getRunningApps().size()); + Assert.assertFalse(watcher.checkReadyToBeDecommissioned(id1)); + Assert.assertEquals(DecommissioningNodeStatus.WAIT_CONTAINER, + watcher.checkDecommissioningStatus(id1)); + + // update node with 0 running containers + nodeStatus = createNodeStatus(id1, app, 0); + node1.handle(new RMNodeStatusEvent(nm1.getNodeId(), nodeStatus)); + watcher.update(node1, nodeStatus); + Assert.assertFalse(watcher.checkReadyToBeDecommissioned(id1)); + // we still get status as WAIT_APP since app is still running even if + // containers are 0 + Assert.assertEquals(DecommissioningNodeStatus.WAIT_APP, + watcher.checkDecommissioningStatus(id1)); + + // Set app to be FINISHED and verified DecommissioningNodeStatus is READY. + MockRM.finishAMAndVerifyAppState(app, rm, nm1, am); + rm.waitForState(app.getApplicationId(), RMAppState.FINISHED); + Assert.assertEquals(0, node1.getRunningApps().size()); + watcher.update(node1, nodeStatus); + } + @After public void tearDown() { if (rm != null) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java index 358cf9e0f8f77..8f20e9f5f2df5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java @@ -351,6 +351,65 @@ public void testGracefulDecommissionNoApp() throws Exception { Assert.assertEquals(NodeAction.SHUTDOWN, nodeHeartbeat3.getNodeAction()); } + @Test (timeout = 60000) + public void testGracefulDecommissionRaceCondition() throws Exception { + Configuration conf = new Configuration(); + conf.set(YarnConfiguration.RM_NODES_EXCLUDE_FILE_PATH, hostFile + .getAbsolutePath()); + conf.set(YarnConfiguration.RM_DECOMMISSIONING_NODES_WATCHER_DELAY_MS, "120000"); + + writeToHostsFile(""); + rm = new MockRM(conf); + rm.start(); + + MockNM nm1 = rm.registerNode("host1:1234", 10240); + MockNM nm2 = rm.registerNode("host2:5678", 20480); + MockNM nm3 = rm.registerNode("host3:4433", 10240); + + NodeId id1 = nm1.getNodeId(); + NodeId id2 = nm2.getNodeId(); + NodeId id3 = nm3.getNodeId(); + + rm.waitForState(id1, NodeState.RUNNING); + rm.waitForState(id2, NodeState.RUNNING); + rm.waitForState(id3, NodeState.RUNNING); + + // Create an app and schedule AM on host1. + RMApp app = MockRMAppSubmitter.submitWithMemory(2000, rm); + MockAM am = MockRM.launchAM(app, rm, nm1); + + // Before sending heartbeat we gracefully decommission the node on which AM + // is scheduled to simulate race condition. + writeToHostsFile("host1", "host3"); + rm.getNodesListManager().refreshNodesGracefully(conf, 30); + rm.waitForState(id1, NodeState.DECOMMISSIONING); + rm.waitForState(id3, NodeState.DECOMMISSIONING); + + nm1.nodeHeartbeat(true); + nm3.nodeHeartbeat(true); + // host1 should stay in DECOMMISSIONING as we are waiting for am launch timeout + // and the there are scheduled am containers on host1. + // host3 should be decommissioned as there is no container scheduled on it. + rm.waitForState(id1, NodeState.DECOMMISSIONING); + rm.waitForState(id3, NodeState.DECOMMISSIONED); + + ApplicationAttemptId aaid = app.getCurrentAppAttempt().getAppAttemptId(); + nm1.nodeHeartbeat(aaid, 1, ContainerState.RUNNING); + nm3.nodeHeartbeat(true); + + // host1 is in Decommissioning due to containers on the node, host3 should be in DECOMMISSIONED + // because no containers on the node + rm.waitForState(id1, NodeState.DECOMMISSIONING); + rm.waitForState(id3, NodeState.DECOMMISSIONED); + + am.registerAppAttempt(); + rm.waitForState(app.getApplicationId(), RMAppState.RUNNING); + MockRM.finishAMAndVerifyAppState(app, rm, nm1, am); + nm1.nodeHeartbeat(aaid, 1, ContainerState.COMPLETE); + rm.waitForState(app.getApplicationId(), RMAppState.FINISHED); + rm.waitForState(id1, NodeState.DECOMMISSIONED); + } + @Test public void testGracefulDecommissionDefaultTimeoutResolution() throws Exception { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/GracefulDecommission.md b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/GracefulDecommission.md index e7ce657d41b2d..21b45b02ee5c4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/GracefulDecommission.md +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/GracefulDecommission.md @@ -160,9 +160,10 @@ Following are the sub-status of a decommissioning node: Configuration -------- -| Property | Value | -| ---------------------------------------- | ---------------------------------------- | -| yarn.resourcemanager.nodemanager-graceful-decommission-timeout-secs | Timeout in seconds for YARN node graceful decommission. This is the maximal time to wait for running containers and applications to complete before transition a DECOMMISSIONING node into DECOMMISSIONED. The default value is 3600 seconds. Negative value (like -1) is handled as infinite timeout. | -| yarn.resourcemanager.decommissioning-nodes-watcher.poll-interval-secs | Period in seconds of the poll timer task inside DecommissioningNodesWatcher to identify and take care of DECOMMISSIONING nodes missing regular heart beat. The default value is 20 seconds. | -| yarn.resourcemanager.nodes.exclude-path | Path to file with nodes to exclude. | -| yarn.resourcemanager.nodes.include-path | Path to file with nodes to include. | +| Property | Value | +|-----------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| yarn.resourcemanager.nodemanager-graceful-decommission-timeout-secs | Timeout in seconds for YARN node graceful decommission. This is the maximal time to wait for running containers and applications to complete before transition a DECOMMISSIONING node into DECOMMISSIONED. The default value is 3600 seconds. Negative value (like -1) is handled as infinite timeout. | +| yarn.resourcemanager.decommissioning-nodes-watcher.poll-interval-secs | Period in seconds of the poll timer task inside DecommissioningNodesWatcher to identify and take care of DECOMMISSIONING nodes missing regular heart beat. The default value is 20 seconds. | +| yarn.resourcemanager.decommissioning-nodes-watcher.delay-ms | Allow Delayed Removal of Decommissioning Nodes to account for Scheduled AM containers on that node. Default value is 0 which means no delay in removing node. Expire interval should not be configured more than RM_AM_EXPIRY_INTERVAL_MS. | +| yarn.resourcemanager.nodes.exclude-path | Path to file with nodes to exclude. | +| yarn.resourcemanager.nodes.include-path | Path to file with nodes to include. | From 6312eb5db56410526d58944e5b2f9d1115bafed9 Mon Sep 17 00:00:00 2001 From: Abhishek Dixit Date: Mon, 31 Jul 2023 12:46:44 +0530 Subject: [PATCH 2/6] pr comments --- .../java/org/apache/hadoop/yarn/conf/YarnConfiguration.java | 4 ++-- .../server/resourcemanager/DecommissioningNodesWatcher.java | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index fdeaf4ced1bc9..4e3d3811c47bc 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -1285,10 +1285,10 @@ public static boolean isAclEnabled(Configuration conf) { * on that node. No delay if set to 0. */ public static final String RM_DECOMMISSIONING_NODES_WATCHER_DELAY_MS = - RM_PREFIX + "decommissioning-nodes-watcher.delay-ms"; + RM_PREFIX + "decommissioning-nodes-watcher.delay-ms"; public static final int - DEFAULT_RM_DECOMMISSIONING_NODES_WATCHER_DELAY_MS = 0; + DEFAULT_RM_DECOMMISSIONING_NODES_WATCHER_DELAY_MS = 0; //////////////////////////////// // Node Manager Configs diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/DecommissioningNodesWatcher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/DecommissioningNodesWatcher.java index 51cd06e480005..1b7c50055a6ac 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/DecommissioningNodesWatcher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/DecommissioningNodesWatcher.java @@ -114,7 +114,7 @@ void updateTimeout(int timeoutSec) { private Timer pollTimer; private MonotonicClock mclock; - private int expireIntvl; + private long expireIntvl; public DecommissioningNodesWatcher(RMContext rmContext) { this.rmContext = rmContext; @@ -128,7 +128,7 @@ public void init(Configuration conf) { YarnConfiguration .DEFAULT_RM_DECOMMISSIONING_NODES_WATCHER_POLL_INTERVAL); // expire interval should not be configured more than RM_AM_EXPIRY_INTERVAL_MS - this.expireIntvl = Math.min(conf.getInt(YarnConfiguration.RM_AM_EXPIRY_INTERVAL_MS, + this.expireIntvl = Math.min(conf.getLong(YarnConfiguration.RM_AM_EXPIRY_INTERVAL_MS, YarnConfiguration.DEFAULT_RM_AM_EXPIRY_INTERVAL_MS), conf.getInt(YarnConfiguration.RM_DECOMMISSIONING_NODES_WATCHER_DELAY_MS, YarnConfiguration.DEFAULT_RM_DECOMMISSIONING_NODES_WATCHER_DELAY_MS)); @@ -234,7 +234,7 @@ public enum DecommissioningNodeStatus { public boolean checkReadyToBeDecommissioned(NodeId nodeId) { DecommissioningNodeStatus s = checkDecommissioningStatus(nodeId); - LOG.debug("checkReadyToBeDecommissioned " + nodeId + " status " + s); + LOG.debug("checkReadyToBeDecommissioned {} status {}.", nodeId, s); return (s == DecommissioningNodeStatus.READY || s == DecommissioningNodeStatus.TIMEOUT); } From 4adedca490012ff2c763646391343ce3d84cbd10 Mon Sep 17 00:00:00 2001 From: Abhishek Dixit Date: Mon, 14 Aug 2023 22:53:42 +0530 Subject: [PATCH 3/6] pr comments --- .../DecommissioningNodesWatcher.java | 8 +++++--- .../TestDecommissioningNodesWatcher.java | 16 ++++++++-------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/DecommissioningNodesWatcher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/DecommissioningNodesWatcher.java index 1b7c50055a6ac..03e112e360fe7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/DecommissioningNodesWatcher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/DecommissioningNodesWatcher.java @@ -259,9 +259,11 @@ public DecommissioningNodeStatus checkDecommissioningStatus(NodeId nodeId) { if (context.appIds.size() == 0) { // wait for am expire interval or decommission timeout whichever is smaller // if decommission timeout is negative, use am expire interval - long effectiveTimeout = context.timeoutMs > 0 ? Math.min(context.timeoutMs, expireIntvl) : expireIntvl; - LOG.debug("checkReadyToBeDecommissioned " + nodeId + ", context.timeoutMs=" + context.timeoutMs + - ", expireIntvl=" + expireIntvl + ", waitTime=" + waitTime + ", effectiveTimeout=" + effectiveTimeout); + long effectiveTimeout = context.timeoutMs > 0 ? Math.min(context.timeoutMs, expireIntvl) : + expireIntvl; + LOG.debug("checkReadyToBeDecommissioned " + nodeId + ", context.timeoutMs=" + + context.timeoutMs + ", expireIntvl=" + expireIntvl + ", waitTime=" + waitTime + + ", effectiveTimeout=" + effectiveTimeout); return waitTime >= effectiveTimeout? DecommissioningNodeStatus.READY : DecommissioningNodeStatus.WAIT_SCHEDULED_APPS; } else { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestDecommissioningNodesWatcher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestDecommissioningNodesWatcher.java index a6c07aff21cf0..64044a6c131ce 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestDecommissioningNodesWatcher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestDecommissioningNodesWatcher.java @@ -210,14 +210,14 @@ public void testDecommissioningNodesWatcherWithScheduledAMContainers() Assert.assertEquals(DecommissioningNodeStatus.WAIT_SCHEDULED_APPS, watcher.checkDecommissioningStatus(id1)); - // update node with 3 running containers - nodeStatus = createNodeStatus(id1, app, 3); - node1.handle(new RMNodeStatusEvent(nm1.getNodeId(), nodeStatus)); - watcher.update(node1, nodeStatus); - Assert.assertEquals(1, node1.getRunningApps().size()); - Assert.assertFalse(watcher.checkReadyToBeDecommissioned(id1)); - Assert.assertEquals(DecommissioningNodeStatus.WAIT_CONTAINER, - watcher.checkDecommissioningStatus(id1)); + // update node with 3 running containers + nodeStatus = createNodeStatus(id1, app, 3); + node1.handle(new RMNodeStatusEvent(nm1.getNodeId(), nodeStatus)); + watcher.update(node1, nodeStatus); + Assert.assertEquals(1, node1.getRunningApps().size()); + Assert.assertFalse(watcher.checkReadyToBeDecommissioned(id1)); + Assert.assertEquals(DecommissioningNodeStatus.WAIT_CONTAINER, + watcher.checkDecommissioningStatus(id1)); // update node with 0 running containers nodeStatus = createNodeStatus(id1, app, 0); From 5db1905199ad2573d62630b94bf4d3180a4bbde3 Mon Sep 17 00:00:00 2001 From: Abhishek Dixit Date: Tue, 15 Aug 2023 08:56:42 +0530 Subject: [PATCH 4/6] pr comments --- .../server/resourcemanager/DecommissioningNodesWatcher.java | 6 +++--- .../resourcemanager/TestDecommissioningNodesWatcher.java | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/DecommissioningNodesWatcher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/DecommissioningNodesWatcher.java index 03e112e360fe7..ff6a414777c06 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/DecommissioningNodesWatcher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/DecommissioningNodesWatcher.java @@ -260,12 +260,12 @@ public DecommissioningNodeStatus checkDecommissioningStatus(NodeId nodeId) { // wait for am expire interval or decommission timeout whichever is smaller // if decommission timeout is negative, use am expire interval long effectiveTimeout = context.timeoutMs > 0 ? Math.min(context.timeoutMs, expireIntvl) : - expireIntvl; + expireIntvl; LOG.debug("checkReadyToBeDecommissioned " + nodeId + ", context.timeoutMs=" + - context.timeoutMs + ", expireIntvl=" + expireIntvl + ", waitTime=" + waitTime + + context.timeoutMs + ", expireIntvl=" + expireIntvl + ", waitTime=" + waitTime + ", effectiveTimeout=" + effectiveTimeout); return waitTime >= effectiveTimeout? - DecommissioningNodeStatus.READY : DecommissioningNodeStatus.WAIT_SCHEDULED_APPS; + DecommissioningNodeStatus.READY : DecommissioningNodeStatus.WAIT_SCHEDULED_APPS; } else { return (context.timeoutMs < 0 || waitTime < context.timeoutMs)? DecommissioningNodeStatus.WAIT_APP : diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestDecommissioningNodesWatcher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestDecommissioningNodesWatcher.java index 64044a6c131ce..667ac42f351d5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestDecommissioningNodesWatcher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestDecommissioningNodesWatcher.java @@ -216,7 +216,7 @@ public void testDecommissioningNodesWatcherWithScheduledAMContainers() watcher.update(node1, nodeStatus); Assert.assertEquals(1, node1.getRunningApps().size()); Assert.assertFalse(watcher.checkReadyToBeDecommissioned(id1)); - Assert.assertEquals(DecommissioningNodeStatus.WAIT_CONTAINER, + Assert.assertEquals(DecommissioningNodeStatus.WAIT_CONTAINER, watcher.checkDecommissioningStatus(id1)); // update node with 0 running containers From 1d9c17c05353f07aa3c72ef598fe702a8b3276e9 Mon Sep 17 00:00:00 2001 From: Abhishek Dixit Date: Sun, 20 Aug 2023 17:05:47 +0530 Subject: [PATCH 5/6] pr comments --- .../DecommissioningNodesWatcher.java | 23 ++++++++++++------- .../TestDecommissioningNodesWatcher.java | 2 +- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/DecommissioningNodesWatcher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/DecommissioningNodesWatcher.java index ff6a414777c06..f9541f1a92ff5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/DecommissioningNodesWatcher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/DecommissioningNodesWatcher.java @@ -127,14 +127,21 @@ public void init(Configuration conf) { YarnConfiguration.RM_DECOMMISSIONING_NODES_WATCHER_POLL_INTERVAL, YarnConfiguration .DEFAULT_RM_DECOMMISSIONING_NODES_WATCHER_POLL_INTERVAL); - // expire interval should not be configured more than RM_AM_EXPIRY_INTERVAL_MS - this.expireIntvl = Math.min(conf.getLong(YarnConfiguration.RM_AM_EXPIRY_INTERVAL_MS, - YarnConfiguration.DEFAULT_RM_AM_EXPIRY_INTERVAL_MS), - conf.getInt(YarnConfiguration.RM_DECOMMISSIONING_NODES_WATCHER_DELAY_MS, - YarnConfiguration.DEFAULT_RM_DECOMMISSIONING_NODES_WATCHER_DELAY_MS)); + this.expireIntvl = setExpireInterval(conf); pollTimer.schedule(new PollTimerTask(rmContext), 0, (1000L * v)); } + // set value of decommissioned nodes watcher delay to allow delayed removal of + // decommissioning nodes, but delay should not be more than RM_AM_EXPIRY_INTERVAL_MS + private long setExpireInterval(Configuration conf) { + return Math.min( + conf.getInt(YarnConfiguration.RM_DECOMMISSIONING_NODES_WATCHER_DELAY_MS, + YarnConfiguration.DEFAULT_RM_DECOMMISSIONING_NODES_WATCHER_DELAY_MS), + conf.getLong(YarnConfiguration.RM_AM_EXPIRY_INTERVAL_MS, + YarnConfiguration.DEFAULT_RM_AM_EXPIRY_INTERVAL_MS) + ); + } + /** * Update rmNode decommissioning status based on NodeStatus. * @param rmNode The node @@ -261,9 +268,9 @@ public DecommissioningNodeStatus checkDecommissioningStatus(NodeId nodeId) { // if decommission timeout is negative, use am expire interval long effectiveTimeout = context.timeoutMs > 0 ? Math.min(context.timeoutMs, expireIntvl) : expireIntvl; - LOG.debug("checkReadyToBeDecommissioned " + nodeId + ", context.timeoutMs=" + - context.timeoutMs + ", expireIntvl=" + expireIntvl + ", waitTime=" + waitTime + - ", effectiveTimeout=" + effectiveTimeout); + LOG.debug("checkReadyToBeDecommissioned {}, context.timeoutMs={}, expireIntvl={}, " + + "waitTime={}, effectiveTimeout={}", nodeId, context.timeoutMs, expireIntvl, waitTime, + effectiveTimeout); return waitTime >= effectiveTimeout? DecommissioningNodeStatus.READY : DecommissioningNodeStatus.WAIT_SCHEDULED_APPS; } else { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestDecommissioningNodesWatcher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestDecommissioningNodesWatcher.java index 667ac42f351d5..65905a6e2af13 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestDecommissioningNodesWatcher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestDecommissioningNodesWatcher.java @@ -227,7 +227,7 @@ public void testDecommissioningNodesWatcherWithScheduledAMContainers() // we still get status as WAIT_APP since app is still running even if // containers are 0 Assert.assertEquals(DecommissioningNodeStatus.WAIT_APP, - watcher.checkDecommissioningStatus(id1)); + watcher.checkDecommissioningStatus(id1)); // Set app to be FINISHED and verified DecommissioningNodeStatus is READY. MockRM.finishAMAndVerifyAppState(app, rm, nm1, am); From e933d600a29b9955b14e57b208708aef29e7dd72 Mon Sep 17 00:00:00 2001 From: Abhishek Dixit Date: Sun, 20 Aug 2023 22:55:44 +0530 Subject: [PATCH 6/6] pr comments --- .../server/resourcemanager/DecommissioningNodesWatcher.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/DecommissioningNodesWatcher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/DecommissioningNodesWatcher.java index f9541f1a92ff5..e4f83110571b6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/DecommissioningNodesWatcher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/DecommissioningNodesWatcher.java @@ -268,7 +268,7 @@ public DecommissioningNodeStatus checkDecommissioningStatus(NodeId nodeId) { // if decommission timeout is negative, use am expire interval long effectiveTimeout = context.timeoutMs > 0 ? Math.min(context.timeoutMs, expireIntvl) : expireIntvl; - LOG.debug("checkReadyToBeDecommissioned {}, context.timeoutMs={}, expireIntvl={}, " + + LOG.debug("checkReadyToBeDecommissioned {}, context.timeoutMs={}, expireIntvl={}, " + "waitTime={}, effectiveTimeout={}", nodeId, context.timeoutMs, expireIntvl, waitTime, effectiveTimeout); return waitTime >= effectiveTimeout?