From 57c35ad340e376c75b17b4ca7ffbd91376f10963 Mon Sep 17 00:00:00 2001 From: Xianming Lei <31424839+leixm@users.noreply.github.com> Date: Fri, 28 Jul 2023 14:05:04 +0800 Subject: [PATCH] [#1048] fix(coord): ExcludeNodes does not take effect when the coordinator restarts. (#1049) ### What changes were proposed in this pull request? Make updateExcludeNodes no delay first time. ### Why are the changes needed? Fix: #1048. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? UT. Co-authored-by: leixianming --- .../coordinator/SimpleClusterManager.java | 2 +- .../coordinator/SimpleClusterManagerTest.java | 27 +++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/coordinator/src/main/java/org/apache/uniffle/coordinator/SimpleClusterManager.java b/coordinator/src/main/java/org/apache/uniffle/coordinator/SimpleClusterManager.java index 8c5929170c..4ea6ee0d09 100644 --- a/coordinator/src/main/java/org/apache/uniffle/coordinator/SimpleClusterManager.java +++ b/coordinator/src/main/java/org/apache/uniffle/coordinator/SimpleClusterManager.java @@ -114,7 +114,7 @@ public SimpleClusterManager(CoordinatorConf conf, Configuration hadoopConf) thro ThreadUtils.getDaemonSingleThreadScheduledExecutor("UpdateExcludeNodes"); checkNodesExecutorService.scheduleAtFixedRate( () -> updateExcludeNodes(excludeNodesPath), - updateNodesInterval, + 0, updateNodesInterval, TimeUnit.MILLISECONDS); } diff --git a/coordinator/src/test/java/org/apache/uniffle/coordinator/SimpleClusterManagerTest.java b/coordinator/src/test/java/org/apache/uniffle/coordinator/SimpleClusterManagerTest.java index b3502dcf6f..1391308f8b 100644 --- a/coordinator/src/test/java/org/apache/uniffle/coordinator/SimpleClusterManagerTest.java +++ b/coordinator/src/test/java/org/apache/uniffle/coordinator/SimpleClusterManagerTest.java @@ -28,6 +28,7 @@ import java.util.stream.Collectors; import com.google.common.collect.Sets; +import com.google.common.util.concurrent.Uninterruptibles; import org.apache.commons.collections.CollectionUtils; import org.apache.hadoop.conf.Configuration; import org.junit.jupiter.api.AfterEach; @@ -487,6 +488,32 @@ public void updateExcludeNodesTest() throws Exception { } } + @Test + public void excludeNodesNoDelayTest() throws Exception { + String excludeNodesFolder = + (new File(ClassLoader.getSystemResource("empty").getFile())).getParent(); + String excludeNodesPath = excludeNodesFolder + "/excludeNodes"; + CoordinatorConf ssc = new CoordinatorConf(); + ssc.setString( + CoordinatorConf.COORDINATOR_EXCLUDE_NODES_FILE_PATH, + URI.create(excludeNodesPath).toString()); + ssc.setLong(CoordinatorConf.COORDINATOR_EXCLUDE_NODES_CHECK_INTERVAL, 5000); + + final Set nodes = Sets.newHashSet("node1-1999", "node2-1999"); + writeExcludeHosts(excludeNodesPath, nodes); + + try (SimpleClusterManager scm = new SimpleClusterManager(ssc, new Configuration())) { + // waiting for excludeNode file parse. + Uninterruptibles.sleepUninterruptibly(20, TimeUnit.MILLISECONDS); + scm.add(new ServerNode("node1-1999", "ip", 0, 100L, 50L, 20, 10, testTags)); + scm.add(new ServerNode("node2-1999", "ip", 0, 100L, 50L, 20, 10, testTags)); + scm.add(new ServerNode("node3-1999", "ip", 0, 100L, 50L, 20, 10, testTags)); + scm.add(new ServerNode("node4-1999", "ip", 0, 100L, 50L, 20, 10, testTags)); + assertEquals(4, scm.getNodesNum()); + assertEquals(2, scm.getExcludeNodes().size()); + } + } + private void writeExcludeHosts(String path, Set values) throws Exception { try (PrintWriter pw = new PrintWriter(new FileWriter(path))) { // have empty line as value