From 706e1f058d1988ae4d39da12ee4d57e4360e771b Mon Sep 17 00:00:00 2001 From: Igor Skokov Date: Thu, 21 Mar 2019 09:51:46 +0300 Subject: [PATCH 01/18] ZOOKEEPER-3320: configurable retry count for election port bind in QuorumCnxManager.Listener --- .../apache/zookeeper/server/quorum/QuorumCnxManager.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java index 7870bb37ab0..1f69c5da8e8 100644 --- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java +++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java @@ -853,12 +853,17 @@ private void resetConnectionThreadCount() { */ public class Listener extends ZooKeeperThread { + private final int portBindMaxRetry; volatile ServerSocket ss = null; public Listener() { // During startup of thread, thread name will be overridden to // specific election address super("ListenerThread"); + + // maximum retry count while trying to bind to election port + // see ZOOKEEPER-3320 for more details + portBindMaxRetry = Integer.getInteger("zookeeper.electionPortBindRetry", 3); } /** @@ -870,7 +875,7 @@ public void run() { InetSocketAddress addr; Socket client = null; Exception exitException = null; - while((!shutdown) && (numRetries < 3)){ + while((!shutdown) && (numRetries < portBindMaxRetry)){ try { if (self.shouldUsePortUnification()) { LOG.info("Creating TLS-enabled quorum server socket"); From b448f36034f352eeaf3cfa29a471bfc40a40e3c7 Mon Sep 17 00:00:00 2001 From: Igor Skokov Date: Fri, 22 Mar 2019 09:55:03 +0300 Subject: [PATCH 02/18] ZOOKEEPER-3320: add validation and logging of zookeeper.electionPortBindRetry value --- .../zookeeper/server/quorum/QuorumCnxManager.java | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git 
a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java index 1f69c5da8e8..63d4c76cb5b 100644 --- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java +++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java @@ -853,6 +853,9 @@ private void resetConnectionThreadCount() { */ public class Listener extends ZooKeeperThread { + private static final String ELECTION_PORT_BIND_RETRY = "zookeeper.electionPortBindRetry"; + private static final int DEFAULT_PORT_BIND_MAX_RETRY = 3; + private final int portBindMaxRetry; volatile ServerSocket ss = null; @@ -863,7 +866,17 @@ public Listener() { // maximum retry count while trying to bind to election port // see ZOOKEEPER-3320 for more details - portBindMaxRetry = Integer.getInteger("zookeeper.electionPortBindRetry", 3); + final Integer maxRetry = Integer.getInteger(ELECTION_PORT_BIND_RETRY, + DEFAULT_PORT_BIND_MAX_RETRY); + if (maxRetry >= 0) { + LOG.info("Election port bind maximum retries is {}", maxRetry); + portBindMaxRetry = maxRetry; + } else { + LOG.info("'{}' contains invalid value: {}(must be >= 0). 
" + + "Use default value of {} instead.", + ELECTION_PORT_BIND_RETRY, maxRetry, DEFAULT_PORT_BIND_MAX_RETRY); + portBindMaxRetry = DEFAULT_PORT_BIND_MAX_RETRY; + } } /** From 883d35eb0bf28efa29d866319aa4ef91996cd88d Mon Sep 17 00:00:00 2001 From: Igor Skokov Date: Mon, 25 Mar 2019 19:45:57 +0300 Subject: [PATCH 03/18] ZOOKEEPER-3320: add documentation for zookeeper.electionPortBindRetry property --- .../src/main/resources/markdown/zookeeperAdmin.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/zookeeper-docs/src/main/resources/markdown/zookeeperAdmin.md b/zookeeper-docs/src/main/resources/markdown/zookeeperAdmin.md index b0b07dc0588..e9a51623651 100644 --- a/zookeeper-docs/src/main/resources/markdown/zookeeperAdmin.md +++ b/zookeeper-docs/src/main/resources/markdown/zookeeperAdmin.md @@ -1054,6 +1054,18 @@ As an example, this will enable all four letter word commands: properly, check your operating system's options regarding TCP keepalive for more information. Defaults to **false**. + +* *zookeeper.electionPortBindRetry* : + (Java system property only: **zookeeper.electionPortBindRetry**) + Property set max retry count when Zookeeper server fails to bind + leader election port. Such errors can be temporary and recoverable, + such as DNS issue described in [ZOOKEEPER-3320](https://issues.apache.org/jira/projects/ZOOKEEPER/issues/ZOOKEEPER-3320), + or non-retryable, such as port already in use. + In case of transient errors, this property can improve availability + of Zookeeper server and help it to self recover. + Default value 3. In container environment, especially in Kubernetes, + this value should be increased to overcome issues related to DNS name resolving. 
+ * *observer.reconnectDelayMs* : (Java system property: **zookeeper.observer.reconnectDelayMs**) From 914295895b7c1159edbf962023c5316b83b421f5 Mon Sep 17 00:00:00 2001 From: Igor Skokov Date: Wed, 27 Mar 2019 16:17:18 +0300 Subject: [PATCH 04/18] ZOOKEEPER-3320: QuorumCnxManager.Listener extends ZookeeperCriticalThread, add test to CnxManagerTest to check configurable retries of leader election port bind --- .../server/quorum/QuorumCnxManager.java | 24 ++++++++------- .../zookeeper/server/quorum/QuorumPeer.java | 10 +++++++ .../server/quorum/CnxManagerTest.java | 29 +++++++++++++++++++ 3 files changed, 52 insertions(+), 11 deletions(-) diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java index 63d4c76cb5b..c06246f5cf5 100644 --- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java +++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java @@ -18,6 +18,8 @@ package org.apache.zookeeper.server.quorum; +import static org.apache.zookeeper.common.NetUtils.formatInetAddr; + import java.io.BufferedInputStream; import java.io.BufferedOutputStream; import java.io.DataInputStream; @@ -36,6 +38,7 @@ import java.util.Enumeration; import java.util.HashSet; import java.util.Map; +import java.util.NoSuchElementException; import java.util.Set; import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.ConcurrentHashMap; @@ -43,24 +46,20 @@ import java.util.concurrent.ThreadFactory; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; -import java.util.NoSuchElementException; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; - +import javax.net.ssl.SSLSocket; import org.apache.zookeeper.common.X509Exception; import org.apache.zookeeper.server.ExitCode; -import 
org.apache.zookeeper.server.quorum.QuorumPeerConfig.ConfigException; -import org.apache.zookeeper.server.util.ConfigUtils; import org.apache.zookeeper.server.ZooKeeperThread; +import org.apache.zookeeper.server.quorum.QuorumPeerConfig.ConfigException; import org.apache.zookeeper.server.quorum.auth.QuorumAuthLearner; import org.apache.zookeeper.server.quorum.auth.QuorumAuthServer; import org.apache.zookeeper.server.quorum.flexible.QuorumVerifier; +import org.apache.zookeeper.server.util.ConfigUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import javax.net.ssl.SSLSocket; -import static org.apache.zookeeper.common.NetUtils.formatInetAddr; - /** * This class implements a connection manager for leader election using TCP. It * maintains one connection for every pair of servers. The tricky part is to @@ -958,10 +957,13 @@ public void run() { } LOG.info("Leaving listener"); if (!shutdown) { - LOG.error("As I'm leaving the listener thread, " - + "I won't be able to participate in leader " - + "election any longer: " - + formatInetAddr(self.getElectionAddress())); + LOG.error("As I'm leaving the listener thread after " + + numRetries + " errors. " + + "I won't be able to participate in leader " + + "election any longer: " + + formatInetAddr(self.getElectionAddress()) + + ". 
Use " + ELECTION_PORT_BIND_RETRY + " property to " + + "increase retry count."); if (exitException instanceof BindException) { // After leaving listener thread, the host cannot join the // quorum anymore, this is a severe error that we cannot diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumPeer.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumPeer.java index f3217af362d..d5e3cdcece3 100644 --- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumPeer.java +++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumPeer.java @@ -1344,6 +1344,7 @@ public void run() { start_fle = Time.currentElapsedTime(); } } finally { + shutdown(); LOG.warn("QuorumPeer main thread exited"); MBeanRegistry instance = MBeanRegistry.getInstance(); instance.unregister(jmxQuorumBean); @@ -1930,6 +1931,15 @@ public boolean isRunning() { return running; } + /** + * Request graceful shutdown of quorum peer. + * Method don't wait when quorum peer will be stopped, it's only send a 'signal' + * which will be handled in QuorumPeer.run(). + */ + public void requestStop() { + running = false; + } + /** * get reference to QuorumCnxManager */ diff --git a/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/CnxManagerTest.java b/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/CnxManagerTest.java index d3a631b7f5d..7259eb756fe 100644 --- a/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/CnxManagerTest.java +++ b/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/CnxManagerTest.java @@ -290,6 +290,35 @@ public void testCnxManagerSpinLock() throws Exception { Assert.assertFalse(cnxManager.listener.isAlive()); } + /** + * Test for bug described in {@link https://issues.apache.org/jira/browse/ZOOKEEPER-3320}. 
+ * The test creates a peer whose address contains an unresolvable DNS name; + * the leader election listener thread should stop after N errors. + * + * @throws Exception + */ + @Test + public void testCnxManagerListenerThreadConfigurableRetry() throws Exception { + final Map unresolvablePeers = new HashMap<>(); + final long myid = 1L; + unresolvablePeers.put(myid, new QuorumServer(myid, "unresolvable-domain.org:2182:2183;2181")); + final QuorumPeer peer = new QuorumPeer(unresolvablePeers, + ClientBase.createTmpDir(), + ClientBase.createTmpDir(), + 2181, 3, myid, 1000, 2, 2); + final QuorumCnxManager cnxManager = peer.createCnxnManager(); + QuorumCnxManager.Listener listener = cnxManager.listener; + listener.start(); + // listener thread should stop and throws error which notify QuorumPeer about error. + // QuorumPeer should start shutdown process + listener.join(15000); // set wait time, if listener contains bug and thread not stops. + Assert.assertFalse(listener.isAlive()); + Assert.assertFalse(peer.isRunning()); + peer.join(15000); + Assert.assertFalse(QuorumPeer.class.getSimpleName() + " not stopped after " + + "listener thread death", listener.isAlive()); + } + /** * Tests a bug in QuorumCnxManager that causes a NPE when a 3.4.6 * observer connects to a 3.5.0 server. 
From bb0c77f7a78b3f0122f40447759c1de275cae306 Mon Sep 17 00:00:00 2001 From: Igor Skokov Date: Thu, 11 Jul 2019 11:27:45 +0300 Subject: [PATCH 05/18] ZOOKEEPER-3320: use existing scheme to stop server when QuorumCnxManager.Listener fails to bind to election port --- .../zookeeper/server/quorum/QuorumCnxManager.java | 2 +- .../org/apache/zookeeper/server/quorum/QuorumPeer.java | 10 ---------- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java index c06246f5cf5..517906464bb 100644 --- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java +++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java @@ -964,7 +964,7 @@ public void run() { + formatInetAddr(self.getElectionAddress()) + ". Use " + ELECTION_PORT_BIND_RETRY + " property to " + "increase retry count."); - if (exitException instanceof BindException) { + if (exitException instanceof SocketException) { // After leaving listener thread, the host cannot join the // quorum anymore, this is a severe error that we cannot // recover from, so we need to exit diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumPeer.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumPeer.java index d5e3cdcece3..f3217af362d 100644 --- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumPeer.java +++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumPeer.java @@ -1344,7 +1344,6 @@ public void run() { start_fle = Time.currentElapsedTime(); } } finally { - shutdown(); LOG.warn("QuorumPeer main thread exited"); MBeanRegistry instance = MBeanRegistry.getInstance(); instance.unregister(jmxQuorumBean); @@ -1931,15 +1930,6 @@ public boolean isRunning() { return running; } - /** - * 
Request graceful shutdown of quorum peer. - * Method don't wait when quorum peer will be stopped, it's only send a 'signal' - * which will be handled in QuorumPeer.run(). - */ - public void requestStop() { - running = false; - } - /** * get reference to QuorumCnxManager */ From e9db1e445dbe6bfca74b57387a1acc5e527d4fa5 Mon Sep 17 00:00:00 2001 From: Igor Skokov Date: Mon, 29 Jul 2019 11:25:17 +0300 Subject: [PATCH 06/18] ZOOKEEPER-3320: fix of test compilation --- .../java/org/apache/zookeeper/server/quorum/CnxManagerTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/CnxManagerTest.java b/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/CnxManagerTest.java index b5d6b14a5a6..200ed99cbb6 100644 --- a/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/CnxManagerTest.java +++ b/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/CnxManagerTest.java @@ -305,7 +305,7 @@ public void testCnxManagerListenerThreadConfigurableRetry() throws Exception { final QuorumPeer peer = new QuorumPeer(unresolvablePeers, ClientBase.createTmpDir(), ClientBase.createTmpDir(), - 2181, 3, myid, 1000, 2, 2); + 2181, 3, myid, 1000, 2, 2, 2); final QuorumCnxManager cnxManager = peer.createCnxnManager(); QuorumCnxManager.Listener listener = cnxManager.listener; listener.start(); From b4abdc7f2c01c7df0f7c0b342149a255e43ea16b Mon Sep 17 00:00:00 2001 From: Igor Skokov Date: Thu, 1 Aug 2019 11:54:52 +0300 Subject: [PATCH 07/18] ZOOKEEPER-3320: handle 0 value for zookeeper.electionPortBindRetry as infinite, fix CnxManagerTest. 
testCnxManagerListenerThreadConfigurableRetry to prevent JVM exit during testing --- .../src/main/resources/markdown/zookeeperAdmin.md | 3 ++- .../zookeeper/server/quorum/QuorumCnxManager.java | 11 +++++++++-- .../zookeeper/server/quorum/CnxManagerTest.java | 6 +++++- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/zookeeper-docs/src/main/resources/markdown/zookeeperAdmin.md b/zookeeper-docs/src/main/resources/markdown/zookeeperAdmin.md index 835e3579ef3..4299ca55872 100644 --- a/zookeeper-docs/src/main/resources/markdown/zookeeperAdmin.md +++ b/zookeeper-docs/src/main/resources/markdown/zookeeperAdmin.md @@ -1086,7 +1086,8 @@ As an example, this will enable all four letter word commands: In case of transient errors, this property can improve availability of Zookeeper server and help it to self recover. Default value 3. In container environment, especially in Kubernetes, - this value should be increased to overcome issues related to DNS name resolving. + this value should be increased or set to 0 (infinite retry) to overcome issues + related to DNS name resolution. 
* *observer.reconnectDelayMs* : diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java index 4be8fa68102..829577f6dd4 100644 --- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java +++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java @@ -851,6 +851,7 @@ public class Listener extends ZooKeeperThread { private static final int DEFAULT_PORT_BIND_MAX_RETRY = 3; private final int portBindMaxRetry; + private Runnable socketBindErrorHandler = () -> System.exit(ExitCode.UNABLE_TO_BIND_QUORUM_PORT.getValue()); volatile ServerSocket ss = null; public Listener() { @@ -863,7 +864,8 @@ public Listener() { final Integer maxRetry = Integer.getInteger(ELECTION_PORT_BIND_RETRY, DEFAULT_PORT_BIND_MAX_RETRY); if (maxRetry >= 0) { - LOG.info("Election port bind maximum retries is {}", maxRetry); + LOG.info("Election port bind maximum retries is {}", + maxRetry == 0 ? "infinite" : maxRetry); portBindMaxRetry = maxRetry; } else { LOG.info("'{}' contains invalid value: {}(must be >= 0). " @@ -873,6 +875,11 @@ public Listener() { } } + // VisibleForTesting + void setSocketBindErrorHandler(Runnable errorHandler) { + this.socketBindErrorHandler = errorHandler; + } + /** * Sleeps on accept(). 
*/ @@ -882,7 +889,7 @@ public void run() { InetSocketAddress addr; Socket client = null; Exception exitException = null; - while((!shutdown) && (numRetries < portBindMaxRetry)){ + while ((!shutdown) && (portBindMaxRetry == 0 || numRetries < portBindMaxRetry)) { try { if (self.shouldUsePortUnification()) { LOG.info("Creating TLS-enabled quorum server socket"); diff --git a/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/CnxManagerTest.java b/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/CnxManagerTest.java index 200ed99cbb6..d657de08407 100644 --- a/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/CnxManagerTest.java +++ b/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/CnxManagerTest.java @@ -36,6 +36,7 @@ import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import java.net.Socket; +import java.util.concurrent.atomic.AtomicBoolean; import javax.net.ssl.SSLSession; import javax.net.ssl.SSLSocket; import javax.net.ssl.HandshakeCompletedListener; @@ -307,13 +308,16 @@ public void testCnxManagerListenerThreadConfigurableRetry() throws Exception { ClientBase.createTmpDir(), 2181, 3, myid, 1000, 2, 2, 2); final QuorumCnxManager cnxManager = peer.createCnxnManager(); - QuorumCnxManager.Listener listener = cnxManager.listener; + final QuorumCnxManager.Listener listener = cnxManager.listener; + final AtomicBoolean errorHappend = new AtomicBoolean(); + listener.setSocketBindErrorHandler(() -> errorHappend.set(true)); listener.start(); // listener thread should stop and throws error which notify QuorumPeer about error. // QuorumPeer should start shutdown process listener.join(15000); // set wait time, if listener contains bug and thread not stops. 
Assert.assertFalse(listener.isAlive()); Assert.assertFalse(peer.isRunning()); + Assert.assertTrue(errorHappend.get()); peer.join(15000); Assert.assertFalse(QuorumPeer.class.getSimpleName() + " not stopped after " + "listener thread death", listener.isAlive()); From e25b445510f223c334933d3d1422732f5e0df8af Mon Sep 17 00:00:00 2001 From: Igor Skokov Date: Fri, 2 Aug 2019 08:38:20 +0300 Subject: [PATCH 08/18] ZOOKEEPER-3320: support custom socket bind error handler in QuorumCnxManager.Listener --- .../apache/zookeeper/server/quorum/QuorumCnxManager.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java index 829577f6dd4..6ac430ead1b 100644 --- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java +++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java @@ -875,7 +875,9 @@ public Listener() { } } - // VisibleForTesting + /** + * Change socket bind error handler. Used for testing. + */ void setSocketBindErrorHandler(Runnable errorHandler) { this.socketBindErrorHandler = errorHandler; } @@ -970,7 +972,7 @@ public void run() { // After leaving listener thread, the host cannot join the // quorum anymore, this is a severe error that we cannot // recover from, so we need to exit - System.exit(ExitCode.UNABLE_TO_BIND_QUORUM_PORT.getValue()); + socketBindErrorHandler.run(); } } else if (ss != null) { // Clean up for shutdown. 
From da33c1d3a147102857e76e0f61d1288d0e76d4db Mon Sep 17 00:00:00 2001 From: Igor Skokov Date: Thu, 21 Mar 2019 09:51:46 +0300 Subject: [PATCH 09/18] ZOOKEEPER-3320: configurable retry count for election port bind in QuorumCnxManager.Listener --- .../apache/zookeeper/server/quorum/QuorumCnxManager.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java index d97da2a074a..14556c524a3 100644 --- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java +++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java @@ -848,12 +848,17 @@ private void resetConnectionThreadCount() { */ public class Listener extends ZooKeeperThread { + private final int portBindMaxRetry; volatile ServerSocket ss = null; public Listener() { // During startup of thread, thread name will be overridden to // specific election address super("ListenerThread"); + + // maximum retry count while trying to bind to election port + // see ZOOKEEPER-3320 for more details + portBindMaxRetry = Integer.getInteger("zookeeper.electionPortBindRetry", 3); } /** @@ -865,7 +870,7 @@ public void run() { InetSocketAddress addr; Socket client = null; Exception exitException = null; - while((!shutdown) && (numRetries < 3)){ + while((!shutdown) && (numRetries < portBindMaxRetry)){ try { if (self.shouldUsePortUnification()) { LOG.info("Creating TLS-enabled quorum server socket"); From a9a9342549988ed36724191472a96243f3091e92 Mon Sep 17 00:00:00 2001 From: Igor Skokov Date: Fri, 22 Mar 2019 09:55:03 +0300 Subject: [PATCH 10/18] ZOOKEEPER-3320: add validation and logging of zookeeper.electionPortBindRetry value --- .../zookeeper/server/quorum/QuorumCnxManager.java | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git 
a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java index 14556c524a3..9f67f8021cb 100644 --- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java +++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java @@ -848,6 +848,9 @@ private void resetConnectionThreadCount() { */ public class Listener extends ZooKeeperThread { + private static final String ELECTION_PORT_BIND_RETRY = "zookeeper.electionPortBindRetry"; + private static final int DEFAULT_PORT_BIND_MAX_RETRY = 3; + private final int portBindMaxRetry; volatile ServerSocket ss = null; @@ -858,7 +861,17 @@ public Listener() { // maximum retry count while trying to bind to election port // see ZOOKEEPER-3320 for more details - portBindMaxRetry = Integer.getInteger("zookeeper.electionPortBindRetry", 3); + final Integer maxRetry = Integer.getInteger(ELECTION_PORT_BIND_RETRY, + DEFAULT_PORT_BIND_MAX_RETRY); + if (maxRetry >= 0) { + LOG.info("Election port bind maximum retries is {}", maxRetry); + portBindMaxRetry = maxRetry; + } else { + LOG.info("'{}' contains invalid value: {}(must be >= 0). 
" + + "Use default value of {} instead.", + ELECTION_PORT_BIND_RETRY, maxRetry, DEFAULT_PORT_BIND_MAX_RETRY); + portBindMaxRetry = DEFAULT_PORT_BIND_MAX_RETRY; + } } /** From 0888a29531bae15ce5c9044538855161be78f9c8 Mon Sep 17 00:00:00 2001 From: Igor Skokov Date: Mon, 25 Mar 2019 19:45:57 +0300 Subject: [PATCH 11/18] ZOOKEEPER-3320: add documentation for zookeeper.electionPortBindRetry property --- .../src/main/resources/markdown/zookeeperAdmin.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/zookeeper-docs/src/main/resources/markdown/zookeeperAdmin.md b/zookeeper-docs/src/main/resources/markdown/zookeeperAdmin.md index 7a4773fd921..cef443028d6 100644 --- a/zookeeper-docs/src/main/resources/markdown/zookeeperAdmin.md +++ b/zookeeper-docs/src/main/resources/markdown/zookeeperAdmin.md @@ -1088,6 +1088,18 @@ As an example, this will enable all four letter word commands: properly, check your operating system's options regarding TCP keepalive for more information. Defaults to **false**. + +* *zookeeper.electionPortBindRetry* : + (Java system property only: **zookeeper.electionPortBindRetry**) + Property set max retry count when Zookeeper server fails to bind + leader election port. Such errors can be temporary and recoverable, + such as DNS issue described in [ZOOKEEPER-3320](https://issues.apache.org/jira/projects/ZOOKEEPER/issues/ZOOKEEPER-3320), + or non-retryable, such as port already in use. + In case of transient errors, this property can improve availability + of Zookeeper server and help it to self recover. + Default value 3. In container environment, especially in Kubernetes, + this value should be increased to overcome issues related to DNS name resolving. 
+ * *observer.reconnectDelayMs* : (Java system property: **zookeeper.observer.reconnectDelayMs**) From 587fd95a03b8e93003a31a626e646449ca304239 Mon Sep 17 00:00:00 2001 From: Igor Skokov Date: Wed, 27 Mar 2019 16:17:18 +0300 Subject: [PATCH 12/18] ZOOKEEPER-3320: QuorumCnxManager.Listener extends ZookeeperCriticalThread, add test to CnxManagerTest to check configurable retries of leader election port bind --- .../server/quorum/QuorumCnxManager.java | 24 ++++++++------- .../zookeeper/server/quorum/QuorumPeer.java | 10 +++++++ .../server/quorum/CnxManagerTest.java | 29 +++++++++++++++++++ 3 files changed, 52 insertions(+), 11 deletions(-) diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java index 9f67f8021cb..81e65172ad3 100644 --- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java +++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java @@ -18,6 +18,8 @@ package org.apache.zookeeper.server.quorum; +import static org.apache.zookeeper.common.NetUtils.formatInetAddr; + import java.io.BufferedInputStream; import java.io.BufferedOutputStream; import java.io.DataInputStream; @@ -36,6 +38,7 @@ import java.util.Enumeration; import java.util.HashSet; import java.util.Map; +import java.util.NoSuchElementException; import java.util.Set; import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.ConcurrentHashMap; @@ -43,24 +46,20 @@ import java.util.concurrent.ThreadFactory; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; -import java.util.NoSuchElementException; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; - +import javax.net.ssl.SSLSocket; import org.apache.zookeeper.common.X509Exception; import org.apache.zookeeper.server.ExitCode; -import 
org.apache.zookeeper.server.quorum.QuorumPeerConfig.ConfigException; -import org.apache.zookeeper.server.util.ConfigUtils; import org.apache.zookeeper.server.ZooKeeperThread; +import org.apache.zookeeper.server.quorum.QuorumPeerConfig.ConfigException; import org.apache.zookeeper.server.quorum.auth.QuorumAuthLearner; import org.apache.zookeeper.server.quorum.auth.QuorumAuthServer; import org.apache.zookeeper.server.quorum.flexible.QuorumVerifier; +import org.apache.zookeeper.server.util.ConfigUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import javax.net.ssl.SSLSocket; -import static org.apache.zookeeper.common.NetUtils.formatInetAddr; - /** * This class implements a connection manager for leader election using TCP. It * maintains one connection for every pair of servers. The tricky part is to @@ -953,10 +952,13 @@ public void run() { } LOG.info("Leaving listener"); if (!shutdown) { - LOG.error("As I'm leaving the listener thread, " - + "I won't be able to participate in leader " - + "election any longer: " - + formatInetAddr(self.getElectionAddress())); + LOG.error("As I'm leaving the listener thread after " + + numRetries + " errors. " + + "I won't be able to participate in leader " + + "election any longer: " + + formatInetAddr(self.getElectionAddress()) + + ". 
Use " + ELECTION_PORT_BIND_RETRY + " property to " + + "increase retry count."); if (exitException instanceof BindException) { // After leaving listener thread, the host cannot join the // quorum anymore, this is a severe error that we cannot diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumPeer.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumPeer.java index 521f6d42bab..f72421cbe8e 100644 --- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumPeer.java +++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumPeer.java @@ -1402,6 +1402,7 @@ public void run() { start_fle = Time.currentElapsedTime(); } } finally { + shutdown(); LOG.warn("QuorumPeer main thread exited"); MBeanRegistry instance = MBeanRegistry.getInstance(); instance.unregister(jmxQuorumBean); @@ -2002,6 +2003,15 @@ public boolean isRunning() { return running; } + /** + * Request graceful shutdown of quorum peer. + * Method don't wait when quorum peer will be stopped, it's only send a 'signal' + * which will be handled in QuorumPeer.run(). + */ + public void requestStop() { + running = false; + } + /** * get reference to QuorumCnxManager */ diff --git a/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/CnxManagerTest.java b/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/CnxManagerTest.java index 878e41b4c00..b5d6b14a5a6 100644 --- a/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/CnxManagerTest.java +++ b/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/CnxManagerTest.java @@ -290,6 +290,35 @@ public void testCnxManagerSpinLock() throws Exception { Assert.assertFalse(cnxManager.listener.isAlive()); } + /** + * Test for bug described in {@link https://issues.apache.org/jira/browse/ZOOKEEPER-3320}. 
+ * The test creates a peer whose address contains an unresolvable DNS name; + * the leader election listener thread should stop after N errors. + * + * @throws Exception + */ + @Test + public void testCnxManagerListenerThreadConfigurableRetry() throws Exception { + final Map unresolvablePeers = new HashMap<>(); + final long myid = 1L; + unresolvablePeers.put(myid, new QuorumServer(myid, "unresolvable-domain.org:2182:2183;2181")); + final QuorumPeer peer = new QuorumPeer(unresolvablePeers, + ClientBase.createTmpDir(), + ClientBase.createTmpDir(), + 2181, 3, myid, 1000, 2, 2); + final QuorumCnxManager cnxManager = peer.createCnxnManager(); + QuorumCnxManager.Listener listener = cnxManager.listener; + listener.start(); + // listener thread should stop and throws error which notify QuorumPeer about error. + // QuorumPeer should start shutdown process + listener.join(15000); // set wait time, if listener contains bug and thread not stops. + Assert.assertFalse(listener.isAlive()); + Assert.assertFalse(peer.isRunning()); + peer.join(15000); + Assert.assertFalse(QuorumPeer.class.getSimpleName() + " not stopped after " + + "listener thread death", listener.isAlive()); + } + /** * Tests a bug in QuorumCnxManager that causes a NPE when a 3.4.6 * observer connects to a 3.5.0 server. 
From eeb5c4155a63f6231deacf918af15a4ec378a167 Mon Sep 17 00:00:00 2001 From: Igor Skokov Date: Thu, 11 Jul 2019 11:27:45 +0300 Subject: [PATCH 13/18] ZOOKEEPER-3320: use existing scheme to stop server when QuorumCnxManager.Listener fails to bind to election port --- .../zookeeper/server/quorum/QuorumCnxManager.java | 2 +- .../org/apache/zookeeper/server/quorum/QuorumPeer.java | 10 ---------- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java index 81e65172ad3..4be8fa68102 100644 --- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java +++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java @@ -959,7 +959,7 @@ public void run() { + formatInetAddr(self.getElectionAddress()) + ". Use " + ELECTION_PORT_BIND_RETRY + " property to " + "increase retry count."); - if (exitException instanceof BindException) { + if (exitException instanceof SocketException) { // After leaving listener thread, the host cannot join the // quorum anymore, this is a severe error that we cannot // recover from, so we need to exit diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumPeer.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumPeer.java index f72421cbe8e..521f6d42bab 100644 --- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumPeer.java +++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumPeer.java @@ -1402,7 +1402,6 @@ public void run() { start_fle = Time.currentElapsedTime(); } } finally { - shutdown(); LOG.warn("QuorumPeer main thread exited"); MBeanRegistry instance = MBeanRegistry.getInstance(); instance.unregister(jmxQuorumBean); @@ -2003,15 +2002,6 @@ public boolean isRunning() { return running; } - /** - * 
Request graceful shutdown of quorum peer. - * Method don't wait when quorum peer will be stopped, it's only send a 'signal' - * which will be handled in QuorumPeer.run(). - */ - public void requestStop() { - running = false; - } - /** * get reference to QuorumCnxManager */ From 5051b4cdf08e1565e09014072fb56871245cf447 Mon Sep 17 00:00:00 2001 From: Igor Skokov Date: Mon, 29 Jul 2019 11:25:17 +0300 Subject: [PATCH 14/18] ZOOKEEPER-3320: fix of test compilation --- .../java/org/apache/zookeeper/server/quorum/CnxManagerTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/CnxManagerTest.java b/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/CnxManagerTest.java index b5d6b14a5a6..200ed99cbb6 100644 --- a/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/CnxManagerTest.java +++ b/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/CnxManagerTest.java @@ -305,7 +305,7 @@ public void testCnxManagerListenerThreadConfigurableRetry() throws Exception { final QuorumPeer peer = new QuorumPeer(unresolvablePeers, ClientBase.createTmpDir(), ClientBase.createTmpDir(), - 2181, 3, myid, 1000, 2, 2); + 2181, 3, myid, 1000, 2, 2, 2); final QuorumCnxManager cnxManager = peer.createCnxnManager(); QuorumCnxManager.Listener listener = cnxManager.listener; listener.start(); From 7b222efbe2c3b7c0056a50687052d954d21fb723 Mon Sep 17 00:00:00 2001 From: Igor Skokov Date: Thu, 1 Aug 2019 11:54:52 +0300 Subject: [PATCH 15/18] ZOOKEEPER-3320: handle 0 value for zookeeper.electionPortBindRetry as infinite, fix CnxManagerTest. 
testCnxManagerListenerThreadConfigurableRetry to prevent JVM exit during testing --- .../src/main/resources/markdown/zookeeperAdmin.md | 3 ++- .../zookeeper/server/quorum/QuorumCnxManager.java | 11 +++++++++-- .../zookeeper/server/quorum/CnxManagerTest.java | 6 +++++- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/zookeeper-docs/src/main/resources/markdown/zookeeperAdmin.md b/zookeeper-docs/src/main/resources/markdown/zookeeperAdmin.md index cef443028d6..f7d38df9c71 100644 --- a/zookeeper-docs/src/main/resources/markdown/zookeeperAdmin.md +++ b/zookeeper-docs/src/main/resources/markdown/zookeeperAdmin.md @@ -1098,7 +1098,8 @@ As an example, this will enable all four letter word commands: In case of transient errors, this property can improve availability of Zookeeper server and help it to self recover. Default value 3. In container environment, especially in Kubernetes, - this value should be increased to overcome issues related to DNS name resolving. + this value should be increased or set to 0 (infinite retry) to overcome issues + related to DNS name resolving.
* *observer.reconnectDelayMs* : diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java index 4be8fa68102..829577f6dd4 100644 --- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java +++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java @@ -851,6 +851,7 @@ public class Listener extends ZooKeeperThread { private static final int DEFAULT_PORT_BIND_MAX_RETRY = 3; private final int portBindMaxRetry; + private Runnable socketBindErrorHandler = () -> System.exit(ExitCode.UNABLE_TO_BIND_QUORUM_PORT.getValue()); volatile ServerSocket ss = null; public Listener() { @@ -863,7 +864,8 @@ public Listener() { final Integer maxRetry = Integer.getInteger(ELECTION_PORT_BIND_RETRY, DEFAULT_PORT_BIND_MAX_RETRY); if (maxRetry >= 0) { - LOG.info("Election port bind maximum retries is {}", maxRetry); + LOG.info("Election port bind maximum retries is {}", + maxRetry == 0 ? "infinite" : maxRetry); portBindMaxRetry = maxRetry; } else { LOG.info("'{}' contains invalid value: {}(must be >= 0). " @@ -873,6 +875,11 @@ public Listener() { } } + // VisibleForTesting + void setSocketBindErrorHandler(Runnable errorHandler) { + this.socketBindErrorHandler = errorHandler; + } + /** * Sleeps on accept(). 
*/ @@ -882,7 +889,7 @@ public void run() { InetSocketAddress addr; Socket client = null; Exception exitException = null; - while((!shutdown) && (numRetries < portBindMaxRetry)){ + while ((!shutdown) && (portBindMaxRetry == 0 || numRetries < portBindMaxRetry)) { try { if (self.shouldUsePortUnification()) { LOG.info("Creating TLS-enabled quorum server socket"); diff --git a/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/CnxManagerTest.java b/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/CnxManagerTest.java index 200ed99cbb6..d657de08407 100644 --- a/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/CnxManagerTest.java +++ b/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/CnxManagerTest.java @@ -36,6 +36,7 @@ import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import java.net.Socket; +import java.util.concurrent.atomic.AtomicBoolean; import javax.net.ssl.SSLSession; import javax.net.ssl.SSLSocket; import javax.net.ssl.HandshakeCompletedListener; @@ -307,13 +308,16 @@ public void testCnxManagerListenerThreadConfigurableRetry() throws Exception { ClientBase.createTmpDir(), 2181, 3, myid, 1000, 2, 2, 2); final QuorumCnxManager cnxManager = peer.createCnxnManager(); - QuorumCnxManager.Listener listener = cnxManager.listener; + final QuorumCnxManager.Listener listener = cnxManager.listener; + final AtomicBoolean errorHappend = new AtomicBoolean(); + listener.setSocketBindErrorHandler(() -> errorHappend.set(true)); listener.start(); // listener thread should stop and throws error which notify QuorumPeer about error. // QuorumPeer should start shutdown process listener.join(15000); // set wait time, if listener contains bug and thread not stops. 
Assert.assertFalse(listener.isAlive()); Assert.assertFalse(peer.isRunning()); + Assert.assertTrue(errorHappend.get()); peer.join(15000); Assert.assertFalse(QuorumPeer.class.getSimpleName() + " not stopped after " + "listener thread death", listener.isAlive()); From 1af098d337cceb9eeecf09cb18df9bd5c2125995 Mon Sep 17 00:00:00 2001 From: Igor Skokov Date: Fri, 2 Aug 2019 08:38:20 +0300 Subject: [PATCH 16/18] ZOOKEEPER-3320: support custom socket bind error handler in QuorumCnxManager.Listener --- .../apache/zookeeper/server/quorum/QuorumCnxManager.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java index 829577f6dd4..6ac430ead1b 100644 --- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java +++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java @@ -875,7 +875,9 @@ public Listener() { } } - // VisibleForTesting + /** + * Change socket bind error handler. Used for testing. + */ void setSocketBindErrorHandler(Runnable errorHandler) { this.socketBindErrorHandler = errorHandler; } @@ -970,7 +972,7 @@ public void run() { // After leaving listener thread, the host cannot join the // quorum anymore, this is a severe error that we cannot // recover from, so we need to exit - System.exit(ExitCode.UNABLE_TO_BIND_QUORUM_PORT.getValue()); + socketBindErrorHandler.run(); } } else if (ss != null) { // Clean up for shutdown. 
From f95ee187b34575cf7589549fcc22137f3639f113 Mon Sep 17 00:00:00 2001 From: Igor Skokov Date: Fri, 2 Aug 2019 09:30:52 +0300 Subject: [PATCH 17/18] ZOOKEEPER-3320: CnxManagerTest.testCnxManagerListenerThreadConfigurableRetry fix --- .../java/org/apache/zookeeper/server/quorum/CnxManagerTest.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/CnxManagerTest.java b/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/CnxManagerTest.java index d657de08407..276f35f4767 100644 --- a/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/CnxManagerTest.java +++ b/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/CnxManagerTest.java @@ -316,9 +316,7 @@ public void testCnxManagerListenerThreadConfigurableRetry() throws Exception { // QuorumPeer should start shutdown process listener.join(15000); // set wait time, if listener contains bug and thread not stops. Assert.assertFalse(listener.isAlive()); - Assert.assertFalse(peer.isRunning()); Assert.assertTrue(errorHappend.get()); - peer.join(15000); Assert.assertFalse(QuorumPeer.class.getSimpleName() + " not stopped after " + "listener thread death", listener.isAlive()); } From 50d64659e014408a04025f56324423883859badb Mon Sep 17 00:00:00 2001 From: Igor Skokov Date: Fri, 2 Aug 2019 16:29:12 +0300 Subject: [PATCH 18/18] ZOOKEEPER-3320: doc fix, rename config property 'zookeeper.electionPortBindRetry' to 'electionPortBindRetry' --- zookeeper-docs/src/main/resources/markdown/zookeeperAdmin.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zookeeper-docs/src/main/resources/markdown/zookeeperAdmin.md b/zookeeper-docs/src/main/resources/markdown/zookeeperAdmin.md index f7d38df9c71..5f19fb92c93 100644 --- a/zookeeper-docs/src/main/resources/markdown/zookeeperAdmin.md +++ b/zookeeper-docs/src/main/resources/markdown/zookeeperAdmin.md @@ -1089,7 +1089,7 @@ As an example, this will enable all four letter word 
commands: keepalive for more information. Defaults to **false**. -* *zookeeper.electionPortBindRetry* : +* *electionPortBindRetry* : (Java system property only: **zookeeper.electionPortBindRetry**) Property set max retry count when Zookeeper server fails to bind leader election port. Such errors can be temporary and recoverable,