From 9969979ce8c85741904cfe6ad566424c5d8939b3 Mon Sep 17 00:00:00 2001 From: Michael Han Date: Thu, 16 Mar 2017 16:10:18 -0700 Subject: [PATCH 1/2] ZOOKEEPER-2722: fix flaky test testSessionEstablishment. --- .../org/apache/zookeeper/test/ClientBase.java | 33 ++++++++++++++++--- .../zookeeper/test/ReadOnlyModeTest.java | 19 +++++++++-- 2 files changed, 45 insertions(+), 7 deletions(-) diff --git a/src/java/test/org/apache/zookeeper/test/ClientBase.java b/src/java/test/org/apache/zookeeper/test/ClientBase.java index 309b2b2f8e6..98b1ba5e6a1 100644 --- a/src/java/test/org/apache/zookeeper/test/ClientBase.java +++ b/src/java/test/org/apache/zookeeper/test/ClientBase.java @@ -96,7 +96,10 @@ public void process(WatchedEvent event) { /* nada */ } public static class CountdownWatcher implements Watcher { // XXX this doesn't need to be volatile! (Should probably be final) volatile CountDownLatch clientConnected; + // Set to true when connected to a read-only server, or a read-write (quorum) server. volatile boolean connected; + // Set to true when connected to a quorum server. + volatile boolean syncConnected; public CountdownWatcher() { reset(); @@ -104,16 +107,23 @@ public CountdownWatcher() { synchronized public void reset() { clientConnected = new CountDownLatch(1); connected = false; + syncConnected = false; } synchronized public void process(WatchedEvent event) { - if (event.getState() == KeeperState.SyncConnected || - event.getState() == KeeperState.ConnectedReadOnly) { + KeeperState state = event.getState(); + if (state == KeeperState.SyncConnected) { + connected = true; + syncConnected = true; + } else if (state == KeeperState.ConnectedReadOnly) { connected = true; - notifyAll(); - clientConnected.countDown(); } else { connected = false; - notifyAll(); + syncConnected = false; + } + + notifyAll(); + if (connected) { + clientConnected.countDown(); } } synchronized public boolean isConnected() { @@ -133,6 +143,19 @@ synchronized public void waitForConnected(long timeout) } } + synchronized public void waitForSyncConnected(long timeout) + throws InterruptedException, TimeoutException + { + long expire = Time.currentElapsedTime() + timeout; + long left = timeout; + while(!syncConnected && left > 0) { + wait(left); + left = expire - Time.currentElapsedTime(); + } + if (!syncConnected) { + throw new TimeoutException("Failed to connect to read-write ZooKeeper server."); + } + } synchronized public void waitForDisconnected(long timeout) throws InterruptedException, TimeoutException { diff --git a/src/java/test/org/apache/zookeeper/test/ReadOnlyModeTest.java b/src/java/test/org/apache/zookeeper/test/ReadOnlyModeTest.java index 21ff9f3c430..5dfbb215b63 100644 --- a/src/java/test/org/apache/zookeeper/test/ReadOnlyModeTest.java +++ b/src/java/test/org/apache/zookeeper/test/ReadOnlyModeTest.java @@ -47,8 +47,11 @@ import org.junit.Assert; import org.junit.Before; import org.junit.Test; +import org.slf4j.LoggerFactory; public class ReadOnlyModeTest extends ZKTestCase { + private static final org.slf4j.Logger LOG = LoggerFactory + .getLogger(ReadOnlyModeTest.class); private static int CONNECTION_TIMEOUT = QuorumBase.CONNECTION_TIMEOUT; private QuorumUtil qu = new QuorumUtil(1); @@ -229,17 +232,29 @@ public void testSessionEstablishment() throws Exception { Assert.assertSame("should be in r/o mode", States.CONNECTEDREADONLY, zk .getState()); long fakeId = zk.getSessionId(); + LOG.info("Connected as r/o mode with state {} and session id {}", + zk.getState(), fakeId); watcher.reset(); qu.start(2); Assert.assertTrue("waiting for server up", ClientBase.waitForServerUp( "127.0.0.1:" + qu.getPeer(2).clientPort, CONNECTION_TIMEOUT)); - watcher.waitForConnected(CONNECTION_TIMEOUT); + LOG.info("Server 127.0.0.1:{} is up", qu.getPeer(2).clientPort); + // ZOOKEEPER-2722: wait until we can connect to a read-write server after the quorum + // is formed. Otherwise, it is possible that client first connects to a read-only server, + // then drops the connection because of shutting down of the read-only server caused + // by leader election / quorum forming between the read-only server and the newly started + // server. If we happen to execute the zk.create after the read-only server is shutdown and + // before the quorum is formed, we will get a ConnectLossException. + watcher.waitForSyncConnected(CONNECTION_TIMEOUT); + Assert.assertEquals("Should be in read-write mode", States.CONNECTED, + zk.getState()); + LOG.info("Connected as rw mode with state {} and session id {}", + zk.getState(), zk.getSessionId()); zk.create("/test", "test".getBytes(), ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); Assert.assertFalse("fake session and real session have same id", zk .getSessionId() == fakeId); - zk.close(); } From 99bf87f20eda49194eea30f5746f0ca89eb8d8d0 Mon Sep 17 00:00:00 2001 From: Michael Han Date: Tue, 21 Mar 2017 13:02:54 -0700 Subject: [PATCH 2/2] Make sure syncConnected flag is set to false in read only connected state. --- src/java/test/org/apache/zookeeper/test/ClientBase.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/java/test/org/apache/zookeeper/test/ClientBase.java b/src/java/test/org/apache/zookeeper/test/ClientBase.java index 98b1ba5e6a1..5a1e0718e2d 100644 --- a/src/java/test/org/apache/zookeeper/test/ClientBase.java +++ b/src/java/test/org/apache/zookeeper/test/ClientBase.java @@ -116,6 +116,7 @@ synchronized public void process(WatchedEvent event) { syncConnected = true; } else if (state == KeeperState.ConnectedReadOnly) { connected = true; + syncConnected = false; } else { connected = false; syncConnected = false;