Skip to content

Commit

Permalink
ZOOKEEPER-3042: testFailedTxnAsPartOfQuorumLoss is flaky
Browse files Browse the repository at this point in the history
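- relax the check of the leader's outstanding proposals queue (extra proposals are tolerated)
- close the existing client before creating a new one when a client is restarted
- restart the old leader's client after the old leader is restarted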

Author: Bogdan Kanivets <bkanivets@gmail.com>

Reviewers: Andor Molnar <andor@apache.org>

Closes #521 from lavacat/testFailedTxnAsPartOfQuorumLoss-fix and squashes the following commits:

c361efa [Bogdan Kanivets] ZOOKEEPER-1932: ignore LETest
427ab8c [Bogdan Kanivets] ZOOKEEPER-3042: testFailedTxnAsPartOfQuorumLoss is flaky
  • Loading branch information
lavacat authored and anmolnar committed Jul 10, 2018
1 parent cdbf03a commit f1429d0
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 5 deletions.
Expand Up @@ -446,6 +446,7 @@ private void waitForAll(ZooKeeper[] zks, States state) throws InterruptedExcepti
boolean someoneNotConnected = true;
while (someoneNotConnected) {
if (iterations-- == 0) {
logStates(zks);
ClientBase.logAllStackTraces();
throw new RuntimeException("Waiting too long");
}
Expand All @@ -460,6 +461,15 @@ private void waitForAll(ZooKeeper[] zks, States state) throws InterruptedExcepti
Thread.sleep(1000);
}
}

/**
 * Logs the connection state of every given client as one error-level line
 * of the form {@code Connection States: {0 : STATE, 1 : STATE, }}.
 *
 * @param zks clients whose states are reported; each entry is labelled
 *            with its index in the array
 */
private void logStates(ZooKeeper[] zks) {
    StringBuilder report = new StringBuilder("Connection States: {");
    int index = 0;
    for (ZooKeeper client : zks) {
        // Every entry keeps its trailing ", " separator, including the last one.
        report.append(index).append(" : ").append(client.getState()).append(", ");
        index++;
    }
    LOG.error(report.append('}').toString());
}

// This class holds the servers and clients for those servers
private static class Servers {
Expand All @@ -473,7 +483,7 @@ public void shutDownAllServers() throws InterruptedException {
}
}

public void restartAllServersAndClients(Watcher watcher) throws IOException {
public void restartAllServersAndClients(Watcher watcher) throws IOException, InterruptedException {
for (MainThread t : mt) {
if (!t.isAlive()) {
t.start();
Expand All @@ -484,7 +494,10 @@ public void restartAllServersAndClients(Watcher watcher) throws IOException {
}
}

public void restartClient(int clientIndex, Watcher watcher) throws IOException {
/**
 * Replaces the client stored at {@code clientIndex} with a freshly created one.
 * An existing client in that slot is closed first; the new client connects to
 * {@code 127.0.0.1} on the port found in {@code clientPorts[clientIndex]}.
 *
 * @param clientIndex slot in the client array (and in {@code clientPorts}) to restart
 * @param watcher     watcher handed to the new client
 * @throws IOException          declared; presumably raised while creating the new client
 * @throws InterruptedException declared; presumably raised while closing the old client
 */
public void restartClient(int clientIndex, Watcher watcher) throws IOException, InterruptedException {
    ZooKeeper previous = zk[clientIndex];
    if (previous != null) {
        previous.close();
    }
    String connectString = "127.0.0.1:" + clientPorts[clientIndex];
    zk[clientIndex] = new ZooKeeper(connectString, ClientBase.CONNECTION_TIMEOUT, watcher);
}

Expand Down Expand Up @@ -967,9 +980,11 @@ public void testFailedTxnAsPartOfQuorumLoss() throws Exception {

// just make sure that we actually did get it in process at the
// leader
Assert.assertEquals(1, outstanding.size());
Proposal p = outstanding.values().iterator().next();
Assert.assertEquals(OpCode.create, p.request.getHdr().getType());
// the queue may also hold extra session-close proposals, so only require that it is non-empty
Assert.assertTrue(outstanding.size() > 0);
Proposal p = findProposalOfType(outstanding, OpCode.create);
LOG.info(String.format("Old leader id: %d. All proposals: %s", leader, outstanding));
Assert.assertNotNull("Old leader doesn't have 'create' proposal", p);

// make sure it has a chance to write it to disk
int sleepTime = 0;
Expand Down Expand Up @@ -1003,6 +1018,8 @@ public void testFailedTxnAsPartOfQuorumLoss() throws Exception {
// 7. restart the previous leader to force it to replay the edits and possibly come up in a bad state
servers.mt[leader].shutdown();
servers.mt[leader].start();
// the old client's session may have expired, so restart the client
servers.restartClient(leader, this);
waitForAll(servers, States.CONNECTED);

// 8. check the node exist in previous leader but not others
Expand Down Expand Up @@ -1169,4 +1186,13 @@ private QuorumPeer waitForQuorumPeer(MainThread mainThread, int timeout) throws
}
}
}

/**
 * Searches the given proposals for one whose request header has the
 * requested type.
 *
 * @param proposals proposals to search; visited in the iteration order of
 *                  {@code proposals.values()}
 * @param type      header type to look for
 * @return the first matching proposal encountered, or {@code null} if none matches
 */
private Proposal findProposalOfType(Map<Long, Proposal> proposals, int type) {
    Proposal match = null;
    for (Proposal candidate : proposals.values()) {
        int candidateType = candidate.request.getHdr().getType();
        if (candidateType == type) {
            match = candidate;
            break;
        }
    }
    return match;
}
}
2 changes: 2 additions & 0 deletions src/java/test/org/apache/zookeeper/test/LETest.java
Expand Up @@ -33,6 +33,7 @@
import org.apache.zookeeper.server.quorum.Vote;
import org.apache.zookeeper.server.quorum.QuorumPeer.QuorumServer;
import org.junit.Assert;
import org.junit.Ignore;
import org.junit.Test;

@SuppressWarnings("deprecation")
Expand Down Expand Up @@ -90,6 +91,7 @@ public void run() {
}

@Test
@Ignore("ZOOKEEPER-1932, this test is flaky and already removed in master")
public void testLE() throws Exception {
int count = 30;
HashMap<Long,QuorumServer> peers = new HashMap<Long,QuorumServer>(count);
Expand Down

0 comments on commit f1429d0

Please sign in to comment.