From 84b0c2f7d52d156707bda2899b0635a58674b518 Mon Sep 17 00:00:00 2001 From: Martyn Taylor Date: Fri, 8 Dec 2017 12:47:03 +0000 Subject: [PATCH] ARTEMIS-1543 Fix Quorum Vote with remain live setting --- .../server/cluster/qourum/QuorumManager.java | 1 + .../qourum/QuorumVoteServerConnect.java | 16 +- .../server/impl/ServerConnectVoteHandler.java | 5 +- .../impl/SharedNothingLiveActivation.java | 2 +- .../ClusterWithBackupFailoverTest.java | 344 ++++++++++++++++++ .../ClusterWithBackupFailoverTestBase.java | 328 +---------------- ...iscoveryClusterWithBackupFailoverTest.java | 2 +- .../LiveVoteOnBackupFailureClusterTest.java | 132 +++++++ 8 files changed, 497 insertions(+), 333 deletions(-) create mode 100644 tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/ClusterWithBackupFailoverTest.java create mode 100644 tests/integration-tests/src/test/java/org/apache/activemq/artemis/tests/integration/cluster/failover/LiveVoteOnBackupFailureClusterTest.java diff --git a/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/cluster/qourum/QuorumManager.java b/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/cluster/qourum/QuorumManager.java index f8b39081517..77a7d18c677 100644 --- a/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/cluster/qourum/QuorumManager.java +++ b/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/cluster/qourum/QuorumManager.java @@ -299,6 +299,7 @@ public void run() { clusterControl.authorize(); //if we are successful get the vote and check whether we need to send it to the target server, //just connecting may be enough + vote = quorumVote.connected(); if (vote.isRequestServerVote()) { vote = clusterControl.sendQuorumVote(quorumVote.getName(), vote); diff --git a/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/cluster/qourum/QuorumVoteServerConnect.java 
b/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/cluster/qourum/QuorumVoteServerConnect.java index f6c608ef965..dcc1892c473 100644 --- a/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/cluster/qourum/QuorumVoteServerConnect.java +++ b/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/cluster/qourum/QuorumVoteServerConnect.java @@ -38,6 +38,9 @@ public class QuorumVoteServerConnect extends QuorumVote servers[3].isReplicaSync()); + } + + // Register failure listener to detect when live recognises the backup has died. + final CountDownLatch latch = new CountDownLatch(1); + servers[0].getReplicationManager().getBackupTransportConnection().addFailureListener(new FailureListener() { + @Override + public void connectionFailed(ActiveMQException exception, boolean failedOver) { + latch.countDown(); + } + + @Override + public void connectionFailed(ActiveMQException exception, boolean failedOver, String scaleDownTargetNodeID) { + latch.countDown(); + } + }); + + servers[3].stop(); + + // Wait for live to notice backup is down. + // NOTE(review): the boolean returned by await() is discarded, so a 30s timeout does not stop the test from reaching the final assertion - consider asserting on it. + latch.await(30, TimeUnit.SECONDS); + + // The quorum vote timeout is hardcoded to 5s. Wait for double that time, then check that servers[0] is still started. + Thread.sleep(10000); + assertTrue(servers[0].isStarted()); + } + + /** + * Sets up the cluster and starts all six servers in two waves: servers 0-2 first, then servers 3-5. + * A BackupSyncDelay is created for servers[4] and servers[1] before the second wave is started. + * Afterwards waits for the topology on each of servers 0-2 (the live server IDs) and for the + * failover topology of servers 3, 4 and 5 against nodes 0, 1 and 2. + * NOTE(review): the (3, 3) arguments to waitForTopology presumably mean three live members and three backups - confirm against the base class. + * + * @throws Exception if cluster setup, server start-up or any of the topology waits fails + */ + private void startCluster() throws Exception { + int[] liveServerIDs = new int[]{0, 1, 2}; + setupCluster(); + startServers(0, 1, 2); + new BackupSyncDelay(servers[4], servers[1], PacketImpl.REPLICATION_SCHEDULED_FAILOVER); + startServers(3, 4, 5); + + for (int i : liveServerIDs) { + waitForTopology(servers[i], 3, 3); + } + + waitForFailoverTopology(3, 0, 1, 2); + waitForFailoverTopology(4, 0, 1, 2); + waitForFailoverTopology(5, 0, 1, 2); + } +}