Skip to content
Permalink
Browse files
CURATOR-564 (#351)
Like was done for TestingServer, catch startup issues for TestingCluster and then re-recreate and re-start the cluster one time. Hopefully this will make the tests more stable.

Co-authored-by: randgalt <randgalt@apache.org>
  • Loading branch information
Randgalt and randgalt committed Mar 23, 2020
1 parent d65669b commit f204083e3d1d887971aa6ed34cc8d7f4cd1ecb5e
Showing 10 changed files with 54 additions and 38 deletions.
@@ -110,9 +110,8 @@ public void testProtectionWithKilledSession() throws Exception
// by the Instance Curator is connected to but the session kill needs a quorum vote (it's a
// transaction)

try (TestingCluster cluster = new TestingCluster(3))
try (TestingCluster cluster = createAndStartCluster(3))
{
cluster.start();
InstanceSpec instanceSpec0 = cluster.getServers().get(0).getInstanceSpec();

CountDownLatch serverStoppedLatch = new CountDownLatch(1);
@@ -27,6 +27,7 @@
import org.apache.curator.retry.ExponentialBackoffRetry;
import org.apache.curator.retry.RetryNTimes;
import org.apache.curator.retry.RetryOneTime;
import org.apache.curator.test.BaseClassForTests;
import org.apache.curator.test.InstanceSpec;
import org.apache.curator.test.TestingCluster;
import org.apache.curator.test.Timing;
@@ -40,7 +41,7 @@
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;

public class TestReadOnly
public class TestReadOnly extends BaseClassForTests
{
@BeforeMethod
public void setup()
@@ -58,12 +59,10 @@ public void tearDown()
public void testConnectionStateNewClient() throws Exception
{
Timing timing = new Timing();
TestingCluster cluster = new TestingCluster(3);
CuratorFramework client = null;
TestingCluster cluster = createAndStartCluster(3);
try
{
cluster.start();

client = CuratorFrameworkFactory.newClient(cluster.getConnectString(), timing.session(), timing.connection(), new RetryOneTime(100));
client.start();
client.checkExists().forPath("/");
@@ -116,11 +115,9 @@ public void testReadOnly() throws Exception
Timing timing = new Timing();

CuratorFramework client = null;
TestingCluster cluster = new TestingCluster(2);
TestingCluster cluster = createAndStartCluster(2);
try
{
cluster.start();

client = CuratorFrameworkFactory.builder().connectString(cluster.getConnectString()).canBeReadOnly(true).connectionTimeoutMs(timing.connection()).sessionTimeoutMs(timing.session()).retryPolicy(new ExponentialBackoffRetry(100, 3)).build();
client.start();

@@ -167,4 +164,10 @@ else if ( newState == ConnectionState.RECONNECTED )
CloseableUtils.closeQuietly(cluster);
}
}

@Override
protected void createServer() throws Exception
{
// NOP
}
}
@@ -75,9 +75,7 @@ public void setup() throws Exception
System.setProperty("zookeeper.DigestAuthenticationProvider.superDigest", superUserPasswordDigest);

CloseableUtils.closeQuietly(server);
server = null;
cluster = new TestingCluster(3);
cluster.start();
cluster = createAndStartCluster(3);
}

@AfterMethod
@@ -406,6 +404,12 @@ public void testMixedIPv2() throws Exception
Assert.assertEquals("127.0.0.1:2181", configString);
}

@Override
protected void createServer() throws Exception
{
// NOP
}

private CuratorFramework newClient()
{
return newClient(cluster.getConnectString());
@@ -42,8 +42,7 @@ public void testSessionSurvives() throws Exception
Timing timing = new Timing();

CuratorFramework client = null;
TestingCluster cluster = new TestingCluster(3);
cluster.start();
TestingCluster cluster = createAndStartCluster(3);
try
{
client = CuratorFrameworkFactory.newClient(cluster.getConnectString(), timing.session(), timing.connection(), new ExponentialBackoffRetry(100, 3));
@@ -90,8 +89,7 @@ public void testSplitBrain() throws Exception
Timing timing = new Timing();

CuratorFramework client = null;
TestingCluster cluster = new TestingCluster(3);
cluster.start();
TestingCluster cluster = createAndStartCluster(3);
try
{
// make sure all instances are up
@@ -45,11 +45,9 @@ public void testMissedDelete() throws Exception
PathChildrenCache cache = null;
CuratorFramework client1 = null;
CuratorFramework client2 = null;
TestingCluster cluster = new TestingCluster(3);
TestingCluster cluster = createAndStartCluster(3);
try
{
cluster.start();

// client 1 only connects to 1 server
InstanceSpec client1Instance = cluster.getInstances().iterator().next();
client1 = CuratorFrameworkFactory.newClient(client1Instance.getConnectString(), 1000, 1000, new RetryOneTime(1));
@@ -103,11 +101,9 @@ public void testServerLoss() throws Exception

CuratorFramework client = null;
PathChildrenCache cache = null;
TestingCluster cluster = new TestingCluster(3);
TestingCluster cluster = createAndStartCluster(3);
try
{
cluster.start();

client = CuratorFrameworkFactory.newClient(cluster.getConnectString(), timing.session(), timing.connection(), new RetryOneTime(1));
client.start();
client.create().creatingParentsIfNeeded().forPath("/test");
@@ -187,7 +187,7 @@ public void testWatchedNodeDeletedOnReconnect() throws Exception
try ( LeaderLatch latch2 = new LeaderLatch(client, latchPath, "2") )
{
latch1.start();
latch1.await();
Assert.assertTrue(latch1.await(timing.milliseconds(), TimeUnit.MILLISECONDS));

latch2.start(); // will get a watcher on latch1's node
timing.sleepABit();
@@ -57,11 +57,9 @@ public void testInCluster() throws Exception
final int sessionLength = timing.session() / 4;

List<ClientAndLatch> clients = Lists.newArrayList();
TestingCluster cluster = new TestingCluster(PARTICIPANT_QTY);
TestingCluster cluster = createAndStartCluster(PARTICIPANT_QTY);
try
{
cluster.start();

List<InstanceSpec> instances = Lists.newArrayList(cluster.getInstances());
for ( int i = 0; i < PARTICIPANT_QTY; ++i )
{
@@ -45,8 +45,7 @@ public void testRestart() throws Exception
final Timing timing = new Timing();

CuratorFramework client = null;
TestingCluster cluster = new TestingCluster(3);
cluster.start();
TestingCluster cluster = createAndStartCluster(3);
try
{
client = CuratorFrameworkFactory.newClient(cluster.getConnectString(), timing.session(), timing.connection(), new RetryOneTime(1));
@@ -91,8 +90,7 @@ public void testLostRestart() throws Exception
final Timing timing = new Timing();

CuratorFramework client = null;
TestingCluster cluster = new TestingCluster(3);
cluster.start();
TestingCluster cluster = createAndStartCluster(3);
try
{
client = CuratorFrameworkFactory.newClient(cluster.getConnectString(), timing.session(), timing.connection(), new RetryOneTime(1));
@@ -59,11 +59,9 @@ public void testKilledServerWithEnsembleProvider() throws Exception

ExecutorService executorService = Executors.newFixedThreadPool(CLIENT_QTY);
ExecutorCompletionService<Void> completionService = new ExecutorCompletionService<Void>(executorService);
TestingCluster cluster = new TestingCluster(3);
TestingCluster cluster = createAndStartCluster(3);
try
{
cluster.start();

final AtomicReference<String> connectionString = new AtomicReference<String>(cluster.getConnectString());
final EnsembleProvider provider = new EnsembleProvider()
{
@@ -178,8 +176,7 @@ public void stateChanged(CuratorFramework client, ConnectionState newState)
timing.forWaiting().sleepABit();
Assert.assertEquals(0, acquireCount.get());

cluster = new TestingCluster(3);
cluster.start();
cluster = createAndStartCluster(3);

connectionString.set(cluster.getConnectString());
timing.forWaiting().sleepABit();
@@ -207,12 +204,10 @@ public void testCluster() throws Exception
ExecutorService executorService = Executors.newFixedThreadPool(QTY);
ExecutorCompletionService<Void> completionService = new ExecutorCompletionService<Void>(executorService);
final Timing timing = new Timing();
TestingCluster cluster = new TestingCluster(3);
List<SemaphoreClient> semaphoreClients = Lists.newArrayList();
TestingCluster cluster = createAndStartCluster(3);
try
{
cluster.start();

final AtomicInteger opCount = new AtomicInteger(0);
for ( int i = 0; i < QTY; ++i )
{
@@ -136,6 +136,31 @@ public void setup() throws Exception
}
}

public TestingCluster createAndStartCluster(int qty) throws Exception
{
TestingCluster cluster = new TestingCluster(qty);
try
{
cluster.start();
}
catch ( FailedServerStartException e )
{
log.warn("Failed to start cluster - retrying 1 more time");
// cluster creation failed - we've sometime seen this with re-used addresses, etc. - retry one more time
try
{
cluster.close();
}
catch ( Exception ex )
{
// ignore
}
cluster = new TestingCluster(qty);
cluster.start();
}
return cluster;
}

protected void createServer() throws Exception
{
while ( server == null )

0 comments on commit f204083

Please sign in to comment.