Skip to content
Permalink
Browse files
  • Loading branch information
randgalt committed Apr 19, 2020
2 parents 7770d47 + 003f6f0 commit e73b0349b53ae6bbc4c684821204ac699017946d
Showing 3 changed files with 74 additions and 0 deletions.
@@ -816,6 +816,13 @@ EnsembleTracker getEnsembleTracker()
return ensembleTracker;
}

// Test-only hook (scaffolding for CURATOR-525): when non-null, checkBackgroundRetry()
// blocks on this latch just before it calls validateConnection(), letting a test hold
// that code path open until it counts the latch down.
@VisibleForTesting
volatile CountDownLatch debugCheckBackgroundRetryLatch;
// Test-only hook: only consulted when debugCheckBackgroundRetryLatch is non-null. If this
// latch is also non-null, checkBackgroundRetry() counts it down right before it starts
// waiting, so a test can tell that the hook has been reached.
@VisibleForTesting
volatile CountDownLatch debugCheckBackgroundRetryReadyLatch;
// Test-only hook: if non-null once the wait on debugCheckBackgroundRetryLatch completes
// without interruption, this value replaces the result code that checkBackgroundRetry()
// converts and passes to validateConnection().
@VisibleForTesting
volatile KeeperException.Code injectedCode;

@SuppressWarnings({"ThrowableResultOfMethodCallIgnored"})
private <DATA_TYPE> boolean checkBackgroundRetry(OperationAndData<DATA_TYPE> operationAndData, CuratorEvent event)
{
@@ -851,6 +858,26 @@ private <DATA_TYPE> boolean checkBackgroundRetry(OperationAndData<DATA_TYPE> ope
e = new Exception("Unknown result codegetResultCode()");
}

if ( debugCheckBackgroundRetryLatch != null ) // scaffolding to test CURATOR-525
{
if ( debugCheckBackgroundRetryReadyLatch != null )
{
debugCheckBackgroundRetryReadyLatch.countDown();
}
try
{
debugCheckBackgroundRetryLatch.await();
if (injectedCode != null)
{
code = injectedCode;
}
}
catch ( InterruptedException ex )
{
Thread.currentThread().interrupt();
}
}

validateConnection(codeToState(code));
logError("Background operation retry gave up", e);
}
@@ -285,6 +285,17 @@ else if ( sessionExpirationPercent > 0 )
checkSessionExpiration();
}
}

synchronized(this)
{
if ( (currentConnectionState == ConnectionState.LOST) && client.getZookeeperClient().isConnected() )
{
// CURATOR-525 - there is a race whereby LOST is sometimes set after the connection has been repaired
// this "hack" fixes it by forcing the state to RECONNECTED
log.warn("ConnectionState is LOST but isConnected() is true. Forcing RECONNECTED.");
addStateChange(ConnectionState.RECONNECTED);
}
}
}
catch ( InterruptedException e )
{
@@ -75,6 +75,42 @@ public static void setUpClass()
System.setProperty("zookeeper.extendedTypesEnabled", "true");
}

/**
 * Regression test for CURATOR-525: there is a race whereby Curator can go to LOST
 * after the connection has been repaired. Prior to the fix, the Curator instance
 * would become a zombie, never leaving the LOST state.
 *
 * <p>The test stops the server and waits for SUSPENDED then LOST, parks a background
 * delete inside {@code checkBackgroundRetry()} using the debug latches, restarts the
 * server and waits for RECONNECTED, and only then releases the parked operation with an
 * injected {@code SESSIONEXPIRED} code. It expects a LOST event followed by a final
 * RECONNECTED event.
 *
 * @throws Exception if any client or server operation fails
 */
@Test(description = "test case for CURATOR-525")
public void testValidateConnectionEventRaces() throws Exception
{
    try ( CuratorFramework client = CuratorFrameworkFactory.newClient(server.getConnectString(), 2000, 1000, new RetryOneTime(1)) )
    {
        CuratorFrameworkImpl clientImpl = (CuratorFrameworkImpl)client;

        client.start();
        client.getChildren().forPath("/");
        client.create().forPath("/foo");

        BlockingQueue<ConnectionState> stateQueue = new LinkedBlockingQueue<>();
        client.getConnectionStateListenable().addListener((__, newState) -> stateQueue.add(newState));

        server.stop();
        Assert.assertEquals(timing.takeFromQueue(stateQueue), ConnectionState.SUSPENDED);
        Assert.assertEquals(timing.takeFromQueue(stateQueue), ConnectionState.LOST);

        // Install the "ready" latch before the blocking latch: checkBackgroundRetry() only
        // looks at the ready latch after it has seen the blocking latch as non-null, so this
        // order guarantees the ready signal cannot be missed.
        clientImpl.debugCheckBackgroundRetryReadyLatch = new CountDownLatch(1);
        clientImpl.debugCheckBackgroundRetryLatch = new CountDownLatch(1);

        client.delete().guaranteed().inBackground().forPath("/foo");

        // Fail right here if the background operation never reaches the debug hook, instead of
        // continuing and failing later on an unrelated-looking queue assertion.
        Assert.assertTrue(timing.awaitLatch(clientImpl.debugCheckBackgroundRetryReadyLatch), "background retry never reached the CURATOR-525 debug hook");

        server.restart();
        Assert.assertEquals(timing.takeFromQueue(stateQueue), ConnectionState.RECONNECTED);

        // simulate an expiration being handled after the connection is repaired; the code is
        // set before the latch is released so the parked thread observes it when it wakes up
        clientImpl.injectedCode = KeeperException.Code.SESSIONEXPIRED;
        clientImpl.debugCheckBackgroundRetryLatch.countDown();
        Assert.assertEquals(timing.takeFromQueue(stateQueue), ConnectionState.LOST);

        // the client must recover from the late LOST rather than staying stuck in it
        Assert.assertEquals(timing.takeFromQueue(stateQueue), ConnectionState.RECONNECTED);
    }
}

@Test
public void testInjectSessionExpiration() throws Exception
{

0 comments on commit e73b034

Please sign in to comment.