Skip to content

Commit

Permalink
zookeeper: work-around SessionTracker racy initialisation on startup
Browse files Browse the repository at this point in the history
Motivation:

ZooKeeper server accepts requests before the server is fully
initialised. This creates a race-condition for whether the
SessionTracker is initialised.  If the thread initialising
ZooKeeperServer looses this race then there are stack-traces like:

java.lang.NullPointerException: null
	at org.apache.zookeeper.server.ZooKeeperServer.createSession(ZooKeeperServer.java:569) ~[zookeeper-3.4.8.jar:3.4.8--1]
	at org.apache.zookeeper.server.ZooKeeperServer.processConnectRequest(ZooKeeperServer.java:902) ~[zookeeper-3.4.8.jar:3.4.8--1]
	at org.apache.zookeeper.server.NIOServerCnxn.readConnectRequest(NIOServerCnxn.java:418) ~[zookeeper-3.4.8.jar:3.4.8--1]
	at org.apache.zookeeper.server.NIOServerCnxn.readPayload(NIOServerCnxn.java:198) ~[zookeeper-3.4.8.jar:3.4.8--1]
	at org.apache.zookeeper.server.NIOServerCnxn.doIO(NIOServerCnxn.java:244) ~[zookeeper-3.4.8.jar:3.4.8--1]
	at org.apache.zookeeper.server.NIOServerCnxnFactory.run(NIOServerCnxnFactory.java:203) ~[zookeeper-3.4.8.jar:3.4.8--1]
	at java.lang.Thread.run(Thread.java:748) [na:1.8.0_131]

Modification:

Add work-around by manually creating the SessionTracker.  To do this,
the access for the method must be made public, requiring the existing
implicit class be made explicit.

Result:

Hopefully fewer races in the zookeeper service.

Target: master
Patch: https://rb.dcache.org/r/10260/
Request: 3.1
Request: 3.0
Request: 2.16
Require-notes: no
Require-book: no
Fixes: #3247
Acked-by: Tigran Mkrtchyan

Conflicts:
	modules/dcache/src/main/java/org/dcache/zookeeper/service/ZooKeeperCell.java
  • Loading branch information
paulmillar committed Jun 20, 2017
1 parent 8e35378 commit 58451b7
Showing 1 changed file with 24 additions and 20 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
import org.apache.zookeeper.server.SessionTrackerImpl;
import org.apache.zookeeper.server.ZKDatabase;
import org.apache.zookeeper.server.ZooKeeperServer;
import org.apache.zookeeper.server.ZooTrace;
import org.apache.zookeeper.server.persistence.FileTxnSnapLog;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand All @@ -48,6 +47,27 @@ public class ZooKeeperCell extends AbstractCell
{
private static final Logger LOG = LoggerFactory.getLogger(ZooKeeperCell.class);

private class PatchedZooKeeperServer extends ZooKeeperServer
{
// Work around https://issues.apache.org/jira/browse/ZOOKEEPER-2515
// and https://issues.apache.org/jira/browse/ZOOKEEPER-2812
@Override
public void createSessionTracker() {
sessionTracker = new SessionTrackerImpl(this,
getZKDatabase().getSessionWithTimeOuts(), tickTime, 1)
{
@Override
public void shutdown() {
synchronized (this) {
running = false;
notifyAll();
}
super.shutdown();
}
};
}
}

@Option(name = "data-log-dir", required = true)
protected File dataLogDir;

Expand Down Expand Up @@ -92,7 +112,7 @@ public class ZooKeeperCell extends AbstractCell

private ServerCnxnFactory cnxnFactory;
private FileTxnSnapLog txnLog;
private ZooKeeperServer zkServer;
private PatchedZooKeeperServer zkServer;

public ZooKeeperCell(String cellName, String arguments)
{
Expand All @@ -108,24 +128,7 @@ protected void startUp() throws Exception
Strings.isNullOrEmpty(address) ? new InetSocketAddress(port) : new InetSocketAddress(address, port);

checkArgument(autoPurgeInterval > 0, "zookeeper.auto-purge.purge-interval must be non-negative.");
zkServer = new ZooKeeperServer() {
// Work around https://issues.apache.org/jira/browse/ZOOKEEPER-2515
@Override
protected void createSessionTracker() {
sessionTracker = new SessionTrackerImpl(this,
getZKDatabase().getSessionWithTimeOuts(), tickTime, 1)
{
@Override
public void shutdown() {
synchronized (this) {
running = false;
notifyAll();
}
super.shutdown();
}
};
}
};
zkServer = new PatchedZooKeeperServer();

txnLog = new FileTxnSnapLog(dataLogDir, dataDir);
zkServer.setTxnLogFactory(txnLog);
Expand All @@ -134,6 +137,7 @@ public void shutdown() {
zkServer.setMaxSessionTimeout(maxSessionTimeout == -1 ? -1 : (int) maxSessionTimeoutUnit.toMillis(maxSessionTimeout));

zkServer.setZKDatabase(new ZKDatabase(txnLog)); // Work-around https://issues.apache.org/jira/browse/ZOOKEEPER-2810
zkServer.createSessionTracker(); // Work around https://issues.apache.org/jira/browse/ZOOKEEPER-2812

ServerCnxnFactory cnxnFactory;
cnxnFactory = new NIOServerCnxnFactory() {
Expand Down

0 comments on commit 58451b7

Please sign in to comment.