From 0e9ca5ebde89abf690eb972059ff348020bad4cf Mon Sep 17 00:00:00 2001 From: Boaz Leskes Date: Thu, 3 Jul 2014 12:24:04 +0200 Subject: [PATCH] [Discovery] immediately start Master|Node fault detection pinging After a node joins the clusters, it starts pinging the master to verify it's health. Before, the cluster join request was processed async and we had to give some time to complete. With #6480 we changed this to wait for the join process to complete on the master. We can therefore start pinging immediately for fast detection of failures. Similar change can be made to the Node fault detection from the master side. Closes #6706 --- .../discovery/zen/fd/MasterFaultDetection.java | 8 +++++--- .../discovery/zen/fd/NodesFaultDetection.java | 6 ++++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/elasticsearch/discovery/zen/fd/MasterFaultDetection.java b/src/main/java/org/elasticsearch/discovery/zen/fd/MasterFaultDetection.java index d5b8f49ef8188..3bc66296ba653 100644 --- a/src/main/java/org/elasticsearch/discovery/zen/fd/MasterFaultDetection.java +++ b/src/main/java/org/elasticsearch/discovery/zen/fd/MasterFaultDetection.java @@ -153,8 +153,9 @@ private void innerStart(final DiscoveryNode masterNode) { masterPinger.stop(); } this.masterPinger = new MasterPinger(); - // start the ping process - threadPool.schedule(pingInterval, ThreadPool.Names.SAME, masterPinger); + + // we use schedule with a 0 time value to run the pinger on the pool as it will run on later + threadPool.schedule(TimeValue.timeValueMillis(0), ThreadPool.Names.SAME, masterPinger); } public void stop(String reason) { @@ -198,7 +199,8 @@ private void handleTransportDisconnect(DiscoveryNode node) { masterPinger.stop(); } this.masterPinger = new MasterPinger(); - threadPool.schedule(pingInterval, ThreadPool.Names.SAME, masterPinger); + // we use schedule with a 0 time value to run the pinger on the pool as it will run on later + threadPool.schedule(TimeValue.timeValueMillis(0), ThreadPool.Names.SAME, masterPinger); } catch (Exception e) { logger.trace("[master] [{}] transport disconnected (with verified connect)", masterNode); notifyMasterFailure(masterNode, "transport disconnected (with verified connect)"); diff --git a/src/main/java/org/elasticsearch/discovery/zen/fd/NodesFaultDetection.java b/src/main/java/org/elasticsearch/discovery/zen/fd/NodesFaultDetection.java index 71e5d78675523..3df262564b9b0 100644 --- a/src/main/java/org/elasticsearch/discovery/zen/fd/NodesFaultDetection.java +++ b/src/main/java/org/elasticsearch/discovery/zen/fd/NodesFaultDetection.java @@ -119,7 +119,8 @@ public void updateNodes(DiscoveryNodes nodes) { } if (!nodesFD.containsKey(newNode)) { nodesFD.put(newNode, new NodeFD()); - threadPool.schedule(pingInterval, ThreadPool.Names.SAME, new SendPingRequest(newNode)); + // we use schedule with a 0 time value to run the pinger on the pool as it will run on later + threadPool.schedule(TimeValue.timeValueMillis(0), ThreadPool.Names.SAME, new SendPingRequest(newNode)); } } for (DiscoveryNode removedNode : delta.removedNodes()) { @@ -165,7 +166,8 @@ private void handleTransportDisconnect(DiscoveryNode node) { try { transportService.connectToNode(node); nodesFD.put(node, new NodeFD()); - threadPool.schedule(pingInterval, ThreadPool.Names.SAME, new SendPingRequest(node)); + // we use schedule with a 0 time value to run the pinger on the pool as it will run on later + threadPool.schedule(TimeValue.timeValueMillis(0), ThreadPool.Names.SAME, new SendPingRequest(node)); } catch (Exception e) { logger.trace("[node ] [{}] transport disconnected (with verified connect)", node); notifyNodeFailure(node, "transport disconnected (with verified connect)");