Use applied cluster state in cluster health (#44426)

In #44348 we changed the cluster health action so that it sometimes uses the cluster state directly from the master service rather than from the cluster applier. This is inappropriate if the state has not yet been recovered: prior to state recovery, the state available to the cluster applier contains no indices, so the applier has a different view of the cluster state from the one supplied by the master service. This commit moves us back to using the state from the applier. Fixes #44416.
elastic · Jul 17, 2019 · dca8a91
1 parent 0fd33b0
commit dca8a91
Show file tree

Hide file tree

Showing 2 changed files with 15 additions and 8 deletions.
diff --git a/...main/java/org/elasticsearch/action/admin/cluster/health/TransportClusterHealthAction.java b/...main/java/org/elasticsearch/action/admin/cluster/health/TransportClusterHealthAction.java
@@ -145,7 +145,13 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS
                         final long timeoutInMillis = Math.max(0, endTimeRelativeMillis - threadPool.relativeTimeInMillis());
                         final TimeValue newTimeout = TimeValue.timeValueMillis(timeoutInMillis);
                         request.timeout(newTimeout);
-                        executeHealth(request, newState, listener, waitCount,
+
+                        // we must use the state from the applier service, because if the state-not-recovered block is in place then the
+                        // applier service has a different view of the cluster state from the one supplied here
+                        final ClusterState appliedState = clusterService.state();
+                        assert newState.stateUUID().equals(appliedState.stateUUID())
+                            : newState.stateUUID() + " vs " + appliedState.stateUUID();
+                        executeHealth(request, appliedState, listener, waitCount,
                             observedState -> waitForEventsAndExecuteHealth(request, listener, waitCount, endTimeRelativeMillis));
                     }
 

diff --git a/server/src/test/java/org/elasticsearch/gateway/GatewayIndexStateIT.java b/server/src/test/java/org/elasticsearch/gateway/GatewayIndexStateIT.java
@@ -211,7 +211,6 @@ public void testSimpleOpenClose() throws Exception {
         client().prepareIndex("test", "type1", "2").setSource("field1", "value1").execute().actionGet();
     }
 
-    @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/44416")
     public void testJustMasterNode() throws Exception {
         logger.info("--> cleaning nodes");
 
@@ -221,11 +220,13 @@ public void testJustMasterNode() throws Exception {
         logger.info("--> create an index");
         client().admin().indices().prepareCreate("test").setWaitForActiveShards(ActiveShardCount.NONE).execute().actionGet();
 
-        logger.info("--> closing master node");
-        internalCluster().closeNonSharedNodes(false);
-
-        logger.info("--> starting 1 master node non data again");
-        internalCluster().startNode(Settings.builder().put(Node.NODE_DATA_SETTING.getKey(), false).build());
+        logger.info("--> restarting master node");
+        internalCluster().fullRestart(new RestartCallback(){
+            @Override
+            public Settings onNodeStopped(String nodeName) {
+                return Settings.builder().put(Node.NODE_DATA_SETTING.getKey(), false).build();
+            }
+        });
 
         logger.info("--> waiting for test index to be created");
         ClusterHealthResponse health = client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setIndices("test")
@@ -237,7 +238,7 @@ public void testJustMasterNode() throws Exception {
         assertThat(clusterStateResponse.getState().metaData().hasIndex("test"), equalTo(true));
     }
 
-    public void testJustMasterNodeAndJustDataNode() throws Exception {
+    public void testJustMasterNodeAndJustDataNode() {
         logger.info("--> cleaning nodes");
 
         logger.info("--> starting 1 master node non data");