Skip to content

Commit

Permalink
Avoid double-recovery when state recovery delayed
Browse files Browse the repository at this point in the history
Today if state recovery is delayed by the `gateway.recover_after_*` settings
then we may end up performing state recovery twice: once when enough nodes have
joined the cluster, and again when the timeout elapses. The second state
recovery reinitializes the routing table, effectively discarding all
recovered/recovering shards and starting again from scratch. This commit adds a
check to prevent this second state recovery.

Closes #55564
  • Loading branch information
chengyang14 authored and DaveCTurner committed Apr 29, 2020
1 parent c1f1b79 commit a91e9b7
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,7 @@ public void onFailure(final Exception e) {

@Override
protected void doRun() {
logger.debug("performing state recovery...");
recoveryRunnable.run();
}
});
Expand All @@ -248,6 +249,11 @@ class RecoverStateUpdateTask extends ClusterStateUpdateTask {

@Override
public ClusterState execute(final ClusterState currentState) {
if (currentState.blocks().hasGlobalBlock(STATE_NOT_RECOVERED_BLOCK) == false) {
logger.debug("cluster is already recovered");
return currentState;
}

final ClusterState newState = Function.<ClusterState>identity()
.andThen(ClusterStateUpdaters::updateRoutingTable)
.andThen(ClusterStateUpdaters::removeStateNotRecoveredBlock)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,21 +19,49 @@

package org.elasticsearch.gateway;

import org.elasticsearch.Version;
import org.elasticsearch.cluster.ClusterName;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.ClusterStateUpdateTask;
import org.elasticsearch.cluster.EmptyClusterInfoService;
import org.elasticsearch.cluster.block.ClusterBlockLevel;
import org.elasticsearch.cluster.block.ClusterBlocks;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.node.DiscoveryNodes;
import org.elasticsearch.cluster.routing.allocation.AllocationService;
import org.elasticsearch.cluster.routing.allocation.allocator.BalancedShardsAllocator;
import org.elasticsearch.cluster.routing.allocation.decider.AllocationDeciders;
import org.elasticsearch.cluster.routing.allocation.decider.ReplicaAfterPrimaryActiveAllocationDecider;
import org.elasticsearch.cluster.routing.allocation.decider.SameShardAllocationDecider;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.settings.ClusterSettings;
import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.TransportAddress;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.node.Node;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.test.gateway.TestGatewayAllocator;
import org.hamcrest.Matchers;

import java.util.Arrays;
import java.util.HashSet;

import static org.elasticsearch.gateway.GatewayService.STATE_NOT_RECOVERED_BLOCK;
import static org.hamcrest.CoreMatchers.not;
import static org.hamcrest.Matchers.hasItem;

public class GatewayServiceTests extends ESTestCase {

private GatewayService createService(final Settings.Builder settings) {
final ClusterService clusterService = new ClusterService(Settings.builder().put("cluster.name", "GatewayServiceTests").build(),
new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS),
null);
return new GatewayService(settings.build(), null, clusterService, null, null, null);
final AllocationService allocationService = new AllocationService(new AllocationDeciders(new HashSet<>(
Arrays.asList(new SameShardAllocationDecider(Settings.EMPTY, new ClusterSettings(Settings.EMPTY,
ClusterSettings.BUILT_IN_CLUSTER_SETTINGS)), new ReplicaAfterPrimaryActiveAllocationDecider()))),
new TestGatewayAllocator(), new BalancedShardsAllocator(Settings.EMPTY), EmptyClusterInfoService.INSTANCE);
return new GatewayService(settings.build(), allocationService, clusterService, null, null, null);
}

public void testDefaultRecoverAfterTime() {
Expand Down Expand Up @@ -69,4 +97,23 @@ public void testDeprecatedSettings() {
assertSettingDeprecationsAndWarnings(new Setting<?>[] {GatewayService.RECOVER_AFTER_MASTER_NODES_SETTING });
}

public void testRecoverStateUpdateTask() throws Exception {
GatewayService service = createService(Settings.builder());
ClusterStateUpdateTask clusterStateUpdateTask = service.new RecoverStateUpdateTask();
String nodeId = randomAlphaOfLength(10);
DiscoveryNode masterNode = DiscoveryNode.createLocal(settings(Version.CURRENT)
.put(Node.NODE_MASTER_SETTING.getKey(), true).build(),
new TransportAddress(TransportAddress.META_ADDRESS, 9300), nodeId);
ClusterState stateWithBlock = ClusterState.builder(ClusterName.DEFAULT)
.nodes(DiscoveryNodes.builder().localNodeId(nodeId).masterNodeId(nodeId).add(masterNode).build()).
blocks(ClusterBlocks.builder().addGlobalBlock(STATE_NOT_RECOVERED_BLOCK).build()).build();

ClusterState recoveredState = clusterStateUpdateTask.execute(stateWithBlock);
assertNotEquals(recoveredState, stateWithBlock);
assertThat(recoveredState.blocks().global(ClusterBlockLevel.METADATA_WRITE), not(hasItem(STATE_NOT_RECOVERED_BLOCK)));

ClusterState clusterState = clusterStateUpdateTask.execute(recoveredState);
assertSame(recoveredState, clusterState);
}

}

0 comments on commit a91e9b7

Please sign in to comment.