Skip to content

Commit

Permalink
Make TransportAddVotingConfigExclusionsAction retryable (#98568)
Browse files Browse the repository at this point in the history
The docs for this API say the following:

> If the API fails, you can safely retry it. Only a successful response
> guarantees that the node has been removed from the voting
> configuration and will not be reinstated.

Unfortunately this isn't true today: if the request adds no exclusions
then we do not wait before responding. This commit makes the API wait
until all exclusions are really applied.

Backport of #98386, plus the test changes from #98146 and #98356.
  • Loading branch information
DaveCTurner committed Aug 17, 2023
1 parent bb57a0c commit fe18a67
Show file tree
Hide file tree
Showing 3 changed files with 227 additions and 194 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,9 @@
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.transport.TransportService;

import java.util.HashSet;
import java.util.Set;
import java.util.function.Predicate;
import java.util.stream.Collectors;

public class TransportAddVotingConfigExclusionsAction extends TransportMasterNodeAction<
AddVotingConfigExclusionsRequest,
Expand Down Expand Up @@ -99,13 +99,14 @@ protected void masterOperation(

clusterService.submitStateUpdateTask("add-voting-config-exclusions", new ClusterStateUpdateTask(Priority.URGENT) {

private Set<VotingConfigExclusion> resolvedExclusions;

@Override
public ClusterState execute(ClusterState currentState) {
assert resolvedExclusions == null : resolvedExclusions;
final int finalMaxVotingConfigExclusions = TransportAddVotingConfigExclusionsAction.this.maxVotingConfigExclusions;
resolvedExclusions = resolveVotingConfigExclusionsAndCheckMaximum(request, currentState, finalMaxVotingConfigExclusions);
final Set<VotingConfigExclusion> resolvedExclusions = resolveVotingConfigExclusionsAndCheckMaximum(
request,
currentState,
finalMaxVotingConfigExclusions
);

final CoordinationMetadata.Builder builder = CoordinationMetadata.builder(currentState.coordinationMetadata());
resolvedExclusions.forEach(builder::addVotingConfigExclusion);
Expand All @@ -130,13 +131,13 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS
threadPool.getThreadContext()
);

final Set<String> excludedNodeIds = resolvedExclusions.stream()
.map(VotingConfigExclusion::getNodeId)
.collect(Collectors.toSet());

final Predicate<ClusterState> allNodesRemoved = clusterState -> {
final Set<String> votingConfigNodeIds = clusterState.getLastCommittedConfiguration().getNodeIds();
return excludedNodeIds.stream().noneMatch(votingConfigNodeIds::contains);
final Set<String> votingConfigNodeIds = new HashSet<>();
votingConfigNodeIds.addAll(clusterState.getLastCommittedConfiguration().getNodeIds());
votingConfigNodeIds.addAll(clusterState.getLastAcceptedConfiguration().getNodeIds());
return clusterState.getVotingConfigExclusions()
.stream()
.noneMatch(votingConfigExclusion -> votingConfigNodeIds.contains(votingConfigExclusion.getNodeId()));
};

final Listener clusterStateListener = new Listener() {
Expand All @@ -148,20 +149,14 @@ public void onNewClusterState(ClusterState state) {
@Override
public void onClusterServiceClose() {
listener.onFailure(
new ElasticsearchException(
"cluster service closed while waiting for voting config exclusions "
+ resolvedExclusions
+ " to take effect"
)
new ElasticsearchException("cluster service closed while waiting for voting config exclusions to take effect")
);
}

@Override
public void onTimeout(TimeValue timeout) {
listener.onFailure(
new ElasticsearchTimeoutException(
"timed out waiting for voting config exclusions " + resolvedExclusions + " to take effect"
)
new ElasticsearchTimeoutException("timed out waiting for voting config exclusions to take effect")
);
}
};
Expand Down

0 comments on commit fe18a67

Please sign in to comment.