Skip to content

Commit

Permalink
Retry clean and create snapshot if it already exists #83694 (#84829)
Browse files Browse the repository at this point in the history
During a master restart or failover the step CreateSnapshotStep of the action SearchableSnapshotAction can be invoked twice. When this occurs we mark the creation as incomplete and we follow the existing mechanism of returning back to CleanupSnapshotStep.

Closes #83694
  • Loading branch information
gmarouli committed Mar 23, 2022
1 parent 9313896 commit e03c2b0
Show file tree
Hide file tree
Showing 8 changed files with 94 additions and 9 deletions.
6 changes: 6 additions & 0 deletions docs/changelog/84829.yaml
@@ -0,0 +1,6 @@
pr: 84829
summary: "Retry clean and create snapshot if it already exists #83694"
area: ILM+SLM
type: bug
issues:
- 83694
Expand Up @@ -1565,6 +1565,12 @@ private enum ElasticsearchExceptionHandle {
org.elasticsearch.cluster.desirednodes.VersionConflictException::new,
164,
Version.V_8_1_0
),
SNAPSHOT_NAME_ALREADY_IN_USE_EXCEPTION(
org.elasticsearch.snapshots.SnapshotNameAlreadyInUseException.class,
org.elasticsearch.snapshots.SnapshotNameAlreadyInUseException::new,
165,
Version.V_8_2_0
);

final Class<? extends ElasticsearchException> exceptionClass;
Expand Down
Expand Up @@ -30,5 +30,4 @@ public InvalidSnapshotNameException(StreamInput in) throws IOException {
public RestStatus status() {
return RestStatus.BAD_REQUEST;
}

}
@@ -0,0 +1,27 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.snapshots;

import org.elasticsearch.common.io.stream.StreamInput;

import java.io.IOException;

/**
* Thrown on the attempt to create a snapshot with a name that is taken by a snapshot in progress and a snapshot that already exists.
*/
public class SnapshotNameAlreadyInUseException extends InvalidSnapshotNameException {

public SnapshotNameAlreadyInUseException(final String repositoryName, final String snapshotName, String desc) {
super(repositoryName, snapshotName, desc);
}

public SnapshotNameAlreadyInUseException(StreamInput in) throws IOException {
super(in);
}
}
Expand Up @@ -442,7 +442,7 @@ public void clusterStateProcessed(ClusterState oldState, final ClusterState newS

private static void ensureSnapshotNameNotRunning(SnapshotsInProgress runningSnapshots, String repositoryName, String snapshotName) {
if (runningSnapshots.forRepo(repositoryName).stream().anyMatch(s -> s.snapshot().getSnapshotId().getName().equals(snapshotName))) {
throw new InvalidSnapshotNameException(repositoryName, snapshotName, "snapshot with the same name is already in-progress");
throw new SnapshotNameAlreadyInUseException(repositoryName, snapshotName, "snapshot with the same name is already in-progress");
}
}

Expand Down Expand Up @@ -568,7 +568,7 @@ private static void ensureNoCleanupInProgress(
private static void ensureSnapshotNameAvailableInRepo(RepositoryData repositoryData, String snapshotName, Repository repository) {
// check if the snapshot name already exists in the repository
if (repositoryData.getSnapshotIds().stream().anyMatch(s -> s.getName().equals(snapshotName))) {
throw new InvalidSnapshotNameException(
throw new SnapshotNameAlreadyInUseException(
repository.getMetadata().name(),
snapshotName,
"snapshot with the same name already exists"
Expand Down
Expand Up @@ -76,6 +76,7 @@
import org.elasticsearch.snapshots.SnapshotException;
import org.elasticsearch.snapshots.SnapshotId;
import org.elasticsearch.snapshots.SnapshotInProgressException;
import org.elasticsearch.snapshots.SnapshotNameAlreadyInUseException;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.test.VersionUtils;
import org.elasticsearch.transport.ActionNotFoundTransportException;
Expand Down Expand Up @@ -826,6 +827,7 @@ public void testIds() {
ids.put(162, ElasticsearchAuthenticationProcessingError.class);
ids.put(163, RepositoryConflictException.class);
ids.put(164, VersionConflictException.class);
ids.put(165, SnapshotNameAlreadyInUseException.class);

Map<Class<? extends ElasticsearchException>, Integer> reverse = new HashMap<>();
for (Map.Entry<Integer, Class<? extends ElasticsearchException>> entry : ids.entrySet()) {
Expand Down
Expand Up @@ -18,6 +18,7 @@
import org.elasticsearch.common.Strings;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.snapshots.SnapshotInfo;
import org.elasticsearch.snapshots.SnapshotNameAlreadyInUseException;

import java.util.Locale;
import java.util.Objects;
Expand Down Expand Up @@ -61,7 +62,18 @@ public void onResponse(Boolean complete) {

@Override
public void onFailure(Exception e) {
listener.onFailure(e);
if (e instanceof SnapshotNameAlreadyInUseException snapshotNameAlreadyInUseException) {
// we treat a snapshot that was already created before this step as an incomplete snapshot. This scenario is triggered
// by a master restart or a failover which can result in a double invocation of this step.
logger.warn(
"snapshot [{}] is already in-progress or in-use for index [{}], ILM will attempt to clean it up and recreate it",
snapshotNameAlreadyInUseException.getSnapshotName(),
indexMetadata.getIndex().getName()
);
onResponse(false);
} else {
listener.onFailure(e);
}
}
});
}
Expand Down
Expand Up @@ -18,6 +18,7 @@
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.cluster.metadata.LifecycleExecutionState;
import org.elasticsearch.cluster.metadata.Metadata;
import org.elasticsearch.snapshots.SnapshotNameAlreadyInUseException;
import org.elasticsearch.test.client.NoOpClient;
import org.elasticsearch.xpack.core.ilm.Step.StepKey;

Expand Down Expand Up @@ -49,13 +50,15 @@ protected CreateSnapshotStep copyInstance(CreateSnapshotStep instance) {
@Override
public CreateSnapshotStep mutateInstance(CreateSnapshotStep instance) {
StepKey key = instance.getKey();
StepKey nextKeyOnCompleteResponse = instance.getNextKeyOnComplete();
StepKey nextKeyOnIncompleteResponse = instance.getNextKeyOnIncomplete();
switch (between(0, 1)) {
switch (between(0, 2)) {
case 0 -> key = new StepKey(key.getPhase(), key.getAction(), key.getName() + randomAlphaOfLength(5));
case 1 -> nextKeyOnIncompleteResponse = randomStepKey();
case 1 -> nextKeyOnCompleteResponse = randomStepKey();
case 2 -> nextKeyOnIncompleteResponse = randomStepKey();
default -> throw new AssertionError("Illegal randomisation branch");
}
return new CreateSnapshotStep(key, randomStepKey(), nextKeyOnIncompleteResponse, instance.getClient());
return new CreateSnapshotStep(key, nextKeyOnCompleteResponse, nextKeyOnIncompleteResponse, instance.getClient());
}

public void testPerformActionFailure() {
Expand Down Expand Up @@ -170,7 +173,7 @@ void createSnapshot(IndexMetadata indexMetadata, ActionListener<Boolean> listene
listener.onResponse(true);
}
};
completeStep.performAction(indexMetadata, clusterState, null, new ActionListener<Void>() {
completeStep.performAction(indexMetadata, clusterState, null, new ActionListener<>() {
@Override
public void onResponse(Void unused) {

Expand Down Expand Up @@ -200,7 +203,7 @@ void createSnapshot(IndexMetadata indexMetadata, ActionListener<Boolean> listene
listener.onResponse(false);
}
};
incompleteStep.performAction(indexMetadata, clusterState, null, new ActionListener<Void>() {
incompleteStep.performAction(indexMetadata, clusterState, null, new ActionListener<>() {
@Override
public void onResponse(Void unused) {

Expand All @@ -214,6 +217,36 @@ public void onFailure(Exception e) {
assertThat(incompleteStep.getNextStepKey(), is(nextKeyOnIncomplete));
}
}

{
try (NoOpClient client = new NoOpClient(getTestName())) {
StepKey nextKeyOnComplete = randomStepKey();
StepKey nextKeyOnIncomplete = randomStepKey();
CreateSnapshotStep doubleInvocationStep = new CreateSnapshotStep(
randomStepKey(),
nextKeyOnComplete,
nextKeyOnIncomplete,
client
) {
@Override
void createSnapshot(IndexMetadata indexMetadata, ActionListener<Boolean> listener) {
listener.onFailure(new SnapshotNameAlreadyInUseException(repository, snapshotName, "simulated"));
}
};
doubleInvocationStep.performAction(indexMetadata, clusterState, null, new ActionListener<>() {
@Override
public void onResponse(Void unused) {

}

@Override
public void onFailure(Exception e) {

}
});
assertThat(doubleInvocationStep.getNextStepKey(), is(nextKeyOnIncomplete));
}
}
}

private NoOpClient getCreateSnapshotRequestAssertingClient(String expectedRepoName, String expectedSnapshotName, String indexName) {
Expand Down

0 comments on commit e03c2b0

Please sign in to comment.