Skip to content

Commit

Permalink
Fix Concurrent Snapshot Repository Remove Issues (#74880) (#75120)
Browse files Browse the repository at this point in the history
We need to fail repository operations as soon as a repository has been shut down
to avoid getting into broken cluster state situations where operations are created
in the cluster state but the repository they target was removed concurrently.

closes #74858
  • Loading branch information
original-brownbear committed Jul 8, 2021
1 parent 5182b9f commit 9d58799
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
import org.elasticsearch.indices.recovery.RecoveryState;
import org.elasticsearch.node.Node;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.repositories.RepositoryException;
import org.elasticsearch.repositories.blobstore.BlobStoreRepository;
import org.elasticsearch.rest.AbstractRestChannel;
import org.elasticsearch.rest.RestRequest;
Expand Down Expand Up @@ -90,6 +91,7 @@
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertFutureThrows;
import static org.hamcrest.Matchers.allOf;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.either;
import static org.hamcrest.Matchers.empty;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThan;
Expand Down Expand Up @@ -1264,6 +1266,45 @@ public void testGetReposWithWildcard() {
assertThat(repositoryMetadata, empty());
}

public void testConcurrentSnapshotAndRepoDelete() throws Exception {
internalCluster().startMasterOnlyNodes(1);
internalCluster().startDataOnlyNode();
final String repoName = "test-repo";
createRepository(repoName, "fs");

// create a few snapshots so deletes will run for a while
final int snapshotCount = randomIntBetween(10, 25);
final List<String> snapshotNames = createNSnapshots(repoName, snapshotCount);

// concurrently trigger repository and snapshot deletes
final List<ActionFuture<AcknowledgedResponse>> deleteFutures = new ArrayList<>(snapshotCount);
final ActionFuture<AcknowledgedResponse> deleteRepoFuture = clusterAdmin().prepareDeleteRepository(repoName).execute();
for (String snapshotName : snapshotNames) {
deleteFutures.add(clusterAdmin().prepareDeleteSnapshot(repoName, snapshotName).execute());
}

try {
assertAcked(deleteRepoFuture.actionGet());
} catch (Exception e) {
assertThat(
e.getMessage(),
containsString(
"trying to modify or unregister repository [test-repo] that is currently used (snapshot deletion is in progress)"
)
);
}
for (ActionFuture<AcknowledgedResponse> deleteFuture : deleteFutures) {
try {
assertAcked(deleteFuture.actionGet());
} catch (RepositoryException e) {
assertThat(
e.getMessage(),
either(containsString("[test-repo] repository is not in started state")).or(containsString("[test-repo] missing"))
);
}
}
}

private long calculateTotalFilesSize(List<Path> files) {
return files.stream().mapToLong(f -> {
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ private void cleanupRepo(String repositoryName, ActionListener<RepositoryCleanup

@Override
public ClusterState execute(ClusterState currentState) {
SnapshotsService.ensureRepositoryExists(repositoryName, currentState);
final RepositoryCleanupInProgress repositoryCleanupInProgress = currentState.custom(
RepositoryCleanupInProgress.TYPE,
RepositoryCleanupInProgress.EMPTY
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -675,6 +675,10 @@ protected BlobStore getBlobStore() {
protected BlobContainer blobContainer() {
assertSnapshotOrGenericThread();

if (lifecycle.started() == false) {
throw notStartedException();
}

BlobContainer blobContainer = this.blobContainer.get();
if (blobContainer == null) {
synchronized (lock) {
Expand Down Expand Up @@ -702,7 +706,7 @@ public BlobStore blobStore() {
store = blobStore.get();
if (store == null) {
if (lifecycle.started() == false) {
throw new RepositoryException(metadata.name(), "repository is not in started state");
throw notStartedException();
}
try {
store = createBlobStore();
Expand Down Expand Up @@ -1666,6 +1670,11 @@ public void getRepositoryData(ActionListener<RepositoryData> listener) {
// master-eligible or not.
assert clusterService.localNode().isMasterNode() : "should only load repository data on master nodes";

if (lifecycle.started() == false) {
listener.onFailure(notStartedException());
return;
}

if (latestKnownRepoGen.get() == RepositoryData.CORRUPTED_REPO_GEN) {
listener.onFailure(corruptedStateException(null, null));
return;
Expand Down Expand Up @@ -1708,6 +1717,10 @@ && isReadOnly() == false
}
}

private RepositoryException notStartedException() {
return new RepositoryException(metadata.name(), "repository is not in started state");
}

// Listener used to ensure that repository data is only initialized once in the cluster state by #initializeRepoGenerationTracking
private ListenableActionFuture<RepositoryData> repoDataInitialized;

Expand Down Expand Up @@ -2484,7 +2497,8 @@ private RepositoryMetadata getRepoMetadata(ClusterState state) {
final RepositoryMetadata repositoryMetadata = state.getMetadata()
.<RepositoriesMetadata>custom(RepositoriesMetadata.TYPE)
.repository(metadata.name());
assert repositoryMetadata != null;
assert repositoryMetadata != null || lifecycle.stoppedOrClosed()
: "did not find metadata for repo [" + metadata.name() + "] in state [" + lifecycleState() + "]";
return repositoryMetadata;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,7 @@ public void createSnapshotLegacy(final CreateSnapshotRequest request, final Acti

@Override
public ClusterState execute(ClusterState currentState) {
ensureRepositoryExists(repositoryName, currentState);
validate(repositoryName, snapshotName, currentState);
SnapshotDeletionsInProgress deletionsInProgress = currentState.custom(SnapshotDeletionsInProgress.TYPE);
if (deletionsInProgress != null && deletionsInProgress.hasDeletionsInProgress()) {
Expand Down Expand Up @@ -460,6 +461,7 @@ public void createSnapshot(final CreateSnapshotRequest request, final ActionList

@Override
public ClusterState execute(ClusterState currentState) {
ensureRepositoryExists(repositoryName, currentState);
ensureSnapshotNameAvailableInRepo(repositoryData, snapshotName, repository);
final SnapshotsInProgress snapshots = currentState.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY);
final List<SnapshotsInProgress.Entry> runningSnapshots = snapshots.entries();
Expand Down Expand Up @@ -652,6 +654,7 @@ public void cloneSnapshot(CloneSnapshotRequest request, ActionListener<Void> lis

@Override
public ClusterState execute(ClusterState currentState) {
ensureRepositoryExists(repositoryName, currentState);
ensureSnapshotNameAvailableInRepo(repositoryData, snapshotName, repository);
ensureNoCleanupInProgress(currentState, repositoryName, snapshotName);
final SnapshotsInProgress snapshots = currentState.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY);
Expand Down Expand Up @@ -977,6 +980,15 @@ private void ensureBelowConcurrencyLimit(
}
}

/**
* Throws {@link RepositoryMissingException} if no repository by the given name is found in the given cluster state.
*/
public static void ensureRepositoryExists(String repoName, ClusterState state) {
if (state.metadata().custom(RepositoriesMetadata.TYPE, RepositoriesMetadata.EMPTY).repository(repoName) == null) {
throw new RepositoryMissingException(repoName);
}
}

/**
* Validates snapshot request
*
Expand Down Expand Up @@ -2357,6 +2369,7 @@ public ClusterState execute(ClusterState currentState) throws Exception {
+ "]"
);
}
ensureRepositoryExists(repoName, currentState);
final SnapshotsInProgress snapshots = currentState.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY);
final List<SnapshotsInProgress.Entry> snapshotEntries = findInProgressSnapshots(snapshots, snapshotNames, repoName);
final List<SnapshotId> snapshotIds = matchingSnapshotIds(
Expand Down

0 comments on commit 9d58799

Please sign in to comment.