diff --git a/docs/changelog/137210.yaml b/docs/changelog/137210.yaml
new file mode 100644
index 0000000000000..070c6a003fb81
--- /dev/null
+++ b/docs/changelog/137210.yaml
@@ -0,0 +1,6 @@
+pr: 137210
+summary: "Introduce INDEX_SHARD_COUNT_FORMAT"
+area: Snapshot/Restore
+type: bug
+issues:
+ - 131822
diff --git a/qa/repository-multi-version/src/test/java/org/elasticsearch/upgrades/MultiVersionRepositoryAccessIT.java b/qa/repository-multi-version/src/test/java/org/elasticsearch/upgrades/MultiVersionRepositoryAccessIT.java
index 1b2fc46a7ff9d..57d9240772506 100644
--- a/qa/repository-multi-version/src/test/java/org/elasticsearch/upgrades/MultiVersionRepositoryAccessIT.java
+++ b/qa/repository-multi-version/src/test/java/org/elasticsearch/upgrades/MultiVersionRepositoryAccessIT.java
@@ -27,6 +27,7 @@
 import java.io.IOException;
 import java.io.InputStream;
 import java.net.HttpURLConnection;
+import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
 import java.util.stream.Collectors;
@@ -227,6 +228,49 @@ public void testUpgradeMovesRepoToNewMetaVersion() throws IOException {
         }
     }
 
+    public void testSnapshotCreatedInOldVersionCanBeDeletedInNew() throws IOException {
+        final String repoName = getTestName();
+        try {
+            final int shards = 3;
+            final String index = "test-index";
+            createIndex(index, shards);
+            final IndexVersion minNodeVersion = minimumIndexVersion();
+            // 7.12.0+ will try to load RepositoryData during repo creation if verify is true, which is impossible in case of version
+            // incompatibility in the downgrade test step.
+            final boolean verify = TEST_STEP != TestStep.STEP3_OLD_CLUSTER
+                || SnapshotsServiceUtils.includesUUIDs(minNodeVersion)
+                || minNodeVersion.before(IndexVersions.V_7_12_0);
+            createRepository(repoName, false, verify);
+
+            // Create snapshots in the first step
+            if (TEST_STEP == TestStep.STEP1_OLD_CLUSTER) {
+                int numberOfSnapshots = randomIntBetween(5, 10);
+                for (int i = 0; i < numberOfSnapshots; i++) {
+                    createSnapshot(repoName, "snapshot-" + i, index);
+                }
+                final List<Map<String, Object>> snapshots = listSnapshots(repoName);
+                assertSnapshotStatusSuccessful(repoName, snapshots.stream().map(sn -> (String) sn.get("snapshot")).toArray(String[]::new));
+            } else if (TEST_STEP == TestStep.STEP2_NEW_CLUSTER) {
+                final List<Map<String, Object>> snapshots = listSnapshots(repoName);
+                List<String> snapshotNames = new ArrayList<>(snapshots.stream().map(sn -> (String) sn.get("snapshot")).toList());
+
+                // Delete a single snapshot
+                deleteSnapshot(repoName, snapshotNames.removeFirst());
+
+                // Delete several snapshots in bulk, but avoid deleting all of them here: deleting every snapshot invokes the
+                // repository cleanup code and bulk snapshot deletion logic already tested in testUpgradeMovesRepoToNewMetaVersion
+                final List<String> snapshotsToDeleteInBulk = randomSubsetOf(randomIntBetween(1, snapshotNames.size() - 1), snapshotNames);
+                deleteSnapshot(repoName, snapshotsToDeleteInBulk);
+                snapshotNames.removeAll(snapshotsToDeleteInBulk);
+
+                // Delete the rest of the snapshots (will invoke bulk snapshot deletion logic)
+                deleteSnapshot(repoName, snapshotNames);
+            }
+        } finally {
+            deleteRepository(repoName);
+        }
+    }
+
     private static void assertSnapshotStatusSuccessful(String repoName, String... snapshots) throws IOException {
         Request statusReq = new Request("GET", "/_snapshot/" + repoName + "/" + String.join(",", snapshots) + "/_status");
         ObjectPath statusResp = ObjectPath.createFromResponse(client().performRequest(statusReq));
@@ -235,6 +279,12 @@ private static void assertSnapshotStatusSuccessful(String repoName, String... snapshots) throws IOException {
         }
     }
 
+    private void deleteSnapshot(String repoName, List<String> names) throws IOException {
+        assertAcknowledged(
+            client().performRequest(new Request("DELETE", "/_snapshot/" + repoName + "/" + Strings.collectionToCommaDelimitedString(names)))
+        );
+    }
+
     private void deleteSnapshot(String repoName, String name) throws IOException {
         assertAcknowledged(client().performRequest(new Request("DELETE", "/_snapshot/" + repoName + "/" + name)));
     }
diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/IndexMetadata.java b/server/src/main/java/org/elasticsearch/cluster/metadata/IndexMetadata.java
index 38d0fada7d866..5802f0e50294b 100644
--- a/server/src/main/java/org/elasticsearch/cluster/metadata/IndexMetadata.java
+++ b/server/src/main/java/org/elasticsearch/cluster/metadata/IndexMetadata.java
@@ -587,7 +587,7 @@ public Iterator<Setting<?>> settings() {
     static final String KEY_SETTINGS_VERSION = "settings_version";
     static final String KEY_ALIASES_VERSION = "aliases_version";
     static final String KEY_ROUTING_NUM_SHARDS = "routing_num_shards";
-    static final String KEY_SETTINGS = "settings";
+    public static final String KEY_SETTINGS = "settings";
     static final String KEY_STATE = "state";
     static final String KEY_MAPPINGS = "mappings";
     static final String KEY_MAPPINGS_HASH = "mappings_hash";
@@ -596,7 +596,7 @@ public Iterator<Setting<?>> settings() {
     static final String KEY_MAPPINGS_UPDATED_VERSION = "mappings_updated_version";
     static final String KEY_SYSTEM = "system";
     static final String KEY_TIMESTAMP_RANGE = "timestamp_range";
-    static final String KEY_EVENT_INGESTED_RANGE = "event_ingested_range";
+    public static final String KEY_EVENT_INGESTED_RANGE = "event_ingested_range";
     public static final String KEY_PRIMARY_TERMS = "primary_terms";
     public static final String KEY_STATS = "stats";
diff --git a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java
index fdac63cc5466c..3c2a749938eee 100644
--- a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java
+++ b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java
@@ -398,6 +398,19 @@ public static String getRepositoryDataBlobName(long repositoryGeneration) {
         Function.identity()
     );
 
+    /**
+     * Parses only the shard count from the IndexMetadata object written by INDEX_METADATA_FORMAT (#131822)
+     */
+    public static final ChecksumBlobStoreFormat<IndexShardCount> INDEX_SHARD_COUNT_FORMAT = new ChecksumBlobStoreFormat<>(
+        "index-metadata",
+        METADATA_NAME_FORMAT,
+        (repoName, parser) -> IndexShardCount.fromIndexMetadata(parser),
+        (ignored) -> {
+            assert false;
+            throw new UnsupportedOperationException();
+        }
+    );
+
     private static final String SNAPSHOT_CODEC = "snapshot";
 
     public static final ChecksumBlobStoreFormat<SnapshotInfo> SNAPSHOT_FORMAT = new ChecksumBlobStoreFormat<>(
@@ -1327,17 +1340,16 @@ private void determineShardCount(ActionListener<Void> listener) {
         private void getOneShardCount(String indexMetaGeneration) {
             try {
                 updateShardCount(
-                    INDEX_METADATA_FORMAT.read(getProjectRepo(), indexContainer, indexMetaGeneration, namedXContentRegistry)
-                        .getNumberOfShards()
+                    INDEX_SHARD_COUNT_FORMAT.read(getProjectRepo(), indexContainer, indexMetaGeneration, namedXContentRegistry).count()
                 );
             } catch (Exception ex) {
-                logger.warn(() -> format("[%s] [%s] failed to read metadata for index", indexMetaGeneration, indexId.getName()), ex);
+                logger.warn(() -> format("[%s] [%s] failed to read shard count for index", indexMetaGeneration, indexId.getName()), ex);
                 // Definitely indicates something fairly badly wrong with the repo, but not immediately fatal here: we might get the
-                // shard count from another metadata blob, or we might just not process these shards. If we skip these shards then the
-                // repository will technically enter an invalid state (these shards' index-XXX blobs will refer to snapshots that no
-                // longer exist) and may contain dangling blobs too. A subsequent delete that hits this index may repair the state if
-                // the metadata read error is transient, but if not then the stale indices cleanup will eventually remove this index
-                // and all its extra data anyway.
+                // shard count from another metadata blob, or we might just not process these shards. If we skip these shards
+                // then the repository will technically enter an invalid state (these shards' index-XXX blobs will refer to snapshots
+                // that no longer exist) and may contain dangling blobs too. A subsequent delete that hits this index may repair
+                // the state if the metadata read error is transient, but if not then the stale indices cleanup will eventually
+                // remove this index and all its extra data anyway.
                 // TODO: Should we fail the delete here? See https://github.com/elastic/elasticsearch/issues/100569.
             }
         }
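In short, the change on the snapshot-delete path swaps which format parses the per-index metadata blob. A condensed before/after sketch (call shapes taken from the hunk above; indexContainer, indexMetaGeneration and namedXContentRegistry are in scope in the surrounding SnapshotsDeletion context):

    // Before: deserialize the entire IndexMetadata object just to read a single int
    int shardCountBefore = INDEX_METADATA_FORMAT.read(getProjectRepo(), indexContainer, indexMetaGeneration, namedXContentRegistry)
        .getNumberOfShards();

    // After: parse the same blob, but materialize only the shard count
    int shardCountAfter = INDEX_SHARD_COUNT_FORMAT.read(getProjectRepo(), indexContainer, indexMetaGeneration, namedXContentRegistry)
        .count();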
diff --git a/server/src/main/java/org/elasticsearch/repositories/blobstore/IndexShardCount.java b/server/src/main/java/org/elasticsearch/repositories/blobstore/IndexShardCount.java
new file mode 100644
index 0000000000000..5758fc4ebf259
--- /dev/null
+++ b/server/src/main/java/org/elasticsearch/repositories/blobstore/IndexShardCount.java
@@ -0,0 +1,71 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.repositories.blobstore;
+
+import org.elasticsearch.cluster.metadata.IndexMetadata;
+import org.elasticsearch.common.xcontent.XContentParserUtils;
+import org.elasticsearch.xcontent.XContentParser;
+
+import java.io.IOException;
+
+import static org.elasticsearch.cluster.metadata.IndexMetadata.KEY_SETTINGS;
+import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SHARDS;
+
+/**
+ * A subset of {@link IndexMetadata} storing only the shard count of an index.
+ * Prior to v9.3, the entire {@link IndexMetadata} object was loaded into heap during snapshotting to determine
+ * the shard count.
+ * As per ES-12539, it is replaced by this {@link IndexShardCount} record, which loads only the index's
+ * shard count into heap, reducing the possibility of smaller nodes going OOM during snapshotting.
+ */
+public record IndexShardCount(int count) {
+    /**
+     * Parses an {@link IndexMetadata} object, reading only the shard count and skipping the rest
+     * @param parser The parser of the {@link IndexMetadata} object
+     * @return An {@link IndexShardCount} containing the shard count for the index
+     * @throws IOException Thrown if the {@link IndexMetadata} object cannot be parsed correctly
+     */
+    public static IndexShardCount fromIndexMetadata(XContentParser parser) throws IOException {
+        parser.nextToken(); // fresh parser, so move to the first token
+        parser.nextToken(); // on a start object, so move to the next token
+        XContentParserUtils.ensureExpectedToken(XContentParser.Token.FIELD_NAME, parser.currentToken(), parser);
+        String currentFieldName;
+        XContentParser.Token token = parser.nextToken();
+        XContentParserUtils.ensureExpectedToken(XContentParser.Token.START_OBJECT, token, parser);
+
+        IndexShardCount indexShardCount = null;
+        // Skip over everything except the index.number_of_shards setting, failing on any unexpected tokens
+        while ((currentFieldName = parser.nextFieldName()) != null) {
+            token = parser.nextToken();
+            if (token == XContentParser.Token.START_OBJECT) {
+                if (currentFieldName.equals(KEY_SETTINGS)) {
+                    while (parser.nextToken() != XContentParser.Token.END_OBJECT) {
+                        String fieldName = parser.currentName();
+                        parser.nextToken();
+                        if (SETTING_NUMBER_OF_SHARDS.equals(fieldName)) {
+                            indexShardCount = new IndexShardCount(parser.intValue());
+                        } else {
+                            parser.skipChildren();
+                        }
+                    }
+                } else {
+                    parser.skipChildren();
+                }
+            } else if (token == XContentParser.Token.START_ARRAY) {
+                parser.skipChildren();
+            } else if (token.isValue() == false) {
+                throw new IllegalArgumentException("Unexpected token " + token);
+            }
+        }
+        XContentParserUtils.ensureExpectedToken(XContentParser.Token.END_OBJECT, parser.nextToken(), parser);
+
+        // indexShardCount is still null here if the blob did not contain index.number_of_shards, e.g. because of corruption
+        return indexShardCount != null ? indexShardCount : new IndexShardCount(-1);
+    }
+}
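For orientation, fromIndexMetadata expects the top-level shape that the IndexMetadata XContent format writes: a single field named after the index, whose object contains (among other entries that are skipped) a "settings" object holding "index.number_of_shards". A minimal sketch mirroring the builders in the tests below (index name and shard count are illustrative; createParser is the ESTestCase helper):

    XContentBuilder builder = XContentFactory.jsonBuilder()
        .startObject()
        .field("my_index")
        .startObject()
        .startObject(KEY_SETTINGS)
        .field(SETTING_NUMBER_OF_SHARDS, 3)
        .endObject()
        .endObject()
        .endObject();
    // every other field (mappings, aliases, rollover info, ...) would be skipped via skipChildren()
    IndexShardCount count = IndexShardCount.fromIndexMetadata(createParser(builder));
    assertEquals(3, count.count());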
diff --git a/server/src/test/java/org/elasticsearch/repositories/blobstore/IndexShardCountTests.java b/server/src/test/java/org/elasticsearch/repositories/blobstore/IndexShardCountTests.java
new file mode 100644
index 0000000000000..7b2b5adb585f9
--- /dev/null
+++ b/server/src/test/java/org/elasticsearch/repositories/blobstore/IndexShardCountTests.java
@@ -0,0 +1,241 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.repositories.blobstore;
+
+import org.elasticsearch.action.admin.indices.rollover.MaxAgeCondition;
+import org.elasticsearch.action.admin.indices.rollover.MaxDocsCondition;
+import org.elasticsearch.action.admin.indices.rollover.MaxPrimaryShardDocsCondition;
+import org.elasticsearch.action.admin.indices.rollover.MaxPrimaryShardSizeCondition;
+import org.elasticsearch.action.admin.indices.rollover.MaxSizeCondition;
+import org.elasticsearch.action.admin.indices.rollover.OptimalShardCountCondition;
+import org.elasticsearch.action.admin.indices.rollover.RolloverInfo;
+import org.elasticsearch.cluster.metadata.IndexMetadata;
+import org.elasticsearch.cluster.metadata.IndexMetadataStats;
+import org.elasticsearch.cluster.metadata.IndexReshardingMetadata;
+import org.elasticsearch.cluster.metadata.IndexWriteLoad;
+import org.elasticsearch.cluster.metadata.InferenceFieldMetadata;
+import org.elasticsearch.cluster.metadata.InferenceFieldMetadataTests;
+import org.elasticsearch.common.bytes.BytesReference;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.unit.ByteSizeValue;
+import org.elasticsearch.common.xcontent.XContentHelper;
+import org.elasticsearch.core.TimeValue;
+import org.elasticsearch.index.IndexVersion;
+import org.elasticsearch.index.shard.IndexLongFieldRange;
+import org.elasticsearch.index.shard.ShardLongFieldRange;
+import org.elasticsearch.test.ESTestCase;
+import org.elasticsearch.test.index.IndexVersionUtils;
+import org.elasticsearch.xcontent.XContentBuilder;
+import org.elasticsearch.xcontent.XContentFactory;
+import org.elasticsearch.xcontent.XContentParser;
+import org.elasticsearch.xcontent.XContentType;
+import org.elasticsearch.xcontent.json.JsonXContent;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import static org.elasticsearch.cluster.metadata.IndexMetadata.KEY_SETTINGS;
+import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SHARDS;
+
+public class IndexShardCountTests extends ESTestCase {
+    public void testFromIndexMetaDataWithValidIndexMetaDataObject() throws IOException {
+        int numberOfShards = randomFrom(1, 2, 4, 8, 16);
+        IndexMetadata indexMetadata = randomIndexMetadata(numberOfShards);
+
+        final XContentBuilder builder = JsonXContent.contentBuilder();
+        builder.startObject();
+        IndexMetadata.FORMAT.toXContent(builder, indexMetadata);
+        builder.endObject();
+        XContentParser parser = createParser(builder);
+
+        IndexShardCount count = IndexShardCount.fromIndexMetadata(parser);
+        assertEquals(numberOfShards, count.count());
+    }
+
+    public void testFromIndexMetaDataWithValidIndexMetaDataObjectWithoutEventIngestedField() throws IOException {
+        int numberOfShards = randomFrom(1, 2, 4, 8, 16);
+        IndexMetadata indexMetadata = randomIndexMetadata(numberOfShards);
+
+        final XContentBuilder builder = JsonXContent.contentBuilder();
+        builder.startObject();
+        IndexMetadata.FORMAT.toXContent(builder, indexMetadata);
+        builder.endObject();
+
+        // convert the XContent to a map and remove the IndexMetadata.KEY_EVENT_INGESTED_RANGE entry
+        // to simulate IndexMetadata from an older cluster version (before TransportVersions.EVENT_INGESTED_RANGE_IN_CLUSTER_STATE)
+        Map<String, Object> indexMetadataMap = XContentHelper.convertToMap(BytesReference.bytes(builder), true, XContentType.JSON).v2();
+        removeEventIngestedField(indexMetadataMap);
+
+        // re-serialize the modified map and parse that, rather than the original builder
+        XContentParser parser = createParser(JsonXContent.contentBuilder().map(indexMetadataMap));
+
+        IndexShardCount count = IndexShardCount.fromIndexMetadata(parser);
+        assertEquals(numberOfShards, count.count());
+    }
+
+    public void testFromIndexMetaDataWithoutNumberOfShardsSettingReturnsNegativeOne() throws IOException {
+        XContentBuilder builder = XContentFactory.jsonBuilder()
+            .startObject()
+            .field("my_index")
+            .startObject()
+            .startObject(KEY_SETTINGS)
+            // no shard count setting
+            .endObject()
+            .endObject()
+            .endObject();
+        XContentParser parser = createParser(builder);
+
+        IndexShardCount count = IndexShardCount.fromIndexMetadata(parser);
+        assertEquals(-1, count.count());
+    }
+
+    // IndexMetadata specifies two parsing methods, legacyFromXContent and fromXContent, to be used depending
+    // on the IndexVersion. Since we only read the shard count, we should succeed in either case
+    public void testFromIndexMetaDataWithOldVersionSucceeds() throws IOException {
+        int numberOfShards = randomFrom(1, 2, 4, 8, 16);
+        XContentBuilder indexMetadataBuilder = buildLegacyIndexMetadata(
+            numberOfShards,
+            IndexVersion.getMinimumCompatibleIndexVersion(1_000_000)
+        );
+        XContentParser parser = createParser(indexMetadataBuilder);
+        IndexShardCount count = IndexShardCount.fromIndexMetadata(parser);
+        assertEquals(numberOfShards, count.count());
+    }
+
+    private XContentBuilder buildLegacyIndexMetadata(int numberOfShards, IndexVersion compatibilityVersion) throws IOException {
+        return XContentFactory.jsonBuilder()
+            .startObject()
+            .field("my_index")
+            .startObject()
+            .startObject(KEY_SETTINGS)
+            .field(SETTING_NUMBER_OF_SHARDS, numberOfShards)
+            .field("index.version.compatibility", compatibilityVersion)
+            .endObject()
+            .endObject()
+            .endObject();
+    }
+
+    private Map<String, String> randomCustomMap() {
+        Map<String, String> customMap = new HashMap<>();
+        customMap.put(randomAlphaOfLength(5), randomAlphaOfLength(10));
+        customMap.put(randomAlphaOfLength(10), randomAlphaOfLength(15));
+        return customMap;
+    }
+
+    private IndexMetadata randomIndexMetadata(int numberOfShards) {
+        final boolean system = randomBoolean();
+        Map<String, String> customMap = randomCustomMap();
+        return randomIndexMetadata(numberOfShards, system, customMap);
+    }
+
+    private IndexMetadata randomIndexMetadata(int numberOfShards, boolean system, Map<String, String> customMap) {
+        int numberOfReplicas = randomIntBetween(0, 10);
+        IndexVersion mappingsUpdatedVersion = IndexVersionUtils.randomVersion();
+        IndexMetadataStats indexStats = randomBoolean() ? randomIndexStats(numberOfShards) : null;
+        Double indexWriteLoadForecast = randomBoolean() ? randomDoubleBetween(0.0, 128, true) : null;
+        Long shardSizeInBytesForecast = randomBoolean() ? randomLongBetween(1024, 10240) : null;
+        Map<String, InferenceFieldMetadata> inferenceFields = randomInferenceFields();
+        IndexReshardingMetadata reshardingMetadata = randomBoolean() ? randomIndexReshardingMetadata(numberOfShards) : null;
+
+        return IndexMetadata.builder("foo")
+            .settings(randomSettings(numberOfShards, numberOfReplicas))
+            .creationDate(randomLong())
+            .primaryTerm(0, 2)
+            .setRoutingNumShards(32)
+            .system(system)
+            .putCustom("my_custom", customMap)
+            .putRolloverInfo(
+                new RolloverInfo(
+                    randomAlphaOfLength(5),
+                    List.of(
+                        new MaxAgeCondition(TimeValue.timeValueMillis(randomNonNegativeLong())),
+                        new MaxDocsCondition(randomNonNegativeLong()),
+                        new MaxSizeCondition(ByteSizeValue.ofBytes(randomNonNegativeLong())),
+                        new MaxPrimaryShardSizeCondition(ByteSizeValue.ofBytes(randomNonNegativeLong())),
+                        new MaxPrimaryShardDocsCondition(randomNonNegativeLong()),
+                        new OptimalShardCountCondition(3)
+                    ),
+                    randomNonNegativeLong()
+                )
+            )
+            .mappingsUpdatedVersion(mappingsUpdatedVersion)
+            .stats(indexStats)
+            .indexWriteLoadForecast(indexWriteLoadForecast)
+            .shardSizeInBytesForecast(shardSizeInBytesForecast)
+            .putInferenceFields(inferenceFields)
+            .eventIngestedRange(
+                randomFrom(
+                    IndexLongFieldRange.UNKNOWN,
+                    IndexLongFieldRange.EMPTY,
+                    IndexLongFieldRange.NO_SHARDS,
+                    IndexLongFieldRange.NO_SHARDS.extendWithShardRange(0, 1, ShardLongFieldRange.of(5000000, 5500000))
+                )
+            )
+            .reshardingMetadata(reshardingMetadata)
+            .build();
+    }
+
+    private Settings.Builder randomSettings(int numberOfShards, int numberOfReplicas) {
+        return indexSettings(numberOfShards, numberOfReplicas).put("index.version.created", 1)
+            .putList("index.query.default_field", "title", "description", "tags");
+    }
+
+    private void removeEventIngestedField(Map<String, Object> indexMetadataMap) {
+        @SuppressWarnings("unchecked")
+        Map<String, Object> inner = (Map<String, Object>) indexMetadataMap.get("foo");
+        assertTrue(inner.containsKey(IndexMetadata.KEY_EVENT_INGESTED_RANGE));
+        inner.remove(IndexMetadata.KEY_EVENT_INGESTED_RANGE);
+        // validate that the IndexMetadata.KEY_EVENT_INGESTED_RANGE entry is gone before the map is re-parsed
+        assertFalse(inner.containsKey(IndexMetadata.KEY_EVENT_INGESTED_RANGE));
+    }
+
+    public static Map<String, InferenceFieldMetadata> randomInferenceFields() {
+        Map<String, InferenceFieldMetadata> map = new HashMap<>();
+        int numFields = randomIntBetween(0, 5);
+        for (int i = 0; i < numFields; i++) {
+            String field = randomAlphaOfLengthBetween(5, 10);
+            map.put(field, randomInferenceFieldMetadata(field));
+        }
+        return map;
+    }
+
+    private static InferenceFieldMetadata randomInferenceFieldMetadata(String name) {
+        return new InferenceFieldMetadata(
+            name,
+            randomIdentifier(),
+            randomIdentifier(),
+            randomSet(1, 5, ESTestCase::randomIdentifier).toArray(String[]::new),
+            InferenceFieldMetadataTests.generateRandomChunkingSettings()
+        );
+    }
+
+    private IndexMetadataStats randomIndexStats(int numberOfShards) {
+        IndexWriteLoad.Builder indexWriteLoadBuilder = IndexWriteLoad.builder(numberOfShards);
+        int numberOfPopulatedWriteLoads = randomIntBetween(0, numberOfShards);
+        for (int i = 0; i < numberOfPopulatedWriteLoads; i++) {
+            indexWriteLoadBuilder.withShardWriteLoad(
+                i,
+                randomDoubleBetween(0.0, 128.0, true),
+                randomDoubleBetween(0.0, 128.0, true),
+                randomDoubleBetween(0.0, 128.0, true),
+                randomNonNegativeLong()
+            );
+        }
+        return new IndexMetadataStats(indexWriteLoadBuilder.build(), randomLongBetween(100, 1024), randomIntBetween(1, 2));
+    }
+
+    private IndexReshardingMetadata randomIndexReshardingMetadata(int oldShards) {
+        return IndexReshardingMetadata.newSplitByMultiple(oldShards, randomIntBetween(2, 5));
+    }
+}