Skip to content

Commit

Permalink
Reuse MappingMetadata instances in Metadata class. (#81036)
Browse files Browse the repository at this point in the history
Backporting #80348 to 8.0 branch.

Hash the mapping source of a MappingMetadata instance and then
cache it in Metadata class. A mapping with the same hash
will use a cached MappingMetadata instance. This can
significantly reduce the number of MappingMetadata instances
for data streams and index patterns.

The idea originated from #69772, but this change focuses only on the JVM heap memory savings,
and it hashes the mapping instead of assigning it a UUID.

Relates to #77466
  • Loading branch information
martijnvg committed Nov 25, 2021
1 parent 42ea3b8 commit b07f71a
Show file tree
Hide file tree
Showing 6 changed files with 304 additions and 83 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -560,6 +560,45 @@ private IndexMetadata(
assert numberOfShards * routingFactor == routingNumShards : routingNumShards + " must be a multiple of " + numberOfShards;
}

/**
 * Creates a copy of this index metadata that differs only in its mappings: the copy holds a single
 * entry, keyed by {@link MapperService#SINGLE_MAPPING_NAME}, that points at the given instance.
 * Every other constructor argument is taken unchanged from this instance.
 *
 * @param mapping the mapping metadata instance the copy should reference
 * @return a new {@link IndexMetadata} carrying the given mapping
 */
IndexMetadata withMappingMetadata(MappingMetadata mapping) {
    final ImmutableOpenMap.Builder<String, MappingMetadata> singleMapping = ImmutableOpenMap.builder();
    singleMapping.put(MapperService.SINGLE_MAPPING_NAME, mapping);
    final ImmutableOpenMap<String, MappingMetadata> replacedMappings = singleMapping.build();

    return new IndexMetadata(
        index,
        version,
        mappingVersion,
        settingsVersion,
        aliasesVersion,
        primaryTerms,
        state,
        numberOfShards,
        numberOfReplicas,
        settings,
        replacedMappings,
        aliases,
        customData,
        inSyncAllocationIds,
        requireFilters,
        initialRecoveryFilters,
        includeFilters,
        excludeFilters,
        indexCreatedVersion,
        routingNumShards,
        routingPartitionSize,
        routingPaths,
        waitForActiveShards,
        rolloverInfos,
        isSystem,
        isHidden,
        timestampRange,
        priority,
        creationDate,
        ignoreDiskWatermarks,
        tierPreference
    );
}

/**
 * @return the {@link Index} held by this index metadata
 */
public Index getIndex() {
    return this.index;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,10 @@ public boolean routingRequired() {
return this.routingRequired;
}

/**
 * Returns the SHA-256 value reported by this mapping's source.
 *
 * @return the result of {@code source.getSha256()}
 */
public String getSha256() {
    return this.source.getSha256();
}

@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeString(type());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,7 @@ public interface NonRestorableCustom extends Custom {}
private final String[] visibleClosedIndices;

private SortedMap<String, IndexAbstraction> indicesLookup;
private final Map<String, MappingMetadata> mappingsByHash;

private Metadata(
String clusterUUID,
Expand All @@ -229,7 +230,8 @@ private Metadata(
String[] visibleOpenIndices,
String[] allClosedIndices,
String[] visibleClosedIndices,
SortedMap<String, IndexAbstraction> indicesLookup
SortedMap<String, IndexAbstraction> indicesLookup,
Map<String, MappingMetadata> mappingsByHash
) {
this.clusterUUID = clusterUUID;
this.clusterUUIDCommitted = clusterUUIDCommitted;
Expand All @@ -251,6 +253,7 @@ private Metadata(
this.allClosedIndices = allClosedIndices;
this.visibleClosedIndices = visibleClosedIndices;
this.indicesLookup = indicesLookup;
this.mappingsByHash = mappingsByHash;
}

public Metadata withIncrementedVersion() {
Expand All @@ -274,7 +277,8 @@ public Metadata withIncrementedVersion() {
visibleOpenIndices,
allClosedIndices,
visibleClosedIndices,
indicesLookup
indicesLookup,
mappingsByHash
);
}

Expand Down Expand Up @@ -942,6 +946,10 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
return builder;
}

/**
 * Package-private accessor for the mapping metadata instances this metadata holds, keyed by hash.
 *
 * @return the {@code mappingsByHash} map exactly as it was passed to the constructor
 */
Map<String, MappingMetadata> getMappingsByHash() {
    return this.mappingsByHash;
}

private static class MetadataDiff implements Diff<Metadata> {

private final long version;
Expand Down Expand Up @@ -1096,6 +1104,7 @@ public static class Builder {
private final ImmutableOpenMap.Builder<String, Custom> customs;

private SortedMap<String, IndexAbstraction> previousIndicesLookup;
private final Map<String, MappingMetadata> mappingsByHash;

public Builder() {
clusterUUID = UNKNOWN_CLUSTER_UUID;
Expand All @@ -1104,6 +1113,7 @@ public Builder() {
customs = ImmutableOpenMap.builder();
indexGraveyard(IndexGraveyard.builder().build()); // create new empty index graveyard to initialize
previousIndicesLookup = null;
mappingsByHash = new HashMap<>();
}

Builder(Metadata metadata) {
Expand All @@ -1118,11 +1128,13 @@ public Builder() {
this.templates = ImmutableOpenMap.builder(metadata.templates);
this.customs = ImmutableOpenMap.builder(metadata.customs);
previousIndicesLookup = metadata.getIndicesLookup();
this.mappingsByHash = new HashMap<>(metadata.mappingsByHash);
}

public Builder put(IndexMetadata.Builder indexMetadataBuilder) {
// we know its a new one, increment the version and store
indexMetadataBuilder.version(indexMetadataBuilder.version() + 1);
dedupeMapping(indexMetadataBuilder);
IndexMetadata indexMetadata = indexMetadataBuilder.build();
IndexMetadata previous = indices.put(indexMetadata.getIndex().getName(), indexMetadata);
if (unsetPreviousIndicesLookup(previous, indexMetadata)) {
Expand All @@ -1135,6 +1147,7 @@ public Builder put(IndexMetadata indexMetadata, boolean incrementVersion) {
if (indices.get(indexMetadata.getIndex().getName()) == indexMetadata) {
return this;
}
indexMetadata = dedupeMapping(indexMetadata);
// if we put a new index metadata, increment its version
if (incrementVersion) {
indexMetadata = IndexMetadata.builder(indexMetadata).version(indexMetadata.getVersion() + 1).build();
Expand Down Expand Up @@ -1201,13 +1214,16 @@ public Builder removeAllIndices() {
previousIndicesLookup = null;

indices.clear();
mappingsByHash.clear();
return this;
}

/**
 * Adds every index metadata of the given map to this builder without incrementing index versions.
 * The previously computed indices lookup is discarded.
 *
 * @param indices the index metadata instances to add
 * @return this builder
 */
public Builder indices(ImmutableOpenMap<String, IndexMetadata> indices) {
    previousIndicesLookup = null;

    // Each entry has to go through put(IndexMetadata, boolean) so that its mapping is deduplicated.
    // The entries must not be bulk-copied into this.indices beforehand: put(...) returns early when
    // the builder already holds the very same instance, which would skip deduplication entirely.
    for (var cursor : indices) {
        put(cursor.value, false);
    }
    return this;
}

Expand Down Expand Up @@ -1652,6 +1668,8 @@ public Metadata build(boolean builtIndicesLookupEagerly) {
}
}

purgeUnusedEntries(indices);

// build all concrete indices arrays:
// TODO: I think we can remove these arrays. it isn't worth the effort, for operations on all indices.
// When doing an operation across all indices, most of the time is spent on actually going to all shards and
Expand Down Expand Up @@ -1692,7 +1710,8 @@ public Metadata build(boolean builtIndicesLookupEagerly) {
visibleOpenIndicesArray,
allClosedIndicesArray,
visibleClosedIndicesArray,
indicesLookup
indicesLookup,
Collections.unmodifiableMap(mappingsByHash)
);
}

Expand Down Expand Up @@ -1911,6 +1930,63 @@ public static Metadata fromXContent(XContentParser parser) throws IOException {
XContentParserUtils.ensureExpectedToken(XContentParser.Token.END_OBJECT, parser.nextToken(), parser);
return builder.build();
}

/**
 * Replaces the {@link MappingMetadata} of the given index metadata with an already recorded instance
 * when one with the same sha256 hash exists in {@code mappingsByHash}.
 * <p>
 * If the index metadata has no mapping it is returned untouched. If no mapping with the same hash has
 * been recorded yet, the mapping of the given index metadata is recorded under its hash and the given
 * index metadata is returned as is. Otherwise a new {@link IndexMetadata} that references the recorded
 * instance is returned.
 *
 * @param indexMetadata the index metadata whose mapping should be deduplicated
 * @return the given instance, or a copy of it that references the recorded mapping instance
 */
private IndexMetadata dedupeMapping(IndexMetadata indexMetadata) {
    final MappingMetadata candidate = indexMetadata.mapping();
    if (candidate == null) {
        return indexMetadata;
    }

    // putIfAbsent records the candidate when its hash is unknown and otherwise hands back the recorded instance
    final MappingMetadata recorded = mappingsByHash.putIfAbsent(candidate.getSha256(), candidate);
    return recorded == null ? indexMetadata : indexMetadata.withMappingMetadata(recorded);
}

/**
 * Builder variant of {@link #dedupeMapping(IndexMetadata)}: when a mapping with the same sha256 hash
 * has already been recorded, that instance is put into the given builder; otherwise the builder's
 * mapping is recorded under its hash. A builder without a mapping is left untouched.
 *
 * @param indexMetadataBuilder the builder whose mapping should be deduplicated in place
 */
private void dedupeMapping(IndexMetadata.Builder indexMetadataBuilder) {
    final MappingMetadata candidate = indexMetadataBuilder.mapping();
    if (candidate != null) {
        // putIfAbsent records the candidate when its hash is unknown and otherwise hands back the recorded instance
        final MappingMetadata recorded = mappingsByHash.putIfAbsent(candidate.getSha256(), candidate);
        if (recorded != null) {
            indexMetadataBuilder.putMapping(recorded);
        }
    }
}

/**
 * Drops every entry from {@code mappingsByHash} whose hash is not the sha256 hash of the mapping of
 * at least one of the given indices.
 *
 * @param indices the index metadata instances whose mappings must be retained
 */
private void purgeUnusedEntries(ImmutableOpenMap<String, IndexMetadata> indices) {
    // Collect the hashes that are still referenced by some index
    final Set<String> referencedHashes = new HashSet<>(mappingsByHash.size());
    for (var indexMetadata : indices.values()) {
        final var mapping = indexMetadata.mapping();
        if (mapping == null) {
            continue;
        }
        referencedHashes.add(mapping.getSha256());
    }

    // Removing through the key set view removes the corresponding entries from the map itself
    mappingsByHash.keySet().removeIf(hash -> referencedHashes.contains(hash) == false);
}

}

private static final ToXContent.Params FORMAT_PARAMS;
Expand Down

0 comments on commit b07f71a

Please sign in to comment.