Skip to content

Commit

Permalink
Implement Setting Deduplication via String Interning (#80493) (#82659)
Browse files Browse the repository at this point in the history
This is a somewhat crude solution to #78892 that addresses
95%+ of duplicate setting entry memory consumption in large clusters.
The remaining duplicate structures (lists of all the same strings) are
comparatively cheap in their heap consumption.
In heavy benchmarking for #77466 no runtime impact of adding this extra step
to setting creation has been found despite pushing setting creation harder
than is expected in real-world usage (part of the low relative impact here is
the fact that populating a tree-map is quite expensive to begin with so adding
the string interning which is fast via the CHM cache doesn't add much overhead).
On the other hand, the heap use impact for use-cases that come with a large number
of duplicate settings (many similar indices) is significant. As an example,
10k AuditBeat indices consume about 500M of heap for duplicate settings data structures
without this change. This cahnge brings the heap consumption from duplicate settings down to
O(1M) on every node in the cluster.

Relates and addresses most of #78892
Relates #77466
  • Loading branch information
original-brownbear committed Jan 17, 2022
1 parent bc2a6f6 commit e724fe9
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -880,4 +880,47 @@ public void testNoopUpdate() {
assertNotSame(currentState, clusterService.state());
}

public void testAllSettingStringInterned() {
final String masterNode = internalCluster().startMasterOnlyNode();
final String dataNode = internalCluster().startDataOnlyNode();

final String index1 = "index-1";
final String index2 = "index-2";
createIndex(index1, index2);
final ClusterService clusterServiceMaster = internalCluster().getInstance(ClusterService.class, masterNode);
final ClusterService clusterServiceData = internalCluster().getInstance(ClusterService.class, dataNode);
final Settings index1SettingsMaster = clusterServiceMaster.state().metadata().index(index1).getSettings();
final Settings index1SettingsData = clusterServiceData.state().metadata().index(index1).getSettings();
assertNotSame(index1SettingsMaster, index1SettingsData);
assertSame(index1SettingsMaster.get(IndexMetadata.SETTING_INDEX_UUID), index1SettingsData.get(IndexMetadata.SETTING_INDEX_UUID));

// Create a list of not interned strings to make sure interning setting values works
final List<String> queryFieldsSetting = org.elasticsearch.core.List.of(new String("foo"), new String("bar"), new String("bla"));
assertAcked(
admin().indices()
.prepareUpdateSettings(index1, index2)
.setSettings(Settings.builder().putList("query.default_field", queryFieldsSetting))
);
final Settings updatedIndex1SettingsMaster = clusterServiceMaster.state().metadata().index(index1).getSettings();
final Settings updatedIndex1SettingsData = clusterServiceData.state().metadata().index(index1).getSettings();
assertNotSame(updatedIndex1SettingsMaster, updatedIndex1SettingsData);
assertEqualsAndStringsInterned(queryFieldsSetting, updatedIndex1SettingsMaster);
assertEqualsAndStringsInterned(queryFieldsSetting, updatedIndex1SettingsData);
assertEqualsAndStringsInterned(queryFieldsSetting, clusterServiceMaster.state().metadata().index(index2).getSettings());
assertEqualsAndStringsInterned(queryFieldsSetting, clusterServiceData.state().metadata().index(index2).getSettings());
}

private void assertEqualsAndStringsInterned(List<String> queryFieldsSetting, Settings settings) {
final List<String> defaultFields = settings.getAsList("index.query.default_field");
assertEquals(queryFieldsSetting, defaultFields);
assertNotSame(queryFieldsSetting, defaultFields);
// all setting strings should be interned
assertSame("foo", defaultFields.get(0));
assertSame("bar", defaultFields.get(1));
assertSame("bla", defaultFields.get(2));
for (String key : settings.keySet()) {
assertSame(key, key.intern());
}
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -2030,7 +2030,7 @@ public static class SimpleKey implements Key {
protected final String key;

public SimpleKey(String key) {
this.key = key;
this.key = Settings.internKeyOrValue(key);
}

@Override
Expand Down Expand Up @@ -2121,7 +2121,7 @@ public static final class AffixKey implements Key {
sb.append('.');
sb.append(suffix);
}
keyString = sb.toString();
keyString = Settings.internKeyOrValue(sb.toString());
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import org.elasticsearch.common.unit.ByteSizeUnit;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.unit.MemorySizeValue;
import org.elasticsearch.common.util.StringLiteralDeduplicator;
import org.elasticsearch.common.xcontent.LoggingDeprecationHandler;
import org.elasticsearch.common.xcontent.XContentParserUtils;
import org.elasticsearch.core.Booleans;
Expand Down Expand Up @@ -99,7 +100,27 @@ private static Settings of(Map<String, Object> settings, SecureSettings secureSe

private Settings(Map<String, Object> settings, SecureSettings secureSettings) {
// we use a sorted map for consistent serialization when using getAsMap()
this.settings = Collections.unmodifiableNavigableMap(new TreeMap<>(settings));
final TreeMap<String, Object> tree = new TreeMap<>();
for (Map.Entry<String, Object> settingEntry : settings.entrySet()) {
final Object value = settingEntry.getValue();
final Object internedValue;
if (value instanceof String) {
internedValue = internKeyOrValue((String) value);
} else if (value instanceof List) {
@SuppressWarnings("unchecked")
List<String> valueList = (List<String>) value;
final int listSize = valueList.size();
final String[] internedArr = new String[listSize];
for (int i = 0; i < valueList.size(); i++) {
internedArr[i] = internKeyOrValue(valueList.get(i));
}
internedValue = org.elasticsearch.core.List.of(internedArr);
} else {
internedValue = value;
}
tree.put(internKeyOrValue(settingEntry.getKey()), internedValue);
}
this.settings = Collections.unmodifiableNavigableMap(tree);
this.secureSettings = secureSettings;
}

Expand Down Expand Up @@ -418,7 +439,7 @@ public List<String> getAsList(String key, List<String> defaultValue, Boolean com
if (valueFromPrefix instanceof List) {
@SuppressWarnings("unchecked")
final List<String> valuesAsList = (List<String>) valueFromPrefix;
return Collections.unmodifiableList(valuesAsList);
return valuesAsList;
} else if (commaDelimited) {
String[] strings = Strings.splitStringByCommaToArray(get(key));
if (strings.length > 0) {
Expand Down Expand Up @@ -1222,11 +1243,19 @@ public boolean shouldRemoveMissingPlaceholder(String placeholderName) {
}
if (entry.getValue() instanceof List) {
@SuppressWarnings("unchecked")
final ListIterator<String> li = ((List<String>) entry.getValue()).listIterator();
final List<String> mutableList = new ArrayList<>((List<String>) entry.getValue());
final ListIterator<String> li = mutableList.listIterator();
boolean changed = false;
while (li.hasNext()) {
final String settingValueRaw = li.next();
final String settingValueResolved = propertyPlaceholder.replacePlaceholders(settingValueRaw, placeholderResolver);
li.set(settingValueResolved);
if (settingValueResolved.equals(settingValueRaw) == false) {
li.set(settingValueResolved);
changed = true;
}
}
if (changed) {
entry.setValue(org.elasticsearch.core.List.copyOf(mutableList));
}
continue;
}
Expand Down Expand Up @@ -1478,4 +1507,18 @@ private static String toString(Object o) {
return o == null ? null : o.toString();
}

private static final StringLiteralDeduplicator settingLiteralDeduplicator = new StringLiteralDeduplicator();

/**
* Interns the given string which should be either a setting key or value or part of a setting value list. This is used to reduce the
* memory footprint of similar setting instances like index settings that may contain mostly the same keys and values. Interning these
* strings at some runtime cost is considered a reasonable trade-off here since neither setting keys nor values change frequently
* while duplicate keys values may consume significant amounts of memory.
*
* @param s string to intern
* @return interned string
*/
static String internKeyOrValue(String s) {
return settingLiteralDeduplicator.deduplicate(s);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@
*/
package org.elasticsearch.common.util;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.common.util.concurrent.ConcurrentCollections;

import java.util.Map;
Expand All @@ -20,8 +18,6 @@
*/
public final class StringLiteralDeduplicator {

private static final Logger logger = LogManager.getLogger(StringLiteralDeduplicator.class);

private static final int MAX_SIZE = 1000;

private final Map<String, String> map = ConcurrentCollections.newConcurrentMapWithAggressiveConcurrency();
Expand All @@ -36,7 +32,6 @@ public String deduplicate(String string) {
final String interned = string.intern();
if (map.size() > MAX_SIZE) {
map.clear();
logger.debug("clearing intern cache");
}
map.put(interned, interned);
return interned;
Expand Down

0 comments on commit e724fe9

Please sign in to comment.