Skip to content

Commit

Permalink
Deduplicate default FieldType in KeywordFieldMapper (#86346)
Browse files Browse the repository at this point in the history
The default field type is extremely common, and its instances are not trivial
in size: each has 16 fields. Heap dumps from larger data nodes holding many
keyword fields with the default field type can contain hundreds of MB
of heap used by these duplicate instances.
Same reasoning applies to the `TextSearchInfo` deduplication.
`TextSearchInfo` was turned into a record to give us an `equals` implementation.
  • Loading branch information
original-brownbear committed May 3, 2022
1 parent 0eda627 commit cb41ed0
Show file tree
Hide file tree
Showing 26 changed files with 132 additions and 127 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -105,12 +105,12 @@ protected void registerParameters(ParameterChecker checker) throws IOException {
checker.registerUpdateCheck(b -> {
b.field("analyzer", "default");
b.field("search_analyzer", "keyword");
}, m -> assertEquals("keyword", m.fieldType().getTextSearchInfo().getSearchAnalyzer().name()));
}, m -> assertEquals("keyword", m.fieldType().getTextSearchInfo().searchAnalyzer().name()));
checker.registerUpdateCheck(b -> {
b.field("analyzer", "default");
b.field("search_analyzer", "keyword");
b.field("search_quote_analyzer", "keyword");
}, m -> assertEquals("keyword", m.fieldType().getTextSearchInfo().getSearchQuoteAnalyzer().name()));
}, m -> assertEquals("keyword", m.fieldType().getTextSearchInfo().searchQuoteAnalyzer().name()));

}

Expand Down Expand Up @@ -712,7 +712,7 @@ private static void assertSearchAsYouTypeFieldType(

assertThat(fieldType.shingleFields.length, equalTo(maxShingleSize - 1));
NamedAnalyzer indexAnalyzer = mapper.indexAnalyzers().get(fieldType.name());
for (NamedAnalyzer analyzer : asList(indexAnalyzer, fieldType.getTextSearchInfo().getSearchAnalyzer())) {
for (NamedAnalyzer analyzer : asList(indexAnalyzer, fieldType.getTextSearchInfo().searchAnalyzer())) {
assertThat(analyzer.name(), equalTo(analyzerName));
}
int shingleSize = 2;
Expand All @@ -734,7 +734,7 @@ private static void assertShingleFieldType(
ShingleFieldType fieldType = mapper.fieldType();
assertThat(fieldType.shingleSize, equalTo(shingleSize));

for (NamedAnalyzer analyzer : asList(indexAnalyzers.get(fieldType.name()), fieldType.getTextSearchInfo().getSearchAnalyzer())) {
for (NamedAnalyzer analyzer : asList(indexAnalyzers.get(fieldType.name()), fieldType.getTextSearchInfo().searchAnalyzer())) {
assertThat(analyzer.name(), equalTo(analyzerName));
if (shingleSize > 1) {
final SearchAsYouTypeAnalyzer wrappedAnalyzer = (SearchAsYouTypeAnalyzer) analyzer.analyzer();
Expand All @@ -755,13 +755,13 @@ private static void assertPrefixFieldType(
) {
PrefixFieldType fieldType = mapper.fieldType();
NamedAnalyzer indexAnalyzer = indexAnalyzers.get(fieldType.name());
for (NamedAnalyzer analyzer : asList(indexAnalyzer, fieldType.getTextSearchInfo().getSearchAnalyzer())) {
for (NamedAnalyzer analyzer : asList(indexAnalyzer, fieldType.getTextSearchInfo().searchAnalyzer())) {
assertThat(analyzer.name(), equalTo(analyzerName));
}

final SearchAsYouTypeAnalyzer wrappedIndexAnalyzer = (SearchAsYouTypeAnalyzer) indexAnalyzer.analyzer();
final SearchAsYouTypeAnalyzer wrappedSearchAnalyzer = (SearchAsYouTypeAnalyzer) fieldType.getTextSearchInfo()
.getSearchAnalyzer()
.searchAnalyzer()
.analyzer();
for (SearchAsYouTypeAnalyzer analyzer : asList(wrappedIndexAnalyzer, wrappedSearchAnalyzer)) {
assertThat(analyzer.shingleSize(), equalTo(shingleSize));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,12 +79,12 @@ protected void registerParameters(ParameterChecker checker) throws IOException {
checker.registerUpdateCheck(b -> {
b.field("analyzer", "default");
b.field("search_analyzer", "keyword");
}, m -> assertEquals("keyword", m.fieldType().getTextSearchInfo().getSearchAnalyzer().name()));
}, m -> assertEquals("keyword", m.fieldType().getTextSearchInfo().searchAnalyzer().name()));
checker.registerUpdateCheck(b -> {
b.field("analyzer", "default");
b.field("search_analyzer", "keyword");
b.field("search_quote_analyzer", "keyword");
}, m -> assertEquals("keyword", m.fieldType().getTextSearchInfo().getSearchQuoteAnalyzer().name()));
}, m -> assertEquals("keyword", m.fieldType().getTextSearchInfo().searchQuoteAnalyzer().name()));

checker.registerConflictCheck("store", b -> b.field("store", true));
checker.registerConflictCheck("index_options", b -> b.field("index_options", "docs"));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ public ContextMappings getContextMappings() {
*/
public CompletionQuery prefixQuery(Object value) {
return new PrefixCompletionQuery(
getTextSearchInfo().getSearchAnalyzer().analyzer(),
getTextSearchInfo().searchAnalyzer().analyzer(),
new Term(name(), indexedValueForSearch(value))
);
}
Expand All @@ -302,7 +302,7 @@ public CompletionQuery fuzzyQuery(
boolean unicodeAware
) {
return new FuzzyCompletionQuery(
getTextSearchInfo().getSearchAnalyzer().analyzer(),
getTextSearchInfo().searchAnalyzer().analyzer(),
new Term(name(), indexedValueForSearch(value)),
null,
fuzziness.asDistance(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.lucene.search.AutomatonQueries;
import org.elasticsearch.common.unit.Fuzziness;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.index.analysis.IndexAnalyzers;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.fielddata.FieldData;
Expand Down Expand Up @@ -95,6 +96,13 @@ public static class Defaults {
FIELD_TYPE.freeze();
}

/**
 * Shared {@link TextSearchInfo} built from the default {@link #FIELD_TYPE}, no similarity
 * ({@code null}) and {@link Lucene#KEYWORD_ANALYZER} as both the search and the search-quote
 * analyzer. It serves as the canonical instance that equal values are replaced with, so it is
 * declared {@code final} to guarantee the reference can never be reassigned.
 */
public static final TextSearchInfo TEXT_SEARCH_INFO = new TextSearchInfo(
    FIELD_TYPE,
    null,
    Lucene.KEYWORD_ANALYZER,
    Lucene.KEYWORD_ANALYZER
);

public static final int IGNORE_ABOVE = Integer.MAX_VALUE;
}

Expand All @@ -106,6 +114,19 @@ public KeywordField(String field, BytesRef term, FieldType ft) {

}

/**
 * Builds the {@link TextSearchInfo} for a keyword field, handing back the shared
 * {@link Defaults#TEXT_SEARCH_INFO} instance whenever the requested combination is equal to it.
 *
 * @param fieldType           the lucene field type of the field
 * @param similarity          the similarity to use, may be {@code null}
 * @param searchAnalyzer      the search-time analyzer
 * @param searchQuoteAnalyzer the search-time analyzer for quoted (phrase) searches
 * @return {@link Defaults#TEXT_SEARCH_INFO} if the newly built value equals it, otherwise the newly built value
 */
private static TextSearchInfo textSearchInfo(
    FieldType fieldType,
    @Nullable SimilarityProvider similarity,
    NamedAnalyzer searchAnalyzer,
    NamedAnalyzer searchQuoteAnalyzer
) {
    final TextSearchInfo candidate = new TextSearchInfo(fieldType, similarity, searchAnalyzer, searchQuoteAnalyzer);
    // prefer the shared default instance over an equal copy so the copy can be garbage collected
    return candidate.equals(Defaults.TEXT_SEARCH_INFO) ? Defaults.TEXT_SEARCH_INFO : candidate;
}

/**
 * Casts the given mapper to a {@link KeywordFieldMapper}.
 *
 * @param in the mapper to cast
 * @return the same object, typed as {@link KeywordFieldMapper}
 * @throws ClassCastException if {@code in} is not a {@link KeywordFieldMapper}
 */
private static KeywordFieldMapper toType(FieldMapper in) {
return (KeywordFieldMapper) in;
}
Expand Down Expand Up @@ -283,6 +304,10 @@ public KeywordFieldMapper build(MapperBuilderContext context) {
fieldtype.setOmitNorms(this.hasNorms.getValue() == false);
fieldtype.setIndexOptions(TextParams.toIndexOptions(this.indexed.getValue(), this.indexOptions.getValue()));
fieldtype.setStored(this.stored.getValue());
if (fieldtype.equals(Defaults.FIELD_TYPE)) {
// deduplicate in the common default case to save some memory
fieldtype = Defaults.FIELD_TYPE;
}
return new KeywordFieldMapper(
name,
fieldtype,
Expand Down Expand Up @@ -323,7 +348,7 @@ public KeywordFieldType(
fieldType.indexOptions() != IndexOptions.NONE && builder.indexCreatedVersion.isLegacyIndexVersion() == false,
fieldType.stored(),
builder.hasDocValues.getValue(),
new TextSearchInfo(fieldType, builder.similarity.getValue(), searchAnalyzer, quoteAnalyzer),
textSearchInfo(fieldType, builder.similarity.getValue(), searchAnalyzer, quoteAnalyzer),
builder.meta.getValue()
);
this.eagerGlobalOrdinals = builder.eagerGlobalOrdinals.getValue();
Expand Down Expand Up @@ -354,7 +379,7 @@ public KeywordFieldType(String name, FieldType fieldType) {
fieldType.indexOptions() != IndexOptions.NONE,
false,
false,
new TextSearchInfo(fieldType, null, Lucene.KEYWORD_ANALYZER, Lucene.KEYWORD_ANALYZER),
textSearchInfo(fieldType, null, Lucene.KEYWORD_ANALYZER, Lucene.KEYWORD_ANALYZER),
Collections.emptyMap()
);
this.normalizer = Lucene.KEYWORD_ANALYZER;
Expand All @@ -366,7 +391,7 @@ public KeywordFieldType(String name, FieldType fieldType) {
}

public KeywordFieldType(String name, NamedAnalyzer analyzer) {
super(name, true, false, true, new TextSearchInfo(Defaults.FIELD_TYPE, null, analyzer, analyzer), Collections.emptyMap());
super(name, true, false, true, textSearchInfo(Defaults.FIELD_TYPE, null, analyzer, analyzer), Collections.emptyMap());
this.normalizer = Lucene.KEYWORD_ANALYZER;
this.ignoreAbove = Integer.MAX_VALUE;
this.nullValue = null;
Expand Down Expand Up @@ -698,7 +723,7 @@ public Object valueForDisplay(Object value) {

@Override
protected BytesRef indexedValueForSearch(Object value) {
if (getTextSearchInfo().getSearchAnalyzer() == Lucene.KEYWORD_ANALYZER) {
if (getTextSearchInfo().searchAnalyzer() == Lucene.KEYWORD_ANALYZER) {
// keyword analyzer with the default attribute source which encodes terms using UTF8
// in that case we skip normalization, which may be slow if there are many terms to
// parse (e.g. large terms query) since Analyzer.normalize involves things like creating
Expand All @@ -713,7 +738,7 @@ protected BytesRef indexedValueForSearch(Object value) {
if (value instanceof BytesRef) {
value = ((BytesRef) value).utf8ToString();
}
return getTextSearchInfo().getSearchAnalyzer().normalize(name(), value.toString());
return getTextSearchInfo().searchAnalyzer().normalize(name(), value.toString());
}

/**
Expand All @@ -730,8 +755,8 @@ public Query wildcardQuery(
if (isIndexed()) {
return super.wildcardQuery(value, method, caseInsensitive, true, context);
} else {
if (getTextSearchInfo().getSearchAnalyzer() != null) {
value = normalizeWildcardPattern(name(), value, getTextSearchInfo().getSearchAnalyzer());
if (getTextSearchInfo().searchAnalyzer() != null) {
value = normalizeWildcardPattern(name(), value, getTextSearchInfo().searchAnalyzer());
} else {
value = indexedValueForSearch(value).utf8ToString();
}
Expand All @@ -751,8 +776,8 @@ public Query normalizedWildcardQuery(String value, MultiTermQuery.RewriteMethod
if (isIndexed()) {
return super.normalizedWildcardQuery(value, method, context);
} else {
if (getTextSearchInfo().getSearchAnalyzer() != null) {
value = normalizeWildcardPattern(name(), value, getTextSearchInfo().getSearchAnalyzer());
if (getTextSearchInfo().searchAnalyzer() != null) {
value = normalizeWildcardPattern(name(), value, getTextSearchInfo().searchAnalyzer());
} else {
value = indexedValueForSearch(value).utf8ToString();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -157,8 +157,8 @@ protected Query wildcardQuery(
}

Term term;
if (getTextSearchInfo().getSearchAnalyzer() != null && shouldNormalize) {
value = normalizeWildcardPattern(name(), value, getTextSearchInfo().getSearchAnalyzer());
if (getTextSearchInfo().searchAnalyzer() != null && shouldNormalize) {
value = normalizeWildcardPattern(name(), value, getTextSearchInfo().searchAnalyzer());
term = new Term(name(), value);
} else {
term = new Term(name(), indexedValueForSearch(value));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ public Analyzers(
this.searchAnalyzer = Parameter.analyzerParam(
"search_analyzer",
true,
m -> m.fieldType().getTextSearchInfo().getSearchAnalyzer(),
m -> m.fieldType().getTextSearchInfo().searchAnalyzer(),
() -> {
if (indexAnalyzer.isConfigured() == false) {
NamedAnalyzer defaultAnalyzer = indexAnalyzers.get(AnalysisRegistry.DEFAULT_SEARCH_ANALYZER_NAME);
Expand All @@ -67,7 +67,7 @@ public Analyzers(
this.searchQuoteAnalyzer = Parameter.analyzerParam(
"search_quote_analyzer",
true,
m -> m.fieldType().getTextSearchInfo().getSearchQuoteAnalyzer(),
m -> m.fieldType().getTextSearchInfo().searchQuoteAnalyzer(),
() -> {
if (searchAnalyzer.isConfigured() == false && indexAnalyzer.isConfigured() == false) {
NamedAnalyzer defaultAnalyzer = indexAnalyzers.get(AnalysisRegistry.DEFAULT_SEARCH_QUOTED_ANALYZER_NAME);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,15 @@
/**
* Encapsulates information about how to perform text searches over a field
*/
public class TextSearchInfo {
public record TextSearchInfo(
FieldType luceneFieldType,
SimilarityProvider similarity,
NamedAnalyzer searchAnalyzer,
NamedAnalyzer searchQuoteAnalyzer
) {

private static final FieldType SIMPLE_MATCH_ONLY_FIELD_TYPE = new FieldType();

static {
SIMPLE_MATCH_ONLY_FIELD_TYPE.setTokenized(false);
SIMPLE_MATCH_ONLY_FIELD_TYPE.setOmitNorms(true);
Expand Down Expand Up @@ -79,19 +85,14 @@ protected TokenStreamComponents createComponents(String fieldName) {
FORBIDDEN_ANALYZER
);

private final FieldType luceneFieldType;
private final SimilarityProvider similarity;
private final NamedAnalyzer searchAnalyzer;
private final NamedAnalyzer searchQuoteAnalyzer;

/**
* Create a new TextSearchInfo
*
* @param luceneFieldType the lucene {@link FieldType} of the field to be searched
* @param similarity defines which Similarity to use when searching. If set to {@code null}
* then the default Similarity will be used.
* @param searchAnalyzer the search-time analyzer to use. May not be {@code null}
* @param searchQuoteAnalyzer the search-time analyzer to use for phrase searches. May not be {@code null}
* @param luceneFieldType the lucene {@link FieldType} of the field to be searched
* @param similarity defines which Similarity to use when searching. If set to {@code null}
* then the default Similarity will be used.
* @param searchAnalyzer the search-time analyzer to use. May not be {@code null}
* @param searchQuoteAnalyzer the search-time analyzer to use for phrase searches. May not be {@code null}
*/
public TextSearchInfo(
FieldType luceneFieldType,
Expand All @@ -105,18 +106,6 @@ public TextSearchInfo(
this.searchQuoteAnalyzer = Objects.requireNonNull(searchQuoteAnalyzer);
}

public SimilarityProvider getSimilarity() {
return similarity;
}

public NamedAnalyzer getSearchAnalyzer() {
return searchAnalyzer;
}

public NamedAnalyzer getSearchQuoteAnalyzer() {
return searchQuoteAnalyzer;
}

/**
* @return whether or not this field supports positional queries
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@ protected Query doToQuery(SearchExecutionContext context) throws IOException {
float boost = entry.getValue() == null ? 1.0f : entry.getValue();
fieldsAndBoosts.add(new FieldAndBoost(fieldType, boost));

Analyzer analyzer = fieldType.getTextSearchInfo().getSearchAnalyzer();
Analyzer analyzer = fieldType.getTextSearchInfo().searchAnalyzer();
if (sharedAnalyzer != null && analyzer.equals(sharedAnalyzer) == false) {
throw new IllegalArgumentException("All fields in [" + NAME + "] query must have the same search analyzer");
}
Expand Down Expand Up @@ -338,7 +338,7 @@ private static void validateSimilarity(SearchExecutionContext context, Map<Strin
for (Map.Entry<String, Float> entry : fields.entrySet()) {
String name = entry.getKey();
MappedFieldType fieldType = context.getFieldType(name);
if (fieldType != null && fieldType.getTextSearchInfo().getSimilarity() != null) {
if (fieldType != null && fieldType.getTextSearchInfo().similarity() != null) {
throw new IllegalArgumentException("[" + NAME + "] queries cannot be used with per-field similarities");
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ public IntervalsSource getSource(SearchExecutionContext context, MappedFieldType
assert fieldType != null;
}
if (analyzer == null) {
analyzer = fieldType.getTextSearchInfo().getSearchAnalyzer();
analyzer = fieldType.getTextSearchInfo().searchAnalyzer();
}
IntervalsSource source = intervals(fieldType, query, maxGaps, ordered, analyzer, context);
if (useField != null) {
Expand Down Expand Up @@ -541,7 +541,7 @@ public IntervalsSource getSource(SearchExecutionContext context, MappedFieldType
assert fieldType != null;
}
if (analyzer == null) {
analyzer = fieldType.getTextSearchInfo().getSearchAnalyzer();
analyzer = fieldType.getTextSearchInfo().searchAnalyzer();
}
final BytesRef prefixTerm = analyzer.normalize(fieldType.name(), prefix);
IntervalsSource source = fieldType.prefixIntervals(prefixTerm, context);
Expand Down Expand Up @@ -659,7 +659,7 @@ public IntervalsSource getSource(SearchExecutionContext context, MappedFieldType
assert fieldType != null;
}
if (analyzer == null) {
analyzer = fieldType.getTextSearchInfo().getSearchAnalyzer();
analyzer = fieldType.getTextSearchInfo().searchAnalyzer();
}
BytesRef normalizedPattern = analyzer.normalize(fieldType.name(), pattern);
IntervalsSource source = fieldType.wildcardIntervals(normalizedPattern, context);
Expand Down Expand Up @@ -786,7 +786,7 @@ public IntervalsSource getSource(SearchExecutionContext context, MappedFieldType
assert fieldType != null;
}
if (analyzer == null) {
analyzer = fieldType.getTextSearchInfo().getSearchAnalyzer();
analyzer = fieldType.getTextSearchInfo().searchAnalyzer();
}
// Fuzzy queries only work with unicode content so it's legal to call utf8ToString here.
String normalizedTerm = analyzer.normalize(fieldType.name(), term).utf8ToString();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ private NamedAnalyzer configuredAnalyzer(SearchExecutionContext context) {
}
MappedFieldType mft = context.getFieldType(fieldName);
if (mft != null) {
return mft.getTextSearchInfo().getSearchAnalyzer();
return mft.getTextSearchInfo().searchAnalyzer();
}
return null;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,7 @@ private NamedAnalyzer configuredAnalyzer(SearchExecutionContext context) {
}
MappedFieldType mft = context.getFieldType(fieldName);
if (mft != null) {
return mft.getTextSearchInfo().getSearchAnalyzer();
return mft.getTextSearchInfo().searchAnalyzer();
}
return null;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ protected Analyzer getAnalyzer(MappedFieldType fieldType, boolean quoted) {
TextSearchInfo tsi = fieldType.getTextSearchInfo();
assert tsi != TextSearchInfo.NONE;
if (analyzer == null) {
return quoted ? tsi.getSearchQuoteAnalyzer() : tsi.getSearchAnalyzer();
return quoted ? tsi.searchQuoteAnalyzer() : tsi.searchAnalyzer();
} else {
return analyzer;
}
Expand Down

0 comments on commit cb41ed0

Please sign in to comment.