From 01eee1a97f9541f9a3df62231588cb81976d4e84 Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Mon, 16 Mar 2020 06:49:37 -0400 Subject: [PATCH] Highlighters skip ignored keyword values (#53408) Keyword field values with length more than ignore_above are not indexed. But highlighters still were retrieving these values from _source and were trying to highlight them. This sometimes lead to errors if a field length exceeded max_analyzed_offset. But also this is a wrong behaviour to attempt to highlight something that was not ignored during indexing. This PR checks if a keyword value was ignored because of its length, and if yes, skips highlighting it. Closes #43800 --- .../search.highlight/40_keyword_ignore.yml | 64 +++++++++++++++++++ .../index/mapper/KeywordFieldMapper.java | 3 +- .../subphase/highlight/PlainHighlighter.java | 13 +++- .../highlight/UnifiedHighlighter.java | 14 +++- 4 files changed, 89 insertions(+), 5 deletions(-) create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/search.highlight/40_keyword_ignore.yml diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.highlight/40_keyword_ignore.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.highlight/40_keyword_ignore.yml new file mode 100644 index 0000000000000..16442d2b3d823 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.highlight/40_keyword_ignore.yml @@ -0,0 +1,64 @@ +--- +setup: + - do: + indices.create: + index: test-index + body: + mappings: + "properties": + "k1": + "type": "keyword" + "k2": + "type": "keyword" + "ignore_above": 3 + - do: + bulk: + index: test-index + refresh: true + body: + - '{"index": {"_id": "1"}}' + - '{"k1": "123", "k2" : "123"}' + - '{"index": {"_id": "2"}}' + - '{"k1": "1234", "k2" : "1234"}' + +--- +"Plain Highligher should skip highlighting ignored keyword values": + - skip: + version: " - 7.9.99" + reason: "skip highlighting of ignored values was introduced in 7.7" + - do: + search: + index: test-index + body: + query: + prefix: + k1: "12" + highlight: + require_field_match: false + fields: + k2: + type: plain + + - match: {hits.hits.0.highlight.k2.0: "123"} + - is_false: hits.hits.1.highlight # no highlight for a value that was ignored + +--- +"Unified Highligher should skip highlighting ignored keyword values": + - skip: + version: " - 7.9.99" + reason: "skip highlighting of ignored values was introduced in 7.7" + - do: + search: + index: test-index + body: + query: + prefix: + k1: "12" + highlight: + require_field_match: false + fields: + k2: + type: unified + + - match: {hits.hits.0.highlight.k2.0: "123"} + - is_false: hits.hits.1.highlight # no highlight for a value that was ignored diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java index 524a1eb79f133..b83427a53a3ed 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java @@ -322,8 +322,7 @@ protected KeywordFieldMapper(String simpleName, MappedFieldType fieldType, Mappe /** Values that have more chars than the return value of this method will * be skipped at parsing time. */ - // pkg-private for testing - int ignoreAbove() { + public int ignoreAbove() { return ignoreAbove; } diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java index b792be60f78fc..d1743e6182e2f 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java @@ -36,6 +36,7 @@ import org.elasticsearch.ExceptionsHelper; import org.elasticsearch.common.text.Text; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.query.QueryShardContext; import org.elasticsearch.search.fetch.FetchPhaseExecutionException; @@ -102,6 +103,12 @@ public HighlightField highlight(HighlighterContext highlighterContext) { ArrayList fragsList = new ArrayList<>(); List textsToHighlight; Analyzer analyzer = context.getMapperService().documentMapper().mappers().indexAnalyzer(); + Integer keywordIgnoreAbove = null; + if (fieldType instanceof KeywordFieldMapper.KeywordFieldType) { + KeywordFieldMapper mapper = (KeywordFieldMapper) context.getMapperService().documentMapper() + .mappers().getMapper(highlighterContext.fieldName); + keywordIgnoreAbove = mapper.ignoreAbove(); + } final int maxAnalyzedOffset = context.getIndexSettings().getHighlightMaxAnalyzedOffset(); try { @@ -110,7 +117,11 @@ public HighlightField highlight(HighlighterContext highlighterContext) { for (Object textToHighlight : textsToHighlight) { String text = convertFieldValue(fieldType, textToHighlight); - if (text.length() > maxAnalyzedOffset) { + int textLength = text.length(); + if (keywordIgnoreAbove != null && textLength > keywordIgnoreAbove) { + continue; // skip highlighting keyword terms that were ignored during indexing + } + if (textLength > maxAnalyzedOffset) { throw new IllegalArgumentException( "The length of [" + highlighterContext.fieldName + "] field of [" + hitContext.hit().getId() + "] doc of [" + context.index().getName() + "] index " + diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/UnifiedHighlighter.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/UnifiedHighlighter.java index 81f700920f827..28ff7220d9bd7 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/UnifiedHighlighter.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/UnifiedHighlighter.java @@ -36,6 +36,7 @@ import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.mapper.DocumentMapper; import org.elasticsearch.index.mapper.IdFieldMapper; +import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.query.QueryShardContext; import org.elasticsearch.search.fetch.FetchPhaseExecutionException; @@ -65,11 +66,16 @@ public HighlightField highlight(HighlighterContext highlighterContext) { FetchSubPhase.HitContext hitContext = highlighterContext.hitContext; Encoder encoder = field.fieldOptions().encoder().equals("html") ? HighlightUtils.Encoders.HTML : HighlightUtils.Encoders.DEFAULT; final int maxAnalyzedOffset = context.getIndexSettings().getHighlightMaxAnalyzedOffset(); + Integer keywordIgnoreAbove = null; + if (fieldType instanceof KeywordFieldMapper.KeywordFieldType) { + KeywordFieldMapper mapper = (KeywordFieldMapper) context.getMapperService().documentMapper() + .mappers().getMapper(highlighterContext.fieldName); + keywordIgnoreAbove = mapper.ignoreAbove(); + } List snippets = new ArrayList<>(); int numberOfFragments = field.fieldOptions().numberOfFragments(); try { - final Analyzer analyzer = getAnalyzer(context.getMapperService().documentMapper(), hitContext); List fieldValues = loadFieldValues(fieldType, field, context, hitContext, highlighterContext.highlight.forceSource(field)); @@ -81,7 +87,11 @@ public HighlightField highlight(HighlighterContext highlighterContext) { final CustomUnifiedHighlighter highlighter; final String fieldValue = mergeFieldValues(fieldValues, MULTIVAL_SEP_CHAR); final OffsetSource offsetSource = getOffsetSource(fieldType); - if ((offsetSource == OffsetSource.ANALYSIS) && (fieldValue.length() > maxAnalyzedOffset)) { + int fieldValueLength = fieldValue.length(); + if (keywordIgnoreAbove != null && fieldValueLength > keywordIgnoreAbove) { + return null; // skip highlighting keyword terms that were ignored during indexing + } + if ((offsetSource == OffsetSource.ANALYSIS) && (fieldValueLength > maxAnalyzedOffset)) { throw new IllegalArgumentException( "The length of [" + highlighterContext.fieldName + "] field of [" + hitContext.hit().getId() + "] doc of [" + context.index().getName() + "] index " + "has exceeded [" +