Highlighters skip ignored keyword values

Keyword field values with length more than ignore_above are not indexed. But highlighters still were retrieving these values from _source and were trying to highlight them. This sometimes lead to errors if a field length exceeded max_analyzed_offset. But also this is a wrong behaviour to attempt to highlight something that was not ignored during indexing. This PR checks if a keyword value was ignored because of its length, and if yes, skips highlighting it. Closes #43800
elastic · Mar 11, 2020 · d0fa6d6 · d0fa6d6
1 parent 5494675
commit d0fa6d6
Show file tree

Hide file tree

Showing 4 changed files with 89 additions and 5 deletions.
diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.highlight/40_keyword_ignore.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.highlight/40_keyword_ignore.yml
@@ -0,0 +1,64 @@
+---
+setup:
+  - do:
+      indices.create:
+          index: test-index
+          body:
+            mappings:
+              "properties":
+                "k1":
+                  "type": "keyword"
+                "k2":
+                  "type": "keyword"
+                  "ignore_above": 3
+  - do:
+      bulk:
+        index: test-index
+        refresh: true
+        body:
+          - '{"index": {"_id": "1"}}'
+          - '{"k1": "123", "k2" : "123"}'
+          - '{"index": {"_id": "2"}}'
+          - '{"k1": "1234", "k2" : "1234"}'
+
+---
+"Plain Highligher should skip highlighting ignored keyword values":
+  - skip:
+      version: " - 7.9.99"
+      reason: "skip highlighting of ignored values was introduced in 7.7"
+  - do:
+      search:
+        index: test-index
+        body:
+          query:
+            prefix:
+              k1: "12"
+          highlight:
+            require_field_match: false
+            fields:
+              k2:
+                type: plain
+
+  - match: {hits.hits.0.highlight.k2.0: "<em>123</em>"}
+  - is_false: hits.hits.1.highlight # no highlight for a value that was ignored
+
+---
+"Unified Highligher should skip highlighting ignored keyword values":
+  - skip:
+      version: " - 7.9.99"
+      reason: "skip highlighting of ignored values was introduced in 7.7"
+  - do:
+      search:
+        index: test-index
+        body:
+          query:
+            prefix:
+              k1: "12"
+          highlight:
+            require_field_match: false
+            fields:
+              k2:
+                type: unified
+
+  - match: {hits.hits.0.highlight.k2.0: "<em>123</em>"}
+  - is_false: hits.hits.1.highlight # no highlight for a value that was ignored
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java
@@ -321,8 +321,7 @@ protected KeywordFieldMapper(String simpleName, MappedFieldType fieldType, Mappe
 
     /** Values that have more chars than the return value of this method will
      *  be skipped at parsing time. */
-    // pkg-private for testing
-    int ignoreAbove() {
+    public int ignoreAbove() {
         return ignoreAbove;
     }
 

diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java
@@ -36,6 +36,7 @@
 import org.elasticsearch.ExceptionsHelper;
 import org.elasticsearch.common.text.Text;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.mapper.KeywordFieldMapper;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.query.QueryShardContext;
 import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
@@ -102,6 +103,12 @@ public HighlightField highlight(HighlighterContext highlighterContext) {
         ArrayList<TextFragment> fragsList = new ArrayList<>();
         List<Object> textsToHighlight;
         Analyzer analyzer = context.getMapperService().documentMapper().mappers().indexAnalyzer();
+        Integer keywordIgnoreAbove = null;
+        if (fieldType instanceof KeywordFieldMapper.KeywordFieldType) {
+            KeywordFieldMapper mapper = (KeywordFieldMapper) context.getMapperService().documentMapper()
+                .mappers().getMapper(highlighterContext.fieldName);
+            keywordIgnoreAbove = mapper.ignoreAbove();
+        }
         final int maxAnalyzedOffset = context.getIndexSettings().getHighlightMaxAnalyzedOffset();
 
         try {
@@ -110,7 +117,11 @@ public HighlightField highlight(HighlighterContext highlighterContext) {
 
             for (Object textToHighlight : textsToHighlight) {
                 String text = convertFieldValue(fieldType, textToHighlight);
-                if (text.length() > maxAnalyzedOffset) {
+                int textLength = text.length();
+                if (keywordIgnoreAbove != null  && textLength > keywordIgnoreAbove) {
+                    continue; // skip highlighting keyword terms that were ignored during indexing
+                }
+                if (textLength > maxAnalyzedOffset) {
                     throw new IllegalArgumentException(
                         "The length of [" + highlighterContext.fieldName + "] field of [" + hitContext.hit().getId() +
                             "] doc of [" + context.index().getName() + "] index " +

diff --git a/...r/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/UnifiedHighlighter.java b/...r/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/UnifiedHighlighter.java
@@ -36,6 +36,7 @@
 import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.mapper.DocumentMapper;
 import org.elasticsearch.index.mapper.IdFieldMapper;
+import org.elasticsearch.index.mapper.KeywordFieldMapper;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.query.QueryShardContext;
 import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
@@ -65,11 +66,16 @@ public HighlightField highlight(HighlighterContext highlighterContext) {
         FetchSubPhase.HitContext hitContext = highlighterContext.hitContext;
         Encoder encoder = field.fieldOptions().encoder().equals("html") ? HighlightUtils.Encoders.HTML : HighlightUtils.Encoders.DEFAULT;
         final int maxAnalyzedOffset = context.getIndexSettings().getHighlightMaxAnalyzedOffset();
+        Integer keywordIgnoreAbove = null;
+        if (fieldType instanceof KeywordFieldMapper.KeywordFieldType) {
+            KeywordFieldMapper mapper = (KeywordFieldMapper) context.getMapperService().documentMapper()
+                .mappers().getMapper(highlighterContext.fieldName);
+            keywordIgnoreAbove = mapper.ignoreAbove();
+        }
 
         List<Snippet> snippets = new ArrayList<>();
         int numberOfFragments = field.fieldOptions().numberOfFragments();
         try {
-
             final Analyzer analyzer = getAnalyzer(context.getMapperService().documentMapper(), hitContext);
             List<Object> fieldValues = loadFieldValues(fieldType, field, context, hitContext,
                 highlighterContext.highlight.forceSource(field));
@@ -81,7 +87,11 @@ public HighlightField highlight(HighlighterContext highlighterContext) {
             final CustomUnifiedHighlighter highlighter;
             final String fieldValue = mergeFieldValues(fieldValues, MULTIVAL_SEP_CHAR);
             final OffsetSource offsetSource = getOffsetSource(fieldType);
-            if ((offsetSource == OffsetSource.ANALYSIS) && (fieldValue.length() > maxAnalyzedOffset)) {
+            int fieldValueLength = fieldValue.length();
+            if (keywordIgnoreAbove != null  && fieldValueLength > keywordIgnoreAbove) {
+                return null; // skip highlighting keyword terms that were ignored during indexing
+            }
+            if ((offsetSource == OffsetSource.ANALYSIS) && (fieldValueLength > maxAnalyzedOffset)) {
                 throw new IllegalArgumentException(
                     "The length of [" + highlighterContext.fieldName + "] field of [" + hitContext.hit().getId() +
                         "] doc of [" + context.index().getName() + "] index " + "has exceeded [" +