Skip to content

Commit

Permalink
Highlighters skip ignored keyword values
Browse files Browse the repository at this point in the history
Keyword field values with length more than ignore_above are not
indexed. But highlighters still were retrieving these values
from _source and were trying to highlight them. This sometimes lead to
errors if a field length exceeded  max_analyzed_offset. But also this
is a wrong behaviour to attempt to highlight something that was not
ignored during indexing.

This PR checks if a keyword value was ignored because of its length,
and if yes, skips highlighting it.

Closes #43800
  • Loading branch information
mayya-sharipova committed Mar 11, 2020
1 parent 5494675 commit d0fa6d6
Show file tree
Hide file tree
Showing 4 changed files with 89 additions and 5 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
---
setup:
- do:
indices.create:
index: test-index
body:
mappings:
"properties":
"k1":
"type": "keyword"
"k2":
"type": "keyword"
"ignore_above": 3
- do:
bulk:
index: test-index
refresh: true
body:
- '{"index": {"_id": "1"}}'
- '{"k1": "123", "k2" : "123"}'
- '{"index": {"_id": "2"}}'
- '{"k1": "1234", "k2" : "1234"}'

---
"Plain Highligher should skip highlighting ignored keyword values":
- skip:
version: " - 7.9.99"
reason: "skip highlighting of ignored values was introduced in 7.7"
- do:
search:
index: test-index
body:
query:
prefix:
k1: "12"
highlight:
require_field_match: false
fields:
k2:
type: plain

- match: {hits.hits.0.highlight.k2.0: "<em>123</em>"}
- is_false: hits.hits.1.highlight # no highlight for a value that was ignored

---
"Unified Highligher should skip highlighting ignored keyword values":
- skip:
version: " - 7.9.99"
reason: "skip highlighting of ignored values was introduced in 7.7"
- do:
search:
index: test-index
body:
query:
prefix:
k1: "12"
highlight:
require_field_match: false
fields:
k2:
type: unified

- match: {hits.hits.0.highlight.k2.0: "<em>123</em>"}
- is_false: hits.hits.1.highlight # no highlight for a value that was ignored
Original file line number Diff line number Diff line change
Expand Up @@ -321,8 +321,7 @@ protected KeywordFieldMapper(String simpleName, MappedFieldType fieldType, Mappe

/** Values that have more chars than the return value of this method will
* be skipped at parsing time. */
// pkg-private for testing
int ignoreAbove() {
public int ignoreAbove() {
return ignoreAbove;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
Expand Down Expand Up @@ -102,6 +103,12 @@ public HighlightField highlight(HighlighterContext highlighterContext) {
ArrayList<TextFragment> fragsList = new ArrayList<>();
List<Object> textsToHighlight;
Analyzer analyzer = context.getMapperService().documentMapper().mappers().indexAnalyzer();
Integer keywordIgnoreAbove = null;
if (fieldType instanceof KeywordFieldMapper.KeywordFieldType) {
KeywordFieldMapper mapper = (KeywordFieldMapper) context.getMapperService().documentMapper()
.mappers().getMapper(highlighterContext.fieldName);
keywordIgnoreAbove = mapper.ignoreAbove();
}
final int maxAnalyzedOffset = context.getIndexSettings().getHighlightMaxAnalyzedOffset();

try {
Expand All @@ -110,7 +117,11 @@ public HighlightField highlight(HighlighterContext highlighterContext) {

for (Object textToHighlight : textsToHighlight) {
String text = convertFieldValue(fieldType, textToHighlight);
if (text.length() > maxAnalyzedOffset) {
int textLength = text.length();
if (keywordIgnoreAbove != null && textLength > keywordIgnoreAbove) {
continue; // skip highlighting keyword terms that were ignored during indexing
}
if (textLength > maxAnalyzedOffset) {
throw new IllegalArgumentException(
"The length of [" + highlighterContext.fieldName + "] field of [" + hitContext.hit().getId() +
"] doc of [" + context.index().getName() + "] index " +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.IdFieldMapper;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
Expand Down Expand Up @@ -65,11 +66,16 @@ public HighlightField highlight(HighlighterContext highlighterContext) {
FetchSubPhase.HitContext hitContext = highlighterContext.hitContext;
Encoder encoder = field.fieldOptions().encoder().equals("html") ? HighlightUtils.Encoders.HTML : HighlightUtils.Encoders.DEFAULT;
final int maxAnalyzedOffset = context.getIndexSettings().getHighlightMaxAnalyzedOffset();
Integer keywordIgnoreAbove = null;
if (fieldType instanceof KeywordFieldMapper.KeywordFieldType) {
KeywordFieldMapper mapper = (KeywordFieldMapper) context.getMapperService().documentMapper()
.mappers().getMapper(highlighterContext.fieldName);
keywordIgnoreAbove = mapper.ignoreAbove();
}

List<Snippet> snippets = new ArrayList<>();
int numberOfFragments = field.fieldOptions().numberOfFragments();
try {

final Analyzer analyzer = getAnalyzer(context.getMapperService().documentMapper(), hitContext);
List<Object> fieldValues = loadFieldValues(fieldType, field, context, hitContext,
highlighterContext.highlight.forceSource(field));
Expand All @@ -81,7 +87,11 @@ public HighlightField highlight(HighlighterContext highlighterContext) {
final CustomUnifiedHighlighter highlighter;
final String fieldValue = mergeFieldValues(fieldValues, MULTIVAL_SEP_CHAR);
final OffsetSource offsetSource = getOffsetSource(fieldType);
if ((offsetSource == OffsetSource.ANALYSIS) && (fieldValue.length() > maxAnalyzedOffset)) {
int fieldValueLength = fieldValue.length();
if (keywordIgnoreAbove != null && fieldValueLength > keywordIgnoreAbove) {
return null; // skip highlighting keyword terms that were ignored during indexing
}
if ((offsetSource == OffsetSource.ANALYSIS) && (fieldValueLength > maxAnalyzedOffset)) {
throw new IllegalArgumentException(
"The length of [" + highlighterContext.fieldName + "] field of [" + hitContext.hit().getId() +
"] doc of [" + context.index().getName() + "] index " + "has exceeded [" +
Expand Down

0 comments on commit d0fa6d6

Please sign in to comment.