6 changes: 4 additions & 2 deletions docs/reference/search/search-your-data/highlighting.asciidoc
@@ -276,9 +276,11 @@ max_analyzed_offset:: By default, the maximum number of characters
analyzed for a highlight request is bounded by the value defined in the
<<index-max-analyzed-offset, `index.highlight.max_analyzed_offset`>> setting,
and when the number of characters exceeds this limit an error is returned. If
this setting is set to a non-negative value, the highlighting stops at this defined
this setting is set to a positive value, the highlighting stops at this defined
maximum limit, and the rest of the text is not processed, thus not highlighted and
no error is returned. The <<max-analyzed-offset, `max_analyzed_offset`>> query setting
no error is returned. If it is specifically set to -1 then the value of
<<index-max-analyzed-offset, `index.highlight.max_analyzed_offset`>> is used instead.
For values < -1 or 0, an error is returned. The <<max-analyzed-offset, `max_analyzed_offset`>> query setting
does *not* override the <<index-max-analyzed-offset, `index.highlight.max_analyzed_offset`>>
which prevails when it's set to a lower value than the query setting.

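To make the new contract concrete: a minimal sketch (not part of this PR) of the three value classes, exercised through HighlightBuilder, whose validation is updated later in this diff. The field name "body" is hypothetical.

import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;

static void maxAnalyzedOffsetContract() {
    // Positive value: analysis stops at this offset; the rest is not highlighted, no error.
    new HighlightBuilder().field("body").maxAnalyzedOffset(1000);
    // -1: defer to the index.highlight.max_analyzed_offset setting.
    new HighlightBuilder().field("body").maxAnalyzedOffset(-1);
    // 0, or anything below -1, is rejected up front.
    try {
        new HighlightBuilder().field("body").maxAnalyzedOffset(0);
    } catch (IllegalArgumentException e) {
        // "[max_analyzed_offset] must be a positive integer, or -1" -- same message the REST tests below assert
    }
}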
@@ -17,6 +17,7 @@
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.lucene.search.uhighlight.CustomUnifiedHighlighter;
import org.elasticsearch.lucene.search.uhighlight.QueryMaxAnalyzedOffset;
import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;
import org.elasticsearch.search.fetch.subphase.highlight.DefaultHighlighter;
import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext;
@@ -52,7 +53,7 @@ protected List<Object> loadFieldValues(
}

@Override
protected Analyzer wrapAnalyzer(Analyzer analyzer, Integer maxAnalyzedOffset) {
protected Analyzer wrapAnalyzer(Analyzer analyzer, QueryMaxAnalyzedOffset maxAnalyzedOffset) {
return new AnnotatedHighlighterAnalyzer(super.wrapAnalyzer(analyzer, maxAnalyzedOffset));
}

@@ -39,6 +39,7 @@
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText;
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotationAnalyzerWrapper;
import org.elasticsearch.lucene.search.uhighlight.CustomUnifiedHighlighter;
import org.elasticsearch.lucene.search.uhighlight.QueryMaxAnalyzedOffset;
import org.elasticsearch.lucene.search.uhighlight.Snippet;
import org.elasticsearch.search.fetch.subphase.highlight.LimitTokenOffsetAnalyzer;
import org.elasticsearch.test.ESTestCase;
@@ -85,7 +86,7 @@ private void assertHighlightOneDoc(
int noMatchSize,
String[] expectedPassages,
int maxAnalyzedOffset,
Integer queryMaxAnalyzedOffset
Integer queryMaxAnalyzedOffsetIn
) throws Exception {

try (Directory dir = newDirectory()) {
@@ -116,8 +117,9 @@ private void assertHighlightOneDoc(
for (int i = 0; i < markedUpInputs.length; i++) {
annotations[i] = AnnotatedText.parse(markedUpInputs[i]);
}
QueryMaxAnalyzedOffset queryMaxAnalyzedOffset = QueryMaxAnalyzedOffset.create(queryMaxAnalyzedOffsetIn, maxAnalyzedOffset);
if (queryMaxAnalyzedOffset != null) {
wrapperAnalyzer = new LimitTokenOffsetAnalyzer(wrapperAnalyzer, queryMaxAnalyzedOffset);
wrapperAnalyzer = new LimitTokenOffsetAnalyzer(wrapperAnalyzer, queryMaxAnalyzedOffset.getNotNull());
}
AnnotatedHighlighterAnalyzer hiliteAnalyzer = new AnnotatedHighlighterAnalyzer(wrapperAnalyzer);
hiliteAnalyzer.setAnnotations(annotations);
@@ -311,6 +313,19 @@ public void testExceedMaxAnalyzedOffset() throws Exception {
e.getMessage()
);

// Same as before, but force using index maxOffset (20) as queryMaxOffset by passing -1.
assertHighlightOneDoc(
"text",
new String[] { "[Long Text exceeds](Long+Text+exceeds) MAX analyzed offset)" },
query,
Locale.ROOT,
breakIterator,
0,
new String[] { "Long Text [exceeds](_hit_term=exceeds) MAX analyzed offset)" },
20,
-1
);

assertHighlightOneDoc(
"text",
new String[] { "[Long Text Exceeds](Long+Text+Exceeds) MAX analyzed offset [Long Text Exceeds](Long+Text+Exceeds)" },
1 change: 1 addition & 0 deletions rest-api-spec/build.gradle
@@ -254,6 +254,7 @@ tasks.named("yamlRestTestV7CompatTransform").configure({ task ->
task.skipTest("logsdb/20_source_mapping/stored _source mode is supported", "no longer serialize source_mode")
task.skipTest("logsdb/20_source_mapping/include/exclude is supported with stored _source", "no longer serialize source_mode")
task.skipTest("logsdb/20_source_mapping/synthetic _source is default", "no longer serialize source_mode")
task.skipTest("search.highlight/30_max_analyzed_offset/Plain highlighter with max_analyzed_offset < 0 should FAIL", "semantics of test has changed")
task.skipTest("search/520_fetch_fields/fetch _seq_no via fields", "error code is changed from 5xx to 400 in 9.0")
task.skipTest("search.vectors/41_knn_search_bbq_hnsw/Test knn search", "Scoring has changed in latest versions")
task.skipTest("search.vectors/42_knn_search_bbq_flat/Test knn search", "Scoring has changed in latest versions")
@@ -115,12 +115,70 @@ setup:
- match: {hits.hits.0.highlight.field2.0: "The quick brown <em>fox</em> went to the forest and saw another fox."}

---
"Plain highlighter with max_analyzed_offset < 0 should FAIL":
"Plain highlighter on a field WITH OFFSETS exceeding index.highlight.max_analyzed_offset with max_analyzed_offset=0 should FAIL":

- requires:
test_runner_features: [capabilities]
capabilities:
- method: GET
path: /_search
capabilities: [ highlight_max_analyzed_offset_default ]
reason: Behavior of max_analyzed_offset query param changed in 8.18.

- do:
catch: bad_request
search:
rest_total_hits_as_int: true
index: test1
body: {"query" : {"match" : {"field2" : "fox"}}, "highlight" : {"type" : "plain", "fields" : {"field2" : {}}, "max_analyzed_offset": 0}}
- match: { status: 400 }
- match: { error.root_cause.0.type: "x_content_parse_exception" }
- match: { error.caused_by.type: "illegal_argument_exception" }
- match: { error.caused_by.reason: "[max_analyzed_offset] must be a positive integer, or -1" }

---
"Plain highlighter on a field WITH OFFSETS exceeding index.highlight.max_analyzed_offset with max_analyzed_offset=1 should SUCCEED":

- requires:
cluster_features: ["gte_v7.12.0"]
reason: max_analyzed_offset query param added in 7.12.0

- do:
search:
rest_total_hits_as_int: true
index: test1
body: {"query" : {"match" : {"field2" : "fox"}}, "highlight" : {"type" : "plain", "fields" : {"field2" : {}}, "max_analyzed_offset": 1}}
- match: { hits.hits.0.highlight: null }

---
"Plain highlighter with max_analyzed_offset = -1 default to index analyze offset should SUCCEED":

- requires:
test_runner_features: [capabilities]
capabilities:
- method: GET
path: /_search
capabilities: [ highlight_max_analyzed_offset_default ]
reason: Behavior of max_analyzed_offset query param changed in 8.18.

- do:
search:
rest_total_hits_as_int: true
index: test1
body: {"query" : {"match" : {"field2" : "fox"}}, "highlight" : {"type" : "plain", "fields" : {"field2" : {}}, "max_analyzed_offset": -1}}
- match: {hits.hits.0.highlight.field2.0: "The quick brown <em>fox</em> went to the forest and saw another fox."}

---
"Plain highlighter with max_analyzed_offset < -1 should FAIL":

- requires:
test_runner_features: [capabilities]
capabilities:
- method: GET
path: /_search
capabilities: [ highlight_max_analyzed_offset_default ]
reason: Behavior of max_analyzed_offset query param changed in 8.18.

- do:
catch: bad_request
search:
@@ -130,4 +188,4 @@
- match: { status: 400 }
- match: { error.root_cause.0.type: "x_content_parse_exception" }
- match: { error.caused_by.type: "illegal_argument_exception" }
- match: { error.caused_by.reason: "[max_analyzed_offset] must be a positive integer" }
- match: { error.caused_by.reason: "[max_analyzed_offset] must be a positive integer, or -1" }
@@ -2674,6 +2674,41 @@ public void testPostingsHighlighterOrderByScore() throws Exception {
});
}

public void testMaxQueryOffsetDefault() throws Exception {
assertAcked(
prepareCreate("test").setMapping(type1PostingsffsetsMapping())
.setSettings(Settings.builder().put("index.highlight.max_analyzed_offset", "10").build())
);
ensureGreen();

prepareIndex("test").setSource(
"field1",
new String[] {
"This sentence contains one match, not that short. This sentence contains zero sentence matches. "
+ "This one contains no matches.",
"This is the second value's first sentence. This one contains no matches. "
+ "This sentence contains three sentence occurrences (sentence).",
"One sentence match here and scored lower since the text is quite long, not that appealing. "
+ "This one contains no matches." }
).get();
refresh();

// Specific for this test: by passing "-1" as "maxAnalyzedOffset", the index highlight setting above will be used.
SearchSourceBuilder source = searchSource().query(termQuery("field1", "sentence"))
.highlighter(highlight().field("field1").order("score").maxAnalyzedOffset(-1));

assertResponse(client().search(new SearchRequest("test").source(source)), response -> {
Map<String, HighlightField> highlightFieldMap = response.getHits().getAt(0).getHighlightFields();
assertThat(highlightFieldMap.size(), equalTo(1));
HighlightField field1 = highlightFieldMap.get("field1");
assertThat(field1.fragments().length, equalTo(1));
assertThat(
field1.fragments()[0].string(),
equalTo("This <em>sentence</em> contains one match, not that short. This sentence contains zero sentence matches.")
);
});
}

public void testPostingsHighlighterEscapeHtml() throws Exception {
assertAcked(prepareCreate("test").setMapping("title", "type=text," + randomStoreField() + "index_options=offsets"));

@@ -34,7 +34,7 @@ class CustomFieldHighlighter extends FieldHighlighter {
private final Locale breakIteratorLocale;
private final int noMatchSize;
private String fieldValue;
private final Integer queryMaxAnalyzedOffset;
private final QueryMaxAnalyzedOffset queryMaxAnalyzedOffset;

CustomFieldHighlighter(
String field,
@@ -47,7 +47,7 @@
PassageFormatter passageFormatter,
Comparator<Passage> passageSortComparator,
int noMatchSize,
Integer queryMaxAnalyzedOffset
QueryMaxAnalyzedOffset queryMaxAnalyzedOffset
) {
super(
field,
@@ -113,7 +113,7 @@ protected Passage[] getSummaryPassagesNoHighlight(int maxPassages) {
@Override
protected Passage[] highlightOffsetsEnums(OffsetsEnum off) throws IOException {
if (queryMaxAnalyzedOffset != null) {
off = new LimitedOffsetsEnum(off, queryMaxAnalyzedOffset);
off = new LimitedOffsetsEnum(off, queryMaxAnalyzedOffset.getNotNull());
}
return super.highlightOffsetsEnums(off);
}
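The LimitedOffsetsEnum used above is the postings-side analogue of the analyzer wrapping: it stops enumerating match offsets once they pass the cap. A rough sketch of such a wrapper, assuming only the standard Lucene OffsetsEnum contract (the real class may differ):

import java.io.IOException;

import org.apache.lucene.search.uhighlight.OffsetsEnum;
import org.apache.lucene.util.BytesRef;

// Sketch only: delegate everything, but stop yielding positions once the start offset passes the cap.
final class CappedOffsetsEnum extends OffsetsEnum {
    private final OffsetsEnum in;
    private final int maxOffset;

    CappedOffsetsEnum(OffsetsEnum in, int maxOffset) {
        this.in = in;
        this.maxOffset = maxOffset;
    }

    @Override
    public boolean nextPosition() throws IOException {
        return in.nextPosition() && in.startOffset() < maxOffset;
    }

    @Override
    public int freq() throws IOException {
        return in.freq();
    }

    @Override
    public BytesRef getTerm() throws IOException {
        return in.getTerm();
    }

    @Override
    public int startOffset() throws IOException {
        return in.startOffset();
    }

    @Override
    public int endOffset() throws IOException {
        return in.endOffset();
    }
}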
@@ -66,7 +66,7 @@ public final class CustomUnifiedHighlighter extends UnifiedHighlighter {
private final int noMatchSize;
private final CustomFieldHighlighter fieldHighlighter;
private final int maxAnalyzedOffset;
private final Integer queryMaxAnalyzedOffset;
private final QueryMaxAnalyzedOffset queryMaxAnalyzedOffset;

/**
* Creates a new instance of {@link CustomUnifiedHighlighter}
@@ -94,7 +94,7 @@ public CustomUnifiedHighlighter(
int noMatchSize,
int maxPassages,
int maxAnalyzedOffset,
Integer queryMaxAnalyzedOffset,
QueryMaxAnalyzedOffset queryMaxAnalyzedOffset,
boolean requireFieldMatch,
boolean weightMatchesEnabled
) {
@@ -125,9 +125,9 @@ public Snippet[] highlightField(LeafReader reader, int docId, CheckedSupplier<St
return null;
}
int fieldValueLength = fieldValue.length();
if (((queryMaxAnalyzedOffset == null || queryMaxAnalyzedOffset > maxAnalyzedOffset)
if ((queryMaxAnalyzedOffset == null || queryMaxAnalyzedOffset.getNotNull() > maxAnalyzedOffset)
&& (getOffsetSource(field) == OffsetSource.ANALYSIS)
&& (fieldValueLength > maxAnalyzedOffset))) {
&& (fieldValueLength > maxAnalyzedOffset)) {
throw new IllegalArgumentException(
"The length ["
+ fieldValueLength
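Worked through with the numbers from the annotated-text test above (index limit 20, a longer field value), and assuming -1 has already been folded into the index limit by QueryMaxAnalyzedOffset.create, shown in the new file below:

// Sketch: why max_analyzed_offset=-1 no longer trips the guard in highlightField().
int maxAnalyzedOffset = 20;  // index.highlight.max_analyzed_offset
QueryMaxAnalyzedOffset query = QueryMaxAnalyzedOffset.create(-1, maxAnalyzedOffset);  // getNotNull() == 20
int fieldValueLength = 60;   // exceeds the index limit
boolean shouldThrow = (query == null || query.getNotNull() > maxAnalyzedOffset) && fieldValueLength > maxAnalyzedOffset;
// 20 > 20 is false, so shouldThrow is false: analysis is silently truncated at offset 20
// instead of erroring (the OffsetSource.ANALYSIS clause is omitted here for brevity).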
@@ -0,0 +1,30 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.lucene.search.uhighlight;

public class QueryMaxAnalyzedOffset {
private final int queryMaxAnalyzedOffset;

private QueryMaxAnalyzedOffset(final int queryMaxAnalyzedOffset) {
// If we have a negative value, grab value for the actual maximum from the index.
this.queryMaxAnalyzedOffset = queryMaxAnalyzedOffset;
}

public static QueryMaxAnalyzedOffset create(final Integer queryMaxAnalyzedOffset, final int indexMaxAnalyzedOffset) {
if (queryMaxAnalyzedOffset == null) {
return null;
}
return new QueryMaxAnalyzedOffset(queryMaxAnalyzedOffset < 0 ? indexMaxAnalyzedOffset : queryMaxAnalyzedOffset);
}

public int getNotNull() {
return queryMaxAnalyzedOffset;
}
}
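The wrapper folds the -1 sentinel into a concrete limit at construction time, so downstream code only ever sees null (no query-level limit) or a usable int. A usage sketch of the contract above, with hypothetical limits:

QueryMaxAnalyzedOffset.create(null, 20);              // null: no query-level limit was set
QueryMaxAnalyzedOffset.create(-1, 20).getNotNull();   // 20: -1 defers to the index limit
QueryMaxAnalyzedOffset.create(500, 20).getNotNull();  // 500: positive values pass through unchanged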
@@ -43,6 +43,8 @@ private SearchCapabilities() {}
private static final String OPTIMIZED_SCALAR_QUANTIZATION_BBQ = "optimized_scalar_quantization_bbq";
private static final String KNN_QUANTIZED_VECTOR_RESCORE_OVERSAMPLE = "knn_quantized_vector_rescore_oversample";

private static final String HIGHLIGHT_MAX_ANALYZED_OFFSET_DEFAULT = "highlight_max_analyzed_offset_default";

public static final Set<String> CAPABILITIES;
static {
HashSet<String> capabilities = new HashSet<>();
Expand All @@ -58,6 +60,7 @@ private SearchCapabilities() {}
capabilities.add(K_DEFAULT_TO_SIZE);
capabilities.add(KQL_QUERY_SUPPORTED);
capabilities.add(RRF_WINDOW_SIZE_SUPPORT_DEPRECATED);
capabilities.add(HIGHLIGHT_MAX_ANALYZED_OFFSET_DEFAULT);
CAPABILITIES = Set.copyOf(capabilities);
}
}
@@ -568,13 +568,12 @@ public Integer phraseLimit() {
}

/**
* Set to a non-negative value which represents the max offset used to analyze
* the field thus avoiding exceptions if the field exceeds this limit.
* "maxAnalyzedOffset" might be non-negative int, null (unknown), or a negative int (defaulting to index analyzed offset).
*/
@SuppressWarnings("unchecked")
public HB maxAnalyzedOffset(Integer maxAnalyzedOffset) {
if (maxAnalyzedOffset != null && maxAnalyzedOffset <= 0) {
throw new IllegalArgumentException("[" + MAX_ANALYZED_OFFSET_FIELD + "] must be a positive integer");
if (maxAnalyzedOffset != null && (maxAnalyzedOffset < -1 || maxAnalyzedOffset == 0)) {
throw new IllegalArgumentException("[" + MAX_ANALYZED_OFFSET_FIELD + "] must be a positive integer, or -1");
}
this.maxAnalyzedOffset = maxAnalyzedOffset;
return (HB) this;
@@ -31,6 +31,7 @@
import org.elasticsearch.lucene.search.uhighlight.BoundedBreakIteratorScanner;
import org.elasticsearch.lucene.search.uhighlight.CustomPassageFormatter;
import org.elasticsearch.lucene.search.uhighlight.CustomUnifiedHighlighter;
import org.elasticsearch.lucene.search.uhighlight.QueryMaxAnalyzedOffset;
import org.elasticsearch.lucene.search.uhighlight.Snippet;
import org.elasticsearch.search.fetch.FetchContext;
import org.elasticsearch.search.fetch.FetchSubPhase;
@@ -121,7 +122,10 @@ CustomUnifiedHighlighter buildHighlighter(FieldHighlightContext fieldContext) {
int maxAnalyzedOffset = indexSettings.getHighlightMaxAnalyzedOffset();
boolean weightMatchesEnabled = indexSettings.isWeightMatchesEnabled();
int numberOfFragments = fieldContext.field.fieldOptions().numberOfFragments();
Integer queryMaxAnalyzedOffset = fieldContext.field.fieldOptions().maxAnalyzedOffset();
QueryMaxAnalyzedOffset queryMaxAnalyzedOffset = QueryMaxAnalyzedOffset.create(
fieldContext.field.fieldOptions().maxAnalyzedOffset(),
maxAnalyzedOffset
);
Analyzer analyzer = wrapAnalyzer(
fieldContext.context.getSearchExecutionContext().getIndexAnalyzer(f -> Lucene.KEYWORD_ANALYZER),
queryMaxAnalyzedOffset
@@ -171,7 +175,7 @@ CustomUnifiedHighlighter buildHighlighter(FieldHighlightContext fieldContext) {
fieldContext.field.fieldOptions().noMatchSize(),
highlighterNumberOfFragments,
maxAnalyzedOffset,
fieldContext.field.fieldOptions().maxAnalyzedOffset(),
queryMaxAnalyzedOffset,
fieldContext.field.fieldOptions().requireFieldMatch(),
weightMatchesEnabled
);
Expand All @@ -186,9 +190,9 @@ protected PassageFormatter getPassageFormatter(SearchHighlightContext.Field fiel
);
}

protected Analyzer wrapAnalyzer(Analyzer analyzer, Integer maxAnalyzedOffset) {
protected Analyzer wrapAnalyzer(Analyzer analyzer, QueryMaxAnalyzedOffset maxAnalyzedOffset) {
if (maxAnalyzedOffset != null) {
analyzer = new LimitTokenOffsetAnalyzer(analyzer, maxAnalyzedOffset);
analyzer = new LimitTokenOffsetAnalyzer(analyzer, maxAnalyzedOffset.getNotNull());
}
return analyzer;
}
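For reference, the LimitTokenOffsetAnalyzer wrapping applied in wrapAnalyzer plausibly amounts to the following (a sketch assuming it delegates to Lucene's LimitTokenOffsetFilter, as the name suggests; the real class may differ):

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.AnalyzerWrapper;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.LimitTokenOffsetFilter;

// Sketch only: drop every token whose start offset exceeds maxOffset, truncating analysis there.
final class OffsetLimitingAnalyzer extends AnalyzerWrapper {
    private final Analyzer delegate;
    private final int maxOffset;

    OffsetLimitingAnalyzer(Analyzer delegate, int maxOffset) {
        super(delegate.getReuseStrategy());
        this.delegate = delegate;
        this.maxOffset = maxOffset;
    }

    @Override
    protected Analyzer getWrappedAnalyzer(String fieldName) {
        return delegate;
    }

    @Override
    protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
        TokenStream limited = new LimitTokenOffsetFilter(components.getTokenStream(), maxOffset);
        return new TokenStreamComponents(components.getSource(), limited);
    }
}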