Skip to content

Commit

Permalink
Fix bug where fvh fragments could be loaded from wrong doc (#66142)
Browse files Browse the repository at this point in the history
This PR fixes a regression where fvh fragments could be loaded from the wrong
document _source.

Some `FragmentsBuilder` implementations contain a `SourceLookup` to load from
_source. The lookup should be positioned to load from the current hit document.
However, since `FragmentsBuilder` instances are cached and shared across hits,
the lookup is never repositioned when moving on to a new document. This means we
accidentally load _source from a different document.

The regression was introduced in #60179, which started storing `SourceLookup`
on `FragmentsBuilder`.

Fixes #65533.
  • Loading branch information
jtibshirani committed Dec 10, 2020
1 parent 30f1f5c commit b2d3c3f
Show file tree
Hide file tree
Showing 2 changed files with 136 additions and 61 deletions.
Original file line number Diff line number Diff line change
@@ -1,23 +1,42 @@
setup:
- do:
indices.create:
index: test
body:
mappings:
"properties":
"title":
"type": "text"
"term_vector": "with_positions_offsets"
"description":
"type": "text"
"term_vector": "with_positions_offsets"
index: test
body:
mappings:
"properties":
"id":
"type": "integer"
"title":
"type": "text"
"term_vector": "with_positions_offsets"
"description":
"type": "text"
"term_vector": "with_positions_offsets"
"nested":
"type": "nested"
"properties":
"title":
"type": "text"
"term_vector": "with_positions_offsets"
- do:
index:
index: test
body:
id: 1
"title" : "The quick brown fox is brown"
"description" : "The quick pink panther is pink"

- do:
index:
index: test
id: 1
body:
"title" : "The quick brown fox is brown"
"description" : "The quick pink panther is pink"
id: 2
"title" : "The quick blue fox is blue"
"nested":
- "title": "purple octopus"
- "title": "purple fish"

- do:
indices.refresh: {}

Expand All @@ -27,19 +46,69 @@ setup:
search:
rest_total_hits_as_int: true
body:
highlight:
type: fvh
fields:
description:
type: fvh
highlight_query:
prefix:
description: br
title:
type: fvh
highlight_query:
prefix:
title: br
highlight:
type: fvh
fields:
description:
type: fvh
highlight_query:
prefix:
description: br
title:
type: fvh
highlight_query:
prefix:
title: br

- match: {hits.hits.0.highlight.title.0: "The quick <em>brown</em> fox is <em>brown</em>"}
- is_false: hits.hits.0.highlight.description

---
"Highlight multiple documents":
- skip:
version: " - 7.10.1"
reason: Bug fixed in 7.10.2
- do:
search:
rest_total_hits_as_int: true
body:
query:
match:
title: fox
sort: ["id"]
highlight:
type: fvh
fields:
title:
type: fvh

- match: {hits.hits.0.highlight.title.0: "The quick brown <em>fox</em> is brown"}
- is_false: hits.hits.0.highlight.description
- match: {hits.hits.1.highlight.title.0: "The quick blue <em>fox</em> is blue"}
- is_false: hits.hits.1.highlight.description

---
"Highlight multiple nested documents":
- skip:
version: " - 7.10.1"
reason: Bug fixed in 7.10.2
- do:
search:
rest_total_hits_as_int: true
body:
query:
nested:
path: nested
query:
match:
nested.title: purple
inner_hits:
name: nested_hits
highlight:
type: fvh
fields:
nested.title:
type: fvh

- match: {hits.hits.0.inner_hits.nested_hits.hits.hits.0.highlight.nested\\.title.0: "<em>purple</em> octopus"}
- match: {hits.hits.0.inner_hits.nested_hits.hits.hits.1.highlight.nested\\.title.0: "<em>purple</em> fish"}
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,15 @@
import org.elasticsearch.search.fetch.FetchSubPhase;
import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext.Field;
import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext.FieldOptions;
import org.elasticsearch.search.lookup.SourceLookup;

import java.io.IOException;
import java.text.BreakIterator;
import java.util.Collections;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.function.Function;

public class FastVectorHighlighter implements Highlighter {
private static final BoundaryScanner DEFAULT_SIMPLE_BOUNDARY_SCANNER = new SimpleBoundaryScanner();
Expand Down Expand Up @@ -88,42 +90,15 @@ public HighlightField highlight(FieldHighlightContext fieldContext) throws IOExc
FieldHighlightEntry entry = cache.fields.get(fieldType);
if (entry == null) {
FragListBuilder fragListBuilder;
BaseFragmentsBuilder fragmentsBuilder;

final BoundaryScanner boundaryScanner = getBoundaryScanner(field);
if (field.fieldOptions().numberOfFragments() == 0) {
fragListBuilder = new SingleFragListBuilder();

if (!forceSource && fieldType.isStored()) {
fragmentsBuilder = new SimpleFragmentsBuilder(fieldType, field.fieldOptions().preTags(),
field.fieldOptions().postTags(), boundaryScanner);
} else {
fragmentsBuilder = new SourceSimpleFragmentsBuilder(fieldType, hitContext.sourceLookup(),
field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner);
}
} else {
fragListBuilder = field.fieldOptions().fragmentOffset() == -1 ?
new SimpleFragListBuilder() : new SimpleFragListBuilder(field.fieldOptions().fragmentOffset());
if (field.fieldOptions().scoreOrdered()) {
if (!forceSource && fieldType.isStored()) {
fragmentsBuilder = new ScoreOrderFragmentsBuilder(field.fieldOptions().preTags(),
field.fieldOptions().postTags(), boundaryScanner);
} else {
fragmentsBuilder = new SourceScoreOrderFragmentsBuilder(fieldType, hitContext.sourceLookup(),
field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner);
}
} else {
if (!forceSource && fieldType.isStored()) {
fragmentsBuilder = new SimpleFragmentsBuilder(fieldType, field.fieldOptions().preTags(),
field.fieldOptions().postTags(), boundaryScanner);
} else {
fragmentsBuilder =
new SourceSimpleFragmentsBuilder(fieldType, hitContext.sourceLookup(),
field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner);
}
}
}
fragmentsBuilder.setDiscreteMultiValueHighlighting(termVectorMultiValue);

Function<SourceLookup, FragmentsBuilder> fragmentsBuilderSupplier = fragmentsBuilderSupplier(field, fieldType, forceSource);

entry = new FieldHighlightEntry();
if (field.fieldOptions().requireFieldMatch()) {
/*
Expand All @@ -141,7 +116,7 @@ public HighlightField highlight(FieldHighlightContext fieldContext) throws IOExc
hitContext.topLevelReader(), true, field.fieldOptions().requireFieldMatch());
}
entry.fragListBuilder = fragListBuilder;
entry.fragmentsBuilder = fragmentsBuilder;
entry.fragmentsBuilderSupplier = fragmentsBuilderSupplier;
if (cache.fvh == null) {
// parameters to FVH are not required since:
// first two booleans are not relevant since they are set on the CustomFieldQuery
Expand All @@ -160,6 +135,7 @@ public HighlightField highlight(FieldHighlightContext fieldContext) throws IOExc
cache.fvh.setPhraseLimit(field.fieldOptions().phraseLimit());

String[] fragments;
FragmentsBuilder fragmentsBuilder = entry.fragmentsBuilderSupplier.apply(hitContext.sourceLookup());

// a HACK to make highlighter do highlighting, even though it's using the single frag list builder
int numberOfFragments = field.fieldOptions().numberOfFragments() == 0 ?
Expand All @@ -171,12 +147,12 @@ public HighlightField highlight(FieldHighlightContext fieldContext) throws IOExc
if (field.fieldOptions().matchedFields() != null && !field.fieldOptions().matchedFields().isEmpty()) {
fragments = cache.fvh.getBestFragments(fieldQuery, hitContext.reader(), hitContext.docId(),
fieldType.name(), field.fieldOptions().matchedFields(), fragmentCharSize,
numberOfFragments, entry.fragListBuilder, entry.fragmentsBuilder, field.fieldOptions().preTags(),
numberOfFragments, entry.fragListBuilder, fragmentsBuilder, field.fieldOptions().preTags(),
field.fieldOptions().postTags(), encoder);
} else {
fragments = cache.fvh.getBestFragments(fieldQuery, hitContext.reader(), hitContext.docId(),
fieldType.name(), fragmentCharSize, numberOfFragments, entry.fragListBuilder,
entry.fragmentsBuilder, field.fieldOptions().preTags(), field.fieldOptions().postTags(), encoder);
fragmentsBuilder, field.fieldOptions().preTags(), field.fieldOptions().postTags(), encoder);
}

if (CollectionUtils.isEmpty(fragments) == false) {
Expand All @@ -189,7 +165,7 @@ public HighlightField highlight(FieldHighlightContext fieldContext) throws IOExc
// the normal fragmentsBuilder
FieldFragList fieldFragList = new SimpleFieldFragList(-1 /*ignored*/);
fieldFragList.add(0, noMatchSize, Collections.emptyList());
fragments = entry.fragmentsBuilder.createFragments(hitContext.reader(), hitContext.docId(),
fragments = fragmentsBuilder.createFragments(hitContext.reader(), hitContext.docId(),
fieldType.name(), fieldFragList, 1, field.fieldOptions().preTags(),
field.fieldOptions().postTags(), encoder);
if (CollectionUtils.isEmpty(fragments) == false) {
Expand All @@ -200,6 +176,36 @@ public HighlightField highlight(FieldHighlightContext fieldContext) throws IOExc
return null;
}

/**
 * Creates a factory that produces a new {@link FragmentsBuilder} every time it is applied.
 * <p>
 * The kind of builder is decided once, up front, from the field options and mapping:
 * a source-backed builder is used when {@code forceSource} is set or the field is not stored,
 * and a score-ordered builder is used when fragments are requested
 * ({@code numberOfFragments() != 0}) and score ordering is enabled. Only the source-backed
 * builders receive the {@link SourceLookup} passed to the factory; the stored-field builders
 * ignore it.
 *
 * @param field       the highlight field whose options (tags, boundary scanner, ordering) configure the builder
 * @param fieldType   the mapped field type being highlighted
 * @param forceSource whether fragments must be loaded from _source even if the field is stored
 * @return a function mapping a {@link SourceLookup} to a freshly configured fragments builder
 */
private Function<SourceLookup, FragmentsBuilder> fragmentsBuilderSupplier(SearchHighlightContext.Field field,
                                                                          MappedFieldType fieldType,
                                                                          boolean forceSource) {
    final BoundaryScanner scanner = getBoundaryScanner(field);
    final FieldOptions fieldOptions = field.fieldOptions();

    // Both decisions are taken eagerly, exactly once, when the factory is created.
    final boolean loadFromSource = forceSource || fieldType.isStored() == false;
    final boolean scoreOrdered = fieldOptions.numberOfFragments() != 0 && fieldOptions.scoreOrdered();

    return sourceLookup -> {
        final BaseFragmentsBuilder fragmentsBuilder;
        if (loadFromSource) {
            // Source-backed builders read from the lookup handed in by the caller.
            fragmentsBuilder = scoreOrdered
                ? new SourceScoreOrderFragmentsBuilder(fieldType, sourceLookup,
                    fieldOptions.preTags(), fieldOptions.postTags(), scanner)
                : new SourceSimpleFragmentsBuilder(fieldType, sourceLookup,
                    fieldOptions.preTags(), fieldOptions.postTags(), scanner);
        } else {
            // Stored-field builders have no use for the lookup.
            fragmentsBuilder = scoreOrdered
                ? new ScoreOrderFragmentsBuilder(fieldOptions.preTags(), fieldOptions.postTags(), scanner)
                : new SimpleFragmentsBuilder(fieldType,
                    fieldOptions.preTags(), fieldOptions.postTags(), scanner);
        }
        fragmentsBuilder.setDiscreteMultiValueHighlighting(termVectorMultiValue);
        return fragmentsBuilder;
    };
}

@Override
public boolean canHighlight(MappedFieldType ft) {
return ft.getTextSearchInfo().termVectors() == TextSearchInfo.TermVector.OFFSETS;
Expand Down Expand Up @@ -237,7 +243,7 @@ private static BoundaryScanner getBoundaryScanner(Field field) {

/**
 * Per-field highlighting state that {@code highlight} looks up from, and stores into,
 * the highlighter cache so it can be reused on later calls for the same field type.
 */
private static class FieldHighlightEntry {
// Fragment list builder chosen for the field; passed to getBestFragments on every call.
public FragListBuilder fragListBuilder;
// NOTE(review): this listing is a flattened diff; this field looks like the pre-change
// member that the supplier below replaces — confirm against the actual file.
public FragmentsBuilder fragmentsBuilder;
// Factory applied with the current hit's SourceLookup to obtain the fragments builder for that call.
public Function<SourceLookup, FragmentsBuilder> fragmentsBuilderSupplier;
// presumably the field query used when requireFieldMatch is disabled — TODO confirm
public FieldQuery noFieldMatchFieldQuery;
// presumably the field query used when requireFieldMatch is enabled — TODO confirm
public FieldQuery fieldMatchFieldQuery;
}
Expand Down

0 comments on commit b2d3c3f

Please sign in to comment.