Skip to content

Commit

Permalink
Make sure shared source always represents the top-level root document. (
Browse files Browse the repository at this point in the history
#67016)

We started passing down the root document's _source when processing
nested hits, to avoid reloading and reparsing the root source for each hit.
Unfortunately the approach did not work when there are multiple layers of
`inner_hits`. In this case, the second-layer inner hit received its immediate
parent's source instead of the root source. This parent source is filtered to
just contain the parts corresponding to the nested document, but the source
parsing logic is designed to always operate on the top-level root source. This
caused failures when loading the second-layer inner hits.

This PR makes sure to always pass the root document's _source when processing
inner hits, even if there are multiple layers.
  • Loading branch information
jtibshirani committed Jan 5, 2021
1 parent 3d9f639 commit 8ceb1ca
Show file tree
Hide file tree
Showing 5 changed files with 174 additions and 11 deletions.
13 changes: 13 additions & 0 deletions docs/reference/release-notes/7.10.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,14 @@

Also see <<breaking-changes-7.10,Breaking changes in 7.10>>.

[[known-issues-7.10.1]]
[discrete]
=== Known issues
* In {es} 7.10.0 there were several regressions around loading nested documents. These have been addressed in {es} 7.10.2.
** With multiple layers of nested `inner_hits`, we can fail to load the _source. ({es-issue}66577[#66577])
** With nested `inner_hits`, the fast vector highlighter may load snippets from the wrong document. ({es-issue}65533[#65533])
** When _source is disabled, we can fail load nested `inner_hits` and `top_hits`. ({es-issue}66572[#66572])

[[bug-7.10.1]]
[float]
=== Bug fixes
Expand Down Expand Up @@ -81,6 +89,11 @@ with a `NOT IN` operator.
We have fixed this issue in {es} 7.10.1 and later versions. For more details,
see {es-issue}65488[#65488].

* There were several regressions around loading nested documents. These have been addressed in {es} 7.10.2.
** With multiple layers of nested `inner_hits`, we can fail to load the _source. ({es-issue}66577[#66577])
** With nested `inner_hits`, the fast vector highlighter may load snippets from the wrong document. ({es-issue}65533[#65533])
** When _source is disabled, we can fail load nested `inner_hits` and `top_hits`. ({es-issue}66572[#66572])

[[breaking-7.10.0]]
[float]
=== Breaking changes
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,3 +92,72 @@ setup:
- match: { hits.hits.0.fields._seq_no: [1] }
- match: { hits.hits.0.inner_hits.nested_field.hits.hits.0._version: 2 }
- match: { hits.hits.0.inner_hits.nested_field.hits.hits.0.fields._seq_no: [1] }

---
"Inner hits with disabled _source":
- skip:
version: " - 7.3.0"
reason: "The bugfix for this case was introduced in 7.3.1"
- do:
indices.create:
index: disabled_source
body:
mappings:
_source:
enabled: false
properties:
nested_field:
type: nested
properties:
sub_nested_field:
type: nested

- do:
index:
index: disabled_source
id: 1
body:
nested_field:
field: value
sub_nested_field:
field: value

- do:
indices.refresh: {}

- do:
search:
index: disabled_source
rest_total_hits_as_int: true
body:
query:
nested:
path: "nested_field.sub_nested_field"
query: { match_all: {}}
inner_hits: {}
- match: { hits.total: 1 }
- match: { hits.hits.0._id: "1" }
- match: { hits.hits.0.inner_hits.nested_field\.sub_nested_field.hits.hits.0._id: "1" }
- is_false: hits.hits.0.inner_hits.nested_field\.sub_nested_field.hits.hits.0._source

- do:
search:
index: disabled_source
rest_total_hits_as_int: true
body:
query:
nested:
path: "nested_field"
inner_hits: {}
query:
nested:
path: "nested_field.sub_nested_field"
query: { match_all: {}}
inner_hits: {}

- match: { hits.total: 1 }
- match: { hits.hits.0._id: "1" }
- match: { hits.hits.0.inner_hits.nested_field.hits.hits.0._id: "1" }
- is_false: hits.hits.0.inner_hits.nested_field.hits.hits.0._source
- match: { hits.hits.0.inner_hits.nested_field.hits.hits.0.inner_hits.nested_field\.sub_nested_field.hits.hits.0._id: "1" }
- is_false: hits.hits.0.inner_hits.nested_field.hits.hits.0.inner_hits.nested_field\.sub_nested_field.hits.hits.0._source
Original file line number Diff line number Diff line change
Expand Up @@ -279,10 +279,14 @@ public void testNestedMultipleLayers() throws Exception {
requests.add(client().prepareIndex("articles", "article", "1").setSource(jsonBuilder().startObject()
.field("title", "quick brown fox")
.startArray("comments")
.startObject()
.field("message", "fox eat quick")
.startArray("remarks").startObject().field("message", "good").endObject().endArray()
.endObject()
.startObject()
.field("message", "fox eat quick")
.startArray("remarks").startObject().field("message", "good").endObject().endArray()
.endObject()
.startObject()
.field("message", "hippo is hungry")
.startArray("remarks").startObject().field("message", "neutral").endObject().endArray()
.endObject()
.endArray()
.endObject()));
requests.add(client().prepareIndex("articles", "article", "2").setSource(jsonBuilder().startObject()
Expand All @@ -296,6 +300,7 @@ public void testNestedMultipleLayers() throws Exception {
.endObject()));
indexRandom(true, requests);

// Check we can load the first doubly-nested document.
SearchResponse response = client().prepareSearch("articles")
.setQuery(
nestedQuery("comments",
Expand All @@ -322,6 +327,33 @@ public void testNestedMultipleLayers() throws Exception {
assertThat(innerHits.getAt(0).getNestedIdentity().getChild().getField().string(), equalTo("remarks"));
assertThat(innerHits.getAt(0).getNestedIdentity().getChild().getOffset(), equalTo(0));

// Check we can load the second doubly-nested document.
response = client().prepareSearch("articles")
.setQuery(
nestedQuery("comments",
nestedQuery("comments.remarks", matchQuery("comments.remarks.message", "neutral"), ScoreMode.Avg)
.innerHit(new InnerHitBuilder("remark")),
ScoreMode.Avg).innerHit(new InnerHitBuilder())
).get();
assertNoFailures(response);
assertHitCount(response, 1);
assertSearchHit(response, 1, hasId("1"));
assertThat(response.getHits().getAt(0).getInnerHits().size(), equalTo(1));
innerHits = response.getHits().getAt(0).getInnerHits().get("comments");
assertThat(innerHits.getTotalHits().value, equalTo(1L));
assertThat(innerHits.getHits().length, equalTo(1));
assertThat(innerHits.getAt(0).getId(), equalTo("1"));
assertThat(innerHits.getAt(0).getNestedIdentity().getField().string(), equalTo("comments"));
assertThat(innerHits.getAt(0).getNestedIdentity().getOffset(), equalTo(1));
innerHits = innerHits.getAt(0).getInnerHits().get("remark");
assertThat(innerHits.getTotalHits().value, equalTo(1L));
assertThat(innerHits.getHits().length, equalTo(1));
assertThat(innerHits.getAt(0).getId(), equalTo("1"));
assertThat(innerHits.getAt(0).getNestedIdentity().getField().string(), equalTo("comments"));
assertThat(innerHits.getAt(0).getNestedIdentity().getOffset(), equalTo(1));
assertThat(innerHits.getAt(0).getNestedIdentity().getChild().getField().string(), equalTo("remarks"));
assertThat(innerHits.getAt(0).getNestedIdentity().getChild().getOffset(), equalTo(0));

// Directly refer to the second level:
response = client().prepareSearch("articles")
.setQuery(nestedQuery("comments.remarks", matchQuery("comments.remarks.message", "bad"), ScoreMode.Avg)
Expand Down Expand Up @@ -364,6 +396,34 @@ public void testNestedMultipleLayers() throws Exception {
assertThat(innerHits.getAt(0).getNestedIdentity().getOffset(), equalTo(0));
assertThat(innerHits.getAt(0).getNestedIdentity().getChild().getField().string(), equalTo("remarks"));
assertThat(innerHits.getAt(0).getNestedIdentity().getChild().getOffset(), equalTo(0));

// Check that inner hits contain _source even when it's disabled on the parent request.
response = client().prepareSearch("articles")
.setFetchSource(false)
.setQuery(
nestedQuery("comments",
nestedQuery("comments.remarks", matchQuery("comments.remarks.message", "good"), ScoreMode.Avg)
.innerHit(new InnerHitBuilder("remark")), ScoreMode.Avg)
.innerHit(new InnerHitBuilder())
).get();
assertNoFailures(response);
innerHits = response.getHits().getAt(0).getInnerHits().get("comments");
innerHits = innerHits.getAt(0).getInnerHits().get("remark");
assertNotNull(innerHits.getAt(0).getSourceAsMap());
assertFalse(innerHits.getAt(0).getSourceAsMap().isEmpty());

response = client().prepareSearch("articles")
.setQuery(
nestedQuery("comments",
nestedQuery("comments.remarks", matchQuery("comments.remarks.message", "good"), ScoreMode.Avg)
.innerHit(new InnerHitBuilder("remark")), ScoreMode.Avg)
.innerHit(new InnerHitBuilder().setFetchSourceContext(new FetchSourceContext(false)))
).get();
assertNoFailures(response);
innerHits = response.getHits().getAt(0).getInnerHits().get("comments");
innerHits = innerHits.getAt(0).getInnerHits().get("remark");
assertNotNull(innerHits.getAt(0).getSourceAsMap());
assertFalse(innerHits.getAt(0).getSourceAsMap().isEmpty());
}

// Issue #9723
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,13 @@
import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
import org.elasticsearch.search.fetch.subphase.FieldAndFormat;
import org.elasticsearch.search.fetch.subphase.InnerHitsContext;
import org.elasticsearch.search.fetch.subphase.InnerHitsContext.InnerHitSubContext;
import org.elasticsearch.search.fetch.subphase.ScriptFieldsContext;
import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext;
import org.elasticsearch.search.internal.ContextIndexSearcher;
import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.search.lookup.SearchLookup;
import org.elasticsearch.search.lookup.SourceLookup;
import org.elasticsearch.search.rescore.RescoreContext;

import java.util.Collections;
Expand Down Expand Up @@ -204,4 +206,23 @@ public SearchExtBuilder getSearchExt(String name) {
public QueryShardContext getQueryShardContext() {
return searchContext.getQueryShardContext();
}

/**
* For a hit document that's being processed, return the source lookup representing the
* root document. This method is used to pass down the root source when processing this
* document's nested inner hits.
*
* @param hitContext The context of the hit that's being processed.
*/
public SourceLookup getRootSourceLookup(FetchSubPhase.HitContext hitContext) {
// Usually the root source simply belongs to the hit we're processing. But if
// there are multiple layers of inner hits and we're in a nested context, then
// the root source is found on the inner hits context.
if (searchContext instanceof InnerHitSubContext && hitContext.hit().getNestedIdentity() != null) {
InnerHitSubContext innerHitsContext = (InnerHitSubContext) searchContext;
return innerHitsContext.getRootLookup();
} else {
return hitContext.sourceLookup();
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -59,16 +59,16 @@ public void setNextReader(LeafReaderContext readerContext) {

@Override
public void process(HitContext hitContext) throws IOException {
hitExecute(innerHits, hitContext);
SearchHit hit = hitContext.hit();
SourceLookup rootLookup = searchContext.getRootSourceLookup(hitContext);
hitExecute(innerHits, hit, rootLookup);
}
};
}

private void hitExecute(Map<String, InnerHitsContext.InnerHitSubContext> innerHits, HitContext hitContext) throws IOException {

SearchHit hit = hitContext.hit();
SourceLookup sourceLookup = hitContext.sourceLookup();

private void hitExecute(Map<String, InnerHitsContext.InnerHitSubContext> innerHits,
SearchHit hit,
SourceLookup rootLookup) throws IOException {
for (Map.Entry<String, InnerHitsContext.InnerHitSubContext> entry : innerHits.entrySet()) {
InnerHitsContext.InnerHitSubContext innerHitsContext = entry.getValue();
TopDocsAndMaxScore topDoc = innerHitsContext.topDocs(hit);
Expand All @@ -84,7 +84,7 @@ private void hitExecute(Map<String, InnerHitsContext.InnerHitSubContext> innerHi
}
innerHitsContext.docIdsToLoad(docIdsToLoad, docIdsToLoad.length);
innerHitsContext.setRootId(new Uid(hit.getType(), hit.getId()));
innerHitsContext.setRootLookup(sourceLookup);
innerHitsContext.setRootLookup(rootLookup);

fetchPhase.execute(innerHitsContext);
FetchSearchResult fetchResult = innerHitsContext.fetchResult();
Expand Down

0 comments on commit 8ceb1ca

Please sign in to comment.