Skip to content

Commit

Permalink
Fix problem with MatchNoDocsQuery in disjunction queries
Browse files Browse the repository at this point in the history
Queries across multiple fields can generate MatchNoDocsQuerys if some of the
fields are unmapped. In certain situation this can lead to erroneous behaviour,
for example when an umapped field is used in a `query_string` query across
several fields where some are unmapped. If some tokens get eliminated by the
analyzer of the mapped fields, the remaining unmapped fields will currently
generate disjunction queries containing only MatchNoDocsQuerys, which in turn
leads to no matches in the overall query. Instead we should simply drop
MatchNoDocsQuerys from disjunctions entirely.

Closes #34708
  • Loading branch information
Christoph Büscher committed Nov 20, 2018
1 parent 7a779a9 commit d9bc616
Show file tree
Hide file tree
Showing 6 changed files with 38 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -391,6 +391,7 @@ private void checkForPositions(String field) {
* Checks if graph analysis should be enabled for the field depending
* on the provided {@link Analyzer}
*/
@Override
protected Query createFieldQuery(Analyzer analyzer, BooleanClause.Occur operator, String field,
String queryText, boolean quoted, int phraseSlop) {
assert operator == BooleanClause.Occur.SHOULD || operator == BooleanClause.Occur.MUST;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -121,15 +121,21 @@ public Query parseGroup(Type type, String field, Float boostValue, Object value,
}

private Query combineGrouped(List<? extends Query> groupQuery) {
if (groupQuery == null || groupQuery.isEmpty()) {
if (groupQuery == null) {
return zeroTermsQuery();
}
if (groupQuery.size() == 1) {
return groupQuery.get(0);
}
List<Query> queries = new ArrayList<>();
List<Query> queries = new ArrayList<>(groupQuery.size());
for (Query query : groupQuery) {
queries.add(query);
if (query instanceof MatchNoDocsQuery == false) {
queries.add(query);
}
}
if (queries.isEmpty()) {
return zeroTermsQuery();
}
if (queries.size() == 1) {
Query query = queries.get(0);
return query;
}
return new DisjunctionMaxQuery(queries, tieBreaker);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -375,8 +375,7 @@ public void testDefaultField() throws Exception {
expected = new DisjunctionMaxQuery(
Arrays.asList(
new TermQuery(new Term(STRING_FIELD_NAME, "hello")),
new BoostQuery(new TermQuery(new Term(STRING_FIELD_NAME_2, "hello")), 5.0f),
new MatchNoDocsQuery("failed [mapped_int] query, caused by number_format_exception:[For input string: \"hello\"]")
new BoostQuery(new TermQuery(new Term(STRING_FIELD_NAME_2, "hello")), 5.0f)
), 0.0f
);
assertEquals(expected, query);
Expand Down Expand Up @@ -416,7 +415,7 @@ public void testWithStopWords() throws Exception {
.field(STRING_FIELD_NAME_2)
.analyzer("stop")
.toQuery(createShardContext());
expected = new DisjunctionMaxQuery(Arrays.asList(new MatchNoDocsQuery(), new MatchNoDocsQuery()), 0f);
expected = new MatchNoDocsQuery();
assertEquals(expected, query);

query = new BoolQueryBuilder()
Expand All @@ -440,7 +439,7 @@ public void testWithStopWords() throws Exception {
)
.toQuery(createShardContext());
expected = new BooleanQuery.Builder()
.add(new DisjunctionMaxQuery(Arrays.asList(new MatchNoDocsQuery(), new MatchNoDocsQuery()), 0f), BooleanClause.Occur.SHOULD)
.add(new MatchNoDocsQuery(), BooleanClause.Occur.SHOULD)
.build();
assertEquals(expected, query);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -687,6 +687,26 @@ public void testWithPrefixStopWords() throws Exception {
assertEquals(expected, query);
}

/**
* Test for behavior reported in https://github.com/elastic/elasticsearch/issues/34708
* Unmapped field can lead to MatchNoDocsQuerys in disjunction queries. If tokens are eliminated (e.g. because
* the tokenizer removed them as punctuation) on regular fields, this can leave only MatchNoDocsQuerys in the
* disjunction clause. Instead those disjunctions should be eliminated completely.
*/
public void testUnmappedFieldNoTokenWithAndOperator() throws IOException {
Query query = new SimpleQueryStringBuilder("first & second")
.field(STRING_FIELD_NAME)
.field("unmapped")
.field("another_unmapped")
.defaultOperator(Operator.AND)
.toQuery(createShardContext());
BooleanQuery expected = new BooleanQuery.Builder()
.add(new TermQuery(new Term(STRING_FIELD_NAME, "first")), BooleanClause.Occur.MUST)
.add(new TermQuery(new Term(STRING_FIELD_NAME, "second")), BooleanClause.Occur.MUST)
.build();
assertEquals(expected, query);
}

private static IndexMetaData newIndexMeta(String name, Settings oldIndexSettings, Settings indexSettings) {
Settings build = Settings.builder().put(oldIndexSettings)
.put(indexSettings)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SynonymQuery;
Expand Down Expand Up @@ -113,11 +112,7 @@ public void testCrossFieldMultiMatchQuery() throws IOException {
Query rewrittenQuery = searcher.searcher().rewrite(parsedQuery);
Query tq1 = new BoostQuery(new TermQuery(new Term("name.first", "banon")), 2);
Query tq2 = new BoostQuery(new TermQuery(new Term("name.last", "banon")), 3);
Query expected = new DisjunctionMaxQuery(
Arrays.asList(
new MatchNoDocsQuery("unknown field foobar"),
new DisjunctionMaxQuery(Arrays.asList(tq2, tq1), tieBreaker)
), tieBreaker);
Query expected = new DisjunctionMaxQuery(Arrays.asList(tq2, tq1), tieBreaker);
assertEquals(expected, rewrittenQuery);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ public void testExplainValidateQueryTwoNodes() throws IOException {
.execute().actionGet();
assertThat(response.isValid(), equalTo(true));
assertThat(response.getQueryExplanation().size(), equalTo(1));
assertThat(response.getQueryExplanation().get(0).getExplanation(), equalTo("(MatchNoDocsQuery(\"failed [bar] query, caused by number_format_exception:[For input string: \"foo\"]\") | foo:foo | baz:foo)"));
assertThat(response.getQueryExplanation().get(0).getExplanation(), equalTo("(foo:foo | baz:foo)"));
assertThat(response.getQueryExplanation().get(0).getError(), nullValue());
}
}
Expand Down

0 comments on commit d9bc616

Please sign in to comment.