From 61de3f22b6a36184bc1521da9ffcc315fc3a6cc9 Mon Sep 17 00:00:00 2001 From: Julie Tibshirani Date: Fri, 31 Jul 2020 14:52:27 -0700 Subject: [PATCH 1/3] LUCENE-9446: Remove MatchAllDocsQuery filters in boolean write. Previously, we only removed 'match all' FILTER clauses if there was at least one MUST clause. Now they're also removed if there is another distinct FILTER clause. This lets boolean queries like `#field:value #*:*` be written to `#field:value`. --- .../java/org/apache/lucene/search/BooleanQuery.java | 12 +++++++----- .../apache/lucene/search/TestBooleanRewrites.java | 10 +++++++++- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java index 2e2d81b53dec..d23bed1096a9 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java @@ -315,11 +315,13 @@ public Query rewrite(IndexReader reader) throws IOException { } } - // remove FILTER clauses that are also MUST clauses - // or that match all documents - if (clauseSets.get(Occur.MUST).size() > 0 && clauseSets.get(Occur.FILTER).size() > 0) { - final Set filters = new HashSet(clauseSets.get(Occur.FILTER)); - boolean modified = filters.remove(new MatchAllDocsQuery()); + // remove FILTER clauses that are also MUST clauses or that match all documents + if (clauseSets.get(Occur.FILTER).size() > 0) { + final Set filters = new HashSet<>(clauseSets.get(Occur.FILTER)); + boolean modified = false; + if (filters.size() > 1 || clauseSets.get(Occur.MUST).isEmpty() == false) { + modified = filters.remove(new MatchAllDocsQuery()); + } modified |= filters.removeAll(clauseSets.get(Occur.MUST)); if (modified) { BooleanQuery.Builder builder = new BooleanQuery.Builder(); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java b/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java index 497035b83f83..5fbd425a9886 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java @@ -312,12 +312,20 @@ public void testRemoveMatchAllFilter() throws IOException { .add(new TermQuery(new Term("foo", "baz")), Occur.MUST) .add(new MatchAllDocsQuery(), Occur.FILTER) .build(); - BooleanQuery expected = new BooleanQuery.Builder() + Query expected = new BooleanQuery.Builder() .setMinimumNumberShouldMatch(bq.getMinimumNumberShouldMatch()) .add(new TermQuery(new Term("foo", "bar")), Occur.MUST) .add(new TermQuery(new Term("foo", "baz")), Occur.MUST) .build(); assertEquals(expected, searcher.rewrite(bq)); + + bq = new BooleanQuery.Builder() + .add(new TermQuery(new Term("foo", "bar")), Occur.FILTER) + .add(new MatchAllDocsQuery(), Occur.FILTER) + .build(); + expected = new BoostQuery(new ConstantScoreQuery( + new TermQuery(new Term("foo", "bar"))), 0.0f); + assertEquals(expected, searcher.rewrite(bq)); } public void testRandom() throws IOException { From 4a6d7de532a583823e3b9a4d9d7ebd1fc95c86f6 Mon Sep 17 00:00:00 2001 From: Julie Tibshirani Date: Mon, 3 Aug 2020 10:34:25 -0700 Subject: [PATCH 2/3] Add another test case. --- .../org/apache/lucene/search/TestBooleanRewrites.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java b/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java index 5fbd425a9886..51ca8e508592 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java @@ -326,6 +326,14 @@ public void testRemoveMatchAllFilter() throws IOException { expected = new BoostQuery(new ConstantScoreQuery( new TermQuery(new Term("foo", "bar"))), 0.0f); assertEquals(expected, searcher.rewrite(bq)); + + bq = new BooleanQuery.Builder() + .add(new MatchAllDocsQuery(), Occur.FILTER) + .add(new MatchAllDocsQuery(), Occur.FILTER) + .build(); + expected = new BoostQuery(new ConstantScoreQuery( + new MatchAllDocsQuery()), 0.0f); + assertEquals(expected, searcher.rewrite(bq)); } public void testRandom() throws IOException { From fa3f5b1bf0faa39c4c37e8885a031e517e3ac7f6 Mon Sep 17 00:00:00 2001 From: Julie Tibshirani Date: Mon, 3 Aug 2020 10:40:25 -0700 Subject: [PATCH 3/3] Add a changelog entry. --- lucene/CHANGES.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index dac0401abb37..831201070470 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -118,6 +118,8 @@ Improvements with doc values and points. In this case, there is an assumption that the same data is stored in these points and doc values (Mayya Sharipova, Jim Ferenczi, Adrien Grand) +* LUCENE-9446: In BooleanQuery rewrite, always remove MatchAllDocsQuery filter clauses + when possible. (Julie Tibshirani) Bug fixes