From 42d5806fd69400bb42b7d15f6311ac02d3104efe Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Fri, 26 Jan 2024 15:50:22 +0100 Subject: [PATCH] Propagate minimum competitive score in ReqOptSumScorer. (#13026) If the required clause doesn't contribute scores, which typically happens if the required clause is a `FILTER` clause, then the minimum competitive score can be propagated directly to the optional clause. --- lucene/CHANGES.txt | 5 ++ .../apache/lucene/search/ReqOptSumScorer.java | 11 +++- .../lucene/search/TestReqOptSumScorer.java | 62 +++++++++++++------ 3 files changed, 58 insertions(+), 20 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index b6549625f02..dd5fbafd125 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -60,6 +60,11 @@ Optimizations clause of a boolean query, this helps save work on other required clauses of the same boolean query. (Adrien Grand) +* GITHUB#13026: ReqOptSumScorer will now propagate minimum competitive scores + to the optional clause if the required clause doesn't score. In practice, + this will help boolean queries that consist of a mix OF FILTER clauses and + SHOULD clauses. (Adrien Grand) + Bug Fixes --------------------- * GITHUB#12866: Prevent extra similarity computation for single-level HNSW graphs. (Kaival Parikh) diff --git a/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java b/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java index f4730288296..091323d510b 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java @@ -36,7 +36,7 @@ class ReqOptSumScorer extends Scorer { private final TwoPhaseIterator twoPhase; private float minScore = 0; - private float reqMaxScore; + private final float reqMaxScore; private boolean optIsRequired; /** @@ -295,6 +295,15 @@ public void setMinCompetitiveScore(float minScore) throws IOException { // Potentially move to a conjunction if (reqMaxScore < minScore) { optIsRequired = true; + if (reqMaxScore == 0) { + // If the required clause doesn't contribute scores, we can propagate the minimum + // competitive score to the optional clause. This happens when the required clause is a + // FILTER clause. + // In theory we could generalize this and set minScore - reqMaxScore as a minimum + // competitive score, but it's unlikely to help in practice unless reqMaxScore is much + // smaller than typical scores of the optional clause. + optScorer.setMinCompetitiveScore(minScore); + } } } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestReqOptSumScorer.java b/lucene/core/src/test/org/apache/lucene/search/TestReqOptSumScorer.java index cccc765cab9..79795958b4d 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestReqOptSumScorer.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestReqOptSumScorer.java @@ -41,7 +41,15 @@ public class TestReqOptSumScorer extends LuceneTestCase { - public void testBasics() throws IOException { + public void testBasicsMust() throws IOException { + doTestBasics(Occur.MUST); + } + + public void testBasicsFilter() throws IOException { + doTestBasics(Occur.FILTER); + } + + private void doTestBasics(Occur reqOccur) throws IOException { Directory dir = newDirectory(); RandomIndexWriter w = new RandomIndexWriter( @@ -75,7 +83,7 @@ public void testBasics() throws IOException { IndexSearcher searcher = newSearcher(reader); Query query = new BooleanQuery.Builder() - .add(new ConstantScoreQuery(new TermQuery(new Term("f", "foo"))), Occur.MUST) + .add(new ConstantScoreQuery(new TermQuery(new Term("f", "foo"))), reqOccur) .add(new ConstantScoreQuery(new TermQuery(new Term("f", "bar"))), Occur.SHOULD) .build(); Weight weight = searcher.createWeight(searcher.rewrite(query), ScoreMode.TOP_SCORES, 1); @@ -92,9 +100,13 @@ public void testBasics() throws IOException { ss.setTopLevelScoringClause(); scorer = ss.get(Long.MAX_VALUE); scorer.setMinCompetitiveScore(Math.nextDown(1f)); - assertEquals(0, scorer.iterator().nextDoc()); + if (reqOccur == Occur.MUST) { + assertEquals(0, scorer.iterator().nextDoc()); + } assertEquals(1, scorer.iterator().nextDoc()); - assertEquals(2, scorer.iterator().nextDoc()); + if (reqOccur == Occur.MUST) { + assertEquals(2, scorer.iterator().nextDoc()); + } assertEquals(4, scorer.iterator().nextDoc()); assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc()); @@ -102,8 +114,10 @@ public void testBasics() throws IOException { ss.setTopLevelScoringClause(); scorer = ss.get(Long.MAX_VALUE); scorer.setMinCompetitiveScore(Math.nextUp(1f)); - assertEquals(1, scorer.iterator().nextDoc()); - assertEquals(4, scorer.iterator().nextDoc()); + if (reqOccur == Occur.MUST) { + assertEquals(1, scorer.iterator().nextDoc()); + assertEquals(4, scorer.iterator().nextDoc()); + } assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc()); ss = weight.scorerSupplier(context); @@ -111,8 +125,10 @@ public void testBasics() throws IOException { scorer = ss.get(Long.MAX_VALUE); assertEquals(0, scorer.iterator().nextDoc()); scorer.setMinCompetitiveScore(Math.nextUp(1f)); - assertEquals(1, scorer.iterator().nextDoc()); - assertEquals(4, scorer.iterator().nextDoc()); + if (reqOccur == Occur.MUST) { + assertEquals(1, scorer.iterator().nextDoc()); + assertEquals(4, scorer.iterator().nextDoc()); + } assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc()); reader.close(); @@ -235,15 +251,23 @@ public void testMaxScoreSegment() throws IOException { dir.close(); } - public void testRandomFrequentOpt() throws IOException { - doTestRandom(0.5); + public void testMustRandomFrequentOpt() throws IOException { + doTestRandom(Occur.MUST, 0.5); } - public void testRandomRareOpt() throws IOException { - doTestRandom(0.05); + public void testMustRandomRareOpt() throws IOException { + doTestRandom(Occur.MUST, 0.05); } - private void doTestRandom(double optFreq) throws IOException { + public void testFilterRandomFrequentOpt() throws IOException { + doTestRandom(Occur.FILTER, 0.5); + } + + public void testFilterRandomRareOpt() throws IOException { + doTestRandom(Occur.FILTER, 0.05); + } + + private void doTestRandom(Occur reqOccur, double optFreq) throws IOException { Directory dir = newDirectory(); RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig()); int numDocs = atLeast(1000); @@ -269,7 +293,7 @@ private void doTestRandom(double optFreq) throws IOException { Query mustTerm = new TermQuery(new Term("f", "A")); Query shouldTerm = new TermQuery(new Term("f", "B")); Query query = - new BooleanQuery.Builder().add(mustTerm, Occur.MUST).add(shouldTerm, Occur.SHOULD).build(); + new BooleanQuery.Builder().add(mustTerm, reqOccur).add(shouldTerm, Occur.SHOULD).build(); TopScoreDocCollectorManager collectorManager = new TopScoreDocCollectorManager(10, Integer.MAX_VALUE); @@ -293,7 +317,7 @@ private void doTestRandom(double optFreq) throws IOException { { Query q = new BooleanQuery.Builder() - .add(new RandomApproximationQuery(mustTerm, random()), Occur.MUST) + .add(new RandomApproximationQuery(mustTerm, random()), reqOccur) .add(shouldTerm, Occur.SHOULD) .build(); @@ -304,7 +328,7 @@ private void doTestRandom(double optFreq) throws IOException { q = new BooleanQuery.Builder() - .add(mustTerm, Occur.MUST) + .add(mustTerm, reqOccur) .add(new RandomApproximationQuery(shouldTerm, random()), Occur.SHOULD) .build(); collectorManager = new TopScoreDocCollectorManager(10, 1); @@ -314,7 +338,7 @@ private void doTestRandom(double optFreq) throws IOException { q = new BooleanQuery.Builder() - .add(new RandomApproximationQuery(mustTerm, random()), Occur.MUST) + .add(new RandomApproximationQuery(mustTerm, random()), reqOccur) .add(new RandomApproximationQuery(shouldTerm, random()), Occur.SHOULD) .build(); collectorManager = new TopScoreDocCollectorManager(10, 1); @@ -348,7 +372,7 @@ private void doTestRandom(double optFreq) throws IOException { { query = new BooleanQuery.Builder() - .add(query, Occur.MUST) + .add(query, reqOccur) .add(new TermQuery(new Term("f", "C")), Occur.SHOULD) .build(); @@ -356,7 +380,7 @@ private void doTestRandom(double optFreq) throws IOException { query = new BooleanQuery.Builder() - .add(new TermQuery(new Term("f", "C")), Occur.MUST) + .add(new TermQuery(new Term("f", "C")), reqOccur) .add(query, Occur.SHOULD) .build();