From 80ac289540057c4440d2d18596b90e4e87653e90 Mon Sep 17 00:00:00 2001 From: Govind Balaji S Date: Thu, 20 Nov 2025 14:29:17 +0530 Subject: [PATCH 1/3] Fix segment-specific TermInSetQuery rewrites thrashing caching policy Fixes https://github.com/apache/lucene/issues/14986. A `TermInSetQuery` with `rewriteMethod = MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE` creates a `RewritingWeight`. Getting a scorer from this `RewritingWeight` for a segment can involve rewriting to a `BooleanQuery` of multiple `TermQuery` clauses containing only the terms present in that particular segment. These segment-specific `BooleanQuery` rewrites all thrash the `UsageTrackingQueryCachingPolicy` ring buffer, which is shared across all segments of the index. The expectation is that we mark a query only once per shard in this ring buffer ([ref](https://github.com/apache/lucene/blob/a3fa283cfd6aefbcc5a4983e065da0ce7d3209fe/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java#L686-L688)). In this change, when initializing `AbstractMultiTermQueryConstantScoreWrapper.RewritingWeight`, we copy the supplied `IndexSearcher`, call `setQueryCache(null)` on the copy, and use that copy for the segment-specific rewrites. 
--- ...actMultiTermQueryConstantScoreWrapper.java | 9 +-- .../apache/lucene/search/IndexSearcher.java | 8 +++ .../lucene/search/TestTermInSetQuery.java | 62 +++++++++++++++++++ 3 files changed, 75 insertions(+), 4 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/AbstractMultiTermQueryConstantScoreWrapper.java b/lucene/core/src/java/org/apache/lucene/search/AbstractMultiTermQueryConstantScoreWrapper.java index b6ee4db540af..a6e97071acc4 100644 --- a/lucene/core/src/java/org/apache/lucene/search/AbstractMultiTermQueryConstantScoreWrapper.java +++ b/lucene/core/src/java/org/apache/lucene/search/AbstractMultiTermQueryConstantScoreWrapper.java @@ -126,14 +126,15 @@ protected static final class WeightOrDocIdSetIterator { protected abstract static class RewritingWeight extends ConstantScoreWeight { private final MultiTermQuery q; private final ScoreMode scoreMode; - private final IndexSearcher searcher; + private final IndexSearcher nonCachingSearcher; protected RewritingWeight( MultiTermQuery q, float boost, ScoreMode scoreMode, IndexSearcher searcher) { super(q, boost); this.q = q; this.scoreMode = scoreMode; - this.searcher = searcher; + this.nonCachingSearcher = new IndexSearcher(searcher); + this.nonCachingSearcher.setQueryCache(null); } /** @@ -158,12 +159,12 @@ private WeightOrDocIdSetIterator rewriteAsBooleanQuery( LeafReaderContext context, List collectedTerms) throws IOException { BooleanQuery.Builder bq = new BooleanQuery.Builder(); for (TermAndState t : collectedTerms) { - final TermStates termStates = new TermStates(searcher.getTopReaderContext()); + final TermStates termStates = new TermStates(nonCachingSearcher.getTopReaderContext()); termStates.register(t.state, context.ord, t.docFreq, t.totalTermFreq); bq.add(new TermQuery(new Term(q.field, t.term), termStates), BooleanClause.Occur.SHOULD); } Query q = new ConstantScoreQuery(bq.build()); - final Weight weight = searcher.rewrite(q).createWeight(searcher, scoreMode, score()); + final 
Weight weight = nonCachingSearcher.rewrite(q).createWeight(nonCachingSearcher, scoreMode, score()); return new WeightOrDocIdSetIterator(weight); } diff --git a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java index d1079b69089a..303b5c439412 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java +++ b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java @@ -242,6 +242,14 @@ public IndexSearcher(IndexReaderContext context) { this(context, null); } + public IndexSearcher(IndexSearcher searcher) { + this(searcher.getTopReaderContext(), searcher.getExecutor()); + this.similarity = searcher.getSimilarity(); + this.queryCache = searcher.getQueryCache(); + this.queryCachingPolicy = searcher.getQueryCachingPolicy(); + this.queryTimeout = searcher.getTimeout(); + } + /** * Return the maximum number of clauses permitted, 1024 by default. Attempts to add more than the * permitted number of clauses cause {@link TooManyClauses} to be thrown. 
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestTermInSetQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestTermInSetQuery.java index 9ea2d4672bdc..3df4494296f3 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestTermInSetQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestTermInSetQuery.java @@ -58,6 +58,68 @@ public class TestTermInSetQuery extends LuceneTestCase { + public void testCachingPolicyInteraction() throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter iw = new RandomIndexWriter(random(), dir); + // Use few enough terms to trigger the BooleanQuery rewrite logic (≤ threshold) + final int numTerms = AbstractMultiTermQueryConstantScoreWrapper.BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD; + List terms = new ArrayList<>(); + for (int i = 0; i < numTerms; ++i) { + String term = "term" + i; + terms.add(newBytesRef(term)); + Document doc = new Document(); + doc.add(new StringField("field", term, Store.NO)); + iw.addDocument(doc); + } + iw.commit(); + IndexReader reader = iw.getReader(); + IndexSearcher searcher = newSearcher(reader); + iw.close(); + + final AtomicInteger onUseCount = new AtomicInteger(0); + final Set seenQueries = new HashSet<>(); + QueryCachingPolicy policy = new QueryCachingPolicy() { + @Override + public void onUse(Query query) { + onUseCount.incrementAndGet(); + seenQueries.add(query); + } + + @Override + public boolean shouldCache(Query query) throws IOException { + return true; + } + }; + + searcher.setQueryCache(new LRUQueryCache(100, 10000)); + searcher.setQueryCachingPolicy(policy); + + TermInSetQuery query = new TermInSetQuery("field", terms); + // use count() to ensure scores are not needed, which triggers caching logic + searcher.count(query); + + // We expect only the top-level TermInSetQuery to be tracked. 
+ // The inner rewrites (ConstantScoreQuery wrapping BooleanQuery) should + // effectively bypass the + // cache because they are executed by the non-caching private searcher. + // Verify that no BooleanQuery or ConstantScoreQuery wrapping BooleanQuery was + // tracked + assertFalse( + "Segment-specific BooleanQuery rewrites should not be tracked", + seenQueries.stream().anyMatch(q -> q instanceof BooleanQuery)); + assertFalse( + "ConstantScoreQuery wrapping BooleanQuery should not be tracked", + seenQueries.stream() + .anyMatch( + q -> q instanceof ConstantScoreQuery + && ((ConstantScoreQuery) q).getQuery() instanceof BooleanQuery)); + // The TermInSetQuery itself should be tracked + assertTrue("TermInSetQuery should be tracked", seenQueries.contains(query)); + + reader.close(); + dir.close(); + } + public void testAllDocsInFieldTerm() throws IOException { Directory dir = newDirectory(); RandomIndexWriter iw = new RandomIndexWriter(random(), dir); From c173e181720bfad6cd9800a0ac38bc47b7351020 Mon Sep 17 00:00:00 2001 From: Govind Balaji S Date: Thu, 20 Nov 2025 15:36:17 +0530 Subject: [PATCH 2/3] fix ./gradlew check -x :checkLicenses --- ...actMultiTermQueryConstantScoreWrapper.java | 3 +- .../apache/lucene/search/IndexSearcher.java | 3 +- .../lucene/search/TestTermInSetQuery.java | 31 ++++++++++--------- 3 files changed, 21 insertions(+), 16 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/AbstractMultiTermQueryConstantScoreWrapper.java b/lucene/core/src/java/org/apache/lucene/search/AbstractMultiTermQueryConstantScoreWrapper.java index a6e97071acc4..e8e83f479e24 100644 --- a/lucene/core/src/java/org/apache/lucene/search/AbstractMultiTermQueryConstantScoreWrapper.java +++ b/lucene/core/src/java/org/apache/lucene/search/AbstractMultiTermQueryConstantScoreWrapper.java @@ -164,7 +164,8 @@ private WeightOrDocIdSetIterator rewriteAsBooleanQuery( bq.add(new TermQuery(new Term(q.field, t.term), termStates), BooleanClause.Occur.SHOULD); } Query q = 
new ConstantScoreQuery(bq.build()); - final Weight weight = nonCachingSearcher.rewrite(q).createWeight(nonCachingSearcher, scoreMode, score()); + final Weight weight = + nonCachingSearcher.rewrite(q).createWeight(nonCachingSearcher, scoreMode, score()); return new WeightOrDocIdSetIterator(weight); } diff --git a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java index 303b5c439412..d287776c0c7c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java +++ b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java @@ -242,8 +242,9 @@ public IndexSearcher(IndexReaderContext context) { this(context, null); } + /** Copies everything except the executor */ public IndexSearcher(IndexSearcher searcher) { - this(searcher.getTopReaderContext(), searcher.getExecutor()); + this(searcher.getTopReaderContext()); this.similarity = searcher.getSimilarity(); this.queryCache = searcher.getQueryCache(); this.queryCachingPolicy = searcher.getQueryCachingPolicy(); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestTermInSetQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestTermInSetQuery.java index 3df4494296f3..313b101190ea 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestTermInSetQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestTermInSetQuery.java @@ -62,7 +62,8 @@ public void testCachingPolicyInteraction() throws IOException { Directory dir = newDirectory(); RandomIndexWriter iw = new RandomIndexWriter(random(), dir); // Use few enough terms to trigger the BooleanQuery rewrite logic (≤ threshold) - final int numTerms = AbstractMultiTermQueryConstantScoreWrapper.BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD; + final int numTerms = + AbstractMultiTermQueryConstantScoreWrapper.BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD; List terms = new ArrayList<>(); for (int i = 0; i < numTerms; ++i) { String term = "term" + i; @@ -78,18 +79,19 @@ 
public void testCachingPolicyInteraction() throws IOException { final AtomicInteger onUseCount = new AtomicInteger(0); final Set seenQueries = new HashSet<>(); - QueryCachingPolicy policy = new QueryCachingPolicy() { - @Override - public void onUse(Query query) { - onUseCount.incrementAndGet(); - seenQueries.add(query); - } + QueryCachingPolicy policy = + new QueryCachingPolicy() { + @Override + public void onUse(Query query) { + onUseCount.incrementAndGet(); + seenQueries.add(query); + } - @Override - public boolean shouldCache(Query query) throws IOException { - return true; - } - }; + @Override + public boolean shouldCache(Query query) throws IOException { + return true; + } + }; searcher.setQueryCache(new LRUQueryCache(100, 10000)); searcher.setQueryCachingPolicy(policy); @@ -111,8 +113,9 @@ public boolean shouldCache(Query query) throws IOException { "ConstantScoreQuery wrapping BooleanQuery should not be tracked", seenQueries.stream() .anyMatch( - q -> q instanceof ConstantScoreQuery - && ((ConstantScoreQuery) q).getQuery() instanceof BooleanQuery)); + q -> + q instanceof ConstantScoreQuery + && ((ConstantScoreQuery) q).getQuery() instanceof BooleanQuery)); // The TermInSetQuery itself should be tracked assertTrue("TermInSetQuery should be tracked", seenQueries.contains(query)); From 54d5657e8d0d94d8586efb1d35fac07feb53b268 Mon Sep 17 00:00:00 2001 From: Govind Balaji S Date: Thu, 20 Nov 2025 22:43:38 +0530 Subject: [PATCH 3/3] minor reword --- .../core/src/java/org/apache/lucene/search/IndexSearcher.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java index d287776c0c7c..837323484547 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java +++ b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java @@ -242,7 +242,7 @@ public IndexSearcher(IndexReaderContext context) { 
this(context, null); } - /** Copies everything except the executor */ + /** Reuses everything except the executor */ public IndexSearcher(IndexSearcher searcher) { this(searcher.getTopReaderContext()); this.similarity = searcher.getSimilarity();