From d40d5d5a3e141d7ef740c88d9c079b26791b393b Mon Sep 17 00:00:00 2001 From: Binlong Gao Date: Tue, 13 Jan 2026 21:13:29 +0800 Subject: [PATCH 1/2] Introduce FirstPassGroupingCollectorManager Signed-off-by: Binlong Gao Format code Signed-off-by: Binlong Gao --- lucene/CHANGES.txt | 2 + .../FirstPassGroupingCollectorManager.java | 99 +++++++++++++++++++ .../grouping/BaseGroupSelectorTestCase.java | 16 +-- .../lucene/search/grouping/TestGrouping.java | 48 +++++---- 4 files changed, 136 insertions(+), 29 deletions(-) create mode 100644 lucene/grouping/src/java/org/apache/lucene/search/grouping/FirstPassGroupingCollectorManager.java diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 0d756b2087c0..69be5a1d13e3 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -110,6 +110,8 @@ Improvements * GITHUB#15225: Improve package documentation for org.apache.lucene.util. (Syed Mohammad Saad) +* GITHUB#15574: Introduce FirstPassGroupingCollectorManager to parallelize search when using FirstPassGroupingCollector. (Binlong Gao) + Optimizations --------------------- * GITHUB#15681, GITHUB#15833: Replace pre-sized array or empty array with lambda expression to call Collection#toArray. (Zhou Hui) diff --git a/lucene/grouping/src/java/org/apache/lucene/search/grouping/FirstPassGroupingCollectorManager.java b/lucene/grouping/src/java/org/apache/lucene/search/grouping/FirstPassGroupingCollectorManager.java new file mode 100644 index 000000000000..7e24e6735c39 --- /dev/null +++ b/lucene/grouping/src/java/org/apache/lucene/search/grouping/FirstPassGroupingCollectorManager.java @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.search.grouping; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.function.Supplier; +import org.apache.lucene.search.CollectorManager; +import org.apache.lucene.search.Sort; + +/** A CollectorManager implementation for FirstPassGroupingCollector. */ +public class FirstPassGroupingCollectorManager<T> + implements CollectorManager<FirstPassGroupingCollector<T>, Collection<SearchGroup<T>>> { + + private final Supplier<GroupSelector<T>> groupSelectorFactory; + private final Sort groupSort; + private final int topNGroups; + private final boolean ignoreDocsWithoutGroupField; + private final List<FirstPassGroupingCollector<T>> collectors; + + /** + * Creates a new FirstPassGroupingCollectorManager. + * + * @param groupSelectorFactory factory to create group selectors for each collector + * @param groupSort the sort to use for groups + * @param topNGroups the number of top groups to collect + */ + public FirstPassGroupingCollectorManager( + Supplier<GroupSelector<T>> groupSelectorFactory, Sort groupSort, int topNGroups) { + this(groupSelectorFactory, groupSort, topNGroups, false); + } + + /** + * Creates a new FirstPassGroupingCollectorManager. 
+ * + * @param groupSelectorFactory factory to create group selectors for each collector + * @param groupSort the sort to use for groups + * @param topNGroups the number of top groups to collect + * @param ignoreDocsWithoutGroupField whether to ignore documents without a group field + */ + public FirstPassGroupingCollectorManager( + Supplier<GroupSelector<T>> groupSelectorFactory, + Sort groupSort, + int topNGroups, + boolean ignoreDocsWithoutGroupField) { + this.groupSelectorFactory = groupSelectorFactory; + this.groupSort = groupSort; + this.topNGroups = topNGroups; + this.ignoreDocsWithoutGroupField = ignoreDocsWithoutGroupField; + this.collectors = new ArrayList<>(); + } + + @Override + public FirstPassGroupingCollector<T> newCollector() throws IOException { + FirstPassGroupingCollector<T> collector = + new FirstPassGroupingCollector<>( + groupSelectorFactory.get(), groupSort, topNGroups, ignoreDocsWithoutGroupField); + collectors.add(collector); + return collector; + } + + @Override + public Collection<SearchGroup<T>> reduce(Collection<FirstPassGroupingCollector<T>> collectors) + throws IOException { + if (collectors.isEmpty()) { + return null; + } + + List<Collection<SearchGroup<T>>> allGroups = new ArrayList<>(); + for (FirstPassGroupingCollector<T> collector : collectors) { + Collection<SearchGroup<T>> groups = collector.getTopGroups(0); + if (groups != null) { + allGroups.add(groups); + } + } + + return SearchGroup.merge(allGroups, 0, topNGroups, groupSort); + } + + public List<FirstPassGroupingCollector<T>> getCollectors() { + return collectors; + } +} diff --git a/lucene/grouping/src/test/org/apache/lucene/search/grouping/BaseGroupSelectorTestCase.java b/lucene/grouping/src/test/org/apache/lucene/search/grouping/BaseGroupSelectorTestCase.java index 687c7080dba2..280edbea35fa 100644 --- a/lucene/grouping/src/test/org/apache/lucene/search/grouping/BaseGroupSelectorTestCase.java +++ b/lucene/grouping/src/test/org/apache/lucene/search/grouping/BaseGroupSelectorTestCase.java @@ -309,17 +309,17 @@ public void testShardedGrouping() throws IOException { // A grouped query run in two phases against the 
control should give us the same // result as the query run against shards and merged back together after each phase. - FirstPassGroupingCollector singletonFirstPass = - new FirstPassGroupingCollector<>(getGroupSelector(), sort, 5); - control.getIndexSearcher().search(topLevel, singletonFirstPass); - Collection> singletonGroups = singletonFirstPass.getTopGroups(0); + FirstPassGroupingCollectorManager firstPassGroupingCollectorManager = + new FirstPassGroupingCollectorManager<>(this::getGroupSelector, sort, 5); + Collection> singletonGroups = + control.getIndexSearcher().search(topLevel, firstPassGroupingCollectorManager); List>> shardGroups = new ArrayList<>(); for (Shard shard : shards) { - FirstPassGroupingCollector fc = - new FirstPassGroupingCollector<>(getGroupSelector(), sort, 5); - shard.getIndexSearcher().search(topLevel, fc); - shardGroups.add(fc.getTopGroups(0)); + FirstPassGroupingCollectorManager fcm = + new FirstPassGroupingCollectorManager<>(this::getGroupSelector, sort, 5); + Collection> topGroups = shard.getIndexSearcher().search(topLevel, fcm); + shardGroups.add(topGroups); } Collection> mergedGroups = SearchGroup.merge(shardGroups, 0, 5, sort); assertEquals(singletonGroups, mergedGroups); diff --git a/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java b/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java index 37ee890fbf1d..664bc9ede848 100644 --- a/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java +++ b/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java @@ -220,19 +220,20 @@ public void testIgnoreDocsWithoutGroupField() throws IOException { IndexSearcher searcher = newSearcher(reader); // Test default behavior (include null group) - FirstPassGroupingCollector collector1 = - new FirstPassGroupingCollector<>(new TermGroupSelector(groupField), Sort.RELEVANCE, 10); - searcher.search(MatchAllDocsQuery.INSTANCE, collector1); - Collection> groups1 = 
collector1.getTopGroups(0); + FirstPassGroupingCollectorManager firstPassGroupingCollectorManager1 = + new FirstPassGroupingCollectorManager<>( + () -> new TermGroupSelector(groupField), Sort.RELEVANCE, 10); + Collection> groups1 = + searcher.search(MatchAllDocsQuery.INSTANCE, firstPassGroupingCollectorManager1); assertEquals(3, groups1.size()); // Should include null group // Test ignoring docs without group field - FirstPassGroupingCollector collector2 = - new FirstPassGroupingCollector<>( - new TermGroupSelector(groupField), Sort.RELEVANCE, 10, true); - searcher.search(MatchAllDocsQuery.INSTANCE, collector2); - Collection> groups2 = collector2.getTopGroups(0); + FirstPassGroupingCollectorManager firstPassGroupingCollectorManager2 = + new FirstPassGroupingCollectorManager<>( + () -> new TermGroupSelector(groupField), Sort.RELEVANCE, 10, true); + Collection> groups2 = + searcher.search(MatchAllDocsQuery.INSTANCE, firstPassGroupingCollectorManager2); assertEquals(2, groups2.size()); // Should exclude null group @@ -258,10 +259,11 @@ public void testAllDocsWithoutGroupField() throws IOException { IndexSearcher searcher = newSearcher(reader); // Test ignoring docs without group field when all docs lack the field - FirstPassGroupingCollector collector = - new FirstPassGroupingCollector<>(new TermGroupSelector("group"), Sort.RELEVANCE, 10, true); - searcher.search(MatchAllDocsQuery.INSTANCE, collector); - Collection> groups = collector.getTopGroups(0); + FirstPassGroupingCollectorManager firstPassGroupingCollectorManager2 = + new FirstPassGroupingCollectorManager<>( + () -> new TermGroupSelector("group"), Sort.RELEVANCE, 10, true); + Collection> groups = + searcher.search(MatchAllDocsQuery.INSTANCE, firstPassGroupingCollectorManager2); assertNull(groups); // Should return null when no groups found @@ -277,11 +279,13 @@ private FirstPassGroupingCollector createRandomFirstPassCollector( String groupField, Sort groupSort, int topDocs) throws IOException { if 
(random().nextBoolean()) { ValueSource vs = new BytesRefFieldSource(groupField); - return new FirstPassGroupingCollector<>( - new ValueSourceGroupSelector(vs, new HashMap<>()), groupSort, topDocs); + return new FirstPassGroupingCollectorManager<>( + () -> new ValueSourceGroupSelector(vs, new HashMap<>()), groupSort, topDocs) + .newCollector(); } else { - return new FirstPassGroupingCollector<>( - new TermGroupSelector(groupField), groupSort, topDocs); + return new FirstPassGroupingCollectorManager<>( + () -> new TermGroupSelector(groupField), groupSort, topDocs) + .newCollector(); } } @@ -294,11 +298,13 @@ private FirstPassGroupingCollector createFirstPassCollector( GroupSelector selector = firstPassGroupingCollector.getGroupSelector(); if (TermGroupSelector.class.isAssignableFrom(selector.getClass())) { ValueSource vs = new BytesRefFieldSource(groupField); - return new FirstPassGroupingCollector<>( - new ValueSourceGroupSelector(vs, new HashMap<>()), groupSort, topDocs); + return new FirstPassGroupingCollectorManager<>( + () -> new ValueSourceGroupSelector(vs, new HashMap<>()), groupSort, topDocs) + .newCollector(); } else { - return new FirstPassGroupingCollector<>( - new TermGroupSelector(groupField), groupSort, topDocs); + return new FirstPassGroupingCollectorManager<>( + () -> new TermGroupSelector(groupField), groupSort, topDocs) + .newCollector(); } } From 79b499cafb130f964aea26558f31c75a428c796c Mon Sep 17 00:00:00 2001 From: Binlong Gao Date: Tue, 7 Apr 2026 17:22:36 +0800 Subject: [PATCH 2/2] Return directly if only one collector exists Signed-off-by: Binlong Gao --- .../search/grouping/FirstPassGroupingCollectorManager.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lucene/grouping/src/java/org/apache/lucene/search/grouping/FirstPassGroupingCollectorManager.java b/lucene/grouping/src/java/org/apache/lucene/search/grouping/FirstPassGroupingCollectorManager.java index 7e24e6735c39..ce366bc77b82 100644 --- 
a/lucene/grouping/src/java/org/apache/lucene/search/grouping/FirstPassGroupingCollectorManager.java +++ b/lucene/grouping/src/java/org/apache/lucene/search/grouping/FirstPassGroupingCollectorManager.java @@ -82,6 +82,10 @@ public Collection> reduce(Collection>> allGroups = new ArrayList<>(); for (FirstPassGroupingCollector collector : collectors) { Collection> groups = collector.getTopGroups(0);