diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 8cdfb9e4f489..c3748290c278 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -115,6 +115,8 @@ Improvements * GITHUB#15225: Improve package documentation for org.apache.lucene.util. (Syed Mohammad Saad) +* GITHUB#15574: Introduce FirstPassGroupingCollectorManager to parallelize search when using FirstPassGroupingCollector. (Binlong Gao) + Optimizations --------------------- * GITHUB#15681, GITHUB#15833: Replace pre-sized array or empty array with lambda expression to call Collection#toArray. (Zhou Hui) diff --git a/lucene/grouping/src/java/org/apache/lucene/search/grouping/FirstPassGroupingCollectorManager.java b/lucene/grouping/src/java/org/apache/lucene/search/grouping/FirstPassGroupingCollectorManager.java new file mode 100644 index 000000000000..ce366bc77b82 --- /dev/null +++ b/lucene/grouping/src/java/org/apache/lucene/search/grouping/FirstPassGroupingCollectorManager.java @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.lucene.search.grouping;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+import java.util.function.Supplier;
+import org.apache.lucene.search.CollectorManager;
+import org.apache.lucene.search.Sort;
+
+/**
+ * A {@link CollectorManager} for {@link FirstPassGroupingCollector}: it hands out one collector per
+ * {@link #newCollector()} call and, in {@link #reduce(Collection)}, combines the top groups of the
+ * supplied collectors into a single collection via {@link SearchGroup#merge}.
+ *
+ * <p>Every collector created by this manager is remembered and exposed through {@link
+ * #getCollectors()}. That list is never cleared, so an instance that is reused for several
+ * searches keeps accumulating collectors; create a new manager per search unless that is intended.
+ *
+ * <p>NOTE(review): {@link #newCollector()} appends to a plain {@link ArrayList} without any
+ * synchronization. This assumes collectors are requested from a single thread -- confirm against
+ * the searcher that drives this manager.
+ *
+ * @param <T> the type of the group value
+ */
+public class FirstPassGroupingCollectorManager<T>
+    implements CollectorManager<FirstPassGroupingCollector<T>, Collection<SearchGroup<T>>> {
+
+  private final Supplier<GroupSelector<T>> groupSelectorFactory;
+  private final Sort groupSort;
+  private final int topNGroups;
+  private final boolean ignoreDocsWithoutGroupField;
+  // Every collector handed out by newCollector(), in creation order.
+  private final List<FirstPassGroupingCollector<T>> collectors;
+
+  /**
+   * Creates a new FirstPassGroupingCollectorManager that does not ignore documents without a
+   * group field.
+   *
+   * @param groupSelectorFactory factory invoked once per {@link #newCollector()} call to create
+   *     the group selector of that collector; must not be {@code null}
+   * @param groupSort the sort to use for groups
+   * @param topNGroups the number of top groups to collect
+   * @throws NullPointerException if {@code groupSelectorFactory} is {@code null}
+   */
+  public FirstPassGroupingCollectorManager(
+      Supplier<GroupSelector<T>> groupSelectorFactory, Sort groupSort, int topNGroups) {
+    this(groupSelectorFactory, groupSort, topNGroups, false);
+  }
+
+  /**
+   * Creates a new FirstPassGroupingCollectorManager.
+   *
+   * @param groupSelectorFactory factory invoked once per {@link #newCollector()} call to create
+   *     the group selector of that collector; must not be {@code null}
+   * @param groupSort the sort to use for groups
+   * @param topNGroups the number of top groups to collect
+   * @param ignoreDocsWithoutGroupField whether to ignore documents without a group field
+   * @throws NullPointerException if {@code groupSelectorFactory} is {@code null}
+   */
+  public FirstPassGroupingCollectorManager(
+      Supplier<GroupSelector<T>> groupSelectorFactory,
+      Sort groupSort,
+      int topNGroups,
+      boolean ignoreDocsWithoutGroupField) {
+    // Fail at construction time instead of on the first newCollector() call.
+    this.groupSelectorFactory =
+        Objects.requireNonNull(groupSelectorFactory, "groupSelectorFactory");
+    this.groupSort = groupSort;
+    this.topNGroups = topNGroups;
+    this.ignoreDocsWithoutGroupField = ignoreDocsWithoutGroupField;
+    this.collectors = new ArrayList<>();
+  }
+
+  /**
+   * Creates a collector backed by a selector freshly obtained from the factory and records it so
+   * that it can later be retrieved through {@link #getCollectors()}.
+   *
+   * @return the newly created collector
+   */
+  @Override
+  public FirstPassGroupingCollector<T> newCollector() throws IOException {
+    FirstPassGroupingCollector<T> collector =
+        new FirstPassGroupingCollector<>(
+            groupSelectorFactory.get(), groupSort, topNGroups, ignoreDocsWithoutGroupField);
+    collectors.add(collector);
+    return collector;
+  }
+
+  /**
+   * Combines the top groups of the given collectors.
+   *
+   * <p>Note that the parameter shadows the internal list of created collectors: only the
+   * collectors passed in by the caller are reduced.
+   *
+   * @param collectors the collectors whose top groups should be combined
+   * @return the top groups of the single collector if exactly one is given, otherwise the result
+   *     of {@link SearchGroup#merge} over all non-null per-collector results; {@code null} if
+   *     {@code collectors} is empty or none of them reported any groups
+   */
+  @Override
+  public Collection<SearchGroup<T>> reduce(Collection<FirstPassGroupingCollector<T>> collectors)
+      throws IOException {
+    if (collectors.isEmpty()) {
+      return null;
+    }
+
+    if (collectors.size() == 1) {
+      // Nothing to merge: return the only collector's groups as they are.
+      return collectors.iterator().next().getTopGroups(0);
+    }
+
+    List<Collection<SearchGroup<T>>> allGroups = new ArrayList<>(collectors.size());
+    for (FirstPassGroupingCollector<T> collector : collectors) {
+      Collection<SearchGroup<T>> groups = collector.getTopGroups(0);
+      // A collector may report null; leave such results out of the merge input.
+      if (groups != null) {
+        allGroups.add(groups);
+      }
+    }
+
+    // Keep the "no groups => null" result explicit rather than relying on how merge treats an
+    // empty input list.
+    if (allGroups.isEmpty()) {
+      return null;
+    }
+    return SearchGroup.merge(allGroups, 0, topNGroups, groupSort);
+  }
+
+  /**
+   * Returns the collectors created so far by {@link #newCollector()}, in creation order.
+   *
+   * @return an unmodifiable view of the created collectors; collectors created later become
+   *     visible through the same view
+   */
+  public List<FirstPassGroupingCollector<T>> getCollectors() {
+    return Collections.unmodifiableList(collectors);
+  }
+}
diff --git a/lucene/grouping/src/test/org/apache/lucene/search/grouping/BaseGroupSelectorTestCase.java b/lucene/grouping/src/test/org/apache/lucene/search/grouping/BaseGroupSelectorTestCase.java
index f17cbdc8b85b..1bbf19d5f627 100644
--- a/lucene/grouping/src/test/org/apache/lucene/search/grouping/BaseGroupSelectorTestCase.java
+++ b/lucene/grouping/src/test/org/apache/lucene/search/grouping/BaseGroupSelectorTestCase.java
@@ -309,17 +309,17 @@ public
void testShardedGrouping() throws IOException { // A grouped query run in two phases against the control should give us the same // result as the query run against shards and merged back together after each phase. - FirstPassGroupingCollector singletonFirstPass = - new FirstPassGroupingCollector<>(getGroupSelector(), sort, 5); - control.getIndexSearcher().search(topLevel, singletonFirstPass); - Collection> singletonGroups = singletonFirstPass.getTopGroups(0); + FirstPassGroupingCollectorManager firstPassGroupingCollectorManager = + new FirstPassGroupingCollectorManager<>(this::getGroupSelector, sort, 5); + Collection> singletonGroups = + control.getIndexSearcher().search(topLevel, firstPassGroupingCollectorManager); List>> shardGroups = new ArrayList<>(); for (Shard shard : shards) { - FirstPassGroupingCollector fc = - new FirstPassGroupingCollector<>(getGroupSelector(), sort, 5); - shard.getIndexSearcher().search(topLevel, fc); - shardGroups.add(fc.getTopGroups(0)); + FirstPassGroupingCollectorManager fcm = + new FirstPassGroupingCollectorManager<>(this::getGroupSelector, sort, 5); + Collection> topGroups = shard.getIndexSearcher().search(topLevel, fcm); + shardGroups.add(topGroups); } Collection> mergedGroups = SearchGroup.merge(shardGroups, 0, 5, sort); assertEquals(singletonGroups, mergedGroups); diff --git a/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java b/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java index 37ee890fbf1d..664bc9ede848 100644 --- a/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java +++ b/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java @@ -220,19 +220,20 @@ public void testIgnoreDocsWithoutGroupField() throws IOException { IndexSearcher searcher = newSearcher(reader); // Test default behavior (include null group) - FirstPassGroupingCollector collector1 = - new FirstPassGroupingCollector<>(new TermGroupSelector(groupField), 
Sort.RELEVANCE, 10); - searcher.search(MatchAllDocsQuery.INSTANCE, collector1); - Collection> groups1 = collector1.getTopGroups(0); + FirstPassGroupingCollectorManager firstPassGroupingCollectorManager1 = + new FirstPassGroupingCollectorManager<>( + () -> new TermGroupSelector(groupField), Sort.RELEVANCE, 10); + Collection> groups1 = + searcher.search(MatchAllDocsQuery.INSTANCE, firstPassGroupingCollectorManager1); assertEquals(3, groups1.size()); // Should include null group // Test ignoring docs without group field - FirstPassGroupingCollector collector2 = - new FirstPassGroupingCollector<>( - new TermGroupSelector(groupField), Sort.RELEVANCE, 10, true); - searcher.search(MatchAllDocsQuery.INSTANCE, collector2); - Collection> groups2 = collector2.getTopGroups(0); + FirstPassGroupingCollectorManager firstPassGroupingCollectorManager2 = + new FirstPassGroupingCollectorManager<>( + () -> new TermGroupSelector(groupField), Sort.RELEVANCE, 10, true); + Collection> groups2 = + searcher.search(MatchAllDocsQuery.INSTANCE, firstPassGroupingCollectorManager2); assertEquals(2, groups2.size()); // Should exclude null group @@ -258,10 +259,11 @@ public void testAllDocsWithoutGroupField() throws IOException { IndexSearcher searcher = newSearcher(reader); // Test ignoring docs without group field when all docs lack the field - FirstPassGroupingCollector collector = - new FirstPassGroupingCollector<>(new TermGroupSelector("group"), Sort.RELEVANCE, 10, true); - searcher.search(MatchAllDocsQuery.INSTANCE, collector); - Collection> groups = collector.getTopGroups(0); + FirstPassGroupingCollectorManager firstPassGroupingCollectorManager2 = + new FirstPassGroupingCollectorManager<>( + () -> new TermGroupSelector("group"), Sort.RELEVANCE, 10, true); + Collection> groups = + searcher.search(MatchAllDocsQuery.INSTANCE, firstPassGroupingCollectorManager2); assertNull(groups); // Should return null when no groups found @@ -277,11 +279,13 @@ private FirstPassGroupingCollector 
createRandomFirstPassCollector( String groupField, Sort groupSort, int topDocs) throws IOException { if (random().nextBoolean()) { ValueSource vs = new BytesRefFieldSource(groupField); - return new FirstPassGroupingCollector<>( - new ValueSourceGroupSelector(vs, new HashMap<>()), groupSort, topDocs); + return new FirstPassGroupingCollectorManager<>( + () -> new ValueSourceGroupSelector(vs, new HashMap<>()), groupSort, topDocs) + .newCollector(); } else { - return new FirstPassGroupingCollector<>( - new TermGroupSelector(groupField), groupSort, topDocs); + return new FirstPassGroupingCollectorManager<>( + () -> new TermGroupSelector(groupField), groupSort, topDocs) + .newCollector(); } } @@ -294,11 +298,13 @@ private FirstPassGroupingCollector createFirstPassCollector( GroupSelector selector = firstPassGroupingCollector.getGroupSelector(); if (TermGroupSelector.class.isAssignableFrom(selector.getClass())) { ValueSource vs = new BytesRefFieldSource(groupField); - return new FirstPassGroupingCollector<>( - new ValueSourceGroupSelector(vs, new HashMap<>()), groupSort, topDocs); + return new FirstPassGroupingCollectorManager<>( + () -> new ValueSourceGroupSelector(vs, new HashMap<>()), groupSort, topDocs) + .newCollector(); } else { - return new FirstPassGroupingCollector<>( - new TermGroupSelector(groupField), groupSort, topDocs); + return new FirstPassGroupingCollectorManager<>( + () -> new TermGroupSelector(groupField), groupSort, topDocs) + .newCollector(); } }