Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,8 @@ Improvements

* GITHUB#15225: Improve package documentation for org.apache.lucene.util. (Syed Mohammad Saad)

* GITHUB#15574: Introduce FirstPassGroupingCollectorManager to parallelize search when using FirstPassGroupingCollector. (Binlong Gao)

Optimizations
---------------------
* GITHUB#15681, GITHUB#15833: Replace pre-sized array or empty array with lambda expression to call Collection#toArray. (Zhou Hui)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search.grouping;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.function.Supplier;
import org.apache.lucene.search.CollectorManager;
import org.apache.lucene.search.Sort;

/** A CollectorManager implementation for FirstPassGroupingCollector. */
public class FirstPassGroupingCollectorManager<T>
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

given the corresponding collector is experimental, should this be too?

implements CollectorManager<FirstPassGroupingCollector<T>, Collection<SearchGroup<T>>> {

private final Supplier<GroupSelector<T>> groupSelectorFactory;
private final Sort groupSort;
private final int topNGroups;
private final boolean ignoreDocsWithoutGroupField;
private final List<FirstPassGroupingCollector<T>> collectors;

/**
* Creates a new FirstPassGroupingCollectorManager.
*
* @param groupSelectorFactory factory to create group selectors for each collector
* @param groupSort the sort to use for groups
* @param topNGroups the number of top groups to collect
*/
public FirstPassGroupingCollectorManager(
Supplier<GroupSelector<T>> groupSelectorFactory, Sort groupSort, int topNGroups) {
this(groupSelectorFactory, groupSort, topNGroups, false);
}

/**
* Creates a new FirstPassGroupingCollectorManager.
*
* @param groupSelectorFactory factory to create group selectors for each collector
* @param groupSort the sort to use for groups
* @param topNGroups the number of top groups to collect
* @param ignoreDocsWithoutGroupField whether to ignore documents without a group field
*/
public FirstPassGroupingCollectorManager(
Supplier<GroupSelector<T>> groupSelectorFactory,
Sort groupSort,
int topNGroups,
boolean ignoreDocsWithoutGroupField) {
this.groupSelectorFactory = groupSelectorFactory;
this.groupSort = groupSort;
this.topNGroups = topNGroups;
this.ignoreDocsWithoutGroupField = ignoreDocsWithoutGroupField;
this.collectors = new ArrayList<>();
}

@Override
public FirstPassGroupingCollector<T> newCollector() throws IOException {
FirstPassGroupingCollector<T> collector =
new FirstPassGroupingCollector<>(
groupSelectorFactory.get(), groupSort, topNGroups, ignoreDocsWithoutGroupField);
collectors.add(collector);
return collector;
}

@Override
public Collection<SearchGroup<T>> reduce(Collection<FirstPassGroupingCollector<T>> collectors)
throws IOException {
if (collectors.isEmpty()) {
return null;
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Returning null here could cause issues downstream.

}

if (collectors.size() == 1) {
return collectors.iterator().next().getTopGroups(0);
}
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have a slight preference to remove these two above conditionals. I wonder how much value they bring.


List<Collection<SearchGroup<T>>> allGroups = new ArrayList<>();
for (FirstPassGroupingCollector<T> collector : collectors) {
Collection<SearchGroup<T>> groups = collector.getTopGroups(0);
if (groups != null) {
allGroups.add(groups);
}
}

return SearchGroup.merge(allGroups, 0, topNGroups, groupSort);
}

public List<FirstPassGroupingCollector<T>> getCollectors() {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this looks unused, can we remove it?

return collectors;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -309,17 +309,17 @@ public void testShardedGrouping() throws IOException {
// A grouped query run in two phases against the control should give us the same
// result as the query run against shards and merged back together after each phase.

FirstPassGroupingCollector<T> singletonFirstPass =
new FirstPassGroupingCollector<>(getGroupSelector(), sort, 5);
control.getIndexSearcher().search(topLevel, singletonFirstPass);
Collection<SearchGroup<T>> singletonGroups = singletonFirstPass.getTopGroups(0);
FirstPassGroupingCollectorManager<T> firstPassGroupingCollectorManager =
new FirstPassGroupingCollectorManager<>(this::getGroupSelector, sort, 5);
Collection<SearchGroup<T>> singletonGroups =
control.getIndexSearcher().search(topLevel, firstPassGroupingCollectorManager);

List<Collection<SearchGroup<T>>> shardGroups = new ArrayList<>();
for (Shard shard : shards) {
FirstPassGroupingCollector<T> fc =
new FirstPassGroupingCollector<>(getGroupSelector(), sort, 5);
shard.getIndexSearcher().search(topLevel, fc);
shardGroups.add(fc.getTopGroups(0));
FirstPassGroupingCollectorManager<T> fcm =
new FirstPassGroupingCollectorManager<>(this::getGroupSelector, sort, 5);
Collection<SearchGroup<T>> topGroups = shard.getIndexSearcher().search(topLevel, fcm);
shardGroups.add(topGroups);
}
Collection<SearchGroup<T>> mergedGroups = SearchGroup.merge(shardGroups, 0, 5, sort);
assertEquals(singletonGroups, mergedGroups);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -220,19 +220,20 @@ public void testIgnoreDocsWithoutGroupField() throws IOException {
IndexSearcher searcher = newSearcher(reader);

// Test default behavior (include null group)
FirstPassGroupingCollector<BytesRef> collector1 =
new FirstPassGroupingCollector<>(new TermGroupSelector(groupField), Sort.RELEVANCE, 10);
searcher.search(MatchAllDocsQuery.INSTANCE, collector1);
Collection<SearchGroup<BytesRef>> groups1 = collector1.getTopGroups(0);
FirstPassGroupingCollectorManager<BytesRef> firstPassGroupingCollectorManager1 =
new FirstPassGroupingCollectorManager<>(
() -> new TermGroupSelector(groupField), Sort.RELEVANCE, 10);
Collection<SearchGroup<BytesRef>> groups1 =
searcher.search(MatchAllDocsQuery.INSTANCE, firstPassGroupingCollectorManager1);

assertEquals(3, groups1.size()); // Should include null group

// Test ignoring docs without group field
FirstPassGroupingCollector<BytesRef> collector2 =
new FirstPassGroupingCollector<>(
new TermGroupSelector(groupField), Sort.RELEVANCE, 10, true);
searcher.search(MatchAllDocsQuery.INSTANCE, collector2);
Collection<SearchGroup<BytesRef>> groups2 = collector2.getTopGroups(0);
FirstPassGroupingCollectorManager<BytesRef> firstPassGroupingCollectorManager2 =
new FirstPassGroupingCollectorManager<>(
() -> new TermGroupSelector(groupField), Sort.RELEVANCE, 10, true);
Collection<SearchGroup<BytesRef>> groups2 =
searcher.search(MatchAllDocsQuery.INSTANCE, firstPassGroupingCollectorManager2);

assertEquals(2, groups2.size()); // Should exclude null group

Expand All @@ -258,10 +259,11 @@ public void testAllDocsWithoutGroupField() throws IOException {
IndexSearcher searcher = newSearcher(reader);

// Test ignoring docs without group field when all docs lack the field
FirstPassGroupingCollector<BytesRef> collector =
new FirstPassGroupingCollector<>(new TermGroupSelector("group"), Sort.RELEVANCE, 10, true);
searcher.search(MatchAllDocsQuery.INSTANCE, collector);
Collection<SearchGroup<BytesRef>> groups = collector.getTopGroups(0);
FirstPassGroupingCollectorManager<BytesRef> firstPassGroupingCollectorManager2 =
new FirstPassGroupingCollectorManager<>(
() -> new TermGroupSelector("group"), Sort.RELEVANCE, 10, true);
Collection<SearchGroup<BytesRef>> groups =
searcher.search(MatchAllDocsQuery.INSTANCE, firstPassGroupingCollectorManager2);

assertNull(groups); // Should return null when no groups found

Expand All @@ -277,11 +279,13 @@ private FirstPassGroupingCollector<?> createRandomFirstPassCollector(
String groupField, Sort groupSort, int topDocs) throws IOException {
if (random().nextBoolean()) {
ValueSource vs = new BytesRefFieldSource(groupField);
return new FirstPassGroupingCollector<>(
new ValueSourceGroupSelector(vs, new HashMap<>()), groupSort, topDocs);
return new FirstPassGroupingCollectorManager<>(
() -> new ValueSourceGroupSelector(vs, new HashMap<>()), groupSort, topDocs)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should the hashmap be created for each supplier or shared across them?

.newCollector();
} else {
return new FirstPassGroupingCollector<>(
new TermGroupSelector(groupField), groupSort, topDocs);
return new FirstPassGroupingCollectorManager<>(
() -> new TermGroupSelector(groupField), groupSort, topDocs)
.newCollector();
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can usages like these also be replaced? Hopefully we will do a search in the test using the collector retrieved via the collector manager? Can we do the search providing the collector manager instead?

}
}

Expand All @@ -294,11 +298,13 @@ private FirstPassGroupingCollector<?> createFirstPassCollector(
GroupSelector<?> selector = firstPassGroupingCollector.getGroupSelector();
if (TermGroupSelector.class.isAssignableFrom(selector.getClass())) {
ValueSource vs = new BytesRefFieldSource(groupField);
return new FirstPassGroupingCollector<>(
new ValueSourceGroupSelector(vs, new HashMap<>()), groupSort, topDocs);
return new FirstPassGroupingCollectorManager<>(
() -> new ValueSourceGroupSelector(vs, new HashMap<>()), groupSort, topDocs)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same as above regarding the map

.newCollector();
} else {
return new FirstPassGroupingCollector<>(
new TermGroupSelector(groupField), groupSort, topDocs);
return new FirstPassGroupingCollectorManager<>(
() -> new TermGroupSelector(groupField), groupSort, topDocs)
.newCollector();
}
}

Expand Down
Loading