forked from apache/lucene-solr
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
SOLR-11831: Skip second grouping step if group.limit is 1 (aka Las Ve…
…gas patch) Summary: In cases where we do grouping and ask only for {{group.limit=1}}, it is possible to skip the second grouping step. In our test datasets it improved speed by around 40%. Essentially, in the first grouping step each shard returns the top K groups based on the highest scoring document in each group. The top K groups from each shard are merged in the federator, and in the second step we ask all the shards to return the top documents from each of the top ranking groups. If we only want to return the highest scoring document per group, we can return the top document id in the first step, merge results in the federator to retain the top K groups, and then skip the second grouping step entirely. Squashed changes: QueryComponent: interim 'make it compile (somehow)' change (apache#228); add SearchGroupsContainer (apache#230); factor out SearchGroupsResultTransformer.serializeOneSearchGroup method (Christine); Refactor transformToNative adding deserializeOneSearchGroup; increase GroupParams.GROUP_SKIP_DISTRIBUTED_SECOND use (see also 6bdf87); Remove error logging in allowSkipSecondGroupingStep; Check that withinGroupSort is a prefix of groupSort; SkipSecondStepSearchGroupShardResponseProcessor.addSearchGroupToShards now leaves ShardDoc.fields null; factor out TopGroupsShardResponseProcessor.fillResultIds method; Add regression test group.main=true and group.format=simple; Improve GroupingSpecification validation adding validate() method; SkipSecondStepSearchResultResultTransformer.serializeOneSearchGroup tweaks
- Loading branch information
1 parent
f71c2c8
commit 370b51b
Showing
11 changed files
with
394 additions
and
24 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
116 changes: 116 additions & 0 deletions
116
...ouping/distributed/responseprocessor/SkipSecondStepSearchGroupShardResponseProcessor.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package org.apache.solr.search.grouping.distributed.responseprocessor; | ||
|
||
import java.util.Collection; | ||
import java.util.HashMap; | ||
import java.util.Map; | ||
|
||
import org.apache.lucene.search.Sort; | ||
import org.apache.lucene.search.TotalHits; | ||
import org.apache.lucene.search.grouping.GroupDocs; | ||
import org.apache.lucene.search.grouping.SearchGroup; | ||
import org.apache.lucene.search.grouping.TopGroups; | ||
import org.apache.lucene.util.BytesRef; | ||
import org.apache.solr.handler.component.ResponseBuilder; | ||
import org.apache.solr.handler.component.ShardDoc; | ||
import org.apache.solr.handler.component.ShardRequest; | ||
import org.apache.solr.handler.component.ShardResponse; | ||
import org.apache.solr.search.SolrIndexSearcher; | ||
import org.apache.solr.search.grouping.GroupingSpecification; | ||
import org.apache.solr.search.grouping.distributed.shardresultserializer.SearchGroupsResultTransformer; | ||
|
||
public class SkipSecondStepSearchGroupShardResponseProcessor extends SearchGroupShardResponseProcessor { | ||
|
||
@Override | ||
protected SearchGroupsResultTransformer newSearchGroupsResultTransformer(SolrIndexSearcher solrIndexSearcher) { | ||
return new SearchGroupsResultTransformer.SkipSecondStepSearchResultResultTransformer(solrIndexSearcher); | ||
} | ||
|
||
@Override | ||
protected SearchGroupsContainer newSearchGroupsContainer(ResponseBuilder rb) { | ||
return new SkipSecondStepSearchGroupsContainer(rb.getGroupingSpec().getFields()); | ||
} | ||
|
||
@Override | ||
public void process(ResponseBuilder rb, ShardRequest shardRequest) { | ||
super.process(rb, shardRequest); | ||
TopGroupsShardResponseProcessor.fillResultIds(rb); | ||
} | ||
|
||
protected static class SkipSecondStepSearchGroupsContainer extends SearchGroupsContainer { | ||
|
||
private final Map<Object, String> docIdToShard = new HashMap<>(); | ||
|
||
public SkipSecondStepSearchGroupsContainer(String[] fields) { | ||
super(fields); | ||
} | ||
|
||
@Override | ||
public void addSearchGroups(ShardResponse srsp, String field, Collection<SearchGroup<BytesRef>> searchGroups) { | ||
super.addSearchGroups(srsp, field, searchGroups); | ||
for (SearchGroup<BytesRef> searchGroup : searchGroups) { | ||
assert(srsp.getShard() != null); | ||
docIdToShard.put(searchGroup.topDocSolrId, srsp.getShard()); | ||
} | ||
} | ||
|
||
@Override | ||
public void addMergedSearchGroups(ResponseBuilder rb, String groupField, Collection<SearchGroup<BytesRef>> mergedTopGroups ) { | ||
// TODO: add comment or javadoc re: why this method is overridden as a no-op | ||
} | ||
|
||
@Override | ||
public void addSearchGroupToShards(ResponseBuilder rb, String groupField, Collection<SearchGroup<BytesRef>> mergedTopGroups) { | ||
super.addSearchGroupToShards(rb, groupField, mergedTopGroups); | ||
|
||
final GroupingSpecification groupingSpecification = rb.getGroupingSpec(); | ||
final Sort groupSort = groupingSpecification.getGroupSortSpec().getSort(); | ||
|
||
GroupDocs<BytesRef>[] groups = new GroupDocs[mergedTopGroups.size()]; | ||
|
||
// This is the max score found in any document on any group | ||
float maxScore = 0; | ||
int index = 0; | ||
|
||
for (SearchGroup<BytesRef> group : mergedTopGroups) { | ||
maxScore = Math.max(maxScore, group.topDocScore); | ||
final String shard = docIdToShard.get(group.topDocSolrId); | ||
assert(shard != null); | ||
final ShardDoc sdoc = new ShardDoc(); | ||
sdoc.score = group.topDocScore; | ||
sdoc.id = group.topDocSolrId; | ||
sdoc.shard = shard; | ||
|
||
groups[index++] = new GroupDocs<>(group.topDocScore, | ||
group.topDocScore, | ||
new TotalHits(1, TotalHits.Relation.EQUAL_TO), /* we don't know the actual number of hits in the group- we set it to 1 as we only keep track of the top doc */ | ||
new ShardDoc[] { sdoc }, /* only top doc */ | ||
group.groupValue, | ||
group.sortValues); | ||
} | ||
TopGroups<BytesRef> topMergedGroups = new TopGroups<BytesRef>(groupSort.getSort(), | ||
rb.getGroupingSpec().getWithinGroupSortSpec().getSort().getSort(), | ||
0, /*Set totalHitCount to 0 as we can't computed it as is */ | ||
0, /*Set totalGroupedHitCount to 0 as we can't computed it as is*/ | ||
groups, | ||
maxScore); | ||
rb.mergedTopGroups.put(groupField, topMergedGroups); | ||
} | ||
} | ||
|
||
} |
Oops, something went wrong.