diff --git a/lucene/grouping/src/java/org/apache/lucene/search/grouping/FirstPassGroupingCollector.java b/lucene/grouping/src/java/org/apache/lucene/search/grouping/FirstPassGroupingCollector.java index 6a745b8e7352..c363d09d1f0d 100644 --- a/lucene/grouping/src/java/org/apache/lucene/search/grouping/FirstPassGroupingCollector.java +++ b/lucene/grouping/src/java/org/apache/lucene/search/grouping/FirstPassGroupingCollector.java @@ -132,6 +132,7 @@ public Collection> getTopGroups(int groupOffset) throws IOExcepti final Collection> result = new ArrayList<>(); int upto = 0; final int sortFieldCount = comparators.length; + assert sortFieldCount > 0; // this must always be true because a Sort must contain at least one field for(CollectedSearchGroup group : orderedGroups) { if (upto++ < groupOffset) { continue; @@ -139,10 +140,20 @@ public Collection> getTopGroups(int groupOffset) throws IOExcepti // System.out.println(" group=" + (group.groupValue == null ? "null" : group.groupValue.toString())); SearchGroup searchGroup = new SearchGroup<>(); searchGroup.groupValue = group.groupValue; + // We pass this around so that we can get the corresponding solr id when serializing the search group to send to the federator + searchGroup.topDocLuceneId = group.topDoc; searchGroup.sortValues = new Object[sortFieldCount]; for(int sortFieldIDX=0;sortFieldIDX { * been passed to {@link FirstPassGroupingCollector#getTopGroups} */ public Object[] sortValues; + /** The top doc of this group: we track the Lucene id, + * the Solr id and the score of the document */ + public Object topDocSolrId; + public float topDocScore; + + /** The topDocLuceneId is not meaningful at the federator level because it is unique only at the shard level. 
+ * It is used by the shard to get the corresponding solr id when serializing the search group to send to the federator + */ + public int topDocLuceneId; + @Override public String toString() { - return("SearchGroup(groupValue=" + groupValue + " sortValues=" + Arrays.toString(sortValues) + ")"); + return "SearchGroup{" + + "groupValue=" + groupValue + + ", sortValues=" + Arrays.toString(sortValues) + + ", topDocSolrId=" + topDocSolrId + + ", topDocScore=" + topDocScore + + ", topDocLuceneId=" + topDocLuceneId + + '}'; } @Override @@ -113,6 +129,11 @@ private static class MergedGroup { public boolean processed; public boolean inQueue; + /** The top doc of this group: + * the Solr id and the score of the document */ + public float topDocScore; + public Object topDocSolrId; + public MergedGroup(T groupValue) { this.groupValue = groupValue; } @@ -225,6 +246,8 @@ private void updateNextGroup(int topN, ShardIter shard) { // Start a new group: //System.out.println(" new"); mergedGroup = new MergedGroup<>(group.groupValue); + mergedGroup.topDocSolrId = group.topDocSolrId; + mergedGroup.topDocScore = group.topDocScore; mergedGroup.minShardIndex = shard.shardIndex; assert group.sortValues != null; mergedGroup.topValues = group.sortValues; @@ -262,6 +285,8 @@ private void updateNextGroup(int topN, ShardIter shard) { if (mergedGroup.inQueue) { queue.remove(mergedGroup); } + mergedGroup.topDocScore = group.topDocScore; + mergedGroup.topDocSolrId = group.topDocSolrId; mergedGroup.topValues = group.sortValues; mergedGroup.minShardIndex = shard.shardIndex; queue.add(mergedGroup); @@ -308,6 +333,8 @@ public Collection> merge(List>> shards, final SearchGroup newGroup = new SearchGroup<>(); newGroup.groupValue = group.groupValue; newGroup.sortValues = group.topValues; + newGroup.topDocSolrId = group.topDocSolrId; + newGroup.topDocScore = group.topDocScore; newTopGroups.add(newGroup); if (newTopGroups.size() == topN) { break; diff --git 
a/lucene/grouping/src/java/org/apache/lucene/search/grouping/TopGroups.java b/lucene/grouping/src/java/org/apache/lucene/search/grouping/TopGroups.java index ae1daf87ac40..66ab5415c5a9 100644 --- a/lucene/grouping/src/java/org/apache/lucene/search/grouping/TopGroups.java +++ b/lucene/grouping/src/java/org/apache/lucene/search/grouping/TopGroups.java @@ -135,12 +135,12 @@ public static TopGroups merge(TopGroups[] shardGroups, Sort groupSort, } else { shardTopDocs = new TopFieldDocs[shardGroups.length]; } - float totalMaxScore = Float.MIN_VALUE; + float totalMaxScore = Float.NaN; for(int groupIDX=0;groupIDX 0){ + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, + GroupParams.GROUP_SKIP_DISTRIBUTED_SECOND + " does not support "+ GroupParams.GROUP_FUNC); + } + // group.query not supported + if (queries.length > 0){ + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, + GroupParams.GROUP_SKIP_DISTRIBUTED_SECOND + " does not support "+ GroupParams.GROUP_QUERY); + } + + if (offset != 0) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, + GroupParams.GROUP_SKIP_DISTRIBUTED_SECOND + " does not support " + GroupParams.GROUP_OFFSET + " != 0 (" + + GroupParams.GROUP_OFFSET + " is "+offset + ")"); + } + + if (includeGroupCount) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, + GroupParams.GROUP_SKIP_DISTRIBUTED_SECOND + " does not support " + GroupParams.GROUP_TOTAL_COUNT + " == true"); + } + + final SortField[] withinGroupSortFields = withinGroupSortSpec.getSort().getSort(); + final SortField[] groupSortFields = groupSortSpec.getSort().getSort(); + + // Within group sort must be the same as group sort because if we skip second step no sorting within group will be done. 
+ // This checks if withinGroupSortFields is a prefix of groupSortFields + if (Collections.indexOfSubList(Arrays.asList(groupSortFields), Arrays.asList(withinGroupSortFields)) != 0) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, + GroupParams.GROUP_SKIP_DISTRIBUTED_SECOND + " does not allow the given within/global sort group configuration"); + } + } public String[] getFields() { return fields; @@ -129,4 +201,12 @@ public void setWithinGroupSortSpec(SortSpec withinGroupSortSpec) { this.withinGroupSortSpec = withinGroupSortSpec; } + public boolean isSkipSecondGroupingStep() { + return skipSecondGroupingStep; + } + + public void setSkipSecondGroupingStep(boolean skipSecondGroupingStep) { + this.skipSecondGroupingStep = skipSecondGroupingStep; + } + } diff --git a/solr/core/src/java/org/apache/solr/search/grouping/distributed/command/GroupConverter.java b/solr/core/src/java/org/apache/solr/search/grouping/distributed/command/GroupConverter.java index 0a21a6241188..85bcf01ba62e 100644 --- a/solr/core/src/java/org/apache/solr/search/grouping/distributed/command/GroupConverter.java +++ b/solr/core/src/java/org/apache/solr/search/grouping/distributed/command/GroupConverter.java @@ -52,6 +52,8 @@ static Collection> fromMutable(SchemaField field, Collecti for (SearchGroup original : values) { SearchGroup converted = new SearchGroup(); converted.sortValues = original.sortValues; + converted.topDocLuceneId = original.topDocLuceneId; + converted.topDocScore = original.topDocScore; if (original.groupValue.exists) { BytesRefBuilder binary = new BytesRefBuilder(); fieldType.readableToIndexed(original.groupValue.toString(), binary); diff --git a/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/SearchGroupShardResponseProcessor.java b/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/SearchGroupShardResponseProcessor.java index 71c34b82b7e2..58c339f8c240 100644 --- 
a/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/SearchGroupShardResponseProcessor.java +++ b/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/SearchGroupShardResponseProcessor.java @@ -37,6 +37,7 @@ import org.apache.solr.handler.component.ShardRequest; import org.apache.solr.handler.component.ShardResponse; import org.apache.solr.response.SolrQueryResponse; +import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.search.SortSpec; import org.apache.solr.search.grouping.distributed.ShardResponseProcessor; import org.apache.solr.search.grouping.distributed.command.SearchGroupsFieldCommandResult; @@ -47,6 +48,14 @@ */ public class SearchGroupShardResponseProcessor implements ShardResponseProcessor { + protected SearchGroupsResultTransformer newSearchGroupsResultTransformer(SolrIndexSearcher solrIndexSearcher) { + return new SearchGroupsResultTransformer(solrIndexSearcher); + } + + protected SearchGroupsContainer newSearchGroupsContainer(ResponseBuilder rb) { + return new SearchGroupsContainer(rb.getGroupingSpec().getFields()); + } + @Override public void process(ResponseBuilder rb, ShardRequest shardRequest) { SortSpec groupSortSpec = rb.getGroupingSpec().getGroupSortSpec(); @@ -56,16 +65,14 @@ public void process(ResponseBuilder rb, ShardRequest shardRequest) { assert withinGroupSort != null; final Map>>> commandSearchGroups = new HashMap<>(fields.length, 1.0f); - final Map, Set>> tempSearchGroupToShards = new HashMap<>(fields.length, 1.0f); for (String field : fields) { commandSearchGroups.put(field, new ArrayList>>(shardRequest.responses.size())); - tempSearchGroupToShards.put(field, new HashMap, Set>()); if (!rb.searchGroupToShards.containsKey(field)) { rb.searchGroupToShards.put(field, new HashMap, Set>()); } } - SearchGroupsResultTransformer serializer = new SearchGroupsResultTransformer(rb.req.getSearcher()); + SearchGroupsResultTransformer serializer = 
newSearchGroupsResultTransformer(rb.req.getSearcher()); int maxElapsedTime = 0; int hitCountDuringFirstPhase = 0; @@ -75,6 +82,8 @@ public void process(ResponseBuilder rb, ShardRequest shardRequest) { rb.rsp.getValues().add(ShardParams.SHARDS_INFO + ".firstPhase", shardInfo); } + SearchGroupsContainer searchGroupsContainer = newSearchGroupsContainer(rb); + for (ShardResponse srsp : shardRequest.responses) { if (shardInfo != null) { SimpleOrderedMap nl = new SimpleOrderedMap<>(4); @@ -123,15 +132,7 @@ public void process(ResponseBuilder rb, ShardRequest shardRequest) { } commandSearchGroups.get(field).add(searchGroups); - for (SearchGroup searchGroup : searchGroups) { - Map, Set> map = tempSearchGroupToShards.get(field); - Set shards = map.get(searchGroup); - if (shards == null) { - shards = new HashSet<>(); - map.put(searchGroup, shards); - } - shards.add(srsp.getShard()); - } + searchGroupsContainer.addSearchGroups(srsp, field, searchGroups); } hitCountDuringFirstPhase += (Integer) srsp.getSolrResponse().getResponse().get("totalHitCount"); } @@ -143,8 +144,45 @@ public void process(ResponseBuilder rb, ShardRequest shardRequest) { if (mergedTopGroups == null) { continue; } + searchGroupsContainer.addMergedSearchGroups(rb, groupField, mergedTopGroups); + searchGroupsContainer.addSearchGroupToShards(rb, groupField, mergedTopGroups); + } + } + + protected static class SearchGroupsContainer { + + private final Map, Set>> tempSearchGroupToShards; + + public SearchGroupsContainer(String[] fields) { + tempSearchGroupToShards = new HashMap<>(fields.length, 1.0f); + for (String field : fields) { + tempSearchGroupToShards.put(field, new HashMap, Set>()); + } + } + + public void addSearchGroups(ShardResponse srsp, String field, Collection> searchGroups) { + for (SearchGroup searchGroup : searchGroups) { + Map, Set> map = tempSearchGroupToShards.get(field); + Set shards = map.get(searchGroup); + if (shards == null) { + shards = new HashSet<>(); + map.put(searchGroup, shards); 
+ } + shards.add(srsp.getShard()); + } + } + /** + * This accumulates {@link ResponseBuilder#mergedSearchGroups} for use in the second step. + */ + public void addMergedSearchGroups(ResponseBuilder rb, String groupField, Collection> mergedTopGroups) { rb.mergedSearchGroups.put(groupField, mergedTopGroups); + } + + /** + * This accumulates {@link ResponseBuilder#searchGroupToShards} for use in the second step. + */ + public void addSearchGroupToShards(ResponseBuilder rb, String groupField, Collection> mergedTopGroups) { for (SearchGroup mergedTopGroup : mergedTopGroups) { rb.searchGroupToShards.get(groupField).put(mergedTopGroup, tempSearchGroupToShards.get(groupField).get(mergedTopGroup)); } diff --git a/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/SkipSecondStepSearchGroupShardResponseProcessor.java b/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/SkipSecondStepSearchGroupShardResponseProcessor.java new file mode 100644 index 000000000000..d41449663872 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/SkipSecondStepSearchGroupShardResponseProcessor.java @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.search.grouping.distributed.responseprocessor; + +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; + +import org.apache.lucene.search.TotalHits; +import org.apache.lucene.search.grouping.GroupDocs; +import org.apache.lucene.search.grouping.SearchGroup; +import org.apache.lucene.search.grouping.TopGroups; +import org.apache.lucene.util.BytesRef; +import org.apache.solr.handler.component.ResponseBuilder; +import org.apache.solr.handler.component.ShardDoc; +import org.apache.solr.handler.component.ShardRequest; +import org.apache.solr.handler.component.ShardResponse; +import org.apache.solr.search.SolrIndexSearcher; +import org.apache.solr.search.grouping.GroupingSpecification; +import org.apache.solr.search.grouping.distributed.shardresultserializer.SearchGroupsResultTransformer; +import org.apache.solr.search.grouping.distributed.shardresultserializer.SkipSecondStepSearchResultResultTransformer; + +public class SkipSecondStepSearchGroupShardResponseProcessor extends SearchGroupShardResponseProcessor { + + @Override + protected SearchGroupsResultTransformer newSearchGroupsResultTransformer(SolrIndexSearcher solrIndexSearcher) { + return new SkipSecondStepSearchResultResultTransformer(solrIndexSearcher); + } + + @Override + protected SearchGroupsContainer newSearchGroupsContainer(ResponseBuilder rb) { + return new SkipSecondStepSearchGroupsContainer(rb.getGroupingSpec().getFields()); + } + + @Override + public void process(ResponseBuilder rb, ShardRequest shardRequest) { + super.process(rb, shardRequest); + TopGroupsShardResponseProcessor.fillResultIds(rb); + } + + private static class SkipSecondStepSearchGroupsContainer extends SearchGroupsContainer { + + private final Map docIdToShard = new HashMap<>(); + + public SkipSecondStepSearchGroupsContainer(String[] fields) { + super(fields); + } + + @Override 
+ public void addSearchGroups(ShardResponse srsp, String field, Collection> searchGroups) { + super.addSearchGroups(srsp, field, searchGroups); + for (SearchGroup searchGroup : searchGroups) { + assert(srsp.getShard() != null); + docIdToShard.put(searchGroup.topDocSolrId, srsp.getShard()); + } + } + + /** + * This is overridden as a no-op since we need not accumulate {@link ResponseBuilder#mergedSearchGroups} + * for use in the second step because the second step is being skipped. + */ + @Override + public void addMergedSearchGroups(ResponseBuilder rb, String groupField, Collection> mergedTopGroups ) { + // no-op + } + + /** + * This does accumulate {@link ResponseBuilder#mergedTopGroups} for use in the get-fields stage. + */ + @Override + public void addSearchGroupToShards(ResponseBuilder rb, String groupField, Collection> mergedTopGroups) { + final GroupDocs[] groups = new GroupDocs[mergedTopGroups.size()]; + + // Max top-doc score over all groups; stays NaN until a group with a non-NaN score is seen (Float.MIN_VALUE is the smallest positive float, not the lowest value) + float maxScore = Float.NaN; + int groupsIndex = 0; + + for (SearchGroup group : mergedTopGroups) { + if (! 
Float.isNaN(group.topDocScore)) { + maxScore = Float.isNaN(maxScore) ? group.topDocScore : Math.max(maxScore, group.topDocScore); + } + final String shard = docIdToShard.get(group.topDocSolrId); + assert(shard != null); + final ShardDoc sdoc = new ShardDoc(); + sdoc.score = group.topDocScore; + sdoc.id = group.topDocSolrId; + sdoc.shard = shard; + + groups[groupsIndex++] = new GroupDocs<>( + Float.NaN, + group.topDocScore, + new TotalHits(1, TotalHits.Relation.EQUAL_TO), /* we don't know the actual number of hits in the group- we set it to 1 as we only keep track of the top doc */ + new ShardDoc[] { sdoc }, /* only top doc */ + group.groupValue, + group.sortValues); + } + + final GroupingSpecification groupingSpecification = rb.getGroupingSpec(); + + final TopGroups topMergedGroups = new TopGroups<>( + groupingSpecification.getGroupSortSpec().getSort().getSort(), + groupingSpecification.getWithinGroupSortSpec().getSort().getSort(), + 0, /* Set totalHitCount to 0 as it cannot be computed when the second step is skipped */ + 0, /* Set totalGroupedHitCount to 0 as it cannot be computed when the second step is skipped */ + groups, + groups.length > 0 ? 
maxScore : Float.NaN); + rb.mergedTopGroups.put(groupField, topMergedGroups); + } + } + +} + diff --git a/solr/core/src/java/org/apache/solr/search/grouping/distributed/shardresultserializer/SearchGroupsResultTransformer.java b/solr/core/src/java/org/apache/solr/search/grouping/distributed/shardresultserializer/SearchGroupsResultTransformer.java index 425ca5bdcafc..85dcb3119172 100644 --- a/solr/core/src/java/org/apache/solr/search/grouping/distributed/shardresultserializer/SearchGroupsResultTransformer.java +++ b/solr/core/src/java/org/apache/solr/search/grouping/distributed/shardresultserializer/SearchGroupsResultTransformer.java @@ -40,7 +40,7 @@ public class SearchGroupsResultTransformer implements ShardResultTransformer data) throws IOException { return result; } - private SearchGroup deserializeOneSearchGroup(SchemaField groupField, String groupValue, - SortField[] groupSortField, List rawSearchGroupData) { + protected Object[] getSortValues(Object rawSearchGroupData) { + List sortValues = (List)rawSearchGroupData; + return sortValues.toArray(new Comparable[sortValues.size()]); + } + + protected SearchGroup deserializeOneSearchGroup(SchemaField groupField, String groupValue, + SortField[] groupSortField, Object rawSearchGroupData) { SearchGroup searchGroup = new SearchGroup<>(); searchGroup.groupValue = null; if (groupValue != null) { @@ -84,7 +89,7 @@ private SearchGroup deserializeOneSearchGroup(SchemaField groupField, searchGroup.groupValue = new BytesRef(groupValue); } } - searchGroup.sortValues = rawSearchGroupData.toArray(new Comparable[rawSearchGroupData.size()]); + searchGroup.sortValues = getSortValues(rawSearchGroupData); for (int i = 0; i < searchGroup.sortValues.length; i++) { SchemaField field = groupSortField[i].getField() != null ? 
searcher.getSchema().getFieldOrNull(groupSortField[i].getField()) : null; searchGroup.sortValues[i] = ShardResultTransformerUtils.unmarshalSortValue(searchGroup.sortValues[i], field); @@ -117,7 +122,7 @@ public Map transformToNative(NamedList searchGroup) { + protected Object serializeOneSearchGroup(SortField[] groupSortField, SearchGroup searchGroup) { Object[] convertedSortValues = new Object[searchGroup.sortValues.length]; for (int i = 0; i < searchGroup.sortValues.length; i++) { Object sortValue = searchGroup.sortValues[i]; @@ -128,12 +133,12 @@ private Object[] serializeOneSearchGroup(SortField[] groupSortField, SearchGroup return convertedSortValues; } - private NamedList serializeSearchGroup(Collection> data, SearchGroupsFieldCommand command) { - final NamedList result = new NamedList<>(data.size()); + protected NamedList serializeSearchGroup(Collection> data, SearchGroupsFieldCommand command) { + final NamedList result = new NamedList<>(data.size()); SortField[] groupSortField = command.getGroupSort().getSort(); for (SearchGroup searchGroup : data) { - Object[] convertedSortValues = serializeOneSearchGroup(groupSortField, searchGroup); + Object convertedSortValues = serializeOneSearchGroup(groupSortField, searchGroup); SchemaField field = searcher.getSchema().getFieldOrNull(command.getKey()); String groupValue = searchGroup.groupValue != null ? 
field.getType().indexedToReadable(searchGroup.groupValue, new CharsRefBuilder()).toString() : null; result.add(groupValue, convertedSortValues); @@ -141,5 +146,4 @@ private NamedList serializeSearchGroup(Collection> data, S return result; } - } diff --git a/solr/core/src/java/org/apache/solr/search/grouping/distributed/shardresultserializer/SkipSecondStepSearchResultResultTransformer.java b/solr/core/src/java/org/apache/solr/search/grouping/distributed/shardresultserializer/SkipSecondStepSearchResultResultTransformer.java new file mode 100644 index 000000000000..6ce5314282b4 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/grouping/distributed/shardresultserializer/SkipSecondStepSearchResultResultTransformer.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.search.grouping.distributed.shardresultserializer; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Set; + +import org.apache.lucene.document.Document; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.grouping.SearchGroup; +import org.apache.lucene.util.BytesRef; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.schema.SchemaField; +import org.apache.solr.search.SolrIndexSearcher; + +/** + * Extends {@link SearchGroupsResultTransformer} and overrides the serializeOneSearchGroup, + * deserializeOneSearchGroup and getSortValues methods because additional data + * (top doc id and top doc score) needs to be transformed for each group. + */ +public class SkipSecondStepSearchResultResultTransformer extends SearchGroupsResultTransformer { + + private static final String TOP_DOC_SOLR_ID_KEY = "topDocSolrId"; + private static final String TOP_DOC_SCORE_KEY = "topDocScore"; + private static final String SORTVALUES_KEY = "sortValues"; + + private final SchemaField uniqueField; + private final Set uniqueFieldNameAsSet; + + public SkipSecondStepSearchResultResultTransformer(SolrIndexSearcher searcher) { + super(searcher); + this.uniqueField = searcher.getSchema().getUniqueKeyField(); + this.uniqueFieldNameAsSet = Collections.singleton(this.uniqueField.getName()); + } + + @Override + protected Object[] getSortValues(Object rawSearchGroupData) { + NamedList groupInfo = (NamedList) rawSearchGroupData; + ArrayList sortValues = (ArrayList) groupInfo.get(SORTVALUES_KEY); + return sortValues.toArray(new Comparable[sortValues.size()]); + } + + @Override + protected SearchGroup deserializeOneSearchGroup(SchemaField groupField, String groupValue, + SortField[] groupSortField, Object rawSearchGroupData) { + SearchGroup searchGroup = 
super.deserializeOneSearchGroup(groupField, groupValue, groupSortField, rawSearchGroupData); + NamedList groupInfo = (NamedList) rawSearchGroupData; + searchGroup.topDocLuceneId = DocIdSetIterator.NO_MORE_DOCS; + searchGroup.topDocScore = (float) groupInfo.get(TOP_DOC_SCORE_KEY); + searchGroup.topDocSolrId = groupInfo.get(TOP_DOC_SOLR_ID_KEY); + return searchGroup; + } + + @Override + protected Object serializeOneSearchGroup(SortField[] groupSortField, SearchGroup searchGroup) { + Document luceneDoc = null; + /** Use the lucene id to get the unique solr id so that it can be sent to the federator. + * The lucene id of a document is not unique across all shards i.e. different documents + * in different shards could have the same lucene id, whereas the solr id is guaranteed + * to be unique so this is what we need to return to the federator + **/ + try { + luceneDoc = searcher.doc(searchGroup.topDocLuceneId, uniqueFieldNameAsSet); + } catch (IOException e) { + throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Cannot retrieve document for unique field " + uniqueField + " (" + e.toString() + ")", e); + } + String topDocSolrId = uniqueField.getType().toExternal(luceneDoc.getField(uniqueField.getName())); + NamedList groupInfo = new NamedList<>(); + groupInfo.add(TOP_DOC_SCORE_KEY, searchGroup.topDocScore); + groupInfo.add(TOP_DOC_SOLR_ID_KEY, topDocSolrId); + + Object convertedSortValues = super.serializeOneSearchGroup(groupSortField, searchGroup); + groupInfo.add(SORTVALUES_KEY, convertedSortValues); + return groupInfo; + } +} diff --git a/solr/core/src/java/org/apache/solr/search/grouping/endresulttransformer/GroupedEndResultTransformer.java b/solr/core/src/java/org/apache/solr/search/grouping/endresulttransformer/GroupedEndResultTransformer.java index 3ba3470dc5a9..72428d274c11 100644 --- a/solr/core/src/java/org/apache/solr/search/grouping/endresulttransformer/GroupedEndResultTransformer.java +++ 
b/solr/core/src/java/org/apache/solr/search/grouping/endresulttransformer/GroupedEndResultTransformer.java @@ -19,6 +19,7 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; +import java.util.function.BiFunction; import org.apache.commons.collections.CollectionUtils; import org.apache.lucene.index.IndexableField; @@ -56,6 +57,23 @@ public GroupedEndResultTransformer(SolrIndexSearcher searcher) { public void transform(Map result, ResponseBuilder rb, SolrDocumentSource solrDocumentSource) { NamedList commands = new SimpleOrderedMap<>(); SortSpec withinGroupSortSpec = rb.getGroupingSpec().getWithinGroupSortSpec(); + final BiFunction bytesRefToString; + if (rb.getGroupingSpec().isSkipSecondGroupingStep()) { + /** + * The QueryComponent.doProcessGroupedDistributedSearchSecondPhase call + * is skipped (because the second phase is skipped) and so we need to + * convert from indexed to readable group value here when the + * QueryComponent.groupedFinishStage calls us. + */ + bytesRefToString = (value, fieldType) -> { return fieldType.indexedToReadable(value.utf8ToString()); }; + } else { + /** + * QueryComponent.doProcessGroupedDistributedSearchSecondPhase calls + * TopGroupsResultTransformer#serializeTopGroups which converts the + * indexed group value to a readable group value. 
+ */ + bytesRefToString = (value, fieldType) -> { return value.utf8ToString(); }; + } for (Map.Entry entry : result.entrySet()) { Object value = entry.getValue(); if (TopGroups.class.isInstance(value)) { @@ -75,7 +93,8 @@ public void transform(Map result, ResponseBuilder rb, SolrDocumentSou SimpleOrderedMap groupResult = new SimpleOrderedMap<>(); if (group.groupValue != null) { // use createFields so that fields having doc values are also supported - List fields = groupField.createFields(group.groupValue.utf8ToString()); + final String groupValue = bytesRefToString.apply(group.groupValue, groupFieldType); + List fields = groupField.createFields(groupValue); if (CollectionUtils.isNotEmpty(fields)) { groupResult.add("groupValue", groupFieldType.toObject(fields.get(0))); } else { diff --git a/solr/core/src/test/org/apache/solr/TestDistributedGrouping.java b/solr/core/src/test/org/apache/solr/TestDistributedGrouping.java index d16fbbe26c38..7f26882121a7 100644 --- a/solr/core/src/test/org/apache/solr/TestDistributedGrouping.java +++ b/solr/core/src/test/org/apache/solr/TestDistributedGrouping.java @@ -22,14 +22,18 @@ import org.apache.lucene.util.LuceneTestCase.Slow; import org.apache.solr.client.solrj.SolrClient; import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.response.Group; +import org.apache.solr.client.solrj.response.GroupCommand; import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.common.SolrDocumentList; import org.apache.solr.common.SolrException; import org.apache.solr.common.params.CommonParams; +import org.apache.solr.common.params.GroupParams; import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.SolrTestCaseJ4.SuppressPointFields; +import org.junit.Assert; import org.junit.Test; import static org.hamcrest.CoreMatchers.containsString; @@ -425,6 +429,192 @@ 
public void test() throws Exception { //Debug simpleQuery("q", "*:*", "rows", 10, "fl", "id," + i1, "group", "true", "group.field", i1, "debug", "true"); + + doTestGroupSkipSecondStepAlt(); + doTestGroupSkipSecondStep(); + } + + /* + SOLR-11831, test skipping the second grouping step if the query only retrieves one document per group + */ + private void doTestGroupSkipSecondStepAlt() throws Exception { + + // group.skip.second.step (absent/false/true) have an equivalent + // outcome only because the query matches nothing + + ModifiableSolrParams solrParams = params( + "q", "1234doesnotmatchanything1234", + "fl", "id," + i1, + "group", "true", + "group.field", i1); + setDistributedParams(solrParams); + + variantQuery( + solrParams, + params(), // Test with no group.skip.second.step (use the default) + params("group.skip.second.step", "false"), + params("group.skip.second.step", "true") + ); + + // when the query matches something numFound needs to be ignored because + // the number of documents per group will not be computed i.e. numFound=1 + // will always be returned + // TODO: can we test for numFound=1 somehow? 
+ assertFalse(handle.containsKey("numFound")); + handle.put("numFound", SKIP); + + solrParams = params( + "q", "{!func}id_i1", + "rows", "3", + "fl", "id," + i1, + "group", "true", + "group.field", i1, + "group.skip.second.step", "true"); + setDistributedParams(solrParams); + + try { + variantQuery( + solrParams, + params(), + params("group.limit", "1") + ); + } finally { + handle.remove("numFound"); + } + } + + private void doTestGroupSkipSecondStep() throws Exception { + ignoreException(GroupParams.GROUP_SKIP_DISTRIBUTED_SECOND); // don't print stack trace for exception raised by group.skip.second.step + // Ignore numFound if group.skip.second.step is enabled because the number of documents per group will not be computed (will default to 1) + handle.put("numFound", SKIP); + query("q", "kings", "group.skip.second.step", true, "fl", "id," + i1, "group", "true", "group.field", i1); + + ignoreException("Illegal grouping specification"); + assertSimpleQueryThrows("q", "{!func}id_i1", "group.skip.second.step", true, "fl", "id," + i1, "group", "true", "group.field", i1, "group.ngroups", true); + assertSimpleQueryThrows("q", "{!func}id", "group.skip.second.step", true, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 5); + assertSimpleQueryThrows("q", "{!func}id_i1", "group.skip.second.step", true, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 0); + + // if group.sort fields list is a prefix of sort fields list, the query should succeed + query("q", "{!func}id_i1", "group.skip.second.step", true, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 1, "sort", i1+" desc, id_i1 desc","group.sort", i1+" desc, id_i1 desc"); + // if sort is present and group.sort is absent then sort would also be used for group.sort -- it should pass? 
+ query("q", "{!func}id_i1", "group.skip.second.step", true, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 1, "sort", i1+" desc, id_i1 desc"); + + // group sorted in a different way should fail + assertSimpleQueryThrows("q", "{!func}id_i1", "group.skip.second.step", true, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 1, "group.sort", i1+" desc"); + + query("q", "{!func}id_i1", "rows", 3, "group.skip.second.step", true, "fl", "id," + i1, "group", "true", + "group.field", i1, "sort", tlong+" desc,"+i1+" asc", "group.sort", tlong+" desc"); + + query("q", "{!func}id_i1", "rows", 3, "group.skip.second.step", true, "fl", "id," + i1, "group", "true", + "group.field", i1, "sort", tlong+" desc,"+i1+" asc", "group.sort", tlong+" desc"); + query("q", "{!func}id_i1", "rows", 3, "group.skip.second.step", true, "fl", "id," + i1, "group", "true", + "group.field", i1, "sort", tlong+" desc,"+i1+" asc", "group.sort", tlong+" desc,"+ i1+" asc"); + // not a prefix, should fail + assertSimpleQueryThrows("q", "{!func}id_i1", "rows", 3, "group.skip.second.step", true, "fl", "id," + i1, "group", "true", + "group.field", i1, "sort", tlong+" desc,"+i1+" asc", "group.sort",i1+" asc,"+tlong+" desc"); + + // check that the requests fails if group.func is used with group.skip.second.step instead of group.field + assertSimpleQueryThrows("q", "{!func}id_i1", "rows", 3, "group.skip.second.step", true, "fl", "id," + i1, "group", "true", + "group.func", i1); + + // check that the requests fails if group.query is used with group.skip.second.step instead of group.field + assertSimpleQueryThrows("q", "{!func}id_i1", "rows", 3, "group.skip.second.step", true, "fl", "id," + i1, "group", "true", + "group.query", "{!func}id_1"); + + /// check that group.skip.second.step works properly with group.main == true (using a different + // EndResultTransformer but still sharing the skipping logic) + query("q", "{!func}id_i1", "rows", 3, "group.skip.second.step", true, 
"fl", "id," + i1, "group", "true", + "group.field", i1, "sort", tlong+" desc,"+i1+" asc", "group.sort", tlong+" desc", "group.main", true); + query("q", "kings", "group.skip.second.step", true, "fl", "id," + i1, "group", "true", "group.field", i1, "group.main", true); + // check zero results + query("q", "this_wont_match_any_document", "group.skip.second.step", true, "fl", "id," + i1, "group", "true", "group.field", i1, "group.main", true); + // check that group.skip.second.step works properly with group.format == simple (using a different + // EndResultTransformer but still sharing the skipping logic) + query("q", "{!func}id_i1", "rows", 3, "group.skip.second.step", true, "fl", "id," + i1, "group", "true", + "group.field", i1, "sort", tlong+" desc,"+i1+" asc", "group.sort", tlong+" desc", "group.format", "simple"); + // check that group.skip.second.step works properly with group.format == simple + query("q", "kings", "group.skip.second.step", true, "fl", "id," + i1, "group", "true", "group.field", i1, "group.format", "simple"); + query("q", "{!func}id_i1", "rows", "3", "group.skip.second.step", "true", "fl", "id," + i1+",id_i1,score", "group", "true", "group.field", i1, "group.limit", "1"); + query("q", "{!func}id_i1", "rows", "3", "group.skip.second.step", "false", "fl", "id," + i1+",id_i1,score", "group", "true", "group.field", i1, "group.limit", "1"); + + + assertNumFoundWithSkipSecondGroupingStep("q", "kings", "group.skip.second.step", "true", "fl", "id," + i1, "group", "true", "group.field", i1); + assertNumFoundWithSkipSecondGroupingStep( "q", "{!func}id_i1", "rows", "3", "fl", "id," + i1+",id_i1", "group", "true", "group.field", i1, "group.limit", "1","sort", tlong+" desc"); + assertNumFoundWithSkipSecondGroupingStep("q", "{!func}id_i1", "rows", "3", "fl", "id," + i1+",id_i1,score", "group", "true", "group.field", i1, "group.limit", "1"); + + // score in fl and in sort (by default) + testMaxScoreWithSkipSecondGroupingStep("q", "{!func}id_i1", "rows", "3", 
"fl", "id," + i1+",id_i1,score", "group", "true", "group.field", i1, "group.limit", "1"); + // no score and different sort (should not return maxScore) + testMaxScoreWithSkipSecondGroupingStep("q", "{!func}id_i1", "rows", "3", "fl", "id," + i1+",id_i1", "group", "true", "group.field", i1, "group.limit", "1","sort", tlong+" desc"); + // no score in fl, sort in score (by default) + testMaxScoreWithSkipSecondGroupingStep("q", "{!func}id_i1", "rows", "3", "fl", "id," + i1+",id_i1", "group", "true", "group.field", i1, "group.limit", "1"); + // no score in fl and sort by secondary field + testMaxScoreWithSkipSecondGroupingStep("q", "{!func}id_i1", "rows", "3", "fl", "id," + i1+",id_i1", "group", "true", "group.field", i1, "group.limit", "1", "sort", tlong+" desc, score desc"); + + handle.remove("numFound"); + } + + // When group.skip.second.step is enabled numFound in each group will be 1. + private void assertNumFoundWithSkipSecondGroupingStep(String ... params) throws IOException, SolrServerException { + ModifiableSolrParams solrParams = params(params); + solrParams.set("group.skip.second.step", "true"); + setDistributedParams(solrParams); + QueryResponse skipSecondStep = queryServer(solrParams); + for (GroupCommand gc : skipSecondStep.getGroupResponse().getValues()){ + for (Group group : gc.getValues()){ + if (! group.getResult().isEmpty()){ + Assert.assertEquals(1, group.getResult().getNumFound()); + } + } + } + } + + // will check that maxScore is the same in a distribute query regardless of group.skip.second.step enabled or not. + private void testMaxScoreWithSkipSecondGroupingStep(String ... 
params) throws IOException, SolrServerException { + Integer maxScoreConf = handle.get("maxScore"); + handle.remove("maxScore"); + ModifiableSolrParams solrParams = params(params); + setDistributedParams(solrParams); + + solrParams.set("group.skip.second.step", "true"); + + // normal solr query + QueryResponse skipSecondStep = queryServer(solrParams); + solrParams.set("group.skip.second.step", "false"); + QueryResponse expectedResponse = queryServer(solrParams); + + List skipSecondStepGroups = skipSecondStep.getGroupResponse().getValues(); + List expectedGroups = expectedResponse.getGroupResponse().getValues(); + Assert.assertEquals(expectedGroups.size(), skipSecondStepGroups.size()); + int size = expectedGroups.size(); + for (int i = 0; i < size; i++){ + List expectedValues = expectedGroups.get(i).getValues(); + List skipSecondStepValues = skipSecondStepGroups.get(i).getValues(); + Assert.assertEquals(expectedValues.size(), skipSecondStepValues.size()); + for (int j = 0; j < expectedValues.size(); j++){ + Float expectedMaxScore = expectedValues.get(j).getResult().getMaxScore(); + + if ( expectedMaxScore != null && expectedMaxScore.isNaN()){ + Assert.assertTrue(skipSecondStepValues.get(j).getResult().getMaxScore().isNaN()); + } else { + Assert.assertEquals(expectedMaxScore, skipSecondStepValues.get(j).getResult().getMaxScore()); + } + } + } + handle.put("maxScore", maxScoreConf); + + } + + + + private void assertSimpleQueryThrows(Object... queryParams) { + boolean requestFailed = false; + try { + simpleQuery(queryParams); + } catch (Exception e) { + requestFailed = true; + } + assertTrue(requestFailed); } private void simpleQuery(Object... 
queryParams) throws SolrServerException, IOException { diff --git a/solr/solr-ref-guide/src/result-grouping.adoc b/solr/solr-ref-guide/src/result-grouping.adoc index a687b781e4d1..d99b782fbdc5 100644 --- a/solr/solr-ref-guide/src/result-grouping.adoc +++ b/solr/solr-ref-guide/src/result-grouping.adoc @@ -114,6 +114,11 @@ Setting this parameter to a number greater than 0 enables caching for result gro + Testing has shown that group caching only improves search time with Boolean, wildcard, and fuzzy queries. For simple queries like term or "match all" queries, group caching degrades performance. +`group.skip.second.step`:: +This parameter can be set to `true` if only one document per group needs to be retrieved. Result Grouping normally executes two searches; when enabled, this option skips the second search, improving performance. The default value is `false`. It can be set to `true` only if `group.limit` is 1 and the `group.sort` fields list is absent or is a prefix of the `sort` fields list (e.g., `sort=price asc,name desc` with `group.sort=price asc` is fine, but `sort=price asc,name desc` with `group.sort=name desc` is not). Only grouping by field (`group.field`) is supported; grouping by function (`group.func`) or by query (`group.query`) is not. The actual `numFound` for each group will not be available; `numFound` will be set to 1. It cannot be used together with `group.ngroups`. ++ +More details on `group.skip.second.step` can be found in https://www.youtube.com/watch?v=eMuepJpjUjI&t=1591[Learning to Rank: From Theory to Production]. + Any number of group commands (e.g., `group.field`, `group.func`, `group.query`, etc.) may be specified in a single request. 
== Grouping Examples diff --git a/solr/solrj/src/java/org/apache/solr/common/params/GroupParams.java b/solr/solrj/src/java/org/apache/solr/common/params/GroupParams.java index 3bfc59891387..482e577ce978 100644 --- a/solr/solrj/src/java/org/apache/solr/common/params/GroupParams.java +++ b/solr/solrj/src/java/org/apache/solr/common/params/GroupParams.java @@ -66,5 +66,11 @@ public interface GroupParams { public static final String GROUP_DISTRIBUTED_SECOND = GROUP + ".distributed.second"; public static final String GROUP_DISTRIBUTED_TOPGROUPS_PREFIX = GROUP + ".topgroups."; + + /** If true, activates an optimization that speeds up grouping. + * Setting this to true is only compatible with group.limit = 1 */ + public static final String GROUP_SKIP_DISTRIBUTED_SECOND = GROUP + ".skip.second.step"; + /** default value for GROUP_SKIP_DISTRIBUTED_SECOND */ + public static final boolean GROUP_SKIP_DISTRIBUTED_SECOND_DEFAULT = false; } diff --git a/solr/test-framework/src/java/org/apache/solr/BaseDistributedSearchTestCase.java b/solr/test-framework/src/java/org/apache/solr/BaseDistributedSearchTestCase.java index a881084f75d5..430566a34d93 100644 --- a/solr/test-framework/src/java/org/apache/solr/BaseDistributedSearchTestCase.java +++ b/solr/test-framework/src/java/org/apache/solr/BaseDistributedSearchTestCase.java @@ -860,9 +860,13 @@ public static String compare(SolrDocumentList a, SolrDocumentList b, int flags, } } } - - cmp = compare(a.getNumFound(), b.getNumFound(), 0, handle); - if (cmp != null) return ".numFound" + cmp; + final int checkNumFound = flags(handle, "numFound"); + if (checkNumFound == 0){ + cmp = compare(a.getNumFound(), b.getNumFound(), 0, handle); + if (cmp != null) return ".numFound" + cmp; + } else if (checkNumFound != SKIP) { + assert (f & SKIPVAL) != 0; + } cmp = compare(a.getStart(), b.getStart(), 0, handle); if (cmp != null) return ".start" + cmp;