45 commits
bf3cd5d
Combined Query Feature for Multi Query Execution
Jun 15, 2025
182bec9
Tests: Combined Query Feature for Multi Query Execution
Jun 17, 2025
b884f0e
Tests: Combined Query Feature for Multi Query Execution
Jun 24, 2025
29e8aea
Tests: Combined Query Feature for Multi Query Execution
Jun 25, 2025
c113799
Improve: Fix typo
ercsonusharma Jul 4, 2025
3600ed3
Tests: Fix errors
ercsonusharma Jul 4, 2025
9b0c76e
Review comments: implementation
ercsonusharma Jul 5, 2025
a841bc7
Code review changes
ercsonusharma Jul 12, 2025
91f8e09
Code review changes
ercsonusharma Jul 12, 2025
cace1f7
Code review changes
ercsonusharma Jul 12, 2025
299db43
Code review changes
ercsonusharma Jul 13, 2025
840070e
Code review changes
ercsonusharma Jul 13, 2025
d2feefc
Improvement and fixes
ercsonusharma Jul 16, 2025
89f63a9
Review comments impl
ercsonusharma Jul 26, 2025
d821abb
Build fix
ercsonusharma Jul 28, 2025
8041d66
Added documentation
ercsonusharma Aug 5, 2025
397dbb3
Fix for lucene upgrade
ercsonusharma Aug 8, 2025
d8b5588
Doc improv for cursors
ercsonusharma Aug 14, 2025
ec0b9cb
review comment implementation
ercsonusharma Aug 18, 2025
d6fd190
review comment implementation
ercsonusharma Aug 19, 2025
86933bc
review comment implementation
ercsonusharma Aug 20, 2025
b164979
doc update
ercsonusharma Aug 27, 2025
85f2cf9
added more test
ercsonusharma Aug 29, 2025
a4a26aa
abstract QueryComponent.mergeIds' ShardDoc processing
cpoerschke Aug 29, 2025
7fe997c
add missing @Override annotations
cpoerschke Aug 29, 2025
bcd1c3b
make DefaultShardDocQueue an anonymous class
cpoerschke Sep 1, 2025
787a016
Merge branch 'apache:main' into QueryComponent-mergeIds
cpoerschke Sep 1, 2025
7e0727c
Merge remote-tracking branch 'github_cpoerschke/QueryComponent-mergeI…
cpoerschke Sep 1, 2025
4dcbb57
dev increment: add uniqueDoc map-and-logic to ShardDocQueue
cpoerschke Sep 1, 2025
8a65023
review comment fix
ercsonusharma Sep 2, 2025
006b8c2
micro dev increment: replace unnecessary local resultSize use in Quer…
cpoerschke Sep 2, 2025
771089b
dev increment: factor out ShardDocQueue.resultIds method
cpoerschke Sep 2, 2025
460e8cd
dev increment: remove no-longer-used ShardDocQueue.(pop,size) methods
cpoerschke Sep 2, 2025
ac85d2f
review comment fix
ercsonusharma Sep 3, 2025
7b0593c
review comment fix
ercsonusharma Sep 3, 2025
c03c0f7
review comment enhancement
ercsonusharma Sep 3, 2025
a52dd22
simplification/consolidation: protected QueryComponent.newShardDocQue…
cpoerschke Sep 3, 2025
195f3f1
factor out protected QueryComponent.setResultIdsAndResponseDocs method
cpoerschke Sep 3, 2025
c1f5501
review comment enhancement
ercsonusharma Sep 3, 2025
3649d3e
Merge branch 'feat_combined_query' of https://github.com/ercsonusharm…
ercsonusharma Sep 3, 2025
4eedbed
refactor to reduce cyclometric complexity
ercsonusharma Sep 3, 2025
0990e7f
review comment fixes
ercsonusharma Sep 4, 2025
14ff5e1
debug params fix and rrf shard sort order
ercsonusharma Sep 4, 2025
bd637b7
test cases fix and rrf shard sort order
ercsonusharma Sep 5, 2025
2958599
introducing combiner methods as pre and post
ercsonusharma Sep 7, 2025

CombinedQueryResponseBuilder.java (new file)
@@ -0,0 +1,62 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.component;

import java.util.ArrayList;
import java.util.List;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;

/**
* The CombinedQueryResponseBuilder class extends the ResponseBuilder class and is responsible for
* building a combined response for multiple SearchComponent objects. It orchestrates the process of
* constructing the SolrQueryResponse by aggregating results from various components.
*/
public class CombinedQueryResponseBuilder extends ResponseBuilder {

public final List<ResponseBuilder> responseBuilders = new ArrayList<>();

/**
* Constructs a CombinedQueryResponseBuilder instance.
*
* @param req the SolrQueryRequest object containing the query parameters and context.
* @param rsp the SolrQueryResponse object to which the combined results will be added.
* @param components a list of SearchComponent objects that will be used to build the response.
*/
public CombinedQueryResponseBuilder(
SolrQueryRequest req, SolrQueryResponse rsp, List<SearchComponent> components) {
super(req, rsp, components);
}

/**
* Propagates all the properties from the parent ResponseBuilder to all the child builders; these
* properties are only set on the parent after the CombinedQueryComponent has been prepared.
*/
public final void propagate() {
responseBuilders.forEach(
thisRb -> {
thisRb.setNeedDocSet(isNeedDocSet());
thisRb.setNeedDocList(isNeedDocList());
thisRb.doFacets = doFacets;
thisRb.doHighlights = doHighlights;
thisRb.doExpand = doExpand;
thisRb.doTerms = doTerms;
thisRb.doStats = doStats;
thisRb.setDistribStatsDisabled(isDistribStatsDisabled());
});
}
}
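
The Javadoc above describes this builder as an aggregator over several per-query builders. A minimal usage sketch (not part of the diff; the helper name and its numQueries argument are hypothetical) of how a handler might create the child builders and later push the parent's flags down via propagate():

// Hypothetical helper, illustration only: builds the parent plus one child builder per
// sub-query. propagate() is intentionally not called here; the handler invokes it after the
// components' prepare() phase has set doFacets, doHighlights, etc. on the parent (see
// CombinedQuerySearchHandler.postPrepareComponents in this PR).
static CombinedQueryResponseBuilder newCombinedBuilder(
    SolrQueryRequest req, SolrQueryResponse rsp, List<SearchComponent> components, int numQueries) {
  CombinedQueryResponseBuilder crb = new CombinedQueryResponseBuilder(req, rsp, components);
  for (int i = 0; i < numQueries; i++) {
    crb.responseBuilders.add(new ResponseBuilder(req, rsp, components));
  }
  return crb;
}
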
CombinedQuerySearchHandler.java (new file)
@@ -0,0 +1,120 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.component;

import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.List;
import org.apache.solr.common.params.CombinerParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.ShardParams;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.search.facet.FacetModule;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* The CombinedQuerySearchHandler class extends the SearchHandler and provides custom behavior for
* handling combined queries. It overrides methods to create a response builder based on the {@link
* CombinerParams#COMBINER} parameter and to define the default components included in the search
* configuration.
*/
public class CombinedQuerySearchHandler extends SearchHandler {

private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

/**
* Overrides the default response builder creation method. This method checks if the {@link
* CombinerParams#COMBINER} parameter is set to true in the request. If it is, it returns an
* instance of {@link CombinedQueryResponseBuilder}; otherwise, it returns an instance of {@link
* ResponseBuilder}.
*
* @param req the SolrQueryRequest object
* @param rsp the SolrQueryResponse object
* @param components the list of SearchComponent objects
* @return the appropriate ResponseBuilder instance based on the CombinerParams.COMBINER parameter
*/
@Override
protected ResponseBuilder newResponseBuilder(
SolrQueryRequest req, SolrQueryResponse rsp, List<SearchComponent> components) {
if (req.getParams().getBool(CombinerParams.COMBINER, false)) {
return new CombinedQueryResponseBuilder(req, rsp, components);
}
return super.newResponseBuilder(req, rsp, components);
}
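
For orientation (not part of the diff): a tiny illustration of this toggle, assuming only the CombinerParams.COMBINER constant used above; reqWithParams and plainReq are hypothetical placeholders for requests with and without the flag.

// Illustration only: a request carrying the combiner flag gets the combined builder,
// any other request falls back to the stock ResponseBuilder.
ModifiableSolrParams params = new ModifiableSolrParams();
params.set(CombinerParams.COMBINER, true);
// newResponseBuilder(reqWithParams(params), rsp, components) -> CombinedQueryResponseBuilder
// newResponseBuilder(plainReq, rsp, components)              -> ResponseBuilder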

/**
* Overrides the default components and returns a list of component names that are included in the
* default configuration.
*
* @return a list of component names
*/
@Override
@SuppressWarnings("unchecked")
protected List<String> getDefaultComponents() {
List<String> names = new ArrayList<>(9);
names.add(CombinedQueryComponent.COMPONENT_NAME);
names.add(FacetComponent.COMPONENT_NAME);
names.add(FacetModule.COMPONENT_NAME);
names.add(MoreLikeThisComponent.COMPONENT_NAME);
names.add(HighlightComponent.COMPONENT_NAME);
names.add(StatsComponent.COMPONENT_NAME);
names.add(DebugComponent.COMPONENT_NAME);
names.add(ExpandComponent.COMPONENT_NAME);
names.add(TermsComponent.COMPONENT_NAME);
return names;
}

@Override
protected void postPrepareComponents(ResponseBuilder rb) {
super.postPrepareComponents(rb);
// propagate the CombinedQueryResponseBuilder's state to all subBuilders after prepare
if (rb instanceof CombinedQueryResponseBuilder crb) {
crb.propagate();
}
}

/**
* {@link ResponseBuilder}'s rb.distrib must be set for the combined query to work in single-core
* standalone mode. This method sets the parameter explicitly along with the other required Solr
* param, i.e. shards.
*
* @param req the SolrQueryRequest
* @return boolean denoting whether the request can be marked as distributed.
*/
@Override
protected boolean isDistrib(SolrQueryRequest req) {
boolean isDistrib = super.isDistrib(req);
if (!isDistrib
&& !req.getParams().getBool(ShardParams.IS_SHARD, false)
&& req.getHttpSolrCall() != null) {
log.info("Configuring distributed mode to enable Combined Query.");

Reviewer comment (Contributor): not worth logging; or maybe trace

ModifiableSolrParams solrParams = new ModifiableSolrParams(req.getParams());
String scheme = req.getHttpSolrCall().getReq().getScheme();
String host = req.getHttpSolrCall().getReq().getServerName();
int port = req.getHttpSolrCall().getReq().getServerPort();
String context = req.getHttpSolrCall().getReq().getContextPath();
String core = req.getCore().getName();
String localShardUrl = String.format("%s://%s:%d%s/%s", scheme, host, port, context, core);
solrParams.set(ShardParams.SHARDS, localShardUrl);
req.setParams(solrParams);

Reviewer comment (Contributor) on lines +114 to +115: I think it's a bit sneaky that a predicate looking method has a side-effect. This is a hack to work around a need for something proper -- for a component or handler to communicate we need the distributed search algorithm (no so-called short-circuit).

return true;
}
return isDistrib;
}
}
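
For reference, a small self-contained sketch (example values only, not taken from the PR) of the shards URL that the overridden isDistrib builds before forcing the distributed code path:

// Example values only -- illustrating the format string used in isDistrib() above.
String scheme = "http";
String host = "localhost";
int port = 8983;
String context = "/solr";      // servlet context path
String core = "techproducts";  // core name
String localShardUrl = String.format("%s://%s:%d%s/%s", scheme, host, port, context, core);
// localShardUrl == "http://localhost:8983/solr/techproducts"
// isDistrib() sets this as ShardParams.SHARDS so the single local core acts as its own shard.
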
QueryComponent.java

@@ -905,6 +905,67 @@ protected boolean addFL(StringBuilder fl, String field, boolean additionalAdded)
return true;
}

protected abstract static class ShardDocQueue {
public abstract boolean push(ShardDoc shardDoc);

public abstract Map<Object, ShardDoc> resultIds(int offset);
}

protected ShardDocQueue newShardDocQueue(
SolrIndexSearcher searcher, SortField[] sortFields, Integer size) {
return new ShardDocQueue() {

// id to shard mapping, to eliminate any accidental dups
private final HashMap<Object, String> uniqueDoc = new HashMap<>();

private final ShardFieldSortedHitQueue queue =
new ShardFieldSortedHitQueue(sortFields, size, searcher);

@Override
public boolean push(ShardDoc shardDoc) {
final String prevShard = uniqueDoc.put(shardDoc.id, shardDoc.shard);
if (prevShard != null) {
// duplicate detected

// For now, just always use the first encountered since we can't currently
// remove the previous one added to the priority queue. If we switched
// to the Java5 PriorityQueue, this would be easier.
return false;
// make which duplicate is used deterministic based on shard
// if (prevShard.compareTo(shardDoc.shard) >= 0) {
// TODO: remove previous from priority queue
// return false;
// }
}

queue.insertWithOverflow(shardDoc);
return true;
}

@Override
public Map<Object, ShardDoc> resultIds(int offset) {
final Map<Object, ShardDoc> resultIds = new HashMap<>();

// The queue now has 0 -> queuesize docs, where queuesize <= start + rows
// So we want to pop the last documents off the queue to get
// the docs offset -> queuesize
int resultSize = queue.size() - offset;
resultSize = Math.max(0, resultSize); // there may not be any docs in range

for (int i = resultSize - 1; i >= 0; i--) {
ShardDoc shardDoc = queue.pop();
shardDoc.positionInResponse = i;
// Need the toString() for correlation with other lists that must
// be strings (like keys in highlighting, explain, etc)
resultIds.put(shardDoc.id.toString(), shardDoc);
}

return resultIds;
}
};
}
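
A condensed sketch of the ShardDocQueue contract as mergeIds below uses it (docsFromAllShards, offset, rows and numFound are illustrative placeholders, not identifiers from the diff):

// Illustration only: push every ShardDoc, discount duplicates, then extract the window.
ShardDocQueue shardDocQueue = newShardDocQueue(searcher, sortFields, offset + rows);
for (ShardDoc shardDoc : docsFromAllShards) {
  if (!shardDocQueue.push(shardDoc)) {
    numFound--;  // push() returns false when the id was already seen on another shard
  }
}
// pops the queue down to the requested window and keys the result by id.toString()
Map<Object, ShardDoc> resultIds = shardDocQueue.resultIds(offset);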

protected void mergeIds(ResponseBuilder rb, ShardRequest sreq) {
List<MergeStrategy> mergeStrategies = rb.getMergeStrategies();
if (mergeStrategies != null) {
@@ -947,14 +1008,10 @@ protected void mergeIds(ResponseBuilder rb, ShardRequest sreq) {
IndexSchema schema = rb.req.getSchema();
SchemaField uniqueKeyField = schema.getUniqueKeyField();

// id to shard mapping, to eliminate any accidental dups
HashMap<Object, String> uniqueDoc = new HashMap<>();

// Merge the docs via a priority queue so we don't have to sort *all* of the
// documents... we only need to order the top (rows+start)
final ShardFieldSortedHitQueue queue =
new ShardFieldSortedHitQueue(
sortFields, ss.getOffset() + ss.getCount(), rb.req.getSearcher());
final ShardDocQueue shardDocQueue =
newShardDocQueue(rb.req.getSearcher(), sortFields, ss.getOffset() + ss.getCount());

NamedList<Object> shardInfo = null;
if (rb.req.getParams().getBool(ShardParams.SHARDS_INFO, false)) {
@@ -1125,23 +1182,6 @@ protected void mergeIds(ResponseBuilder rb, ShardRequest sreq) {
for (int i = 0; i < docs.size(); i++) {
SolrDocument doc = docs.get(i);
Object id = doc.getFieldValue(uniqueKeyField.getName());

String prevShard = uniqueDoc.put(id, srsp.getShard());
if (prevShard != null) {
// duplicate detected
numFound--;

// For now, just always use the first encountered since we can't currently
// remove the previous one added to the priority queue. If we switched
// to the Java5 PriorityQueue, this would be easier.
continue;
// make which duplicate is used deterministic based on shard
// if (prevShard.compareTo(srsp.shard) >= 0) {
// TODO: remove previous from priority queue
// continue;
// }
}

ShardDoc shardDoc = new ShardDoc();
shardDoc.id = id;
shardDoc.shard = srsp.getShard();
@@ -1160,42 +1200,18 @@ protected void mergeIds(ResponseBuilder rb, ShardRequest sreq) {

shardDoc.sortFieldValues = unmarshalledSortFieldValues;

queue.insertWithOverflow(shardDoc);
if (!shardDocQueue.push(shardDoc)) {
numFound--;
}
} // end for-each-doc-in-response
} // end for-each-response

// The queue now has 0 -> queuesize docs, where queuesize <= start + rows
// So we want to pop the last documents off the queue to get
// the docs offset -> queuesize
int resultSize = queue.size() - ss.getOffset();
resultSize = Math.max(0, resultSize); // there may not be any docs in range

Map<Object, ShardDoc> resultIds = new HashMap<>();
for (int i = resultSize - 1; i >= 0; i--) {
ShardDoc shardDoc = queue.pop();
shardDoc.positionInResponse = i;
// Need the toString() for correlation with other lists that must
// be strings (like keys in highlighting, explain, etc)
resultIds.put(shardDoc.id.toString(), shardDoc);
}

// Add hits for distributed requests
// https://issues.apache.org/jira/browse/SOLR-3518
rb.rsp.addToLog("hits", numFound);

SolrDocumentList responseDocs = new SolrDocumentList();
if (maxScore != null) responseDocs.setMaxScore(maxScore);
responseDocs.setNumFound(numFound);
responseDocs.setNumFoundExact(hitCountIsExact);
responseDocs.setStart(ss.getOffset());
// size appropriately
for (int i = 0; i < resultSize; i++) responseDocs.add(null);

// save these results in a private area so we can access them
// again when retrieving stored fields.
// TODO: use ResponseBuilder (w/ comments) or the request context?
rb.resultIds = resultIds;
rb.setResponseDocs(responseDocs);
setResultIdsAndResponseDocs(
rb, shardDocQueue, maxScore, numFound, hitCountIsExact, ss.getOffset());

populateNextCursorMarkFromMergedShards(rb);

@@ -1241,6 +1257,30 @@ protected void mergeIds(ResponseBuilder rb, ShardRequest sreq) {
}
}

protected void setResultIdsAndResponseDocs(
ResponseBuilder rb,
ShardDocQueue shardDocQueue,
Float maxScore,
long numFound,
boolean hitCountIsExact,
int offset) {
final Map<Object, ShardDoc> resultIds = shardDocQueue.resultIds(offset);

final SolrDocumentList responseDocs = new SolrDocumentList();
if (maxScore != null) responseDocs.setMaxScore(maxScore);
responseDocs.setNumFound(numFound);
responseDocs.setNumFoundExact(hitCountIsExact);
responseDocs.setStart(offset);
// size appropriately
for (int i = 0; i < resultIds.size(); i++) responseDocs.add(null);

// save these results in a private area so we can access them
// again when retrieving stored fields.
// TODO: use ResponseBuilder (w/ comments) or the request context?
rb.resultIds = resultIds;
rb.setResponseDocs(responseDocs);
}

/**
* Inspects the state of the {@link ResponseBuilder} and populates the next {@link
* ResponseBuilder#setNextCursorMark} as appropriate based on the merged sort values from