Skip to content
Permalink
Browse files

SOLR-1632: Distributed IDF, finally.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1647253 13f79535-47bb-0310-9956-ffa450edef68
  • Loading branch information...
anshumg committed Dec 22, 2014
1 parent ec9fdd5 commit 73a6fca89032399c5317ea9760cacf0d30914a47
Showing with 1,783 additions and 68 deletions.
  1. +3 −0 solr/CHANGES.txt
  2. +3 −1 solr/core/src/java/org/apache/solr/core/SolrConfig.java
  3. +64 −40 solr/core/src/java/org/apache/solr/core/SolrCore.java
  4. +72 −23 solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
  5. +1 −0 solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java
  6. +2 −0 solr/core/src/java/org/apache/solr/handler/component/ShardRequest.java
  7. +46 −3 solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
  8. +23 −0 solr/core/src/java/org/apache/solr/search/stats/CachedSearcherStats.java
  9. +84 −0 solr/core/src/java/org/apache/solr/search/stats/CollectionStats.java
  10. +88 −0 solr/core/src/java/org/apache/solr/search/stats/ExactSharedStatsCache.java
  11. +352 −0 solr/core/src/java/org/apache/solr/search/stats/ExactStatsCache.java
  12. +169 −0 solr/core/src/java/org/apache/solr/search/stats/LRUStatsCache.java
  13. +78 −0 solr/core/src/java/org/apache/solr/search/stats/LocalStatsCache.java
  14. +48 −0 solr/core/src/java/org/apache/solr/search/stats/LocalStatsSource.java
  15. +119 −0 solr/core/src/java/org/apache/solr/search/stats/StatsCache.java
  16. +43 −0 solr/core/src/java/org/apache/solr/search/stats/StatsSource.java
  17. +223 −0 solr/core/src/java/org/apache/solr/search/stats/StatsUtil.java
  18. +82 −0 solr/core/src/java/org/apache/solr/search/stats/TermStats.java
  19. +30 −0 solr/core/src/java/org/apache/solr/search/stats/package.html
  20. +2 −1 solr/core/src/java/org/apache/solr/util/SolrPluginUtils.java
  21. +2 −0 solr/core/src/test-files/solr/collection1/conf/solrconfig.xml
  22. +69 −0 solr/core/src/test/org/apache/solr/search/stats/TestBaseStatsCache.java
  23. +104 −0 solr/core/src/test/org/apache/solr/search/stats/TestDefaultStatsCache.java
  24. +25 −0 solr/core/src/test/org/apache/solr/search/stats/TestExactSharedStatsCache.java
  25. +24 −0 solr/core/src/test/org/apache/solr/search/stats/TestExactStatsCache.java
  26. +24 −0 solr/core/src/test/org/apache/solr/search/stats/TestLRUStatsCache.java
  27. +3 −0 solr/solrj/src/java/org/apache/solr/common/params/ShardParams.java
@@ -244,6 +244,9 @@ New Features

* SOLR-6801: Load RequestHandler from blob store (Noble Paul)

* SOLR-1632: Support Distributed IDF (Andrzej Bialecki, Mark Miller, Yonik Seeley,
Robert Muir, Markus Jelsma, Vitaliy Zhovtyuk, Anshum Gupta)

Bug Fixes
----------------------

@@ -36,6 +36,7 @@
import org.apache.solr.search.FastLRUCache;
import org.apache.solr.search.QParserPlugin;
import org.apache.solr.search.ValueSourceParser;
import org.apache.solr.search.stats.StatsCache;
import org.apache.solr.servlet.SolrRequestParsers;
import org.apache.solr.spelling.QueryConverter;
import org.apache.solr.update.SolrIndexConfig;
@@ -251,7 +252,7 @@ public SolrConfig(SolrResourceLoader loader, String name, InputSource is)
jmxConfig = new JmxConfiguration(false, null, null, null);
}
maxWarmingSearchers = getInt("query/maxWarmingSearchers",Integer.MAX_VALUE);
slowQueryThresholdMillis = getInt("query/slowQueryThresholdMillis", -1);
slowQueryThresholdMillis = getInt("query/slowQueryThresholdMillis", -1);
for (SolrPluginInfo plugin : plugins) loadPluginInfo(plugin);
updateHandlerInfo = loadUpdatehandlerInfo();

@@ -312,6 +313,7 @@ public SolrConfig(SolrResourceLoader loader, String name, InputSource is)
.add(new SolrPluginInfo(IndexSchemaFactory.class, "schemaFactory", REQUIRE_CLASS))
.add(new SolrPluginInfo(RestManager.class, "restManager"))
.add(new SolrPluginInfo(InitParams.class, InitParams.TYPE, MULTI_OK))
.add(new SolrPluginInfo(StatsCache.class, "statsCache", REQUIRE_CLASS))
.build();

public static class SolrPluginInfo{
@@ -17,44 +17,6 @@

package org.apache.solr.core;

import java.io.Closeable;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.Writer;
import java.lang.reflect.Constructor;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.nio.file.NoSuchFileException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.ReentrantLock;

import org.apache.commons.io.FileUtils;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.DirectoryReader;
@@ -80,8 +42,6 @@
import org.apache.solr.handler.ReplicationHandler;
import org.apache.solr.handler.RequestHandlerBase;
import org.apache.solr.handler.SnapPuller;
import org.apache.solr.handler.SolrConfigHandler;
import org.apache.solr.handler.UpdateRequestHandler;
import org.apache.solr.handler.admin.ShowFileRequestHandler;
import org.apache.solr.handler.component.DebugComponent;
import org.apache.solr.handler.component.ExpandComponent;
@@ -119,6 +79,8 @@
import org.apache.solr.search.SolrFieldCacheMBean;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.ValueSourceParser;
import org.apache.solr.search.stats.LocalStatsCache;
import org.apache.solr.search.stats.StatsCache;
import org.apache.solr.update.DefaultSolrCoreState;
import org.apache.solr.update.DirectUpdateHandler2;
import org.apache.solr.update.SolrCoreState;
@@ -143,6 +105,43 @@
import org.xml.sax.SAXException;

import javax.xml.parsers.ParserConfigurationException;
import java.io.Closeable;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.Writer;
import java.lang.reflect.Constructor;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.nio.file.NoSuchFileException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.ReentrantLock;

/**
*
@@ -164,6 +163,8 @@

private boolean isReloaded = false;

private StatsCache statsCache;

private final SolrConfig solrConfig;
private final SolrResourceLoader resourceLoader;
private volatile IndexSchema schema;
@@ -818,6 +819,8 @@ public SolrCore(String name, String dataDir, SolrConfig config, IndexSchema sche
// Handle things that should eventually go away
initDeprecatedSupport();

statsCache = initStatsCache();

// cause the executor to stall so firstSearcher events won't fire
// until after inform() has been called for all components.
// searchExecutor must be single-threaded for this to work
@@ -970,6 +973,27 @@ public Codec getCodec() {
}
return factory.getCodec();
}

private StatsCache initStatsCache() {
final StatsCache cache;
PluginInfo pluginInfo = solrConfig.getPluginInfo(StatsCache.class.getName());
if (pluginInfo != null && pluginInfo.className != null && pluginInfo.className.length() > 0) {
cache = createInitInstance(pluginInfo, StatsCache.class, null,
LocalStatsCache.class.getName());
log.info("Using statsCache impl: " + cache.getClass().getName());
} else {
log.info("Using default statsCache cache: " + LocalStatsCache.class.getName());
cache = new LocalStatsCache();
}
return cache;
}

/**
* Get the StatsCache.
*/
public StatsCache getStatsCache() {
return statsCache;
}

/**
* Load the request processors
@@ -17,22 +17,8 @@

package org.apache.solr.handler.component;

import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanQuery;
@@ -52,7 +38,13 @@
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.*;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.CursorMarkParams;
import org.apache.solr.common.params.GroupParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.MoreLikeThisParams;
import org.apache.solr.common.params.ShardParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.StrUtils;
@@ -71,11 +63,11 @@
import org.apache.solr.search.QParser;
import org.apache.solr.search.QParserPlugin;
import org.apache.solr.search.QueryParsing;
import org.apache.solr.search.RankQuery;
import org.apache.solr.search.ReturnFields;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SolrReturnFields;
import org.apache.solr.search.SortSpec;
import org.apache.solr.search.RankQuery;
import org.apache.solr.search.SyntaxError;
import org.apache.solr.search.grouping.CommandHandler;
import org.apache.solr.search.grouping.GroupingSpecification;
@@ -96,8 +88,25 @@
import org.apache.solr.search.grouping.endresulttransformer.GroupedEndResultTransformer;
import org.apache.solr.search.grouping.endresulttransformer.MainEndResultTransformer;
import org.apache.solr.search.grouping.endresulttransformer.SimpleEndResultTransformer;
import org.apache.solr.search.stats.StatsCache;
import org.apache.solr.util.SolrPluginUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;

/**
* TODO!
@@ -108,6 +117,7 @@
public class QueryComponent extends SearchComponent
{
public static final String COMPONENT_NAME = "query";
private static final Logger LOG = LoggerFactory.getLogger(QueryComponent.class);

@Override
public void prepare(ResponseBuilder rb) throws IOException
@@ -271,6 +281,8 @@ private void prepareGrouping(ResponseBuilder rb) throws IOException {
@Override
public void process(ResponseBuilder rb) throws IOException
{
LOG.debug("process: {}", rb.req.getParams());

SolrQueryRequest req = rb.req;
SolrQueryResponse rsp = rb.rsp;
SolrParams params = req.getParams();
@@ -279,6 +291,19 @@ public void process(ResponseBuilder rb) throws IOException
}
SolrIndexSearcher searcher = req.getSearcher();

StatsCache statsCache = req.getCore().getStatsCache();

int purpose = params.getInt(ShardParams.SHARDS_PURPOSE, ShardRequest.PURPOSE_GET_TOP_IDS);
if ((purpose & ShardRequest.PURPOSE_GET_TERM_STATS) != 0) {
statsCache.returnLocalStats(rb, searcher);
return;
}
// check if we need to update the local copy of global dfs
if ((purpose & ShardRequest.PURPOSE_SET_TERM_STATS) != 0) {
// retrieve from request and update local cache
statsCache.receiveGlobalStats(req);
}

if (rb.getQueryCommand().getOffset() < 0) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "'start' parameter cannot be negative");
}
@@ -329,6 +354,9 @@ public void process(ResponseBuilder rb) throws IOException

SolrIndexSearcher.QueryCommand cmd = rb.getQueryCommand();
cmd.setTimeAllowed(timeAllowed);

req.getContext().put(SolrIndexSearcher.STATS_SOURCE, statsCache.get(req));

SolrIndexSearcher.QueryResult result = new SolrIndexSearcher.QueryResult();

//
@@ -479,8 +507,8 @@ public void process(ResponseBuilder rb) throws IOException
}

// normal search result
searcher.search(result,cmd);
rb.setResult( result );
searcher.search(result, cmd);
rb.setResult(result);

ResultContext ctx = new ResultContext();
ctx.docs = rb.getResults().docList;
@@ -637,7 +665,7 @@ private int groupedDistributedProcess(ResponseBuilder rb) {
if (rb.stage < ResponseBuilder.STAGE_PARSE_QUERY) {
nextStage = ResponseBuilder.STAGE_PARSE_QUERY;
} else if (rb.stage == ResponseBuilder.STAGE_PARSE_QUERY) {
createDistributedIdf(rb);
createDistributedStats(rb);
nextStage = ResponseBuilder.STAGE_TOP_GROUPS;
} else if (rb.stage < ResponseBuilder.STAGE_TOP_GROUPS) {
nextStage = ResponseBuilder.STAGE_TOP_GROUPS;
@@ -668,7 +696,7 @@ private int regularDistributedProcess(ResponseBuilder rb) {
if (rb.stage < ResponseBuilder.STAGE_PARSE_QUERY)
return ResponseBuilder.STAGE_PARSE_QUERY;
if (rb.stage == ResponseBuilder.STAGE_PARSE_QUERY) {
createDistributedIdf(rb);
createDistributedStats(rb);
return ResponseBuilder.STAGE_EXECUTE_QUERY;
}
if (rb.stage < ResponseBuilder.STAGE_EXECUTE_QUERY) return ResponseBuilder.STAGE_EXECUTE_QUERY;
@@ -713,6 +741,10 @@ private void handleRegularResponses(ResponseBuilder rb, ShardRequest sreq) {
mergeIds(rb, sreq);
}

if ((sreq.purpose & ShardRequest.PURPOSE_GET_TERM_STATS) != 0) {
updateStats(rb, sreq);
}

if ((sreq.purpose & ShardRequest.PURPOSE_GET_FIELDS) != 0) {
returnFields(rb, sreq);
}
@@ -786,8 +818,19 @@ private void regularFinishStage(ResponseBuilder rb) {
}
}

private void createDistributedIdf(ResponseBuilder rb) {
// TODO
private void createDistributedStats(ResponseBuilder rb) {
StatsCache cache = rb.req.getCore().getStatsCache();
if ( (rb.getFieldFlags() & SolrIndexSearcher.GET_SCORES)!=0 || rb.getSortSpec().includesScore()) {
ShardRequest sreq = cache.retrieveStatsRequest(rb);
if (sreq != null) {
rb.addRequest(this, sreq);
}
}
}

private void updateStats(ResponseBuilder rb, ShardRequest sreq) {
StatsCache cache = rb.req.getCore().getStatsCache();
cache.mergeToGlobalStats(rb.req, sreq.responses);
}

private void createMainQuery(ResponseBuilder rb) {
@@ -836,6 +879,12 @@ private void createMainQuery(ResponseBuilder rb) {

sreq.params.set(ResponseBuilder.FIELD_SORT_VALUES,"true");

// TODO: should this really sendGlobalDfs if just includeScore?
if ( (rb.getFieldFlags() & SolrIndexSearcher.GET_SCORES)!=0 || rb.getSortSpec().includesScore()) {
sreq.params.set(CommonParams.FL, rb.req.getSchema().getUniqueKeyField().getName() + ",score");
StatsCache statsCache = rb.req.getCore().getStatsCache();
statsCache.sendGlobalStats(rb, sreq);
}
boolean shardQueryIncludeScore = (rb.getFieldFlags() & SolrIndexSearcher.GET_SCORES) != 0 || rb.getSortSpec().includesScore();
if (distribSinglePass) {
String[] fls = rb.req.getParams().getParams(CommonParams.FL);
@@ -299,6 +299,7 @@ public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throw
params.remove("indent");
params.remove(CommonParams.HEADER_ECHO_PARAMS);
params.set(ShardParams.IS_SHARD, true); // a sub (shard) request
params.set(ShardParams.SHARDS_PURPOSE, sreq.purpose);
params.set(ShardParams.SHARD_URL, shard); // so the shard knows what was asked
if (rb.requestInfo != null) {
// we could try and detect when this is needed, but it could be tricky

0 comments on commit 73a6fca

Please sign in to comment.
You can’t perform that action at this time.