From 5a6290359c0e12f2372c8470de636796928921cc Mon Sep 17 00:00:00 2001 From: broustant Date: Fri, 12 Jan 2018 18:03:26 +0100 Subject: [PATCH 1/8] Refactor QueryElevationComponent to introduce ElevationProvider - Refactor to introduce ElevationProvider. The current full-query match policy becomes a default simple MapElevationProvider. It can be replaced by a more efficient provider in the future, or replaced by an extending class. - Add overridable methods to handle exceptions during the component initialization. - Add overridable methods to provide the default values for config properties. - No functional change beyond refactoring. - Adapt unit test. --- .gitignore | 2 + .../component/QueryElevationComponent.java | 1478 ++++++++++++----- .../QueryElevationComponentTest.java | 100 +- .../common/params/QueryElevationParams.java | 6 + 4 files changed, 1151 insertions(+), 435 deletions(-) diff --git a/.gitignore b/.gitignore index 4b947436dc6e..74a50f7f62ad 100644 --- a/.gitignore +++ b/.gitignore @@ -26,3 +26,5 @@ pom.xml /nbproject /nb-build .pydevproject +.DS_Store +/temp \ No newline at end of file diff --git a/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java b/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java index d7b84740869a..cadcfc02f8b9 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java @@ -16,6 +16,7 @@ */ package org.apache.solr.handler.component; +import javax.xml.parsers.ParserConfigurationException; import javax.xml.xpath.XPath; import javax.xml.xpath.XPathConstants; import javax.xml.xpath.XPathExpressionException; @@ -23,35 +24,27 @@ import java.io.File; import java.io.IOException; import java.io.InputStream; +import java.io.StringReader; import java.lang.invoke.MethodHandles; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; 
-import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.WeakHashMap; +import java.util.*; +import java.util.stream.Collector; +import java.util.stream.Collectors; import com.carrotsearch.hppc.IntIntHashMap; +import com.google.common.annotations.VisibleForTesting; + +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.LeafReader; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.PostingsEnum; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.Terms; -import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.*; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BoostQuery; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.FieldComparatorSource; -import org.apache.lucene.search.Query; import org.apache.lucene.search.SimpleFieldComparator; import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; @@ -69,11 +62,9 @@ import org.apache.solr.common.util.StrUtils; import org.apache.solr.core.Config; import org.apache.solr.core.SolrCore; -import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.response.transform.ElevatedMarkerFactory; import org.apache.solr.response.transform.ExcludedMarkerFactory; import org.apache.solr.schema.FieldType; -import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.SchemaField; import org.apache.solr.search.QueryParsing; import org.apache.solr.search.SolrIndexSearcher; @@ -88,8 +79,7 @@ import org.w3c.dom.Node; 
import org.w3c.dom.NodeList; import org.xml.sax.InputSource; - -import static org.apache.solr.common.params.CommonParams.ID; +import org.xml.sax.SAXException; /** * A component to elevate some documents to the top of the result set. @@ -100,70 +90,47 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); // Constants used in solrconfig.xml + @VisibleForTesting static final String FIELD_TYPE = "queryFieldType"; + @VisibleForTesting static final String CONFIG_FILE = "config-file"; - static final String EXCLUDE = "exclude"; + private static final String EXCLUDE = "exclude"; public static final String BOOSTED = "BOOSTED"; - public static final String BOOSTED_DOCIDS = "BOOSTED_DOCIDS"; + private static final String BOOSTED_DOCIDS = "BOOSTED_DOCIDS"; public static final String BOOSTED_PRIORITY = "BOOSTED_PRIORITY"; - public static final String EXCLUDED = "EXCLUDED"; - // Runtime param -- should be in common? - - private SolrParams initArgs = null; - private Analyzer analyzer = null; - private String idField = null; - private FieldType idSchemaFT; - - boolean forceElevation = false; - // For each IndexReader, keep a query->elevation map - // When the configuration is loaded from the data directory. - // The key is null if loaded from the config directory, and - // is never re-loaded. 
- final Map> elevationCache = - new WeakHashMap<>(); - - class ElevationObj { - final String text; - final String analyzed; - final TermQuery [] exclude;//just keep the term query, b/c we will not always explicitly exclude the item based on markExcludes query time param - final BooleanQuery include; - final Map priority; - final Set ids; - final Set excludeIds; - - ElevationObj(String qstr, List elevate, List exclude) throws IOException { - this.text = qstr; - this.analyzed = getAnalyzedQuery(this.text); - this.ids = new HashSet<>(); - this.excludeIds = new HashSet<>(); - - BooleanQuery.Builder include = new BooleanQuery.Builder(); - this.priority = new HashMap<>(); - int max = elevate.size() + 5; - for (String id : elevate) { - id = idSchemaFT.readableToIndexed(id); - ids.add(id); - TermQuery tq = new TermQuery(new Term(idField, id)); - include.add(tq, BooleanClause.Occur.SHOULD); - this.priority.put(new BytesRef(id), max--); - } - this.include = include.build(); - - if (exclude == null || exclude.isEmpty()) { - this.exclude = null; - } else { - this.exclude = new TermQuery[exclude.size()]; - for (int i = 0; i < exclude.size(); i++) { - String id = idSchemaFT.readableToIndexed(exclude.get(i)); - excludeIds.add(id); - this.exclude[i] = new TermQuery(new Term(idField, id)); - } - } - } - } + private static final boolean DEFAULT_FORCE_ELEVATION = false; + private static final boolean DEFAULT_KEEP_ELEVATION_PRIORITY = true; + private static final boolean DEFAULT_SUBSET_MATCH = false; + private static final String DEFAULT_EXCLUDE_MARKER_FIELD_NAME = "excluded"; + private static final String DEFAULT_EDITORIAL_MARKER_FIELD_NAME = "elevated"; + + private static final Collector QUERY_EXACT_JOINER = Collectors.joining(" "); + + // Runtime param + private SolrParams initArgs; + private Analyzer queryAnalyzer; + private String uniqueKeyFieldName; + private FieldType uniqueKeyFieldType; + private IndexedValueProvider indexedValueProvider; + @VisibleForTesting + boolean 
forceElevation; + private boolean keepElevationPriority; + private boolean initialized; + + /** + * For each IndexReader, keep an ElevationProvider when the configuration is loaded from the data directory. + * The key is null if loaded from the config directory, and is never re-loaded. + */ + private final Map elevationProviderCache = new WeakHashMap<>(); + + /** + * Keep track of a counter each time a configuration file cannot be loaded. + * Stop trying to load after {@link #getConfigLoadingExceptionHandler()}.{@link LoadingExceptionHandler#getLoadingMaxAttempts getLoadingMaxAttempts()}. + */ + private final Map configLoadingErrorCounters = new WeakHashMap<>(); @Override public void init(NamedList args) { @@ -172,128 +139,240 @@ public void init(NamedList args) { @Override public void inform(SolrCore core) { - IndexSchema schema = core.getLatestSchema(); + initialized = false; + try { + parseFieldType(core); + setUniqueKeyField(core); + parseExcludedMarkerFieldName(core); + parseEditorialMarkerFieldName(core); + parseForceElevation(); + parseKeepElevationPriority(); + loadElevationConfiguration(core); + initialized = true; + } catch (InitializationException e) { + assert !initialized; + handleInitializationException(e, e.exceptionCause); + } catch (Exception e) { + assert !initialized; + handleInitializationException(e, InitializationExceptionHandler.ExceptionCause.OTHER); + } + } + + private void parseFieldType(SolrCore core) throws InitializationException { String a = initArgs.get(FIELD_TYPE); if (a != null) { - FieldType ft = schema.getFieldTypes().get(a); + FieldType ft = core.getLatestSchema().getFieldTypes().get(a); if (ft == null) { - throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, - "Unknown FieldType: '" + a + "' used in QueryElevationComponent"); + throw new InitializationException("Parameter " + FIELD_TYPE + " defines an unknown field type \"" + a + "\"", InitializationExceptionHandler.ExceptionCause.UNKNOWN_FIELD_TYPE); } - analyzer = 
ft.getQueryAnalyzer(); + queryAnalyzer = ft.getQueryAnalyzer(); } + } - SchemaField sf = schema.getUniqueKeyField(); - if( sf == null) { - throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, - "QueryElevationComponent requires the schema to have a uniqueKeyField." ); + private void setUniqueKeyField(SolrCore core) throws InitializationException { + SchemaField sf = core.getLatestSchema().getUniqueKeyField(); + if (sf == null) { + throw new InitializationException("This component requires the schema to have a uniqueKeyField", InitializationExceptionHandler.ExceptionCause.MISSING_UNIQUE_KEY_FIELD); } - idSchemaFT = sf.getType(); - idField = sf.getName(); - //register the EditorialMarkerFactory - String excludeName = initArgs.get(QueryElevationParams.EXCLUDE_MARKER_FIELD_NAME, "excluded"); - if (excludeName == null || excludeName.equals("") == true){ - excludeName = "excluded"; + uniqueKeyFieldType = sf.getType(); + uniqueKeyFieldName = sf.getName(); + indexedValueProvider = readableValue -> uniqueKeyFieldType.readableToIndexed(readableValue); + } + + private void parseExcludedMarkerFieldName(SolrCore core) { + String markerName = initArgs.get(QueryElevationParams.EXCLUDE_MARKER_FIELD_NAME, DEFAULT_EXCLUDE_MARKER_FIELD_NAME); + if (markerName == null || markerName.equals("")) { + markerName = DEFAULT_EXCLUDE_MARKER_FIELD_NAME; } - ExcludedMarkerFactory excludedMarkerFactory = new ExcludedMarkerFactory(); - core.addTransformerFactory(excludeName, excludedMarkerFactory); - ElevatedMarkerFactory elevatedMarkerFactory = new ElevatedMarkerFactory(); - String markerName = initArgs.get(QueryElevationParams.EDITORIAL_MARKER_FIELD_NAME, "elevated"); - if (markerName == null || markerName.equals("") == true) { - markerName = "elevated"; + core.addTransformerFactory(markerName, new ExcludedMarkerFactory()); + } + + private void parseEditorialMarkerFieldName(SolrCore core) { + String markerName = initArgs.get(QueryElevationParams.EDITORIAL_MARKER_FIELD_NAME, 
DEFAULT_EDITORIAL_MARKER_FIELD_NAME); + if (markerName == null || markerName.equals("")) { + markerName = DEFAULT_EDITORIAL_MARKER_FIELD_NAME; } - core.addTransformerFactory(markerName, elevatedMarkerFactory); - forceElevation = initArgs.getBool(QueryElevationParams.FORCE_ELEVATION, forceElevation); + core.addTransformerFactory(markerName, new ElevatedMarkerFactory()); + } - String f = initArgs.get(CONFIG_FILE); - if (f != null) { - try { - synchronized (elevationCache) { - elevationCache.clear(); - boolean exists = false; - - // check if using ZooKeeper - ZkController zkController = core.getCoreContainer().getZkController(); - if (zkController != null) { - // TODO : shouldn't have to keep reading the config name when it has been read before - exists = zkController.configFileExists(zkController.getZkStateReader().readConfigName(core.getCoreDescriptor().getCloudDescriptor().getCollectionName()), f); + private void parseForceElevation() { + forceElevation = initArgs.getBool(QueryElevationParams.FORCE_ELEVATION, getDefaultForceElevation()); + } + + private void parseKeepElevationPriority() { + keepElevationPriority = initArgs.getBool(QueryElevationParams.KEEP_ELEVATION_PRIORITY, getDefaultKeepElevationPriority()); + } + + /** + * (Re)Loads elevation configuration. + *

+ * Protected access to be called by extending class. + *

+ * + * @param core The core holding this component. + * @return The number of elevation rules parsed. + */ + @SuppressWarnings("WeakerAccess") + protected int loadElevationConfiguration(SolrCore core) throws Exception { + synchronized (elevationProviderCache) { + elevationProviderCache.clear(); + String configFileName = initArgs.get(CONFIG_FILE); + if (configFileName == null) { + // Throw an exception which can be handled by an overriding InitializationExceptionHandler (see handleInitializationException()). + // The default InitializationExceptionHandler will simply skip this exception. + throw new InitializationException("Missing component parameter " + CONFIG_FILE + " - it has to define the path to the elevation configuration file", InitializationExceptionHandler.ExceptionCause.NO_CONFIG_FILE_DEFINED); + } + boolean configFileExists = false; + ElevationProvider elevationProvider = NO_OP_ELEVATION_PROVIDER; + + // check if using ZooKeeper + ZkController zkController = core.getCoreContainer().getZkController(); + if (zkController != null) { + // TODO : shouldn't have to keep reading the config name when it has been read before + configFileExists = zkController.configFileExists(zkController.getZkStateReader().readConfigName(core.getCoreDescriptor().getCloudDescriptor().getCollectionName()), configFileName); + } else { + File fC = new File(core.getResourceLoader().getConfigDir(), configFileName); + File fD = new File(core.getDataDir(), configFileName); + if (fC.exists() == fD.exists()) { + InitializationException e = new InitializationException("Missing config file \"" + configFileName + "\" - either " + fC.getAbsolutePath() + " or " + fD.getAbsolutePath() + " must exist, but not both", InitializationExceptionHandler.ExceptionCause.MISSING_CONFIG_FILE); + elevationProvider = handleConfigLoadingException(e, true); + elevationProviderCache.put(null, elevationProvider); + } else if (fC.exists()) { + if (fC.length() == 0) { + InitializationException e = new 
InitializationException("Empty config file \"" + configFileName + "\" - " + fC.getAbsolutePath(), InitializationExceptionHandler.ExceptionCause.EMPTY_CONFIG_FILE); + elevationProvider = handleConfigLoadingException(e, true); } else { - File fC = new File(core.getResourceLoader().getConfigDir(), f); - File fD = new File(core.getDataDir(), f); - if (fC.exists() == fD.exists()) { - throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, - "QueryElevationComponent missing config file: '" + f + "\n" - + "either: " + fC.getAbsolutePath() + " or " + fD.getAbsolutePath() + " must exist, but not both."); - } - if (fC.exists()) { - exists = true; - log.info("Loading QueryElevation from: " + fC.getAbsolutePath()); - Config cfg = new Config(core.getResourceLoader(), f); - elevationCache.put(null, loadElevationMap(cfg)); - } + configFileExists = true; + log.info("Loading QueryElevation from: " + fC.getAbsolutePath()); + Config cfg = new Config(core.getResourceLoader(), configFileName); + elevationProvider = loadElevationProvider(cfg); } - //in other words, we think this is in the data dir, not the conf dir - if (!exists) { - // preload the first data - RefCounted searchHolder = null; - try { - searchHolder = core.getNewestSearcher(false); - IndexReader reader = searchHolder.get().getIndexReader(); - getElevationMap(reader, core); - } finally { - if (searchHolder != null) searchHolder.decref(); - } + elevationProviderCache.put(null, elevationProvider); + } + } + //in other words, we think this is in the data dir, not the conf dir + if (!configFileExists) { + // preload the first data + RefCounted searchHolder = null; + try { + searchHolder = core.getNewestSearcher(false); + if (searchHolder == null) { + elevationProvider = NO_OP_ELEVATION_PROVIDER; + } else { + IndexReader reader = searchHolder.get().getIndexReader(); + elevationProvider = getElevationProvider(reader, core); } + } finally { + if (searchHolder != null) searchHolder.decref(); } - } catch (Exception ex) { - 
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, - "Error initializing QueryElevationComponent.", ex); } + return elevationProvider.size(); } } - //get the elevation map from the data dir - Map getElevationMap(IndexReader reader, SolrCore core) throws Exception { - synchronized (elevationCache) { - Map map = elevationCache.get(null); - if (map != null) return map; - - map = elevationCache.get(reader); - if (map == null) { - String f = initArgs.get(CONFIG_FILE); - if (f == null) { - throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, - "QueryElevationComponent must specify argument: " + CONFIG_FILE); + /** + * Gets the {@link ElevationProvider} from the data dir or from the cache. + * + * @return The cached or loaded {@link ElevationProvider}. + * @throws java.io.IOException If the configuration resource cannot be found, or if an I/O error occurs while analyzing the triggering queries. + * @throws org.xml.sax.SAXException If the configuration resource is not a valid XML content. + * @throws javax.xml.parsers.ParserConfigurationException If the configuration resource is not a valid XML configuration. + * @throws RuntimeException If the configuration resource is not an XML content of the expected format + * (either {@link RuntimeException} or {@link org.apache.solr.common.SolrException}). 
+ */ + @VisibleForTesting + ElevationProvider getElevationProvider(IndexReader reader, SolrCore core) throws Exception { + synchronized (elevationProviderCache) { + ElevationProvider elevationProvider; + elevationProvider = elevationProviderCache.get(null); + if (elevationProvider != null) return elevationProvider; + + elevationProvider = elevationProviderCache.get(reader); + if (elevationProvider == null) { + Exception loadingException = null; + boolean resourceAccessIssue = false; + try { + elevationProvider = loadElevationProvider(core); + } catch (IOException e) { + loadingException = e; + resourceAccessIssue = true; + } catch (Exception e) { + loadingException = e; } - log.info("Loading QueryElevation from data dir: " + f); - - Config cfg; - - ZkController zkController = core.getCoreContainer().getZkController(); - if (zkController != null) { - cfg = new Config(core.getResourceLoader(), f, null, null); - } else { - InputStream is = VersionedFile.getLatestFile(core.getDataDir(), f); - cfg = new Config(core.getResourceLoader(), f, new InputSource(is), null); + boolean shouldCache = true; + if (loadingException != null) { + elevationProvider = handleConfigLoadingException(loadingException, resourceAccessIssue); + // Do not cache the fallback ElevationProvider for the first exceptions because the exception might + // occur only a couple of times and the config file could be loaded correctly afterwards + // (e.g. temporary invalid file access). After some attempts, cache the fallback ElevationProvider + // not to overload the exception handler (and beyond it, the logs probably). 
+ if (incConfigLoadingErrorCount(reader) < getConfigLoadingExceptionHandler().getLoadingMaxAttempts()) { + shouldCache = false; + } + } + if (shouldCache) { + elevationProviderCache.put(reader, elevationProvider); } - - map = loadElevationMap(cfg); - elevationCache.put(reader, map); } - return map; + assert elevationProvider != null; + return elevationProvider; + } + } + + /** + * Loads the {@link ElevationProvider} from the data dir. + * + * @return The loaded {@link ElevationProvider}. + * @throws java.io.IOException If the configuration resource cannot be found, or if an I/O error occurs while analyzing the triggering queries. + * @throws org.xml.sax.SAXException If the configuration resource is not a valid XML content. + * @throws javax.xml.parsers.ParserConfigurationException If the configuration resource is not a valid XML configuration. + * @throws RuntimeException If the configuration resource is not an XML content of the expected format + * (either {@link RuntimeException} or {@link org.apache.solr.common.SolrException}). 
+ */ + private ElevationProvider loadElevationProvider(SolrCore core) throws IOException, SAXException, ParserConfigurationException { + String configFileName = initArgs.get(CONFIG_FILE); + if (configFileName == null) { + throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, + "QueryElevationComponent must specify argument: " + CONFIG_FILE); + } + log.info("Loading QueryElevation from data dir: " + configFileName); + + Config cfg; + ZkController zkController = core.getCoreContainer().getZkController(); + if (zkController != null) { + cfg = new Config(core.getResourceLoader(), configFileName, null, null); + } else { + InputStream is = VersionedFile.getLatestFile(core.getDataDir(), configFileName); + cfg = new Config(core.getResourceLoader(), configFileName, new InputSource(is), null); } + ElevationProvider elevationProvider = loadElevationProvider(cfg); + assert elevationProvider != null; + return elevationProvider; } - //load up the elevation map - private Map loadElevationMap(Config cfg) throws IOException { + /** + * Loads the {@link ElevationProvider}. + *

+ * This method can be overridden. + *

+ * + * @throws java.io.IOException If an I/O error occurs while analyzing the triggering queries. + * @throws RuntimeException If the config does not provide an XML content of the expected format + * (either {@link RuntimeException} or {@link org.apache.solr.common.SolrException}). + */ + @SuppressWarnings("WeakerAccess") + protected ElevationProvider loadElevationProvider(Config config) throws IOException { + Map elevationBuilderMap = keepElevationPriority ? + new LinkedHashMap<>() : new HashMap<>(); XPath xpath = XPathFactory.newInstance().newXPath(); - Map map = new HashMap<>(); - NodeList nodes = (NodeList) cfg.evaluate("elevate/query", XPathConstants.NODESET); + NodeList nodes = (NodeList) config.evaluate("elevate/query", XPathConstants.NODESET); for (int i = 0; i < nodes.getLength(); i++) { Node node = nodes.item(i); - String qstr = DOMUtil.getAttr(node, "text", "missing query 'text'"); + String queryString = DOMUtil.getAttr(node, "text", "missing query 'text'"); + String matchString = DOMUtil.getAttr(node, "match"); + ElevatingQuery elevatingQuery = new ElevatingQuery(queryString, parseMatchPolicy(matchString)); - NodeList children = null; + NodeList children; try { children = (NodeList) xpath.evaluate("doc", node, XPathConstants.NODESET); } catch (XPathExpressionException e) { @@ -301,68 +380,89 @@ private Map loadElevationMap(Config cfg) throws IOExceptio "query requires '' child"); } - ArrayList include = new ArrayList<>(); - ArrayList exclude = new ArrayList<>(); + ElevationBuilder elevationBuilder = new ElevationBuilder(); for (int j = 0; j < children.getLength(); j++) { Node child = children.item(j); - String id = DOMUtil.getAttr(child, ID, "missing 'id'"); + String id = DOMUtil.getAttr(child, "id", "missing 'id'"); String e = DOMUtil.getAttr(child, EXCLUDE, null); if (e != null) { if (Boolean.valueOf(e)) { - exclude.add(id); + elevationBuilder.addExcludedId(id); continue; } } - include.add(id); + elevationBuilder.addElevatedId(id); } - ElevationObj 
elev = new ElevationObj(qstr, include, exclude); - if (map.containsKey(elev.analyzed)) { - throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, - "Boosting query defined twice for query: '" + elev.text + "' (" + elev.analyzed + "')"); + // It is allowed to define multiple times different elevations for the same query. In this case the elevations + // are merged in the ElevationBuilder (they will be triggered at the same time). + ElevationBuilder previousElevationBuilder = elevationBuilderMap.get(elevatingQuery); + if (previousElevationBuilder == null) { + elevationBuilderMap.put(elevatingQuery, elevationBuilder); + } else { + previousElevationBuilder.merge(elevationBuilder); } - map.put(elev.analyzed, elev); } - return map; + ElevationProvider elevationProvider = createElevationProvider(queryAnalyzer); + for (Map.Entry entry : elevationBuilderMap.entrySet()) { + elevationProvider.setElevationForQuery(entry.getKey(), entry.getValue().build()); + } + return elevationProvider.makeImmutable(); + } + + private boolean parseMatchPolicy(String matchString) { + if (matchString == null) { + return getDefaultSubsetMatch(); + } else if (matchString.equalsIgnoreCase("exact")) { + return false; + } else if (matchString.equalsIgnoreCase("subset")) { + return true; + } else { + throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, + "invalid value \"" + matchString + "\" for query match attribute"); + } } /** - * Helpful for testing without loading config.xml + * Potentially handles and captures an exception that occurred while loading the configuration resource. * - * @throws IOException If there is a low-level I/O error. + * @param e The exception caught. 
+ * @param resourceAccessIssueOrEmptyConfig true if the exception has been thrown because the resource could not + * be accessed (missing or cannot be read) or the config file is empty; false if the resource has + * been found and accessed but the error occurred while loading the resource + * (invalid format, incomplete or corrupted). + * @return The {@link ElevationProvider} to use if the exception is absorbed. + * @throws E If the exception is not absorbed. */ - void setTopQueryResults(IndexReader reader, String query, String[] ids, String[] ex) throws IOException { - if (ids == null) { - ids = new String[0]; + private ElevationProvider handleConfigLoadingException(E e, boolean resourceAccessIssueOrEmptyConfig) throws E { + if (getConfigLoadingExceptionHandler().handleLoadingException(e, resourceAccessIssueOrEmptyConfig)) { + return NO_OP_ELEVATION_PROVIDER; } - if (ex == null) { - ex = new String[0]; - } - - Map elev = elevationCache.get(reader); - if (elev == null) { - elev = new HashMap<>(); - elevationCache.put(reader, elev); - } - ElevationObj obj = new ElevationObj(query, Arrays.asList(ids), Arrays.asList(ex)); - elev.put(obj.analyzed, obj); + assert e != null; + throw e; } - String getAnalyzedQuery(String query) throws IOException { - if (analyzer == null) { - return query; + private int incConfigLoadingErrorCount(IndexReader reader) { + Integer counter = configLoadingErrorCounters.get(reader); + if (counter == null) { + counter = 1; + } else { + counter++; } - StringBuilder norm = new StringBuilder(); - try (TokenStream tokens = analyzer.tokenStream("", query)) { - tokens.reset(); + configLoadingErrorCounters.put(reader, counter); + return counter; + } - CharTermAttribute termAtt = tokens.addAttribute(CharTermAttribute.class); - while (tokens.incrementToken()) { - norm.append(termAtt.buffer(), 0, termAtt.length()); - } - tokens.end(); - return norm.toString(); - } + /** + * Potentially handles and captures the exception that occurred while initializing 
this component. If the exception + * is captured by the handler, this component fails to initialize silently and is muted because field initialized is + * false. + */ + private void handleInitializationException(Exception initializationException, InitializationExceptionHandler.ExceptionCause exceptionCause) { + SolrException solrException = new SolrException(SolrException.ErrorCode.SERVER_ERROR, + "Error initializing " + QueryElevationComponent.class.getSimpleName(), initializationException); + if (!getInitializationExceptionHandler().handleInitializationException(solrException, exceptionCause)) + throw solrException; } //--------------------------------------------------------------------------------- @@ -371,125 +471,122 @@ String getAnalyzedQuery(String query) throws IOException { @Override public void prepare(ResponseBuilder rb) throws IOException { - SolrQueryRequest req = rb.req; - SolrParams params = req.getParams(); - // A runtime param can skip - if (!params.getBool(QueryElevationParams.ENABLE, true)) { + if (!initialized || !rb.req.getParams().getBool(QueryElevationParams.ENABLE, true)) { return; } - boolean exclusive = params.getBool(QueryElevationParams.EXCLUSIVE, false); - // A runtime parameter can alter the config value for forceElevation - boolean force = params.getBool(QueryElevationParams.FORCE_ELEVATION, forceElevation); - boolean markExcludes = params.getBool(QueryElevationParams.MARK_EXCLUDES, false); - String boostStr = params.get(QueryElevationParams.IDS); - String exStr = params.get(QueryElevationParams.EXCLUDE); + Elevation elevation = getElevation(rb); + if (elevation != null) { + setQuery(rb, elevation); + setSort(rb, elevation); + } + + if (rb.isDebug() && rb.isDebugQuery()) { + addDebugInfo(rb, elevation); + } + } + + @Override + public void process(ResponseBuilder rb) throws IOException { + // Do nothing -- the real work is modifying the input query + } - Query query = rb.getQuery(); + private Elevation getElevation(ResponseBuilder rb) 
{ SolrParams localParams = rb.getQparser().getLocalParams(); - String qstr = localParams == null ? rb.getQueryString() : localParams.get(QueryParsing.V); - if (query == null || qstr == null) { - return; + String queryString = localParams == null ? rb.getQueryString() : localParams.get(QueryParsing.V); + if (queryString == null || rb.getQuery() == null) { + return null; } - ElevationObj booster = null; + SolrParams params = rb.req.getParams(); + String paramElevatedIds = params.get(QueryElevationParams.IDS); + String paramExcludedIds = params.get(QueryElevationParams.EXCLUDE); try { - if(boostStr != null || exStr != null) { - List boosts = (boostStr != null) ? StrUtils.splitSmart(boostStr,",", true) : new ArrayList(0); - List excludes = (exStr != null) ? StrUtils.splitSmart(exStr, ",", true) : new ArrayList(0); - booster = new ElevationObj(qstr, boosts, excludes); + if (paramElevatedIds != null || paramExcludedIds != null) { + List elevatedIds = paramElevatedIds != null ? StrUtils.splitSmart(paramElevatedIds,",", true) : Collections.emptyList(); + List excludedIds = paramExcludedIds != null ? 
StrUtils.splitSmart(paramExcludedIds, ",", true) : Collections.emptyList(); + return new ElevationBuilder().addElevatedIds(elevatedIds).addExcludedIds(excludedIds).build(); } else { - IndexReader reader = req.getSearcher().getIndexReader(); - qstr = getAnalyzedQuery(qstr); - booster = getElevationMap(reader, req.getCore()).get(qstr); + IndexReader reader = rb.req.getSearcher().getIndexReader(); + return getElevationProvider(reader, rb.req.getCore()).getElevationForQuery(queryString); } - } catch (Exception ex) { - throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, - "Error loading elevation", ex); + } catch (Exception e) { + throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error loading elevation", e); } + } - if (booster != null) { - rb.req.getContext().put(BOOSTED, booster.ids); - rb.req.getContext().put(BOOSTED_PRIORITY, booster.priority); - - // Change the query to insert forced documents - if (exclusive == true) { - //we only want these results - rb.setQuery(new BoostQuery(booster.include, 0f)); - } else { - BooleanQuery.Builder newq = new BooleanQuery.Builder(); - newq.add(query, BooleanClause.Occur.SHOULD); - newq.add(new BoostQuery(booster.include, 0f), BooleanClause.Occur.SHOULD); - if (booster.exclude != null) { - if (markExcludes == false) { - for (TermQuery tq : booster.exclude) { - newq.add(new BooleanClause(tq, BooleanClause.Occur.MUST_NOT)); - } - } else { - //we are only going to mark items as excluded, not actually exclude them. 
This works - //with the EditorialMarkerFactory - rb.req.getContext().put(EXCLUDED, booster.excludeIds); + private void setQuery(ResponseBuilder rb, Elevation elevation) { + rb.req.getContext().put(BOOSTED, elevation.elevatedIds); + rb.req.getContext().put(BOOSTED_PRIORITY, elevation.priorities); + + // Change the query to insert forced documents + SolrParams params = rb.req.getParams(); + if (params.getBool(QueryElevationParams.EXCLUSIVE, false)) { + // We only want these elevated results + rb.setQuery(new BoostQuery(elevation.includeQuery, 0f)); + } else { + BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder(); + queryBuilder.add(rb.getQuery(), BooleanClause.Occur.SHOULD); + queryBuilder.add(new BoostQuery(elevation.includeQuery, 0f), BooleanClause.Occur.SHOULD); + if (elevation.excludeQueries != null) { + if (params.getBool(QueryElevationParams.MARK_EXCLUDES, false)) { + // We are only going to mark items as excluded, not actually exclude them. + // This works with the EditorialMarkerFactory. 
+ rb.req.getContext().put(EXCLUDED, elevation.excludedIds); + } else { + for (TermQuery tq : elevation.excludeQueries) { + queryBuilder.add(new BooleanClause(tq, BooleanClause.Occur.MUST_NOT)); } } - rb.setQuery(newq.build()); - } - - ElevationComparatorSource comparator = new ElevationComparatorSource(booster); - // if the sort is 'score desc' use a custom sorting method to - // insert documents in their proper place - SortSpec sortSpec = rb.getSortSpec(); - if (sortSpec.getSort() == null) { - sortSpec.setSortAndFields(new Sort(new SortField[]{ - new SortField("_elevate_", comparator, true), - new SortField(null, SortField.Type.SCORE, false) - }), - Arrays.asList(new SchemaField[2])); - } else { - // Check if the sort is based on score - SortSpec modSortSpec = this.modifySortSpec(sortSpec, force, comparator); - if (null != modSortSpec) { - rb.setSortSpec(modSortSpec); - } } + rb.setQuery(queryBuilder.build()); + } + } - // alter the sorting in the grouping specification if there is one - GroupingSpecification groupingSpec = rb.getGroupingSpec(); - if(groupingSpec != null) { - SortSpec groupSortSpec = groupingSpec.getGroupSortSpec(); - SortSpec modGroupSortSpec = this.modifySortSpec(groupSortSpec, force, comparator); - if (modGroupSortSpec != null) { - groupingSpec.setGroupSortSpec(modGroupSortSpec); - } - SortSpec withinGroupSortSpec = groupingSpec.getWithinGroupSortSpec(); - SortSpec modWithinGroupSortSpec = this.modifySortSpec(withinGroupSortSpec, force, comparator); - if (modWithinGroupSortSpec != null) { - groupingSpec.setWithinGroupSortSpec(modWithinGroupSortSpec); - } + private void setSort(ResponseBuilder rb, Elevation elevation) { + boolean forceElevation = rb.req.getParams().getBool(QueryElevationParams.FORCE_ELEVATION, this.forceElevation); + ElevationComparatorSource comparator = new ElevationComparatorSource(elevation); + setSortSpec(rb, forceElevation, comparator); + setGroupingSpec(rb, forceElevation, comparator); + } + + private void 
setSortSpec(ResponseBuilder rb, boolean forceElevation, ElevationComparatorSource comparator) { + // if the sort is 'score desc' use a custom sorting method to + // insert documents in their proper place + SortSpec sortSpec = rb.getSortSpec(); + if (sortSpec.getSort() == null) { + sortSpec.setSortAndFields( + new Sort( + new SortField("_elevate_", comparator, true), + new SortField(null, SortField.Type.SCORE, false)), + Arrays.asList(new SchemaField[2])); + } else { + // Check if the sort is based on score + SortSpec modSortSpec = this.modifySortSpec(sortSpec, forceElevation, comparator); + if (null != modSortSpec) { + rb.setSortSpec(modSortSpec); } } + } - // Add debugging information - if (rb.isDebug()) { - List match = null; - if (booster != null) { - // Extract the elevated terms into a list - match = new ArrayList<>(booster.priority.size()); - for (Object o : booster.include.clauses()) { - TermQuery tq = (TermQuery) ((BooleanClause) o).getQuery(); - match.add(tq.getTerm().text()); - } + private void setGroupingSpec(ResponseBuilder rb, boolean forceElevation, ElevationComparatorSource comparator) { + // alter the sorting in the grouping specification if there is one + GroupingSpecification groupingSpec = rb.getGroupingSpec(); + if(groupingSpec != null) { + SortSpec groupSortSpec = groupingSpec.getGroupSortSpec(); + SortSpec modGroupSortSpec = this.modifySortSpec(groupSortSpec, forceElevation, comparator); + if (modGroupSortSpec != null) { + groupingSpec.setGroupSortSpec(modGroupSortSpec); } - - SimpleOrderedMap dbg = new SimpleOrderedMap<>(); - dbg.add("q", qstr); - dbg.add("match", match); - if (rb.isDebugQuery()) { - rb.addDebugInfo("queryBoosting", dbg); + SortSpec withinGroupSortSpec = groupingSpec.getWithinGroupSortSpec(); + SortSpec modWithinGroupSortSpec = this.modifySortSpec(withinGroupSortSpec, forceElevation, comparator); + if (modWithinGroupSortSpec != null) { + groupingSpec.setWithinGroupSortSpec(modWithinGroupSortSpec); } } } - private SortSpec 
modifySortSpec(SortSpec current, boolean force, ElevationComparatorSource comparator) { + private SortSpec modifySortSpec(SortSpec current, boolean forceElevation, ElevationComparatorSource comparator) { boolean modify = false; SortField[] currentSorts = current.getSort().getSort(); List currentFields = current.getSchemaFields(); @@ -498,7 +595,7 @@ private SortSpec modifySortSpec(SortSpec current, boolean force, ElevationCompar List fields = new ArrayList<>(currentFields.size() + 1); // Perhaps force it to always sort by score - if (force && currentSorts[0].getType() != SortField.Type.SCORE) { + if (forceElevation && currentSorts[0].getType() != SortField.Type.SCORE) { sorts.add(new SortField("_elevate_", comparator, true)); fields.add(null); modify = true; @@ -513,16 +610,33 @@ private SortSpec modifySortSpec(SortSpec current, boolean force, ElevationCompar sorts.add(sf); fields.add(currentFields.get(i)); } - if (modify) { - SortSpec newSpec = new SortSpec(new Sort(sorts.toArray(new SortField[sorts.size()])), - fields, - current.getCount(), - current.getOffset()); - return newSpec; + return modify ? 
+ new SortSpec(new Sort(sorts.toArray(new SortField[sorts.size()])), + fields, + current.getCount(), + current.getOffset()) + : null; + } + + private void addDebugInfo(ResponseBuilder rb, Elevation elevation) { + List match = null; + if (elevation != null) { + // Extract the elevated terms into a list + match = new ArrayList<>(elevation.includeQuery.clauses().size()); + for (BooleanClause clause : elevation.includeQuery.clauses()) { + TermQuery tq = (TermQuery) clause.getQuery(); + match.add(tq.getTerm().text()); + } } - return null; + SimpleOrderedMap dbg = new SimpleOrderedMap<>(); + dbg.add("q", rb.getQueryString()); + dbg.add("match", match); + rb.addDebugInfo("queryBoosting", dbg); } + //--------------------------------------------------------------------------------- + // Boosted docs helper + //--------------------------------------------------------------------------------- public static IntIntHashMap getBoostDocs(SolrIndexSearcher indexSearcher, Mapboosted, Map context) throws IOException { @@ -544,9 +658,8 @@ public static IntIntHashMap getBoostDocs(SolrIndexSearcher indexSearcher, Map localBoosts = new HashSet<>(boosted.size()*2); - Iterator boostedIt = boosted.keySet().iterator(); - while(boostedIt.hasNext()) { - localBoosts.add(boostedIt.next()); + for (BytesRef boost : boosted.keySet()) { + localBoosts.add(boost); } boostDocs = new IntIntHashMap(boosted.size()); @@ -565,7 +678,7 @@ public static IntIntHashMap getBoostDocs(SolrIndexSearcher indexSearcher, MapCan be overridden by extending this class.

+ */ + @SuppressWarnings("WeakerAccess") + protected boolean getDefaultForceElevation() { + return DEFAULT_FORCE_ELEVATION; + } + + /** + * Gets the default value for {@link #DEFAULT_KEEP_ELEVATION_PRIORITY} parameter. + *

Can be overridden by extending this class.

+ */ + @SuppressWarnings("WeakerAccess") + protected boolean getDefaultKeepElevationPriority() { + return DEFAULT_KEEP_ELEVATION_PRIORITY; + } - public ElevationComparatorSource(final QueryElevationComponent.ElevationObj elevations) { - this.elevations = elevations; - int size = elevations.ids.size(); - ordSet = new SentinelIntSet(size, -1); - termValues = new BytesRef[ordSet.keys.length]; + /** + * Gets the default subset match policy. + *

Can be overridden by extending this class.

+ */ + @SuppressWarnings("WeakerAccess") + protected boolean getDefaultSubsetMatch() { + return DEFAULT_SUBSET_MATCH; } - @Override - public FieldComparator newComparator(String fieldname, final int numHits, int sortPos, boolean reversed) { - return new SimpleFieldComparator() { - private final int[] values = new int[numHits]; - private int bottomVal; - private int topVal; - private PostingsEnum postingsEnum; - Set seen = new HashSet<>(elevations.ids.size()); + /** + * Gets the {@link InitializationExceptionHandler} that handles exception thrown during the initialization of the + * elevation configuration. + *

Can be overridden by extending this class. This method provides a means to set a custom exception handler if + * specific error processing is needed.

+ */ + @SuppressWarnings("WeakerAccess") + protected InitializationExceptionHandler getInitializationExceptionHandler() { + return InitializationExceptionHandler.NO_OP; + } + + /** + * Gets the {@link LoadingExceptionHandler} that handles exception thrown during the loading of the elevation configuration. + *

Can be overridden by extending this class. This method provides a means to set a custom exception handler if + * specific error processing is needed.

+ */ + @SuppressWarnings("WeakerAccess") + protected LoadingExceptionHandler getConfigLoadingExceptionHandler() { + return LoadingExceptionHandler.NO_OP; + } + + /** + * Creates the {@link ElevationProvider} to set during configuration loading. The same instance will be used later + * when elevating results for queries. + *

+ * Extending classes can override this method to create {@link ElevationProvider} with different behavior. + *

+ * + * @param queryAnalyzer to analyze and tokenize the query. + * @return The created {@link ElevationProvider}. + */ + @SuppressWarnings("WeakerAccess") + protected ElevationProvider createElevationProvider(Analyzer queryAnalyzer) { + return new MapElevationProvider(queryAnalyzer); + } + + //--------------------------------------------------------------------------------- + // Query analysis and tokenization + //--------------------------------------------------------------------------------- + + @VisibleForTesting + String analyzeQuery(String queryString) throws IOException { + return analyzeQuery(queryString, queryAnalyzer); + } + + /** + * Analyzes the provided query string and returns a concatenation of the analyzed tokens. + */ + private static String analyzeQuery(String queryString, Analyzer queryAnalyzer) throws IOException { + if (queryAnalyzer == null) { + return queryString; + } + Collection queryTerms = new ArrayList<>(); + splitQueryTermsWithAnalyzer(queryString, queryAnalyzer, queryTerms); + return queryTerms.stream().collect(QUERY_EXACT_JOINER); + } + + private static void splitQueryTermsWithAnalyzer(String queryString, Analyzer queryAnalyzer, Collection tokenCollector) throws IOException { + TokenStream tokens = queryAnalyzer.tokenStream("", new StringReader(queryString)); + tokens.reset(); + CharTermAttribute termAttribute = tokens.addAttribute(CharTermAttribute.class); + while (tokens.incrementToken()) { + tokenCollector.add(new String(termAttribute.buffer(), 0, termAttribute.length())); + } + tokens.end(); + tokens.close(); + } + + //--------------------------------------------------------------------------------- + // Testing + //--------------------------------------------------------------------------------- + + /** + * Helpful for testing without loading config.xml. + * + * + * @param reader The {@link org.apache.lucene.index.IndexReader}. + * @param queryString The query for which to elevate some documents. 
If the query has already been defined an + * elevation, this method overwrites it. + * @param subsetMatch true for query subset match; false for query exact match. + * @param elevatedIds The readable ids of the documents to set as top results for the provided query. + * @param excludedIds The readable ids of the document to exclude from results for the provided query. + * @throws java.io.IOException If there is a low-level I/O error. + */ + @VisibleForTesting + void setTopQueryResults(IndexReader reader, String queryString, boolean subsetMatch, String[] elevatedIds, + String[] excludedIds) throws IOException { + clearElevationProviderCache(); + if (elevatedIds == null) { + elevatedIds = new String[0]; + } + if (excludedIds == null) { + excludedIds = new String[0]; + } + ElevatingQuery elevatingQuery = new ElevatingQuery(queryString, subsetMatch); + Elevation elevation = createElevation(Arrays.asList(elevatedIds), Arrays.asList(excludedIds)); + ElevationProvider elevationProvider; + synchronized (elevationProviderCache) { + elevationProvider = elevationProviderCache.computeIfAbsent(reader, k -> createElevationProvider(queryAnalyzer)); + } + elevationProvider.setElevationForQuery(elevatingQuery, elevation); + } + + @VisibleForTesting + void clearElevationProviderCache() { + synchronized (elevationProviderCache) { + elevationProviderCache.clear(); + } + } + + //--------------------------------------------------------------------------------- + // Exception classes + //--------------------------------------------------------------------------------- + + private static class InitializationException extends Exception { + final InitializationExceptionHandler.ExceptionCause exceptionCause; + + InitializationException(String message, InitializationExceptionHandler.ExceptionCause exceptionCause) { + super(message); + this.exceptionCause = exceptionCause; + } + } + + /** + * Handles resource loading exception. 
+ */ + protected interface InitializationExceptionHandler { + /** + * NoOp {@link LoadingExceptionHandler} that does not capture any exception and simply returns false. + */ + InitializationExceptionHandler NO_OP = new InitializationExceptionHandler() { @Override - public int compare(int slot1, int slot2) { - return values[slot1] - values[slot2]; // values will be small enough that there is no overflow concern + public boolean handleInitializationException(Exception e, ExceptionCause exceptionCause) { + return exceptionCause == ExceptionCause.NO_CONFIG_FILE_DEFINED; } + }; + + enum ExceptionCause { + /** + * The component parameter {@link #FIELD_TYPE} defines an unknown field type. + */ + UNKNOWN_FIELD_TYPE, + /** + * This component requires the schema to have a uniqueKeyField, which it does not have. + */ + MISSING_UNIQUE_KEY_FIELD, + /** + * Missing component parameter {@link #CONFIG_FILE} - it has to define the path to the elevation configuration file (e.g. elevate.xml). + */ + NO_CONFIG_FILE_DEFINED, + /** + * The elevation configuration file (e.g. elevate.xml) cannot be found, or is defined in both conf/ and data/ directories. + */ + MISSING_CONFIG_FILE, + /** + * The elevation configuration file (e.g. elevate.xml) is empty. + */ + EMPTY_CONFIG_FILE, + /** + * Unclassified exception cause. + */ + OTHER, + } + + /** + * Potentially handles and captures an exception that occurred while initializing the component. + * If the exception is captured, the component fails to initialize silently and is muted. + * + * @param e The exception caught. + * @param exceptionCause The exception cause. + * @param The exception type. + * @return true if the exception is handled and captured by this handler (and thus will not be + * thrown anymore); false if the exception is not captured, in this case it will be probably + * thrown again by the calling code. + * @throws E If this handler throws the exception itself (it may add some cause or message). 
+ */ + boolean handleInitializationException(E e, ExceptionCause exceptionCause) throws E; + } + /** + * Handles resource loading exception. + */ + protected interface LoadingExceptionHandler { + + /** + * NoOp {@link LoadingExceptionHandler} that does not capture any exception and simply returns false. + */ + LoadingExceptionHandler NO_OP = new LoadingExceptionHandler() { @Override - public void setBottom(int slot) { - bottomVal = values[slot]; + public boolean handleLoadingException(Exception e, boolean resourceAccessIssue) { + return false; } @Override - public void setTopValue(Integer value) { - topVal = value.intValue(); + public int getLoadingMaxAttempts() { + return 0; } + }; - private int docVal(int doc) { - if (ordSet.size() > 0) { - int slot = ordSet.find(doc); - if (slot >= 0) { - BytesRef id = termValues[slot]; - Integer prio = elevations.priority.get(id); - return prio == null ? 0 : prio.intValue(); - } - } - return 0; + /** + * Potentially handles and captures an exception that occurred while loading a resource. + * + * @param e The exception caught. + * @param resourceAccessIssue true if the exception has been thrown because the resource could not + * be accessed (missing or cannot be read); false if the resource has + * been found and accessed but the error occurred while loading the resource + * (invalid format, incomplete or corrupted). + * @param The exception type. + * @return true if the exception is handled and captured by this handler (and thus will not be + * thrown anymore); false if the exception is not captured, in this case it will be probably + * thrown again by the calling code. + * @throws E If this handler throws the exception itself (it may add some cause or message). + */ + boolean handleLoadingException(E e, boolean resourceAccessIssue) throws E; + + /** + * Gets the maximum number of attempts to load the resource in case of error (resource not found, I/O error, + * invalid format), for each Solr core. 
+ * After this number of attempts (so {@link #handleLoadingException} is called this number of times), + * {@link #handleLoadingException} will not be called anymore for the specific Solr core, and the resource is + * considered empty afterwards (until the core is reloaded). + * + * @return The maximum number of attempts to load the resource. The value must be >= 0. + */ + int getLoadingMaxAttempts(); + } + + //--------------------------------------------------------------------------------- + // Elevation classes + //--------------------------------------------------------------------------------- + + /** + * Creates an elevation. + * + * @param elevatedIds The ids of the elevated documents that should appear on top of search results; can be null. + * @param excludedIds The ids of the excluded documents that should not appear in search results; can be null. + */ + private Elevation createElevation(Collection elevatedIds, Collection excludedIds) { + return new Elevation(elevatedIds, excludedIds, indexedValueProvider, uniqueKeyFieldName, keepElevationPriority); + } + + /** + * Provides the elevations defined for queries. + */ + protected interface ElevationProvider { + /** + * Gets the elevation associated to the provided query. + * + * @param queryString The query string (not {@link #analyzeQuery(String, Analyzer) analyzed} yet, + * this {@link ElevationProvider} is in charge of analyzing it). + * @return The elevation associated with the query; or null if none. + */ + Elevation getElevationForQuery(String queryString) throws IOException; + + /** + * Sets the elevation for the provided query. + *

+ * By contract and by design, only one elevation may be associated + * to a given query (this can be safely verified by an assertion). + *

+ *

+ * It is not allowed to call this method once this {@link ElevationProvider} becomes {@link #makeImmutable() immutable}. + * Otherwise a {@link RuntimeException} may be thrown. + *

+ * + * @param elevatingQuery The query triggering elevation. + * @param elevation The elevation. + */ + void setElevationForQuery(ElevatingQuery elevatingQuery, Elevation elevation) throws IOException; + + /** + * Gets the number of query elevations in this {@link ElevationProvider}. + */ + int size(); + + /** + * Makes this elevation provider immutable. + *

Calling {@link #setElevationForQuery} afterwards will throw an exception.

+ *

Making this elevation provider immutable may reduce its memory usage and make it more efficient.

+ * + * @return This elevation provider. + */ + ElevationProvider makeImmutable(); + } + + /** + * {@link ElevationProvider} that returns no elevation. + */ + @SuppressWarnings("WeakerAccess") + protected static final ElevationProvider NO_OP_ELEVATION_PROVIDER = new ElevationProvider() { + @Override + public Elevation getElevationForQuery(String queryString) { + return null; + } + + @Override + public void setElevationForQuery(ElevatingQuery elevatingQuery, Elevation elevation) { + // Do nothing. + } + + @Override + public int size() { + return 0; + } + + @Override + public ElevationProvider makeImmutable() { + return this; + } + }; + + /** + * Simple query exact match {@link ElevationProvider}. + *

+ * It does not support subset matching (see {@link #parseMatchPolicy(String)}). + *

+ */ + protected static class MapElevationProvider implements ElevationProvider { + + private final Analyzer queryAnalyzer; + private Map exactMatchElevationMap = new HashMap<>(); + + @SuppressWarnings("WeakerAccess") + public MapElevationProvider(Analyzer queryAnalyzer) { + this.queryAnalyzer = queryAnalyzer; + } + + @Override + public Elevation getElevationForQuery(String queryString) throws IOException { + String analyzedQuery = analyzeQuery(queryString, queryAnalyzer); + return exactMatchElevationMap.get(analyzedQuery); + } + + @Override + public void setElevationForQuery(ElevatingQuery elevatingQuery, Elevation elevation) throws IOException { + if (elevatingQuery.subsetMatch) { + throw new UnsupportedOperationException("Subset matching is not supported by " + getClass().getName()); } + String analyzedQuery = analyzeQuery(elevatingQuery.queryString, queryAnalyzer); + Elevation duplicateElevation = exactMatchElevationMap.put(analyzedQuery, elevation); + assert duplicateElevation == null; + } - @Override - public int compareBottom(int doc) { - return bottomVal - docVal(doc); + @Override + public int size() { + return exactMatchElevationMap.size(); + } + + @Override + public ElevationProvider makeImmutable() { + exactMatchElevationMap = Collections.unmodifiableMap(exactMatchElevationMap); + return this; + } + } + + /** + * Query triggering elevation. + */ + protected static class ElevatingQuery { + + @SuppressWarnings("WeakerAccess") + public final String queryString; + @SuppressWarnings("WeakerAccess") + public final boolean subsetMatch; + + /** + * @param queryString The query to elevate documents for (not the analyzed form). + * @param subsetMatch Whether to match a subset of query terms. 
+ */ + private ElevatingQuery(String queryString, boolean subsetMatch) throws IOException { + this.queryString = queryString; + this.subsetMatch = subsetMatch; + } + + @Override + public boolean equals(Object o) { + if (!(o instanceof ElevatingQuery)) { + return false; } + ElevatingQuery eq = (ElevatingQuery) o; + return queryString.equals(eq.queryString) && subsetMatch == eq.subsetMatch; + } - @Override - public void copy(int slot, int doc) { - values[slot] = docVal(doc); + @Override + public int hashCode() { + return queryString.hashCode() + (subsetMatch ? 1 : 0); + } + } + + /** + * Builds an {@link Elevation}. This class is used to start defining query elevations, but allowing the merge of + * multiple elevations for the same query. + */ + private class ElevationBuilder { + + /** + * The ids of the elevated documents that should appear on top of search results; can be null. + */ + private Set elevatedIds; + /** + * The ids of the excluded documents that should not appear in search results; can be null. 
+ */ + private Set excludedIds; + + ElevationBuilder addElevatedId(String id) { + if (elevatedIds == null) { + elevatedIds = createIdSet(); } + elevatedIds.add(id); + return this; + } - @Override - protected void doSetNextReader(LeafReaderContext context) throws IOException { - //convert the ids to Lucene doc ids, the ordSet and termValues needs to be the same size as the number of elevation docs we have - ordSet.clear(); - Terms terms = context.reader().terms(idField); - if (terms == null) return; - TermsEnum termsEnum = terms.iterator(); - BytesRefBuilder term = new BytesRefBuilder(); - Bits liveDocs = context.reader().getLiveDocs(); - - for (String id : elevations.ids) { - term.copyChars(id); - if (seen.contains(id) == false && termsEnum.seekExact(term.get())) { - postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE); - int docId = postingsEnum.nextDoc(); - while (docId != DocIdSetIterator.NO_MORE_DOCS && liveDocs != null && liveDocs.get(docId) == false) { - docId = postingsEnum.nextDoc(); - } - if (docId == DocIdSetIterator.NO_MORE_DOCS ) continue; // must have been deleted - termValues[ordSet.put(docId)] = term.toBytesRef(); - seen.add(id); - assert postingsEnum.nextDoc() == DocIdSetIterator.NO_MORE_DOCS; + ElevationBuilder addElevatedIds(List ids) { + for (String id : ids) { + addElevatedId(id); + } + return this; + } + + ElevationBuilder addExcludedId(String id) { + if (excludedIds == null) { + excludedIds = createIdSet(); + } + excludedIds.add(id); + return this; + } + + ElevationBuilder addExcludedIds(List ids) { + for (String id : ids) { + addExcludedId(id); + } + return this; + } + + ElevationBuilder merge(ElevationBuilder elevationBuilder) { + if (elevatedIds == null) { + elevatedIds = elevationBuilder.elevatedIds; + } else if (elevationBuilder.elevatedIds != null) { + elevatedIds.addAll(elevationBuilder.elevatedIds); + } + if (excludedIds == null) { + excludedIds = elevationBuilder.excludedIds; + } else if (elevationBuilder.excludedIds != 
null) { + excludedIds.addAll(elevationBuilder.excludedIds); + } + return this; + } + + Elevation build() { + return createElevation(elevatedIds, excludedIds); + } + + private Set createIdSet() { + return (keepElevationPriority ? new LinkedHashSet<>() : new HashSet<>()); + } + } + + /** + * Elevation of some documents in search results, with potential exclusion of others. + */ + protected static class Elevation { + + private static final BooleanQuery EMPTY_QUERY = new BooleanQuery.Builder().build(); + + @VisibleForTesting + final Set elevatedIds; + private final BooleanQuery includeQuery; + @VisibleForTesting + final Map priorities; + private final Set excludedIds; + private final TermQuery[] excludeQueries;//just keep the term query, b/c we will not always explicitly exclude the item based on markExcludes query time param + + /** + * Constructs an elevation. + * + * @param elevatedIds The ids of the elevated documents that should appear on top of search results; can be null. + * @param excludedIds The ids of the excluded documents that should not appear in search results; can be null. + * @param indexedValueProvider Provides indexed values. + * @param queryFieldName The field name to use to create query terms. + * @param keepElevationPriority Whether to keep the elevation priority order. 
+ */ + private Elevation(Collection elevatedIds, Collection excludedIds, + IndexedValueProvider indexedValueProvider, String queryFieldName, + boolean keepElevationPriority) { + if (elevatedIds == null || elevatedIds.isEmpty()) { + this.elevatedIds = Collections.emptySet(); + includeQuery = EMPTY_QUERY; + priorities = Collections.emptyMap(); + } else { + ImmutableSet.Builder elevatedIdsBuilder = ImmutableSet.builder(); + BooleanQuery.Builder includeQueryBuilder = new BooleanQuery.Builder(); + ImmutableMap.Builder prioritiesBuilder = null; + if (keepElevationPriority) { + prioritiesBuilder = ImmutableMap.builder(); + } + int priorityLevel = elevatedIds.size(); + for (String elevatedId : elevatedIds) { + elevatedIdsBuilder.add(indexedValueProvider.getIndexedValue(elevatedId)); + TermQuery tq = new TermQuery(new Term(queryFieldName, elevatedId)); + includeQueryBuilder.add(tq, BooleanClause.Occur.SHOULD); + if (keepElevationPriority) { + prioritiesBuilder.put(new BytesRef(elevatedId), priorityLevel--); } } + this.elevatedIds = elevatedIdsBuilder.build(); + includeQuery = includeQueryBuilder.build(); + priorities = keepElevationPriority ? 
prioritiesBuilder.build() : null; } - @Override - public Integer value(int slot) { - return values[slot]; + if (excludedIds == null || excludedIds.isEmpty()) { + this.excludedIds = Collections.emptySet(); + excludeQueries = null; + } else { + ImmutableSet.Builder excludedIdsBuilder = ImmutableSet.builder(); + List excludeQueriesBuilder = new ArrayList<>(excludedIds.size()); + for (String excludedId : excludedIds) { + excludedIdsBuilder.add(indexedValueProvider.getIndexedValue(excludedId)); + excludeQueriesBuilder.add(new TermQuery(new Term(queryFieldName, excludedId))); + } + this.excludedIds = excludedIdsBuilder.build(); + excludeQueries = excludeQueriesBuilder.toArray(new TermQuery[excludeQueriesBuilder.size()]); } + } - @Override - public int compareTop(int doc) { - final int docValue = docVal(doc); - return topVal - docValue; // values will be small enough that there is no overflow concern - } - }; + @Override + public String toString() { + return "{elevatedIds=" + elevatedIds + ", excludedIds=" + excludedIds + "}"; + } + } + + private class ElevationComparatorSource extends FieldComparatorSource { + + private final Elevation elevation; + private final SentinelIntSet ordSet; //the key half of the map + private final BytesRef[] termValues; //the value half of the map + + private ElevationComparatorSource(Elevation elevation) { + this.elevation = elevation; + int size = elevation.elevatedIds.size(); + ordSet = new SentinelIntSet(size, -1); + termValues = keepElevationPriority ? 
new BytesRef[ordSet.keys.length] : null; + } + + @Override + public FieldComparator newComparator(String fieldName, final int numHits, int sortPos, boolean reversed) { + return new SimpleFieldComparator() { + final int[] values = new int[numHits]; + int bottomVal; + int topVal; + PostingsEnum postingsEnum; + final Set seen = new HashSet<>(elevation.elevatedIds.size()); + + @Override + public int compare(int slot1, int slot2) { + return values[slot1] - values[slot2]; // values will be small enough that there is no overflow concern + } + + @Override + public void setBottom(int slot) { + bottomVal = values[slot]; + } + + @Override + public void setTopValue(Integer value) { + topVal = value; + } + + private int docVal(int doc) { + if (ordSet.size() > 0) { + int slot = ordSet.find(doc); + if (slot >= 0) { + if (!keepElevationPriority) + return 1; + BytesRef id = termValues[slot]; + Integer priority = elevation.priorities.get(id); + return priority == null ? 0 : priority; + } + } + return 0; + } + + @Override + public int compareBottom(int doc) { + return bottomVal - docVal(doc); + } + + @Override + public void copy(int slot, int doc) { + values[slot] = docVal(doc); + } + + @Override + protected void doSetNextReader(LeafReaderContext context) throws IOException { + //convert the ids to Lucene doc ids, the ordSet and termValues needs to be the same size as the number of elevation docs we have + ordSet.clear(); + Terms terms = context.reader().terms(uniqueKeyFieldName); + if (terms == null) return; + TermsEnum termsEnum = terms.iterator(); + BytesRefBuilder term = new BytesRefBuilder(); + Bits liveDocs = context.reader().getLiveDocs(); + + for (String id : elevation.elevatedIds) { + term.copyChars(id); + if (!seen.contains(id) && termsEnum.seekExact(term.get())) { + postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE); + int docId = postingsEnum.nextDoc(); + while (docId != DocIdSetIterator.NO_MORE_DOCS && liveDocs != null && !liveDocs.get(docId)) { + docId = 
postingsEnum.nextDoc(); + } + if (docId == DocIdSetIterator.NO_MORE_DOCS ) continue; // must have been deleted + int slot = ordSet.put(docId); + if (keepElevationPriority) { + termValues[slot] = term.toBytesRef(); + } + seen.add(id); + assert postingsEnum.nextDoc() == DocIdSetIterator.NO_MORE_DOCS; + } + } + } + + @Override + public Integer value(int slot) { + return values[slot]; + } + + @Override + public int compareTop(int doc) { + final int docValue = docVal(doc); + return topVal - docValue; // values will be small enough that there is no overflow concern + } + }; + } } + + /** + * Provides indexed value from readable value. + */ + private interface IndexedValueProvider { + /** + * Gets the indexed value corresponding to a readable value. + * + * @param readableValue The readable value. + * @return The indexed value. + */ + String getIndexedValue(String readableValue); } } diff --git a/solr/core/src/test/org/apache/solr/handler/component/QueryElevationComponentTest.java b/solr/core/src/test/org/apache/solr/handler/component/QueryElevationComponentTest.java index 829c585c44fa..2528b3f56247 100644 --- a/solr/core/src/test/org/apache/solr/handler/component/QueryElevationComponentTest.java +++ b/solr/core/src/test/org/apache/solr/handler/component/QueryElevationComponentTest.java @@ -17,7 +17,6 @@ package org.apache.solr.handler.component; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.util.BytesRef; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.GroupParams; @@ -26,7 +25,6 @@ import org.apache.solr.util.FileUtils; import org.apache.solr.common.util.NamedList; import org.apache.solr.core.SolrCore; -import org.apache.solr.handler.component.QueryElevationComponent.ElevationObj; import org.apache.solr.request.LocalSolrQueryRequest; import org.apache.solr.request.SolrQueryRequest; import org.junit.Before; @@ -360,17 +358,17 @@ public void testInterface() throws 
Exception { SolrQueryRequest req = req(); IndexReader reader = req.getSearcher().getIndexReader(); - Map map = comp.getElevationMap(reader, core); + QueryElevationComponent.ElevationProvider elevationProvider = comp.getElevationProvider(reader, core); req.close(); // Make sure the boosts loaded properly - assertEquals(7, map.size()); - assertEquals(1, map.get("XXXX").priority.size()); - assertEquals(2, map.get("YYYY").priority.size()); - assertEquals(3, map.get("ZZZZ").priority.size()); - assertEquals(null, map.get("xxxx")); - assertEquals(null, map.get("yyyy")); - assertEquals(null, map.get("zzzz")); + assertEquals(7, elevationProvider.size()); + assertEquals(1, elevationProvider.getElevationForQuery("XXXX").priorities.size()); + assertEquals(2, elevationProvider.getElevationForQuery("YYYY").priorities.size()); + assertEquals(3, elevationProvider.getElevationForQuery("ZZZZ").priorities.size()); + assertEquals(null, elevationProvider.getElevationForQuery("xxxx")); + assertEquals(null, elevationProvider.getElevationForQuery("yyyy")); + assertEquals(null, elevationProvider.getElevationForQuery("zzzz")); // Now test the same thing with a lowercase filter: 'lowerfilt' args = new NamedList<>(); @@ -380,17 +378,17 @@ public void testInterface() throws Exception { comp = new QueryElevationComponent(); comp.init(args); comp.inform(core); - map = comp.getElevationMap(reader, core); - assertEquals(7, map.size()); - assertEquals(null, map.get("XXXX")); - assertEquals(null, map.get("YYYY")); - assertEquals(null, map.get("ZZZZ")); - assertEquals(1, map.get("xxxx").priority.size()); - assertEquals(2, map.get("yyyy").priority.size()); - assertEquals(3, map.get("zzzz").priority.size()); - - assertEquals("xxxx", comp.getAnalyzedQuery("XXXX")); - assertEquals("xxxxyyyy", comp.getAnalyzedQuery("XXXX YYYY")); + elevationProvider = comp.getElevationProvider(reader, core); + assertEquals(7, elevationProvider.size()); + assertEquals(1, 
elevationProvider.getElevationForQuery("XXXX").priorities.size()); + assertEquals(2, elevationProvider.getElevationForQuery("YYYY").priorities.size()); + assertEquals(3, elevationProvider.getElevationForQuery("ZZZZ").priorities.size()); + assertEquals(1, elevationProvider.getElevationForQuery("xxxx").priorities.size()); + assertEquals(2, elevationProvider.getElevationForQuery("yyyy").priorities.size()); + assertEquals(3, elevationProvider.getElevationForQuery("zzzz").priorities.size()); + + assertEquals("xxxx", comp.analyzeQuery("XXXX")); + assertEquals("xxxx yyyy", comp.analyzeQuery("XXXX YYYY")); assertQ("Make sure QEC handles null queries", req("qt", "/elevate", "q.alt", "*:*", "defType", "dismax"), "//*[@numFound='0']"); @@ -555,9 +553,7 @@ public void testSorting() throws Exception { ); // Explicitly set what gets boosted - booster.elevationCache.clear(); - booster.setTopQueryResults(reader, query, new String[]{"x", "y", "z"}, null); - + booster.setTopQueryResults(reader, query, false, new String[]{"x", "y", "z"}, null); req.close(); req = new LocalSolrQueryRequest(h.getCore(), new MapSolrParams(args)); assertQ("All six should make it", req @@ -570,11 +566,9 @@ public void testSorting() throws Exception { , "//result/doc[6]/str[@name='id'][.='a']" ); - booster.elevationCache.clear(); - // now switch the order: req.close(); req = new LocalSolrQueryRequest(h.getCore(), new MapSolrParams(args)); - booster.setTopQueryResults(reader, query, new String[]{"a", "x"}, null); + booster.setTopQueryResults(reader, query, false, new String[]{"a", "x"}, null); assertQ("All four should make it", req , "//*[@numFound='4']" , "//result/doc[1]/str[@name='id'][.='a']" @@ -631,7 +625,7 @@ public void testSorting() throws Exception { //Test exclusive (not to be confused with exclusion) args.put(QueryElevationParams.EXCLUSIVE, "true"); req.close(); req = new LocalSolrQueryRequest(h.getCore(), new MapSolrParams(args)); - booster.setTopQueryResults(reader, query, new String[]{"x", 
"a"}, new String[]{}); + booster.setTopQueryResults(reader, query, false, new String[]{"x", "a"}, new String[]{}); assertQ(null, req , "//*[@numFound='2']" , "//result/doc[1]/str[@name='id'][.='x']" @@ -639,11 +633,10 @@ public void testSorting() throws Exception { ); // Test exclusion - booster.elevationCache.clear(); args.remove(CommonParams.SORT); args.remove(QueryElevationParams.EXCLUSIVE); req.close(); req = new LocalSolrQueryRequest(h.getCore(), new MapSolrParams(args)); - booster.setTopQueryResults(reader, query, new String[]{"x"}, new String[]{"a"}); + booster.setTopQueryResults(reader, query, false, new String[]{"x"}, new String[]{"a"}); assertQ(null, req , "//*[@numFound='3']" , "//result/doc[1]/str[@name='id'][.='x']" @@ -654,7 +647,7 @@ public void testSorting() throws Exception { // Test setting ids and excludes from http parameters - booster.elevationCache.clear(); + booster.clearElevationProviderCache(); args.put(QueryElevationParams.IDS, "x,y,z"); args.put(QueryElevationParams.EXCLUDE, "b"); req.close(); req = new LocalSolrQueryRequest(h.getCore(), new MapSolrParams(args)); @@ -706,8 +699,8 @@ public void testElevationReloading() throws Exception { try { init("schema12.xml"); String testfile = "data-elevation.xml"; - File f = new File(h.getCore().getDataDir(), testfile); - writeFile(f, "aaa", "A"); + File configFile = new File(h.getCore().getDataDir(), testfile); + writeFile(configFile, "aaa", "A"); QueryElevationComponent comp = (QueryElevationComponent) h.getCore().getSearchComponent("elevate"); NamedList args = new NamedList<>(); @@ -717,21 +710,46 @@ public void testElevationReloading() throws Exception { SolrQueryRequest req = req(); IndexReader reader = req.getSearcher().getIndexReader(); - Map map = comp.getElevationMap(reader, h.getCore()); - assertTrue(map.get("aaa").priority.containsKey(new BytesRef("A"))); - assertNull(map.get("bbb")); + QueryElevationComponent.ElevationProvider elevationProvider = comp.getElevationProvider(reader, 
h.getCore()); + assertTrue(elevationProvider.getElevationForQuery("aaa").elevatedIds.contains("A")); + assertNull(elevationProvider.getElevationForQuery("bbb")); req.close(); // now change the file - writeFile(f, "bbb", "B"); - assertU(adoc("id", "10000")); // will get same reader if no index change + writeFile(configFile, "bbb", "B"); + + // With no index change, we get the same index reader, so the elevationProviderCache returns the previous ElevationProvider without the change. + req = req(); + reader = req.getSearcher().getIndexReader(); + elevationProvider = comp.getElevationProvider(reader, h.getCore()); + assertTrue(elevationProvider.getElevationForQuery("aaa").elevatedIds.contains("A")); + assertNull(elevationProvider.getElevationForQuery("bbb")); + req.close(); + + // Index a new doc to get a new index reader. + assertU(adoc("id", "10000")); assertU(commit()); + // Check that we effectively reload a new ElevationProvider for a different index reader (so two entries in elevationProviderCache). + req = req(); + reader = req.getSearcher().getIndexReader(); + elevationProvider = comp.getElevationProvider(reader, h.getCore()); + assertNull(elevationProvider.getElevationForQuery("aaa")); + assertTrue(elevationProvider.getElevationForQuery("bbb").elevatedIds.contains("B")); + req.close(); + + // Now change the config file again. + writeFile(configFile, "ccc", "C"); + + // Without index change, but calling a different method that clears the elevationProviderCache, so we should load a new ElevationProvider. 
+ int elevationRuleNumber = comp.loadElevationConfiguration(h.getCore()); + assertEquals(1, elevationRuleNumber); req = req(); reader = req.getSearcher().getIndexReader(); - map = comp.getElevationMap(reader, h.getCore()); - assertNull(map.get("aaa")); - assertTrue(map.get("bbb").priority.containsKey(new BytesRef("B"))); + elevationProvider = comp.getElevationProvider(reader, h.getCore()); + assertNull(elevationProvider.getElevationForQuery("aaa")); + assertNull(elevationProvider.getElevationForQuery("bbb")); + assertTrue(elevationProvider.getElevationForQuery("ccc").elevatedIds.contains("C")); req.close(); } finally { delete(); diff --git a/solr/solrj/src/java/org/apache/solr/common/params/QueryElevationParams.java b/solr/solrj/src/java/org/apache/solr/common/params/QueryElevationParams.java index 794bbed8e10b..d538cab6a2b4 100644 --- a/solr/solrj/src/java/org/apache/solr/common/params/QueryElevationParams.java +++ b/solr/solrj/src/java/org/apache/solr/common/params/QueryElevationParams.java @@ -48,4 +48,10 @@ public interface QueryElevationParams { * as excluded. */ String MARK_EXCLUDES = "markExcludes"; + + /** + * Whether the priority order between elevated documents is kept, based on the definition order in the configuration file. + * This parameter is only taken into account if {@link QueryElevationParams#FORCE_ELEVATION} is true. 
+ */ + String KEEP_ELEVATION_PRIORITY = "keepElevationPriority"; } From e9f53315ef0dc230280e93f868055183aa09abb6 Mon Sep 17 00:00:00 2001 From: broustant Date: Fri, 30 Mar 2018 14:04:43 +0200 Subject: [PATCH 2/8] Refactor QueryElevationComponent after review --- .../component/QueryElevationComponent.java | 209 +++++++----------- 1 file changed, 77 insertions(+), 132 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java b/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java index cadcfc02f8b9..2bd31e92e8fc 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java @@ -27,14 +27,17 @@ import java.io.StringReader; import java.lang.invoke.MethodHandles; import java.util.*; +import java.util.function.UnaryOperator; import java.util.stream.Collector; import java.util.stream.Collectors; import com.carrotsearch.hppc.IntIntHashMap; import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Throwables; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Maps; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; @@ -114,7 +117,10 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore private Analyzer queryAnalyzer; private String uniqueKeyFieldName; private FieldType uniqueKeyFieldType; - private IndexedValueProvider indexedValueProvider; + /** + * Provides the indexed value corresponding to a readable value. 
+ */ + private UnaryOperator indexedValueProvider; @VisibleForTesting boolean forceElevation; private boolean keepElevationPriority; @@ -181,17 +187,11 @@ private void setUniqueKeyField(SolrCore core) throws InitializationException { private void parseExcludedMarkerFieldName(SolrCore core) { String markerName = initArgs.get(QueryElevationParams.EXCLUDE_MARKER_FIELD_NAME, DEFAULT_EXCLUDE_MARKER_FIELD_NAME); - if (markerName == null || markerName.equals("")) { - markerName = DEFAULT_EXCLUDE_MARKER_FIELD_NAME; - } core.addTransformerFactory(markerName, new ExcludedMarkerFactory()); } private void parseEditorialMarkerFieldName(SolrCore core) { String markerName = initArgs.get(QueryElevationParams.EDITORIAL_MARKER_FIELD_NAME, DEFAULT_EDITORIAL_MARKER_FIELD_NAME); - if (markerName == null || markerName.equals("")) { - markerName = DEFAULT_EDITORIAL_MARKER_FIELD_NAME; - } core.addTransformerFactory(markerName, new ElevatedMarkerFactory()); } @@ -352,9 +352,6 @@ private ElevationProvider loadElevationProvider(SolrCore core) throws IOExceptio /** * Loads the {@link ElevationProvider}. - *

- * This method can be overridden. - *

* * @throws java.io.IOException If an I/O error occurs while analyzing the triggering queries. * @throws RuntimeException If the config does not provide an XML content of the expected format @@ -403,11 +400,7 @@ protected ElevationProvider loadElevationProvider(Config config) throws IOExcept previousElevationBuilder.merge(elevationBuilder); } } - ElevationProvider elevationProvider = createElevationProvider(queryAnalyzer); - for (Map.Entry entry : elevationBuilderMap.entrySet()) { - elevationProvider.setElevationForQuery(entry.getKey(), entry.getValue().build()); - } - return elevationProvider.makeImmutable(); + return createElevationProvider(queryAnalyzer, elevationBuilderMap); } private boolean parseMatchPolicy(String matchString) { @@ -638,50 +631,46 @@ private void addDebugInfo(ResponseBuilder rb, Elevation elevation) { // Boosted docs helper //--------------------------------------------------------------------------------- - public static IntIntHashMap getBoostDocs(SolrIndexSearcher indexSearcher, Mapboosted, Map context) throws IOException { + public static IntIntHashMap getBoostDocs(SolrIndexSearcher indexSearcher, Map boosted, Map context) throws IOException { IntIntHashMap boostDocs = null; - if(boosted != null) { + if (boosted != null) { //First see if it's already in the request context. Could have been put there //by another caller. - if(context != null) { + if (context != null) { boostDocs = (IntIntHashMap) context.get(BOOSTED_DOCIDS); } - if(boostDocs != null) { + if (boostDocs != null) { return boostDocs; } //Not in the context yet so load it. 
SchemaField idField = indexSearcher.getSchema().getUniqueKeyField(); String fieldName = idField.getName(); - HashSet localBoosts = new HashSet<>(boosted.size()*2); - for (BytesRef boost : boosted.keySet()) { - localBoosts.add(boost); - } boostDocs = new IntIntHashMap(boosted.size()); Listleaves = indexSearcher.getTopReaderContext().leaves(); PostingsEnum postingsEnum = null; - for(LeafReaderContext leaf : leaves) { + for (LeafReaderContext leaf : leaves) { LeafReader reader = leaf.reader(); int docBase = leaf.docBase; Bits liveDocs = reader.getLiveDocs(); Terms terms = reader.terms(fieldName); TermsEnum termsEnum = terms.iterator(); - Iterator it = localBoosts.iterator(); - while(it.hasNext()) { + Iterator it = boosted.keySet().iterator(); + while (it.hasNext()) { BytesRef ref = it.next(); - if(termsEnum.seekExact(ref)) { + if (termsEnum.seekExact(ref)) { postingsEnum = termsEnum.postings(postingsEnum); int doc = postingsEnum.nextDoc(); while (doc != PostingsEnum.NO_MORE_DOCS && liveDocs != null && !liveDocs.get(doc)) { doc = postingsEnum.nextDoc(); } - if(doc != PostingsEnum.NO_MORE_DOCS) { + if (doc != PostingsEnum.NO_MORE_DOCS) { //Found the document. int p = boosted.get(ref); boostDocs.put(doc+docBase, p); @@ -715,7 +704,6 @@ public String getDescription() { /** * Gets the default value for {@link org.apache.solr.common.params.QueryElevationParams#FORCE_ELEVATION} parameter. - *

Can be overridden by extending this class.

*/ @SuppressWarnings("WeakerAccess") protected boolean getDefaultForceElevation() { @@ -724,7 +712,6 @@ protected boolean getDefaultForceElevation() { /** * Gets the default value for {@link #DEFAULT_KEEP_ELEVATION_PRIORITY} parameter. - *

Can be overridden by extending class.

*/ @SuppressWarnings("WeakerAccess") protected boolean getDefaultKeepElevationPriority() { @@ -733,7 +720,6 @@ protected boolean getDefaultKeepElevationPriority() { /** * Gets the default subset match policy. - *

Can be overridden by extending class.

*/ @SuppressWarnings("WeakerAccess") protected boolean getDefaultSubsetMatch() { @@ -743,8 +729,6 @@ protected boolean getDefaultSubsetMatch() { /** * Gets the {@link InitializationExceptionHandler} that handles exception thrown during the initialization of the * elevation configuration. - *

Can be overridden by extending class. This method provides a mean to set a custom exception handler if a - * specific error processing is needed.

*/ @SuppressWarnings("WeakerAccess") protected InitializationExceptionHandler getInitializationExceptionHandler() { @@ -753,8 +737,6 @@ protected InitializationExceptionHandler getInitializationExceptionHandler() { /** * Gets the {@link LoadingExceptionHandler} that handles exception thrown during the loading of the elevation configuration. - *

Can be overridden by extending class. This method provides a mean to set a custom exception handler if a - * specific error processing is needed.

*/ @SuppressWarnings("WeakerAccess") protected LoadingExceptionHandler getConfigLoadingExceptionHandler() { @@ -764,16 +746,14 @@ protected LoadingExceptionHandler getConfigLoadingExceptionHandler() { /** * Creates the {@link ElevationProvider} to set during configuration loading. The same instance will be used later * when elevating results for queries. - *

- * Extending classes can override this method to create {@link ElevationProvider} with different behavior. - *

* * @param queryAnalyzer to analyze and tokenize the query. + * @param elevationBuilderMap map of all {@link ElevatingQuery} and their corresponding {@link ElevationBuilder}. * @return The created {@link ElevationProvider}. */ @SuppressWarnings("WeakerAccess") - protected ElevationProvider createElevationProvider(Analyzer queryAnalyzer) { - return new MapElevationProvider(queryAnalyzer); + protected ElevationProvider createElevationProvider(Analyzer queryAnalyzer, Map elevationBuilderMap) { + return new MapElevationProvider(queryAnalyzer, elevationBuilderMap); } //--------------------------------------------------------------------------------- @@ -781,14 +761,14 @@ protected ElevationProvider createElevationProvider(Analyzer queryAnalyzer) { //--------------------------------------------------------------------------------- @VisibleForTesting - String analyzeQuery(String queryString) throws IOException { + String analyzeQuery(String queryString) { return analyzeQuery(queryString, queryAnalyzer); } /** * Analyzes the provided query string and returns a concatenation of the analyzed tokens. 
*/ - private static String analyzeQuery(String queryString, Analyzer queryAnalyzer) throws IOException { + private static String analyzeQuery(String queryString, Analyzer queryAnalyzer) { if (queryAnalyzer == null) { return queryString; } @@ -797,15 +777,20 @@ private static String analyzeQuery(String queryString, Analyzer queryAnalyzer) t return queryTerms.stream().collect(QUERY_EXACT_JOINER); } - private static void splitQueryTermsWithAnalyzer(String queryString, Analyzer queryAnalyzer, Collection tokenCollector) throws IOException { - TokenStream tokens = queryAnalyzer.tokenStream("", new StringReader(queryString)); - tokens.reset(); - CharTermAttribute termAttribute = tokens.addAttribute(CharTermAttribute.class); - while (tokens.incrementToken()) { - tokenCollector.add(new String(termAttribute.buffer(), 0, termAttribute.length())); + private static void splitQueryTermsWithAnalyzer(String queryString, Analyzer queryAnalyzer, Collection tokenCollector) { + try { + TokenStream tokens = queryAnalyzer.tokenStream("", new StringReader(queryString)); + tokens.reset(); + CharTermAttribute termAttribute = tokens.addAttribute(CharTermAttribute.class); + while (tokens.incrementToken()) { + tokenCollector.add(new String(termAttribute.buffer(), 0, termAttribute.length())); + } + tokens.end(); + tokens.close(); + } catch (IOException e) { + // Will never be thrown since we read a StringReader. 
+ throw Throwables.propagate(e); } - tokens.end(); - tokens.close(); } //--------------------------------------------------------------------------------- @@ -835,12 +820,13 @@ void setTopQueryResults(IndexReader reader, String queryString, boolean subsetMa excludedIds = new String[0]; } ElevatingQuery elevatingQuery = new ElevatingQuery(queryString, subsetMatch); - Elevation elevation = createElevation(Arrays.asList(elevatedIds), Arrays.asList(excludedIds)); - ElevationProvider elevationProvider; + ElevationBuilder elevationBuilder = new ElevationBuilder() + .addElevatedIds(Arrays.asList(elevatedIds)) + .addExcludedIds(Arrays.asList(excludedIds)); + Map elevationBuilderMap = ImmutableMap.of(elevatingQuery, elevationBuilder); synchronized (elevationProviderCache) { - elevationProvider = elevationProviderCache.computeIfAbsent(reader, k -> createElevationProvider(queryAnalyzer)); + elevationProviderCache.computeIfAbsent(reader, k -> createElevationProvider(queryAnalyzer, elevationBuilderMap)); } - elevationProvider.setElevationForQuery(elevatingQuery, elevation); } @VisibleForTesting @@ -988,42 +974,21 @@ private Elevation createElevation(Collection elevatedIds, Collectionnull if none. - */ - Elevation getElevationForQuery(String queryString) throws IOException; - - /** - * Sets the elevation for the provided query. *

* By contract and by design, only one elevation may be associated * to a given query (this can be safely verified by an assertion). - *

- *

- * It is not allowed to call this method once this {@link ElevationProvider} becomes {@link #makeImmutable() immutable}. - * Otherwise a {@link RuntimeException} may be thrown. - *

* - * @param elevatingQuery The query triggering elevation. - * @param elevation The elevation. + * @param queryString The query string (not {@link #analyzeQuery(String, Analyzer) analyzed} yet, + * this {@link ElevationProvider} is in charge of analyzing it). + * @return The elevation associated with the query; or null if none. */ - void setElevationForQuery(ElevatingQuery elevatingQuery, Elevation elevation) throws IOException; + Elevation getElevationForQuery(String queryString); /** * Gets the number of query elevations in this {@link ElevationProvider}. */ + @VisibleForTesting int size(); - - /** - * Makes this elevation provider immutable. - *

Calling {@link #setElevationForQuery} afterwards will throw an exception.

- *

Making this elevation provider immutable may reduce its memory usage and make it more efficient.

- * - * @return This elevation provider. - */ - ElevationProvider makeImmutable(); } /** @@ -1036,63 +1001,56 @@ public Elevation getElevationForQuery(String queryString) { return null; } - @Override - public void setElevationForQuery(ElevatingQuery elevatingQuery, Elevation elevation) { - // Do nothing. - } - @Override public int size() { return 0; } - - @Override - public ElevationProvider makeImmutable() { - return this; - } }; /** * Simple query exact match {@link ElevationProvider}. *

* It does not support subset matching (see {@link #parseMatchPolicy(String)}). - *

+ *

+ * Immutable. */ protected static class MapElevationProvider implements ElevationProvider { private final Analyzer queryAnalyzer; - private Map exactMatchElevationMap = new HashMap<>(); + private final Map elevationMap; @SuppressWarnings("WeakerAccess") - public MapElevationProvider(Analyzer queryAnalyzer) { + public MapElevationProvider(Analyzer queryAnalyzer, Map elevationBuilderMap) { this.queryAnalyzer = queryAnalyzer; + elevationMap = buildElevationMap(elevationBuilderMap); } - @Override - public Elevation getElevationForQuery(String queryString) throws IOException { - String analyzedQuery = analyzeQuery(queryString, queryAnalyzer); - return exactMatchElevationMap.get(analyzedQuery); - } - - @Override - public void setElevationForQuery(ElevatingQuery elevatingQuery, Elevation elevation) throws IOException { - if (elevatingQuery.subsetMatch) { - throw new UnsupportedOperationException("Subset matching is not supported by " + getClass().getName()); + private Map buildElevationMap(Map elevationBuilderMap) { + Map elevationMap = Maps.newHashMapWithExpectedSize(elevationBuilderMap.size()); + for (Map.Entry entry : elevationBuilderMap.entrySet()) { + ElevatingQuery elevatingQuery = entry.getKey(); + if (elevatingQuery.subsetMatch) { + throw new UnsupportedOperationException("Subset matching is not supported by " + getClass().getName()); + } + String analyzedQuery = analyzeQuery(elevatingQuery.queryString, queryAnalyzer); + Elevation elevation = entry.getValue().build(); + Elevation duplicateElevation = elevationMap.put(analyzedQuery, elevation); + if (duplicateElevation != null) { + throw new IllegalArgumentException("Duplicate elevation for query \"" + analyzedQuery + "\""); + } } - String analyzedQuery = analyzeQuery(elevatingQuery.queryString, queryAnalyzer); - Elevation duplicateElevation = exactMatchElevationMap.put(analyzedQuery, elevation); - assert duplicateElevation == null; + return Collections.unmodifiableMap(elevationMap); } @Override - public int 
size() { - return exactMatchElevationMap.size(); + public Elevation getElevationForQuery(String queryString) { + String analyzedQuery = analyzeQuery(queryString, queryAnalyzer); + return elevationMap.get(analyzedQuery); } @Override - public ElevationProvider makeImmutable() { - exactMatchElevationMap = Collections.unmodifiableMap(exactMatchElevationMap); - return this; + public int size() { + return elevationMap.size(); } } @@ -1110,7 +1068,8 @@ protected static class ElevatingQuery { * @param queryString The query to elevate documents for (not the analyzed form). * @param subsetMatch Whether to match a subset of query terms. */ - private ElevatingQuery(String queryString, boolean subsetMatch) throws IOException { + @SuppressWarnings("WeakerAccess") + protected ElevatingQuery(String queryString, boolean subsetMatch) throws IOException { this.queryString = queryString; this.subsetMatch = subsetMatch; } @@ -1218,12 +1177,12 @@ protected static class Elevation { * * @param elevatedIds The ids of the elevated documents that should appear on top of search results; can be null. * @param excludedIds The ids of the excluded documents that should not appear in search results; can be null. - * @param indexedValueProvider Provides indexed values. + * @param indexedValueProvider Provides the indexed value corresponding to a readable value.. * @param queryFieldName The field name to use to create query terms. * @param keepElevationPriority Whether to keep the elevation priority order. 
*/ private Elevation(Collection elevatedIds, Collection excludedIds, - IndexedValueProvider indexedValueProvider, String queryFieldName, + UnaryOperator indexedValueProvider, String queryFieldName, boolean keepElevationPriority) { if (elevatedIds == null || elevatedIds.isEmpty()) { this.elevatedIds = Collections.emptySet(); @@ -1238,7 +1197,7 @@ private Elevation(Collection elevatedIds, Collection excludedIds } int priorityLevel = elevatedIds.size(); for (String elevatedId : elevatedIds) { - elevatedIdsBuilder.add(indexedValueProvider.getIndexedValue(elevatedId)); + elevatedIdsBuilder.add(indexedValueProvider.apply(elevatedId)); TermQuery tq = new TermQuery(new Term(queryFieldName, elevatedId)); includeQueryBuilder.add(tq, BooleanClause.Occur.SHOULD); if (keepElevationPriority) { @@ -1257,7 +1216,7 @@ private Elevation(Collection elevatedIds, Collection excludedIds ImmutableSet.Builder excludedIdsBuilder = ImmutableSet.builder(); List excludeQueriesBuilder = new ArrayList<>(excludedIds.size()); for (String excludedId : excludedIds) { - excludedIdsBuilder.add(indexedValueProvider.getIndexedValue(excludedId)); + excludedIdsBuilder.add(indexedValueProvider.apply(excludedId)); excludeQueriesBuilder.add(new TermQuery(new Term(queryFieldName, excludedId))); } this.excludedIds = excludedIdsBuilder.build(); @@ -1315,8 +1274,7 @@ private int docVal(int doc) { if (!keepElevationPriority) return 1; BytesRef id = termValues[slot]; - Integer priority = elevation.priorities.get(id); - return priority == null ? 
0 : priority; + return elevation.priorities.getOrDefault(id, 0); } } return 0; @@ -1344,7 +1302,7 @@ protected void doSetNextReader(LeafReaderContext context) throws IOException { for (String id : elevation.elevatedIds) { term.copyChars(id); - if (!seen.contains(id) && termsEnum.seekExact(term.get())) { + if (seen.contains(id) == false && termsEnum.seekExact(term.get())) { postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE); int docId = postingsEnum.nextDoc(); while (docId != DocIdSetIterator.NO_MORE_DOCS && liveDocs != null && !liveDocs.get(docId)) { @@ -1374,17 +1332,4 @@ public int compareTop(int doc) { }; } } - - /** - * Provides indexed value from readable value. - */ - private interface IndexedValueProvider { - /** - * Gets the indexed value corresponding to a readable value. - * - * @param readableValue The readable value. - * @return The indexed value. - */ - String getIndexedValue(String readableValue); - } } From 0bad4c66cf4ce89bc6cca3a7e631c20b23c500c4 Mon Sep 17 00:00:00 2001 From: broustant Date: Wed, 4 Apr 2018 17:51:03 +0200 Subject: [PATCH 3/8] Remove exception handlers and refactor getBoostDocs --- .../component/QueryElevationComponent.java | 334 +++++------------- 1 file changed, 97 insertions(+), 237 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java b/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java index 2bd31e92e8fc..c2eac7b9f392 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java @@ -132,12 +132,6 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore */ private final Map elevationProviderCache = new WeakHashMap<>(); - /** - * Keep track of a counter each time a configuration file cannot be loaded. 
- * Stop trying to load after {@link #getConfigLoadingExceptionHandler()}.{@link LoadingExceptionHandler#getLoadingMaxAttempts getLoadingMaxAttempts()}. - */ - private final Map configLoadingErrorCounters = new WeakHashMap<>(); - @Override public void init(NamedList args) { this.initArgs = args.toSolrParams(); @@ -160,7 +154,7 @@ public void inform(SolrCore core) { handleInitializationException(e, e.exceptionCause); } catch (Exception e) { assert !initialized; - handleInitializationException(e, InitializationExceptionHandler.ExceptionCause.OTHER); + handleInitializationException(e, InitializationExceptionCause.OTHER); } } @@ -169,7 +163,7 @@ private void parseFieldType(SolrCore core) throws InitializationException { if (a != null) { FieldType ft = core.getLatestSchema().getFieldTypes().get(a); if (ft == null) { - throw new InitializationException("Parameter " + FIELD_TYPE + " defines an unknown field type \"" + a + "\"", InitializationExceptionHandler.ExceptionCause.UNKNOWN_FIELD_TYPE); + throw new InitializationException("Parameter " + FIELD_TYPE + " defines an unknown field type \"" + a + "\"", InitializationExceptionCause.UNKNOWN_FIELD_TYPE); } queryAnalyzer = ft.getQueryAnalyzer(); } @@ -178,7 +172,7 @@ private void parseFieldType(SolrCore core) throws InitializationException { private void setUniqueKeyField(SolrCore core) throws InitializationException { SchemaField sf = core.getLatestSchema().getUniqueKeyField(); if (sf == null) { - throw new InitializationException("This component requires the schema to have a uniqueKeyField", InitializationExceptionHandler.ExceptionCause.MISSING_UNIQUE_KEY_FIELD); + throw new InitializationException("This component requires the schema to have a uniqueKeyField", InitializationExceptionCause.MISSING_UNIQUE_KEY_FIELD); } uniqueKeyFieldType = sf.getType(); uniqueKeyFieldName = sf.getName(); @@ -205,9 +199,6 @@ private void parseKeepElevationPriority() { /** * (Re)Loads elevation configuration. - *

- * Protected access to be called by extending class. - *

* * @param core The core holding this component. * @return The number of elevation rules parsed. @@ -218,9 +209,9 @@ protected int loadElevationConfiguration(SolrCore core) throws Exception { elevationProviderCache.clear(); String configFileName = initArgs.get(CONFIG_FILE); if (configFileName == null) { - // Throw an exception which can be handled by an overriding InitializationExceptionHandler (see handleInitializationException()). - // The default InitializationExceptionHandler will simply skip this exception. - throw new InitializationException("Missing component parameter " + CONFIG_FILE + " - it has to define the path to the elevation configuration file", InitializationExceptionHandler.ExceptionCause.NO_CONFIG_FILE_DEFINED); + // Throw an exception which is handled by handleInitializationException(). + // If not overridden handleInitializationException() simply skips this exception. + throw new InitializationException("Missing component parameter " + CONFIG_FILE + " - it has to define the path to the elevation configuration file", InitializationExceptionCause.NO_CONFIG_FILE_DEFINED); } boolean configFileExists = false; ElevationProvider elevationProvider = NO_OP_ELEVATION_PROVIDER; @@ -234,12 +225,12 @@ protected int loadElevationConfiguration(SolrCore core) throws Exception { File fC = new File(core.getResourceLoader().getConfigDir(), configFileName); File fD = new File(core.getDataDir(), configFileName); if (fC.exists() == fD.exists()) { - InitializationException e = new InitializationException("Missing config file \"" + configFileName + "\" - either " + fC.getAbsolutePath() + " or " + fD.getAbsolutePath() + " must exist, but not both", InitializationExceptionHandler.ExceptionCause.MISSING_CONFIG_FILE); + InitializationException e = new InitializationException("Missing config file \"" + configFileName + "\" - either " + fC.getAbsolutePath() + " or " + fD.getAbsolutePath() + " must exist, but not both", InitializationExceptionCause.MISSING_CONFIG_FILE); 
elevationProvider = handleConfigLoadingException(e, true); elevationProviderCache.put(null, elevationProvider); } else if (fC.exists()) { if (fC.length() == 0) { - InitializationException e = new InitializationException("Empty config file \"" + configFileName + "\" - " + fC.getAbsolutePath(), InitializationExceptionHandler.ExceptionCause.EMPTY_CONFIG_FILE); + InitializationException e = new InitializationException("Empty config file \"" + configFileName + "\" - " + fC.getAbsolutePath(), InitializationExceptionCause.EMPTY_CONFIG_FILE); elevationProvider = handleConfigLoadingException(e, true); } else { configFileExists = true; @@ -270,6 +261,36 @@ protected int loadElevationConfiguration(SolrCore core) throws Exception { } } + /** + * Handles the exception that occurred while initializing this component. + * If this method does not throw an exception, this component silently fails to initialize + * and is muted with field {@link #initialized} which becomes {@code false}. + */ + protected void handleInitializationException(Exception exception, InitializationExceptionCause cause) { + if (cause != InitializationExceptionCause.NO_CONFIG_FILE_DEFINED) { + throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, + "Error initializing " + QueryElevationComponent.class.getSimpleName(), exception); + } + } + + /** + * Handles an exception that occurred while loading the configuration resource. + * + * @param e The exception caught. + * @param resourceAccessIssue true if the exception has been thrown + * because the resource could not be accessed (missing or cannot be read) + * or the config file is empty; false if the resource has + * been found and accessed but the error occurred while loading the resource + * (invalid format, incomplete or corrupted). + * @return The {@link ElevationProvider} to use if the exception is absorbed. If {@code null} + * is returned, the {@link #NO_OP_ELEVATION_PROVIDER} is used but not cached in + * the {@link ElevationProvider} cache. 
+ * @throws E If the exception is not absorbed. + */ + protected ElevationProvider handleConfigLoadingException(E e, boolean resourceAccessIssue) throws E { + throw e; + } + /** * Gets the {@link ElevationProvider} from the data dir or from the cache. * @@ -302,11 +323,8 @@ ElevationProvider getElevationProvider(IndexReader reader, SolrCore core) throws boolean shouldCache = true; if (loadingException != null) { elevationProvider = handleConfigLoadingException(loadingException, resourceAccessIssue); - // Do not cache the fallback ElevationProvider for the first exceptions because the exception might - // occur only a couple of times and the config file could be loaded correctly afterwards - // (e.g. temporary invalid file access). After some attempts, cache the fallback ElevationProvider - // not to overload the exception handler (and beyond it, the logs probably). - if (incConfigLoadingErrorCount(reader) < getConfigLoadingExceptionHandler().getLoadingMaxAttempts()) { + if (elevationProvider == null) { + elevationProvider = NO_OP_ELEVATION_PROVIDER; shouldCache = false; } } @@ -416,48 +434,6 @@ private boolean parseMatchPolicy(String matchString) { } } - /** - * Potentially handles and captures an exception that occurred while loading the configuration resource. - * - * @param e The exception caught. - * @param resourceAccessIssueOrEmptyConfig true if the exception has been thrown because the resource could not - * be accessed (missing or cannot be read) or the config file is empty; false if the resource has - * been found and accessed but the error occurred while loading the resource - * (invalid format, incomplete or corrupted). - * @return The {@link ElevationProvider} to use if the exception is absorbed. - * @throws E If the exception is not absorbed. 
- */ - private ElevationProvider handleConfigLoadingException(E e, boolean resourceAccessIssueOrEmptyConfig) throws E { - if (getConfigLoadingExceptionHandler().handleLoadingException(e, resourceAccessIssueOrEmptyConfig)) { - return NO_OP_ELEVATION_PROVIDER; - } - assert e != null; - throw e; - } - - private int incConfigLoadingErrorCount(IndexReader reader) { - Integer counter = configLoadingErrorCounters.get(reader); - if (counter == null) { - counter = 1; - } else { - counter++; - } - configLoadingErrorCounters.put(reader, counter); - return counter; - } - - /** - * Potentially handles and captures the exception that occurred while initializing this component. If the exception - * is captured by the handler, this component fails to initialize silently and is muted because field initialized is - * false. - */ - private void handleInitializationException(Exception initializationException, InitializationExceptionHandler.ExceptionCause exceptionCause) { - SolrException solrException = new SolrException(SolrException.ErrorCode.SERVER_ERROR, - "Error initializing " + QueryElevationComponent.class.getSimpleName(), initializationException); - if (!getInitializationExceptionHandler().handleInitializationException(solrException, exceptionCause)) - throw solrException; - } - //--------------------------------------------------------------------------------- // SearchComponent //--------------------------------------------------------------------------------- @@ -633,61 +609,41 @@ private void addDebugInfo(ResponseBuilder rb, Elevation elevation) { public static IntIntHashMap getBoostDocs(SolrIndexSearcher indexSearcher, Map boosted, Map context) throws IOException { - IntIntHashMap boostDocs = null; + IntIntHashMap boostDocs = null; - if (boosted != null) { + if (boosted != null) { - //First see if it's already in the request context. Could have been put there - //by another caller. 
- if (context != null) { - boostDocs = (IntIntHashMap) context.get(BOOSTED_DOCIDS); - } + //First see if it's already in the request context. Could have been put there by another caller. + if (context != null) { + boostDocs = (IntIntHashMap) context.get(BOOSTED_DOCIDS); + if (boostDocs != null) { + return boostDocs; + } + } - if (boostDocs != null) { - return boostDocs; - } - //Not in the context yet so load it. - - SchemaField idField = indexSearcher.getSchema().getUniqueKeyField(); - String fieldName = idField.getName(); - - boostDocs = new IntIntHashMap(boosted.size()); - - Listleaves = indexSearcher.getTopReaderContext().leaves(); - PostingsEnum postingsEnum = null; - for (LeafReaderContext leaf : leaves) { - LeafReader reader = leaf.reader(); - int docBase = leaf.docBase; - Bits liveDocs = reader.getLiveDocs(); - Terms terms = reader.terms(fieldName); - TermsEnum termsEnum = terms.iterator(); - Iterator it = boosted.keySet().iterator(); - while (it.hasNext()) { - BytesRef ref = it.next(); - if (termsEnum.seekExact(ref)) { - postingsEnum = termsEnum.postings(postingsEnum); - int doc = postingsEnum.nextDoc(); - while (doc != PostingsEnum.NO_MORE_DOCS && liveDocs != null && !liveDocs.get(doc)) { - doc = postingsEnum.nextDoc(); - } - if (doc != PostingsEnum.NO_MORE_DOCS) { - //Found the document. - int p = boosted.get(ref); - boostDocs.put(doc+docBase, p); - it.remove(); - } + //Not in the context yet so load it. 
+ boostDocs = new IntIntHashMap(boosted.size()); // docId to boost + for (Map.Entry keyAndBoostPair : boosted.entrySet()) { + final BytesRef uniqueKey = keyAndBoostPair.getKey(); + long segAndId = indexSearcher.lookupId(uniqueKey); // higher 32 bits == segment ID, low 32 bits == doc ID + if (segAndId == -1) { // not found + continue; } + int seg = (int) (segAndId >> 32); + int localDocId = (int) segAndId; + final IndexReaderContext indexReaderContext = indexSearcher.getTopReaderContext().children().get(seg); + int docId = indexReaderContext.docBaseInParent + localDocId; + boostDocs.put(docId, keyAndBoostPair.getValue()); } } - } - if(context != null) { - //noinspection unchecked - context.put(BOOSTED_DOCIDS, boostDocs); - } + if (context != null) { + //noinspection unchecked + context.put(BOOSTED_DOCIDS, boostDocs); + } - return boostDocs; - } + return boostDocs; + } //--------------------------------------------------------------------------------- // SolrInfoBean @@ -726,23 +682,6 @@ protected boolean getDefaultSubsetMatch() { return DEFAULT_SUBSET_MATCH; } - /** - * Gets the {@link InitializationExceptionHandler} that handles exception thrown during the initialization of the - * elevation configuration. - */ - @SuppressWarnings("WeakerAccess") - protected InitializationExceptionHandler getInitializationExceptionHandler() { - return InitializationExceptionHandler.NO_OP; - } - - /** - * Gets the {@link LoadingExceptionHandler} that handles exception thrown during the loading of the elevation configuration. - */ - @SuppressWarnings("WeakerAccess") - protected LoadingExceptionHandler getConfigLoadingExceptionHandler() { - return LoadingExceptionHandler.NO_OP; - } - /** * Creates the {@link ElevationProvider} to set during configuration loading. The same instance will be used later * when elevating results for queries. 
@@ -778,15 +717,13 @@ private static String analyzeQuery(String queryString, Analyzer queryAnalyzer) { } private static void splitQueryTermsWithAnalyzer(String queryString, Analyzer queryAnalyzer, Collection tokenCollector) { - try { - TokenStream tokens = queryAnalyzer.tokenStream("", new StringReader(queryString)); + try (TokenStream tokens = queryAnalyzer.tokenStream("", new StringReader(queryString))) { tokens.reset(); CharTermAttribute termAttribute = tokens.addAttribute(CharTermAttribute.class); while (tokens.incrementToken()) { tokenCollector.add(new String(termAttribute.buffer(), 0, termAttribute.length())); } tokens.end(); - tokens.close(); } catch (IOException e) { // Will never be thrown since we read a StringReader. throw Throwables.propagate(e); @@ -837,122 +774,45 @@ void clearElevationProviderCache() { } //--------------------------------------------------------------------------------- - // Exception classes + // Exception //--------------------------------------------------------------------------------- private static class InitializationException extends Exception { - final InitializationExceptionHandler.ExceptionCause exceptionCause; - InitializationException(String message, InitializationExceptionHandler.ExceptionCause exceptionCause) { + private final InitializationExceptionCause exceptionCause; + + InitializationException(String message, InitializationExceptionCause exceptionCause) { super(message); this.exceptionCause = exceptionCause; } } - /** - * Handles resource loading exception. - */ - protected interface InitializationExceptionHandler { - - /** - * NoOp {@link LoadingExceptionHandler} that does not capture any exception and simply returns false. 
- */ - InitializationExceptionHandler NO_OP = new InitializationExceptionHandler() { - @Override - public boolean handleInitializationException(Exception e, ExceptionCause exceptionCause) { - return exceptionCause == ExceptionCause.NO_CONFIG_FILE_DEFINED; + protected enum InitializationExceptionCause { + /** + * The component parameter {@link #FIELD_TYPE} defines an unknown field type. + */ + UNKNOWN_FIELD_TYPE, + /** + * This component requires the schema to have a uniqueKeyField, which it does not have. + */ + MISSING_UNIQUE_KEY_FIELD, + /** + * Missing component parameter {@link #CONFIG_FILE} - it has to define the path to the elevation configuration file (e.g. elevate.xml). + */ + NO_CONFIG_FILE_DEFINED, + /** + * The elevation configuration file (e.g. elevate.xml) cannot be found, or is defined in both conf/ and data/ directories. + */ + MISSING_CONFIG_FILE, + /** + * The elevation configuration file (e.g. elevate.xml) is empty. + */ + EMPTY_CONFIG_FILE, + /** + * Unclassified exception cause. + */ + OTHER, } - }; - - enum ExceptionCause { - /** - * The component parameter {@link #FIELD_TYPE} defines an unknown field type. - */ - UNKNOWN_FIELD_TYPE, - /** - * This component requires the schema to have a uniqueKeyField, which it does not have. - */ - MISSING_UNIQUE_KEY_FIELD, - /** - * Missing component parameter {@link #CONFIG_FILE} - it has to define the path to the elevation configuration file (e.g. elevate.xml). - */ - NO_CONFIG_FILE_DEFINED, - /** - * The elevation configuration file (e.g. elevate.xml) cannot be found, or is defined in both conf/ and data/ directories. - */ - MISSING_CONFIG_FILE, - /** - * The elevation configuration file (e.g. elevate.xml) is empty. - */ - EMPTY_CONFIG_FILE, - /** - * Unclassified exception cause. - */ - OTHER, - } - - /** - * Potentially handles and captures an exception that occurred while initializing the component. - * If the exception is captured, the component fails to initialize silently and is muted. 
- * - * @param e The exception caught. - * @param exceptionCause The exception cause. - * @param The exception type. - * @return true if the exception is handled and captured by this handler (and thus will not be - * thrown anymore); false if the exception is not captured, in this case it will be probably - * thrown again by the calling code. - * @throws E If this handler throws the exception itself (it may add some cause or message). - */ - boolean handleInitializationException(E e, ExceptionCause exceptionCause) throws E; - } - - /** - * Handles resource loading exception. - */ - protected interface LoadingExceptionHandler { - - /** - * NoOp {@link LoadingExceptionHandler} that does not capture any exception and simply returns false. - */ - LoadingExceptionHandler NO_OP = new LoadingExceptionHandler() { - @Override - public boolean handleLoadingException(Exception e, boolean resourceAccessIssue) { - return false; - } - - @Override - public int getLoadingMaxAttempts() { - return 0; - } - }; - - /** - * Potentially handles and captures an exception that occurred while loading a resource. - * - * @param e The exception caught. - * @param resourceAccessIssue true if the exception has been thrown because the resource could not - * be accessed (missing or cannot be read); false if the resource has - * been found and accessed but the error occurred while loading the resource - * (invalid format, incomplete or corrupted). - * @param The exception type. - * @return true if the exception is handled and captured by this handler (and thus will not be - * thrown anymore); false if the exception is not captured, in this case it will be probably - * thrown again by the calling code. - * @throws E If this handler throws the exception itself (it may add some cause or message). 
- */ - boolean handleLoadingException(E e, boolean resourceAccessIssue) throws E; - - /** - * Gets the maximum number of attempts to load the resource in case of error (resource not found, I/O error, - * invalid format), for each Solr core. - * After this number of attempts (so {@link #handleLoadingException} is called this number of times), - * {@link #handleLoadingException} will not be called anymore for the specific Solr core, and the resource is - * considered empty afterwards (until the core is reloaded). - * - * @return The maximum number of attempts to load the resource. The value must be >= 0. - */ - int getLoadingMaxAttempts(); - } //--------------------------------------------------------------------------------- // Elevation classes From ad0e26652332eaa53034ed7fe0a998cc4702e8a6 Mon Sep 17 00:00:00 2001 From: broustant Date: Thu, 31 May 2018 23:37:22 -0400 Subject: [PATCH 4/8] * rename keepElevationPriority to useConfiguredElevatedOrder * solr ref guide docs for useConfiguredElevatedOrder * QueryElevationComponentTest.testSorting modernized a bit and added a test for useConfiguredElevatedOrder * Uses BytesRef for uniqueKey ID pervasively instead of String. This is not just internal to QEC since boosted docs are used by some other components. * removed BOOSTED_PRIORITY entry into SolrRequest.getContext() as it's obsolete; BOOSTED is enough -- the priority is implied by the iteration order. * setSort will short-circuit if there are no elevated Ids * ElevationComparatorSource now re-uses existing logic of getBoostDocs; easier to maintain.
--- .../component/QueryElevationComponent.java | 450 ++++++++---------- .../transform/BaseEditorialTransformer.java | 17 +- .../transform/ElevatedMarkerFactory.java | 6 +- .../transform/ExcludedMarkerFactory.java | 6 +- .../solr/search/AbstractReRankQuery.java | 7 +- .../solr/search/CollapsingQParserPlugin.java | 25 +- .../apache/solr/search/ReRankCollector.java | 7 +- .../QueryElevationComponentTest.java | 193 ++++---- .../src/the-query-elevation-component.adoc | 11 + .../common/params/QueryElevationParams.java | 7 +- 10 files changed, 340 insertions(+), 389 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java b/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java index c2eac7b9f392..23d4380c9a92 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java @@ -24,38 +24,47 @@ import java.io.File; import java.io.IOException; import java.io.InputStream; -import java.io.StringReader; import java.lang.invoke.MethodHandles; -import java.util.*; -import java.util.function.UnaryOperator; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.WeakHashMap; import java.util.stream.Collector; import java.util.stream.Collectors; import com.carrotsearch.hppc.IntIntHashMap; +import com.carrotsearch.hppc.cursors.IntIntCursor; import com.google.common.annotations.VisibleForTesting; - import com.google.common.base.Throwables; +import com.google.common.collect.Collections2; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Maps; import 
org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.index.*; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReaderContext; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BoostQuery; -import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.FieldComparatorSource; import org.apache.lucene.search.SimpleFieldComparator; import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; import org.apache.lucene.search.TermQuery; -import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; -import org.apache.lucene.util.SentinelIntSet; import org.apache.solr.cloud.ZkController; import org.apache.solr.common.SolrException; import org.apache.solr.common.params.QueryElevationParams; @@ -65,6 +74,7 @@ import org.apache.solr.common.util.StrUtils; import org.apache.solr.core.Config; import org.apache.solr.core.SolrCore; +import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.response.transform.ElevatedMarkerFactory; import org.apache.solr.response.transform.ExcludedMarkerFactory; import org.apache.solr.schema.FieldType; @@ -90,6 +100,7 @@ * @since solr 1.3 */ public class QueryElevationComponent extends SearchComponent implements SolrCoreAware { + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); // Constants used in solrconfig.xml @@ -98,32 +109,33 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore @VisibleForTesting static final String CONFIG_FILE = "config-file"; private static final String EXCLUDE = 
"exclude"; - public static final String BOOSTED = "BOOSTED"; + + /** @see #getBoostDocs(SolrIndexSearcher, Set, Map) */ private static final String BOOSTED_DOCIDS = "BOOSTED_DOCIDS"; - public static final String BOOSTED_PRIORITY = "BOOSTED_PRIORITY"; + /** Key to {@link SolrQueryRequest#getContext()} for a {@code Set} of included IDs in configured + * order (so-called priority). */ + public static final String BOOSTED = "BOOSTED"; + /** Key to {@link SolrQueryRequest#getContext()} for a {@code Set} of excluded IDs. */ public static final String EXCLUDED = "EXCLUDED"; private static final boolean DEFAULT_FORCE_ELEVATION = false; - private static final boolean DEFAULT_KEEP_ELEVATION_PRIORITY = true; + private static final boolean DEFAULT_USE_CONFIGURED_ELEVATED_ORDER = true; private static final boolean DEFAULT_SUBSET_MATCH = false; private static final String DEFAULT_EXCLUDE_MARKER_FIELD_NAME = "excluded"; private static final String DEFAULT_EDITORIAL_MARKER_FIELD_NAME = "elevated"; private static final Collector QUERY_EXACT_JOINER = Collectors.joining(" "); - // Runtime param private SolrParams initArgs; private Analyzer queryAnalyzer; - private String uniqueKeyFieldName; - private FieldType uniqueKeyFieldType; - /** - * Provides the indexed value corresponding to a readable value. 
- */ - private UnaryOperator indexedValueProvider; + private SchemaField uniqueKeyField; + /** @see QueryElevationParams#FORCE_ELEVATION */ @VisibleForTesting boolean forceElevation; - private boolean keepElevationPriority; + /** @see QueryElevationParams#USE_CONFIGURED_ELEVATED_ORDER */ + private boolean useConfiguredElevatedOrder; + private boolean initialized; /** @@ -146,7 +158,7 @@ public void inform(SolrCore core) { parseExcludedMarkerFieldName(core); parseEditorialMarkerFieldName(core); parseForceElevation(); - parseKeepElevationPriority(); + parseUseConfiguredOrderForElevations(); loadElevationConfiguration(core); initialized = true; } catch (InitializationException e) { @@ -170,13 +182,10 @@ private void parseFieldType(SolrCore core) throws InitializationException { } private void setUniqueKeyField(SolrCore core) throws InitializationException { - SchemaField sf = core.getLatestSchema().getUniqueKeyField(); - if (sf == null) { + uniqueKeyField = core.getLatestSchema().getUniqueKeyField(); + if (uniqueKeyField == null) { throw new InitializationException("This component requires the schema to have a uniqueKeyField", InitializationExceptionCause.MISSING_UNIQUE_KEY_FIELD); } - uniqueKeyFieldType = sf.getType(); - uniqueKeyFieldName = sf.getName(); - indexedValueProvider = readableValue -> uniqueKeyFieldType.readableToIndexed(readableValue); } private void parseExcludedMarkerFieldName(SolrCore core) { @@ -190,11 +199,11 @@ private void parseEditorialMarkerFieldName(SolrCore core) { } private void parseForceElevation() { - forceElevation = initArgs.getBool(QueryElevationParams.FORCE_ELEVATION, getDefaultForceElevation()); + forceElevation = initArgs.getBool(QueryElevationParams.FORCE_ELEVATION, DEFAULT_FORCE_ELEVATION); } - private void parseKeepElevationPriority() { - keepElevationPriority = initArgs.getBool(QueryElevationParams.KEEP_ELEVATION_PRIORITY, getDefaultKeepElevationPriority()); + private void parseUseConfiguredOrderForElevations() { + 
useConfiguredElevatedOrder = initArgs.getBool(QueryElevationParams.USE_CONFIGURED_ELEVATED_ORDER, DEFAULT_USE_CONFIGURED_ELEVATED_ORDER); } /** @@ -276,15 +285,15 @@ protected void handleInitializationException(Exception exception, Initialization /** * Handles an exception that occurred while loading the configuration resource. * - * @param e The exception caught. + * @param e The exception caught. * @param resourceAccessIssue true if the exception has been thrown * because the resource could not be accessed (missing or cannot be read) * or the config file is empty; false if the resource has * been found and accessed but the error occurred while loading the resource * (invalid format, incomplete or corrupted). * @return The {@link ElevationProvider} to use if the exception is absorbed. If {@code null} - * is returned, the {@link #NO_OP_ELEVATION_PROVIDER} is used but not cached in - * the {@link ElevationProvider} cache. + * is returned, the {@link #NO_OP_ELEVATION_PROVIDER} is used but not cached in + * the {@link ElevationProvider} cache. * @throws E If the exception is not absorbed. */ protected ElevationProvider handleConfigLoadingException(E e, boolean resourceAccessIssue) throws E { @@ -377,8 +386,7 @@ private ElevationProvider loadElevationProvider(SolrCore core) throws IOExceptio */ @SuppressWarnings("WeakerAccess") protected ElevationProvider loadElevationProvider(Config config) throws IOException { - Map elevationBuilderMap = keepElevationPriority ? 
- new LinkedHashMap<>() : new HashMap<>(); + Map elevationBuilderMap = new LinkedHashMap<>(); XPath xpath = XPathFactory.newInstance().newXPath(); NodeList nodes = (NodeList) config.evaluate("elevate/query", XPathConstants.NODESET); for (int i = 0; i < nodes.getLength(); i++) { @@ -395,6 +403,9 @@ protected ElevationProvider loadElevationProvider(Config config) throws IOExcept "query requires '' child"); } + if (children.getLength() == 0) { // weird + continue; + } ElevationBuilder elevationBuilder = new ElevationBuilder(); for (int j = 0; j < children.getLength(); j++) { Node child = children.item(j); @@ -402,11 +413,11 @@ protected ElevationProvider loadElevationProvider(Config config) throws IOExcept String e = DOMUtil.getAttr(child, EXCLUDE, null); if (e != null) { if (Boolean.valueOf(e)) { - elevationBuilder.addExcludedId(id); + elevationBuilder.addExcludedIds(Collections.singleton(id)); continue; } } - elevationBuilder.addElevatedId(id); + elevationBuilder.addElevatedIds(Collections.singletonList(id)); } // It is allowed to define multiple times different elevations for the same query. 
In this case the elevations @@ -423,7 +434,7 @@ protected ElevationProvider loadElevationProvider(Config config) throws IOExcept private boolean parseMatchPolicy(String matchString) { if (matchString == null) { - return getDefaultSubsetMatch(); + return DEFAULT_SUBSET_MATCH; } else if (matchString.equalsIgnoreCase("exact")) { return false; } else if (matchString.equalsIgnoreCase("subset")) { @@ -486,7 +497,6 @@ private Elevation getElevation(ResponseBuilder rb) { private void setQuery(ResponseBuilder rb, Elevation elevation) { rb.req.getContext().put(BOOSTED, elevation.elevatedIds); - rb.req.getContext().put(BOOSTED_PRIORITY, elevation.priorities); // Change the query to insert forced documents SolrParams params = rb.req.getParams(); @@ -504,7 +514,7 @@ private void setQuery(ResponseBuilder rb, Elevation elevation) { rb.req.getContext().put(EXCLUDED, elevation.excludedIds); } else { for (TermQuery tq : elevation.excludeQueries) { - queryBuilder.add(new BooleanClause(tq, BooleanClause.Occur.MUST_NOT)); + queryBuilder.add(tq, BooleanClause.Occur.MUST_NOT); } } } @@ -512,9 +522,14 @@ private void setQuery(ResponseBuilder rb, Elevation elevation) { } } - private void setSort(ResponseBuilder rb, Elevation elevation) { + private void setSort(ResponseBuilder rb, Elevation elevation) throws IOException { + if (elevation.elevatedIds.isEmpty()) { + return; + } boolean forceElevation = rb.req.getParams().getBool(QueryElevationParams.FORCE_ELEVATION, this.forceElevation); - ElevationComparatorSource comparator = new ElevationComparatorSource(elevation); + boolean useConfigured = rb.req.getParams().getBool(QueryElevationParams.USE_CONFIGURED_ELEVATED_ORDER, this.useConfiguredElevatedOrder); + final IntIntHashMap elevatedWithPriority = getBoostDocs(rb.req.getSearcher(), elevation.elevatedIds, rb.req.getContext()); + ElevationComparatorSource comparator = new ElevationComparatorSource(elevatedWithPriority, useConfigured); setSortSpec(rb, forceElevation, comparator); 
setGroupingSpec(rb, forceElevation, comparator); } @@ -607,44 +622,50 @@ private void addDebugInfo(ResponseBuilder rb, Elevation elevation) { // Boosted docs helper //--------------------------------------------------------------------------------- - public static IntIntHashMap getBoostDocs(SolrIndexSearcher indexSearcher, Map boosted, Map context) throws IOException { - - IntIntHashMap boostDocs = null; + /** + * Returns a map of docIds elevated with a priority value > 0. The mapping is looked up and cached in {@code context} when + * not null. {@code boosted} are the set of uniqueKey values to be boosted in priority order. + */ + public static IntIntHashMap getBoostDocs(SolrIndexSearcher indexSearcher, Set boosted, Map context) throws IOException { - if (boosted != null) { + IntIntHashMap boostDocs = null; - //First see if it's already in the request context. Could have been put there by another caller. - if (context != null) { - boostDocs = (IntIntHashMap) context.get(BOOSTED_DOCIDS); - if (boostDocs != null) { - return boostDocs; - } - } + if (boosted != null) { - //Not in the context yet so load it. - boostDocs = new IntIntHashMap(boosted.size()); // docId to boost - for (Map.Entry keyAndBoostPair : boosted.entrySet()) { - final BytesRef uniqueKey = keyAndBoostPair.getKey(); - long segAndId = indexSearcher.lookupId(uniqueKey); // higher 32 bits == segment ID, low 32 bits == doc ID - if (segAndId == -1) { // not found - continue; - } - int seg = (int) (segAndId >> 32); - int localDocId = (int) segAndId; - final IndexReaderContext indexReaderContext = indexSearcher.getTopReaderContext().children().get(seg); - int docId = indexReaderContext.docBaseInParent + localDocId; - boostDocs.put(docId, keyAndBoostPair.getValue()); + //First see if it's already in the request context. Could have been put there by another caller. 
+ if (context != null) { + boostDocs = (IntIntHashMap) context.get(BOOSTED_DOCIDS); + if (boostDocs != null) { + return boostDocs; } } - if (context != null) { - //noinspection unchecked - context.put(BOOSTED_DOCIDS, boostDocs); + //Not in the context yet so load it. + boostDocs = new IntIntHashMap(boosted.size()); // docId to boost + int priority = boosted.size() + 1; // the corresponding priority for each boosted key (starts at this; decrements down) + for (BytesRef uniqueKey : boosted) { + priority--; // therefore first == boosted.size(); last will be 1 + long segAndId = indexSearcher.lookupId(uniqueKey); // higher 32 bits == segment ID, low 32 bits == doc ID + if (segAndId == -1) { // not found + continue; + } + int seg = (int) (segAndId >> 32); + int localDocId = (int) segAndId; + final IndexReaderContext indexReaderContext = indexSearcher.getTopReaderContext().children().get(seg); + int docId = indexReaderContext.docBaseInParent + localDocId; + boostDocs.put(docId, priority); } + assert priority == 1; // the last priority (lowest) + } - return boostDocs; + if (context != null) { + //noinspection unchecked + context.put(BOOSTED_DOCIDS, boostDocs); } + return boostDocs; + } + //--------------------------------------------------------------------------------- // SolrInfoBean //--------------------------------------------------------------------------------- @@ -658,30 +679,6 @@ public String getDescription() { // Overrides //--------------------------------------------------------------------------------- - /** - * Gets the default value for {@link org.apache.solr.common.params.QueryElevationParams#FORCE_ELEVATION} parameter. - */ - @SuppressWarnings("WeakerAccess") - protected boolean getDefaultForceElevation() { - return DEFAULT_FORCE_ELEVATION; - } - - /** - * Gets the default value for {@link #DEFAULT_KEEP_ELEVATION_PRIORITY} parameter.
- */ - @SuppressWarnings("WeakerAccess") - protected boolean getDefaultKeepElevationPriority() { - return DEFAULT_KEEP_ELEVATION_PRIORITY; - } - - /** - * Gets the default subset match policy. - */ - @SuppressWarnings("WeakerAccess") - protected boolean getDefaultSubsetMatch() { - return DEFAULT_SUBSET_MATCH; - } - /** * Creates the {@link ElevationProvider} to set during configuration loading. The same instance will be used later * when elevating results for queries. @@ -717,15 +714,14 @@ private static String analyzeQuery(String queryString, Analyzer queryAnalyzer) { } private static void splitQueryTermsWithAnalyzer(String queryString, Analyzer queryAnalyzer, Collection tokenCollector) { - try (TokenStream tokens = queryAnalyzer.tokenStream("", new StringReader(queryString))) { + try (TokenStream tokens = queryAnalyzer.tokenStream("", queryString)) { tokens.reset(); CharTermAttribute termAttribute = tokens.addAttribute(CharTermAttribute.class); while (tokens.incrementToken()) { - tokenCollector.add(new String(termAttribute.buffer(), 0, termAttribute.length())); + tokenCollector.add(termAttribute.toString()); } tokens.end(); } catch (IOException e) { - // Will never be thrown since we read a StringReader. throw Throwables.propagate(e); } } @@ -737,7 +733,6 @@ private static void splitQueryTermsWithAnalyzer(String queryString, Analyzer que /** * Helpful for testing without loading config.xml. * - * * @param reader The {@link org.apache.lucene.index.IndexReader}. * @param queryString The query for which to elevate some documents. If the query has already been defined an * elevation, this method overwrites it. @@ -747,19 +742,13 @@ private static void splitQueryTermsWithAnalyzer(String queryString, Analyzer que * @throws java.io.IOException If there is a low-level I/O error. 
*/ @VisibleForTesting - void setTopQueryResults(IndexReader reader, String queryString, boolean subsetMatch, String[] elevatedIds, - String[] excludedIds) throws IOException { + void setTopQueryResults(IndexReader reader, String queryString, boolean subsetMatch, + String[] elevatedIds, String[] excludedIds) throws IOException { clearElevationProviderCache(); - if (elevatedIds == null) { - elevatedIds = new String[0]; - } - if (excludedIds == null) { - excludedIds = new String[0]; - } ElevatingQuery elevatingQuery = new ElevatingQuery(queryString, subsetMatch); - ElevationBuilder elevationBuilder = new ElevationBuilder() - .addElevatedIds(Arrays.asList(elevatedIds)) - .addExcludedIds(Arrays.asList(excludedIds)); + ElevationBuilder elevationBuilder = new ElevationBuilder(); + elevationBuilder.addElevatedIds(elevatedIds == null ? Collections.emptyList() : Arrays.asList(elevatedIds)); + elevationBuilder.addExcludedIds(excludedIds == null ? Collections.emptyList() : Arrays.asList(excludedIds)); Map elevationBuilderMap = ImmutableMap.of(elevatingQuery, elevationBuilder); synchronized (elevationProviderCache) { elevationProviderCache.computeIfAbsent(reader, k -> createElevationProvider(queryAnalyzer, elevationBuilderMap)); @@ -788,46 +777,36 @@ private static class InitializationException extends Exception { } protected enum InitializationExceptionCause { - /** - * The component parameter {@link #FIELD_TYPE} defines an unknown field type. - */ - UNKNOWN_FIELD_TYPE, - /** - * This component requires the schema to have a uniqueKeyField, which it does not have. - */ - MISSING_UNIQUE_KEY_FIELD, - /** - * Missing component parameter {@link #CONFIG_FILE} - it has to define the path to the elevation configuration file (e.g. elevate.xml). - */ - NO_CONFIG_FILE_DEFINED, - /** - * The elevation configuration file (e.g. elevate.xml) cannot be found, or is defined in both conf/ and data/ directories. - */ - MISSING_CONFIG_FILE, - /** - * The elevation configuration file (e.g. 
elevate.xml) is empty. - */ - EMPTY_CONFIG_FILE, - /** - * Unclassified exception cause. - */ - OTHER, - } + /** + * The component parameter {@link #FIELD_TYPE} defines an unknown field type. + */ + UNKNOWN_FIELD_TYPE, + /** + * This component requires the schema to have a uniqueKeyField, which it does not have. + */ + MISSING_UNIQUE_KEY_FIELD, + /** + * Missing component parameter {@link #CONFIG_FILE} - it has to define the path to the elevation configuration file (e.g. elevate.xml). + */ + NO_CONFIG_FILE_DEFINED, + /** + * The elevation configuration file (e.g. elevate.xml) cannot be found, or is defined in both conf/ and data/ directories. + */ + MISSING_CONFIG_FILE, + /** + * The elevation configuration file (e.g. elevate.xml) is empty. + */ + EMPTY_CONFIG_FILE, + /** + * Unclassified exception cause. + */ + OTHER, + } //--------------------------------------------------------------------------------- // Elevation classes //--------------------------------------------------------------------------------- - /** - * Creates an elevation. - * - * @param elevatedIds The ids of the elevated documents that should appear on top of search results; can be null. - * @param excludedIds The ids of the excluded documents that should not appear in search results; can be null. - */ - private Elevation createElevation(Collection elevatedIds, Collection excludedIds) { - return new Elevation(elevatedIds, excludedIds, indexedValueProvider, uniqueKeyFieldName, keepElevationPriority); - } - /** * Provides the elevations defined for queries. */ @@ -896,7 +875,8 @@ private Map buildElevationMap(Mapnull. + * The order is retained. */ - private Set elevatedIds; + private LinkedHashSet elevatedIds; /** * The ids of the excluded documents that should not appear in search results; can be null. 
*/ - private Set excludedIds; + private Set excludedIds; - ElevationBuilder addElevatedId(String id) { - if (elevatedIds == null) { - elevatedIds = createIdSet(); - } - elevatedIds.add(id); - return this; - } + // for temporary/transient use when adding an elevated or excluded ID + private final BytesRefBuilder scratch = new BytesRefBuilder(); ElevationBuilder addElevatedIds(List ids) { + if (elevatedIds == null) { + elevatedIds = new LinkedHashSet<>(Math.max(10, ids.size())); + } for (String id : ids) { - addElevatedId(id); + elevatedIds.add(toBytesRef(id)); } return this; } - ElevationBuilder addExcludedId(String id) { + ElevationBuilder addExcludedIds(Collection ids) { if (excludedIds == null) { - excludedIds = createIdSet(); + excludedIds = new LinkedHashSet<>(Math.max(10, ids.size())); } - excludedIds.add(id); - return this; - } - - ElevationBuilder addExcludedIds(List ids) { for (String id : ids) { - addExcludedId(id); + excludedIds.add(toBytesRef(id)); } return this; } + private BytesRef toBytesRef(String id) { + uniqueKeyField.getType().readableToIndexed(id, scratch); + return scratch.toBytesRef(); + } + ElevationBuilder merge(ElevationBuilder elevationBuilder) { if (elevatedIds == null) { elevatedIds = elevationBuilder.elevatedIds; @@ -1009,12 +988,9 @@ ElevationBuilder merge(ElevationBuilder elevationBuilder) { } Elevation build() { - return createElevation(elevatedIds, excludedIds); + return new Elevation(elevatedIds, excludedIds, uniqueKeyField.getName()); } - private Set createIdSet() { - return (keepElevationPriority ? 
new LinkedHashSet<>() : new HashSet<>()); - } } /** @@ -1025,82 +1001,73 @@ protected static class Elevation { private static final BooleanQuery EMPTY_QUERY = new BooleanQuery.Builder().build(); @VisibleForTesting - final Set elevatedIds; - private final BooleanQuery includeQuery; - @VisibleForTesting - final Map priorities; - private final Set excludedIds; - private final TermQuery[] excludeQueries;//just keep the term query, b/c we will not always explicitly exclude the item based on markExcludes query time param + final Set elevatedIds; // in configured order; not null + private final BooleanQuery includeQuery; // not null + private final Set excludedIds; // not null + //just keep the term query, b/c we will not always explicitly exclude the item based on markExcludes query time param + private final TermQuery[] excludeQueries; //may be null /** * Constructs an elevation. - * * @param elevatedIds The ids of the elevated documents that should appear on top of search results; can be null. + * In configured order. * @param excludedIds The ids of the excluded documents that should not appear in search results; can be null. - * @param indexedValueProvider Provides the indexed value corresponding to a readable value.. * @param queryFieldName The field name to use to create query terms. - * @param keepElevationPriority Whether to keep the elevation priority order. 
*/ - private Elevation(Collection elevatedIds, Collection excludedIds, - UnaryOperator indexedValueProvider, String queryFieldName, - boolean keepElevationPriority) { + private Elevation(Set elevatedIds, Set excludedIds, + String queryFieldName) { if (elevatedIds == null || elevatedIds.isEmpty()) { - this.elevatedIds = Collections.emptySet(); includeQuery = EMPTY_QUERY; - priorities = Collections.emptyMap(); + this.elevatedIds = Collections.emptySet(); } else { - ImmutableSet.Builder elevatedIdsBuilder = ImmutableSet.builder(); + this.elevatedIds = new LinkedHashSet<>(elevatedIds); BooleanQuery.Builder includeQueryBuilder = new BooleanQuery.Builder(); - ImmutableMap.Builder prioritiesBuilder = null; - if (keepElevationPriority) { - prioritiesBuilder = ImmutableMap.builder(); - } - int priorityLevel = elevatedIds.size(); - for (String elevatedId : elevatedIds) { - elevatedIdsBuilder.add(indexedValueProvider.apply(elevatedId)); - TermQuery tq = new TermQuery(new Term(queryFieldName, elevatedId)); - includeQueryBuilder.add(tq, BooleanClause.Occur.SHOULD); - if (keepElevationPriority) { - prioritiesBuilder.put(new BytesRef(elevatedId), priorityLevel--); - } + for (BytesRef elevatedId : elevatedIds) { + includeQueryBuilder.add(new TermQuery(new Term(queryFieldName, elevatedId)), BooleanClause.Occur.SHOULD); } - this.elevatedIds = elevatedIdsBuilder.build(); includeQuery = includeQueryBuilder.build(); - priorities = keepElevationPriority ? 
prioritiesBuilder.build() : null; } if (excludedIds == null || excludedIds.isEmpty()) { this.excludedIds = Collections.emptySet(); excludeQueries = null; } else { - ImmutableSet.Builder excludedIdsBuilder = ImmutableSet.builder(); + this.excludedIds = ImmutableSet.copyOf(excludedIds); List excludeQueriesBuilder = new ArrayList<>(excludedIds.size()); - for (String excludedId : excludedIds) { - excludedIdsBuilder.add(indexedValueProvider.apply(excludedId)); + for (BytesRef excludedId : excludedIds) { excludeQueriesBuilder.add(new TermQuery(new Term(queryFieldName, excludedId))); } - this.excludedIds = excludedIdsBuilder.build(); excludeQueries = excludeQueriesBuilder.toArray(new TermQuery[excludeQueriesBuilder.size()]); } } @Override public String toString() { - return "{elevatedIds=" + elevatedIds + ", excludedIds=" + excludedIds + "}"; + return "{elevatedIds=" + Collections2.transform(elevatedIds, BytesRef::utf8ToString) + + ", excludedIds=" + Collections2.transform(excludedIds, BytesRef::utf8ToString) + "}"; } } + /** Elevates certain docs to the top. */ private class ElevationComparatorSource extends FieldComparatorSource { - private final Elevation elevation; - private final SentinelIntSet ordSet; //the key half of the map - private final BytesRef[] termValues; //the value half of the map + private final IntIntHashMap elevatedWithPriority; + private final boolean useConfiguredElevatedOrder; + private final int[] sortedElevatedDocIds; + + private ElevationComparatorSource(IntIntHashMap elevatedWithPriority, boolean useConfiguredElevatedOrder) { + this.elevatedWithPriority = elevatedWithPriority; + this.useConfiguredElevatedOrder = useConfiguredElevatedOrder; - private ElevationComparatorSource(Elevation elevation) { - this.elevation = elevation; - int size = elevation.elevatedIds.size(); - ordSet = new SentinelIntSet(size, -1); - termValues = keepElevationPriority ? 
new BytesRef[ordSet.keys.length] : null; + // copy elevatedWithPriority keys (doc IDs) into sortedElevatedDocIds, sorted + sortedElevatedDocIds = new int[elevatedWithPriority.size()]; + final Iterator iterator = elevatedWithPriority.iterator(); + for (int i = 0; i < sortedElevatedDocIds.length; i++) { + IntIntCursor next = iterator.next(); + sortedElevatedDocIds[i] = next.key; + } + assert iterator.hasNext() == false; + Arrays.sort(sortedElevatedDocIds); } @Override @@ -1109,8 +1076,28 @@ public FieldComparator newComparator(String fieldName, final int numHit final int[] values = new int[numHits]; int bottomVal; int topVal; - PostingsEnum postingsEnum; - final Set seen = new HashSet<>(elevation.elevatedIds.size()); + + int docBase; + boolean hasElevatedDocsThisSegment; + + @Override + protected void doSetNextReader(LeafReaderContext context) throws IOException { + docBase = context.docBase; + // ascertain if hasElevatedDocsThisSegment + final int idx = Arrays.binarySearch(sortedElevatedDocIds, docBase); + if (idx < 0) { + //first doc in segment isn't elevated (typical). Maybe another is? 
+ int nextIdx = -idx - 1; + if (nextIdx < sortedElevatedDocIds.length) { + int nextElevatedDocId = sortedElevatedDocIds[nextIdx]; + if (nextElevatedDocId > docBase + context.reader().maxDoc()) { + hasElevatedDocsThisSegment = false; + return; + } + } + } + hasElevatedDocsThisSegment = true; + } @Override public int compare(int slot1, int slot2) { @@ -1128,16 +1115,14 @@ public void setTopValue(Integer value) { } private int docVal(int doc) { - if (ordSet.size() > 0) { - int slot = ordSet.find(doc); - if (slot >= 0) { - if (!keepElevationPriority) - return 1; - BytesRef id = termValues[slot]; - return elevation.priorities.getOrDefault(id, 0); - } + if (!hasElevatedDocsThisSegment) { + assert elevatedWithPriority.containsKey(docBase + doc) == false; + return -1; + } else if (useConfiguredElevatedOrder) { + return elevatedWithPriority.getOrDefault(docBase + doc, -1); + } else { + return elevatedWithPriority.containsKey(docBase + doc) ? 1 : -1; } - return 0; } @Override @@ -1150,35 +1135,6 @@ public void copy(int slot, int doc) { values[slot] = docVal(doc); } - @Override - protected void doSetNextReader(LeafReaderContext context) throws IOException { - //convert the ids to Lucene doc ids, the ordSet and termValues needs to be the same size as the number of elevation docs we have - ordSet.clear(); - Terms terms = context.reader().terms(uniqueKeyFieldName); - if (terms == null) return; - TermsEnum termsEnum = terms.iterator(); - BytesRefBuilder term = new BytesRefBuilder(); - Bits liveDocs = context.reader().getLiveDocs(); - - for (String id : elevation.elevatedIds) { - term.copyChars(id); - if (seen.contains(id) == false && termsEnum.seekExact(term.get())) { - postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE); - int docId = postingsEnum.nextDoc(); - while (docId != DocIdSetIterator.NO_MORE_DOCS && liveDocs != null && !liveDocs.get(docId)) { - docId = postingsEnum.nextDoc(); - } - if (docId == DocIdSetIterator.NO_MORE_DOCS ) continue; // must have been 
deleted - int slot = ordSet.put(docId); - if (keepElevationPriority) { - termValues[slot] = term.toBytesRef(); - } - seen.add(id); - assert postingsEnum.nextDoc() == DocIdSetIterator.NO_MORE_DOCS; - } - } - } - @Override public Integer value(int slot) { return values[slot]; diff --git a/solr/core/src/java/org/apache/solr/response/transform/BaseEditorialTransformer.java b/solr/core/src/java/org/apache/solr/response/transform/BaseEditorialTransformer.java index d646ee401c63..6324d8ad2e17 100644 --- a/solr/core/src/java/org/apache/solr/response/transform/BaseEditorialTransformer.java +++ b/solr/core/src/java/org/apache/solr/response/transform/BaseEditorialTransformer.java @@ -20,6 +20,8 @@ import java.util.Set; import org.apache.lucene.index.IndexableField; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefBuilder; import org.apache.solr.common.SolrDocument; import org.apache.solr.schema.FieldType; @@ -47,9 +49,9 @@ public String getName() { @Override public void transform(SolrDocument doc, int docid) { //this only gets added if QueryElevationParams.MARK_EXCLUDED is true - Set ids = getIdSet(); + Set ids = getIdSet(); if (ids != null && ids.isEmpty() == false) { - String key = getKey(doc); + BytesRef key = getKey(doc); doc.setField(name, ids.contains(key)); } else { //if we have no ids, that means we weren't marking, but the user still asked for the field to be added, so just mark everything as false @@ -57,17 +59,20 @@ public void transform(SolrDocument doc, int docid) { } } - protected abstract Set getIdSet(); + protected abstract Set getIdSet(); - protected String getKey(SolrDocument doc) { + protected BytesRef getKey(SolrDocument doc) { Object obj = doc.get(idFieldName); if (obj instanceof IndexableField) { IndexableField f = (IndexableField) obj; + BytesRefBuilder bytesRefBuilder = new BytesRefBuilder(); Number n = f.numericValue(); if (n != null) { - return ft.readableToIndexed(n.toString()); + ft.readableToIndexed(n.toString(), 
bytesRefBuilder); + } else { + ft.readableToIndexed(f.stringValue(), bytesRefBuilder); } - return ft.readableToIndexed(f.stringValue()); + return bytesRefBuilder.get(); } throw new AssertionError("Expected an IndexableField but got: " + obj.getClass()); } diff --git a/solr/core/src/java/org/apache/solr/response/transform/ElevatedMarkerFactory.java b/solr/core/src/java/org/apache/solr/response/transform/ElevatedMarkerFactory.java index 51f3cff96440..e5fb4143b37f 100644 --- a/solr/core/src/java/org/apache/solr/response/transform/ElevatedMarkerFactory.java +++ b/solr/core/src/java/org/apache/solr/response/transform/ElevatedMarkerFactory.java @@ -18,6 +18,7 @@ import java.util.Set; +import org.apache.lucene.util.BytesRef; import org.apache.solr.common.params.SolrParams; import org.apache.solr.handler.component.QueryElevationComponent; import org.apache.solr.request.SolrQueryRequest; @@ -44,9 +45,10 @@ class MarkTransformer extends BaseEditorialTransformer { super(name, idFieldName, ft); } + @SuppressWarnings("unchecked") @Override - protected Set getIdSet() { - return (Set) context.getRequest().getContext().get(QueryElevationComponent.BOOSTED); + protected Set getIdSet() { + return (Set) context.getRequest().getContext().get(QueryElevationComponent.BOOSTED); } } diff --git a/solr/core/src/java/org/apache/solr/response/transform/ExcludedMarkerFactory.java b/solr/core/src/java/org/apache/solr/response/transform/ExcludedMarkerFactory.java index 2d670d7c4c72..2036c488a066 100644 --- a/solr/core/src/java/org/apache/solr/response/transform/ExcludedMarkerFactory.java +++ b/solr/core/src/java/org/apache/solr/response/transform/ExcludedMarkerFactory.java @@ -18,6 +18,7 @@ import java.util.Set; +import org.apache.lucene.util.BytesRef; import org.apache.solr.common.params.SolrParams; import org.apache.solr.handler.component.QueryElevationComponent; import org.apache.solr.request.SolrQueryRequest; @@ -46,9 +47,10 @@ public ExcludedTransformer( String name, String idFieldName, 
FieldType ft) super(name, idFieldName, ft); } + @SuppressWarnings("unchecked") @Override - protected Set getIdSet() { - return (Set)context.getRequest().getContext().get(QueryElevationComponent.EXCLUDED); + protected Set getIdSet() { + return (Set)context.getRequest().getContext().get(QueryElevationComponent.EXCLUDED); } } diff --git a/solr/core/src/java/org/apache/solr/search/AbstractReRankQuery.java b/solr/core/src/java/org/apache/solr/search/AbstractReRankQuery.java index f7679b080efb..0c2fb828aa84 100644 --- a/solr/core/src/java/org/apache/solr/search/AbstractReRankQuery.java +++ b/solr/core/src/java/org/apache/solr/search/AbstractReRankQuery.java @@ -18,6 +18,7 @@ import java.io.IOException; import java.util.Map; +import java.util.Set; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.IndexSearcher; @@ -35,7 +36,7 @@ public abstract class AbstractReRankQuery extends RankQuery { protected Query mainQuery; final protected int reRankDocs; final protected Rescorer reRankQueryRescorer; - protected Map boostedPriority; + protected Set boostedPriority; public AbstractReRankQuery(Query mainQuery, int reRankDocs, Rescorer reRankQueryRescorer) { this.mainQuery = mainQuery; @@ -54,13 +55,13 @@ public MergeStrategy getMergeStrategy() { return null; } + @SuppressWarnings("unchecked") public TopDocsCollector getTopDocsCollector(int len, QueryCommand cmd, IndexSearcher searcher) throws IOException { - if(this.boostedPriority == null) { SolrRequestInfo info = SolrRequestInfo.getRequestInfo(); if(info != null) { Map context = info.getReq().getContext(); - this.boostedPriority = (Map)context.get(QueryElevationComponent.BOOSTED_PRIORITY); + this.boostedPriority = (Set)context.get(QueryElevationComponent.BOOSTED); } } diff --git a/solr/core/src/java/org/apache/solr/search/CollapsingQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/CollapsingQParserPlugin.java index 76a52583e328..0a7febc4d68f 100644 --- 
a/solr/core/src/java/org/apache/solr/search/CollapsingQParserPlugin.java +++ b/solr/core/src/java/org/apache/solr/search/CollapsingQParserPlugin.java @@ -24,7 +24,14 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Set; +import com.carrotsearch.hppc.FloatArrayList; +import com.carrotsearch.hppc.IntArrayList; +import com.carrotsearch.hppc.IntIntHashMap; +import com.carrotsearch.hppc.IntLongHashMap; +import com.carrotsearch.hppc.cursors.IntIntCursor; +import com.carrotsearch.hppc.cursors.IntLongCursor; import org.apache.commons.lang.StringUtils; import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.index.DocValues; @@ -69,13 +76,6 @@ import org.apache.solr.schema.StrField; import org.apache.solr.uninverting.UninvertingReader; -import com.carrotsearch.hppc.FloatArrayList; -import com.carrotsearch.hppc.IntArrayList; -import com.carrotsearch.hppc.IntIntHashMap; -import com.carrotsearch.hppc.IntLongHashMap; -import com.carrotsearch.hppc.cursors.IntIntCursor; -import com.carrotsearch.hppc.cursors.IntLongCursor; - import static org.apache.solr.common.params.CommonParams.SORT; /** @@ -215,7 +215,7 @@ public static class CollapsingPostFilter extends ExtendedQueryBase implements Po public String hint; private boolean needsScores = true; private int nullPolicy; - private Map boosted; + private Set boosted; // ordered by "priority" public static final int NULL_POLICY_IGNORE = 0; public static final int NULL_POLICY_COLLAPSE = 1; public static final int NULL_POLICY_EXPAND = 2; @@ -338,11 +338,6 @@ public CollapsingPostFilter(SolrParams localParams, SolrParams params, SolrQuery } } - private IntIntHashMap getBoostDocs(SolrIndexSearcher indexSearcher, Map boosted, Map context) throws IOException { - IntIntHashMap boostDocs = QueryElevationComponent.getBoostDocs(indexSearcher, boosted, context); - return boostDocs; - } - public DelegatingCollector getFilterCollector(IndexSearcher indexSearcher) { try { @@ -360,10 
+355,10 @@ public DelegatingCollector getFilterCollector(IndexSearcher indexSearcher) { } if(this.boosted == null && context != null) { - this.boosted = (Map)context.get(QueryElevationComponent.BOOSTED_PRIORITY); + this.boosted = (Set)context.get(QueryElevationComponent.BOOSTED); } - boostDocsMap = getBoostDocs(searcher, this.boosted, context); + boostDocsMap = QueryElevationComponent.getBoostDocs(searcher, this.boosted, context); return collectorFactory.getCollector(this.collapseField, this.groupHeadSelector, this.sortSpec, diff --git a/solr/core/src/java/org/apache/solr/search/ReRankCollector.java b/solr/core/src/java/org/apache/solr/search/ReRankCollector.java index 0447053b3cbd..a1689dd7b08d 100644 --- a/solr/core/src/java/org/apache/solr/search/ReRankCollector.java +++ b/solr/core/src/java/org/apache/solr/search/ReRankCollector.java @@ -20,9 +20,10 @@ import java.util.Arrays; import java.util.Comparator; import java.util.Map; +import java.util.Set; + import com.carrotsearch.hppc.IntFloatHashMap; import com.carrotsearch.hppc.IntIntHashMap; - import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.LeafCollector; @@ -46,7 +47,7 @@ public class ReRankCollector extends TopDocsCollector { final private IndexSearcher searcher; final private int reRankDocs; final private int length; - final private Map boostedPriority; + final private Set boostedPriority; // order is the "priority" final private Rescorer reRankQueryRescorer; @@ -55,7 +56,7 @@ public ReRankCollector(int reRankDocs, Rescorer reRankQueryRescorer, QueryCommand cmd, IndexSearcher searcher, - Map boostedPriority) throws IOException { + Set boostedPriority) throws IOException { super(null); this.reRankDocs = reRankDocs; this.length = length; diff --git a/solr/core/src/test/org/apache/solr/handler/component/QueryElevationComponentTest.java b/solr/core/src/test/org/apache/solr/handler/component/QueryElevationComponentTest.java index 
2528b3f56247..98b1e8a6e996 100644 --- a/solr/core/src/test/org/apache/solr/handler/component/QueryElevationComponentTest.java +++ b/solr/core/src/test/org/apache/solr/handler/component/QueryElevationComponentTest.java @@ -16,32 +16,30 @@ */ package org.apache.solr.handler.component; +import java.io.File; +import java.io.FileOutputStream; +import java.io.OutputStreamWriter; +import java.io.PrintWriter; +import java.lang.invoke.MethodHandles; +import java.nio.charset.StandardCharsets; + import org.apache.lucene.index.IndexReader; +import org.apache.lucene.util.BytesRef; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.GroupParams; -import org.apache.solr.common.params.MapSolrParams; import org.apache.solr.common.params.QueryElevationParams; -import org.apache.solr.util.FileUtils; +import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.core.SolrCore; -import org.apache.solr.request.LocalSolrQueryRequest; import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.search.SolrIndexSearcher; +import org.apache.solr.util.FileUtils; import org.junit.Before; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.File; -import java.io.FileOutputStream; -import java.io.OutputStreamWriter; -import java.io.PrintWriter; -import java.lang.invoke.MethodHandles; -import java.nio.charset.StandardCharsets; -import java.util.HashMap; -import java.util.Map; - - public class QueryElevationComponentTest extends SolrTestCaseJ4 { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); @@ -69,6 +67,7 @@ private void init(String config, String schema) throws Exception { assertU(commit()); } + //TODO should be @After ? 
private void delete() throws Exception { deleteCore(); } @@ -363,9 +362,9 @@ public void testInterface() throws Exception { // Make sure the boosts loaded properly assertEquals(7, elevationProvider.size()); - assertEquals(1, elevationProvider.getElevationForQuery("XXXX").priorities.size()); - assertEquals(2, elevationProvider.getElevationForQuery("YYYY").priorities.size()); - assertEquals(3, elevationProvider.getElevationForQuery("ZZZZ").priorities.size()); + assertEquals(1, elevationProvider.getElevationForQuery("XXXX").elevatedIds.size()); + assertEquals(2, elevationProvider.getElevationForQuery("YYYY").elevatedIds.size()); + assertEquals(3, elevationProvider.getElevationForQuery("ZZZZ").elevatedIds.size()); assertEquals(null, elevationProvider.getElevationForQuery("xxxx")); assertEquals(null, elevationProvider.getElevationForQuery("yyyy")); assertEquals(null, elevationProvider.getElevationForQuery("zzzz")); @@ -380,12 +379,12 @@ public void testInterface() throws Exception { comp.inform(core); elevationProvider = comp.getElevationProvider(reader, core); assertEquals(7, elevationProvider.size()); - assertEquals(1, elevationProvider.getElevationForQuery("XXXX").priorities.size()); - assertEquals(2, elevationProvider.getElevationForQuery("YYYY").priorities.size()); - assertEquals(3, elevationProvider.getElevationForQuery("ZZZZ").priorities.size()); - assertEquals(1, elevationProvider.getElevationForQuery("xxxx").priorities.size()); - assertEquals(2, elevationProvider.getElevationForQuery("yyyy").priorities.size()); - assertEquals(3, elevationProvider.getElevationForQuery("zzzz").priorities.size()); + assertEquals(1, elevationProvider.getElevationForQuery("XXXX").elevatedIds.size()); + assertEquals(2, elevationProvider.getElevationForQuery("YYYY").elevatedIds.size()); + assertEquals(3, elevationProvider.getElevationForQuery("ZZZZ").elevatedIds.size()); + assertEquals(1, elevationProvider.getElevationForQuery("xxxx").elevatedIds.size()); + assertEquals(2, 
elevationProvider.getElevationForQuery("yyyy").elevatedIds.size()); + assertEquals(3, elevationProvider.getElevationForQuery("zzzz").elevatedIds.size()); assertEquals("xxxx", comp.analyzeQuery("XXXX")); assertEquals("xxxx yyyy", comp.analyzeQuery("XXXX YYYY")); @@ -524,28 +523,27 @@ public void testMarkExcludes() throws Exception { public void testSorting() throws Exception { try { init("schema12.xml"); - assertU(adoc("id", "a", "title", "ipod trash trash", "str_s1", "a")); - assertU(adoc("id", "b", "title", "ipod ipod trash", "str_s1", "b")); - assertU(adoc("id", "c", "title", "ipod ipod ipod ", "str_s1", "c")); + assertU(adoc("id", "a", "title", "ipod trash trash", "str_s1", "group1")); + assertU(adoc("id", "b", "title", "ipod ipod trash", "str_s1", "group2")); + assertU(adoc("id", "c", "title", "ipod ipod ipod ", "str_s1", "group2")); - assertU(adoc("id", "x", "title", "boosted", "str_s1", "x")); - assertU(adoc("id", "y", "title", "boosted boosted", "str_s1", "y")); - assertU(adoc("id", "z", "title", "boosted boosted boosted", "str_s1", "z")); + assertU(adoc("id", "x", "title", "boosted", "str_s1", "group1")); + assertU(adoc("id", "y", "title", "boosted boosted", "str_s1", "group2")); + assertU(adoc("id", "z", "title", "boosted boosted boosted", "str_s1", "group2")); assertU(commit()); - String query = "title:ipod"; + final String query = "title:ipod"; - Map args = new HashMap<>(); // reusing args & requests this way is a solr-test-antipattern. 
PLEASE DO NOT COPY THIS CODE - args.put(CommonParams.Q, query); - args.put(CommonParams.QT, "/elevate"); - args.put(CommonParams.FL, "id,score"); - args.put("indent", "true"); - //args.put( CommonParams.FL, "id,title,score" ); - SolrQueryRequest req = new LocalSolrQueryRequest(h.getCore(), new MapSolrParams(args)); - IndexReader reader = req.getSearcher().getIndexReader(); - QueryElevationComponent booster = (QueryElevationComponent) req.getCore().getSearchComponent("elevate"); + final SolrParams baseParams = params( + "qt", "/elevate", + "q", query, + "fl", "id,score", + "indent", "true"); + + QueryElevationComponent booster = (QueryElevationComponent) h.getCore().getSearchComponent("elevate"); + IndexReader reader = h.getCore().withSearcher(SolrIndexSearcher::getIndexReader); - assertQ("Make sure standard sort works as expected", req + assertQ("Make sure standard sort works as expected", req(baseParams) , "//*[@numFound='3']" , "//result/doc[1]/str[@name='id'][.='c']" , "//result/doc[2]/str[@name='id'][.='b']" @@ -555,8 +553,7 @@ public void testSorting() throws Exception { // Explicitly set what gets boosted booster.setTopQueryResults(reader, query, false, new String[]{"x", "y", "z"}, null); - req.close(); req = new LocalSolrQueryRequest(h.getCore(), new MapSolrParams(args)); - assertQ("All six should make it", req + assertQ("All six should make it", req(baseParams) , "//*[@numFound='6']" , "//result/doc[1]/str[@name='id'][.='x']" , "//result/doc[2]/str[@name='id'][.='y']" @@ -567,9 +564,8 @@ public void testSorting() throws Exception { ); // now switch the order: - req.close(); req = new LocalSolrQueryRequest(h.getCore(), new MapSolrParams(args)); booster.setTopQueryResults(reader, query, false, new String[]{"a", "x"}, null); - assertQ("All four should make it", req + assertQ(req(baseParams) , "//*[@numFound='4']" , "//result/doc[1]/str[@name='id'][.='a']" , "//result/doc[2]/str[@name='id'][.='x']" @@ -577,44 +573,28 @@ public void testSorting() throws 
Exception { , "//result/doc[4]/str[@name='id'][.='b']" ); - // Test reverse sort - args.put(CommonParams.SORT, "score asc"); - req.close(); req = new LocalSolrQueryRequest(h.getCore(), new MapSolrParams(args)); - assertQ("All four should make it", req - , "//*[@numFound='4']" - // NOTE REVERSED doc[X] indices - , "//result/doc[4]/str[@name='id'][.='a']" - , "//result/doc[3]/str[@name='id'][.='x']" - , "//result/doc[2]/str[@name='id'][.='c']" - , "//result/doc[1]/str[@name='id'][.='b']" - ); - // Try normal sort by 'id' // default 'forceBoost' should be false assertEquals(false, booster.forceElevation); - args.put(CommonParams.SORT, "str_s1 asc"); - req.close(); req = new LocalSolrQueryRequest(h.getCore(), new MapSolrParams(args)); - assertQ(null, req + assertQ(req(baseParams, "sort", "id asc") , "//*[@numFound='4']" , "//result/doc[1]/str[@name='id'][.='a']" , "//result/doc[2]/str[@name='id'][.='b']" , "//result/doc[3]/str[@name='id'][.='c']" , "//result/doc[4]/str[@name='id'][.='x']" ); - args.put(CommonParams.SORT, "id asc"); - req.close(); req = new LocalSolrQueryRequest(h.getCore(), new MapSolrParams(args)); - assertQ(null, req + + assertQ("useConfiguredElevatedOrder=false", + req(baseParams, "sort", "str_s1 asc,id desc", "useConfiguredElevatedOrder", "false") , "//*[@numFound='4']" - , "//result/doc[1]/str[@name='id'][.='a']" - , "//result/doc[2]/str[@name='id'][.='b']" + , "//result/doc[1]/str[@name='id'][.='x']"//group1 + , "//result/doc[2]/str[@name='id'][.='a']"//group1 , "//result/doc[3]/str[@name='id'][.='c']" - , "//result/doc[4]/str[@name='id'][.='x']" + , "//result/doc[4]/str[@name='id'][.='b']" ); booster.forceElevation = true; - args.put(CommonParams.SORT, "id asc"); - req.close(); req = new LocalSolrQueryRequest(h.getCore(), new MapSolrParams(args)); - assertQ(null, req + assertQ(req(baseParams, "sort", "id asc") , "//*[@numFound='4']" , "//result/doc[1]/str[@name='id'][.='a']" , "//result/doc[2]/str[@name='id'][.='x']" @@ -622,22 +602,27 @@ public 
void testSorting() throws Exception { , "//result/doc[4]/str[@name='id'][.='c']" ); + booster.forceElevation = true; + assertQ("useConfiguredElevatedOrder=false and forceElevation", + req(baseParams, "sort", "id desc", "useConfiguredElevatedOrder", "false") + , "//*[@numFound='4']" + , "//result/doc[1]/str[@name='id'][.='x']" // force elevated + , "//result/doc[2]/str[@name='id'][.='a']" // force elevated + , "//result/doc[3]/str[@name='id'][.='c']" + , "//result/doc[4]/str[@name='id'][.='b']" + ); + //Test exclusive (not to be confused with exclusion) - args.put(QueryElevationParams.EXCLUSIVE, "true"); - req.close(); req = new LocalSolrQueryRequest(h.getCore(), new MapSolrParams(args)); booster.setTopQueryResults(reader, query, false, new String[]{"x", "a"}, new String[]{}); - assertQ(null, req + assertQ(req(baseParams, "exclusive", "true") , "//*[@numFound='2']" , "//result/doc[1]/str[@name='id'][.='x']" , "//result/doc[2]/str[@name='id'][.='a']" ); // Test exclusion - args.remove(CommonParams.SORT); - args.remove(QueryElevationParams.EXCLUSIVE); - req.close(); req = new LocalSolrQueryRequest(h.getCore(), new MapSolrParams(args)); booster.setTopQueryResults(reader, query, false, new String[]{"x"}, new String[]{"a"}); - assertQ(null, req + assertQ(req(baseParams) , "//*[@numFound='3']" , "//result/doc[1]/str[@name='id'][.='x']" , "//result/doc[2]/str[@name='id'][.='c']" @@ -648,10 +633,7 @@ public void testSorting() throws Exception { // Test setting ids and excludes from http parameters booster.clearElevationProviderCache(); - args.put(QueryElevationParams.IDS, "x,y,z"); - args.put(QueryElevationParams.EXCLUDE, "b"); - req.close(); req = new LocalSolrQueryRequest(h.getCore(), new MapSolrParams(args)); - assertQ("All five should make it", req + assertQ("All five should make it", req(baseParams, "elevateIds", "x,y,z", "excludeIds", "b") , "//*[@numFound='5']" , "//result/doc[1]/str[@name='id'][.='x']" , "//result/doc[2]/str[@name='id'][.='y']" @@ -660,10 +642,7 @@ 
public void testSorting() throws Exception { , "//result/doc[5]/str[@name='id'][.='a']" ); - args.put(QueryElevationParams.IDS, "x,z,y"); - args.put(QueryElevationParams.EXCLUDE, "b,c"); - req.close(); req = new LocalSolrQueryRequest(h.getCore(), new MapSolrParams(args)); - assertQ("All four should make it", req + assertQ("All four should make it", req(baseParams, "elevateIds", "x,z,y", "excludeIds", "b,c") , "//*[@numFound='4']" , "//result/doc[1]/str[@name='id'][.='x']" , "//result/doc[2]/str[@name='id'][.='z']" @@ -671,7 +650,6 @@ public void testSorting() throws Exception { , "//result/doc[4]/str[@name='id'][.='a']" ); - req.close(); } finally { delete(); } @@ -708,35 +686,34 @@ public void testElevationReloading() throws Exception { comp.init(args); comp.inform(h.getCore()); - SolrQueryRequest req = req(); - IndexReader reader = req.getSearcher().getIndexReader(); - QueryElevationComponent.ElevationProvider elevationProvider = comp.getElevationProvider(reader, h.getCore()); - assertTrue(elevationProvider.getElevationForQuery("aaa").elevatedIds.contains("A")); - assertNull(elevationProvider.getElevationForQuery("bbb")); - req.close(); + QueryElevationComponent.ElevationProvider elevationProvider = null; + + try (SolrQueryRequest req = req()) { + elevationProvider = comp.getElevationProvider(req.getSearcher().getIndexReader(), req.getCore()); + assertTrue(elevationProvider.getElevationForQuery("aaa").elevatedIds.contains(new BytesRef("A"))); + assertNull(elevationProvider.getElevationForQuery("bbb")); + } // now change the file writeFile(configFile, "bbb", "B"); // With no index change, we get the same index reader, so the elevationProviderCache returns the previous ElevationProvider without the change. 
- req = req(); - reader = req.getSearcher().getIndexReader(); - elevationProvider = comp.getElevationProvider(reader, h.getCore()); - assertTrue(elevationProvider.getElevationForQuery("aaa").elevatedIds.contains("A")); - assertNull(elevationProvider.getElevationForQuery("bbb")); - req.close(); + try (SolrQueryRequest req = req()) { + elevationProvider = comp.getElevationProvider(req.getSearcher().getIndexReader(), req.getCore()); + assertTrue(elevationProvider.getElevationForQuery("aaa").elevatedIds.contains(new BytesRef("A"))); + assertNull(elevationProvider.getElevationForQuery("bbb")); + } // Index a new doc to get a new index reader. assertU(adoc("id", "10000")); assertU(commit()); // Check that we effectively reload a new ElevationProvider for a different index reader (so two entries in elevationProviderCache). - req = req(); - reader = req.getSearcher().getIndexReader(); - elevationProvider = comp.getElevationProvider(reader, h.getCore()); - assertNull(elevationProvider.getElevationForQuery("aaa")); - assertTrue(elevationProvider.getElevationForQuery("bbb").elevatedIds.contains("B")); - req.close(); + try (SolrQueryRequest req = req()) { + elevationProvider = comp.getElevationProvider(req.getSearcher().getIndexReader(), req.getCore()); + assertNull(elevationProvider.getElevationForQuery("aaa")); + assertTrue(elevationProvider.getElevationForQuery("bbb").elevatedIds.contains(new BytesRef("B"))); + } // Now change the config file again. writeFile(configFile, "ccc", "C"); @@ -744,13 +721,12 @@ public void testElevationReloading() throws Exception { // Without index change, but calling a different method that clears the elevationProviderCache, so we should load a new ElevationProvider. 
int elevationRuleNumber = comp.loadElevationConfiguration(h.getCore()); assertEquals(1, elevationRuleNumber); - req = req(); - reader = req.getSearcher().getIndexReader(); - elevationProvider = comp.getElevationProvider(reader, h.getCore()); - assertNull(elevationProvider.getElevationForQuery("aaa")); - assertNull(elevationProvider.getElevationForQuery("bbb")); - assertTrue(elevationProvider.getElevationForQuery("ccc").elevatedIds.contains("C")); - req.close(); + try (SolrQueryRequest req = req()) { + elevationProvider = comp.getElevationProvider(req.getSearcher().getIndexReader(), req.getCore()); + assertNull(elevationProvider.getElevationForQuery("aaa")); + assertNull(elevationProvider.getElevationForQuery("bbb")); + assertTrue(elevationProvider.getElevationForQuery("ccc").elevatedIds.contains(new BytesRef("C"))); + } } finally { delete(); } @@ -787,4 +763,5 @@ public void testWithLocalParam() throws Exception { delete(); } } + } diff --git a/solr/solr-ref-guide/src/the-query-elevation-component.adoc b/solr/solr-ref-guide/src/the-query-elevation-component.adoc index 3c7f50441179..3f9bf639968c 100644 --- a/solr/solr-ref-guide/src/the-query-elevation-component.adoc +++ b/solr/solr-ref-guide/src/the-query-elevation-component.adoc @@ -68,6 +68,13 @@ Path to the file that defines query elevation. 
This file must exist in `> can be used to annotate each document with information about whether or not it was elevated: diff --git a/solr/solrj/src/java/org/apache/solr/common/params/QueryElevationParams.java b/solr/solrj/src/java/org/apache/solr/common/params/QueryElevationParams.java index d538cab6a2b4..7801c700a398 100644 --- a/solr/solrj/src/java/org/apache/solr/common/params/QueryElevationParams.java +++ b/solr/solrj/src/java/org/apache/solr/common/params/QueryElevationParams.java @@ -34,6 +34,7 @@ public interface QueryElevationParams { * See http://wiki.apache.org/solr/DocTransformers */ String EDITORIAL_MARKER_FIELD_NAME = "editorialMarkerFieldName"; + /** * The name of the field that excluded editorial results will be written out as when using the QueryElevationComponent, which * automatically configures the EditorialMarkerFactory. The default name is "excluded". This is only used @@ -50,8 +51,8 @@ public interface QueryElevationParams { String MARK_EXCLUDES = "markExcludes"; /** - * Whether the priority order between elevated documents is kept, based on the definition order in the configuration file. - * This parameter is only taken into account if {@link QueryElevationParams#FORCE_ELEVATION} is true. + * When multiple docs are elevated, should their relative order be the order in the configuration file or should + * they be subject to whatever the sort criteria is? True by default. */ - String KEEP_ELEVATION_PRIORITY = "keepElevationPriority"; + String USE_CONFIGURED_ELEVATED_ORDER = "useConfiguredElevatedOrder"; } From aaf69cc361153786c6f117a3483c06bb02261a74 Mon Sep 17 00:00:00 2001 From: broustant Date: Fri, 12 Jan 2018 18:03:26 +0100 Subject: [PATCH 5/8] Refactor QueryElevationComponent to introduce ElevationProvider - Refactor to introduce ElevationProvider. The current full-query match policy becomes a default simple MapElevationProvider. It can be replaced by a more efficient provider in the future, or replaced by an extending class. 
- Add overridable methods to handle exceptions during the component initialization. - Add overridable methods to provide the default values for config properties. - No functional change beyond refactoring. - Adapt unit test. --- .gitignore | 2 + .../component/QueryElevationComponent.java | 1478 ++++++++++++----- .../QueryElevationComponentTest.java | 100 +- .../common/params/QueryElevationParams.java | 6 + 4 files changed, 1151 insertions(+), 435 deletions(-) diff --git a/.gitignore b/.gitignore index 4b947436dc6e..74a50f7f62ad 100644 --- a/.gitignore +++ b/.gitignore @@ -26,3 +26,5 @@ pom.xml /nbproject /nb-build .pydevproject +.DS_Store +/temp \ No newline at end of file diff --git a/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java b/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java index d7b84740869a..cadcfc02f8b9 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java @@ -16,6 +16,7 @@ */ package org.apache.solr.handler.component; +import javax.xml.parsers.ParserConfigurationException; import javax.xml.xpath.XPath; import javax.xml.xpath.XPathConstants; import javax.xml.xpath.XPathExpressionException; @@ -23,35 +24,27 @@ import java.io.File; import java.io.IOException; import java.io.InputStream; +import java.io.StringReader; import java.lang.invoke.MethodHandles; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.WeakHashMap; +import java.util.*; +import java.util.stream.Collector; +import java.util.stream.Collectors; import com.carrotsearch.hppc.IntIntHashMap; +import com.google.common.annotations.VisibleForTesting; + +import com.google.common.collect.ImmutableMap; +import 
com.google.common.collect.ImmutableSet; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.LeafReader; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.PostingsEnum; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.Terms; -import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.*; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BoostQuery; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.FieldComparatorSource; -import org.apache.lucene.search.Query; import org.apache.lucene.search.SimpleFieldComparator; import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; @@ -69,11 +62,9 @@ import org.apache.solr.common.util.StrUtils; import org.apache.solr.core.Config; import org.apache.solr.core.SolrCore; -import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.response.transform.ElevatedMarkerFactory; import org.apache.solr.response.transform.ExcludedMarkerFactory; import org.apache.solr.schema.FieldType; -import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.SchemaField; import org.apache.solr.search.QueryParsing; import org.apache.solr.search.SolrIndexSearcher; @@ -88,8 +79,7 @@ import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.xml.sax.InputSource; - -import static org.apache.solr.common.params.CommonParams.ID; +import org.xml.sax.SAXException; /** * A component to elevate some documents to the top of the result set. 
@@ -100,70 +90,47 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); // Constants used in solrconfig.xml + @VisibleForTesting static final String FIELD_TYPE = "queryFieldType"; + @VisibleForTesting static final String CONFIG_FILE = "config-file"; - static final String EXCLUDE = "exclude"; + private static final String EXCLUDE = "exclude"; public static final String BOOSTED = "BOOSTED"; - public static final String BOOSTED_DOCIDS = "BOOSTED_DOCIDS"; + private static final String BOOSTED_DOCIDS = "BOOSTED_DOCIDS"; public static final String BOOSTED_PRIORITY = "BOOSTED_PRIORITY"; - public static final String EXCLUDED = "EXCLUDED"; - // Runtime param -- should be in common? - - private SolrParams initArgs = null; - private Analyzer analyzer = null; - private String idField = null; - private FieldType idSchemaFT; - - boolean forceElevation = false; - // For each IndexReader, keep a query->elevation map - // When the configuration is loaded from the data directory. - // The key is null if loaded from the config directory, and - // is never re-loaded. 
- final Map> elevationCache = - new WeakHashMap<>(); - - class ElevationObj { - final String text; - final String analyzed; - final TermQuery [] exclude;//just keep the term query, b/c we will not always explicitly exclude the item based on markExcludes query time param - final BooleanQuery include; - final Map priority; - final Set ids; - final Set excludeIds; - - ElevationObj(String qstr, List elevate, List exclude) throws IOException { - this.text = qstr; - this.analyzed = getAnalyzedQuery(this.text); - this.ids = new HashSet<>(); - this.excludeIds = new HashSet<>(); - - BooleanQuery.Builder include = new BooleanQuery.Builder(); - this.priority = new HashMap<>(); - int max = elevate.size() + 5; - for (String id : elevate) { - id = idSchemaFT.readableToIndexed(id); - ids.add(id); - TermQuery tq = new TermQuery(new Term(idField, id)); - include.add(tq, BooleanClause.Occur.SHOULD); - this.priority.put(new BytesRef(id), max--); - } - this.include = include.build(); - - if (exclude == null || exclude.isEmpty()) { - this.exclude = null; - } else { - this.exclude = new TermQuery[exclude.size()]; - for (int i = 0; i < exclude.size(); i++) { - String id = idSchemaFT.readableToIndexed(exclude.get(i)); - excludeIds.add(id); - this.exclude[i] = new TermQuery(new Term(idField, id)); - } - } - } - } + private static final boolean DEFAULT_FORCE_ELEVATION = false; + private static final boolean DEFAULT_KEEP_ELEVATION_PRIORITY = true; + private static final boolean DEFAULT_SUBSET_MATCH = false; + private static final String DEFAULT_EXCLUDE_MARKER_FIELD_NAME = "excluded"; + private static final String DEFAULT_EDITORIAL_MARKER_FIELD_NAME = "elevated"; + + private static final Collector QUERY_EXACT_JOINER = Collectors.joining(" "); + + // Runtime param + private SolrParams initArgs; + private Analyzer queryAnalyzer; + private String uniqueKeyFieldName; + private FieldType uniqueKeyFieldType; + private IndexedValueProvider indexedValueProvider; + @VisibleForTesting + boolean 
forceElevation; + private boolean keepElevationPriority; + private boolean initialized; + + /** + * For each IndexReader, keep an ElevationProvider when the configuration is loaded from the data directory. + * The key is null if loaded from the config directory, and is never re-loaded. + */ + private final Map elevationProviderCache = new WeakHashMap<>(); + + /** + * Keep track of a counter each time a configuration file cannot be loaded. + * Stop trying to load after {@link #getConfigLoadingExceptionHandler()}.{@link LoadingExceptionHandler#getLoadingMaxAttempts getLoadingMaxAttempts()}. + */ + private final Map configLoadingErrorCounters = new WeakHashMap<>(); @Override public void init(NamedList args) { @@ -172,128 +139,240 @@ public void init(NamedList args) { @Override public void inform(SolrCore core) { - IndexSchema schema = core.getLatestSchema(); + initialized = false; + try { + parseFieldType(core); + setUniqueKeyField(core); + parseExcludedMarkerFieldName(core); + parseEditorialMarkerFieldName(core); + parseForceElevation(); + parseKeepElevationPriority(); + loadElevationConfiguration(core); + initialized = true; + } catch (InitializationException e) { + assert !initialized; + handleInitializationException(e, e.exceptionCause); + } catch (Exception e) { + assert !initialized; + handleInitializationException(e, InitializationExceptionHandler.ExceptionCause.OTHER); + } + } + + private void parseFieldType(SolrCore core) throws InitializationException { String a = initArgs.get(FIELD_TYPE); if (a != null) { - FieldType ft = schema.getFieldTypes().get(a); + FieldType ft = core.getLatestSchema().getFieldTypes().get(a); if (ft == null) { - throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, - "Unknown FieldType: '" + a + "' used in QueryElevationComponent"); + throw new InitializationException("Parameter " + FIELD_TYPE + " defines an unknown field type \"" + a + "\"", InitializationExceptionHandler.ExceptionCause.UNKNOWN_FIELD_TYPE); } - analyzer = 
ft.getQueryAnalyzer(); + queryAnalyzer = ft.getQueryAnalyzer(); } + } - SchemaField sf = schema.getUniqueKeyField(); - if( sf == null) { - throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, - "QueryElevationComponent requires the schema to have a uniqueKeyField." ); + private void setUniqueKeyField(SolrCore core) throws InitializationException { + SchemaField sf = core.getLatestSchema().getUniqueKeyField(); + if (sf == null) { + throw new InitializationException("This component requires the schema to have a uniqueKeyField", InitializationExceptionHandler.ExceptionCause.MISSING_UNIQUE_KEY_FIELD); } - idSchemaFT = sf.getType(); - idField = sf.getName(); - //register the EditorialMarkerFactory - String excludeName = initArgs.get(QueryElevationParams.EXCLUDE_MARKER_FIELD_NAME, "excluded"); - if (excludeName == null || excludeName.equals("") == true){ - excludeName = "excluded"; + uniqueKeyFieldType = sf.getType(); + uniqueKeyFieldName = sf.getName(); + indexedValueProvider = readableValue -> uniqueKeyFieldType.readableToIndexed(readableValue); + } + + private void parseExcludedMarkerFieldName(SolrCore core) { + String markerName = initArgs.get(QueryElevationParams.EXCLUDE_MARKER_FIELD_NAME, DEFAULT_EXCLUDE_MARKER_FIELD_NAME); + if (markerName == null || markerName.equals("")) { + markerName = DEFAULT_EXCLUDE_MARKER_FIELD_NAME; } - ExcludedMarkerFactory excludedMarkerFactory = new ExcludedMarkerFactory(); - core.addTransformerFactory(excludeName, excludedMarkerFactory); - ElevatedMarkerFactory elevatedMarkerFactory = new ElevatedMarkerFactory(); - String markerName = initArgs.get(QueryElevationParams.EDITORIAL_MARKER_FIELD_NAME, "elevated"); - if (markerName == null || markerName.equals("") == true) { - markerName = "elevated"; + core.addTransformerFactory(markerName, new ExcludedMarkerFactory()); + } + + private void parseEditorialMarkerFieldName(SolrCore core) { + String markerName = initArgs.get(QueryElevationParams.EDITORIAL_MARKER_FIELD_NAME, 
DEFAULT_EDITORIAL_MARKER_FIELD_NAME); + if (markerName == null || markerName.equals("")) { + markerName = DEFAULT_EDITORIAL_MARKER_FIELD_NAME; } - core.addTransformerFactory(markerName, elevatedMarkerFactory); - forceElevation = initArgs.getBool(QueryElevationParams.FORCE_ELEVATION, forceElevation); + core.addTransformerFactory(markerName, new ElevatedMarkerFactory()); + } - String f = initArgs.get(CONFIG_FILE); - if (f != null) { - try { - synchronized (elevationCache) { - elevationCache.clear(); - boolean exists = false; - - // check if using ZooKeeper - ZkController zkController = core.getCoreContainer().getZkController(); - if (zkController != null) { - // TODO : shouldn't have to keep reading the config name when it has been read before - exists = zkController.configFileExists(zkController.getZkStateReader().readConfigName(core.getCoreDescriptor().getCloudDescriptor().getCollectionName()), f); + private void parseForceElevation() { + forceElevation = initArgs.getBool(QueryElevationParams.FORCE_ELEVATION, getDefaultForceElevation()); + } + + private void parseKeepElevationPriority() { + keepElevationPriority = initArgs.getBool(QueryElevationParams.KEEP_ELEVATION_PRIORITY, getDefaultKeepElevationPriority()); + } + + /** + * (Re)Loads elevation configuration. + *

+ * Protected access to be called by extending class. + *

+ * + * @param core The core holding this component. + * @return The number of elevation rules parsed. + */ + @SuppressWarnings("WeakerAccess") + protected int loadElevationConfiguration(SolrCore core) throws Exception { + synchronized (elevationProviderCache) { + elevationProviderCache.clear(); + String configFileName = initArgs.get(CONFIG_FILE); + if (configFileName == null) { + // Throw an exception which can be handled by an overriding InitializationExceptionHandler (see handleInitializationException()). + // The default InitializationExceptionHandler will simply skip this exception. + throw new InitializationException("Missing component parameter " + CONFIG_FILE + " - it has to define the path to the elevation configuration file", InitializationExceptionHandler.ExceptionCause.NO_CONFIG_FILE_DEFINED); + } + boolean configFileExists = false; + ElevationProvider elevationProvider = NO_OP_ELEVATION_PROVIDER; + + // check if using ZooKeeper + ZkController zkController = core.getCoreContainer().getZkController(); + if (zkController != null) { + // TODO : shouldn't have to keep reading the config name when it has been read before + configFileExists = zkController.configFileExists(zkController.getZkStateReader().readConfigName(core.getCoreDescriptor().getCloudDescriptor().getCollectionName()), configFileName); + } else { + File fC = new File(core.getResourceLoader().getConfigDir(), configFileName); + File fD = new File(core.getDataDir(), configFileName); + if (fC.exists() == fD.exists()) { + InitializationException e = new InitializationException("Missing config file \"" + configFileName + "\" - either " + fC.getAbsolutePath() + " or " + fD.getAbsolutePath() + " must exist, but not both", InitializationExceptionHandler.ExceptionCause.MISSING_CONFIG_FILE); + elevationProvider = handleConfigLoadingException(e, true); + elevationProviderCache.put(null, elevationProvider); + } else if (fC.exists()) { + if (fC.length() == 0) { + InitializationException e = new 
InitializationException("Empty config file \"" + configFileName + "\" - " + fC.getAbsolutePath(), InitializationExceptionHandler.ExceptionCause.EMPTY_CONFIG_FILE); + elevationProvider = handleConfigLoadingException(e, true); } else { - File fC = new File(core.getResourceLoader().getConfigDir(), f); - File fD = new File(core.getDataDir(), f); - if (fC.exists() == fD.exists()) { - throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, - "QueryElevationComponent missing config file: '" + f + "\n" - + "either: " + fC.getAbsolutePath() + " or " + fD.getAbsolutePath() + " must exist, but not both."); - } - if (fC.exists()) { - exists = true; - log.info("Loading QueryElevation from: " + fC.getAbsolutePath()); - Config cfg = new Config(core.getResourceLoader(), f); - elevationCache.put(null, loadElevationMap(cfg)); - } + configFileExists = true; + log.info("Loading QueryElevation from: " + fC.getAbsolutePath()); + Config cfg = new Config(core.getResourceLoader(), configFileName); + elevationProvider = loadElevationProvider(cfg); } - //in other words, we think this is in the data dir, not the conf dir - if (!exists) { - // preload the first data - RefCounted searchHolder = null; - try { - searchHolder = core.getNewestSearcher(false); - IndexReader reader = searchHolder.get().getIndexReader(); - getElevationMap(reader, core); - } finally { - if (searchHolder != null) searchHolder.decref(); - } + elevationProviderCache.put(null, elevationProvider); + } + } + //in other words, we think this is in the data dir, not the conf dir + if (!configFileExists) { + // preload the first data + RefCounted searchHolder = null; + try { + searchHolder = core.getNewestSearcher(false); + if (searchHolder == null) { + elevationProvider = NO_OP_ELEVATION_PROVIDER; + } else { + IndexReader reader = searchHolder.get().getIndexReader(); + elevationProvider = getElevationProvider(reader, core); } + } finally { + if (searchHolder != null) searchHolder.decref(); } - } catch (Exception ex) { - 
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, - "Error initializing QueryElevationComponent.", ex); } + return elevationProvider.size(); } } - //get the elevation map from the data dir - Map getElevationMap(IndexReader reader, SolrCore core) throws Exception { - synchronized (elevationCache) { - Map map = elevationCache.get(null); - if (map != null) return map; - - map = elevationCache.get(reader); - if (map == null) { - String f = initArgs.get(CONFIG_FILE); - if (f == null) { - throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, - "QueryElevationComponent must specify argument: " + CONFIG_FILE); + /** + * Gets the {@link ElevationProvider} from the data dir or from the cache. + * + * @return The cached or loaded {@link ElevationProvider}. + * @throws java.io.IOException If the configuration resource cannot be found, or if an I/O error occurs while analyzing the triggering queries. + * @throws org.xml.sax.SAXException If the configuration resource is not a valid XML content. + * @throws javax.xml.parsers.ParserConfigurationException If the configuration resource is not a valid XML configuration. + * @throws RuntimeException If the configuration resource is not an XML content of the expected format + * (either {@link RuntimeException} or {@link org.apache.solr.common.SolrException}). 
+ */ + @VisibleForTesting + ElevationProvider getElevationProvider(IndexReader reader, SolrCore core) throws Exception { + synchronized (elevationProviderCache) { + ElevationProvider elevationProvider; + elevationProvider = elevationProviderCache.get(null); + if (elevationProvider != null) return elevationProvider; + + elevationProvider = elevationProviderCache.get(reader); + if (elevationProvider == null) { + Exception loadingException = null; + boolean resourceAccessIssue = false; + try { + elevationProvider = loadElevationProvider(core); + } catch (IOException e) { + loadingException = e; + resourceAccessIssue = true; + } catch (Exception e) { + loadingException = e; } - log.info("Loading QueryElevation from data dir: " + f); - - Config cfg; - - ZkController zkController = core.getCoreContainer().getZkController(); - if (zkController != null) { - cfg = new Config(core.getResourceLoader(), f, null, null); - } else { - InputStream is = VersionedFile.getLatestFile(core.getDataDir(), f); - cfg = new Config(core.getResourceLoader(), f, new InputSource(is), null); + boolean shouldCache = true; + if (loadingException != null) { + elevationProvider = handleConfigLoadingException(loadingException, resourceAccessIssue); + // Do not cache the fallback ElevationProvider for the first exceptions because the exception might + // occur only a couple of times and the config file could be loaded correctly afterwards + // (e.g. temporary invalid file access). After some attempts, cache the fallback ElevationProvider + // not to overload the exception handler (and beyond it, the logs probably). 
+ if (incConfigLoadingErrorCount(reader) < getConfigLoadingExceptionHandler().getLoadingMaxAttempts()) { + shouldCache = false; + } + } + if (shouldCache) { + elevationProviderCache.put(reader, elevationProvider); } - - map = loadElevationMap(cfg); - elevationCache.put(reader, map); } - return map; + assert elevationProvider != null; + return elevationProvider; + } + } + + /** + * Loads the {@link ElevationProvider} from the data dir. + * + * @return The loaded {@link ElevationProvider}. + * @throws java.io.IOException If the configuration resource cannot be found, or if an I/O error occurs while analyzing the triggering queries. + * @throws org.xml.sax.SAXException If the configuration resource is not a valid XML content. + * @throws javax.xml.parsers.ParserConfigurationException If the configuration resource is not a valid XML configuration. + * @throws RuntimeException If the configuration resource is not an XML content of the expected format + * (either {@link RuntimeException} or {@link org.apache.solr.common.SolrException}). 
+ */ + private ElevationProvider loadElevationProvider(SolrCore core) throws IOException, SAXException, ParserConfigurationException { + String configFileName = initArgs.get(CONFIG_FILE); + if (configFileName == null) { + throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, + "QueryElevationComponent must specify argument: " + CONFIG_FILE); + } + log.info("Loading QueryElevation from data dir: " + configFileName); + + Config cfg; + ZkController zkController = core.getCoreContainer().getZkController(); + if (zkController != null) { + cfg = new Config(core.getResourceLoader(), configFileName, null, null); + } else { + InputStream is = VersionedFile.getLatestFile(core.getDataDir(), configFileName); + cfg = new Config(core.getResourceLoader(), configFileName, new InputSource(is), null); } + ElevationProvider elevationProvider = loadElevationProvider(cfg); + assert elevationProvider != null; + return elevationProvider; } - //load up the elevation map - private Map loadElevationMap(Config cfg) throws IOException { + /** + * Loads the {@link ElevationProvider}. + *

+ * This method can be overridden. + *

+ * + * @throws java.io.IOException If an I/O error occurs while analyzing the triggering queries. + * @throws RuntimeException If the config does not provide an XML content of the expected format + * (either {@link RuntimeException} or {@link org.apache.solr.common.SolrException}). + */ + @SuppressWarnings("WeakerAccess") + protected ElevationProvider loadElevationProvider(Config config) throws IOException { + Map elevationBuilderMap = keepElevationPriority ? + new LinkedHashMap<>() : new HashMap<>(); XPath xpath = XPathFactory.newInstance().newXPath(); - Map map = new HashMap<>(); - NodeList nodes = (NodeList) cfg.evaluate("elevate/query", XPathConstants.NODESET); + NodeList nodes = (NodeList) config.evaluate("elevate/query", XPathConstants.NODESET); for (int i = 0; i < nodes.getLength(); i++) { Node node = nodes.item(i); - String qstr = DOMUtil.getAttr(node, "text", "missing query 'text'"); + String queryString = DOMUtil.getAttr(node, "text", "missing query 'text'"); + String matchString = DOMUtil.getAttr(node, "match"); + ElevatingQuery elevatingQuery = new ElevatingQuery(queryString, parseMatchPolicy(matchString)); - NodeList children = null; + NodeList children; try { children = (NodeList) xpath.evaluate("doc", node, XPathConstants.NODESET); } catch (XPathExpressionException e) { @@ -301,68 +380,89 @@ private Map loadElevationMap(Config cfg) throws IOExceptio "query requires '' child"); } - ArrayList include = new ArrayList<>(); - ArrayList exclude = new ArrayList<>(); + ElevationBuilder elevationBuilder = new ElevationBuilder(); for (int j = 0; j < children.getLength(); j++) { Node child = children.item(j); - String id = DOMUtil.getAttr(child, ID, "missing 'id'"); + String id = DOMUtil.getAttr(child, "id", "missing 'id'"); String e = DOMUtil.getAttr(child, EXCLUDE, null); if (e != null) { if (Boolean.valueOf(e)) { - exclude.add(id); + elevationBuilder.addExcludedId(id); continue; } } - include.add(id); + elevationBuilder.addElevatedId(id); } - ElevationObj 
elev = new ElevationObj(qstr, include, exclude); - if (map.containsKey(elev.analyzed)) { - throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, - "Boosting query defined twice for query: '" + elev.text + "' (" + elev.analyzed + "')"); + // It is allowed to define multiple times different elevations for the same query. In this case the elevations + // are merged in the ElevationBuilder (they will be triggered at the same time). + ElevationBuilder previousElevationBuilder = elevationBuilderMap.get(elevatingQuery); + if (previousElevationBuilder == null) { + elevationBuilderMap.put(elevatingQuery, elevationBuilder); + } else { + previousElevationBuilder.merge(elevationBuilder); } - map.put(elev.analyzed, elev); } - return map; + ElevationProvider elevationProvider = createElevationProvider(queryAnalyzer); + for (Map.Entry entry : elevationBuilderMap.entrySet()) { + elevationProvider.setElevationForQuery(entry.getKey(), entry.getValue().build()); + } + return elevationProvider.makeImmutable(); + } + + private boolean parseMatchPolicy(String matchString) { + if (matchString == null) { + return getDefaultSubsetMatch(); + } else if (matchString.equalsIgnoreCase("exact")) { + return false; + } else if (matchString.equalsIgnoreCase("subset")) { + return true; + } else { + throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, + "invalid value \"" + matchString + "\" for query match attribute"); + } } /** - * Helpful for testing without loading config.xml + * Potentially handles and captures an exception that occurred while loading the configuration resource. * - * @throws IOException If there is a low-level I/O error. + * @param e The exception caught. 
+ * @param resourceAccessIssueOrEmptyConfig true if the exception has been thrown because the resource could not + * be accessed (missing or cannot be read) or the config file is empty; false if the resource has + * been found and accessed but the error occurred while loading the resource + * (invalid format, incomplete or corrupted). + * @return The {@link ElevationProvider} to use if the exception is absorbed. + * @throws E If the exception is not absorbed. */ - void setTopQueryResults(IndexReader reader, String query, String[] ids, String[] ex) throws IOException { - if (ids == null) { - ids = new String[0]; + private ElevationProvider handleConfigLoadingException(E e, boolean resourceAccessIssueOrEmptyConfig) throws E { + if (getConfigLoadingExceptionHandler().handleLoadingException(e, resourceAccessIssueOrEmptyConfig)) { + return NO_OP_ELEVATION_PROVIDER; } - if (ex == null) { - ex = new String[0]; - } - - Map elev = elevationCache.get(reader); - if (elev == null) { - elev = new HashMap<>(); - elevationCache.put(reader, elev); - } - ElevationObj obj = new ElevationObj(query, Arrays.asList(ids), Arrays.asList(ex)); - elev.put(obj.analyzed, obj); + assert e != null; + throw e; } - String getAnalyzedQuery(String query) throws IOException { - if (analyzer == null) { - return query; + private int incConfigLoadingErrorCount(IndexReader reader) { + Integer counter = configLoadingErrorCounters.get(reader); + if (counter == null) { + counter = 1; + } else { + counter++; } - StringBuilder norm = new StringBuilder(); - try (TokenStream tokens = analyzer.tokenStream("", query)) { - tokens.reset(); + configLoadingErrorCounters.put(reader, counter); + return counter; + } - CharTermAttribute termAtt = tokens.addAttribute(CharTermAttribute.class); - while (tokens.incrementToken()) { - norm.append(termAtt.buffer(), 0, termAtt.length()); - } - tokens.end(); - return norm.toString(); - } + /** + * Potentially handles and captures the exception that occurred while initializing 
this component. If the exception + * is captured by the handler, this component fails to initialize silently and is muted because field initialized is + * false. + */ + private void handleInitializationException(Exception initializationException, InitializationExceptionHandler.ExceptionCause exceptionCause) { + SolrException solrException = new SolrException(SolrException.ErrorCode.SERVER_ERROR, + "Error initializing " + QueryElevationComponent.class.getSimpleName(), initializationException); + if (!getInitializationExceptionHandler().handleInitializationException(solrException, exceptionCause)) + throw solrException; } //--------------------------------------------------------------------------------- @@ -371,125 +471,122 @@ String getAnalyzedQuery(String query) throws IOException { @Override public void prepare(ResponseBuilder rb) throws IOException { - SolrQueryRequest req = rb.req; - SolrParams params = req.getParams(); - // A runtime param can skip - if (!params.getBool(QueryElevationParams.ENABLE, true)) { + if (!initialized || !rb.req.getParams().getBool(QueryElevationParams.ENABLE, true)) { return; } - boolean exclusive = params.getBool(QueryElevationParams.EXCLUSIVE, false); - // A runtime parameter can alter the config value for forceElevation - boolean force = params.getBool(QueryElevationParams.FORCE_ELEVATION, forceElevation); - boolean markExcludes = params.getBool(QueryElevationParams.MARK_EXCLUDES, false); - String boostStr = params.get(QueryElevationParams.IDS); - String exStr = params.get(QueryElevationParams.EXCLUDE); + Elevation elevation = getElevation(rb); + if (elevation != null) { + setQuery(rb, elevation); + setSort(rb, elevation); + } + + if (rb.isDebug() && rb.isDebugQuery()) { + addDebugInfo(rb, elevation); + } + } + + @Override + public void process(ResponseBuilder rb) throws IOException { + // Do nothing -- the real work is modifying the input query + } - Query query = rb.getQuery(); + private Elevation getElevation(ResponseBuilder rb) 
{ SolrParams localParams = rb.getQparser().getLocalParams(); - String qstr = localParams == null ? rb.getQueryString() : localParams.get(QueryParsing.V); - if (query == null || qstr == null) { - return; + String queryString = localParams == null ? rb.getQueryString() : localParams.get(QueryParsing.V); + if (queryString == null || rb.getQuery() == null) { + return null; } - ElevationObj booster = null; + SolrParams params = rb.req.getParams(); + String paramElevatedIds = params.get(QueryElevationParams.IDS); + String paramExcludedIds = params.get(QueryElevationParams.EXCLUDE); try { - if(boostStr != null || exStr != null) { - List boosts = (boostStr != null) ? StrUtils.splitSmart(boostStr,",", true) : new ArrayList(0); - List excludes = (exStr != null) ? StrUtils.splitSmart(exStr, ",", true) : new ArrayList(0); - booster = new ElevationObj(qstr, boosts, excludes); + if (paramElevatedIds != null || paramExcludedIds != null) { + List elevatedIds = paramElevatedIds != null ? StrUtils.splitSmart(paramElevatedIds,",", true) : Collections.emptyList(); + List excludedIds = paramExcludedIds != null ? 
StrUtils.splitSmart(paramExcludedIds, ",", true) : Collections.emptyList(); + return new ElevationBuilder().addElevatedIds(elevatedIds).addExcludedIds(excludedIds).build(); } else { - IndexReader reader = req.getSearcher().getIndexReader(); - qstr = getAnalyzedQuery(qstr); - booster = getElevationMap(reader, req.getCore()).get(qstr); + IndexReader reader = rb.req.getSearcher().getIndexReader(); + return getElevationProvider(reader, rb.req.getCore()).getElevationForQuery(queryString); } - } catch (Exception ex) { - throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, - "Error loading elevation", ex); + } catch (Exception e) { + throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error loading elevation", e); } + } - if (booster != null) { - rb.req.getContext().put(BOOSTED, booster.ids); - rb.req.getContext().put(BOOSTED_PRIORITY, booster.priority); - - // Change the query to insert forced documents - if (exclusive == true) { - //we only want these results - rb.setQuery(new BoostQuery(booster.include, 0f)); - } else { - BooleanQuery.Builder newq = new BooleanQuery.Builder(); - newq.add(query, BooleanClause.Occur.SHOULD); - newq.add(new BoostQuery(booster.include, 0f), BooleanClause.Occur.SHOULD); - if (booster.exclude != null) { - if (markExcludes == false) { - for (TermQuery tq : booster.exclude) { - newq.add(new BooleanClause(tq, BooleanClause.Occur.MUST_NOT)); - } - } else { - //we are only going to mark items as excluded, not actually exclude them. 
This works - //with the EditorialMarkerFactory - rb.req.getContext().put(EXCLUDED, booster.excludeIds); + private void setQuery(ResponseBuilder rb, Elevation elevation) { + rb.req.getContext().put(BOOSTED, elevation.elevatedIds); + rb.req.getContext().put(BOOSTED_PRIORITY, elevation.priorities); + + // Change the query to insert forced documents + SolrParams params = rb.req.getParams(); + if (params.getBool(QueryElevationParams.EXCLUSIVE, false)) { + // We only want these elevated results + rb.setQuery(new BoostQuery(elevation.includeQuery, 0f)); + } else { + BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder(); + queryBuilder.add(rb.getQuery(), BooleanClause.Occur.SHOULD); + queryBuilder.add(new BoostQuery(elevation.includeQuery, 0f), BooleanClause.Occur.SHOULD); + if (elevation.excludeQueries != null) { + if (params.getBool(QueryElevationParams.MARK_EXCLUDES, false)) { + // We are only going to mark items as excluded, not actually exclude them. + // This works with the EditorialMarkerFactory. 
+ rb.req.getContext().put(EXCLUDED, elevation.excludedIds); + } else { + for (TermQuery tq : elevation.excludeQueries) { + queryBuilder.add(new BooleanClause(tq, BooleanClause.Occur.MUST_NOT)); } } - rb.setQuery(newq.build()); - } - - ElevationComparatorSource comparator = new ElevationComparatorSource(booster); - // if the sort is 'score desc' use a custom sorting method to - // insert documents in their proper place - SortSpec sortSpec = rb.getSortSpec(); - if (sortSpec.getSort() == null) { - sortSpec.setSortAndFields(new Sort(new SortField[]{ - new SortField("_elevate_", comparator, true), - new SortField(null, SortField.Type.SCORE, false) - }), - Arrays.asList(new SchemaField[2])); - } else { - // Check if the sort is based on score - SortSpec modSortSpec = this.modifySortSpec(sortSpec, force, comparator); - if (null != modSortSpec) { - rb.setSortSpec(modSortSpec); - } } + rb.setQuery(queryBuilder.build()); + } + } - // alter the sorting in the grouping specification if there is one - GroupingSpecification groupingSpec = rb.getGroupingSpec(); - if(groupingSpec != null) { - SortSpec groupSortSpec = groupingSpec.getGroupSortSpec(); - SortSpec modGroupSortSpec = this.modifySortSpec(groupSortSpec, force, comparator); - if (modGroupSortSpec != null) { - groupingSpec.setGroupSortSpec(modGroupSortSpec); - } - SortSpec withinGroupSortSpec = groupingSpec.getWithinGroupSortSpec(); - SortSpec modWithinGroupSortSpec = this.modifySortSpec(withinGroupSortSpec, force, comparator); - if (modWithinGroupSortSpec != null) { - groupingSpec.setWithinGroupSortSpec(modWithinGroupSortSpec); - } + private void setSort(ResponseBuilder rb, Elevation elevation) { + boolean forceElevation = rb.req.getParams().getBool(QueryElevationParams.FORCE_ELEVATION, this.forceElevation); + ElevationComparatorSource comparator = new ElevationComparatorSource(elevation); + setSortSpec(rb, forceElevation, comparator); + setGroupingSpec(rb, forceElevation, comparator); + } + + private void 
setSortSpec(ResponseBuilder rb, boolean forceElevation, ElevationComparatorSource comparator) { + // if the sort is 'score desc' use a custom sorting method to + // insert documents in their proper place + SortSpec sortSpec = rb.getSortSpec(); + if (sortSpec.getSort() == null) { + sortSpec.setSortAndFields( + new Sort( + new SortField("_elevate_", comparator, true), + new SortField(null, SortField.Type.SCORE, false)), + Arrays.asList(new SchemaField[2])); + } else { + // Check if the sort is based on score + SortSpec modSortSpec = this.modifySortSpec(sortSpec, forceElevation, comparator); + if (null != modSortSpec) { + rb.setSortSpec(modSortSpec); } } + } - // Add debugging information - if (rb.isDebug()) { - List match = null; - if (booster != null) { - // Extract the elevated terms into a list - match = new ArrayList<>(booster.priority.size()); - for (Object o : booster.include.clauses()) { - TermQuery tq = (TermQuery) ((BooleanClause) o).getQuery(); - match.add(tq.getTerm().text()); - } + private void setGroupingSpec(ResponseBuilder rb, boolean forceElevation, ElevationComparatorSource comparator) { + // alter the sorting in the grouping specification if there is one + GroupingSpecification groupingSpec = rb.getGroupingSpec(); + if(groupingSpec != null) { + SortSpec groupSortSpec = groupingSpec.getGroupSortSpec(); + SortSpec modGroupSortSpec = this.modifySortSpec(groupSortSpec, forceElevation, comparator); + if (modGroupSortSpec != null) { + groupingSpec.setGroupSortSpec(modGroupSortSpec); } - - SimpleOrderedMap dbg = new SimpleOrderedMap<>(); - dbg.add("q", qstr); - dbg.add("match", match); - if (rb.isDebugQuery()) { - rb.addDebugInfo("queryBoosting", dbg); + SortSpec withinGroupSortSpec = groupingSpec.getWithinGroupSortSpec(); + SortSpec modWithinGroupSortSpec = this.modifySortSpec(withinGroupSortSpec, forceElevation, comparator); + if (modWithinGroupSortSpec != null) { + groupingSpec.setWithinGroupSortSpec(modWithinGroupSortSpec); } } } - private SortSpec 
modifySortSpec(SortSpec current, boolean force, ElevationComparatorSource comparator) { + private SortSpec modifySortSpec(SortSpec current, boolean forceElevation, ElevationComparatorSource comparator) { boolean modify = false; SortField[] currentSorts = current.getSort().getSort(); List currentFields = current.getSchemaFields(); @@ -498,7 +595,7 @@ private SortSpec modifySortSpec(SortSpec current, boolean force, ElevationCompar List fields = new ArrayList<>(currentFields.size() + 1); // Perhaps force it to always sort by score - if (force && currentSorts[0].getType() != SortField.Type.SCORE) { + if (forceElevation && currentSorts[0].getType() != SortField.Type.SCORE) { sorts.add(new SortField("_elevate_", comparator, true)); fields.add(null); modify = true; @@ -513,16 +610,33 @@ private SortSpec modifySortSpec(SortSpec current, boolean force, ElevationCompar sorts.add(sf); fields.add(currentFields.get(i)); } - if (modify) { - SortSpec newSpec = new SortSpec(new Sort(sorts.toArray(new SortField[sorts.size()])), - fields, - current.getCount(), - current.getOffset()); - return newSpec; + return modify ? 
+ new SortSpec(new Sort(sorts.toArray(new SortField[sorts.size()])), + fields, + current.getCount(), + current.getOffset()) + : null; + } + + private void addDebugInfo(ResponseBuilder rb, Elevation elevation) { + List match = null; + if (elevation != null) { + // Extract the elevated terms into a list + match = new ArrayList<>(elevation.includeQuery.clauses().size()); + for (BooleanClause clause : elevation.includeQuery.clauses()) { + TermQuery tq = (TermQuery) clause.getQuery(); + match.add(tq.getTerm().text()); + } } - return null; + SimpleOrderedMap dbg = new SimpleOrderedMap<>(); + dbg.add("q", rb.getQueryString()); + dbg.add("match", match); + rb.addDebugInfo("queryBoosting", dbg); } + //--------------------------------------------------------------------------------- + // Boosted docs helper + //--------------------------------------------------------------------------------- public static IntIntHashMap getBoostDocs(SolrIndexSearcher indexSearcher, Mapboosted, Map context) throws IOException { @@ -544,9 +658,8 @@ public static IntIntHashMap getBoostDocs(SolrIndexSearcher indexSearcher, Map localBoosts = new HashSet<>(boosted.size()*2); - Iterator boostedIt = boosted.keySet().iterator(); - while(boostedIt.hasNext()) { - localBoosts.add(boostedIt.next()); + for (BytesRef boost : boosted.keySet()) { + localBoosts.add(boost); } boostDocs = new IntIntHashMap(boosted.size()); @@ -565,7 +678,7 @@ public static IntIntHashMap getBoostDocs(SolrIndexSearcher indexSearcher, MapCan be overridden by extending this class.

+ */ + @SuppressWarnings("WeakerAccess") + protected boolean getDefaultForceElevation() { + return DEFAULT_FORCE_ELEVATION; + } + + /** + * Gets the default value for {@link #DEFAULT_KEEP_ELEVATION_PRIORITY} parameter. + *

Can be overridden by extending this class.

+ */ + @SuppressWarnings("WeakerAccess") + protected boolean getDefaultKeepElevationPriority() { + return DEFAULT_KEEP_ELEVATION_PRIORITY; + } - public ElevationComparatorSource(final QueryElevationComponent.ElevationObj elevations) { - this.elevations = elevations; - int size = elevations.ids.size(); - ordSet = new SentinelIntSet(size, -1); - termValues = new BytesRef[ordSet.keys.length]; + /** + * Gets the default subset match policy. + *

Can be overridden by extending this class.

+ */ + @SuppressWarnings("WeakerAccess") + protected boolean getDefaultSubsetMatch() { + return DEFAULT_SUBSET_MATCH; } - @Override - public FieldComparator newComparator(String fieldname, final int numHits, int sortPos, boolean reversed) { - return new SimpleFieldComparator() { - private final int[] values = new int[numHits]; - private int bottomVal; - private int topVal; - private PostingsEnum postingsEnum; - Set seen = new HashSet<>(elevations.ids.size()); + /** + * Gets the {@link InitializationExceptionHandler} that handles exception thrown during the initialization of the + * elevation configuration. + *

Can be overridden by extending this class. This method provides a means to set a custom exception handler if + * specific error processing is needed.

+ */ + @SuppressWarnings("WeakerAccess") + protected InitializationExceptionHandler getInitializationExceptionHandler() { + return InitializationExceptionHandler.NO_OP; + } + + /** + * Gets the {@link LoadingExceptionHandler} that handles exception thrown during the loading of the elevation configuration. + *

Can be overridden by extending this class. This method provides a means to set a custom exception handler if + * specific error processing is needed.

+ */ + @SuppressWarnings("WeakerAccess") + protected LoadingExceptionHandler getConfigLoadingExceptionHandler() { + return LoadingExceptionHandler.NO_OP; + } + + /** + * Creates the {@link ElevationProvider} to set during configuration loading. The same instance will be used later + * when elevating results for queries. + *

+ * Extending classes can override this method to create {@link ElevationProvider} with different behavior. + *

+ * + * @param queryAnalyzer to analyze and tokenize the query. + * @return The created {@link ElevationProvider}. + */ + @SuppressWarnings("WeakerAccess") + protected ElevationProvider createElevationProvider(Analyzer queryAnalyzer) { + return new MapElevationProvider(queryAnalyzer); + } + + //--------------------------------------------------------------------------------- + // Query analysis and tokenization + //--------------------------------------------------------------------------------- + + @VisibleForTesting + String analyzeQuery(String queryString) throws IOException { + return analyzeQuery(queryString, queryAnalyzer); + } + + /** + * Analyzes the provided query string and returns a concatenation of the analyzed tokens. + */ + private static String analyzeQuery(String queryString, Analyzer queryAnalyzer) throws IOException { + if (queryAnalyzer == null) { + return queryString; + } + Collection queryTerms = new ArrayList<>(); + splitQueryTermsWithAnalyzer(queryString, queryAnalyzer, queryTerms); + return queryTerms.stream().collect(QUERY_EXACT_JOINER); + } + + private static void splitQueryTermsWithAnalyzer(String queryString, Analyzer queryAnalyzer, Collection tokenCollector) throws IOException { + TokenStream tokens = queryAnalyzer.tokenStream("", new StringReader(queryString)); + tokens.reset(); + CharTermAttribute termAttribute = tokens.addAttribute(CharTermAttribute.class); + while (tokens.incrementToken()) { + tokenCollector.add(new String(termAttribute.buffer(), 0, termAttribute.length())); + } + tokens.end(); + tokens.close(); + } + + //--------------------------------------------------------------------------------- + // Testing + //--------------------------------------------------------------------------------- + + /** + * Helpful for testing without loading config.xml. + * + * + * @param reader The {@link org.apache.lucene.index.IndexReader}. + * @param queryString The query for which to elevate some documents. 
If an elevation has already been defined for this query, + * this method overwrites it. + * @param subsetMatch true for query subset match; false for query exact match. + * @param elevatedIds The readable ids of the documents to set as top results for the provided query. + * @param excludedIds The readable ids of the documents to exclude from results for the provided query. + * @throws java.io.IOException If there is a low-level I/O error. + */ + @VisibleForTesting + void setTopQueryResults(IndexReader reader, String queryString, boolean subsetMatch, String[] elevatedIds, + String[] excludedIds) throws IOException { + clearElevationProviderCache(); + if (elevatedIds == null) { + elevatedIds = new String[0]; + } + if (excludedIds == null) { + excludedIds = new String[0]; + } + ElevatingQuery elevatingQuery = new ElevatingQuery(queryString, subsetMatch); + Elevation elevation = createElevation(Arrays.asList(elevatedIds), Arrays.asList(excludedIds)); + ElevationProvider elevationProvider; + synchronized (elevationProviderCache) { + elevationProvider = elevationProviderCache.computeIfAbsent(reader, k -> createElevationProvider(queryAnalyzer)); + } + elevationProvider.setElevationForQuery(elevatingQuery, elevation); + } + + @VisibleForTesting + void clearElevationProviderCache() { + synchronized (elevationProviderCache) { + elevationProviderCache.clear(); + } + } + + //--------------------------------------------------------------------------------- + // Exception classes + //--------------------------------------------------------------------------------- + + private static class InitializationException extends Exception { + final InitializationExceptionHandler.ExceptionCause exceptionCause; + + InitializationException(String message, InitializationExceptionHandler.ExceptionCause exceptionCause) { + super(message); + this.exceptionCause = exceptionCause; + } + } + + /** + * Handles exceptions thrown during the initialization of this component.
+ */ + protected interface InitializationExceptionHandler { + /** + * Default {@link InitializationExceptionHandler} that captures the exception only when its cause is {@link ExceptionCause#NO_CONFIG_FILE_DEFINED}, and returns false for any other cause. + */ + InitializationExceptionHandler NO_OP = new InitializationExceptionHandler() { @Override - public int compare(int slot1, int slot2) { - return values[slot1] - values[slot2]; // values will be small enough that there is no overflow concern + public boolean handleInitializationException(Exception e, ExceptionCause exceptionCause) { + return exceptionCause == ExceptionCause.NO_CONFIG_FILE_DEFINED; } + }; + + enum ExceptionCause { + /** + * The component parameter {@link #FIELD_TYPE} defines an unknown field type. + */ + UNKNOWN_FIELD_TYPE, + /** + * This component requires the schema to have a uniqueKeyField, which it does not have. + */ + MISSING_UNIQUE_KEY_FIELD, + /** + * Missing component parameter {@link #CONFIG_FILE} - it has to define the path to the elevation configuration file (e.g. elevate.xml). + */ + NO_CONFIG_FILE_DEFINED, + /** + * The elevation configuration file (e.g. elevate.xml) cannot be found, or is defined in both conf/ and data/ directories. + */ + MISSING_CONFIG_FILE, + /** + * The elevation configuration file (e.g. elevate.xml) is empty. + */ + EMPTY_CONFIG_FILE, + /** + * Unclassified exception cause. + */ + OTHER, + } + + /** + * Potentially handles and captures an exception that occurred while initializing the component. + * If the exception is captured, the component fails to initialize silently and is muted. + * + * @param e The exception caught. + * @param exceptionCause The exception cause. + * @param <E> The exception type. + * @return true if the exception is handled and captured by this handler (and thus will not be + * thrown anymore); false if the exception is not captured, in this case it will be probably + * thrown again by the calling code. + * @throws E If this handler throws the exception itself (it may add some cause or message).
+ */ + boolean handleInitializationException(E e, ExceptionCause exceptionCause) throws E; + } + /** + * Handles resource loading exception. + */ + protected interface LoadingExceptionHandler { + + /** + * NoOp {@link LoadingExceptionHandler} that does not capture any exception and simply returns false. + */ + LoadingExceptionHandler NO_OP = new LoadingExceptionHandler() { @Override - public void setBottom(int slot) { - bottomVal = values[slot]; + public boolean handleLoadingException(Exception e, boolean resourceAccessIssue) { + return false; } @Override - public void setTopValue(Integer value) { - topVal = value.intValue(); + public int getLoadingMaxAttempts() { + return 0; } + }; - private int docVal(int doc) { - if (ordSet.size() > 0) { - int slot = ordSet.find(doc); - if (slot >= 0) { - BytesRef id = termValues[slot]; - Integer prio = elevations.priority.get(id); - return prio == null ? 0 : prio.intValue(); - } - } - return 0; + /** + * Potentially handles and captures an exception that occurred while loading a resource. + * + * @param e The exception caught. + * @param resourceAccessIssue true if the exception has been thrown because the resource could not + * be accessed (missing or cannot be read); false if the resource has + * been found and accessed but the error occurred while loading the resource + * (invalid format, incomplete or corrupted). + * @param The exception type. + * @return true if the exception is handled and captured by this handler (and thus will not be + * thrown anymore); false if the exception is not captured, in this case it will be probably + * thrown again by the calling code. + * @throws E If this handler throws the exception itself (it may add some cause or message). + */ + boolean handleLoadingException(E e, boolean resourceAccessIssue) throws E; + + /** + * Gets the maximum number of attempts to load the resource in case of error (resource not found, I/O error, + * invalid format), for each Solr core. 
+ * After this number of attempts (so {@link #handleLoadingException} is called this number of times), + * {@link #handleLoadingException} will not be called anymore for the specific Solr core, and the resource is + * considered empty afterwards (until the core is reloaded). + * + * @return The maximum number of attempts to load the resource. The value must be >= 0. + */ + int getLoadingMaxAttempts(); + } + + //--------------------------------------------------------------------------------- + // Elevation classes + //--------------------------------------------------------------------------------- + + /** + * Creates an elevation. + * + * @param elevatedIds The ids of the elevated documents that should appear on top of search results; can be null. + * @param excludedIds The ids of the excluded documents that should not appear in search results; can be null. + */ + private Elevation createElevation(Collection elevatedIds, Collection excludedIds) { + return new Elevation(elevatedIds, excludedIds, indexedValueProvider, uniqueKeyFieldName, keepElevationPriority); + } + + /** + * Provides the elevations defined for queries. + */ + protected interface ElevationProvider { + /** + * Gets the elevation associated to the provided query. + * + * @param queryString The query string (not {@link #analyzeQuery(String, Analyzer) analyzed} yet, + * this {@link ElevationProvider} is in charge of analyzing it). + * @return The elevation associated with the query; or null if none. + */ + Elevation getElevationForQuery(String queryString) throws IOException; + + /** + * Sets the elevation for the provided query. + *

+ * By contract and by design, only one elevation may be associated + * with a given query (this can be safely verified by an assertion). + *

+ *

+ * It is not allowed to call this method once this {@link ElevationProvider} becomes {@link #makeImmutable() immutable}. + * Otherwise a {@link RuntimeException} may be thrown. + *

+ * + * @param elevatingQuery The query triggering elevation. + * @param elevation The elevation. + */ + void setElevationForQuery(ElevatingQuery elevatingQuery, Elevation elevation) throws IOException; + + /** + * Gets the number of query elevations in this {@link ElevationProvider}. + */ + int size(); + + /** + * Makes this elevation provider immutable. + *

Calling {@link #setElevationForQuery} afterwards may throw a {@link RuntimeException}.

+ *

Making this elevation provider immutable may reduce its memory usage and make it more efficient.

+ * + * @return This elevation provider. + */ + ElevationProvider makeImmutable(); + } + + /** + * {@link ElevationProvider} that returns no elevation. + */ + @SuppressWarnings("WeakerAccess") + protected static final ElevationProvider NO_OP_ELEVATION_PROVIDER = new ElevationProvider() { + @Override + public Elevation getElevationForQuery(String queryString) { + return null; + } + + @Override + public void setElevationForQuery(ElevatingQuery elevatingQuery, Elevation elevation) { + // Do nothing. + } + + @Override + public int size() { + return 0; + } + + @Override + public ElevationProvider makeImmutable() { + return this; + } + }; + + /** + * Simple query exact match {@link ElevationProvider}. + *

+ * It does not support subset matching (see {@link #parseMatchPolicy(String)}). + *

+ */ + protected static class MapElevationProvider implements ElevationProvider { + + private final Analyzer queryAnalyzer; + private Map exactMatchElevationMap = new HashMap<>(); + + @SuppressWarnings("WeakerAccess") + public MapElevationProvider(Analyzer queryAnalyzer) { + this.queryAnalyzer = queryAnalyzer; + } + + @Override + public Elevation getElevationForQuery(String queryString) throws IOException { + String analyzedQuery = analyzeQuery(queryString, queryAnalyzer); + return exactMatchElevationMap.get(analyzedQuery); + } + + @Override + public void setElevationForQuery(ElevatingQuery elevatingQuery, Elevation elevation) throws IOException { + if (elevatingQuery.subsetMatch) { + throw new UnsupportedOperationException("Subset matching is not supported by " + getClass().getName()); } + String analyzedQuery = analyzeQuery(elevatingQuery.queryString, queryAnalyzer); + Elevation duplicateElevation = exactMatchElevationMap.put(analyzedQuery, elevation); + assert duplicateElevation == null; + } - @Override - public int compareBottom(int doc) { - return bottomVal - docVal(doc); + @Override + public int size() { + return exactMatchElevationMap.size(); + } + + @Override + public ElevationProvider makeImmutable() { + exactMatchElevationMap = Collections.unmodifiableMap(exactMatchElevationMap); + return this; + } + } + + /** + * Query triggering elevation. + */ + protected static class ElevatingQuery { + + @SuppressWarnings("WeakerAccess") + public final String queryString; + @SuppressWarnings("WeakerAccess") + public final boolean subsetMatch; + + /** + * @param queryString The query to elevate documents for (not the analyzed form). + * @param subsetMatch Whether to match a subset of query terms. 
+ */ + private ElevatingQuery(String queryString, boolean subsetMatch) throws IOException { + this.queryString = queryString; + this.subsetMatch = subsetMatch; + } + + @Override + public boolean equals(Object o) { + if (!(o instanceof ElevatingQuery)) { + return false; } + ElevatingQuery eq = (ElevatingQuery) o; + return queryString.equals(eq.queryString) && subsetMatch == eq.subsetMatch; + } - @Override - public void copy(int slot, int doc) { - values[slot] = docVal(doc); + @Override + public int hashCode() { + return queryString.hashCode() + (subsetMatch ? 1 : 0); + } + } + + /** + * Builds an {@link Elevation}. This class is used to start defining query elevations, but allowing the merge of + * multiple elevations for the same query. + */ + private class ElevationBuilder { + + /** + * The ids of the elevated documents that should appear on top of search results; can be null. + */ + private Set elevatedIds; + /** + * The ids of the excluded documents that should not appear in search results; can be null. 
+ */ + private Set excludedIds; + + ElevationBuilder addElevatedId(String id) { + if (elevatedIds == null) { + elevatedIds = createIdSet(); } + elevatedIds.add(id); + return this; + } - @Override - protected void doSetNextReader(LeafReaderContext context) throws IOException { - //convert the ids to Lucene doc ids, the ordSet and termValues needs to be the same size as the number of elevation docs we have - ordSet.clear(); - Terms terms = context.reader().terms(idField); - if (terms == null) return; - TermsEnum termsEnum = terms.iterator(); - BytesRefBuilder term = new BytesRefBuilder(); - Bits liveDocs = context.reader().getLiveDocs(); - - for (String id : elevations.ids) { - term.copyChars(id); - if (seen.contains(id) == false && termsEnum.seekExact(term.get())) { - postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE); - int docId = postingsEnum.nextDoc(); - while (docId != DocIdSetIterator.NO_MORE_DOCS && liveDocs != null && liveDocs.get(docId) == false) { - docId = postingsEnum.nextDoc(); - } - if (docId == DocIdSetIterator.NO_MORE_DOCS ) continue; // must have been deleted - termValues[ordSet.put(docId)] = term.toBytesRef(); - seen.add(id); - assert postingsEnum.nextDoc() == DocIdSetIterator.NO_MORE_DOCS; + ElevationBuilder addElevatedIds(List ids) { + for (String id : ids) { + addElevatedId(id); + } + return this; + } + + ElevationBuilder addExcludedId(String id) { + if (excludedIds == null) { + excludedIds = createIdSet(); + } + excludedIds.add(id); + return this; + } + + ElevationBuilder addExcludedIds(List ids) { + for (String id : ids) { + addExcludedId(id); + } + return this; + } + + ElevationBuilder merge(ElevationBuilder elevationBuilder) { + if (elevatedIds == null) { + elevatedIds = elevationBuilder.elevatedIds; + } else if (elevationBuilder.elevatedIds != null) { + elevatedIds.addAll(elevationBuilder.elevatedIds); + } + if (excludedIds == null) { + excludedIds = elevationBuilder.excludedIds; + } else if (elevationBuilder.excludedIds != 
null) { + excludedIds.addAll(elevationBuilder.excludedIds); + } + return this; + } + + Elevation build() { + return createElevation(elevatedIds, excludedIds); + } + + private Set createIdSet() { + return (keepElevationPriority ? new LinkedHashSet<>() : new HashSet<>()); + } + } + + /** + * Elevation of some documents in search results, with potential exclusion of others. + */ + protected static class Elevation { + + private static final BooleanQuery EMPTY_QUERY = new BooleanQuery.Builder().build(); + + @VisibleForTesting + final Set elevatedIds; + private final BooleanQuery includeQuery; + @VisibleForTesting + final Map priorities; + private final Set excludedIds; + private final TermQuery[] excludeQueries;//just keep the term query, b/c we will not always explicitly exclude the item based on markExcludes query time param + + /** + * Constructs an elevation. + * + * @param elevatedIds The ids of the elevated documents that should appear on top of search results; can be null. + * @param excludedIds The ids of the excluded documents that should not appear in search results; can be null. + * @param indexedValueProvider Provides indexed values. + * @param queryFieldName The field name to use to create query terms. + * @param keepElevationPriority Whether to keep the elevation priority order. 
+ */ + private Elevation(Collection elevatedIds, Collection excludedIds, + IndexedValueProvider indexedValueProvider, String queryFieldName, + boolean keepElevationPriority) { + if (elevatedIds == null || elevatedIds.isEmpty()) { + this.elevatedIds = Collections.emptySet(); + includeQuery = EMPTY_QUERY; + priorities = Collections.emptyMap(); + } else { + ImmutableSet.Builder elevatedIdsBuilder = ImmutableSet.builder(); + BooleanQuery.Builder includeQueryBuilder = new BooleanQuery.Builder(); + ImmutableMap.Builder prioritiesBuilder = null; + if (keepElevationPriority) { + prioritiesBuilder = ImmutableMap.builder(); + } + int priorityLevel = elevatedIds.size(); + for (String elevatedId : elevatedIds) { + elevatedIdsBuilder.add(indexedValueProvider.getIndexedValue(elevatedId)); + TermQuery tq = new TermQuery(new Term(queryFieldName, elevatedId)); + includeQueryBuilder.add(tq, BooleanClause.Occur.SHOULD); + if (keepElevationPriority) { + prioritiesBuilder.put(new BytesRef(elevatedId), priorityLevel--); } } + this.elevatedIds = elevatedIdsBuilder.build(); + includeQuery = includeQueryBuilder.build(); + priorities = keepElevationPriority ? 
prioritiesBuilder.build() : null; } - @Override - public Integer value(int slot) { - return values[slot]; + if (excludedIds == null || excludedIds.isEmpty()) { + this.excludedIds = Collections.emptySet(); + excludeQueries = null; + } else { + ImmutableSet.Builder excludedIdsBuilder = ImmutableSet.builder(); + List excludeQueriesBuilder = new ArrayList<>(excludedIds.size()); + for (String excludedId : excludedIds) { + excludedIdsBuilder.add(indexedValueProvider.getIndexedValue(excludedId)); + excludeQueriesBuilder.add(new TermQuery(new Term(queryFieldName, excludedId))); + } + this.excludedIds = excludedIdsBuilder.build(); + excludeQueries = excludeQueriesBuilder.toArray(new TermQuery[excludeQueriesBuilder.size()]); } + } - @Override - public int compareTop(int doc) { - final int docValue = docVal(doc); - return topVal - docValue; // values will be small enough that there is no overflow concern - } - }; + @Override + public String toString() { + return "{elevatedIds=" + elevatedIds + ", excludedIds=" + excludedIds + "}"; + } + } + + private class ElevationComparatorSource extends FieldComparatorSource { + + private final Elevation elevation; + private final SentinelIntSet ordSet; //the key half of the map + private final BytesRef[] termValues; //the value half of the map + + private ElevationComparatorSource(Elevation elevation) { + this.elevation = elevation; + int size = elevation.elevatedIds.size(); + ordSet = new SentinelIntSet(size, -1); + termValues = keepElevationPriority ? 
new BytesRef[ordSet.keys.length] : null; + } + + @Override + public FieldComparator newComparator(String fieldName, final int numHits, int sortPos, boolean reversed) { + return new SimpleFieldComparator() { + final int[] values = new int[numHits]; + int bottomVal; + int topVal; + PostingsEnum postingsEnum; + final Set seen = new HashSet<>(elevation.elevatedIds.size()); + + @Override + public int compare(int slot1, int slot2) { + return values[slot1] - values[slot2]; // values will be small enough that there is no overflow concern + } + + @Override + public void setBottom(int slot) { + bottomVal = values[slot]; + } + + @Override + public void setTopValue(Integer value) { + topVal = value; + } + + private int docVal(int doc) { + if (ordSet.size() > 0) { + int slot = ordSet.find(doc); + if (slot >= 0) { + if (!keepElevationPriority) + return 1; + BytesRef id = termValues[slot]; + Integer priority = elevation.priorities.get(id); + return priority == null ? 0 : priority; + } + } + return 0; + } + + @Override + public int compareBottom(int doc) { + return bottomVal - docVal(doc); + } + + @Override + public void copy(int slot, int doc) { + values[slot] = docVal(doc); + } + + @Override + protected void doSetNextReader(LeafReaderContext context) throws IOException { + //convert the ids to Lucene doc ids, the ordSet and termValues needs to be the same size as the number of elevation docs we have + ordSet.clear(); + Terms terms = context.reader().terms(uniqueKeyFieldName); + if (terms == null) return; + TermsEnum termsEnum = terms.iterator(); + BytesRefBuilder term = new BytesRefBuilder(); + Bits liveDocs = context.reader().getLiveDocs(); + + for (String id : elevation.elevatedIds) { + term.copyChars(id); + if (!seen.contains(id) && termsEnum.seekExact(term.get())) { + postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE); + int docId = postingsEnum.nextDoc(); + while (docId != DocIdSetIterator.NO_MORE_DOCS && liveDocs != null && !liveDocs.get(docId)) { + docId = 
postingsEnum.nextDoc(); + } + if (docId == DocIdSetIterator.NO_MORE_DOCS ) continue; // must have been deleted + int slot = ordSet.put(docId); + if (keepElevationPriority) { + termValues[slot] = term.toBytesRef(); + } + seen.add(id); + assert postingsEnum.nextDoc() == DocIdSetIterator.NO_MORE_DOCS; + } + } + } + + @Override + public Integer value(int slot) { + return values[slot]; + } + + @Override + public int compareTop(int doc) { + final int docValue = docVal(doc); + return topVal - docValue; // values will be small enough that there is no overflow concern + } + }; + } } + + /** + * Provides indexed value from readable value. + */ + private interface IndexedValueProvider { + /** + * Gets the indexed value corresponding to a readable value. + * + * @param readableValue The readable value. + * @return The indexed value. + */ + String getIndexedValue(String readableValue); } } diff --git a/solr/core/src/test/org/apache/solr/handler/component/QueryElevationComponentTest.java b/solr/core/src/test/org/apache/solr/handler/component/QueryElevationComponentTest.java index 829c585c44fa..2528b3f56247 100644 --- a/solr/core/src/test/org/apache/solr/handler/component/QueryElevationComponentTest.java +++ b/solr/core/src/test/org/apache/solr/handler/component/QueryElevationComponentTest.java @@ -17,7 +17,6 @@ package org.apache.solr.handler.component; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.util.BytesRef; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.GroupParams; @@ -26,7 +25,6 @@ import org.apache.solr.util.FileUtils; import org.apache.solr.common.util.NamedList; import org.apache.solr.core.SolrCore; -import org.apache.solr.handler.component.QueryElevationComponent.ElevationObj; import org.apache.solr.request.LocalSolrQueryRequest; import org.apache.solr.request.SolrQueryRequest; import org.junit.Before; @@ -360,17 +358,17 @@ public void testInterface() throws 
Exception { SolrQueryRequest req = req(); IndexReader reader = req.getSearcher().getIndexReader(); - Map map = comp.getElevationMap(reader, core); + QueryElevationComponent.ElevationProvider elevationProvider = comp.getElevationProvider(reader, core); req.close(); // Make sure the boosts loaded properly - assertEquals(7, map.size()); - assertEquals(1, map.get("XXXX").priority.size()); - assertEquals(2, map.get("YYYY").priority.size()); - assertEquals(3, map.get("ZZZZ").priority.size()); - assertEquals(null, map.get("xxxx")); - assertEquals(null, map.get("yyyy")); - assertEquals(null, map.get("zzzz")); + assertEquals(7, elevationProvider.size()); + assertEquals(1, elevationProvider.getElevationForQuery("XXXX").priorities.size()); + assertEquals(2, elevationProvider.getElevationForQuery("YYYY").priorities.size()); + assertEquals(3, elevationProvider.getElevationForQuery("ZZZZ").priorities.size()); + assertEquals(null, elevationProvider.getElevationForQuery("xxxx")); + assertEquals(null, elevationProvider.getElevationForQuery("yyyy")); + assertEquals(null, elevationProvider.getElevationForQuery("zzzz")); // Now test the same thing with a lowercase filter: 'lowerfilt' args = new NamedList<>(); @@ -380,17 +378,17 @@ public void testInterface() throws Exception { comp = new QueryElevationComponent(); comp.init(args); comp.inform(core); - map = comp.getElevationMap(reader, core); - assertEquals(7, map.size()); - assertEquals(null, map.get("XXXX")); - assertEquals(null, map.get("YYYY")); - assertEquals(null, map.get("ZZZZ")); - assertEquals(1, map.get("xxxx").priority.size()); - assertEquals(2, map.get("yyyy").priority.size()); - assertEquals(3, map.get("zzzz").priority.size()); - - assertEquals("xxxx", comp.getAnalyzedQuery("XXXX")); - assertEquals("xxxxyyyy", comp.getAnalyzedQuery("XXXX YYYY")); + elevationProvider = comp.getElevationProvider(reader, core); + assertEquals(7, elevationProvider.size()); + assertEquals(1, 
elevationProvider.getElevationForQuery("XXXX").priorities.size()); + assertEquals(2, elevationProvider.getElevationForQuery("YYYY").priorities.size()); + assertEquals(3, elevationProvider.getElevationForQuery("ZZZZ").priorities.size()); + assertEquals(1, elevationProvider.getElevationForQuery("xxxx").priorities.size()); + assertEquals(2, elevationProvider.getElevationForQuery("yyyy").priorities.size()); + assertEquals(3, elevationProvider.getElevationForQuery("zzzz").priorities.size()); + + assertEquals("xxxx", comp.analyzeQuery("XXXX")); + assertEquals("xxxx yyyy", comp.analyzeQuery("XXXX YYYY")); assertQ("Make sure QEC handles null queries", req("qt", "/elevate", "q.alt", "*:*", "defType", "dismax"), "//*[@numFound='0']"); @@ -555,9 +553,7 @@ public void testSorting() throws Exception { ); // Explicitly set what gets boosted - booster.elevationCache.clear(); - booster.setTopQueryResults(reader, query, new String[]{"x", "y", "z"}, null); - + booster.setTopQueryResults(reader, query, false, new String[]{"x", "y", "z"}, null); req.close(); req = new LocalSolrQueryRequest(h.getCore(), new MapSolrParams(args)); assertQ("All six should make it", req @@ -570,11 +566,9 @@ public void testSorting() throws Exception { , "//result/doc[6]/str[@name='id'][.='a']" ); - booster.elevationCache.clear(); - // now switch the order: req.close(); req = new LocalSolrQueryRequest(h.getCore(), new MapSolrParams(args)); - booster.setTopQueryResults(reader, query, new String[]{"a", "x"}, null); + booster.setTopQueryResults(reader, query, false, new String[]{"a", "x"}, null); assertQ("All four should make it", req , "//*[@numFound='4']" , "//result/doc[1]/str[@name='id'][.='a']" @@ -631,7 +625,7 @@ public void testSorting() throws Exception { //Test exclusive (not to be confused with exclusion) args.put(QueryElevationParams.EXCLUSIVE, "true"); req.close(); req = new LocalSolrQueryRequest(h.getCore(), new MapSolrParams(args)); - booster.setTopQueryResults(reader, query, new String[]{"x", 
"a"}, new String[]{}); + booster.setTopQueryResults(reader, query, false, new String[]{"x", "a"}, new String[]{}); assertQ(null, req , "//*[@numFound='2']" , "//result/doc[1]/str[@name='id'][.='x']" @@ -639,11 +633,10 @@ public void testSorting() throws Exception { ); // Test exclusion - booster.elevationCache.clear(); args.remove(CommonParams.SORT); args.remove(QueryElevationParams.EXCLUSIVE); req.close(); req = new LocalSolrQueryRequest(h.getCore(), new MapSolrParams(args)); - booster.setTopQueryResults(reader, query, new String[]{"x"}, new String[]{"a"}); + booster.setTopQueryResults(reader, query, false, new String[]{"x"}, new String[]{"a"}); assertQ(null, req , "//*[@numFound='3']" , "//result/doc[1]/str[@name='id'][.='x']" @@ -654,7 +647,7 @@ public void testSorting() throws Exception { // Test setting ids and excludes from http parameters - booster.elevationCache.clear(); + booster.clearElevationProviderCache(); args.put(QueryElevationParams.IDS, "x,y,z"); args.put(QueryElevationParams.EXCLUDE, "b"); req.close(); req = new LocalSolrQueryRequest(h.getCore(), new MapSolrParams(args)); @@ -706,8 +699,8 @@ public void testElevationReloading() throws Exception { try { init("schema12.xml"); String testfile = "data-elevation.xml"; - File f = new File(h.getCore().getDataDir(), testfile); - writeFile(f, "aaa", "A"); + File configFile = new File(h.getCore().getDataDir(), testfile); + writeFile(configFile, "aaa", "A"); QueryElevationComponent comp = (QueryElevationComponent) h.getCore().getSearchComponent("elevate"); NamedList args = new NamedList<>(); @@ -717,21 +710,46 @@ public void testElevationReloading() throws Exception { SolrQueryRequest req = req(); IndexReader reader = req.getSearcher().getIndexReader(); - Map map = comp.getElevationMap(reader, h.getCore()); - assertTrue(map.get("aaa").priority.containsKey(new BytesRef("A"))); - assertNull(map.get("bbb")); + QueryElevationComponent.ElevationProvider elevationProvider = comp.getElevationProvider(reader, 
h.getCore()); + assertTrue(elevationProvider.getElevationForQuery("aaa").elevatedIds.contains("A")); + assertNull(elevationProvider.getElevationForQuery("bbb")); req.close(); // now change the file - writeFile(f, "bbb", "B"); - assertU(adoc("id", "10000")); // will get same reader if no index change + writeFile(configFile, "bbb", "B"); + + // With no index change, we get the same index reader, so the elevationProviderCache returns the previous ElevationProvider without the change. + req = req(); + reader = req.getSearcher().getIndexReader(); + elevationProvider = comp.getElevationProvider(reader, h.getCore()); + assertTrue(elevationProvider.getElevationForQuery("aaa").elevatedIds.contains("A")); + assertNull(elevationProvider.getElevationForQuery("bbb")); + req.close(); + + // Index a new doc to get a new index reader. + assertU(adoc("id", "10000")); assertU(commit()); + // Check that we effectively reload a new ElevationProvider for a different index reader (so two entries in elevationProviderCache). + req = req(); + reader = req.getSearcher().getIndexReader(); + elevationProvider = comp.getElevationProvider(reader, h.getCore()); + assertNull(elevationProvider.getElevationForQuery("aaa")); + assertTrue(elevationProvider.getElevationForQuery("bbb").elevatedIds.contains("B")); + req.close(); + + // Now change the config file again. + writeFile(configFile, "ccc", "C"); + + // Without index change, but calling a different method that clears the elevationProviderCache, so we should load a new ElevationProvider. 
+ int elevationRuleNumber = comp.loadElevationConfiguration(h.getCore()); + assertEquals(1, elevationRuleNumber); req = req(); reader = req.getSearcher().getIndexReader(); - map = comp.getElevationMap(reader, h.getCore()); - assertNull(map.get("aaa")); - assertTrue(map.get("bbb").priority.containsKey(new BytesRef("B"))); + elevationProvider = comp.getElevationProvider(reader, h.getCore()); + assertNull(elevationProvider.getElevationForQuery("aaa")); + assertNull(elevationProvider.getElevationForQuery("bbb")); + assertTrue(elevationProvider.getElevationForQuery("ccc").elevatedIds.contains("C")); req.close(); } finally { delete(); diff --git a/solr/solrj/src/java/org/apache/solr/common/params/QueryElevationParams.java b/solr/solrj/src/java/org/apache/solr/common/params/QueryElevationParams.java index 794bbed8e10b..d538cab6a2b4 100644 --- a/solr/solrj/src/java/org/apache/solr/common/params/QueryElevationParams.java +++ b/solr/solrj/src/java/org/apache/solr/common/params/QueryElevationParams.java @@ -48,4 +48,10 @@ public interface QueryElevationParams { * as excluded. */ String MARK_EXCLUDES = "markExcludes"; + + /** + * Whether the priority order between elevated documents is kept, based on the definition order in the configuration file. + * This parameter is only taken into account if {@link QueryElevationParams#FORCE_ELEVATION} is true. 
+ */ + String KEEP_ELEVATION_PRIORITY = "keepElevationPriority"; } From 2699564526bf01887a7119b41167e03663ba9a29 Mon Sep 17 00:00:00 2001 From: broustant Date: Fri, 30 Mar 2018 14:04:43 +0200 Subject: [PATCH 6/8] Refactor QueryElevationComponent after review --- .../component/QueryElevationComponent.java | 209 +++++++----------- 1 file changed, 77 insertions(+), 132 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java b/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java index cadcfc02f8b9..2bd31e92e8fc 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java @@ -27,14 +27,17 @@ import java.io.StringReader; import java.lang.invoke.MethodHandles; import java.util.*; +import java.util.function.UnaryOperator; import java.util.stream.Collector; import java.util.stream.Collectors; import com.carrotsearch.hppc.IntIntHashMap; import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Throwables; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Maps; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; @@ -114,7 +117,10 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore private Analyzer queryAnalyzer; private String uniqueKeyFieldName; private FieldType uniqueKeyFieldType; - private IndexedValueProvider indexedValueProvider; + /** + * Provides the indexed value corresponding to a readable value. 
+ */ + private UnaryOperator indexedValueProvider; @VisibleForTesting boolean forceElevation; private boolean keepElevationPriority; @@ -181,17 +187,11 @@ private void setUniqueKeyField(SolrCore core) throws InitializationException { private void parseExcludedMarkerFieldName(SolrCore core) { String markerName = initArgs.get(QueryElevationParams.EXCLUDE_MARKER_FIELD_NAME, DEFAULT_EXCLUDE_MARKER_FIELD_NAME); - if (markerName == null || markerName.equals("")) { - markerName = DEFAULT_EXCLUDE_MARKER_FIELD_NAME; - } core.addTransformerFactory(markerName, new ExcludedMarkerFactory()); } private void parseEditorialMarkerFieldName(SolrCore core) { String markerName = initArgs.get(QueryElevationParams.EDITORIAL_MARKER_FIELD_NAME, DEFAULT_EDITORIAL_MARKER_FIELD_NAME); - if (markerName == null || markerName.equals("")) { - markerName = DEFAULT_EDITORIAL_MARKER_FIELD_NAME; - } core.addTransformerFactory(markerName, new ElevatedMarkerFactory()); } @@ -352,9 +352,6 @@ private ElevationProvider loadElevationProvider(SolrCore core) throws IOExceptio /** * Loads the {@link ElevationProvider}. - *

<p> - * This method can be overridden. - * </p>

* * @throws java.io.IOException If an I/O error occurs while analyzing the triggering queries. * @throws RuntimeException If the config does not provide an XML content of the expected format @@ -403,11 +400,7 @@ protected ElevationProvider loadElevationProvider(Config config) throws IOExcept previousElevationBuilder.merge(elevationBuilder); } } - ElevationProvider elevationProvider = createElevationProvider(queryAnalyzer); - for (Map.Entry entry : elevationBuilderMap.entrySet()) { - elevationProvider.setElevationForQuery(entry.getKey(), entry.getValue().build()); - } - return elevationProvider.makeImmutable(); + return createElevationProvider(queryAnalyzer, elevationBuilderMap); } private boolean parseMatchPolicy(String matchString) { @@ -638,50 +631,46 @@ private void addDebugInfo(ResponseBuilder rb, Elevation elevation) { // Boosted docs helper //--------------------------------------------------------------------------------- - public static IntIntHashMap getBoostDocs(SolrIndexSearcher indexSearcher, Mapboosted, Map context) throws IOException { + public static IntIntHashMap getBoostDocs(SolrIndexSearcher indexSearcher, Map boosted, Map context) throws IOException { IntIntHashMap boostDocs = null; - if(boosted != null) { + if (boosted != null) { //First see if it's already in the request context. Could have been put there //by another caller. - if(context != null) { + if (context != null) { boostDocs = (IntIntHashMap) context.get(BOOSTED_DOCIDS); } - if(boostDocs != null) { + if (boostDocs != null) { return boostDocs; } //Not in the context yet so load it. 
SchemaField idField = indexSearcher.getSchema().getUniqueKeyField(); String fieldName = idField.getName(); - HashSet localBoosts = new HashSet<>(boosted.size()*2); - for (BytesRef boost : boosted.keySet()) { - localBoosts.add(boost); - } boostDocs = new IntIntHashMap(boosted.size()); Listleaves = indexSearcher.getTopReaderContext().leaves(); PostingsEnum postingsEnum = null; - for(LeafReaderContext leaf : leaves) { + for (LeafReaderContext leaf : leaves) { LeafReader reader = leaf.reader(); int docBase = leaf.docBase; Bits liveDocs = reader.getLiveDocs(); Terms terms = reader.terms(fieldName); TermsEnum termsEnum = terms.iterator(); - Iterator it = localBoosts.iterator(); - while(it.hasNext()) { + Iterator it = boosted.keySet().iterator(); + while (it.hasNext()) { BytesRef ref = it.next(); - if(termsEnum.seekExact(ref)) { + if (termsEnum.seekExact(ref)) { postingsEnum = termsEnum.postings(postingsEnum); int doc = postingsEnum.nextDoc(); while (doc != PostingsEnum.NO_MORE_DOCS && liveDocs != null && !liveDocs.get(doc)) { doc = postingsEnum.nextDoc(); } - if(doc != PostingsEnum.NO_MORE_DOCS) { + if (doc != PostingsEnum.NO_MORE_DOCS) { //Found the document. int p = boosted.get(ref); boostDocs.put(doc+docBase, p); @@ -715,7 +704,6 @@ public String getDescription() { /** * Gets the default value for {@link org.apache.solr.common.params.QueryElevationParams#FORCE_ELEVATION} parameter. - *

<p>Can be overridden by extending this class.</p>

*/ @SuppressWarnings("WeakerAccess") protected boolean getDefaultForceElevation() { @@ -724,7 +712,6 @@ protected boolean getDefaultForceElevation() { /** * Gets the default value for {@link #DEFAULT_KEEP_ELEVATION_PRIORITY} parameter. - *

<p>Can be overridden by extending class.</p>

*/ @SuppressWarnings("WeakerAccess") protected boolean getDefaultKeepElevationPriority() { @@ -733,7 +720,6 @@ protected boolean getDefaultKeepElevationPriority() { /** * Gets the default subset match policy. - *

<p>Can be overridden by extending class.</p>

*/ @SuppressWarnings("WeakerAccess") protected boolean getDefaultSubsetMatch() { @@ -743,8 +729,6 @@ protected boolean getDefaultSubsetMatch() { /** * Gets the {@link InitializationExceptionHandler} that handles exception thrown during the initialization of the * elevation configuration. - *

<p>Can be overridden by extending class. This method provides a mean to set a custom exception handler if a - * specific error processing is needed.</p>

*/ @SuppressWarnings("WeakerAccess") protected InitializationExceptionHandler getInitializationExceptionHandler() { @@ -753,8 +737,6 @@ protected InitializationExceptionHandler getInitializationExceptionHandler() { /** * Gets the {@link LoadingExceptionHandler} that handles exception thrown during the loading of the elevation configuration. - *

<p>Can be overridden by extending class. This method provides a mean to set a custom exception handler if a - * specific error processing is needed.</p>

*/ @SuppressWarnings("WeakerAccess") protected LoadingExceptionHandler getConfigLoadingExceptionHandler() { @@ -764,16 +746,14 @@ protected LoadingExceptionHandler getConfigLoadingExceptionHandler() { /** * Creates the {@link ElevationProvider} to set during configuration loading. The same instance will be used later * when elevating results for queries. - *

<p> - * Extending classes can override this method to create {@link ElevationProvider} with different behavior. - * </p>

* * @param queryAnalyzer to analyze and tokenize the query. + * @param elevationBuilderMap map of all {@link ElevatingQuery} and their corresponding {@link ElevationBuilder}. * @return The created {@link ElevationProvider}. */ @SuppressWarnings("WeakerAccess") - protected ElevationProvider createElevationProvider(Analyzer queryAnalyzer) { - return new MapElevationProvider(queryAnalyzer); + protected ElevationProvider createElevationProvider(Analyzer queryAnalyzer, Map elevationBuilderMap) { + return new MapElevationProvider(queryAnalyzer, elevationBuilderMap); } //--------------------------------------------------------------------------------- @@ -781,14 +761,14 @@ protected ElevationProvider createElevationProvider(Analyzer queryAnalyzer) { //--------------------------------------------------------------------------------- @VisibleForTesting - String analyzeQuery(String queryString) throws IOException { + String analyzeQuery(String queryString) { return analyzeQuery(queryString, queryAnalyzer); } /** * Analyzes the provided query string and returns a concatenation of the analyzed tokens. 
*/ - private static String analyzeQuery(String queryString, Analyzer queryAnalyzer) throws IOException { + private static String analyzeQuery(String queryString, Analyzer queryAnalyzer) { if (queryAnalyzer == null) { return queryString; } @@ -797,15 +777,20 @@ private static String analyzeQuery(String queryString, Analyzer queryAnalyzer) t return queryTerms.stream().collect(QUERY_EXACT_JOINER); } - private static void splitQueryTermsWithAnalyzer(String queryString, Analyzer queryAnalyzer, Collection tokenCollector) throws IOException { - TokenStream tokens = queryAnalyzer.tokenStream("", new StringReader(queryString)); - tokens.reset(); - CharTermAttribute termAttribute = tokens.addAttribute(CharTermAttribute.class); - while (tokens.incrementToken()) { - tokenCollector.add(new String(termAttribute.buffer(), 0, termAttribute.length())); + private static void splitQueryTermsWithAnalyzer(String queryString, Analyzer queryAnalyzer, Collection tokenCollector) { + try { + TokenStream tokens = queryAnalyzer.tokenStream("", new StringReader(queryString)); + tokens.reset(); + CharTermAttribute termAttribute = tokens.addAttribute(CharTermAttribute.class); + while (tokens.incrementToken()) { + tokenCollector.add(new String(termAttribute.buffer(), 0, termAttribute.length())); + } + tokens.end(); + tokens.close(); + } catch (IOException e) { + // Will never be thrown since we read a StringReader. 
+ throw Throwables.propagate(e); } - tokens.end(); - tokens.close(); } //--------------------------------------------------------------------------------- @@ -835,12 +820,13 @@ void setTopQueryResults(IndexReader reader, String queryString, boolean subsetMa excludedIds = new String[0]; } ElevatingQuery elevatingQuery = new ElevatingQuery(queryString, subsetMatch); - Elevation elevation = createElevation(Arrays.asList(elevatedIds), Arrays.asList(excludedIds)); - ElevationProvider elevationProvider; + ElevationBuilder elevationBuilder = new ElevationBuilder() + .addElevatedIds(Arrays.asList(elevatedIds)) + .addExcludedIds(Arrays.asList(excludedIds)); + Map elevationBuilderMap = ImmutableMap.of(elevatingQuery, elevationBuilder); synchronized (elevationProviderCache) { - elevationProvider = elevationProviderCache.computeIfAbsent(reader, k -> createElevationProvider(queryAnalyzer)); + elevationProviderCache.computeIfAbsent(reader, k -> createElevationProvider(queryAnalyzer, elevationBuilderMap)); } - elevationProvider.setElevationForQuery(elevatingQuery, elevation); } @VisibleForTesting @@ -988,42 +974,21 @@ private Elevation createElevation(Collection elevatedIds, Collectionnull if none. - */ - Elevation getElevationForQuery(String queryString) throws IOException; - - /** - * Sets the elevation for the provided query. *

<p> * By contract and by design, only one elevation may be associated * to a given query (this can be safely verified by an assertion). - * </p>

- *

<p> - * It is not allowed to call this method once this {@link ElevationProvider} becomes {@link #makeImmutable() immutable}. - * Otherwise a {@link RuntimeException} may be thrown. - * </p>

* - * @param elevatingQuery The query triggering elevation. - * @param elevation The elevation. + * @param queryString The query string (not {@link #analyzeQuery(String, Analyzer) analyzed} yet, + * this {@link ElevationProvider} is in charge of analyzing it). + * @return The elevation associated with the query; or null if none. */ - void setElevationForQuery(ElevatingQuery elevatingQuery, Elevation elevation) throws IOException; + Elevation getElevationForQuery(String queryString); /** * Gets the number of query elevations in this {@link ElevationProvider}. */ + @VisibleForTesting int size(); - - /** - * Makes this elevation provider immutable. - *

<p>Calling {@link #setElevationForQuery} afterwards will throw an exception.</p>

- *

<p>Making this elevation provider immutable may reduce its memory usage and make it more efficient.</p>

- * - * @return This elevation provider. - */ - ElevationProvider makeImmutable(); } /** @@ -1036,63 +1001,56 @@ public Elevation getElevationForQuery(String queryString) { return null; } - @Override - public void setElevationForQuery(ElevatingQuery elevatingQuery, Elevation elevation) { - // Do nothing. - } - @Override public int size() { return 0; } - - @Override - public ElevationProvider makeImmutable() { - return this; - } }; /** * Simple query exact match {@link ElevationProvider}. *

* It does not support subset matching (see {@link #parseMatchPolicy(String)}). - *

+ *

+ * Immutable. */ protected static class MapElevationProvider implements ElevationProvider { private final Analyzer queryAnalyzer; - private Map exactMatchElevationMap = new HashMap<>(); + private final Map elevationMap; @SuppressWarnings("WeakerAccess") - public MapElevationProvider(Analyzer queryAnalyzer) { + public MapElevationProvider(Analyzer queryAnalyzer, Map elevationBuilderMap) { this.queryAnalyzer = queryAnalyzer; + elevationMap = buildElevationMap(elevationBuilderMap); } - @Override - public Elevation getElevationForQuery(String queryString) throws IOException { - String analyzedQuery = analyzeQuery(queryString, queryAnalyzer); - return exactMatchElevationMap.get(analyzedQuery); - } - - @Override - public void setElevationForQuery(ElevatingQuery elevatingQuery, Elevation elevation) throws IOException { - if (elevatingQuery.subsetMatch) { - throw new UnsupportedOperationException("Subset matching is not supported by " + getClass().getName()); + private Map buildElevationMap(Map elevationBuilderMap) { + Map elevationMap = Maps.newHashMapWithExpectedSize(elevationBuilderMap.size()); + for (Map.Entry entry : elevationBuilderMap.entrySet()) { + ElevatingQuery elevatingQuery = entry.getKey(); + if (elevatingQuery.subsetMatch) { + throw new UnsupportedOperationException("Subset matching is not supported by " + getClass().getName()); + } + String analyzedQuery = analyzeQuery(elevatingQuery.queryString, queryAnalyzer); + Elevation elevation = entry.getValue().build(); + Elevation duplicateElevation = elevationMap.put(analyzedQuery, elevation); + if (duplicateElevation != null) { + throw new IllegalArgumentException("Duplicate elevation for query \"" + analyzedQuery + "\""); + } } - String analyzedQuery = analyzeQuery(elevatingQuery.queryString, queryAnalyzer); - Elevation duplicateElevation = exactMatchElevationMap.put(analyzedQuery, elevation); - assert duplicateElevation == null; + return Collections.unmodifiableMap(elevationMap); } @Override - public int 
size() { - return exactMatchElevationMap.size(); + public Elevation getElevationForQuery(String queryString) { + String analyzedQuery = analyzeQuery(queryString, queryAnalyzer); + return elevationMap.get(analyzedQuery); } @Override - public ElevationProvider makeImmutable() { - exactMatchElevationMap = Collections.unmodifiableMap(exactMatchElevationMap); - return this; + public int size() { + return elevationMap.size(); } } @@ -1110,7 +1068,8 @@ protected static class ElevatingQuery { * @param queryString The query to elevate documents for (not the analyzed form). * @param subsetMatch Whether to match a subset of query terms. */ - private ElevatingQuery(String queryString, boolean subsetMatch) throws IOException { + @SuppressWarnings("WeakerAccess") + protected ElevatingQuery(String queryString, boolean subsetMatch) throws IOException { this.queryString = queryString; this.subsetMatch = subsetMatch; } @@ -1218,12 +1177,12 @@ protected static class Elevation { * * @param elevatedIds The ids of the elevated documents that should appear on top of search results; can be null. * @param excludedIds The ids of the excluded documents that should not appear in search results; can be null. - * @param indexedValueProvider Provides indexed values. + * @param indexedValueProvider Provides the indexed value corresponding to a readable value.. * @param queryFieldName The field name to use to create query terms. * @param keepElevationPriority Whether to keep the elevation priority order. 
*/ private Elevation(Collection elevatedIds, Collection excludedIds, - IndexedValueProvider indexedValueProvider, String queryFieldName, + UnaryOperator indexedValueProvider, String queryFieldName, boolean keepElevationPriority) { if (elevatedIds == null || elevatedIds.isEmpty()) { this.elevatedIds = Collections.emptySet(); @@ -1238,7 +1197,7 @@ private Elevation(Collection elevatedIds, Collection excludedIds } int priorityLevel = elevatedIds.size(); for (String elevatedId : elevatedIds) { - elevatedIdsBuilder.add(indexedValueProvider.getIndexedValue(elevatedId)); + elevatedIdsBuilder.add(indexedValueProvider.apply(elevatedId)); TermQuery tq = new TermQuery(new Term(queryFieldName, elevatedId)); includeQueryBuilder.add(tq, BooleanClause.Occur.SHOULD); if (keepElevationPriority) { @@ -1257,7 +1216,7 @@ private Elevation(Collection elevatedIds, Collection excludedIds ImmutableSet.Builder excludedIdsBuilder = ImmutableSet.builder(); List excludeQueriesBuilder = new ArrayList<>(excludedIds.size()); for (String excludedId : excludedIds) { - excludedIdsBuilder.add(indexedValueProvider.getIndexedValue(excludedId)); + excludedIdsBuilder.add(indexedValueProvider.apply(excludedId)); excludeQueriesBuilder.add(new TermQuery(new Term(queryFieldName, excludedId))); } this.excludedIds = excludedIdsBuilder.build(); @@ -1315,8 +1274,7 @@ private int docVal(int doc) { if (!keepElevationPriority) return 1; BytesRef id = termValues[slot]; - Integer priority = elevation.priorities.get(id); - return priority == null ? 
0 : priority; + return elevation.priorities.getOrDefault(id, 0); } } return 0; @@ -1344,7 +1302,7 @@ protected void doSetNextReader(LeafReaderContext context) throws IOException { for (String id : elevation.elevatedIds) { term.copyChars(id); - if (!seen.contains(id) && termsEnum.seekExact(term.get())) { + if (seen.contains(id) == false && termsEnum.seekExact(term.get())) { postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE); int docId = postingsEnum.nextDoc(); while (docId != DocIdSetIterator.NO_MORE_DOCS && liveDocs != null && !liveDocs.get(docId)) { @@ -1374,17 +1332,4 @@ public int compareTop(int doc) { }; } } - - /** - * Provides indexed value from readable value. - */ - private interface IndexedValueProvider { - /** - * Gets the indexed value corresponding to a readable value. - * - * @param readableValue The readable value. - * @return The indexed value. - */ - String getIndexedValue(String readableValue); - } } From 00b754a0dac072dc3b3eba2ab7f6b1a7af390477 Mon Sep 17 00:00:00 2001 From: broustant Date: Wed, 4 Apr 2018 17:51:03 +0200 Subject: [PATCH 7/8] Remove exception handlers and refactor getBoostDocs --- .../component/QueryElevationComponent.java | 334 +++++------------- 1 file changed, 97 insertions(+), 237 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java b/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java index 2bd31e92e8fc..c2eac7b9f392 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java @@ -132,12 +132,6 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore */ private final Map elevationProviderCache = new WeakHashMap<>(); - /** - * Keep track of a counter each time a configuration file cannot be loaded. 
- * Stop trying to load after {@link #getConfigLoadingExceptionHandler()}.{@link LoadingExceptionHandler#getLoadingMaxAttempts getLoadingMaxAttempts()}. - */ - private final Map configLoadingErrorCounters = new WeakHashMap<>(); - @Override public void init(NamedList args) { this.initArgs = args.toSolrParams(); @@ -160,7 +154,7 @@ public void inform(SolrCore core) { handleInitializationException(e, e.exceptionCause); } catch (Exception e) { assert !initialized; - handleInitializationException(e, InitializationExceptionHandler.ExceptionCause.OTHER); + handleInitializationException(e, InitializationExceptionCause.OTHER); } } @@ -169,7 +163,7 @@ private void parseFieldType(SolrCore core) throws InitializationException { if (a != null) { FieldType ft = core.getLatestSchema().getFieldTypes().get(a); if (ft == null) { - throw new InitializationException("Parameter " + FIELD_TYPE + " defines an unknown field type \"" + a + "\"", InitializationExceptionHandler.ExceptionCause.UNKNOWN_FIELD_TYPE); + throw new InitializationException("Parameter " + FIELD_TYPE + " defines an unknown field type \"" + a + "\"", InitializationExceptionCause.UNKNOWN_FIELD_TYPE); } queryAnalyzer = ft.getQueryAnalyzer(); } @@ -178,7 +172,7 @@ private void parseFieldType(SolrCore core) throws InitializationException { private void setUniqueKeyField(SolrCore core) throws InitializationException { SchemaField sf = core.getLatestSchema().getUniqueKeyField(); if (sf == null) { - throw new InitializationException("This component requires the schema to have a uniqueKeyField", InitializationExceptionHandler.ExceptionCause.MISSING_UNIQUE_KEY_FIELD); + throw new InitializationException("This component requires the schema to have a uniqueKeyField", InitializationExceptionCause.MISSING_UNIQUE_KEY_FIELD); } uniqueKeyFieldType = sf.getType(); uniqueKeyFieldName = sf.getName(); @@ -205,9 +199,6 @@ private void parseKeepElevationPriority() { /** * (Re)Loads elevation configuration. - *

- * Protected access to be called by extending class. - *

* * @param core The core holding this component. * @return The number of elevation rules parsed. @@ -218,9 +209,9 @@ protected int loadElevationConfiguration(SolrCore core) throws Exception { elevationProviderCache.clear(); String configFileName = initArgs.get(CONFIG_FILE); if (configFileName == null) { - // Throw an exception which can be handled by an overriding InitializationExceptionHandler (see handleInitializationException()). - // The default InitializationExceptionHandler will simply skip this exception. - throw new InitializationException("Missing component parameter " + CONFIG_FILE + " - it has to define the path to the elevation configuration file", InitializationExceptionHandler.ExceptionCause.NO_CONFIG_FILE_DEFINED); + // Throw an exception which is handled by handleInitializationException(). + // If not overridden handleInitializationException() simply skips this exception. + throw new InitializationException("Missing component parameter " + CONFIG_FILE + " - it has to define the path to the elevation configuration file", InitializationExceptionCause.NO_CONFIG_FILE_DEFINED); } boolean configFileExists = false; ElevationProvider elevationProvider = NO_OP_ELEVATION_PROVIDER; @@ -234,12 +225,12 @@ protected int loadElevationConfiguration(SolrCore core) throws Exception { File fC = new File(core.getResourceLoader().getConfigDir(), configFileName); File fD = new File(core.getDataDir(), configFileName); if (fC.exists() == fD.exists()) { - InitializationException e = new InitializationException("Missing config file \"" + configFileName + "\" - either " + fC.getAbsolutePath() + " or " + fD.getAbsolutePath() + " must exist, but not both", InitializationExceptionHandler.ExceptionCause.MISSING_CONFIG_FILE); + InitializationException e = new InitializationException("Missing config file \"" + configFileName + "\" - either " + fC.getAbsolutePath() + " or " + fD.getAbsolutePath() + " must exist, but not both", InitializationExceptionCause.MISSING_CONFIG_FILE); 
elevationProvider = handleConfigLoadingException(e, true); elevationProviderCache.put(null, elevationProvider); } else if (fC.exists()) { if (fC.length() == 0) { - InitializationException e = new InitializationException("Empty config file \"" + configFileName + "\" - " + fC.getAbsolutePath(), InitializationExceptionHandler.ExceptionCause.EMPTY_CONFIG_FILE); + InitializationException e = new InitializationException("Empty config file \"" + configFileName + "\" - " + fC.getAbsolutePath(), InitializationExceptionCause.EMPTY_CONFIG_FILE); elevationProvider = handleConfigLoadingException(e, true); } else { configFileExists = true; @@ -270,6 +261,36 @@ protected int loadElevationConfiguration(SolrCore core) throws Exception { } } + /** + * Handles the exception that occurred while initializing this component. + * If this method does not throw an exception, this component silently fails to initialize + * and is muted with field {@link #initialized} which becomes {@code false}. + */ + protected void handleInitializationException(Exception exception, InitializationExceptionCause cause) { + if (cause != InitializationExceptionCause.NO_CONFIG_FILE_DEFINED) { + throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, + "Error initializing " + QueryElevationComponent.class.getSimpleName(), exception); + } + } + + /** + * Handles an exception that occurred while loading the configuration resource. + * + * @param e The exception caught. + * @param resourceAccessIssue true if the exception has been thrown + * because the resource could not be accessed (missing or cannot be read) + * or the config file is empty; false if the resource has + * been found and accessed but the error occurred while loading the resource + * (invalid format, incomplete or corrupted). + * @return The {@link ElevationProvider} to use if the exception is absorbed. If {@code null} + * is returned, the {@link #NO_OP_ELEVATION_PROVIDER} is used but not cached in + * the {@link ElevationProvider} cache. 
+ * @throws E If the exception is not absorbed. + */ + protected ElevationProvider handleConfigLoadingException(E e, boolean resourceAccessIssue) throws E { + throw e; + } + /** * Gets the {@link ElevationProvider} from the data dir or from the cache. * @@ -302,11 +323,8 @@ ElevationProvider getElevationProvider(IndexReader reader, SolrCore core) throws boolean shouldCache = true; if (loadingException != null) { elevationProvider = handleConfigLoadingException(loadingException, resourceAccessIssue); - // Do not cache the fallback ElevationProvider for the first exceptions because the exception might - // occur only a couple of times and the config file could be loaded correctly afterwards - // (e.g. temporary invalid file access). After some attempts, cache the fallback ElevationProvider - // not to overload the exception handler (and beyond it, the logs probably). - if (incConfigLoadingErrorCount(reader) < getConfigLoadingExceptionHandler().getLoadingMaxAttempts()) { + if (elevationProvider == null) { + elevationProvider = NO_OP_ELEVATION_PROVIDER; shouldCache = false; } } @@ -416,48 +434,6 @@ private boolean parseMatchPolicy(String matchString) { } } - /** - * Potentially handles and captures an exception that occurred while loading the configuration resource. - * - * @param e The exception caught. - * @param resourceAccessIssueOrEmptyConfig true if the exception has been thrown because the resource could not - * be accessed (missing or cannot be read) or the config file is empty; false if the resource has - * been found and accessed but the error occurred while loading the resource - * (invalid format, incomplete or corrupted). - * @return The {@link ElevationProvider} to use if the exception is absorbed. - * @throws E If the exception is not absorbed. 
- */ - private ElevationProvider handleConfigLoadingException(E e, boolean resourceAccessIssueOrEmptyConfig) throws E { - if (getConfigLoadingExceptionHandler().handleLoadingException(e, resourceAccessIssueOrEmptyConfig)) { - return NO_OP_ELEVATION_PROVIDER; - } - assert e != null; - throw e; - } - - private int incConfigLoadingErrorCount(IndexReader reader) { - Integer counter = configLoadingErrorCounters.get(reader); - if (counter == null) { - counter = 1; - } else { - counter++; - } - configLoadingErrorCounters.put(reader, counter); - return counter; - } - - /** - * Potentially handles and captures the exception that occurred while initializing this component. If the exception - * is captured by the handler, this component fails to initialize silently and is muted because field initialized is - * false. - */ - private void handleInitializationException(Exception initializationException, InitializationExceptionHandler.ExceptionCause exceptionCause) { - SolrException solrException = new SolrException(SolrException.ErrorCode.SERVER_ERROR, - "Error initializing " + QueryElevationComponent.class.getSimpleName(), initializationException); - if (!getInitializationExceptionHandler().handleInitializationException(solrException, exceptionCause)) - throw solrException; - } - //--------------------------------------------------------------------------------- // SearchComponent //--------------------------------------------------------------------------------- @@ -633,61 +609,41 @@ private void addDebugInfo(ResponseBuilder rb, Elevation elevation) { public static IntIntHashMap getBoostDocs(SolrIndexSearcher indexSearcher, Map boosted, Map context) throws IOException { - IntIntHashMap boostDocs = null; + IntIntHashMap boostDocs = null; - if (boosted != null) { + if (boosted != null) { - //First see if it's already in the request context. Could have been put there - //by another caller. 
- if (context != null) { - boostDocs = (IntIntHashMap) context.get(BOOSTED_DOCIDS); - } + //First see if it's already in the request context. Could have been put there by another caller. + if (context != null) { + boostDocs = (IntIntHashMap) context.get(BOOSTED_DOCIDS); + if (boostDocs != null) { + return boostDocs; + } + } - if (boostDocs != null) { - return boostDocs; - } - //Not in the context yet so load it. - - SchemaField idField = indexSearcher.getSchema().getUniqueKeyField(); - String fieldName = idField.getName(); - - boostDocs = new IntIntHashMap(boosted.size()); - - Listleaves = indexSearcher.getTopReaderContext().leaves(); - PostingsEnum postingsEnum = null; - for (LeafReaderContext leaf : leaves) { - LeafReader reader = leaf.reader(); - int docBase = leaf.docBase; - Bits liveDocs = reader.getLiveDocs(); - Terms terms = reader.terms(fieldName); - TermsEnum termsEnum = terms.iterator(); - Iterator it = boosted.keySet().iterator(); - while (it.hasNext()) { - BytesRef ref = it.next(); - if (termsEnum.seekExact(ref)) { - postingsEnum = termsEnum.postings(postingsEnum); - int doc = postingsEnum.nextDoc(); - while (doc != PostingsEnum.NO_MORE_DOCS && liveDocs != null && !liveDocs.get(doc)) { - doc = postingsEnum.nextDoc(); - } - if (doc != PostingsEnum.NO_MORE_DOCS) { - //Found the document. - int p = boosted.get(ref); - boostDocs.put(doc+docBase, p); - it.remove(); - } + //Not in the context yet so load it. 
+ boostDocs = new IntIntHashMap(boosted.size()); // docId to boost + for (Map.Entry keyAndBoostPair : boosted.entrySet()) { + final BytesRef uniqueKey = keyAndBoostPair.getKey(); + long segAndId = indexSearcher.lookupId(uniqueKey); // higher 32 bits == segment ID, low 32 bits == doc ID + if (segAndId == -1) { // not found + continue; } + int seg = (int) (segAndId >> 32); + int localDocId = (int) segAndId; + final IndexReaderContext indexReaderContext = indexSearcher.getTopReaderContext().children().get(seg); + int docId = indexReaderContext.docBaseInParent + localDocId; + boostDocs.put(docId, keyAndBoostPair.getValue()); } } - } - if(context != null) { - //noinspection unchecked - context.put(BOOSTED_DOCIDS, boostDocs); - } + if (context != null) { + //noinspection unchecked + context.put(BOOSTED_DOCIDS, boostDocs); + } - return boostDocs; - } + return boostDocs; + } //--------------------------------------------------------------------------------- // SolrInfoBean @@ -726,23 +682,6 @@ protected boolean getDefaultSubsetMatch() { return DEFAULT_SUBSET_MATCH; } - /** - * Gets the {@link InitializationExceptionHandler} that handles exception thrown during the initialization of the - * elevation configuration. - */ - @SuppressWarnings("WeakerAccess") - protected InitializationExceptionHandler getInitializationExceptionHandler() { - return InitializationExceptionHandler.NO_OP; - } - - /** - * Gets the {@link LoadingExceptionHandler} that handles exception thrown during the loading of the elevation configuration. - */ - @SuppressWarnings("WeakerAccess") - protected LoadingExceptionHandler getConfigLoadingExceptionHandler() { - return LoadingExceptionHandler.NO_OP; - } - /** * Creates the {@link ElevationProvider} to set during configuration loading. The same instance will be used later * when elevating results for queries. 
@@ -778,15 +717,13 @@ private static String analyzeQuery(String queryString, Analyzer queryAnalyzer) { } private static void splitQueryTermsWithAnalyzer(String queryString, Analyzer queryAnalyzer, Collection tokenCollector) { - try { - TokenStream tokens = queryAnalyzer.tokenStream("", new StringReader(queryString)); + try (TokenStream tokens = queryAnalyzer.tokenStream("", new StringReader(queryString))) { tokens.reset(); CharTermAttribute termAttribute = tokens.addAttribute(CharTermAttribute.class); while (tokens.incrementToken()) { tokenCollector.add(new String(termAttribute.buffer(), 0, termAttribute.length())); } tokens.end(); - tokens.close(); } catch (IOException e) { // Will never be thrown since we read a StringReader. throw Throwables.propagate(e); @@ -837,122 +774,45 @@ void clearElevationProviderCache() { } //--------------------------------------------------------------------------------- - // Exception classes + // Exception //--------------------------------------------------------------------------------- private static class InitializationException extends Exception { - final InitializationExceptionHandler.ExceptionCause exceptionCause; - InitializationException(String message, InitializationExceptionHandler.ExceptionCause exceptionCause) { + private final InitializationExceptionCause exceptionCause; + + InitializationException(String message, InitializationExceptionCause exceptionCause) { super(message); this.exceptionCause = exceptionCause; } } - /** - * Handles resource loading exception. - */ - protected interface InitializationExceptionHandler { - - /** - * NoOp {@link LoadingExceptionHandler} that does not capture any exception and simply returns false. 
- */ - InitializationExceptionHandler NO_OP = new InitializationExceptionHandler() { - @Override - public boolean handleInitializationException(Exception e, ExceptionCause exceptionCause) { - return exceptionCause == ExceptionCause.NO_CONFIG_FILE_DEFINED; + protected enum InitializationExceptionCause { + /** + * The component parameter {@link #FIELD_TYPE} defines an unknown field type. + */ + UNKNOWN_FIELD_TYPE, + /** + * This component requires the schema to have a uniqueKeyField, which it does not have. + */ + MISSING_UNIQUE_KEY_FIELD, + /** + * Missing component parameter {@link #CONFIG_FILE} - it has to define the path to the elevation configuration file (e.g. elevate.xml). + */ + NO_CONFIG_FILE_DEFINED, + /** + * The elevation configuration file (e.g. elevate.xml) cannot be found, or is defined in both conf/ and data/ directories. + */ + MISSING_CONFIG_FILE, + /** + * The elevation configuration file (e.g. elevate.xml) is empty. + */ + EMPTY_CONFIG_FILE, + /** + * Unclassified exception cause. + */ + OTHER, } - }; - - enum ExceptionCause { - /** - * The component parameter {@link #FIELD_TYPE} defines an unknown field type. - */ - UNKNOWN_FIELD_TYPE, - /** - * This component requires the schema to have a uniqueKeyField, which it does not have. - */ - MISSING_UNIQUE_KEY_FIELD, - /** - * Missing component parameter {@link #CONFIG_FILE} - it has to define the path to the elevation configuration file (e.g. elevate.xml). - */ - NO_CONFIG_FILE_DEFINED, - /** - * The elevation configuration file (e.g. elevate.xml) cannot be found, or is defined in both conf/ and data/ directories. - */ - MISSING_CONFIG_FILE, - /** - * The elevation configuration file (e.g. elevate.xml) is empty. - */ - EMPTY_CONFIG_FILE, - /** - * Unclassified exception cause. - */ - OTHER, - } - - /** - * Potentially handles and captures an exception that occurred while initializing the component. - * If the exception is captured, the component fails to initialize silently and is muted. 
- * - * @param e The exception caught. - * @param exceptionCause The exception cause. - * @param The exception type. - * @return true if the exception is handled and captured by this handler (and thus will not be - * thrown anymore); false if the exception is not captured, in this case it will be probably - * thrown again by the calling code. - * @throws E If this handler throws the exception itself (it may add some cause or message). - */ - boolean handleInitializationException(E e, ExceptionCause exceptionCause) throws E; - } - - /** - * Handles resource loading exception. - */ - protected interface LoadingExceptionHandler { - - /** - * NoOp {@link LoadingExceptionHandler} that does not capture any exception and simply returns false. - */ - LoadingExceptionHandler NO_OP = new LoadingExceptionHandler() { - @Override - public boolean handleLoadingException(Exception e, boolean resourceAccessIssue) { - return false; - } - - @Override - public int getLoadingMaxAttempts() { - return 0; - } - }; - - /** - * Potentially handles and captures an exception that occurred while loading a resource. - * - * @param e The exception caught. - * @param resourceAccessIssue true if the exception has been thrown because the resource could not - * be accessed (missing or cannot be read); false if the resource has - * been found and accessed but the error occurred while loading the resource - * (invalid format, incomplete or corrupted). - * @param The exception type. - * @return true if the exception is handled and captured by this handler (and thus will not be - * thrown anymore); false if the exception is not captured, in this case it will be probably - * thrown again by the calling code. - * @throws E If this handler throws the exception itself (it may add some cause or message). 
- */ - boolean handleLoadingException(E e, boolean resourceAccessIssue) throws E; - - /** - * Gets the maximum number of attempts to load the resource in case of error (resource not found, I/O error, - * invalid format), for each Solr core. - * After this number of attempts (so {@link #handleLoadingException} is called this number of times), - * {@link #handleLoadingException} will not be called anymore for the specific Solr core, and the resource is - * considered empty afterwards (until the core is reloaded). - * - * @return The maximum number of attempts to load the resource. The value must be >= 0. - */ - int getLoadingMaxAttempts(); - } //--------------------------------------------------------------------------------- // Elevation classes From 8702cf9b6553eb67e3706491bd215f1f2ba0332e Mon Sep 17 00:00:00 2001 From: broustant Date: Mon, 4 Jun 2018 16:27:44 +0200 Subject: [PATCH 8/8] Integrate dsmiley QEC refactor; useConfiguredElevatedOrder --- .../component/QueryElevationComponent.java | 452 ++++++++---------- .../transform/BaseEditorialTransformer.java | 17 +- .../transform/ElevatedMarkerFactory.java | 6 +- .../transform/ExcludedMarkerFactory.java | 6 +- .../solr/search/AbstractReRankQuery.java | 7 +- .../solr/search/CollapsingQParserPlugin.java | 25 +- .../apache/solr/search/ReRankCollector.java | 7 +- .../QueryElevationComponentTest.java | 273 +++++------ .../src/the-query-elevation-component.adoc | 11 + .../common/params/QueryElevationParams.java | 9 +- 10 files changed, 382 insertions(+), 431 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java b/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java index c2eac7b9f392..66440b311a6a 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java @@ -24,38 +24,47 @@ import java.io.File; import 
java.io.IOException; import java.io.InputStream; -import java.io.StringReader; import java.lang.invoke.MethodHandles; -import java.util.*; -import java.util.function.UnaryOperator; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.WeakHashMap; import java.util.stream.Collector; import java.util.stream.Collectors; import com.carrotsearch.hppc.IntIntHashMap; +import com.carrotsearch.hppc.cursors.IntIntCursor; import com.google.common.annotations.VisibleForTesting; - import com.google.common.base.Throwables; +import com.google.common.collect.Collections2; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Maps; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.index.*; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReaderContext; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BoostQuery; -import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.FieldComparatorSource; import org.apache.lucene.search.SimpleFieldComparator; import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; import org.apache.lucene.search.TermQuery; -import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; -import org.apache.lucene.util.SentinelIntSet; import 
org.apache.solr.cloud.ZkController; import org.apache.solr.common.SolrException; import org.apache.solr.common.params.QueryElevationParams; @@ -65,6 +74,7 @@ import org.apache.solr.common.util.StrUtils; import org.apache.solr.core.Config; import org.apache.solr.core.SolrCore; +import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.response.transform.ElevatedMarkerFactory; import org.apache.solr.response.transform.ExcludedMarkerFactory; import org.apache.solr.schema.FieldType; @@ -90,6 +100,7 @@ * @since solr 1.3 */ public class QueryElevationComponent extends SearchComponent implements SolrCoreAware { + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); // Constants used in solrconfig.xml @@ -98,32 +109,33 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore @VisibleForTesting static final String CONFIG_FILE = "config-file"; private static final String EXCLUDE = "exclude"; - public static final String BOOSTED = "BOOSTED"; + + /** @see #getBoostDocs(SolrIndexSearcher, Set, Map) */ private static final String BOOSTED_DOCIDS = "BOOSTED_DOCIDS"; - public static final String BOOSTED_PRIORITY = "BOOSTED_PRIORITY"; + /** Key to {@link SolrQueryRequest#getContext()} for a {@code Set} of included IDs in configured + * order (so-called priority). */ + public static final String BOOSTED = "BOOSTED"; + /** Key to {@link SolrQueryRequest#getContext()} for a {@code Set} of excluded IDs. 
*/ public static final String EXCLUDED = "EXCLUDED"; private static final boolean DEFAULT_FORCE_ELEVATION = false; - private static final boolean DEFAULT_KEEP_ELEVATION_PRIORITY = true; + private static final boolean DEFAULT_USE_CONFIGURED_ELEVATED_ORDER = true; private static final boolean DEFAULT_SUBSET_MATCH = false; private static final String DEFAULT_EXCLUDE_MARKER_FIELD_NAME = "excluded"; private static final String DEFAULT_EDITORIAL_MARKER_FIELD_NAME = "elevated"; private static final Collector QUERY_EXACT_JOINER = Collectors.joining(" "); - // Runtime param private SolrParams initArgs; private Analyzer queryAnalyzer; - private String uniqueKeyFieldName; - private FieldType uniqueKeyFieldType; - /** - * Provides the indexed value corresponding to a readable value. - */ - private UnaryOperator indexedValueProvider; + private SchemaField uniqueKeyField; + /** @see QueryElevationParams#FORCE_ELEVATION */ @VisibleForTesting boolean forceElevation; - private boolean keepElevationPriority; + /** @see QueryElevationParams#USE_CONFIGURED_ELEVATED_ORDER */ + private boolean useConfiguredElevatedOrder; + private boolean initialized; /** @@ -146,7 +158,7 @@ public void inform(SolrCore core) { parseExcludedMarkerFieldName(core); parseEditorialMarkerFieldName(core); parseForceElevation(); - parseKeepElevationPriority(); + parseUseConfiguredOrderForElevations(); loadElevationConfiguration(core); initialized = true; } catch (InitializationException e) { @@ -170,13 +182,10 @@ private void parseFieldType(SolrCore core) throws InitializationException { } private void setUniqueKeyField(SolrCore core) throws InitializationException { - SchemaField sf = core.getLatestSchema().getUniqueKeyField(); - if (sf == null) { + uniqueKeyField = core.getLatestSchema().getUniqueKeyField(); + if (uniqueKeyField == null) { throw new InitializationException("This component requires the schema to have a uniqueKeyField", InitializationExceptionCause.MISSING_UNIQUE_KEY_FIELD); } - 
uniqueKeyFieldType = sf.getType(); - uniqueKeyFieldName = sf.getName(); - indexedValueProvider = readableValue -> uniqueKeyFieldType.readableToIndexed(readableValue); } private void parseExcludedMarkerFieldName(SolrCore core) { @@ -190,11 +199,11 @@ private void parseEditorialMarkerFieldName(SolrCore core) { } private void parseForceElevation() { - forceElevation = initArgs.getBool(QueryElevationParams.FORCE_ELEVATION, getDefaultForceElevation()); + forceElevation = initArgs.getBool(QueryElevationParams.FORCE_ELEVATION, DEFAULT_FORCE_ELEVATION); } - private void parseKeepElevationPriority() { - keepElevationPriority = initArgs.getBool(QueryElevationParams.KEEP_ELEVATION_PRIORITY, getDefaultKeepElevationPriority()); + private void parseUseConfiguredOrderForElevations() { + useConfiguredElevatedOrder = initArgs.getBool(QueryElevationParams.USE_CONFIGURED_ELEVATED_ORDER, DEFAULT_USE_CONFIGURED_ELEVATED_ORDER); } /** @@ -276,15 +285,15 @@ protected void handleInitializationException(Exception exception, Initialization /** * Handles an exception that occurred while loading the configuration resource. * - * @param e The exception caught. + * @param e The exception caught. * @param resourceAccessIssue true if the exception has been thrown * because the resource could not be accessed (missing or cannot be read) * or the config file is empty; false if the resource has * been found and accessed but the error occurred while loading the resource * (invalid format, incomplete or corrupted). * @return The {@link ElevationProvider} to use if the exception is absorbed. If {@code null} - * is returned, the {@link #NO_OP_ELEVATION_PROVIDER} is used but not cached in - * the {@link ElevationProvider} cache. + * is returned, the {@link #NO_OP_ELEVATION_PROVIDER} is used but not cached in + * the {@link ElevationProvider} cache. * @throws E If the exception is not absorbed. 
*/ protected ElevationProvider handleConfigLoadingException(E e, boolean resourceAccessIssue) throws E { @@ -377,8 +386,7 @@ private ElevationProvider loadElevationProvider(SolrCore core) throws IOExceptio */ @SuppressWarnings("WeakerAccess") protected ElevationProvider loadElevationProvider(Config config) throws IOException { - Map elevationBuilderMap = keepElevationPriority ? - new LinkedHashMap<>() : new HashMap<>(); + Map elevationBuilderMap = new LinkedHashMap<>(); XPath xpath = XPathFactory.newInstance().newXPath(); NodeList nodes = (NodeList) config.evaluate("elevate/query", XPathConstants.NODESET); for (int i = 0; i < nodes.getLength(); i++) { @@ -395,6 +403,9 @@ protected ElevationProvider loadElevationProvider(Config config) throws IOExcept "query requires '' child"); } + if (children.getLength() == 0) { // weird + continue; + } ElevationBuilder elevationBuilder = new ElevationBuilder(); for (int j = 0; j < children.getLength(); j++) { Node child = children.item(j); @@ -402,11 +413,11 @@ protected ElevationProvider loadElevationProvider(Config config) throws IOExcept String e = DOMUtil.getAttr(child, EXCLUDE, null); if (e != null) { if (Boolean.valueOf(e)) { - elevationBuilder.addExcludedId(id); + elevationBuilder.addExcludedIds(Collections.singleton(id)); continue; } } - elevationBuilder.addElevatedId(id); + elevationBuilder.addElevatedIds(Collections.singletonList(id)); } // It is allowed to define multiple times different elevations for the same query. 
In this case the elevations @@ -423,7 +434,7 @@ protected ElevationProvider loadElevationProvider(Config config) throws IOExcept private boolean parseMatchPolicy(String matchString) { if (matchString == null) { - return getDefaultSubsetMatch(); + return DEFAULT_SUBSET_MATCH; } else if (matchString.equalsIgnoreCase("exact")) { return false; } else if (matchString.equalsIgnoreCase("subset")) { @@ -486,7 +497,6 @@ private Elevation getElevation(ResponseBuilder rb) { private void setQuery(ResponseBuilder rb, Elevation elevation) { rb.req.getContext().put(BOOSTED, elevation.elevatedIds); - rb.req.getContext().put(BOOSTED_PRIORITY, elevation.priorities); // Change the query to insert forced documents SolrParams params = rb.req.getParams(); @@ -504,7 +514,7 @@ private void setQuery(ResponseBuilder rb, Elevation elevation) { rb.req.getContext().put(EXCLUDED, elevation.excludedIds); } else { for (TermQuery tq : elevation.excludeQueries) { - queryBuilder.add(new BooleanClause(tq, BooleanClause.Occur.MUST_NOT)); + queryBuilder.add(tq, BooleanClause.Occur.MUST_NOT); } } } @@ -512,9 +522,14 @@ private void setQuery(ResponseBuilder rb, Elevation elevation) { } } - private void setSort(ResponseBuilder rb, Elevation elevation) { + private void setSort(ResponseBuilder rb, Elevation elevation) throws IOException { + if (elevation.elevatedIds.isEmpty()) { + return; + } boolean forceElevation = rb.req.getParams().getBool(QueryElevationParams.FORCE_ELEVATION, this.forceElevation); - ElevationComparatorSource comparator = new ElevationComparatorSource(elevation); + boolean useConfigured = rb.req.getParams().getBool(QueryElevationParams.USE_CONFIGURED_ELEVATED_ORDER, this.useConfiguredElevatedOrder); + final IntIntHashMap elevatedWithPriority = getBoostDocs(rb.req.getSearcher(), elevation.elevatedIds, rb.req.getContext()); + ElevationComparatorSource comparator = new ElevationComparatorSource(elevatedWithPriority, useConfigured); setSortSpec(rb, forceElevation, comparator); 
setGroupingSpec(rb, forceElevation, comparator); } @@ -607,44 +622,50 @@ private void addDebugInfo(ResponseBuilder rb, Elevation elevation) { // Boosted docs helper //--------------------------------------------------------------------------------- - public static IntIntHashMap getBoostDocs(SolrIndexSearcher indexSearcher, Map boosted, Map context) throws IOException { - - IntIntHashMap boostDocs = null; + /** + * Returns a map of docIds elevated with a priority value > 0. The mapping is looked up and cached in {@code context} when + * not null. {@code boosted} are the set of uniqueKey values to be boosted in priority order. + */ + public static IntIntHashMap getBoostDocs(SolrIndexSearcher indexSearcher, Set boosted, Map context) throws IOException { - if (boosted != null) { + IntIntHashMap boostDocs = null; - //First see if it's already in the request context. Could have been put there by another caller. - if (context != null) { - boostDocs = (IntIntHashMap) context.get(BOOSTED_DOCIDS); - if (boostDocs != null) { - return boostDocs; - } - } + if (boosted != null) { - //Not in the context yet so load it. - boostDocs = new IntIntHashMap(boosted.size()); // docId to boost - for (Map.Entry keyAndBoostPair : boosted.entrySet()) { - final BytesRef uniqueKey = keyAndBoostPair.getKey(); - long segAndId = indexSearcher.lookupId(uniqueKey); // higher 32 bits == segment ID, low 32 bits == doc ID - if (segAndId == -1) { // not found - continue; - } - int seg = (int) (segAndId >> 32); - int localDocId = (int) segAndId; - final IndexReaderContext indexReaderContext = indexSearcher.getTopReaderContext().children().get(seg); - int docId = indexReaderContext.docBaseInParent + localDocId; - boostDocs.put(docId, keyAndBoostPair.getValue()); + //First see if it's already in the request context. Could have been put there by another caller. 
+ if (context != null) { + boostDocs = (IntIntHashMap) context.get(BOOSTED_DOCIDS); + if (boostDocs != null) { + return boostDocs; } } - if (context != null) { - //noinspection unchecked - context.put(BOOSTED_DOCIDS, boostDocs); + //Not in the context yet so load it. + boostDocs = new IntIntHashMap(boosted.size()); // docId to boost + int priority = boosted.size() + 1; // the corresponding priority for each boosted key (starts at this; decrements down) + for (BytesRef uniqueKey : boosted) { + priority--; // therefore first == boosted.size(); last will be 1 + long segAndId = indexSearcher.lookupId(uniqueKey); // higher 32 bits == segment ID, low 32 bits == doc ID + if (segAndId == -1) { // not found + continue; + } + int seg = (int) (segAndId >> 32); + int localDocId = (int) segAndId; + final IndexReaderContext indexReaderContext = indexSearcher.getTopReaderContext().children().get(seg); + int docId = indexReaderContext.docBaseInParent + localDocId; + boostDocs.put(docId, priority); } + assert priority == 1; // the last priority (lowest) + } - return boostDocs; + if (context != null) { + //noinspection unchecked + context.put(BOOSTED_DOCIDS, boostDocs); } + return boostDocs; + } + //--------------------------------------------------------------------------------- // SolrInfoBean //--------------------------------------------------------------------------------- @@ -658,30 +679,6 @@ public String getDescription() { // Overrides //--------------------------------------------------------------------------------- - /** - * Gets the default value for {@link org.apache.solr.common.params.QueryElevationParams#FORCE_ELEVATION} parameter. - */ - @SuppressWarnings("WeakerAccess") - protected boolean getDefaultForceElevation() { - return DEFAULT_FORCE_ELEVATION; - } - - /** - * Gets the default value for {@link #DEFAULT_KEEP_ELEVATION_PRIORITY} parameter. 
- */ - @SuppressWarnings("WeakerAccess") - protected boolean getDefaultKeepElevationPriority() { - return DEFAULT_KEEP_ELEVATION_PRIORITY; - } - - /** - * Gets the default subset match policy. - */ - @SuppressWarnings("WeakerAccess") - protected boolean getDefaultSubsetMatch() { - return DEFAULT_SUBSET_MATCH; - } - /** * Creates the {@link ElevationProvider} to set during configuration loading. The same instance will be used later * when elevating results for queries. @@ -717,15 +714,14 @@ private static String analyzeQuery(String queryString, Analyzer queryAnalyzer) { } private static void splitQueryTermsWithAnalyzer(String queryString, Analyzer queryAnalyzer, Collection tokenCollector) { - try (TokenStream tokens = queryAnalyzer.tokenStream("", new StringReader(queryString))) { + try (TokenStream tokens = queryAnalyzer.tokenStream("", queryString)) { tokens.reset(); CharTermAttribute termAttribute = tokens.addAttribute(CharTermAttribute.class); while (tokens.incrementToken()) { - tokenCollector.add(new String(termAttribute.buffer(), 0, termAttribute.length())); + tokenCollector.add(termAttribute.toString()); } tokens.end(); } catch (IOException e) { - // Will never be thrown since we read a StringReader. throw Throwables.propagate(e); } } @@ -737,7 +733,6 @@ private static void splitQueryTermsWithAnalyzer(String queryString, Analyzer que /** * Helpful for testing without loading config.xml. * - * * @param reader The {@link org.apache.lucene.index.IndexReader}. * @param queryString The query for which to elevate some documents. If the query has already been defined an * elevation, this method overwrites it. @@ -747,19 +742,13 @@ private static void splitQueryTermsWithAnalyzer(String queryString, Analyzer que * @throws java.io.IOException If there is a low-level I/O error. 
*/ @VisibleForTesting - void setTopQueryResults(IndexReader reader, String queryString, boolean subsetMatch, String[] elevatedIds, - String[] excludedIds) throws IOException { + void setTopQueryResults(IndexReader reader, String queryString, boolean subsetMatch, + String[] elevatedIds, String[] excludedIds) throws IOException { clearElevationProviderCache(); - if (elevatedIds == null) { - elevatedIds = new String[0]; - } - if (excludedIds == null) { - excludedIds = new String[0]; - } ElevatingQuery elevatingQuery = new ElevatingQuery(queryString, subsetMatch); - ElevationBuilder elevationBuilder = new ElevationBuilder() - .addElevatedIds(Arrays.asList(elevatedIds)) - .addExcludedIds(Arrays.asList(excludedIds)); + ElevationBuilder elevationBuilder = new ElevationBuilder(); + elevationBuilder.addElevatedIds(elevatedIds == null ? Collections.emptyList() : Arrays.asList(elevatedIds)); + elevationBuilder.addExcludedIds(excludedIds == null ? Collections.emptyList() : Arrays.asList(excludedIds)); Map elevationBuilderMap = ImmutableMap.of(elevatingQuery, elevationBuilder); synchronized (elevationProviderCache) { elevationProviderCache.computeIfAbsent(reader, k -> createElevationProvider(queryAnalyzer, elevationBuilderMap)); @@ -788,46 +777,36 @@ private static class InitializationException extends Exception { } protected enum InitializationExceptionCause { - /** - * The component parameter {@link #FIELD_TYPE} defines an unknown field type. - */ - UNKNOWN_FIELD_TYPE, - /** - * This component requires the schema to have a uniqueKeyField, which it does not have. - */ - MISSING_UNIQUE_KEY_FIELD, - /** - * Missing component parameter {@link #CONFIG_FILE} - it has to define the path to the elevation configuration file (e.g. elevate.xml). - */ - NO_CONFIG_FILE_DEFINED, - /** - * The elevation configuration file (e.g. elevate.xml) cannot be found, or is defined in both conf/ and data/ directories. - */ - MISSING_CONFIG_FILE, - /** - * The elevation configuration file (e.g. 
elevate.xml) is empty. - */ - EMPTY_CONFIG_FILE, - /** - * Unclassified exception cause. - */ - OTHER, - } + /** + * The component parameter {@link #FIELD_TYPE} defines an unknown field type. + */ + UNKNOWN_FIELD_TYPE, + /** + * This component requires the schema to have a uniqueKeyField, which it does not have. + */ + MISSING_UNIQUE_KEY_FIELD, + /** + * Missing component parameter {@link #CONFIG_FILE} - it has to define the path to the elevation configuration file (e.g. elevate.xml). + */ + NO_CONFIG_FILE_DEFINED, + /** + * The elevation configuration file (e.g. elevate.xml) cannot be found, or is defined in both conf/ and data/ directories. + */ + MISSING_CONFIG_FILE, + /** + * The elevation configuration file (e.g. elevate.xml) is empty. + */ + EMPTY_CONFIG_FILE, + /** + * Unclassified exception cause. + */ + OTHER, + } //--------------------------------------------------------------------------------- // Elevation classes //--------------------------------------------------------------------------------- - /** - * Creates an elevation. - * - * @param elevatedIds The ids of the elevated documents that should appear on top of search results; can be null. - * @param excludedIds The ids of the excluded documents that should not appear in search results; can be null. - */ - private Elevation createElevation(Collection elevatedIds, Collection excludedIds) { - return new Elevation(elevatedIds, excludedIds, indexedValueProvider, uniqueKeyFieldName, keepElevationPriority); - } - /** * Provides the elevations defined for queries. */ @@ -896,7 +875,8 @@ private Map buildElevationMap(Mapnull. + * The order is retained. */ - private Set elevatedIds; + private LinkedHashSet elevatedIds; /** * The ids of the excluded documents that should not appear in search results; can be null. 
*/ - private Set excludedIds; + private Set excludedIds; - ElevationBuilder addElevatedId(String id) { - if (elevatedIds == null) { - elevatedIds = createIdSet(); - } - elevatedIds.add(id); - return this; - } + // for temporary/transient use when adding an elevated or excluded ID + private final BytesRefBuilder scratch = new BytesRefBuilder(); ElevationBuilder addElevatedIds(List ids) { + if (elevatedIds == null) { + elevatedIds = new LinkedHashSet<>(Math.max(10, ids.size())); + } for (String id : ids) { - addElevatedId(id); + elevatedIds.add(toBytesRef(id)); } return this; } - ElevationBuilder addExcludedId(String id) { + ElevationBuilder addExcludedIds(Collection ids) { if (excludedIds == null) { - excludedIds = createIdSet(); + excludedIds = new LinkedHashSet<>(Math.max(10, ids.size())); } - excludedIds.add(id); - return this; - } - - ElevationBuilder addExcludedIds(List ids) { for (String id : ids) { - addExcludedId(id); + excludedIds.add(toBytesRef(id)); } return this; } + private BytesRef toBytesRef(String id) { + uniqueKeyField.getType().readableToIndexed(id, scratch); + return scratch.toBytesRef(); + } + ElevationBuilder merge(ElevationBuilder elevationBuilder) { if (elevatedIds == null) { elevatedIds = elevationBuilder.elevatedIds; @@ -1009,12 +988,9 @@ ElevationBuilder merge(ElevationBuilder elevationBuilder) { } Elevation build() { - return createElevation(elevatedIds, excludedIds); + return new Elevation(elevatedIds, excludedIds, uniqueKeyField.getName()); } - private Set createIdSet() { - return (keepElevationPriority ? 
new LinkedHashSet<>() : new HashSet<>()); - } } /** @@ -1025,82 +1001,73 @@ protected static class Elevation { private static final BooleanQuery EMPTY_QUERY = new BooleanQuery.Builder().build(); @VisibleForTesting - final Set elevatedIds; - private final BooleanQuery includeQuery; - @VisibleForTesting - final Map priorities; - private final Set excludedIds; - private final TermQuery[] excludeQueries;//just keep the term query, b/c we will not always explicitly exclude the item based on markExcludes query time param + final Set elevatedIds; // in configured order; not null + private final BooleanQuery includeQuery; // not null + private final Set excludedIds; // not null + //just keep the term query, b/c we will not always explicitly exclude the item based on markExcludes query time param + private final TermQuery[] excludeQueries; //may be null /** * Constructs an elevation. - * * @param elevatedIds The ids of the elevated documents that should appear on top of search results; can be null. + * In configured order. * @param excludedIds The ids of the excluded documents that should not appear in search results; can be null. - * @param indexedValueProvider Provides the indexed value corresponding to a readable value.. * @param queryFieldName The field name to use to create query terms. - * @param keepElevationPriority Whether to keep the elevation priority order. 
*/ - private Elevation(Collection elevatedIds, Collection excludedIds, - UnaryOperator indexedValueProvider, String queryFieldName, - boolean keepElevationPriority) { + private Elevation(Set elevatedIds, Set excludedIds, + String queryFieldName) { if (elevatedIds == null || elevatedIds.isEmpty()) { - this.elevatedIds = Collections.emptySet(); includeQuery = EMPTY_QUERY; - priorities = Collections.emptyMap(); + this.elevatedIds = Collections.emptySet(); } else { - ImmutableSet.Builder elevatedIdsBuilder = ImmutableSet.builder(); + this.elevatedIds = new LinkedHashSet<>(elevatedIds); BooleanQuery.Builder includeQueryBuilder = new BooleanQuery.Builder(); - ImmutableMap.Builder prioritiesBuilder = null; - if (keepElevationPriority) { - prioritiesBuilder = ImmutableMap.builder(); - } - int priorityLevel = elevatedIds.size(); - for (String elevatedId : elevatedIds) { - elevatedIdsBuilder.add(indexedValueProvider.apply(elevatedId)); - TermQuery tq = new TermQuery(new Term(queryFieldName, elevatedId)); - includeQueryBuilder.add(tq, BooleanClause.Occur.SHOULD); - if (keepElevationPriority) { - prioritiesBuilder.put(new BytesRef(elevatedId), priorityLevel--); - } + for (BytesRef elevatedId : elevatedIds) { + includeQueryBuilder.add(new TermQuery(new Term(queryFieldName, elevatedId)), BooleanClause.Occur.SHOULD); } - this.elevatedIds = elevatedIdsBuilder.build(); includeQuery = includeQueryBuilder.build(); - priorities = keepElevationPriority ? 
prioritiesBuilder.build() : null; } if (excludedIds == null || excludedIds.isEmpty()) { this.excludedIds = Collections.emptySet(); excludeQueries = null; } else { - ImmutableSet.Builder excludedIdsBuilder = ImmutableSet.builder(); + this.excludedIds = ImmutableSet.copyOf(excludedIds); List excludeQueriesBuilder = new ArrayList<>(excludedIds.size()); - for (String excludedId : excludedIds) { - excludedIdsBuilder.add(indexedValueProvider.apply(excludedId)); + for (BytesRef excludedId : excludedIds) { excludeQueriesBuilder.add(new TermQuery(new Term(queryFieldName, excludedId))); } - this.excludedIds = excludedIdsBuilder.build(); excludeQueries = excludeQueriesBuilder.toArray(new TermQuery[excludeQueriesBuilder.size()]); } } @Override public String toString() { - return "{elevatedIds=" + elevatedIds + ", excludedIds=" + excludedIds + "}"; + return "{elevatedIds=" + Collections2.transform(elevatedIds, BytesRef::utf8ToString) + + ", excludedIds=" + Collections2.transform(excludedIds, BytesRef::utf8ToString) + "}"; } } + /** Elevates certain docs to the top. */ private class ElevationComparatorSource extends FieldComparatorSource { - private final Elevation elevation; - private final SentinelIntSet ordSet; //the key half of the map - private final BytesRef[] termValues; //the value half of the map + private final IntIntHashMap elevatedWithPriority; + private final boolean useConfiguredElevatedOrder; + private final int[] sortedElevatedDocIds; + + private ElevationComparatorSource(IntIntHashMap elevatedWithPriority, boolean useConfiguredElevatedOrder) { + this.elevatedWithPriority = elevatedWithPriority; + this.useConfiguredElevatedOrder = useConfiguredElevatedOrder; - private ElevationComparatorSource(Elevation elevation) { - this.elevation = elevation; - int size = elevation.elevatedIds.size(); - ordSet = new SentinelIntSet(size, -1); - termValues = keepElevationPriority ? 
new BytesRef[ordSet.keys.length] : null; + // copy elevatedWithPriority keys (doc IDs) into sortedElevatedDocIds, sorted + sortedElevatedDocIds = new int[elevatedWithPriority.size()]; + final Iterator iterator = elevatedWithPriority.iterator(); + for (int i = 0; i < sortedElevatedDocIds.length; i++) { + IntIntCursor next = iterator.next(); + sortedElevatedDocIds[i] = next.key; + } + assert iterator.hasNext() == false; + Arrays.sort(sortedElevatedDocIds); } @Override @@ -1109,8 +1076,28 @@ public FieldComparator newComparator(String fieldName, final int numHit final int[] values = new int[numHits]; int bottomVal; int topVal; - PostingsEnum postingsEnum; - final Set seen = new HashSet<>(elevation.elevatedIds.size()); + + int docBase; + boolean hasElevatedDocsThisSegment; + + @Override + protected void doSetNextReader(LeafReaderContext context) throws IOException { + docBase = context.docBase; + // ascertain if hasElevatedDocsThisSegment + final int idx = Arrays.binarySearch(sortedElevatedDocIds, docBase); + if (idx < 0) { + //first doc in segment isn't elevated (typical). Maybe another is? 
+ int nextIdx = -idx - 1; + if (nextIdx < sortedElevatedDocIds.length) { + int nextElevatedDocId = sortedElevatedDocIds[nextIdx]; + if (nextElevatedDocId > docBase + context.reader().maxDoc()) { + hasElevatedDocsThisSegment = false; + return; + } + } + } + hasElevatedDocsThisSegment = true; + } @Override public int compare(int slot1, int slot2) { @@ -1128,16 +1115,14 @@ public void setTopValue(Integer value) { } private int docVal(int doc) { - if (ordSet.size() > 0) { - int slot = ordSet.find(doc); - if (slot >= 0) { - if (!keepElevationPriority) - return 1; - BytesRef id = termValues[slot]; - return elevation.priorities.getOrDefault(id, 0); - } + if (!hasElevatedDocsThisSegment) { + assert elevatedWithPriority.containsKey(docBase + doc) == false; + return -1; + } else if (useConfiguredElevatedOrder) { + return elevatedWithPriority.getOrDefault(docBase + doc, -1); + } else { + return elevatedWithPriority.containsKey(docBase + doc) ? 1 : -1; } - return 0; } @Override @@ -1150,35 +1135,6 @@ public void copy(int slot, int doc) { values[slot] = docVal(doc); } - @Override - protected void doSetNextReader(LeafReaderContext context) throws IOException { - //convert the ids to Lucene doc ids, the ordSet and termValues needs to be the same size as the number of elevation docs we have - ordSet.clear(); - Terms terms = context.reader().terms(uniqueKeyFieldName); - if (terms == null) return; - TermsEnum termsEnum = terms.iterator(); - BytesRefBuilder term = new BytesRefBuilder(); - Bits liveDocs = context.reader().getLiveDocs(); - - for (String id : elevation.elevatedIds) { - term.copyChars(id); - if (seen.contains(id) == false && termsEnum.seekExact(term.get())) { - postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE); - int docId = postingsEnum.nextDoc(); - while (docId != DocIdSetIterator.NO_MORE_DOCS && liveDocs != null && !liveDocs.get(docId)) { - docId = postingsEnum.nextDoc(); - } - if (docId == DocIdSetIterator.NO_MORE_DOCS ) continue; // must have been 
deleted - int slot = ordSet.put(docId); - if (keepElevationPriority) { - termValues[slot] = term.toBytesRef(); - } - seen.add(id); - assert postingsEnum.nextDoc() == DocIdSetIterator.NO_MORE_DOCS; - } - } - } - @Override public Integer value(int slot) { return values[slot]; @@ -1192,4 +1148,4 @@ public int compareTop(int doc) { }; } } -} +} \ No newline at end of file diff --git a/solr/core/src/java/org/apache/solr/response/transform/BaseEditorialTransformer.java b/solr/core/src/java/org/apache/solr/response/transform/BaseEditorialTransformer.java index d646ee401c63..6324d8ad2e17 100644 --- a/solr/core/src/java/org/apache/solr/response/transform/BaseEditorialTransformer.java +++ b/solr/core/src/java/org/apache/solr/response/transform/BaseEditorialTransformer.java @@ -20,6 +20,8 @@ import java.util.Set; import org.apache.lucene.index.IndexableField; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefBuilder; import org.apache.solr.common.SolrDocument; import org.apache.solr.schema.FieldType; @@ -47,9 +49,9 @@ public String getName() { @Override public void transform(SolrDocument doc, int docid) { //this only gets added if QueryElevationParams.MARK_EXCLUDED is true - Set ids = getIdSet(); + Set ids = getIdSet(); if (ids != null && ids.isEmpty() == false) { - String key = getKey(doc); + BytesRef key = getKey(doc); doc.setField(name, ids.contains(key)); } else { //if we have no ids, that means we weren't marking, but the user still asked for the field to be added, so just mark everything as false @@ -57,17 +59,20 @@ public void transform(SolrDocument doc, int docid) { } } - protected abstract Set getIdSet(); + protected abstract Set getIdSet(); - protected String getKey(SolrDocument doc) { + protected BytesRef getKey(SolrDocument doc) { Object obj = doc.get(idFieldName); if (obj instanceof IndexableField) { IndexableField f = (IndexableField) obj; + BytesRefBuilder bytesRefBuilder = new BytesRefBuilder(); Number n = f.numericValue(); if (n 
!= null) { - return ft.readableToIndexed(n.toString()); + ft.readableToIndexed(n.toString(), bytesRefBuilder); + } else { + ft.readableToIndexed(f.stringValue(), bytesRefBuilder); } - return ft.readableToIndexed(f.stringValue()); + return bytesRefBuilder.get(); } throw new AssertionError("Expected an IndexableField but got: " + obj.getClass()); } diff --git a/solr/core/src/java/org/apache/solr/response/transform/ElevatedMarkerFactory.java b/solr/core/src/java/org/apache/solr/response/transform/ElevatedMarkerFactory.java index 51f3cff96440..e5fb4143b37f 100644 --- a/solr/core/src/java/org/apache/solr/response/transform/ElevatedMarkerFactory.java +++ b/solr/core/src/java/org/apache/solr/response/transform/ElevatedMarkerFactory.java @@ -18,6 +18,7 @@ import java.util.Set; +import org.apache.lucene.util.BytesRef; import org.apache.solr.common.params.SolrParams; import org.apache.solr.handler.component.QueryElevationComponent; import org.apache.solr.request.SolrQueryRequest; @@ -44,9 +45,10 @@ class MarkTransformer extends BaseEditorialTransformer { super(name, idFieldName, ft); } + @SuppressWarnings("unchecked") @Override - protected Set getIdSet() { - return (Set) context.getRequest().getContext().get(QueryElevationComponent.BOOSTED); + protected Set getIdSet() { + return (Set) context.getRequest().getContext().get(QueryElevationComponent.BOOSTED); } } diff --git a/solr/core/src/java/org/apache/solr/response/transform/ExcludedMarkerFactory.java b/solr/core/src/java/org/apache/solr/response/transform/ExcludedMarkerFactory.java index 2d670d7c4c72..2036c488a066 100644 --- a/solr/core/src/java/org/apache/solr/response/transform/ExcludedMarkerFactory.java +++ b/solr/core/src/java/org/apache/solr/response/transform/ExcludedMarkerFactory.java @@ -18,6 +18,7 @@ import java.util.Set; +import org.apache.lucene.util.BytesRef; import org.apache.solr.common.params.SolrParams; import org.apache.solr.handler.component.QueryElevationComponent; import 
org.apache.solr.request.SolrQueryRequest; @@ -46,9 +47,10 @@ public ExcludedTransformer( String name, String idFieldName, FieldType ft) super(name, idFieldName, ft); } + @SuppressWarnings("unchecked") @Override - protected Set getIdSet() { - return (Set)context.getRequest().getContext().get(QueryElevationComponent.EXCLUDED); + protected Set getIdSet() { + return (Set)context.getRequest().getContext().get(QueryElevationComponent.EXCLUDED); } } diff --git a/solr/core/src/java/org/apache/solr/search/AbstractReRankQuery.java b/solr/core/src/java/org/apache/solr/search/AbstractReRankQuery.java index f7679b080efb..0c2fb828aa84 100644 --- a/solr/core/src/java/org/apache/solr/search/AbstractReRankQuery.java +++ b/solr/core/src/java/org/apache/solr/search/AbstractReRankQuery.java @@ -18,6 +18,7 @@ import java.io.IOException; import java.util.Map; +import java.util.Set; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.IndexSearcher; @@ -35,7 +36,7 @@ public abstract class AbstractReRankQuery extends RankQuery { protected Query mainQuery; final protected int reRankDocs; final protected Rescorer reRankQueryRescorer; - protected Map boostedPriority; + protected Set boostedPriority; public AbstractReRankQuery(Query mainQuery, int reRankDocs, Rescorer reRankQueryRescorer) { this.mainQuery = mainQuery; @@ -54,13 +55,13 @@ public MergeStrategy getMergeStrategy() { return null; } + @SuppressWarnings("unchecked") public TopDocsCollector getTopDocsCollector(int len, QueryCommand cmd, IndexSearcher searcher) throws IOException { - if(this.boostedPriority == null) { SolrRequestInfo info = SolrRequestInfo.getRequestInfo(); if(info != null) { Map context = info.getReq().getContext(); - this.boostedPriority = (Map)context.get(QueryElevationComponent.BOOSTED_PRIORITY); + this.boostedPriority = (Set)context.get(QueryElevationComponent.BOOSTED); } } diff --git a/solr/core/src/java/org/apache/solr/search/CollapsingQParserPlugin.java 
b/solr/core/src/java/org/apache/solr/search/CollapsingQParserPlugin.java index d0f8cd4633ea..5a6d48ced308 100644 --- a/solr/core/src/java/org/apache/solr/search/CollapsingQParserPlugin.java +++ b/solr/core/src/java/org/apache/solr/search/CollapsingQParserPlugin.java @@ -24,7 +24,14 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Set; +import com.carrotsearch.hppc.FloatArrayList; +import com.carrotsearch.hppc.IntArrayList; +import com.carrotsearch.hppc.IntIntHashMap; +import com.carrotsearch.hppc.IntLongHashMap; +import com.carrotsearch.hppc.cursors.IntIntCursor; +import com.carrotsearch.hppc.cursors.IntLongCursor; import org.apache.commons.lang.StringUtils; import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.index.DocValues; @@ -69,13 +76,6 @@ import org.apache.solr.schema.StrField; import org.apache.solr.uninverting.UninvertingReader; -import com.carrotsearch.hppc.FloatArrayList; -import com.carrotsearch.hppc.IntArrayList; -import com.carrotsearch.hppc.IntIntHashMap; -import com.carrotsearch.hppc.IntLongHashMap; -import com.carrotsearch.hppc.cursors.IntIntCursor; -import com.carrotsearch.hppc.cursors.IntLongCursor; - import static org.apache.solr.common.params.CommonParams.SORT; /** @@ -215,7 +215,7 @@ public static class CollapsingPostFilter extends ExtendedQueryBase implements Po public String hint; private boolean needsScores = true; private int nullPolicy; - private Map boosted; + private Set boosted; // ordered by "priority" public static final int NULL_POLICY_IGNORE = 0; public static final int NULL_POLICY_COLLAPSE = 1; public static final int NULL_POLICY_EXPAND = 2; @@ -338,11 +338,6 @@ public CollapsingPostFilter(SolrParams localParams, SolrParams params, SolrQuery } } - private IntIntHashMap getBoostDocs(SolrIndexSearcher indexSearcher, Map boosted, Map context) throws IOException { - IntIntHashMap boostDocs = QueryElevationComponent.getBoostDocs(indexSearcher, boosted, context); - 
return boostDocs; - } - public DelegatingCollector getFilterCollector(IndexSearcher indexSearcher) { try { @@ -360,10 +355,10 @@ public DelegatingCollector getFilterCollector(IndexSearcher indexSearcher) { } if(this.boosted == null && context != null) { - this.boosted = (Map)context.get(QueryElevationComponent.BOOSTED_PRIORITY); + this.boosted = (Set)context.get(QueryElevationComponent.BOOSTED); } - boostDocsMap = getBoostDocs(searcher, this.boosted, context); + boostDocsMap = QueryElevationComponent.getBoostDocs(searcher, this.boosted, context); return collectorFactory.getCollector(this.collapseField, this.groupHeadSelector, this.sortSpec, diff --git a/solr/core/src/java/org/apache/solr/search/ReRankCollector.java b/solr/core/src/java/org/apache/solr/search/ReRankCollector.java index 0447053b3cbd..a1689dd7b08d 100644 --- a/solr/core/src/java/org/apache/solr/search/ReRankCollector.java +++ b/solr/core/src/java/org/apache/solr/search/ReRankCollector.java @@ -20,9 +20,10 @@ import java.util.Arrays; import java.util.Comparator; import java.util.Map; +import java.util.Set; + import com.carrotsearch.hppc.IntFloatHashMap; import com.carrotsearch.hppc.IntIntHashMap; - import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.LeafCollector; @@ -46,7 +47,7 @@ public class ReRankCollector extends TopDocsCollector { final private IndexSearcher searcher; final private int reRankDocs; final private int length; - final private Map boostedPriority; + final private Set boostedPriority; // order is the "priority" final private Rescorer reRankQueryRescorer; @@ -55,7 +56,7 @@ public ReRankCollector(int reRankDocs, Rescorer reRankQueryRescorer, QueryCommand cmd, IndexSearcher searcher, - Map boostedPriority) throws IOException { + Set boostedPriority) throws IOException { super(null); this.reRankDocs = reRankDocs; this.length = length; diff --git 
a/solr/core/src/test/org/apache/solr/handler/component/QueryElevationComponentTest.java b/solr/core/src/test/org/apache/solr/handler/component/QueryElevationComponentTest.java index 2528b3f56247..3802826debcf 100644 --- a/solr/core/src/test/org/apache/solr/handler/component/QueryElevationComponentTest.java +++ b/solr/core/src/test/org/apache/solr/handler/component/QueryElevationComponentTest.java @@ -16,32 +16,30 @@ */ package org.apache.solr.handler.component; +import java.io.File; +import java.io.FileOutputStream; +import java.io.OutputStreamWriter; +import java.io.PrintWriter; +import java.lang.invoke.MethodHandles; +import java.nio.charset.StandardCharsets; + import org.apache.lucene.index.IndexReader; +import org.apache.lucene.util.BytesRef; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.GroupParams; -import org.apache.solr.common.params.MapSolrParams; import org.apache.solr.common.params.QueryElevationParams; -import org.apache.solr.util.FileUtils; +import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.core.SolrCore; -import org.apache.solr.request.LocalSolrQueryRequest; import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.search.SolrIndexSearcher; +import org.apache.solr.util.FileUtils; import org.junit.Before; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.File; -import java.io.FileOutputStream; -import java.io.OutputStreamWriter; -import java.io.PrintWriter; -import java.lang.invoke.MethodHandles; -import java.nio.charset.StandardCharsets; -import java.util.HashMap; -import java.util.Map; - - public class QueryElevationComponentTest extends SolrTestCaseJ4 { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); @@ -69,6 +67,7 @@ private void init(String config, String schema) throws Exception { 
assertU(commit()); } + //TODO should be @After ? private void delete() throws Exception { deleteCore(); } @@ -121,22 +120,22 @@ public void testGroupedQuery() throws Exception { assertU(adoc("id", "7", "text", "AAAA AAAA ZZZZ", "str_s", "g")); assertU(adoc("id", "8", "text", "XXXX", "str_s", "h")); assertU(adoc("id", "9", "text", "YYYY ZZZZ", "str_s", "i")); - + assertU(adoc("id", "22", "text", "XXXX ZZZZ AAAA", "str_s", "b")); assertU(adoc("id", "66", "text", "XXXX ZZZZ AAAA", "str_s", "f")); assertU(adoc("id", "77", "text", "XXXX ZZZZ AAAA", "str_s", "g")); - + assertU(commit()); final String groups = "//arr[@name='groups']"; - assertQ("non-elevated group query", - req(CommonParams.Q, "AAAA", + assertQ("non-elevated group query", + req(CommonParams.Q, "AAAA", CommonParams.QT, "/elevate", - GroupParams.GROUP_FIELD, "str_s", + GroupParams.GROUP_FIELD, "str_s", GroupParams.GROUP, "true", - GroupParams.GROUP_TOTAL_COUNT, "true", - GroupParams.GROUP_LIMIT, "100", + GroupParams.GROUP_TOTAL_COUNT, "true", + GroupParams.GROUP_LIMIT, "100", QueryElevationParams.ENABLE, "false", CommonParams.FL, "id, score, [elevated]") , "//*[@name='ngroups'][.='3']" @@ -158,13 +157,13 @@ public void testGroupedQuery() throws Exception { , groups +"/lst[3]//doc[2]/bool[@name='[elevated]'][.='false']" ); - assertQ("elevated group query", - req(CommonParams.Q, "AAAA", + assertQ("elevated group query", + req(CommonParams.Q, "AAAA", CommonParams.QT, "/elevate", - GroupParams.GROUP_FIELD, "str_s", + GroupParams.GROUP_FIELD, "str_s", GroupParams.GROUP, "true", GroupParams.GROUP_TOTAL_COUNT, "true", - GroupParams.GROUP_LIMIT, "100", + GroupParams.GROUP_LIMIT, "100", CommonParams.FL, "id, score, [elevated]") , "//*[@name='ngroups'][.='3']" , "//*[@name='matches'][.='6']" @@ -185,14 +184,14 @@ public void testGroupedQuery() throws Exception { , groups +"/lst[3]//doc[2]/bool[@name='[elevated]'][.='false']" ); - assertQ("non-elevated because sorted group query", - req(CommonParams.Q, "AAAA", + 
assertQ("non-elevated because sorted group query", + req(CommonParams.Q, "AAAA", CommonParams.QT, "/elevate", CommonParams.SORT, "id asc", - GroupParams.GROUP_FIELD, "str_s", + GroupParams.GROUP_FIELD, "str_s", GroupParams.GROUP, "true", - GroupParams.GROUP_TOTAL_COUNT, "true", - GroupParams.GROUP_LIMIT, "100", + GroupParams.GROUP_TOTAL_COUNT, "true", + GroupParams.GROUP_LIMIT, "100", CommonParams.FL, "id, score, [elevated]") , "//*[@name='ngroups'][.='3']" , "//*[@name='matches'][.='6']" @@ -213,15 +212,15 @@ public void testGroupedQuery() throws Exception { , groups +"/lst[3]//doc[2]/bool[@name='[elevated]'][.='false']" ); - assertQ("force-elevated sorted group query", - req(CommonParams.Q, "AAAA", + assertQ("force-elevated sorted group query", + req(CommonParams.Q, "AAAA", CommonParams.QT, "/elevate", CommonParams.SORT, "id asc", - QueryElevationParams.FORCE_ELEVATION, "true", - GroupParams.GROUP_FIELD, "str_s", + QueryElevationParams.FORCE_ELEVATION, "true", + GroupParams.GROUP_FIELD, "str_s", GroupParams.GROUP, "true", - GroupParams.GROUP_TOTAL_COUNT, "true", - GroupParams.GROUP_LIMIT, "100", + GroupParams.GROUP_TOTAL_COUNT, "true", + GroupParams.GROUP_LIMIT, "100", CommonParams.FL, "id, score, [elevated]") , "//*[@name='ngroups'][.='3']" , "//*[@name='matches'][.='6']" @@ -243,15 +242,15 @@ public void testGroupedQuery() throws Exception { ); - assertQ("non-elevated because of sort within group query", - req(CommonParams.Q, "AAAA", + assertQ("non-elevated because of sort within group query", + req(CommonParams.Q, "AAAA", CommonParams.QT, "/elevate", CommonParams.SORT, "id asc", - GroupParams.GROUP_SORT, "id desc", - GroupParams.GROUP_FIELD, "str_s", + GroupParams.GROUP_SORT, "id desc", + GroupParams.GROUP_FIELD, "str_s", GroupParams.GROUP, "true", - GroupParams.GROUP_TOTAL_COUNT, "true", - GroupParams.GROUP_LIMIT, "100", + GroupParams.GROUP_TOTAL_COUNT, "true", + GroupParams.GROUP_LIMIT, "100", CommonParams.FL, "id, score, [elevated]") , 
"//*[@name='ngroups'][.='3']" , "//*[@name='matches'][.='6']" @@ -273,16 +272,16 @@ public void testGroupedQuery() throws Exception { ); - assertQ("force elevated sort within sorted group query", - req(CommonParams.Q, "AAAA", + assertQ("force elevated sort within sorted group query", + req(CommonParams.Q, "AAAA", CommonParams.QT, "/elevate", CommonParams.SORT, "id asc", - GroupParams.GROUP_SORT, "id desc", - QueryElevationParams.FORCE_ELEVATION, "true", - GroupParams.GROUP_FIELD, "str_s", + GroupParams.GROUP_SORT, "id desc", + QueryElevationParams.FORCE_ELEVATION, "true", + GroupParams.GROUP_FIELD, "str_s", GroupParams.GROUP, "true", - GroupParams.GROUP_TOTAL_COUNT, "true", - GroupParams.GROUP_LIMIT, "100", + GroupParams.GROUP_TOTAL_COUNT, "true", + GroupParams.GROUP_LIMIT, "100", CommonParams.FL, "id, score, [elevated]") , "//*[@name='ngroups'][.='3']" , "//*[@name='matches'][.='6']" @@ -363,9 +362,9 @@ public void testInterface() throws Exception { // Make sure the boosts loaded properly assertEquals(7, elevationProvider.size()); - assertEquals(1, elevationProvider.getElevationForQuery("XXXX").priorities.size()); - assertEquals(2, elevationProvider.getElevationForQuery("YYYY").priorities.size()); - assertEquals(3, elevationProvider.getElevationForQuery("ZZZZ").priorities.size()); + assertEquals(1, elevationProvider.getElevationForQuery("XXXX").elevatedIds.size()); + assertEquals(2, elevationProvider.getElevationForQuery("YYYY").elevatedIds.size()); + assertEquals(3, elevationProvider.getElevationForQuery("ZZZZ").elevatedIds.size()); assertEquals(null, elevationProvider.getElevationForQuery("xxxx")); assertEquals(null, elevationProvider.getElevationForQuery("yyyy")); assertEquals(null, elevationProvider.getElevationForQuery("zzzz")); @@ -380,12 +379,12 @@ public void testInterface() throws Exception { comp.inform(core); elevationProvider = comp.getElevationProvider(reader, core); assertEquals(7, elevationProvider.size()); - assertEquals(1, 
elevationProvider.getElevationForQuery("XXXX").priorities.size()); - assertEquals(2, elevationProvider.getElevationForQuery("YYYY").priorities.size()); - assertEquals(3, elevationProvider.getElevationForQuery("ZZZZ").priorities.size()); - assertEquals(1, elevationProvider.getElevationForQuery("xxxx").priorities.size()); - assertEquals(2, elevationProvider.getElevationForQuery("yyyy").priorities.size()); - assertEquals(3, elevationProvider.getElevationForQuery("zzzz").priorities.size()); + assertEquals(1, elevationProvider.getElevationForQuery("XXXX").elevatedIds.size()); + assertEquals(2, elevationProvider.getElevationForQuery("YYYY").elevatedIds.size()); + assertEquals(3, elevationProvider.getElevationForQuery("ZZZZ").elevatedIds.size()); + assertEquals(1, elevationProvider.getElevationForQuery("xxxx").elevatedIds.size()); + assertEquals(2, elevationProvider.getElevationForQuery("yyyy").elevatedIds.size()); + assertEquals(3, elevationProvider.getElevationForQuery("zzzz").elevatedIds.size()); assertEquals("xxxx", comp.analyzeQuery("XXXX")); assertEquals("xxxx yyyy", comp.analyzeQuery("XXXX YYYY")); @@ -454,11 +453,11 @@ public void testMarkExcludes() throws Exception { assertU(adoc("id", "5", "title", "YYYY YYYY", "str_s1", "y")); assertU(adoc("id", "6", "title", "XXXX XXXX", "str_s1", "z")); assertU(adoc("id", "7", "title", "AAAA", "str_s1", "a")); - + assertU(adoc("id", "8", "title", " QQQQ trash trash", "str_s1", "q")); assertU(adoc("id", "9", "title", " QQQQ QQQQ trash", "str_s1", "r")); assertU(adoc("id", "10", "title", "QQQQ QQQQ QQQQ ", "str_s1", "s")); - + assertU(commit()); assertQ("", req(CommonParams.Q, "XXXX XXXX", CommonParams.QT, "/elevate", @@ -475,7 +474,7 @@ public void testMarkExcludes() throws Exception { "//result/doc[3]/bool[@name='[excluded]'][.='false']", "//result/doc[4]/bool[@name='[excluded]'][.='true']" ); - + //ask for excluded as a field, but don't actually request the MARK_EXCLUDES //thus, number 6 should not be returned, b/c it is 
excluded assertQ("", req(CommonParams.Q, "XXXX XXXX", CommonParams.QT, "/elevate", @@ -489,7 +488,7 @@ public void testMarkExcludes() throws Exception { "//result/doc[2]/bool[@name='[excluded]'][.='false']", "//result/doc[3]/bool[@name='[excluded]'][.='false']" ); - + // test that excluded results are on the same positions in the result list // as when elevation component is disabled // (i.e. test that elevation component with MARK_EXCLUDES does not boost @@ -524,28 +523,27 @@ public void testMarkExcludes() throws Exception { public void testSorting() throws Exception { try { init("schema12.xml"); - assertU(adoc("id", "a", "title", "ipod trash trash", "str_s1", "a")); - assertU(adoc("id", "b", "title", "ipod ipod trash", "str_s1", "b")); - assertU(adoc("id", "c", "title", "ipod ipod ipod ", "str_s1", "c")); + assertU(adoc("id", "a", "title", "ipod trash trash", "str_s1", "group1")); + assertU(adoc("id", "b", "title", "ipod ipod trash", "str_s1", "group2")); + assertU(adoc("id", "c", "title", "ipod ipod ipod ", "str_s1", "group2")); - assertU(adoc("id", "x", "title", "boosted", "str_s1", "x")); - assertU(adoc("id", "y", "title", "boosted boosted", "str_s1", "y")); - assertU(adoc("id", "z", "title", "boosted boosted boosted", "str_s1", "z")); + assertU(adoc("id", "x", "title", "boosted", "str_s1", "group1")); + assertU(adoc("id", "y", "title", "boosted boosted", "str_s1", "group2")); + assertU(adoc("id", "z", "title", "boosted boosted boosted", "str_s1", "group2")); assertU(commit()); - String query = "title:ipod"; + final String query = "title:ipod"; - Map args = new HashMap<>(); // reusing args & requests this way is a solr-test-antipattern. 
PLEASE DO NOT COPY THIS CODE - args.put(CommonParams.Q, query); - args.put(CommonParams.QT, "/elevate"); - args.put(CommonParams.FL, "id,score"); - args.put("indent", "true"); - //args.put( CommonParams.FL, "id,title,score" ); - SolrQueryRequest req = new LocalSolrQueryRequest(h.getCore(), new MapSolrParams(args)); - IndexReader reader = req.getSearcher().getIndexReader(); - QueryElevationComponent booster = (QueryElevationComponent) req.getCore().getSearchComponent("elevate"); + final SolrParams baseParams = params( + "qt", "/elevate", + "q", query, + "fl", "id,score", + "indent", "true"); + + QueryElevationComponent booster = (QueryElevationComponent) h.getCore().getSearchComponent("elevate"); + IndexReader reader = h.getCore().withSearcher(SolrIndexSearcher::getIndexReader); - assertQ("Make sure standard sort works as expected", req + assertQ("Make sure standard sort works as expected", req(baseParams) , "//*[@numFound='3']" , "//result/doc[1]/str[@name='id'][.='c']" , "//result/doc[2]/str[@name='id'][.='b']" @@ -555,8 +553,7 @@ public void testSorting() throws Exception { // Explicitly set what gets boosted booster.setTopQueryResults(reader, query, false, new String[]{"x", "y", "z"}, null); - req.close(); req = new LocalSolrQueryRequest(h.getCore(), new MapSolrParams(args)); - assertQ("All six should make it", req + assertQ("All six should make it", req(baseParams) , "//*[@numFound='6']" , "//result/doc[1]/str[@name='id'][.='x']" , "//result/doc[2]/str[@name='id'][.='y']" @@ -567,9 +564,8 @@ public void testSorting() throws Exception { ); // now switch the order: - req.close(); req = new LocalSolrQueryRequest(h.getCore(), new MapSolrParams(args)); booster.setTopQueryResults(reader, query, false, new String[]{"a", "x"}, null); - assertQ("All four should make it", req + assertQ(req(baseParams) , "//*[@numFound='4']" , "//result/doc[1]/str[@name='id'][.='a']" , "//result/doc[2]/str[@name='id'][.='x']" @@ -577,44 +573,28 @@ public void testSorting() throws 
Exception { , "//result/doc[4]/str[@name='id'][.='b']" ); - // Test reverse sort - args.put(CommonParams.SORT, "score asc"); - req.close(); req = new LocalSolrQueryRequest(h.getCore(), new MapSolrParams(args)); - assertQ("All four should make it", req - , "//*[@numFound='4']" - // NOTE REVERSED doc[X] indices - , "//result/doc[4]/str[@name='id'][.='a']" - , "//result/doc[3]/str[@name='id'][.='x']" - , "//result/doc[2]/str[@name='id'][.='c']" - , "//result/doc[1]/str[@name='id'][.='b']" - ); - // Try normal sort by 'id' // default 'forceBoost' should be false assertEquals(false, booster.forceElevation); - args.put(CommonParams.SORT, "str_s1 asc"); - req.close(); req = new LocalSolrQueryRequest(h.getCore(), new MapSolrParams(args)); - assertQ(null, req + assertQ(req(baseParams, "sort", "id asc") , "//*[@numFound='4']" , "//result/doc[1]/str[@name='id'][.='a']" , "//result/doc[2]/str[@name='id'][.='b']" , "//result/doc[3]/str[@name='id'][.='c']" , "//result/doc[4]/str[@name='id'][.='x']" ); - args.put(CommonParams.SORT, "id asc"); - req.close(); req = new LocalSolrQueryRequest(h.getCore(), new MapSolrParams(args)); - assertQ(null, req + + assertQ("useConfiguredElevatedOrder=false", + req(baseParams, "sort", "str_s1 asc,id desc", "useConfiguredElevatedOrder", "false") , "//*[@numFound='4']" - , "//result/doc[1]/str[@name='id'][.='a']" - , "//result/doc[2]/str[@name='id'][.='b']" + , "//result/doc[1]/str[@name='id'][.='x']"//group1 + , "//result/doc[2]/str[@name='id'][.='a']"//group1 , "//result/doc[3]/str[@name='id'][.='c']" - , "//result/doc[4]/str[@name='id'][.='x']" + , "//result/doc[4]/str[@name='id'][.='b']" ); booster.forceElevation = true; - args.put(CommonParams.SORT, "id asc"); - req.close(); req = new LocalSolrQueryRequest(h.getCore(), new MapSolrParams(args)); - assertQ(null, req + assertQ(req(baseParams, "sort", "id asc") , "//*[@numFound='4']" , "//result/doc[1]/str[@name='id'][.='a']" , "//result/doc[2]/str[@name='id'][.='x']" @@ -622,22 +602,27 @@ public 
void testSorting() throws Exception { , "//result/doc[4]/str[@name='id'][.='c']" ); + booster.forceElevation = true; + assertQ("useConfiguredElevatedOrder=false and forceElevation", + req(baseParams, "sort", "id desc", "useConfiguredElevatedOrder", "false") + , "//*[@numFound='4']" + , "//result/doc[1]/str[@name='id'][.='x']" // force elevated + , "//result/doc[2]/str[@name='id'][.='a']" // force elevated + , "//result/doc[3]/str[@name='id'][.='c']" + , "//result/doc[4]/str[@name='id'][.='b']" + ); + //Test exclusive (not to be confused with exclusion) - args.put(QueryElevationParams.EXCLUSIVE, "true"); - req.close(); req = new LocalSolrQueryRequest(h.getCore(), new MapSolrParams(args)); booster.setTopQueryResults(reader, query, false, new String[]{"x", "a"}, new String[]{}); - assertQ(null, req + assertQ(req(baseParams, "exclusive", "true") , "//*[@numFound='2']" , "//result/doc[1]/str[@name='id'][.='x']" , "//result/doc[2]/str[@name='id'][.='a']" ); // Test exclusion - args.remove(CommonParams.SORT); - args.remove(QueryElevationParams.EXCLUSIVE); - req.close(); req = new LocalSolrQueryRequest(h.getCore(), new MapSolrParams(args)); booster.setTopQueryResults(reader, query, false, new String[]{"x"}, new String[]{"a"}); - assertQ(null, req + assertQ(req(baseParams) , "//*[@numFound='3']" , "//result/doc[1]/str[@name='id'][.='x']" , "//result/doc[2]/str[@name='id'][.='c']" @@ -648,10 +633,7 @@ public void testSorting() throws Exception { // Test setting ids and excludes from http parameters booster.clearElevationProviderCache(); - args.put(QueryElevationParams.IDS, "x,y,z"); - args.put(QueryElevationParams.EXCLUDE, "b"); - req.close(); req = new LocalSolrQueryRequest(h.getCore(), new MapSolrParams(args)); - assertQ("All five should make it", req + assertQ("All five should make it", req(baseParams, "elevateIds", "x,y,z", "excludeIds", "b") , "//*[@numFound='5']" , "//result/doc[1]/str[@name='id'][.='x']" , "//result/doc[2]/str[@name='id'][.='y']" @@ -660,10 +642,7 @@ 
public void testSorting() throws Exception { , "//result/doc[5]/str[@name='id'][.='a']" ); - args.put(QueryElevationParams.IDS, "x,z,y"); - args.put(QueryElevationParams.EXCLUDE, "b,c"); - req.close(); req = new LocalSolrQueryRequest(h.getCore(), new MapSolrParams(args)); - assertQ("All four should make it", req + assertQ("All four should make it", req(baseParams, "elevateIds", "x,z,y", "excludeIds", "b,c") , "//*[@numFound='4']" , "//result/doc[1]/str[@name='id'][.='x']" , "//result/doc[2]/str[@name='id'][.='z']" @@ -671,7 +650,6 @@ public void testSorting() throws Exception { , "//result/doc[4]/str[@name='id'][.='a']" ); - req.close(); } finally { delete(); } @@ -708,35 +686,34 @@ public void testElevationReloading() throws Exception { comp.init(args); comp.inform(h.getCore()); - SolrQueryRequest req = req(); - IndexReader reader = req.getSearcher().getIndexReader(); - QueryElevationComponent.ElevationProvider elevationProvider = comp.getElevationProvider(reader, h.getCore()); - assertTrue(elevationProvider.getElevationForQuery("aaa").elevatedIds.contains("A")); - assertNull(elevationProvider.getElevationForQuery("bbb")); - req.close(); + QueryElevationComponent.ElevationProvider elevationProvider = null; + + try (SolrQueryRequest req = req()) { + elevationProvider = comp.getElevationProvider(req.getSearcher().getIndexReader(), req.getCore()); + assertTrue(elevationProvider.getElevationForQuery("aaa").elevatedIds.contains(new BytesRef("A"))); + assertNull(elevationProvider.getElevationForQuery("bbb")); + } // now change the file writeFile(configFile, "bbb", "B"); // With no index change, we get the same index reader, so the elevationProviderCache returns the previous ElevationProvider without the change. 
- req = req(); - reader = req.getSearcher().getIndexReader(); - elevationProvider = comp.getElevationProvider(reader, h.getCore()); - assertTrue(elevationProvider.getElevationForQuery("aaa").elevatedIds.contains("A")); - assertNull(elevationProvider.getElevationForQuery("bbb")); - req.close(); + try (SolrQueryRequest req = req()) { + elevationProvider = comp.getElevationProvider(req.getSearcher().getIndexReader(), req.getCore()); + assertTrue(elevationProvider.getElevationForQuery("aaa").elevatedIds.contains(new BytesRef("A"))); + assertNull(elevationProvider.getElevationForQuery("bbb")); + } // Index a new doc to get a new index reader. assertU(adoc("id", "10000")); assertU(commit()); // Check that we effectively reload a new ElevationProvider for a different index reader (so two entries in elevationProviderCache). - req = req(); - reader = req.getSearcher().getIndexReader(); - elevationProvider = comp.getElevationProvider(reader, h.getCore()); - assertNull(elevationProvider.getElevationForQuery("aaa")); - assertTrue(elevationProvider.getElevationForQuery("bbb").elevatedIds.contains("B")); - req.close(); + try (SolrQueryRequest req = req()) { + elevationProvider = comp.getElevationProvider(req.getSearcher().getIndexReader(), req.getCore()); + assertNull(elevationProvider.getElevationForQuery("aaa")); + assertTrue(elevationProvider.getElevationForQuery("bbb").elevatedIds.contains(new BytesRef("B"))); + } // Now change the config file again. writeFile(configFile, "ccc", "C"); @@ -744,13 +721,12 @@ public void testElevationReloading() throws Exception { // Without index change, but calling a different method that clears the elevationProviderCache, so we should load a new ElevationProvider. 
int elevationRuleNumber = comp.loadElevationConfiguration(h.getCore()); assertEquals(1, elevationRuleNumber); - req = req(); - reader = req.getSearcher().getIndexReader(); - elevationProvider = comp.getElevationProvider(reader, h.getCore()); - assertNull(elevationProvider.getElevationForQuery("aaa")); - assertNull(elevationProvider.getElevationForQuery("bbb")); - assertTrue(elevationProvider.getElevationForQuery("ccc").elevatedIds.contains("C")); - req.close(); + try (SolrQueryRequest req = req()) { + elevationProvider = comp.getElevationProvider(req.getSearcher().getIndexReader(), req.getCore()); + assertNull(elevationProvider.getElevationForQuery("aaa")); + assertNull(elevationProvider.getElevationForQuery("bbb")); + assertTrue(elevationProvider.getElevationForQuery("ccc").elevatedIds.contains(new BytesRef("C"))); + } } finally { delete(); } @@ -787,4 +763,5 @@ public void testWithLocalParam() throws Exception { delete(); } } -} + +} \ No newline at end of file diff --git a/solr/solr-ref-guide/src/the-query-elevation-component.adoc b/solr/solr-ref-guide/src/the-query-elevation-component.adoc index 3c7f50441179..3f9bf639968c 100644 --- a/solr/solr-ref-guide/src/the-query-elevation-component.adoc +++ b/solr/solr-ref-guide/src/the-query-elevation-component.adoc @@ -68,6 +68,13 @@ Path to the file that defines query elevation. 
This file must exist in `> can be used to annotate each document with information about whether or not it was elevated: diff --git a/solr/solrj/src/java/org/apache/solr/common/params/QueryElevationParams.java b/solr/solrj/src/java/org/apache/solr/common/params/QueryElevationParams.java index d538cab6a2b4..9a31f6918896 100644 --- a/solr/solrj/src/java/org/apache/solr/common/params/QueryElevationParams.java +++ b/solr/solrj/src/java/org/apache/solr/common/params/QueryElevationParams.java @@ -34,6 +34,7 @@ public interface QueryElevationParams { * See http://wiki.apache.org/solr/DocTransformers */ String EDITORIAL_MARKER_FIELD_NAME = "editorialMarkerFieldName"; + /** * The name of the field that excluded editorial results will be written out as when using the QueryElevationComponent, which * automatically configures the EditorialMarkerFactory. The default name is "excluded". This is only used @@ -50,8 +51,8 @@ public interface QueryElevationParams { String MARK_EXCLUDES = "markExcludes"; /** - * Whether the priority order between elevated documents is kept, based on the definition order in the configuration file. - * This parameter is only taken into account if {@link QueryElevationParams#FORCE_ELEVATION} is true. + * When multiple docs are elevated, should their relative order be the order in the configuration file or should + * they be subject to whatever the sort criteria is? True by default. */ - String KEEP_ELEVATION_PRIORITY = "keepElevationPriority"; -} + String USE_CONFIGURED_ELEVATED_ORDER = "useConfiguredElevatedOrder"; +} \ No newline at end of file