Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
LUCENE-6640: Use BitsProducer instead of Filter in lucene/suggest.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1688682 13f79535-47bb-0310-9956-ffa450edef68
  • Loading branch information
jpountz committed Jul 1, 2015
1 parent a0c2ed3 commit 304e1b9
Show file tree
Hide file tree
Showing 9 changed files with 153 additions and 102 deletions.
4 changes: 4 additions & 0 deletions lucene/CHANGES.txt
Expand Up @@ -164,6 +164,10 @@ API Changes
* LUCENE-6648: All lucene/facet APIs now take Query objects where they used to
take Filter objects. (Adrien Grand)

* LUCENE-6640: Suggesters now take a BitsProducer object instead of a Filter
object to reduce the scope of doc IDs that may be returned, emphasizing the
fact that these objects need to support random-access. (Adrien Grand)

Bug fixes

* LUCENE-6500: ParallelCompositeReader did not always call
Expand Down
@@ -0,0 +1,34 @@
package org.apache.lucene.search.suggest;

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import java.io.IOException;

import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.util.Bits;

/**
* A producer of {@link Bits} per segment.
* <p>
* Suggesters take a {@code BitsProducer} to reduce the scope of doc IDs that
* may be returned. Implementations need to support random-access lookups,
* which is why this abstraction hands out {@link Bits} per leaf.
*/
public abstract class BitsProducer {

/**
* Return {@link Bits} for the given leaf. The returned instance must
* be non-null and have a {@link Bits#length() length} equal to
* {@link LeafReader#maxDoc() maxDoc}.
*
* @param context the leaf (segment) for which bits are requested
* @return a non-null {@link Bits} whose length equals the leaf's
* {@link LeafReader#maxDoc() maxDoc}
* @throws IOException if producing the bits for this leaf fails with an I/O error
*/
public abstract Bits getBits(LeafReaderContext context) throws IOException;

}
Expand Up @@ -24,15 +24,15 @@
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.suggest.BitsProducer;

import static org.apache.lucene.search.suggest.document.CompletionAnalyzer.HOLE_CHARACTER;
import static org.apache.lucene.search.suggest.document.CompletionAnalyzer.SEP_LABEL;

/**
* Abstract {@link Query} that matches documents containing terms with a specified prefix
* filtered by {@link Filter}. This should be used to query against any {@link SuggestField}s
* filtered by {@link BitsProducer}. This should be used to query against any {@link SuggestField}s
* or {@link ContextSuggestField}s of documents.
* <p>
* Use {@link SuggestIndexSearcher#suggest(CompletionQuery, int)} to execute any query
Expand All @@ -56,25 +56,25 @@ public abstract class CompletionQuery extends Query {
private final Term term;

/**
* Filter for document scoping
* {@link BitsProducer} which is used to filter the document scope.
*/
private final Filter filter;
private final BitsProducer filter;

/**
* Creates a base Completion query against a <code>term</code>
* with a <code>filter</code> to scope the documents
*/
protected CompletionQuery(Term term, Filter filter) {
protected CompletionQuery(Term term, BitsProducer filter) {
validate(term.text());
this.term = term;
this.filter = filter;
}

/**
* Returns the filter for the query, used to
* suggest completions on a subset of indexed documents
* Returns a {@link BitsProducer}. Only suggestions matching the returned
* bits will be returned.
*/
public Filter getFilter() {
public BitsProducer getFilter() {
return filter;
}

Expand Down Expand Up @@ -148,7 +148,7 @@ public String toString(String field) {
buffer.append(",");
buffer.append("filter");
buffer.append(":");
buffer.append(filter.toString(field));
buffer.append(filter.toString());
}
return buffer.toString();
}
Expand Down
Expand Up @@ -25,11 +25,10 @@
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.search.BulkScorer;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.suggest.BitsProducer;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.automaton.Automaton;
Expand Down Expand Up @@ -88,19 +87,15 @@ public BulkScorer bulkScorer(final LeafReaderContext context) throws IOException
throw new IllegalArgumentException(completionQuery.getField() + " is not a SuggestField");
}

DocIdSet docIdSet = null;
Filter filter = completionQuery.getFilter();
BitsProducer filter = completionQuery.getFilter();
Bits filteredDocs = null;
if (filter != null) {
docIdSet = filter.getDocIdSet(context, null);
if (docIdSet == null || docIdSet.iterator() == null) {
// filter matches no docs in current leave
filteredDocs = filter.getBits(context);
if (filteredDocs.getClass() == Bits.MatchNoBits.class) {
return null;
} else if (docIdSet.bits() == null) {
throw new IllegalArgumentException("DocIDSet does not provide random access interface");
}
}
Bits acceptDocBits = (docIdSet != null) ? docIdSet.bits() : null;
return new CompletionScorer(this, suggester, reader, acceptDocBits, filter != null, automaton);
return new CompletionScorer(this, suggester, reader, filteredDocs, filter != null, automaton);
}

/**
Expand Down
Expand Up @@ -23,9 +23,9 @@

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.suggest.BitsProducer;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.automaton.Automata;
Expand Down Expand Up @@ -88,15 +88,15 @@ public class FuzzyCompletionQuery extends PrefixCompletionQuery {
private final int maxDeterminizedStates;

/**
* Calls {@link FuzzyCompletionQuery#FuzzyCompletionQuery(Analyzer, Term, Filter)}
* Calls {@link FuzzyCompletionQuery#FuzzyCompletionQuery(Analyzer, Term, BitsProducer)}
* with no filter
*/
public FuzzyCompletionQuery(Analyzer analyzer, Term term) {
this(analyzer, term, null);
}

/**
* Calls {@link FuzzyCompletionQuery#FuzzyCompletionQuery(Analyzer, Term, Filter,
* Calls {@link FuzzyCompletionQuery#FuzzyCompletionQuery(Analyzer, Term, BitsProducer,
* int, boolean, int, int, boolean, int)}
* with defaults for <code>maxEdits</code>, <code>transpositions</code>,
* <code>nonFuzzyPrefix</code>, <code>minFuzzyLength</code>,
Expand All @@ -107,7 +107,7 @@ public FuzzyCompletionQuery(Analyzer analyzer, Term term) {
* {@link #DEFAULT_UNICODE_AWARE} and {@link Operations#DEFAULT_MAX_DETERMINIZED_STATES}
* for defaults
*/
public FuzzyCompletionQuery(Analyzer analyzer, Term term, Filter filter) {
public FuzzyCompletionQuery(Analyzer analyzer, Term term, BitsProducer filter) {
this(analyzer, term, filter, DEFAULT_MAX_EDITS, DEFAULT_TRANSPOSITIONS, DEFAULT_NON_FUZZY_PREFIX,
DEFAULT_MIN_FUZZY_LENGTH, DEFAULT_UNICODE_AWARE, Operations.DEFAULT_MAX_DETERMINIZED_STATES
);
Expand All @@ -127,7 +127,7 @@ public FuzzyCompletionQuery(Analyzer analyzer, Term term, Filter filter) {
* @param unicodeAware treat prefix as unicode rather than bytes
* @param maxDeterminizedStates maximum automaton states allowed for {@link LevenshteinAutomata}
*/
public FuzzyCompletionQuery(Analyzer analyzer, Term term, Filter filter, int maxEdits,
public FuzzyCompletionQuery(Analyzer analyzer, Term term, BitsProducer filter, int maxEdits,
boolean transpositions, int nonFuzzyPrefix, int minFuzzyLength,
boolean unicodeAware, int maxDeterminizedStates) {
super(analyzer, term, filter);
Expand Down Expand Up @@ -208,7 +208,7 @@ public String toString(String field) {
if (getFilter() != null) {
buffer.append(",");
buffer.append("filter");
buffer.append(getFilter().toString(field));
buffer.append(getFilter().toString());
}
return buffer.toString();
}
Expand Down
Expand Up @@ -21,9 +21,9 @@

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.suggest.BitsProducer;

/**
* A {@link CompletionQuery} which takes an {@link Analyzer}
Expand All @@ -42,7 +42,7 @@ public class PrefixCompletionQuery extends CompletionQuery {
protected final CompletionAnalyzer analyzer;

/**
* Calls {@link PrefixCompletionQuery#PrefixCompletionQuery(Analyzer, Term, Filter)}
* Calls {@link PrefixCompletionQuery#PrefixCompletionQuery(Analyzer, Term, BitsProducer)}
* with no filter
*/
public PrefixCompletionQuery(Analyzer analyzer, Term term) {
Expand All @@ -57,7 +57,7 @@ public PrefixCompletionQuery(Analyzer analyzer, Term term) {
* is analyzed with <code>analyzer</code>
* @param filter used to query on a subset of documents
*/
public PrefixCompletionQuery(Analyzer analyzer, Term term, Filter filter) {
public PrefixCompletionQuery(Analyzer analyzer, Term term, BitsProducer filter) {
super(term, filter);
if (!(analyzer instanceof CompletionAnalyzer)) {
this.analyzer = new CompletionAnalyzer(analyzer);
Expand Down
Expand Up @@ -20,9 +20,9 @@
import java.io.IOException;

import org.apache.lucene.index.Term;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.suggest.BitsProducer;
import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.automaton.RegExp;

Expand Down Expand Up @@ -50,23 +50,23 @@ public class RegexCompletionQuery extends CompletionQuery {
private final int maxDeterminizedStates;

/**
* Calls {@link RegexCompletionQuery#RegexCompletionQuery(Term, Filter)}
* Calls {@link RegexCompletionQuery#RegexCompletionQuery(Term, BitsProducer)}
* with no filter
*/
public RegexCompletionQuery(Term term) {
this(term, null);
}

/**
* Calls {@link RegexCompletionQuery#RegexCompletionQuery(Term, int, int, Filter)}
* Calls {@link RegexCompletionQuery#RegexCompletionQuery(Term, int, int, BitsProducer)}
* enabling all optional regex syntax and <code>maxDeterminizedStates</code> of
* {@value Operations#DEFAULT_MAX_DETERMINIZED_STATES}
*/
public RegexCompletionQuery(Term term, Filter filter) {
public RegexCompletionQuery(Term term, BitsProducer filter) {
this(term, RegExp.ALL, Operations.DEFAULT_MAX_DETERMINIZED_STATES, filter);
}
/**
* Calls {@link RegexCompletionQuery#RegexCompletionQuery(Term, int, int, Filter)}
* Calls {@link RegexCompletionQuery#RegexCompletionQuery(Term, int, int, BitsProducer)}
* with no filter
*/
public RegexCompletionQuery(Term term, int flags, int maxDeterminizedStates) {
Expand All @@ -82,7 +82,7 @@ public RegexCompletionQuery(Term term, int flags, int maxDeterminizedStates) {
* @param maxDeterminizedStates used in {@link RegExp#toAutomaton(int)}
* @param filter used to query on a subset of documents
*/
public RegexCompletionQuery(Term term, int flags, int maxDeterminizedStates, Filter filter) {
public RegexCompletionQuery(Term term, int flags, int maxDeterminizedStates, BitsProducer filter) {
super(term, filter);
this.flags = flags;
this.maxDeterminizedStates = maxDeterminizedStates;
Expand Down

0 comments on commit 304e1b9

Please sign in to comment.