Skip to content

Commit

Permalink
Use radix sort to speed up the sorting of terms in TermInSetQuery (#1…
Browse files Browse the repository at this point in the history
  • Loading branch information
gf2121 committed Oct 17, 2023
1 parent 1ab477a commit 442dcfc
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 6 deletions.
2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
Expand Up @@ -61,6 +61,8 @@ Optimizations

* GITHUB#12591: Use stable radix sort to speed up the sorting of update terms. (Guo Feng)

* GITHUB#12587: Use radix sort to speed up the sorting of terms in TermInSetQuery. (Guo Feng)

* GITHUB#12604: Estimate the block size of FST BytesStore in BlockTreeTermsWriter
to reduce GC load during indexing. (Guo Feng)

Expand Down
21 changes: 19 additions & 2 deletions lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java
Expand Up @@ -29,11 +29,12 @@
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.BytesRefComparator;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.StringSorter;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.ByteRunAutomaton;
Expand Down Expand Up @@ -113,7 +114,23 @@ private static PrefixCodedTerms packTerms(String field, Collection<BytesRef> ter
boolean sorted =
terms instanceof SortedSet && ((SortedSet<BytesRef>) terms).comparator() == null;
if (sorted == false) {
ArrayUtil.timSort(sortedTerms);
// Sort sortedTerms in place with an anonymous StringSorter configured with
// BytesRefComparator.NATURAL; the sorter accesses the array only through the
// get/swap callbacks below.
new StringSorter(BytesRefComparator.NATURAL) {

// Exposes the i-th term to the sorter by pointing `result` at the term's
// backing array, offset and length. No bytes are copied and `builder` is not
// used by this implementation.
@Override
protected void get(BytesRefBuilder builder, BytesRef result, int i) {
BytesRef term = sortedTerms[i];
result.length = term.length;
result.offset = term.offset;
result.bytes = term.bytes;
}

// Exchanges the BytesRef references stored at slots i and j of sortedTerms.
@Override
protected void swap(int i, int j) {
BytesRef b = sortedTerms[i];
sortedTerms[i] = sortedTerms[j];
sortedTerms[j] = b;
}
}.sort(0, sortedTerms.length); // sorts the whole array: range [0, sortedTerms.length)
}
PrefixCodedTerms.Builder builder = new PrefixCodedTerms.Builder();
BytesRefBuilder previous = null;
Expand Down
Expand Up @@ -20,8 +20,7 @@
import java.util.Comparator;

/**
* Specialized {@link BytesRef} comparator that {@link
* FixedLengthBytesRefArray#iterator(Comparator)} has optimizations for.
* Specialized {@link BytesRef} comparator that {@link StringSorter} has optimizations for.
*
* @lucene.internal
*/
Expand Down
10 changes: 8 additions & 2 deletions lucene/core/src/java/org/apache/lucene/util/StringSorter.java
Expand Up @@ -19,7 +19,13 @@

import java.util.Comparator;

abstract class StringSorter extends Sorter {
/**
* A {@link BytesRef} sorter that tries to use an efficient radix sorter if {@link StringSorter#cmp}
* is a {@link BytesRefComparator}, and otherwise falls back to {@link StringSorter#fallbackSorter}.
*
* @lucene.internal
*/
public abstract class StringSorter extends Sorter {

private final Comparator<BytesRef> cmp;
protected final BytesRefBuilder scratch1 = new BytesRefBuilder();
Expand All @@ -29,7 +35,7 @@ abstract class StringSorter extends Sorter {
protected final BytesRef scratchBytes2 = new BytesRef();
protected final BytesRef pivot = new BytesRef();

StringSorter(Comparator<BytesRef> cmp) {
protected StringSorter(Comparator<BytesRef> cmp) {
this.cmp = cmp;
}

Expand Down

0 comments on commit 442dcfc

Please sign in to comment.