From 442dcfca996a12009b9b7d13299984e0114b2810 Mon Sep 17 00:00:00 2001 From: gf2121 <52390227+gf2121@users.noreply.github.com> Date: Tue, 17 Oct 2023 17:31:29 +0800 Subject: [PATCH] Use radix sort to speed up the sorting of terms in TermInSetQuery (#12587) --- lucene/CHANGES.txt | 2 ++ .../apache/lucene/search/TermInSetQuery.java | 21 +++++++++++++++++-- .../lucene/util/BytesRefComparator.java | 3 +-- .../org/apache/lucene/util/StringSorter.java | 10 +++++++-- 4 files changed, 30 insertions(+), 6 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 72ccbb21635..a6c6cc418ea 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -61,6 +61,8 @@ Optimizations * GITHUB#12591: Use stable radix sort to speed up the sorting of update terms. (Guo Feng) +* GITHUB#12587: Use radix sort to speed up the sorting of terms in TermInSetQuery. (Guo Feng) + * GITHUB#12604: Estimate the block size of FST BytesStore in BlockTreeTermsWriter to reduce GC load during indexing. (Guo Feng) diff --git a/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java index ed268751bba..51d44f254a8 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java @@ -29,11 +29,12 @@ import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.util.Accountable; -import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; +import org.apache.lucene.util.BytesRefComparator; import org.apache.lucene.util.RamUsageEstimator; +import org.apache.lucene.util.StringSorter; import org.apache.lucene.util.automaton.Automata; import org.apache.lucene.util.automaton.Automaton; import org.apache.lucene.util.automaton.ByteRunAutomaton; @@ -113,7 +114,23 @@ private static 
PrefixCodedTerms packTerms(String field, Collection ter boolean sorted = terms instanceof SortedSet && ((SortedSet) terms).comparator() == null; if (sorted == false) { - ArrayUtil.timSort(sortedTerms); + new StringSorter(BytesRefComparator.NATURAL) { + + @Override + protected void get(BytesRefBuilder builder, BytesRef result, int i) { + BytesRef term = sortedTerms[i]; + result.length = term.length; + result.offset = term.offset; + result.bytes = term.bytes; + } + + @Override + protected void swap(int i, int j) { + BytesRef b = sortedTerms[i]; + sortedTerms[i] = sortedTerms[j]; + sortedTerms[j] = b; + } + }.sort(0, sortedTerms.length); } PrefixCodedTerms.Builder builder = new PrefixCodedTerms.Builder(); BytesRefBuilder previous = null; diff --git a/lucene/core/src/java/org/apache/lucene/util/BytesRefComparator.java b/lucene/core/src/java/org/apache/lucene/util/BytesRefComparator.java index 0465ec13e6c..1de78b249e2 100644 --- a/lucene/core/src/java/org/apache/lucene/util/BytesRefComparator.java +++ b/lucene/core/src/java/org/apache/lucene/util/BytesRefComparator.java @@ -20,8 +20,7 @@ import java.util.Comparator; /** - * Specialized {@link BytesRef} comparator that {@link - * FixedLengthBytesRefArray#iterator(Comparator)} has optimizations for. + * Specialized {@link BytesRef} comparator that {@link StringSorter} has optimizations for. 
* * @lucene.internal */ diff --git a/lucene/core/src/java/org/apache/lucene/util/StringSorter.java b/lucene/core/src/java/org/apache/lucene/util/StringSorter.java index 3cee1ab516a..b5458551ab8 100644 --- a/lucene/core/src/java/org/apache/lucene/util/StringSorter.java +++ b/lucene/core/src/java/org/apache/lucene/util/StringSorter.java @@ -19,7 +19,13 @@ import java.util.Comparator; -abstract class StringSorter extends Sorter { +/** + * A {@link BytesRef} sorter that tries to use an efficient radix sorter if {@link StringSorter#cmp} + * is a {@link BytesRefComparator}, otherwise falls back to {@link StringSorter#fallbackSorter}. + * + * @lucene.internal + */ +public abstract class StringSorter extends Sorter { private final Comparator cmp; protected final BytesRefBuilder scratch1 = new BytesRefBuilder(); @@ -29,7 +35,7 @@ abstract class StringSorter extends Sorter { protected final BytesRef scratchBytes2 = new BytesRef(); protected final BytesRef pivot = new BytesRef(); - StringSorter(Comparator cmp) { + protected StringSorter(Comparator cmp) { this.cmp = cmp; }