diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index dce07644fab3..2449db7978ac 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -163,6 +163,9 @@ Optimizations * GITHUB#12382: Faster top-level conjunctions on term queries when sorting by descending score. (Adrien Grand) +* GITHUB#12604: Estimate the block size of FST BytesStore in BlockTreeTermsWriter + to reduce GC load during indexing. (Guo Feng) + Changes in runtime behavior --------------------- diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsWriter.java index a7eb438489f9..1b0bd3568c9f 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsWriter.java @@ -52,6 +52,7 @@ import org.apache.lucene.util.fst.FST; import org.apache.lucene.util.fst.FSTCompiler; import org.apache.lucene.util.fst.Util; +import org.apache.lucene.util.packed.PackedInts; /* TODO: @@ -490,10 +491,22 @@ public void compileIndex( } } + long estimateSize = prefix.length; + for (PendingBlock block : blocks) { + if (block.subIndices != null) { + for (FST<BytesRef> subIndex : block.subIndices) { + estimateSize += subIndex.numBytes(); + } + } + } + int estimateBitsRequired = PackedInts.bitsRequired(estimateSize); + int pageBits = Math.min(15, Math.max(6, estimateBitsRequired)); + final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton(); final FSTCompiler<BytesRef> fstCompiler = new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE1, outputs) .shouldShareNonSingletonNodes(false) + .bytesPageBits(pageBits) .build(); // if (DEBUG) { // System.out.println(" compile index for prefix=" + prefix); diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/FST.java b/lucene/core/src/java/org/apache/lucene/util/fst/FST.java index 
816a28572681..fb356c2c9c7e 100644 --- a/lucene/core/src/java/org/apache/lucene/util/fst/FST.java +++ b/lucene/core/src/java/org/apache/lucene/util/fst/FST.java @@ -520,6 +520,10 @@ void finish(long newStartNode) throws IOException { bytes.finish(); } + public long numBytes() { + return bytes.getPosition(); + } + public T getEmptyOutput() { return emptyOutput; }