apache · gf2121 · Oct 4, 2023 · Sep 28, 2023 · Sep 28, 2023 · Sep 28, 2023
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
@@ -163,6 +163,9 @@ Optimizations
 * GITHUB#12382: Faster top-level conjunctions on term queries when sorting by
   descending score. (Adrien Grand)
 
+* GITHUB#12604: Estimate the block size of FST BytesStore in BlockTreeTermsWriter
+  to reduce GC load during indexing. (Guo Feng)
+
 Changes in runtime behavior
 ---------------------
 

diff --git a/...re/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsWriter.java b/...re/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsWriter.java
@@ -52,6 +52,7 @@
 import org.apache.lucene.util.fst.FST;
 import org.apache.lucene.util.fst.FSTCompiler;
 import org.apache.lucene.util.fst.Util;
+import org.apache.lucene.util.packed.PackedInts;
 
 /*
   TODO:
@@ -490,10 +491,22 @@ public void compileIndex(
         }
       }
 
+      long estimateSize = prefix.length;
+      for (PendingBlock block : blocks) {
+        if (block.subIndices != null) {
+          for (FST<BytesRef> subIndex : block.subIndices) {
+            estimateSize += subIndex.numBytes();
+          }
+        }
+      }
+      int estimateBitsRequired = PackedInts.bitsRequired(estimateSize);
+      int pageBits = Math.min(15, Math.max(6, estimateBitsRequired));
+
       final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
       final FSTCompiler<BytesRef> fstCompiler =
           new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE1, outputs)
               .shouldShareNonSingletonNodes(false)
+              .bytesPageBits(pageBits)
               .build();
       // if (DEBUG) {
       //  System.out.println("  compile index for prefix=" + prefix);

diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/FST.java b/lucene/core/src/java/org/apache/lucene/util/fst/FST.java
@@ -520,6 +520,10 @@ void finish(long newStartNode) throws IOException {
     bytes.finish();
   }
 
+  public long numBytes() { // exposes the current position of this FST's `bytes` store
+    return bytes.getPosition(); // presumably the bytes written so far; TODO confirm
+  }
+
   public T getEmptyOutput() {
     return emptyOutput;
   }