From 7df65cba13685023088a24bc8ad689d77935b77f Mon Sep 17 00:00:00 2001 From: Ramakrishna chilaka Date: Fri, 24 Oct 2025 19:32:27 +0530 Subject: [PATCH 1/2] Optimize bulk integer encoding with writeInts for better performance --- .../jmh/ForUtilEncodeBulkIntsBenchmark.java | 70 +++++++++++++++++++ .../generated/checksums/generateForUtil.json | 4 +- .../lucene/codecs/lucene104/ForUtil.java | 32 +++------ .../lucene/codecs/lucene104/gen_ForUtil.py | 35 +++------- .../lucene/store/ByteBuffersDataOutput.java | 21 ++++++ .../lucene/store/ByteBuffersIndexOutput.java | 6 ++ .../org/apache/lucene/store/DataOutput.java | 14 ++++ .../lucene/store/OutputStreamIndexOutput.java | 29 ++++++++ 8 files changed, 162 insertions(+), 49 deletions(-) create mode 100644 lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/ForUtilEncodeBulkIntsBenchmark.java diff --git a/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/ForUtilEncodeBulkIntsBenchmark.java b/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/ForUtilEncodeBulkIntsBenchmark.java new file mode 100644 index 000000000000..6a0d009e612c --- /dev/null +++ b/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/ForUtilEncodeBulkIntsBenchmark.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.benchmark.jmh; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.Random; +import java.util.concurrent.TimeUnit; +import org.apache.lucene.codecs.lucene104.ForUtil; +import org.apache.lucene.store.OutputStreamIndexOutput; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; + +@BenchmarkMode(Mode.Throughput) +@OutputTimeUnit(TimeUnit.MICROSECONDS) +@State(Scope.Benchmark) +@Warmup(iterations = 3, time = 3) +@Measurement(iterations = 5, time = 5) +@Fork(value = 1) +public class ForUtilEncodeBulkIntsBenchmark { + + private final ForUtil forUtil = new ForUtil(); + private final int[] ints = new int[ForUtil.BLOCK_SIZE]; + private ByteArrayOutputStream baos; + private OutputStreamIndexOutput output; + + @Param({"2", "4", "8", "12", "16", "20", "24", "28", "32"}) + public int bitsPerValue; + + @Setup(Level.Trial) + public void setup() { + Random random = new Random(0); + int mask = (int) ((1L << bitsPerValue) - 1); // long shift: int 1 << 32 wraps to 1 (mask 0) + for (int i = 0; i < ForUtil.BLOCK_SIZE; i++) { + ints[i] = random.nextInt() & mask; + } + baos = new ByteArrayOutputStream(); + output = new OutputStreamIndexOutput("benchmark", "benchmark", baos, 1024); + } + + @Benchmark + public void encode() throws IOException { + baos.reset(); + forUtil.encode(ints, bitsPerValue, output); + } +} diff --git a/lucene/core/src/generated/checksums/generateForUtil.json
b/lucene/core/src/generated/checksums/generateForUtil.json index 424079d09820..92ae47f4398e 100644 --- a/lucene/core/src/generated/checksums/generateForUtil.json +++ b/lucene/core/src/generated/checksums/generateForUtil.json @@ -1,4 +1,4 @@ { - "lucene/core/src/java/org/apache/lucene/codecs/lucene104/ForUtil.java": "bf1168dbc05311c2e49b652391e01cb01d3f9133", - "lucene/core/src/java/org/apache/lucene/codecs/lucene104/gen_ForUtil.py": "e87e420e633601f6f751b6777d7c094ebc66c3e7" + "lucene/core/src/java/org/apache/lucene/codecs/lucene104/ForUtil.java": "a7be1535a74a76ac908fc6983d41bec6a6fd91a6", + "lucene/core/src/java/org/apache/lucene/codecs/lucene104/gen_ForUtil.py": "a7731f26778957dcf4c21892b428a659e7e000da" } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/ForUtil.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/ForUtil.java index 523680e7d87c..3465ec07da35 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/ForUtil.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/ForUtil.java @@ -82,7 +82,7 @@ static void collapse16(int[] arr) { private final int[] tmp = new int[BLOCK_SIZE]; /** Encode 256 integers from {@code ints} into {@code out}. */ - void encode(int[] ints, int bitsPerValue, DataOutput out) throws IOException { + public void encode(int[] ints, int bitsPerValue, DataOutput out) throws IOException { final int nextPrimitive; if (bitsPerValue <= 8) { nextPrimitive = 8; @@ -101,6 +101,10 @@ static void encode(int[] ints, int bitsPerValue, int primitiveSize, DataOutput o final int numInts = BLOCK_SIZE * primitiveSize / Integer.SIZE; final int numIntsPerShift = bitsPerValue * 8; + + // Precompute masks to avoid array lookups + final int[] masks = (primitiveSize == 8) ? MASKS8 : (primitiveSize == 16) ? 
MASKS16 : MASKS32; + int idx = 0; int shift = primitiveSize - bitsPerValue; for (int i = 0; i < numIntsPerShift; ++i) { @@ -113,14 +117,7 @@ static void encode(int[] ints, int bitsPerValue, int primitiveSize, DataOutput o } final int remainingBitsPerInt = shift + bitsPerValue; - final int maskRemainingBitsPerInt; - if (primitiveSize == 8) { - maskRemainingBitsPerInt = MASKS8[remainingBitsPerInt]; - } else if (primitiveSize == 16) { - maskRemainingBitsPerInt = MASKS16[remainingBitsPerInt]; - } else { - maskRemainingBitsPerInt = MASKS32[remainingBitsPerInt]; - } + final int maskRemainingBitsPerInt = masks[remainingBitsPerInt]; int tmpIdx = 0; int remainingBitsPerValue = bitsPerValue; @@ -133,26 +130,15 @@ static void encode(int[] ints, int bitsPerValue, int primitiveSize, DataOutput o remainingBitsPerValue = bitsPerValue; } } else { - final int mask1, mask2; - if (primitiveSize == 8) { - mask1 = MASKS8[remainingBitsPerValue]; - mask2 = MASKS8[remainingBitsPerInt - remainingBitsPerValue]; - } else if (primitiveSize == 16) { - mask1 = MASKS16[remainingBitsPerValue]; - mask2 = MASKS16[remainingBitsPerInt - remainingBitsPerValue]; - } else { - mask1 = MASKS32[remainingBitsPerValue]; - mask2 = MASKS32[remainingBitsPerInt - remainingBitsPerValue]; - } + final int mask1 = masks[remainingBitsPerValue]; + final int mask2 = masks[remainingBitsPerInt - remainingBitsPerValue]; tmp[tmpIdx] |= (ints[idx++] & mask1) << (remainingBitsPerInt - remainingBitsPerValue); remainingBitsPerValue = bitsPerValue - remainingBitsPerInt + remainingBitsPerValue; tmp[tmpIdx++] |= (ints[idx] >>> remainingBitsPerValue) & mask2; } } - for (int i = 0; i < numIntsPerShift; ++i) { - out.writeInt(tmp[i]); - } + out.writeInts(tmp, 0, numIntsPerShift); } /** Number of bytes required to encode 256 integers of {@code bitsPerValue} bits per value. 
*/ diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/gen_ForUtil.py b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/gen_ForUtil.py index cacec01e0a0d..507c9075a363 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene104/gen_ForUtil.py +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene104/gen_ForUtil.py @@ -111,7 +111,7 @@ private final int[] tmp = new int[BLOCK_SIZE]; /** Encode 256 integers from {@code ints} into {@code out}. */ - void encode(int[] ints, int bitsPerValue, DataOutput out) throws IOException { + public void encode(int[] ints, int bitsPerValue, DataOutput out) throws IOException { final int nextPrimitive; if (bitsPerValue <= 8) { nextPrimitive = 8; @@ -125,10 +125,15 @@ encode(ints, bitsPerValue, nextPrimitive, out, tmp); } - static void encode(int[] ints, int bitsPerValue, int primitiveSize, DataOutput out, int[] tmp) throws IOException { + static void encode(int[] ints, int bitsPerValue, int primitiveSize, DataOutput out, int[] tmp) + throws IOException { final int numInts = BLOCK_SIZE * primitiveSize / Integer.SIZE; final int numIntsPerShift = bitsPerValue * 8; + + // Precompute masks to avoid array lookups + final int[] masks = (primitiveSize == 8) ? MASKS8 : (primitiveSize == 16) ? 
MASKS16 : MASKS32; + int idx = 0; int shift = primitiveSize - bitsPerValue; for (int i = 0; i < numIntsPerShift; ++i) { @@ -141,14 +146,7 @@ } final int remainingBitsPerInt = shift + bitsPerValue; - final int maskRemainingBitsPerInt; - if (primitiveSize == 8) { - maskRemainingBitsPerInt = MASKS8[remainingBitsPerInt]; - } else if (primitiveSize == 16) { - maskRemainingBitsPerInt = MASKS16[remainingBitsPerInt]; - } else { - maskRemainingBitsPerInt = MASKS32[remainingBitsPerInt]; - } + final int maskRemainingBitsPerInt = masks[remainingBitsPerInt]; int tmpIdx = 0; int remainingBitsPerValue = bitsPerValue; @@ -161,26 +159,15 @@ remainingBitsPerValue = bitsPerValue; } } else { - final int mask1, mask2; - if (primitiveSize == 8) { - mask1 = MASKS8[remainingBitsPerValue]; - mask2 = MASKS8[remainingBitsPerInt - remainingBitsPerValue]; - } else if (primitiveSize == 16) { - mask1 = MASKS16[remainingBitsPerValue]; - mask2 = MASKS16[remainingBitsPerInt - remainingBitsPerValue]; - } else { - mask1 = MASKS32[remainingBitsPerValue]; - mask2 = MASKS32[remainingBitsPerInt - remainingBitsPerValue]; - } + final int mask1 = masks[remainingBitsPerValue]; + final int mask2 = masks[remainingBitsPerInt - remainingBitsPerValue]; tmp[tmpIdx] |= (ints[idx++] & mask1) << (remainingBitsPerInt - remainingBitsPerValue); remainingBitsPerValue = bitsPerValue - remainingBitsPerInt + remainingBitsPerValue; tmp[tmpIdx++] |= (ints[idx] >>> remainingBitsPerValue) & mask2; } } - for (int i = 0; i < numIntsPerShift; ++i) { - out.writeInt(tmp[i]); - } + out.writeInts(tmp, 0, numIntsPerShift); } /** Number of bytes required to encode 256 integers of {@code bitsPerValue} bits per value. 
*/ diff --git a/lucene/core/src/java/org/apache/lucene/store/ByteBuffersDataOutput.java b/lucene/core/src/java/org/apache/lucene/store/ByteBuffersDataOutput.java index ab30d6153101..d1befc64b3b9 100644 --- a/lucene/core/src/java/org/apache/lucene/store/ByteBuffersDataOutput.java +++ b/lucene/core/src/java/org/apache/lucene/store/ByteBuffersDataOutput.java @@ -396,6 +396,27 @@ public void writeInt(int v) { } } + @Override + public void writeInts(int[] src, int offset, int length) { + while (length > 0) { + if (!currentBlock.hasRemaining()) { + appendBlock(); + } + + int intsToWrite = Math.min(currentBlock.remaining() / Integer.BYTES, length); + if (intsToWrite > 0) { + currentBlock.asIntBuffer().put(src, offset, intsToWrite); + currentBlock.position(currentBlock.position() + intsToWrite * Integer.BYTES); + offset += intsToWrite; + length -= intsToWrite; + } else { + // Less than 4 bytes remaining, write individual int + writeInt(src[offset++]); + length--; + } + } + } + @Override public void writeLong(long v) { try { diff --git a/lucene/core/src/java/org/apache/lucene/store/ByteBuffersIndexOutput.java b/lucene/core/src/java/org/apache/lucene/store/ByteBuffersIndexOutput.java index fdbe8e1dbea0..77983d6fbf27 100644 --- a/lucene/core/src/java/org/apache/lucene/store/ByteBuffersIndexOutput.java +++ b/lucene/core/src/java/org/apache/lucene/store/ByteBuffersIndexOutput.java @@ -118,6 +118,12 @@ public void writeInt(int i) throws IOException { delegate.writeInt(i); } + @Override + public void writeInts(int[] src, int offset, int length) { + ensureOpen(); + delegate.writeInts(src, offset, length); + } + @Override public void writeShort(short i) throws IOException { ensureOpen(); diff --git a/lucene/core/src/java/org/apache/lucene/store/DataOutput.java b/lucene/core/src/java/org/apache/lucene/store/DataOutput.java index 3d856cbc5f60..62a75679094e 100644 --- a/lucene/core/src/java/org/apache/lucene/store/DataOutput.java +++ 
b/lucene/core/src/java/org/apache/lucene/store/DataOutput.java @@ -88,6 +88,20 @@ public void writeShort(short i) throws IOException { writeByte((byte) (i >> 8)); } + /** + * Writes a specified number of ints from an array at the specified offset. + * + * @param src the array to write ints from + * @param offset the offset in the array to start reading ints + * @param length the number of ints to write + * @see DataInput#readInts(int[], int, int) + */ + public void writeInts(int[] src, int offset, int length) throws IOException { + for (int i = 0; i < length; ++i) { + writeInt(src[offset + i]); + } + } + /** * Writes an int in a variable-length format. Writes between one and five bytes. Smaller values * take fewer bytes. Negative numbers are supported, but should be avoided. diff --git a/lucene/core/src/java/org/apache/lucene/store/OutputStreamIndexOutput.java b/lucene/core/src/java/org/apache/lucene/store/OutputStreamIndexOutput.java index 14b56a67e8a7..2fbb0374c189 100644 --- a/lucene/core/src/java/org/apache/lucene/store/OutputStreamIndexOutput.java +++ b/lucene/core/src/java/org/apache/lucene/store/OutputStreamIndexOutput.java @@ -72,6 +72,12 @@ public void writeInt(int i) throws IOException { bytesWritten += Integer.BYTES; } + @Override + public void writeInts(int[] src, int offset, int length) throws IOException { + os.writeInts(src, offset, length); + bytesWritten += (long) length * Integer.BYTES; + } + @Override public void writeLong(long i) throws IOException { os.writeLong(i); @@ -130,6 +136,29 @@ void writeInt(int i) throws IOException { count += Integer.BYTES; } + void writeInts(int[] src, int offset, int length) throws IOException { + int remaining = length; + int srcPos = offset; + + while (remaining > 0) { + int available = (buf.length - count) / Integer.BYTES; + int batch = Math.min(remaining, available); + + if (batch == 0) { + flush(); + continue; + } + + for (int i = 0; i < batch; i++) { + BitUtil.VH_LE_INT.set(buf, count, src[srcPos + i]); + 
count += Integer.BYTES; + } + + srcPos += batch; + remaining -= batch; + } + } + void writeLong(long i) throws IOException { flushIfNeeded(Long.BYTES); BitUtil.VH_LE_LONG.set(buf, count, i); From c68cf849302fa0bf93911afb31a8c6f7ba4a08bf Mon Sep 17 00:00:00 2001 From: Ramakrishna chilaka Date: Fri, 24 Oct 2025 19:49:50 +0530 Subject: [PATCH 2/2] adding CHANGES.txt --- lucene/CHANGES.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 14a95c88fc84..20d0e4bae9ca 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -203,6 +203,8 @@ Optimizations * GITHUB#15343: Ensure that `AcceptDocs#cost()` only ever calls `BitSets#cardinality()` once per instance to avoid redundant computation. (Ben Trent) +* GITHUB#15358: Add DataOutput#writeInts and use it in ForUtil#encode to write encoded integer blocks in bulk. (Ramakrishna Chilaka) + * GITHUB#14963: Bypass HNSW graph building for tiny segments. (Shubham Chaudhary, Ben Trent) Bug Fixes