From 6839182c28860244570056130bc17f98a4510fc6 Mon Sep 17 00:00:00 2001 From: Kaival Parikh Date: Thu, 16 Oct 2025 17:13:58 +0000 Subject: [PATCH 1/6] Align float vectors to 64 bytes --- .../benchmark/jmh/VectorScorerBenchmark.java | 121 +++++++++++++++--- .../lucene99/Lucene99FlatVectorsWriter.java | 33 +++-- 2 files changed, 123 insertions(+), 31 deletions(-) diff --git a/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/VectorScorerBenchmark.java b/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/VectorScorerBenchmark.java index 66e72bf11c3b..035ea98e4fde 100644 --- a/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/VectorScorerBenchmark.java +++ b/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/VectorScorerBenchmark.java @@ -19,12 +19,16 @@ import static org.apache.lucene.index.VectorSimilarityFunction.DOT_PRODUCT; import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; import java.nio.file.Files; +import java.util.Random; import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.TimeUnit; import org.apache.lucene.codecs.hnsw.FlatVectorScorerUtil; import org.apache.lucene.codecs.hnsw.FlatVectorsScorer; import org.apache.lucene.codecs.lucene95.OffHeapByteVectorValues; +import org.apache.lucene.codecs.lucene95.OffHeapFloatVectorValues; import org.apache.lucene.index.KnnVectorValues; import org.apache.lucene.index.VectorSimilarityFunction; import org.apache.lucene.store.Directory; @@ -62,57 +66,140 @@ value = 3, jvmArgsAppend = {"-Xmx2g", "-Xms2g", "-XX:+AlwaysPreTouch"}) public class VectorScorerBenchmark { + private static final float EPSILON = 1e-4f; @Param({"1", "128", "207", "256", "300", "512", "702", "1024"}) public int size; + @Param({"0", "1", "2", "4", "6", "8", "16", "20", "32", "50", "64", "100", "128", "255", "256"}) + public int padBytes; + Directory dir; - IndexInput in; - KnnVectorValues vectorValues; + IndexInput bytesIn; + IndexInput floatsIn; + 
KnnVectorValues byteVectorValues; + KnnVectorValues floatVectorValues; byte[] vec1, vec2; - UpdateableRandomVectorScorer scorer; + float[] floatsA, floatsB; + float expectedBytes, expectedFloats; + UpdateableRandomVectorScorer byteScorer; + UpdateableRandomVectorScorer floatScorer; @Setup(Level.Iteration) public void init() throws IOException { + Random random = ThreadLocalRandom.current(); + vec1 = new byte[size]; vec2 = new byte[size]; - ThreadLocalRandom.current().nextBytes(vec1); - ThreadLocalRandom.current().nextBytes(vec2); + random.nextBytes(vec1); + random.nextBytes(vec2); + expectedBytes = DOT_PRODUCT.compare(vec1, vec2); + + // random float arrays for float methods + floatsA = new float[size]; + floatsB = new float[size]; + for (int i = 0; i < size; ++i) { + floatsA[i] = random.nextFloat(); + floatsB[i] = random.nextFloat(); + } + expectedFloats = DOT_PRODUCT.compare(floatsA, floatsB); dir = new MMapDirectory(Files.createTempDirectory("VectorScorerBenchmark")); - try (IndexOutput out = dir.createOutput("vector.data", IOContext.DEFAULT)) { + try (IndexOutput out = dir.createOutput("byteVector.data", IOContext.DEFAULT)) { + out.writeBytes(new byte[padBytes], 0, padBytes); + out.writeBytes(vec1, 0, vec1.length); out.writeBytes(vec2, 0, vec2.length); } - in = dir.openInput("vector.data", IOContext.DEFAULT); - vectorValues = vectorValues(size, 2, in, DOT_PRODUCT); - scorer = + try (IndexOutput out = dir.createOutput("floatVector.data", IOContext.DEFAULT)) { + out.writeBytes(new byte[padBytes], 0, padBytes); + + byte[] buffer = new byte[size * Float.BYTES]; + ByteBuffer.wrap(buffer).order(ByteOrder.LITTLE_ENDIAN).asFloatBuffer().put(floatsA); + out.writeBytes(buffer, 0, buffer.length); + ByteBuffer.wrap(buffer).order(ByteOrder.LITTLE_ENDIAN).asFloatBuffer().put(floatsB); + out.writeBytes(buffer, 0, buffer.length); + } + + bytesIn = dir.openInput("byteVector.data", IOContext.DEFAULT); + byteVectorValues = byteVectorValues(DOT_PRODUCT); + byteScorer = + 
FlatVectorScorerUtil.getLucene99FlatVectorsScorer() + .getRandomVectorScorerSupplier(DOT_PRODUCT, byteVectorValues) + .scorer(); + byteScorer.setScoringOrdinal(0); + + floatsIn = dir.openInput("floatVector.data", IOContext.DEFAULT); + floatVectorValues = floatVectorValues(DOT_PRODUCT); + floatScorer = FlatVectorScorerUtil.getLucene99FlatVectorsScorer() - .getRandomVectorScorerSupplier(DOT_PRODUCT, vectorValues) + .getRandomVectorScorerSupplier(DOT_PRODUCT, floatVectorValues) .scorer(); - scorer.setScoringOrdinal(0); + floatScorer.setScoringOrdinal(0); } @TearDown public void teardown() throws IOException { - IOUtils.close(dir, in); + IOUtils.close(dir, bytesIn); } @Benchmark public float binaryDotProductDefault() throws IOException { - return scorer.score(1); + float result = byteScorer.score(1); + if (Math.abs(result - expectedBytes) > EPSILON) { + throw new RuntimeException("Expected " + result + " but got " + expectedBytes); + } + return result; } @Benchmark @Fork(jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"}) public float binaryDotProductMemSeg() throws IOException { - return scorer.score(1); + float result = byteScorer.score(1); + if (Math.abs(result - expectedBytes) > EPSILON) { + throw new RuntimeException("Expected " + result + " but got " + expectedBytes); + } + return result; + } + + @Benchmark + public float floatDotProductDefault() throws IOException { + float result = floatScorer.score(1); + if (Math.abs(result - expectedFloats) > EPSILON) { + throw new RuntimeException("Expected " + result + " but got " + expectedFloats); + } + return result; + } + + @Benchmark + @Fork(jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"}) + public float floatDotProductMemSeg() throws IOException { + float result = floatScorer.score(1); + if (Math.abs(result - expectedFloats) > EPSILON) { + throw new RuntimeException("Expected " + result + " but got " + expectedFloats); + } + return result; } - static KnnVectorValues vectorValues( - int dims, int size, 
IndexInput in, VectorSimilarityFunction sim) throws IOException { + KnnVectorValues byteVectorValues(VectorSimilarityFunction sim) throws IOException { return new OffHeapByteVectorValues.DenseOffHeapVectorValues( - dims, size, in.slice("test", 0, in.length()), dims, new ThrowingFlatVectorScorer(), sim); + size, + 2, + bytesIn.slice("test", padBytes, size * 2L), + size, + new ThrowingFlatVectorScorer(), + sim); + } + + KnnVectorValues floatVectorValues(VectorSimilarityFunction sim) throws IOException { + int byteSize = size * Float.BYTES; + return new OffHeapFloatVectorValues.DenseOffHeapVectorValues( + size, + 2, + floatsIn.slice("test", padBytes, byteSize * 2L), + byteSize, + new ThrowingFlatVectorScorer(), + sim); } static final class ThrowingFlatVectorScorer implements FlatVectorsScorer { diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsWriter.java index 1432f5ea46b8..3289909a09ed 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsWriter.java @@ -153,9 +153,18 @@ public long ramBytesUsed() { return total; } + /** Align vectors for optimal vectorized performance. 
*/ + private static long alignOutput(IndexOutput output, VectorEncoding encoding) throws IOException { + return output.alignFilePointer( + switch (encoding) { + case BYTE -> Float.BYTES; + case FLOAT32 -> 64; + }); + } + private void writeField(FieldWriter fieldData, int maxDoc) throws IOException { // write vector values - long vectorDataOffset = vectorData.alignFilePointer(Float.BYTES); + long vectorDataOffset = alignOutput(vectorData, fieldData.fieldInfo.getVectorEncoding()); switch (fieldData.fieldInfo.getVectorEncoding()) { case BYTE -> writeByteVectors(fieldData); case FLOAT32 -> writeFloat32Vectors(fieldData); @@ -190,19 +199,18 @@ private void writeSortingField(FieldWriter fieldData, int maxDoc, Sorter.DocM mapOldOrdToNewOrd(fieldData.docsWithField, sortMap, null, ordMap, newDocsWithField); // write vector values - long vectorDataOffset = - switch (fieldData.fieldInfo.getVectorEncoding()) { - case BYTE -> writeSortedByteVectors(fieldData, ordMap); - case FLOAT32 -> writeSortedFloat32Vectors(fieldData, ordMap); - }; + long vectorDataOffset = alignOutput(vectorData, fieldData.fieldInfo.getVectorEncoding()); + switch (fieldData.fieldInfo.getVectorEncoding()) { + case BYTE -> writeSortedByteVectors(fieldData, ordMap); + case FLOAT32 -> writeSortedFloat32Vectors(fieldData, ordMap); + } long vectorDataLength = vectorData.getFilePointer() - vectorDataOffset; writeMeta(fieldData.fieldInfo, maxDoc, vectorDataOffset, vectorDataLength, newDocsWithField); } - private long writeSortedFloat32Vectors(FieldWriter fieldData, int[] ordMap) + private void writeSortedFloat32Vectors(FieldWriter fieldData, int[] ordMap) throws IOException { - long vectorDataOffset = vectorData.alignFilePointer(Float.BYTES); final ByteBuffer buffer = ByteBuffer.allocate(fieldData.dim * Float.BYTES).order(ByteOrder.LITTLE_ENDIAN); for (int ordinal : ordMap) { @@ -210,23 +218,20 @@ private long writeSortedFloat32Vectors(FieldWriter fieldData, int[] ordMap) buffer.asFloatBuffer().put(vector); 
vectorData.writeBytes(buffer.array(), buffer.array().length); } - return vectorDataOffset; } - private long writeSortedByteVectors(FieldWriter fieldData, int[] ordMap) throws IOException { - long vectorDataOffset = vectorData.alignFilePointer(Float.BYTES); + private void writeSortedByteVectors(FieldWriter fieldData, int[] ordMap) throws IOException { for (int ordinal : ordMap) { byte[] vector = (byte[]) fieldData.vectors.get(ordinal); vectorData.writeBytes(vector, vector.length); } - return vectorDataOffset; } @Override public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOException { // Since we know we will not be searching for additional indexing, we can just write the // the vectors directly to the new segment. - long vectorDataOffset = vectorData.alignFilePointer(Float.BYTES); + long vectorDataOffset = alignOutput(vectorData, fieldInfo.getVectorEncoding()); // No need to use temporary file as we don't have to re-open for reading DocsWithFieldSet docsWithField = switch (fieldInfo.getVectorEncoding()) { @@ -252,7 +257,7 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE @Override public CloseableRandomVectorScorerSupplier mergeOneFieldToIndex( FieldInfo fieldInfo, MergeState mergeState) throws IOException { - long vectorDataOffset = vectorData.alignFilePointer(Float.BYTES); + long vectorDataOffset = alignOutput(vectorData, fieldInfo.getVectorEncoding()); IndexOutput tempVectorData = segmentWriteState.directory.createTempOutput( vectorData.getName(), "temp", segmentWriteState.context); From 5764ac8174ab3f54e40af5ce72d7a843f71e3c42 Mon Sep 17 00:00:00 2001 From: Kaival Parikh Date: Mon, 20 Oct 2025 19:17:53 +0000 Subject: [PATCH 2/6] Also align temp file used during merge --- .../lucene99/Lucene99FlatVectorsWriter.java | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsWriter.java 
b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsWriter.java index 3289909a09ed..b511028bdb81 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsWriter.java @@ -257,13 +257,13 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE @Override public CloseableRandomVectorScorerSupplier mergeOneFieldToIndex( FieldInfo fieldInfo, MergeState mergeState) throws IOException { - long vectorDataOffset = alignOutput(vectorData, fieldInfo.getVectorEncoding()); IndexOutput tempVectorData = segmentWriteState.directory.createTempOutput( vectorData.getName(), "temp", segmentWriteState.context); IndexInput vectorDataInput = null; try { // write the vector data to a temporary file + long tempVectorDataOffset = alignOutput(tempVectorData, fieldInfo.getVectorEncoding()); DocsWithFieldSet docsWithField = switch (fieldInfo.getVectorEncoding()) { case BYTE -> @@ -277,6 +277,8 @@ public CloseableRandomVectorScorerSupplier mergeOneFieldToIndex( KnnVectorsWriter.MergedVectorValues.mergeFloatVectorValues( fieldInfo, mergeState)); }; + long vectorDataLength = tempVectorData.getFilePointer() - tempVectorDataOffset; + CodecUtil.writeFooter(tempVectorData); IOUtils.close(tempVectorData); @@ -288,10 +290,13 @@ public CloseableRandomVectorScorerSupplier mergeOneFieldToIndex( tempVectorData.getName(), IOContext.DEFAULT.withHints( FileTypeHint.DATA, FileDataHint.KNN_VECTORS, DataAccessHint.RANDOM)); + vectorDataInput.seek(tempVectorDataOffset); + // copy the temporary file vectors to the actual data file - vectorData.copyBytes(vectorDataInput, vectorDataInput.length() - CodecUtil.footerLength()); + long vectorDataOffset = alignOutput(vectorData, fieldInfo.getVectorEncoding()); + vectorData.copyBytes(vectorDataInput, vectorDataLength); + CodecUtil.retrieveChecksum(vectorDataInput); - long vectorDataLength = 
vectorData.getFilePointer() - vectorDataOffset; writeMeta( fieldInfo, segmentWriteState.segmentInfo.maxDoc(), @@ -310,7 +315,8 @@ public CloseableRandomVectorScorerSupplier mergeOneFieldToIndex( new OffHeapByteVectorValues.DenseOffHeapVectorValues( fieldInfo.getVectorDimension(), docsWithField.cardinality(), - finalVectorDataInput, + finalVectorDataInput.slice( + "temp-vector-data", tempVectorDataOffset, vectorDataLength), fieldInfo.getVectorDimension() * Byte.BYTES, vectorsScorer, fieldInfo.getVectorSimilarityFunction())); @@ -320,7 +326,8 @@ public CloseableRandomVectorScorerSupplier mergeOneFieldToIndex( new OffHeapFloatVectorValues.DenseOffHeapVectorValues( fieldInfo.getVectorDimension(), docsWithField.cardinality(), - finalVectorDataInput, + finalVectorDataInput.slice( + "temp-vector-data", tempVectorDataOffset, vectorDataLength), fieldInfo.getVectorDimension() * Float.BYTES, vectorsScorer, fieldInfo.getVectorSimilarityFunction())); From f9402e9c7b279e99649a89c0286ee7b7e51b3b8c Mon Sep 17 00:00:00 2001 From: Kaival Parikh Date: Tue, 21 Oct 2025 17:03:19 +0000 Subject: [PATCH 3/6] Revert "Also align temp file used during merge" This reverts commit 5764ac8174ab3f54e40af5ce72d7a843f71e3c42. 
--- .../lucene99/Lucene99FlatVectorsWriter.java | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsWriter.java index b511028bdb81..3289909a09ed 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsWriter.java @@ -257,13 +257,13 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE @Override public CloseableRandomVectorScorerSupplier mergeOneFieldToIndex( FieldInfo fieldInfo, MergeState mergeState) throws IOException { + long vectorDataOffset = alignOutput(vectorData, fieldInfo.getVectorEncoding()); IndexOutput tempVectorData = segmentWriteState.directory.createTempOutput( vectorData.getName(), "temp", segmentWriteState.context); IndexInput vectorDataInput = null; try { // write the vector data to a temporary file - long tempVectorDataOffset = alignOutput(tempVectorData, fieldInfo.getVectorEncoding()); DocsWithFieldSet docsWithField = switch (fieldInfo.getVectorEncoding()) { case BYTE -> @@ -277,8 +277,6 @@ public CloseableRandomVectorScorerSupplier mergeOneFieldToIndex( KnnVectorsWriter.MergedVectorValues.mergeFloatVectorValues( fieldInfo, mergeState)); }; - long vectorDataLength = tempVectorData.getFilePointer() - tempVectorDataOffset; - CodecUtil.writeFooter(tempVectorData); IOUtils.close(tempVectorData); @@ -290,13 +288,10 @@ public CloseableRandomVectorScorerSupplier mergeOneFieldToIndex( tempVectorData.getName(), IOContext.DEFAULT.withHints( FileTypeHint.DATA, FileDataHint.KNN_VECTORS, DataAccessHint.RANDOM)); - vectorDataInput.seek(tempVectorDataOffset); - // copy the temporary file vectors to the actual data file - long vectorDataOffset = alignOutput(vectorData, fieldInfo.getVectorEncoding()); - 
vectorData.copyBytes(vectorDataInput, vectorDataLength); - + vectorData.copyBytes(vectorDataInput, vectorDataInput.length() - CodecUtil.footerLength()); CodecUtil.retrieveChecksum(vectorDataInput); + long vectorDataLength = vectorData.getFilePointer() - vectorDataOffset; writeMeta( fieldInfo, segmentWriteState.segmentInfo.maxDoc(), @@ -315,8 +310,7 @@ public CloseableRandomVectorScorerSupplier mergeOneFieldToIndex( new OffHeapByteVectorValues.DenseOffHeapVectorValues( fieldInfo.getVectorDimension(), docsWithField.cardinality(), - finalVectorDataInput.slice( - "temp-vector-data", tempVectorDataOffset, vectorDataLength), + finalVectorDataInput, fieldInfo.getVectorDimension() * Byte.BYTES, vectorsScorer, fieldInfo.getVectorSimilarityFunction())); @@ -326,8 +320,7 @@ public CloseableRandomVectorScorerSupplier mergeOneFieldToIndex( new OffHeapFloatVectorValues.DenseOffHeapVectorValues( fieldInfo.getVectorDimension(), docsWithField.cardinality(), - finalVectorDataInput.slice( - "temp-vector-data", tempVectorDataOffset, vectorDataLength), + finalVectorDataInput, fieldInfo.getVectorDimension() * Float.BYTES, vectorsScorer, fieldInfo.getVectorSimilarityFunction())); From 0f46f3c4a0d1dbe6bc7559de000d87c872605a50 Mon Sep 17 00:00:00 2001 From: Kaival Parikh Date: Wed, 12 Nov 2025 04:54:05 +0000 Subject: [PATCH 4/6] Refactor + add comment + CHANGES.txt entry --- lucene/CHANGES.txt | 3 +++ .../lucene99/Lucene99FlatVectorsWriter.java | 25 +++++++++++-------- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 0742b8d96fa6..936af863cd11 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -243,6 +243,9 @@ Other Applications using SecurityManager now need to grant SerializablePermission("serialFilter") to the analysis-smartcn module. (Uwe Schindler, Isaac David) +* GITHUB#15341: Align float vectors on disk to 64 bytes, for optimal performance on Arm Neoverse + machines. 
(Mike McCandless, Kaival Parikh) + Build --------------------- * Upgrade forbiddenapis to version 3.10. (Uwe Schindler) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsWriter.java index 3289909a09ed..3416a131735d 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsWriter.java @@ -153,19 +153,19 @@ public long ramBytesUsed() { return total; } - /** Align vectors for optimal vectorized performance. */ private static long alignOutput(IndexOutput output, VectorEncoding encoding) throws IOException { return output.alignFilePointer( switch (encoding) { case BYTE -> Float.BYTES; - case FLOAT32 -> 64; + case FLOAT32 -> 64; // optimal alignment for Arm Neoverse machines. }); } private void writeField(FieldWriter fieldData, int maxDoc) throws IOException { // write vector values - long vectorDataOffset = alignOutput(vectorData, fieldData.fieldInfo.getVectorEncoding()); - switch (fieldData.fieldInfo.getVectorEncoding()) { + VectorEncoding encoding = fieldData.fieldInfo.getVectorEncoding(); + long vectorDataOffset = alignOutput(vectorData, encoding); + switch (encoding) { case BYTE -> writeByteVectors(fieldData); case FLOAT32 -> writeFloat32Vectors(fieldData); } @@ -199,8 +199,9 @@ private void writeSortingField(FieldWriter fieldData, int maxDoc, Sorter.DocM mapOldOrdToNewOrd(fieldData.docsWithField, sortMap, null, ordMap, newDocsWithField); // write vector values - long vectorDataOffset = alignOutput(vectorData, fieldData.fieldInfo.getVectorEncoding()); - switch (fieldData.fieldInfo.getVectorEncoding()) { + VectorEncoding encoding = fieldData.fieldInfo.getVectorEncoding(); + long vectorDataOffset = alignOutput(vectorData, encoding); + switch (encoding) { case BYTE -> writeSortedByteVectors(fieldData, ordMap); case FLOAT32 -> 
writeSortedFloat32Vectors(fieldData, ordMap); } @@ -231,10 +232,11 @@ private void writeSortedByteVectors(FieldWriter fieldData, int[] ordMap) thro public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOException { // Since we know we will not be searching for additional indexing, we can just write the // the vectors directly to the new segment. - long vectorDataOffset = alignOutput(vectorData, fieldInfo.getVectorEncoding()); + VectorEncoding encoding = fieldInfo.getVectorEncoding(); + long vectorDataOffset = alignOutput(vectorData, encoding); // No need to use temporary file as we don't have to re-open for reading DocsWithFieldSet docsWithField = - switch (fieldInfo.getVectorEncoding()) { + switch (encoding) { case BYTE -> writeByteVectorData( vectorData, @@ -257,7 +259,8 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE @Override public CloseableRandomVectorScorerSupplier mergeOneFieldToIndex( FieldInfo fieldInfo, MergeState mergeState) throws IOException { - long vectorDataOffset = alignOutput(vectorData, fieldInfo.getVectorEncoding()); + VectorEncoding encoding = fieldInfo.getVectorEncoding(); + long vectorDataOffset = alignOutput(vectorData, encoding); IndexOutput tempVectorData = segmentWriteState.directory.createTempOutput( vectorData.getName(), "temp", segmentWriteState.context); @@ -265,7 +268,7 @@ public CloseableRandomVectorScorerSupplier mergeOneFieldToIndex( try { // write the vector data to a temporary file DocsWithFieldSet docsWithField = - switch (fieldInfo.getVectorEncoding()) { + switch (encoding) { case BYTE -> writeByteVectorData( tempVectorData, @@ -303,7 +306,7 @@ public CloseableRandomVectorScorerSupplier mergeOneFieldToIndex( vectorDataInput = null; final RandomVectorScorerSupplier randomVectorScorerSupplier = - switch (fieldInfo.getVectorEncoding()) { + switch (encoding) { case BYTE -> vectorsScorer.getRandomVectorScorerSupplier( fieldInfo.getVectorSimilarityFunction(), From 
8b70ce896474b0a38be176c59727d7481e45062b Mon Sep 17 00:00:00 2001 From: Kaival Parikh Date: Mon, 17 Nov 2025 21:28:06 +0000 Subject: [PATCH 5/6] Only apply the optimal byte alignment if it will hold for all vectors i.e. only applied when dimension is a multiple of 16 Also add Javadoc comment about the alignment --- .../lucene99/Lucene99FlatVectorsFormat.java | 4 ++++ .../lucene99/Lucene99FlatVectorsWriter.java | 24 ++++++++++++++----- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsFormat.java index c8ef2709db66..2bee5dfe898a 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsFormat.java @@ -63,6 +63,10 @@ * that only in sparse case * * + *

NOTE: This format aligns float vectors of specific dimensions (multiples of 16) to 64 bytes in + * the index, for optimal performance on Arm Neoverse machines. There may be a small performance + * penalty in using float vectors of other dimensions on these machines. + * * @lucene.experimental */ public final class Lucene99FlatVectorsFormat extends FlatVectorsFormat { diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsWriter.java index 3416a131735d..d611ed30b20a 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsWriter.java @@ -153,18 +153,27 @@ public long ramBytesUsed() { return total; } - private static long alignOutput(IndexOutput output, VectorEncoding encoding) throws IOException { + private static long alignOutput(IndexOutput output, VectorEncoding encoding, int dimension) + throws IOException { return output.alignFilePointer( switch (encoding) { case BYTE -> Float.BYTES; - case FLOAT32 -> 64; // optimal alignment for Arm Neoverse machines. + case FLOAT32 -> { + if (dimension % 16 == 0) { + yield 64; // optimal alignment for Arm Neoverse machines. + } + // vector dimension is such that 64 byte alignment will not hold for all subsequent + // vectors, use next best alignment that will hold. 
+ yield Float.BYTES; + } }); } private void writeField(FieldWriter fieldData, int maxDoc) throws IOException { // write vector values VectorEncoding encoding = fieldData.fieldInfo.getVectorEncoding(); - long vectorDataOffset = alignOutput(vectorData, encoding); + int dimension = fieldData.fieldInfo.getVectorDimension(); + long vectorDataOffset = alignOutput(vectorData, encoding, dimension); switch (encoding) { case BYTE -> writeByteVectors(fieldData); case FLOAT32 -> writeFloat32Vectors(fieldData); @@ -200,7 +209,8 @@ private void writeSortingField(FieldWriter fieldData, int maxDoc, Sorter.DocM // write vector values VectorEncoding encoding = fieldData.fieldInfo.getVectorEncoding(); - long vectorDataOffset = alignOutput(vectorData, encoding); + int dimension = fieldData.fieldInfo.getVectorDimension(); + long vectorDataOffset = alignOutput(vectorData, encoding, dimension); switch (encoding) { case BYTE -> writeSortedByteVectors(fieldData, ordMap); case FLOAT32 -> writeSortedFloat32Vectors(fieldData, ordMap); @@ -233,7 +243,8 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE // Since we know we will not be searching for additional indexing, we can just write the // the vectors directly to the new segment. 
VectorEncoding encoding = fieldInfo.getVectorEncoding(); - long vectorDataOffset = alignOutput(vectorData, encoding); + int dimension = fieldInfo.getVectorDimension(); + long vectorDataOffset = alignOutput(vectorData, encoding, dimension); // No need to use temporary file as we don't have to re-open for reading DocsWithFieldSet docsWithField = switch (encoding) { @@ -260,7 +271,8 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE public CloseableRandomVectorScorerSupplier mergeOneFieldToIndex( FieldInfo fieldInfo, MergeState mergeState) throws IOException { VectorEncoding encoding = fieldInfo.getVectorEncoding(); - long vectorDataOffset = alignOutput(vectorData, encoding); + int dimension = fieldInfo.getVectorDimension(); + long vectorDataOffset = alignOutput(vectorData, encoding, dimension); IndexOutput tempVectorData = segmentWriteState.directory.createTempOutput( vectorData.getName(), "temp", segmentWriteState.context); From 08ab76e9411ddb10242dfb495f71d08293004c2d Mon Sep 17 00:00:00 2001 From: Kaival Parikh Date: Wed, 19 Nov 2025 18:09:30 +0000 Subject: [PATCH 6/6] Undo "Only apply the optimal byte alignment if it will hold for all vectors" Also add comment about padBytes in VectorScorerBenchmark (used to capture performance impact of byte alignment) --- .../benchmark/jmh/VectorScorerBenchmark.java | 4 ++-- .../lucene99/Lucene99FlatVectorsFormat.java | 8 ++++--- .../lucene99/Lucene99FlatVectorsWriter.java | 24 +++++-------------- 3 files changed, 13 insertions(+), 23 deletions(-) diff --git a/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/VectorScorerBenchmark.java b/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/VectorScorerBenchmark.java index 035ea98e4fde..10b6818f2a43 100644 --- a/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/VectorScorerBenchmark.java +++ b/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/VectorScorerBenchmark.java @@ -71,8 +71,8 @@ public class 
VectorScorerBenchmark { @Param({"1", "128", "207", "256", "300", "512", "702", "1024"}) public int size; - @Param({"0", "1", "2", "4", "6", "8", "16", "20", "32", "50", "64", "100", "128", "255", "256"}) - public int padBytes; + @Param({"0", "1", "4", "64"}) + public int padBytes; // capture performance impact of byte alignment in the index Directory dir; IndexInput bytesIn; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsFormat.java index 2bee5dfe898a..46be88836ca1 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsFormat.java @@ -63,9 +63,11 @@ * that only in sparse case * * - *

NOTE: This format aligns float vectors of specific dimensions (multiples of 16) to 64 bytes in - * the index, for optimal performance on Arm Neoverse machines. There may be a small performance - * penalty in using float vectors of other dimensions on these machines. + *

NOTE: Arm Neoverse machines have a performance overhead in reading data that is not aligned to + * 64 bytes, so this format aligns the start of each float vector field in the .vec file to a + * 64-byte boundary. There may be a performance penalty when searching float vectors whose + * dimension is not a multiple of 16 (i.e. whose size is not a multiple of 64 bytes), because the + * alignment will not hold for all vectors in the file. * * @lucene.experimental */ diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsWriter.java index d611ed30b20a..3416a131735d 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsWriter.java @@ -153,27 +153,18 @@ public long ramBytesUsed() { return total; } - private static long alignOutput(IndexOutput output, VectorEncoding encoding, int dimension) - throws IOException { + private static long alignOutput(IndexOutput output, VectorEncoding encoding) throws IOException { return output.alignFilePointer( switch (encoding) { case BYTE -> Float.BYTES; - case FLOAT32 -> { - if (dimension % 16 == 0) { - yield 64; // optimal alignment for Arm Neoverse machines. - } - // vector dimension is such that 64 byte alignment will not hold for all subsequent - // vectors, use next best alignment that will hold. - yield Float.BYTES; - } + case FLOAT32 -> 64; // optimal alignment for Arm Neoverse machines. 
}); } private void writeField(FieldWriter fieldData, int maxDoc) throws IOException { // write vector values VectorEncoding encoding = fieldData.fieldInfo.getVectorEncoding(); - int dimension = fieldData.fieldInfo.getVectorDimension(); - long vectorDataOffset = alignOutput(vectorData, encoding, dimension); + long vectorDataOffset = alignOutput(vectorData, encoding); switch (encoding) { case BYTE -> writeByteVectors(fieldData); case FLOAT32 -> writeFloat32Vectors(fieldData); @@ -209,8 +200,7 @@ private void writeSortingField(FieldWriter fieldData, int maxDoc, Sorter.DocM // write vector values VectorEncoding encoding = fieldData.fieldInfo.getVectorEncoding(); - int dimension = fieldData.fieldInfo.getVectorDimension(); - long vectorDataOffset = alignOutput(vectorData, encoding, dimension); + long vectorDataOffset = alignOutput(vectorData, encoding); switch (encoding) { case BYTE -> writeSortedByteVectors(fieldData, ordMap); case FLOAT32 -> writeSortedFloat32Vectors(fieldData, ordMap); @@ -243,8 +233,7 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE // Since we know we will not be searching for additional indexing, we can just write the // the vectors directly to the new segment. 
VectorEncoding encoding = fieldInfo.getVectorEncoding(); - int dimension = fieldInfo.getVectorDimension(); - long vectorDataOffset = alignOutput(vectorData, encoding, dimension); + long vectorDataOffset = alignOutput(vectorData, encoding); // No need to use temporary file as we don't have to re-open for reading DocsWithFieldSet docsWithField = switch (encoding) { @@ -271,8 +260,7 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE public CloseableRandomVectorScorerSupplier mergeOneFieldToIndex( FieldInfo fieldInfo, MergeState mergeState) throws IOException { VectorEncoding encoding = fieldInfo.getVectorEncoding(); - int dimension = fieldInfo.getVectorDimension(); - long vectorDataOffset = alignOutput(vectorData, encoding, dimension); + long vectorDataOffset = alignOutput(vectorData, encoding); IndexOutput tempVectorData = segmentWriteState.directory.createTempOutput( vectorData.getName(), "temp", segmentWriteState.context);