apache · benwtrent · Apr 2, 2024 · Jan 9, 2024 · Jan 10, 2024 · Jan 10, 2024
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
@@ -204,6 +204,9 @@ New Features
   This may improve paging logic especially when large segments are merged under memory pressure.
   (Uwe Schindler, Chris Hegarty, Robert Muir, Adrien Grand)
 
+* GITHUB#13197: Expand support for new scalar bit levels for HNSW vectors. This includes 4-bit vectors and an option
+  to compress them to gain a 50% reduction in memory usage. (Ben Trent)
+
 Improvements
 ---------------------
 

diff --git a/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/VectorUtilBenchmark.java b/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/VectorUtilBenchmark.java
@@ -36,10 +36,12 @@ public class VectorUtilBenchmark {
 
   private byte[] bytesA;
   private byte[] bytesB;
+  private byte[] halfBytesA;
+  private byte[] halfBytesB;
   private float[] floatsA;
   private float[] floatsB;
 
-  @Param({"1", "128", "207", "256", "300", "512", "702", "1024"})
+  @Param({"1024"})
   int size;
 
   @Setup(Level.Iteration)
@@ -51,6 +53,14 @@ public void init() {
     bytesB = new byte[size];
     random.nextBytes(bytesA);
     random.nextBytes(bytesB);
+    // random half byte arrays for binary methods
+    // this means that all values must be between 0 and 15
+    halfBytesA = new byte[size];
+    halfBytesB = new byte[size];
+    for (int i = 0; i < size; ++i) {
+      halfBytesA[i] = (byte) random.nextInt(16);
+      halfBytesB[i] = (byte) random.nextInt(16);
+    }
 
     // random float arrays for float methods
     floatsA = new float[size];
@@ -94,6 +104,17 @@ public int binarySquareVector() {
     return VectorUtil.squareDistance(bytesA, bytesB);
   }
 
+  @Benchmark
+  public int binaryHalfByteScalar() {
+    return VectorUtil.int4DotProduct(halfBytesA, halfBytesB);
+  }
+
+  @Benchmark
+  @Fork(jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
+  public int binaryHalfByteVector() {
+    return VectorUtil.int4DotProduct(halfBytesA, halfBytesB);
+  }
+
   @Benchmark
   public float floatCosineScalar() {
     return VectorUtil.cosine(floatsA, floatsB);

diff --git a/.../src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswScalarQuantizedVectorsFormat.java b/.../src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswScalarQuantizedVectorsFormat.java
@@ -65,7 +65,8 @@ public final class Lucene99HnswScalarQuantizedVectorsFormat extends KnnVectorsFo
 
   /** Constructs a format using default graph construction parameters */
   public Lucene99HnswScalarQuantizedVectorsFormat() {
-    this(DEFAULT_MAX_CONN, DEFAULT_BEAM_WIDTH, DEFAULT_NUM_MERGE_WORKER, null, null);
+    this(
+        DEFAULT_MAX_CONN, DEFAULT_BEAM_WIDTH, DEFAULT_NUM_MERGE_WORKER, (byte) 7, true, null, null);
   }
 
   /**
@@ -75,7 +76,7 @@ public Lucene99HnswScalarQuantizedVectorsFormat() {
    * @param beamWidth the size of the queue maintained during graph construction.
    */
   public Lucene99HnswScalarQuantizedVectorsFormat(int maxConn, int beamWidth) {
-    this(maxConn, beamWidth, DEFAULT_NUM_MERGE_WORKER, null, null);
+    this(maxConn, beamWidth, DEFAULT_NUM_MERGE_WORKER, (byte) 7, true, null, null);
   }
 
   /**
@@ -85,6 +86,11 @@ public Lucene99HnswScalarQuantizedVectorsFormat(int maxConn, int beamWidth) {
    * @param beamWidth the size of the queue maintained during graph construction.
    * @param numMergeWorkers number of workers (threads) that will be used when doing merge. If
    *     larger than 1, a non-null {@link ExecutorService} must be passed as mergeExec
+   * @param bits the number of bits to use for scalar quantization (must be between 1 and 8,
+   *     inclusive)
+   * @param compress whether to compress the vectors, if true, the vectors that are quantized with
+   *     lte 4 bits will be compressed into a single byte. If false, the vectors will be stored as
+   *     is. This provides a trade-off of memory usage and speed.
    * @param confidenceInterval the confidenceInterval for scalar quantizing the vectors, when `null`
    *     it is calculated based on the vector field dimensions.
    * @param mergeExec the {@link ExecutorService} that will be used by ALL vector writers that are
@@ -94,6 +100,8 @@ public Lucene99HnswScalarQuantizedVectorsFormat(
       int maxConn,
       int beamWidth,
       int numMergeWorkers,
+      byte bits,
+      boolean compress,
       Float confidenceInterval,
       ExecutorService mergeExec) {
     super("Lucene99HnswScalarQuantizedVectorsFormat");
@@ -127,7 +135,8 @@ public Lucene99HnswScalarQuantizedVectorsFormat(
     } else {
       this.mergeExec = null;
     }
-    this.flatVectorsFormat = new Lucene99ScalarQuantizedVectorsFormat(confidenceInterval);
+    this.flatVectorsFormat =
+        new Lucene99ScalarQuantizedVectorsFormat(confidenceInterval, bits, compress);
   }
 
   @Override

diff --git a/...core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsFormat.java b/...core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsFormat.java
@@ -35,7 +35,8 @@ public final class Lucene99ScalarQuantizedVectorsFormat extends FlatVectorsForma
   static final String NAME = "Lucene99ScalarQuantizedVectorsFormat";
 
   static final int VERSION_START = 0;
-  static final int VERSION_CURRENT = VERSION_START;
+  static final int VERSION_ADD_BITS = 1;
+  static final int VERSION_CURRENT = VERSION_ADD_BITS;
   static final String META_CODEC_NAME = "Lucene99ScalarQuantizedVectorsFormatMeta";
   static final String VECTOR_DATA_CODEC_NAME = "Lucene99ScalarQuantizedVectorsFormatData";
   static final String META_EXTENSION = "vemq";
@@ -55,18 +56,27 @@ public final class Lucene99ScalarQuantizedVectorsFormat extends FlatVectorsForma
    */
   final Float confidenceInterval;
 
+  final byte bits;
+  final boolean compress;
+
   /** Constructs a format using default graph construction parameters */
   public Lucene99ScalarQuantizedVectorsFormat() {
-    this(null);
+    this(null, 7, true);
   }
 
   /**
    * Constructs a format using the given graph construction parameters.
    *
    * @param confidenceInterval the confidenceInterval for scalar quantizing the vectors, when `null`
-   *     it is calculated based on the vector field dimensions.
+   *     it is calculated dynamically.
+   * @param bits the number of bits to use for scalar quantization (must be between 1 and 8,
+   *     inclusive)
+   * @param compress whether to compress the vectors, if true, the vectors that are quantized with
+   *     lte 4 bits will be compressed into a single byte. If false, the vectors will be stored as
+   *     is. This provides a trade-off of memory usage and speed.
    */
-  public Lucene99ScalarQuantizedVectorsFormat(Float confidenceInterval) {
+  public Lucene99ScalarQuantizedVectorsFormat(
+      Float confidenceInterval, int bits, boolean compress) {
     if (confidenceInterval != null
         && (confidenceInterval < MINIMUM_CONFIDENCE_INTERVAL
             || confidenceInterval > MAXIMUM_CONFIDENCE_INTERVAL)) {
@@ -78,7 +88,12 @@ public Lucene99ScalarQuantizedVectorsFormat(Float confidenceInterval) {
               + "; confidenceInterval="
               + confidenceInterval);
     }
+    if (bits < 1 || bits > 8) {
+      throw new IllegalArgumentException("bits must be between 1 and 8; bits=" + bits);
+    }
+    this.bits = (byte) bits;
     this.confidenceInterval = confidenceInterval;
+    this.compress = compress;
   }
 
   public static float calculateDefaultConfidenceInterval(int vectorDimension) {
@@ -92,6 +107,10 @@ public String toString() {
         + NAME
         + ", confidenceInterval="
         + confidenceInterval
+        + ", bits="
+        + bits
+        + ", compress="
+        + compress
         + ", rawVectorFormat="
         + rawVectorFormat
         + ")";
@@ -100,7 +119,7 @@ public String toString() {
   @Override
   public FlatVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
     return new Lucene99ScalarQuantizedVectorsWriter(
-        state, confidenceInterval, rawVectorFormat.fieldsWriter(state));
+        state, confidenceInterval, bits, compress, rawVectorFormat.fieldsWriter(state));
   }
 
   @Override

diff --git a/...core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsReader.java b/...core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsReader.java
@@ -82,7 +82,7 @@ public Lucene99ScalarQuantizedVectorsReader(
                 Lucene99ScalarQuantizedVectorsFormat.VERSION_CURRENT,
                 state.segmentInfo.getId(),
                 state.segmentSuffix);
-        readFields(meta, state.fieldInfos);
+        readFields(meta, versionMeta, state.fieldInfos);
       } catch (Throwable exception) {
         priorE = exception;
       } finally {
@@ -102,13 +102,14 @@ public Lucene99ScalarQuantizedVectorsReader(
     }
   }
 
-  private void readFields(ChecksumIndexInput meta, FieldInfos infos) throws IOException {
+  private void readFields(ChecksumIndexInput meta, int versionMeta, FieldInfos infos)
+      throws IOException {
     for (int fieldNumber = meta.readInt(); fieldNumber != -1; fieldNumber = meta.readInt()) {
       FieldInfo info = infos.fieldInfo(fieldNumber);
       if (info == null) {
         throw new CorruptIndexException("Invalid field number: " + fieldNumber, meta);
       }
-      FieldEntry fieldEntry = readField(meta);
+      FieldEntry fieldEntry = readField(meta, versionMeta);
       validateFieldEntry(info, fieldEntry);
       fields.put(info.name, fieldEntry);
     }
@@ -126,8 +127,13 @@ static void validateFieldEntry(FieldInfo info, FieldEntry fieldEntry) {
               + fieldEntry.dimension);
     }
 
-    // int8 quantized and calculated stored offset.
-    long quantizedVectorBytes = dimension + Float.BYTES;
+    final long quantizedVectorBytes;
+    if (fieldEntry.bits <= 4 && fieldEntry.compress) {
+      quantizedVectorBytes = ((dimension + 1) >> 1) + Float.BYTES;
+    } else {
+      // int8 quantized and calculated stored offset.
+      quantizedVectorBytes = dimension + Float.BYTES;
+    }
     long numQuantizedVectorBytes = Math.multiplyExact(quantizedVectorBytes, fieldEntry.size);
     if (numQuantizedVectorBytes != fieldEntry.vectorDataLength) {
       throw new IllegalStateException(
@@ -209,6 +215,8 @@ public RandomVectorScorer getRandomVectorScorer(String field, float[] target) th
             fieldEntry.ordToDoc,
             fieldEntry.dimension,
             fieldEntry.size,
+            fieldEntry.bits,
+            fieldEntry.compress,
             fieldEntry.vectorDataOffset,
             fieldEntry.vectorDataLength,
             quantizedVectorData);
@@ -236,10 +244,10 @@ public long ramBytesUsed() {
     return size;
   }
 
-  private FieldEntry readField(IndexInput input) throws IOException {
+  private FieldEntry readField(IndexInput input, int versionMeta) throws IOException {
     VectorEncoding vectorEncoding = readVectorEncoding(input);
     VectorSimilarityFunction similarityFunction = readSimilarityFunction(input);
-    return new FieldEntry(input, vectorEncoding, similarityFunction);
+    return new FieldEntry(input, versionMeta, vectorEncoding, similarityFunction);
   }
 
   @Override
@@ -252,6 +260,8 @@ public QuantizedByteVectorValues getQuantizedVectorValues(String fieldName) thro
         fieldEntry.ordToDoc,
         fieldEntry.dimension,
         fieldEntry.size,
+        fieldEntry.bits,
+        fieldEntry.compress,
         fieldEntry.vectorDataOffset,
         fieldEntry.vectorDataLength,
         quantizedVectorData);
@@ -276,10 +286,13 @@ private static class FieldEntry implements Accountable {
     final long vectorDataLength;
     final ScalarQuantizer scalarQuantizer;
     final int size;
+    final byte bits;
+    final boolean compress;
     final OrdToDocDISIReaderConfiguration ordToDoc;
 
     FieldEntry(
         IndexInput input,
+        int versionMeta,
         VectorEncoding vectorEncoding,
         VectorSimilarityFunction similarityFunction)
         throws IOException {
@@ -290,12 +303,29 @@ private static class FieldEntry implements Accountable {
       dimension = input.readVInt();
       size = input.readInt();
       if (size > 0) {
-        float confidenceInterval = Float.intBitsToFloat(input.readInt());
-        float minQuantile = Float.intBitsToFloat(input.readInt());
-        float maxQuantile = Float.intBitsToFloat(input.readInt());
-        scalarQuantizer = new ScalarQuantizer(minQuantile, maxQuantile, confidenceInterval);
+        if (versionMeta < Lucene99ScalarQuantizedVectorsFormat.VERSION_ADD_BITS) {
+          int floatBits = input.readInt(); // confidenceInterval, unused
+          if (floatBits == -1) {
+            throw new CorruptIndexException(
+                "Missing confidence interval for scalar quantizer", input);
+          }
+          this.bits = (byte) 7;
+          this.compress = false;
+          float minQuantile = Float.intBitsToFloat(input.readInt());
+          float maxQuantile = Float.intBitsToFloat(input.readInt());
+          scalarQuantizer = new ScalarQuantizer(minQuantile, maxQuantile, (byte) 7);
+        } else {
+          input.readInt(); // confidenceInterval, unused
+          this.bits = input.readByte();
+          this.compress = input.readByte() == 1;
+          float minQuantile = Float.intBitsToFloat(input.readInt());
+          float maxQuantile = Float.intBitsToFloat(input.readInt());
+          scalarQuantizer = new ScalarQuantizer(minQuantile, maxQuantile, bits);
+        }
       } else {
         scalarQuantizer = null;
+        this.bits = (byte) 7;
+        this.compress = false;
       }
       ordToDoc = OrdToDocDISIReaderConfiguration.fromStoredMeta(input, size);
     }