From 2a9e2e73d6da64fc68dd2f08089de789707bd7ee Mon Sep 17 00:00:00 2001 From: Simon Cooper Date: Mon, 10 Nov 2025 09:27:36 +0000 Subject: [PATCH 1/8] Create int bfloat16 vector formats (#136627) --- server/src/main/java/module-info.java | 5 +- .../vectors/es93/ES93FlatVectorFormat.java | 125 ++++++++++++++++++ .../ES93HnswScalarQuantizedVectorsFormat.java | 97 ++++++++++++++ .../ES93ScalarQuantizedVectorsFormat.java | 85 ++++++++++++ .../org.apache.lucene.codecs.KnnVectorsFormat | 5 +- .../ES93FlatBFloat16VectorFormatTests.java | 73 ++++++++++ .../es93/ES93FlatVectorFormatTests.java | 72 ++++++++++ ...arQuantizedBFloat16VectorsFormatTests.java | 82 ++++++++++++ ...HnswScalarQuantizedVectorsFormatTests.java | 81 ++++++++++++ ...larQuantizedBFloat16VectorFormatTests.java | 85 ++++++++++++ ...ES93ScalarQuantizedVectorsFormatTests.java | 75 +++++++++++ 11 files changed, 783 insertions(+), 2 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93FlatVectorFormat.java create mode 100644 server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93HnswScalarQuantizedVectorsFormat.java create mode 100644 server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93ScalarQuantizedVectorsFormat.java create mode 100644 server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93FlatBFloat16VectorFormatTests.java create mode 100644 server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93FlatVectorFormatTests.java create mode 100644 server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93HnswScalarQuantizedBFloat16VectorsFormatTests.java create mode 100644 server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93HnswScalarQuantizedVectorsFormatTests.java create mode 100644 server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93ScalarQuantizedBFloat16VectorFormatTests.java create mode 100644 
server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93ScalarQuantizedVectorsFormatTests.java diff --git a/server/src/main/java/module-info.java b/server/src/main/java/module-info.java index 9c5d11e1cf9e1..1abeb15209ce7 100644 --- a/server/src/main/java/module-info.java +++ b/server/src/main/java/module-info.java @@ -465,8 +465,11 @@ org.elasticsearch.index.codec.vectors.es818.ES818HnswBinaryQuantizedVectorsFormat, org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat, org.elasticsearch.index.codec.vectors.diskbbq.next.ESNextDiskBBQVectorsFormat, - org.elasticsearch.index.codec.vectors.es93.ES93BinaryQuantizedVectorsFormat, + org.elasticsearch.index.codec.vectors.es93.ES93FlatVectorFormat, org.elasticsearch.index.codec.vectors.es93.ES93HnswVectorsFormat, + org.elasticsearch.index.codec.vectors.es93.ES93ScalarQuantizedVectorsFormat, + org.elasticsearch.index.codec.vectors.es93.ES93HnswScalarQuantizedVectorsFormat, + org.elasticsearch.index.codec.vectors.es93.ES93BinaryQuantizedVectorsFormat, org.elasticsearch.index.codec.vectors.es93.ES93HnswBinaryQuantizedVectorsFormat; provides org.apache.lucene.codecs.Codec diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93FlatVectorFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93FlatVectorFormat.java new file mode 100644 index 0000000000000..bdad21596d479 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93FlatVectorFormat.java @@ -0,0 +1,125 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.index.codec.vectors.es93;
+
+import org.apache.lucene.codecs.KnnVectorsFormat;
+import org.apache.lucene.codecs.KnnVectorsReader;
+import org.apache.lucene.codecs.KnnVectorsWriter;
+import org.apache.lucene.codecs.hnsw.FlatVectorsFormat;
+import org.apache.lucene.codecs.hnsw.FlatVectorsReader;
+import org.apache.lucene.index.ByteVectorValues;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FloatVectorValues;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.search.AcceptDocs;
+import org.apache.lucene.search.KnnCollector;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.hnsw.OrdinalTranslatedKnnCollector;
+import org.apache.lucene.util.hnsw.RandomVectorScorer;
+
+import java.io.IOException;
+import java.util.Map;
+
+import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.MAX_DIMS_COUNT;
+
+/**
+ * Flat KNN vectors format. Storage is delegated to an {@link ES93GenericFlatVectorsFormat}; searches are
+ * answered by {@link ES93FlatVectorReader}, which scores every accepted vector exhaustively.
+ */
+public class ES93FlatVectorFormat extends KnnVectorsFormat {
+
+    static final String NAME = "ES93FlatVectorFormat";
+
+    private final FlatVectorsFormat format;
+
+    /**
+     * Creates the format on top of a default-constructed {@link ES93GenericFlatVectorsFormat}.
+     */
+    public ES93FlatVectorFormat() {
+        super(NAME);
+        format = new ES93GenericFlatVectorsFormat();
+    }
+
+    /**
+     * Creates the format for the given element type.
+     *
+     * @param elementType element type of the stored vectors; {@code BIT} is rejected, but only by an assertion
+     */
+    public ES93FlatVectorFormat(ES93GenericFlatVectorsFormat.ElementType elementType) {
+        super(NAME);
+        assert elementType != ES93GenericFlatVectorsFormat.ElementType.BIT : "ES815BitFlatVectorFormat should be used for bits";
+        // NOTE(review): the second argument looks like the direct-IO switch (other ES93 formats pass "useDirectIO" here) - confirm
+        format = new ES93GenericFlatVectorsFormat(elementType, false);
+    }
+
+    /** Returns the writer of the wrapped flat format unchanged. */
+    @Override
+    public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
+        return format.fieldsWriter(state);
+    }
+
+    /** Wraps the flat reader so that it can serve KNN searches by brute force. */
+    @Override
+    public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException {
+        return new ES93FlatVectorReader(format.fieldsReader(state));
+    }
+
+    @Override
+    public int getMaxDimensions(String fieldName) {
+        return MAX_DIMS_COUNT;
+    }
+
+    /**
+     * {@link KnnVectorsReader} that forwards all data access to a {@link FlatVectorsReader} and implements
+     * both {@code search} variants as a full scan over the scorer's ordinals.
+     */
+    static class ES93FlatVectorReader extends KnnVectorsReader {
+
+        private final FlatVectorsReader reader;
+
+        ES93FlatVectorReader(FlatVectorsReader reader) {
+            super();
+            this.reader = reader;
+        }
+
+        @Override
+        public void checkIntegrity() throws IOException {
+            reader.checkIntegrity();
+        }
+
+        @Override
+        public FloatVectorValues getFloatVectorValues(String field) throws IOException {
+            return reader.getFloatVectorValues(field);
+        }
+
+        @Override
+        public ByteVectorValues getByteVectorValues(String field) throws IOException {
+            return reader.getByteVectorValues(field);
+        }
+
+        @Override
+        public void search(String field, float[] target, KnnCollector knnCollector, AcceptDocs acceptDocs) throws IOException {
+            collectAllMatchingDocs(knnCollector, acceptDocs, reader.getRandomVectorScorer(field, target));
+        }
+
+        @Override
+        public void search(String field, byte[] target, KnnCollector knnCollector, AcceptDocs acceptDocs) throws IOException {
+            collectAllMatchingDocs(knnCollector, acceptDocs, reader.getRandomVectorScorer(field, target));
+        }
+
+        /**
+         * Scores every ordinal in {@code [0, scorer.maxOrd())} that passes the accept filter and offers it to the collector.
+         *
+         * @param knnCollector receives the hits; ordinals are translated to doc ids through {@code scorer::ordToDoc}
+         * @param acceptDocs   filter of allowed documents
+         * @param scorer       scorer bound to the query vector
+         */
+        private void collectAllMatchingDocs(KnnCollector knnCollector, AcceptDocs acceptDocs, RandomVectorScorer scorer)
+            throws IOException {
+            OrdinalTranslatedKnnCollector collector = new OrdinalTranslatedKnnCollector(knnCollector, scorer::ordToDoc);
+            // NOTE(review): acceptDocs is dereferenced unconditionally - presumably callers never pass null; confirm
+            Bits acceptedOrds = scorer.getAcceptOrds(acceptDocs.bits());
+            // the ordinal bound is loop-invariant, so read it once
+            final int maxOrd = scorer.maxOrd();
+            for (int i = 0; i < maxOrd; i++) {
+                // a null filter means that every ordinal is accepted
+                if (acceptedOrds == null || acceptedOrds.get(i)) {
+                    collector.collect(i, scorer.score(i));
+                    collector.incVisitedCount(1);
+                }
+            }
+            assert collector.earlyTerminated() == false;
+        }
+
+        /** Reports the off-heap byte sizes of the wrapped reader for the given field. */
+        @Override
+        public Map<String, Long> getOffHeapByteSize(FieldInfo fieldInfo) {
+            return reader.getOffHeapByteSize(fieldInfo);
+        }
+
+        @Override
+        public void close() throws IOException {
+            reader.close();
+        }
+    }
+}
diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93HnswScalarQuantizedVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93HnswScalarQuantizedVectorsFormat.java
new file mode 100644
index 0000000000000..4f47b82c3b5a6
--- /dev/null
+++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93HnswScalarQuantizedVectorsFormat.java
@@ -0,0 +1,97 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.index.codec.vectors.es93;
+
+import org.apache.lucene.codecs.KnnVectorsReader;
+import org.apache.lucene.codecs.KnnVectorsWriter;
+import org.apache.lucene.codecs.hnsw.FlatVectorScorerUtil;
+import org.apache.lucene.codecs.hnsw.FlatVectorsFormat;
+import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorScorer;
+import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorsFormat;
+import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorsReader;
+import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorsWriter;
+import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader;
+import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsWriter;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SegmentWriteState;
+import org.elasticsearch.index.codec.vectors.AbstractHnswVectorsFormat;
+
+import java.io.IOException;
+import java.util.concurrent.ExecutorService;
+
+/**
+ * HNSW vectors format that stacks a Lucene 9.9 HNSW writer/reader on top of a Lucene 10.4 scalar-quantized
+ * writer/reader, which in turn keeps the raw vectors in an {@link ES93GenericFlatVectorsFormat}.
+ */
+public class ES93HnswScalarQuantizedVectorsFormat extends AbstractHnswVectorsFormat {
+
+    static final String NAME = "ES93HnswScalarQuantizedVectorsFormat";
+
+    /** Scorer shared by every writer and reader created by this format. */
+    static final Lucene104ScalarQuantizedVectorScorer flatVectorScorer = new Lucene104ScalarQuantizedVectorScorer(
+        FlatVectorScorerUtil.getLucene99FlatVectorsScorer()
+    );
+
+    private final Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding encoding;
+    private final FlatVectorsFormat rawVectorFormat;
+
+    /**
+     * Creates the format with the superclass defaults, {@code SEVEN_BIT} encoding and {@code STANDARD} raw vectors.
+     */
+    public ES93HnswScalarQuantizedVectorsFormat() {
+        super(NAME);
+        this.rawVectorFormat = new ES93GenericFlatVectorsFormat(ES93GenericFlatVectorsFormat.ElementType.STANDARD, false);
+        this.encoding = Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding.SEVEN_BIT;
+    }
+
+    /**
+     * @param maxConn     forwarded to the superclass and to the HNSW writer
+     * @param beamWidth   forwarded to the superclass and to the HNSW writer
+     * @param encoding    scalar encoding handed to the quantized writer
+     * @param elementType element type of the raw vector storage
+     * @param useDirectIO forwarded to the raw vector format
+     */
+    public ES93HnswScalarQuantizedVectorsFormat(
+        int maxConn,
+        int beamWidth,
+        Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding encoding,
+        ES93GenericFlatVectorsFormat.ElementType elementType,
+        boolean useDirectIO
+    ) {
+        super(NAME, maxConn, beamWidth);
+        this.rawVectorFormat = new ES93GenericFlatVectorsFormat(elementType, useDirectIO);
+        this.encoding = encoding;
+    }
+
+    /**
+     * Same as the five-argument constructor, additionally passing merge parallelism settings to the superclass.
+     *
+     * @param numMergeWorkers forwarded to the superclass and to the HNSW writer
+     * @param mergeExec       forwarded to the superclass and to the HNSW writer
+     */
+    public ES93HnswScalarQuantizedVectorsFormat(
+        int maxConn,
+        int beamWidth,
+        Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding encoding,
+        ES93GenericFlatVectorsFormat.ElementType elementType,
+        boolean useDirectIO,
+        int numMergeWorkers,
+        ExecutorService mergeExec
+    ) {
+        super(NAME, maxConn, beamWidth, numMergeWorkers, mergeExec);
+        this.rawVectorFormat = new ES93GenericFlatVectorsFormat(elementType, useDirectIO);
+        this.encoding = encoding;
+    }
+
+    @Override
+    protected FlatVectorsFormat flatVectorsFormat() {
+        return rawVectorFormat;
+    }
+
+    /** Builds the writer chain inside-out: raw vectors, then quantization, then the HNSW graph. */
+    @Override
+    public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
+        var rawWriter = rawVectorFormat.fieldsWriter(state);
+        var quantizedWriter = new Lucene104ScalarQuantizedVectorsWriter(state, encoding, rawWriter, flatVectorScorer);
+        return new Lucene99HnswVectorsWriter(state, maxConn, beamWidth, quantizedWriter, numMergeWorkers, mergeExec, 0);
+    }
+
+    /** Builds the reader chain inside-out, mirroring {@link #fieldsWriter(SegmentWriteState)}. */
+    @Override
+    public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException {
+        var rawReader = rawVectorFormat.fieldsReader(state);
+        var quantizedReader = new Lucene104ScalarQuantizedVectorsReader(state, rawReader, flatVectorScorer);
+        return new Lucene99HnswVectorsReader(state, quantizedReader);
+    }
+}
diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93ScalarQuantizedVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93ScalarQuantizedVectorsFormat.java
new file mode 100644
index 0000000000000..075c1728f1029
--- /dev/null
+++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93ScalarQuantizedVectorsFormat.java
@@ -0,0 +1,85 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements.
Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.index.codec.vectors.es93;
+
+import org.apache.lucene.codecs.KnnVectorsFormat;
+import org.apache.lucene.codecs.KnnVectorsReader;
+import org.apache.lucene.codecs.KnnVectorsWriter;
+import org.apache.lucene.codecs.hnsw.FlatVectorScorerUtil;
+import org.apache.lucene.codecs.hnsw.FlatVectorsFormat;
+import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorScorer;
+import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorsFormat;
+import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorsReader;
+import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorsWriter;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SegmentWriteState;
+
+import java.io.IOException;
+
+import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.MAX_DIMS_COUNT;
+
+/**
+ * Scalar-quantized vectors format without a graph: a Lucene 10.4 scalar-quantized writer/reader layered
+ * over raw vectors kept in an {@link ES93GenericFlatVectorsFormat}.
+ */
+public class ES93ScalarQuantizedVectorsFormat extends KnnVectorsFormat {
+
+    static final String NAME = "ES93ScalarQuantizedVectorsFormat";
+
+    /** Scorer shared by every writer and reader created by this format. */
+    static final Lucene104ScalarQuantizedVectorScorer flatVectorScorer = new Lucene104ScalarQuantizedVectorScorer(
+        FlatVectorScorerUtil.getLucene99FlatVectorsScorer()
+    );
+
+    private final Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding encoding;
+    private final FlatVectorsFormat rawVectorFormat;
+
+    /** Uses {@code STANDARD} raw vectors and {@code SEVEN_BIT} encoding. */
+    public ES93ScalarQuantizedVectorsFormat() {
+        this(ES93GenericFlatVectorsFormat.ElementType.STANDARD);
+    }
+
+    /**
+     * Uses {@code SEVEN_BIT} encoding.
+     *
+     * @param elementType element type of the raw vector storage
+     */
+    public ES93ScalarQuantizedVectorsFormat(ES93GenericFlatVectorsFormat.ElementType elementType) {
+        this(elementType, Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding.SEVEN_BIT);
+    }
+
+    /**
+     * @param elementType element type of the raw vector storage; {@code BIT} is rejected, but only by an assertion
+     * @param encoding    scalar encoding handed to the quantized writer
+     */
+    public ES93ScalarQuantizedVectorsFormat(
+        ES93GenericFlatVectorsFormat.ElementType elementType,
+        Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding encoding
+    ) {
+        super(NAME);
+        assert elementType != ES93GenericFlatVectorsFormat.ElementType.BIT : "BIT should not be used with scalar quantization";
+        this.rawVectorFormat = new ES93GenericFlatVectorsFormat(elementType, false);
+        this.encoding = encoding;
+    }
+
+    /** Wraps the raw vector writer in the scalar-quantizing writer. */
+    @Override
+    public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
+        var rawWriter = rawVectorFormat.fieldsWriter(state);
+        return new Lucene104ScalarQuantizedVectorsWriter(state, encoding, rawWriter, flatVectorScorer);
+    }
+
+    /** Wraps the raw vector reader in the scalar-quantized reader. */
+    @Override
+    public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException {
+        var rawReader = rawVectorFormat.fieldsReader(state);
+        return new Lucene104ScalarQuantizedVectorsReader(state, rawReader, flatVectorScorer);
+    }
+
+    @Override
+    public int getMaxDimensions(String fieldName) {
+        return MAX_DIMS_COUNT;
+    }
+
+    /** Renders the format name followed by its configuration in parentheses. */
+    @Override
+    public String toString() {
+        StringBuilder description = new StringBuilder(NAME);
+        description.append("(name=").append(NAME);
+        description.append(", encoding=").append(encoding);
+        description.append(", flatVectorScorer=").append(flatVectorScorer);
+        description.append(", rawVectorFormat=").append(rawVectorFormat);
+        description.append(")");
+        return description.toString();
+    }
+}
diff --git a/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat b/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat
index 5370d7244df9b..0dc34ea2e808d 100644
--- a/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat
+++ b/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat
@@ -9,6 +9,9 @@ org.elasticsearch.index.codec.vectors.es818.ES818BinaryQuantizedVectorsFormat
 org.elasticsearch.index.codec.vectors.es818.ES818HnswBinaryQuantizedVectorsFormat
 org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat
 org.elasticsearch.index.codec.vectors.diskbbq.next.ESNextDiskBBQVectorsFormat
-org.elasticsearch.index.codec.vectors.es93.ES93BinaryQuantizedVectorsFormat +org.elasticsearch.index.codec.vectors.es93.ES93FlatVectorFormat org.elasticsearch.index.codec.vectors.es93.ES93HnswVectorsFormat +org.elasticsearch.index.codec.vectors.es93.ES93ScalarQuantizedVectorsFormat +org.elasticsearch.index.codec.vectors.es93.ES93HnswScalarQuantizedVectorsFormat +org.elasticsearch.index.codec.vectors.es93.ES93BinaryQuantizedVectorsFormat org.elasticsearch.index.codec.vectors.es93.ES93HnswBinaryQuantizedVectorsFormat diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93FlatBFloat16VectorFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93FlatBFloat16VectorFormatTests.java new file mode 100644 index 0000000000000..91d4054ae94ed --- /dev/null +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93FlatBFloat16VectorFormatTests.java @@ -0,0 +1,73 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */
+
+package org.elasticsearch.index.codec.vectors.es93;
+
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.KnnVectorsReader;
+import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.KnnFloatVectorField;
+import org.apache.lucene.index.CodecReader;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.tests.util.TestUtil;
+import org.elasticsearch.common.logging.LogConfigurator;
+import org.elasticsearch.index.codec.vectors.BFloat16;
+import org.elasticsearch.index.codec.vectors.BaseBFloat16KnnVectorsFormatTestCase;
+import org.junit.AssumptionViolatedException;
+
+import java.io.IOException;
+
+import static org.apache.lucene.index.VectorSimilarityFunction.DOT_PRODUCT;
+import static org.hamcrest.Matchers.aMapWithSize;
+import static org.hamcrest.Matchers.hasEntry;
+
+/**
+ * Runs the bfloat16 KNN format test suite against {@link ES93FlatVectorFormat} configured with {@code BFLOAT16}.
+ */
+public class ES93FlatBFloat16VectorFormatTests extends BaseBFloat16KnnVectorsFormatTestCase {
+
+    static {
+        LogConfigurator.loadLog4jPlugins();
+        // native access requires logging to be initialized
+        LogConfigurator.configureESLogging();
+    }
+
+    @Override
+    protected Codec getCodec() {
+        var flatFormat = new ES93FlatVectorFormat(ES93GenericFlatVectorsFormat.ElementType.BFLOAT16);
+        return TestUtil.alwaysKnnVectorsFormat(flatFormat);
+    }
+
+    /** Always skipped via an assumption failure; the message states that a graph-based codec is required. */
+    public void testSearchWithVisitedLimit() {
+        throw new AssumptionViolatedException("requires graph-based vector codec");
+    }
+
+    /**
+     * Indexes a single random vector and checks that the reader reports exactly one off-heap entry,
+     * {@code "vec"}, sized at {@link BFloat16#BYTES} per dimension.
+     */
+    public void testSimpleOffHeapSize() throws IOException {
+        final float[] vector = randomVector(random().nextInt(12, 500));
+        final long expectedVecBytes = (long) vector.length * BFloat16.BYTES;
+        try (Directory directory = newDirectory(); IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig())) {
+            Document document = new Document();
+            document.add(new KnnFloatVectorField("f", vector, DOT_PRODUCT));
+            writer.addDocument(document);
+            writer.commit();
+            try (IndexReader indexReader = DirectoryReader.open(writer)) {
+                LeafReader leaf = getOnlyLeafReader(indexReader);
+                // nothing is verified when the leaf does not expose codec-level readers
+                if ((leaf instanceof CodecReader) == false) {
+                    return;
+                }
+                KnnVectorsReader vectorsReader = ((CodecReader) leaf).getVectorReader();
+                // unwrap the per-field dispatcher to reach the reader of field "f"
+                if (vectorsReader instanceof PerFieldKnnVectorsFormat.FieldsReader perFieldReader) {
+                    vectorsReader = perFieldReader.getFieldReader("f");
+                }
+                var offHeap = vectorsReader.getOffHeapByteSize(leaf.getFieldInfos().fieldInfo("f"));
+                assertThat(offHeap, aMapWithSize(1));
+                assertThat(offHeap, hasEntry("vec", expectedVecBytes));
+            }
+        }
+    }
+}
diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93FlatVectorFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93FlatVectorFormatTests.java
new file mode 100644
index 0000000000000..1ada03a70bed6
--- /dev/null
+++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93FlatVectorFormatTests.java
@@ -0,0 +1,72 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.index.codec.vectors.es93;
+
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.KnnVectorsReader;
+import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.KnnFloatVectorField;
+import org.apache.lucene.index.CodecReader;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase;
+import org.apache.lucene.tests.util.TestUtil;
+import org.elasticsearch.common.logging.LogConfigurator;
+import org.junit.AssumptionViolatedException;
+
+import java.io.IOException;
+
+import static org.apache.lucene.index.VectorSimilarityFunction.DOT_PRODUCT;
+import static org.hamcrest.Matchers.aMapWithSize;
+import static org.hamcrest.Matchers.hasEntry;
+
+/**
+ * Runs the Lucene KNN format test suite against {@link ES93FlatVectorFormat} configured with {@code STANDARD}.
+ */
+public class ES93FlatVectorFormatTests extends BaseKnnVectorsFormatTestCase {
+
+    static {
+        LogConfigurator.loadLog4jPlugins();
+        // native access requires logging to be initialized
+        LogConfigurator.configureESLogging();
+    }
+
+    @Override
+    protected Codec getCodec() {
+        var flatFormat = new ES93FlatVectorFormat(ES93GenericFlatVectorsFormat.ElementType.STANDARD);
+        return TestUtil.alwaysKnnVectorsFormat(flatFormat);
+    }
+
+    /** Always skipped via an assumption failure; the message states that a graph-based codec is required. */
+    public void testSearchWithVisitedLimit() {
+        throw new AssumptionViolatedException("requires graph-based vector codec");
+    }
+
+    /**
+     * Indexes a single random vector and checks that the reader reports exactly one off-heap entry,
+     * {@code "vec"}, sized at {@link Float#BYTES} per dimension.
+     */
+    public void testSimpleOffHeapSize() throws IOException {
+        final float[] vector = randomVector(random().nextInt(12, 500));
+        final long expectedVecBytes = (long) vector.length * Float.BYTES;
+        try (Directory directory = newDirectory(); IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig())) {
+            Document document = new Document();
+            document.add(new KnnFloatVectorField("f", vector, DOT_PRODUCT));
+            writer.addDocument(document);
+            writer.commit();
+            try (IndexReader indexReader = DirectoryReader.open(writer)) {
+                LeafReader leaf = getOnlyLeafReader(indexReader);
+                // nothing is verified when the leaf does not expose codec-level readers
+                if ((leaf instanceof CodecReader) == false) {
+                    return;
+                }
+                KnnVectorsReader vectorsReader = ((CodecReader) leaf).getVectorReader();
+                // unwrap the per-field dispatcher to reach the reader of field "f"
+                if (vectorsReader instanceof PerFieldKnnVectorsFormat.FieldsReader perFieldReader) {
+                    vectorsReader = perFieldReader.getFieldReader("f");
+                }
+                var offHeap = vectorsReader.getOffHeapByteSize(leaf.getFieldInfos().fieldInfo("f"));
+                assertThat(offHeap, aMapWithSize(1));
+                assertThat(offHeap, hasEntry("vec", expectedVecBytes));
+            }
+        }
+    }
+}
diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93HnswScalarQuantizedBFloat16VectorsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93HnswScalarQuantizedBFloat16VectorsFormatTests.java
new file mode 100644
index 0000000000000..a1bda3e4b2342
--- /dev/null
+++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93HnswScalarQuantizedBFloat16VectorsFormatTests.java
@@ -0,0 +1,82 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.index.codec.vectors.es93;
+
+import org.apache.lucene.codecs.KnnVectorsFormat;
+import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorsFormat;
+import org.apache.lucene.store.Directory;
+import org.elasticsearch.index.codec.vectors.BFloat16;
+import org.elasticsearch.index.codec.vectors.BaseHnswBFloat16VectorsFormatTestCase;
+
+import java.io.IOException;
+import java.util.concurrent.ExecutorService;
+
+import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH;
+import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN;
+import static org.hamcrest.Matchers.aMapWithSize;
+import static org.hamcrest.Matchers.allOf;
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.greaterThan;
+import static org.hamcrest.Matchers.hasEntry;
+
+/**
+ * Runs the bfloat16 HNSW format test suite against {@link ES93HnswScalarQuantizedVectorsFormat} with
+ * {@code SEVEN_BIT} encoding, {@code BFLOAT16} raw vectors and a randomly chosen direct-IO flag.
+ */
+public class ES93HnswScalarQuantizedBFloat16VectorsFormatTests extends BaseHnswBFloat16VectorsFormatTestCase {
+
+    @Override
+    protected KnnVectorsFormat createFormat() {
+        return createFormat(DEFAULT_MAX_CONN, DEFAULT_BEAM_WIDTH);
+    }
+
+    @Override
+    protected KnnVectorsFormat createFormat(int maxConn, int beamWidth) {
+        final boolean useDirectIO = random().nextBoolean();
+        return new ES93HnswScalarQuantizedVectorsFormat(
+            maxConn,
+            beamWidth,
+            Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding.SEVEN_BIT,
+            ES93GenericFlatVectorsFormat.ElementType.BFLOAT16,
+            useDirectIO
+        );
+    }
+
+    @Override
+    protected KnnVectorsFormat createFormat(int maxConn, int beamWidth, int numMergeWorkers, ExecutorService service) {
+        final boolean useDirectIO = random().nextBoolean();
+        return new ES93HnswScalarQuantizedVectorsFormat(
+            maxConn,
+            beamWidth,
+            Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding.SEVEN_BIT,
+            ES93GenericFlatVectorsFormat.ElementType.BFLOAT16,
+            useDirectIO,
+            numMergeWorkers,
+            service
+        );
+    }
+
+    /**
+     * Expects three off-heap entries for a single random vector: {@code "vec"} at {@link BFloat16#BYTES}
+     * per dimension, {@code "vex"} equal to 1 and a positive {@code "veq"}.
+     */
+    public void testSimpleOffHeapSize() throws IOException {
+        final float[] vector = randomVector(random().nextInt(12, 500));
+        final long expectedVecBytes = (long) vector.length * BFloat16.BYTES;
+        try (Directory directory = newDirectory()) {
+            testSimpleOffHeapSize(
+                directory,
+                newIndexWriterConfig(),
+                vector,
+                allOf(
+                    aMapWithSize(3),
+                    hasEntry("vec", expectedVecBytes),
+                    hasEntry("vex", 1L),
+                    hasEntry(equalTo("veq"), greaterThan(0L))
+                )
+            );
+        }
+    }
+}
diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93HnswScalarQuantizedVectorsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93HnswScalarQuantizedVectorsFormatTests.java
new file mode 100644
index 0000000000000..c2bf9e6352f15
--- /dev/null
+++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93HnswScalarQuantizedVectorsFormatTests.java
@@ -0,0 +1,81 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.index.codec.vectors.es93;
+
+import org.apache.lucene.codecs.KnnVectorsFormat;
+import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorsFormat;
+import org.apache.lucene.store.Directory;
+import org.elasticsearch.index.codec.vectors.BaseHnswVectorsFormatTestCase;
+
+import java.io.IOException;
+import java.util.concurrent.ExecutorService;
+
+import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH;
+import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN;
+import static org.hamcrest.Matchers.aMapWithSize;
+import static org.hamcrest.Matchers.allOf;
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.greaterThan;
+import static org.hamcrest.Matchers.hasEntry;
+
+/**
+ * Runs the HNSW format test suite against {@link ES93HnswScalarQuantizedVectorsFormat} with
+ * {@code SEVEN_BIT} encoding, {@code STANDARD} raw vectors and a randomly chosen direct-IO flag.
+ */
+public class ES93HnswScalarQuantizedVectorsFormatTests extends BaseHnswVectorsFormatTestCase {
+
+    @Override
+    protected KnnVectorsFormat createFormat() {
+        return createFormat(DEFAULT_MAX_CONN, DEFAULT_BEAM_WIDTH);
+    }
+
+    @Override
+    protected KnnVectorsFormat createFormat(int maxConn, int beamWidth) {
+        final boolean useDirectIO = random().nextBoolean();
+        return new ES93HnswScalarQuantizedVectorsFormat(
+            maxConn,
+            beamWidth,
+            Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding.SEVEN_BIT,
+            ES93GenericFlatVectorsFormat.ElementType.STANDARD,
+            useDirectIO
+        );
+    }
+
+    @Override
+    protected KnnVectorsFormat createFormat(int maxConn, int beamWidth, int numMergeWorkers, ExecutorService service) {
+        final boolean useDirectIO = random().nextBoolean();
+        return new ES93HnswScalarQuantizedVectorsFormat(
+            maxConn,
+            beamWidth,
+            Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding.SEVEN_BIT,
+            ES93GenericFlatVectorsFormat.ElementType.STANDARD,
+            useDirectIO,
+            numMergeWorkers,
+            service
+        );
+    }
+
+    /**
+     * Expects three off-heap entries for a single random vector: {@code "vec"} at {@link Float#BYTES}
+     * per dimension, {@code "vex"} equal to 1 and a positive {@code "veq"}.
+     */
+    public void testSimpleOffHeapSize() throws IOException {
+        final float[] vector = randomVector(random().nextInt(12, 500));
+        final long expectedVecBytes = (long) vector.length * Float.BYTES;
+        try (Directory directory = newDirectory()) {
+            testSimpleOffHeapSize(
+                directory,
+                newIndexWriterConfig(),
+                vector,
+                allOf(
+                    aMapWithSize(3),
+                    hasEntry("vec", expectedVecBytes),
+                    hasEntry("vex", 1L),
+                    hasEntry(equalTo("veq"), greaterThan(0L))
+                )
+            );
+        }
+    }
+}
diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93ScalarQuantizedBFloat16VectorFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93ScalarQuantizedBFloat16VectorFormatTests.java
new file mode 100644
index 0000000000000..57578097f2db4
--- /dev/null
+++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93ScalarQuantizedBFloat16VectorFormatTests.java
@@ -0,0 +1,85 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.index.codec.vectors.es93;
+
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.KnnVectorsFormat;
+import org.apache.lucene.codecs.KnnVectorsReader;
+import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.KnnFloatVectorField;
+import org.apache.lucene.index.CodecReader;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.tests.util.TestUtil;
+import org.elasticsearch.common.logging.LogConfigurator;
+import org.elasticsearch.index.codec.vectors.BFloat16;
+import org.elasticsearch.index.codec.vectors.BaseBFloat16KnnVectorsFormatTestCase;
+import org.junit.AssumptionViolatedException;
+
+import java.io.IOException;
+
+import static org.apache.lucene.index.VectorSimilarityFunction.DOT_PRODUCT;
+import static org.hamcrest.Matchers.aMapWithSize;
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.greaterThan;
+import static org.hamcrest.Matchers.hasEntry;
+
+/**
+ * Runs the bfloat16 KNN format test suite against {@link ES93ScalarQuantizedVectorsFormat} configured
+ * with {@code BFLOAT16} raw vectors.
+ */
+public class ES93ScalarQuantizedBFloat16VectorFormatTests extends BaseBFloat16KnnVectorsFormatTestCase {
+
+    static {
+        LogConfigurator.loadLog4jPlugins();
+        // native access requires logging to be initialized
+        LogConfigurator.configureESLogging();
+    }
+
+    private KnnVectorsFormat format;
+
+    @Override
+    public void setUp() throws Exception {
+        // assigned before delegating - presumably the base setUp already asks for the codec; confirm before reordering
+        format = new ES93ScalarQuantizedVectorsFormat(ES93GenericFlatVectorsFormat.ElementType.BFLOAT16);
+        super.setUp();
+    }
+
+    @Override
+    protected Codec getCodec() {
+        return TestUtil.alwaysKnnVectorsFormat(format);
+    }
+
+    /** Always skipped via an assumption failure; the message states that a graph codec is required. */
+    public void testSearchWithVisitedLimit() {
+        throw new AssumptionViolatedException("requires graph vector codec");
+    }
+
+    /**
+     * Indexes a single random vector and checks that the reader reports exactly two off-heap entries:
+     * {@code "vec"} at {@link BFloat16#BYTES} per dimension and a positive {@code "veq"}.
+     */
+    public void testSimpleOffHeapSize() throws IOException {
+        final float[] vector = randomVector(random().nextInt(12, 500));
+        final long expectedVecBytes = (long) vector.length * BFloat16.BYTES;
+        try (Directory directory = newDirectory(); IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig())) {
+            Document document = new Document();
+            document.add(new KnnFloatVectorField("f", vector, DOT_PRODUCT));
+            writer.addDocument(document);
+            writer.commit();
+            try (IndexReader indexReader = DirectoryReader.open(writer)) {
+                LeafReader leaf = getOnlyLeafReader(indexReader);
+                // nothing is verified when the leaf does not expose codec-level readers
+                if ((leaf instanceof CodecReader) == false) {
+                    return;
+                }
+                KnnVectorsReader vectorsReader = ((CodecReader) leaf).getVectorReader();
+                // unwrap the per-field dispatcher to reach the reader of field "f"
+                if (vectorsReader instanceof PerFieldKnnVectorsFormat.FieldsReader perFieldReader) {
+                    vectorsReader = perFieldReader.getFieldReader("f");
+                }
+                var offHeap = vectorsReader.getOffHeapByteSize(leaf.getFieldInfos().fieldInfo("f"));
+                assertThat(offHeap, aMapWithSize(2));
+                assertThat(offHeap, hasEntry("vec", expectedVecBytes));
+                assertThat(offHeap, hasEntry(equalTo("veq"), greaterThan(0L)));
+            }
+        }
+    }
+}
diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93ScalarQuantizedVectorsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93ScalarQuantizedVectorsFormatTests.java
new file mode 100644
index 0000000000000..a880852378d61
--- /dev/null
+++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93ScalarQuantizedVectorsFormatTests.java
@@ -0,0 +1,75 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */ + +package org.elasticsearch.index.codec.vectors.es93; + +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.KnnVectorsReader; +import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.KnnFloatVectorField; +import org.apache.lucene.index.CodecReader; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase; +import org.apache.lucene.tests.util.TestUtil; +import org.elasticsearch.common.logging.LogConfigurator; +import org.junit.AssumptionViolatedException; + +import java.io.IOException; + +import static org.apache.lucene.index.VectorSimilarityFunction.DOT_PRODUCT; +import static org.hamcrest.Matchers.aMapWithSize; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.hasEntry; + +public class ES93ScalarQuantizedVectorsFormatTests extends BaseKnnVectorsFormatTestCase { + + static { + LogConfigurator.loadLog4jPlugins(); + LogConfigurator.configureESLogging(); // native access requires logging to be initialized + } + + @Override + protected Codec getCodec() { + return TestUtil.alwaysKnnVectorsFormat(new ES93ScalarQuantizedVectorsFormat(ES93GenericFlatVectorsFormat.ElementType.STANDARD)); + } + + public void testSearchWithVisitedLimit() { + throw new AssumptionViolatedException("requires graph vector codec"); + } + + public void testSimpleOffHeapSize() throws IOException { + float[] vector = randomVector(random().nextInt(12, 500)); + try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) { + Document doc = new Document(); + doc.add(new KnnFloatVectorField("f", vector, DOT_PRODUCT)); + 
w.addDocument(doc); + w.commit(); + try (IndexReader reader = DirectoryReader.open(w)) { + LeafReader r = getOnlyLeafReader(reader); + if (r instanceof CodecReader codecReader) { + KnnVectorsReader knnVectorsReader = codecReader.getVectorReader(); + if (knnVectorsReader instanceof PerFieldKnnVectorsFormat.FieldsReader fieldsReader) { + knnVectorsReader = fieldsReader.getFieldReader("f"); + } + var fieldInfo = r.getFieldInfos().fieldInfo("f"); + var offHeap = knnVectorsReader.getOffHeapByteSize(fieldInfo); + assertThat(offHeap, aMapWithSize(2)); + assertThat(offHeap, hasEntry("vec", (long) vector.length * Float.BYTES)); + assertThat(offHeap, hasEntry(equalTo("veq"), greaterThan(0L))); + } + } + } + } +} From f2dec8f2b7c7a26a9fa5ea4670a4a8a6f2fe5fd2 Mon Sep 17 00:00:00 2001 From: Simon Cooper Date: Mon, 17 Nov 2025 13:27:24 +0000 Subject: [PATCH 2/8] Add scalar quantized implementations --- .../vectors/es93/ES93FlatVectorFormat.java | 6 +- .../ES93HnswScalarQuantizedVectorsFormat.java | 92 ++++++-- .../ES93ScalarQuantizedVectorsFormat.java | 220 ++++++++++++++++-- .../ES93FlatBFloat16VectorFormatTests.java | 3 +- .../es93/ES93FlatVectorFormatTests.java | 3 +- ...arQuantizedBFloat16VectorsFormatTests.java | 26 ++- ...HnswScalarQuantizedVectorsFormatTests.java | 26 ++- ...larQuantizedBFloat16VectorFormatTests.java | 3 +- ...ES93ScalarQuantizedVectorsFormatTests.java | 3 +- 9 files changed, 322 insertions(+), 60 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93FlatVectorFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93FlatVectorFormat.java index bdad21596d479..172ad11be497b 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93FlatVectorFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93FlatVectorFormat.java @@ -24,6 +24,7 @@ import org.apache.lucene.util.Bits; import org.apache.lucene.util.hnsw.OrdinalTranslatedKnnCollector; import 
org.apache.lucene.util.hnsw.RandomVectorScorer; +import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; import java.io.IOException; import java.util.Map; @@ -44,9 +45,9 @@ public ES93FlatVectorFormat() { format = new ES93GenericFlatVectorsFormat(); } - public ES93FlatVectorFormat(ES93GenericFlatVectorsFormat.ElementType elementType) { + public ES93FlatVectorFormat(DenseVectorFieldMapper.ElementType elementType) { super(NAME); - assert elementType != ES93GenericFlatVectorsFormat.ElementType.BIT : "ES815BitFlatVectorFormat should be used for bits"; + assert elementType != DenseVectorFieldMapper.ElementType.BIT : "ES815BitFlatVectorFormat should be used for bits"; format = new ES93GenericFlatVectorsFormat(elementType, false); } @@ -70,7 +71,6 @@ static class ES93FlatVectorReader extends KnnVectorsReader { private final FlatVectorsReader reader; ES93FlatVectorReader(FlatVectorsReader reader) { - super(); this.reader = reader; } diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93HnswScalarQuantizedVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93HnswScalarQuantizedVectorsFormat.java index 4f47b82c3b5a6..daef9f0fa2364 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93HnswScalarQuantizedVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93HnswScalarQuantizedVectorsFormat.java @@ -13,60 +13,104 @@ import org.apache.lucene.codecs.KnnVectorsWriter; import org.apache.lucene.codecs.hnsw.FlatVectorScorerUtil; import org.apache.lucene.codecs.hnsw.FlatVectorsFormat; -import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorScorer; -import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorsFormat; -import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorsReader; -import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorsWriter; +import 
org.apache.lucene.codecs.hnsw.FlatVectorsScorer; +import org.apache.lucene.codecs.hnsw.ScalarQuantizedVectorScorer; import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader; import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsWriter; +import org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsReader; +import org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsWriter; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; import org.elasticsearch.index.codec.vectors.AbstractHnswVectorsFormat; +import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; import java.io.IOException; import java.util.concurrent.ExecutorService; +import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DEFAULT_NUM_MERGE_WORKER; +import static org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsFormat.DYNAMIC_CONFIDENCE_INTERVAL; + public class ES93HnswScalarQuantizedVectorsFormat extends AbstractHnswVectorsFormat { static final String NAME = "ES93HnswScalarQuantizedVectorsFormat"; + private static final int ALLOWED_BITS = (1 << 8) | (1 << 7) | (1 << 4); + + + /** The minimum confidence interval */ + private static final float MINIMUM_CONFIDENCE_INTERVAL = 0.9f; - static final Lucene104ScalarQuantizedVectorScorer flatVectorScorer = new Lucene104ScalarQuantizedVectorScorer( - FlatVectorScorerUtil.getLucene99FlatVectorsScorer() + /** The maximum confidence interval */ + private static final float MAXIMUM_CONFIDENCE_INTERVAL = 1f; + + static final FlatVectorsScorer flatVectorScorer = new ES93ScalarQuantizedVectorsFormat.ESQuantizedFlatVectorsScorer( + new ScalarQuantizedVectorScorer(FlatVectorScorerUtil.getLucene99FlatVectorsScorer()) ); - private final Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding encoding; private final FlatVectorsFormat rawVectorFormat; + /** + * Controls the confidence interval used to scalar quantize the vectors the default value is + * 
calculated as `1-1/(vector_dimensions + 1)` + */ + public final Float confidenceInterval; + + private final byte bits; + private final boolean compress; + public ES93HnswScalarQuantizedVectorsFormat() { super(NAME); - this.encoding = Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding.SEVEN_BIT; - this.rawVectorFormat = new ES93GenericFlatVectorsFormat(ES93GenericFlatVectorsFormat.ElementType.STANDARD, false); + this.rawVectorFormat = new ES93GenericFlatVectorsFormat(DenseVectorFieldMapper.ElementType.FLOAT, false); + this.confidenceInterval = null; + this.bits = 7; + this.compress = false; } public ES93HnswScalarQuantizedVectorsFormat( int maxConn, int beamWidth, - Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding encoding, - ES93GenericFlatVectorsFormat.ElementType elementType, + DenseVectorFieldMapper.ElementType elementType, + Float confidenceInterval, + int bits, + boolean compress, boolean useDirectIO ) { - super(NAME, maxConn, beamWidth); - this.encoding = encoding; - this.rawVectorFormat = new ES93GenericFlatVectorsFormat(elementType, useDirectIO); + this(maxConn, beamWidth, elementType, confidenceInterval, bits, compress, useDirectIO, DEFAULT_NUM_MERGE_WORKER, null); } public ES93HnswScalarQuantizedVectorsFormat( int maxConn, int beamWidth, - Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding encoding, - ES93GenericFlatVectorsFormat.ElementType elementType, + DenseVectorFieldMapper.ElementType elementType, + Float confidenceInterval, + int bits, + boolean compress, boolean useDirectIO, int numMergeWorkers, ExecutorService mergeExec ) { super(NAME, maxConn, beamWidth, numMergeWorkers, mergeExec); - this.encoding = encoding; + + if (confidenceInterval != null + && confidenceInterval != DYNAMIC_CONFIDENCE_INTERVAL + && (confidenceInterval < MINIMUM_CONFIDENCE_INTERVAL || confidenceInterval > MAXIMUM_CONFIDENCE_INTERVAL)) { + throw new IllegalArgumentException( + "confidenceInterval must be between " + + MINIMUM_CONFIDENCE_INTERVAL + + " and " + + 
MAXIMUM_CONFIDENCE_INTERVAL + + "; confidenceInterval=" + + confidenceInterval + ); + } + if (bits < 1 || bits > 8 || (ALLOWED_BITS & (1 << bits)) == 0) { + throw new IllegalArgumentException("bits must be one of: 4, 7, 8; bits=" + bits); + } + assert elementType != DenseVectorFieldMapper.ElementType.BIT : "BIT should not be used with scalar quantization"; + this.rawVectorFormat = new ES93GenericFlatVectorsFormat(elementType, useDirectIO); + this.confidenceInterval = confidenceInterval; + this.bits = (byte) bits; + this.compress = compress; } @Override @@ -80,10 +124,16 @@ public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException state, maxConn, beamWidth, - new Lucene104ScalarQuantizedVectorsWriter(state, encoding, rawVectorFormat.fieldsWriter(state), flatVectorScorer), + new Lucene99ScalarQuantizedVectorsWriter( + state, + confidenceInterval, + bits, + compress, + rawVectorFormat.fieldsWriter(state), + flatVectorScorer + ), numMergeWorkers, - mergeExec, - 0 + mergeExec ); } @@ -91,7 +141,7 @@ public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException { return new Lucene99HnswVectorsReader( state, - new Lucene104ScalarQuantizedVectorsReader(state, rawVectorFormat.fieldsReader(state), flatVectorScorer) + new Lucene99ScalarQuantizedVectorsReader(state, rawVectorFormat.fieldsReader(state), flatVectorScorer) ); } } diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93ScalarQuantizedVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93ScalarQuantizedVectorsFormat.java index 075c1728f1029..d9f881b9ba3a6 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93ScalarQuantizedVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93ScalarQuantizedVectorsFormat.java @@ -14,54 +14,114 @@ import 
org.apache.lucene.codecs.KnnVectorsWriter; import org.apache.lucene.codecs.hnsw.FlatVectorScorerUtil; import org.apache.lucene.codecs.hnsw.FlatVectorsFormat; -import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorScorer; -import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorsFormat; -import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorsReader; -import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorsWriter; +import org.apache.lucene.codecs.hnsw.FlatVectorsReader; +import org.apache.lucene.codecs.hnsw.FlatVectorsScorer; +import org.apache.lucene.codecs.hnsw.ScalarQuantizedVectorScorer; +import org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsReader; +import org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsWriter; +import org.apache.lucene.index.ByteVectorValues; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FloatVectorValues; +import org.apache.lucene.index.KnnVectorValues; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.index.VectorSimilarityFunction; +import org.apache.lucene.search.AcceptDocs; +import org.apache.lucene.search.KnnCollector; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.hnsw.OrdinalTranslatedKnnCollector; +import org.apache.lucene.util.hnsw.RandomVectorScorer; +import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier; +import org.apache.lucene.util.quantization.QuantizedByteVectorValues; +import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; +import org.elasticsearch.simdvec.VectorScorerFactory; +import org.elasticsearch.simdvec.VectorSimilarityType; import java.io.IOException; +import java.util.Map; +import static org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsFormat.DYNAMIC_CONFIDENCE_INTERVAL; import static 
org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.MAX_DIMS_COUNT; public class ES93ScalarQuantizedVectorsFormat extends KnnVectorsFormat { static final String NAME = "ES93ScalarQuantizedVectorsFormat"; + private static final int ALLOWED_BITS = (1 << 8) | (1 << 7) | (1 << 4); - static final Lucene104ScalarQuantizedVectorScorer flatVectorScorer = new Lucene104ScalarQuantizedVectorScorer( - FlatVectorScorerUtil.getLucene99FlatVectorsScorer() + static final FlatVectorsScorer flatVectorScorer = new ESQuantizedFlatVectorsScorer( + new ScalarQuantizedVectorScorer(FlatVectorScorerUtil.getLucene99FlatVectorsScorer()) ); - private final Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding encoding; + /** The minimum confidence interval */ + private static final float MINIMUM_CONFIDENCE_INTERVAL = 0.9f; + + /** The maximum confidence interval */ + private static final float MAXIMUM_CONFIDENCE_INTERVAL = 1f; + private final FlatVectorsFormat rawVectorFormat; + /** + * Controls the confidence interval used to scalar quantize the vectors the default value is + * calculated as `1-1/(vector_dimensions + 1)` + */ + public final Float confidenceInterval; + + private final byte bits; + private final boolean compress; + public ES93ScalarQuantizedVectorsFormat() { - this(ES93GenericFlatVectorsFormat.ElementType.STANDARD, Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding.SEVEN_BIT); + this(DenseVectorFieldMapper.ElementType.FLOAT, null, 7, false); } - public ES93ScalarQuantizedVectorsFormat(ES93GenericFlatVectorsFormat.ElementType elementType) { - this(elementType, Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding.SEVEN_BIT); + public ES93ScalarQuantizedVectorsFormat(DenseVectorFieldMapper.ElementType elementType) { + this(elementType, null, 7, false); } public ES93ScalarQuantizedVectorsFormat( - ES93GenericFlatVectorsFormat.ElementType elementType, - Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding encoding + DenseVectorFieldMapper.ElementType elementType, + Float 
confidenceInterval, + int bits, + boolean compress ) { super(NAME); - assert elementType != ES93GenericFlatVectorsFormat.ElementType.BIT : "BIT should not be used with scalar quantization"; - this.encoding = encoding; + if (confidenceInterval != null + && confidenceInterval != DYNAMIC_CONFIDENCE_INTERVAL + && (confidenceInterval < MINIMUM_CONFIDENCE_INTERVAL || confidenceInterval > MAXIMUM_CONFIDENCE_INTERVAL)) { + throw new IllegalArgumentException( + "confidenceInterval must be between " + + MINIMUM_CONFIDENCE_INTERVAL + + " and " + + MAXIMUM_CONFIDENCE_INTERVAL + + "; confidenceInterval=" + + confidenceInterval + ); + } + if (bits < 1 || bits > 8 || (ALLOWED_BITS & (1 << bits)) == 0) { + throw new IllegalArgumentException("bits must be one of: 4, 7, 8; bits=" + bits); + } + assert elementType != DenseVectorFieldMapper.ElementType.BIT : "BIT should not be used with scalar quantization"; + this.rawVectorFormat = new ES93GenericFlatVectorsFormat(elementType, false); + this.confidenceInterval = confidenceInterval; + this.bits = (byte) bits; + this.compress = compress; } @Override public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException { - return new Lucene104ScalarQuantizedVectorsWriter(state, encoding, rawVectorFormat.fieldsWriter(state), flatVectorScorer); + return new Lucene99ScalarQuantizedVectorsWriter( + state, + confidenceInterval, + bits, + compress, + rawVectorFormat.fieldsWriter(state), + flatVectorScorer + ); } @Override public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException { - return new Lucene104ScalarQuantizedVectorsReader(state, rawVectorFormat.fieldsReader(state), flatVectorScorer); + return new ES93FlatVectorReader(new Lucene99ScalarQuantizedVectorsReader(state, rawVectorFormat.fieldsReader(state), flatVectorScorer)); } @Override @@ -74,12 +134,136 @@ public String toString() { return NAME + "(name=" + NAME - + ", encoding=" - + encoding + + ", confidenceInterval=" + + confidenceInterval + + ", 
bits=" + + bits + + ", compressed=" + + compress + ", flatVectorScorer=" + flatVectorScorer + ", rawVectorFormat=" + rawVectorFormat + ")"; } + + static class ES93FlatVectorReader extends KnnVectorsReader { + + private final FlatVectorsReader reader; + + ES93FlatVectorReader(FlatVectorsReader reader) { + this.reader = reader; + } + + @Override + public void checkIntegrity() throws IOException { + reader.checkIntegrity(); + } + + @Override + public FloatVectorValues getFloatVectorValues(String field) throws IOException { + return reader.getFloatVectorValues(field); + } + + @Override + public ByteVectorValues getByteVectorValues(String field) throws IOException { + return reader.getByteVectorValues(field); + } + + @Override + public void search(String field, float[] target, KnnCollector knnCollector, AcceptDocs acceptDocs) throws IOException { + collectAllMatchingDocs(knnCollector, acceptDocs, reader.getRandomVectorScorer(field, target)); + } + + private void collectAllMatchingDocs(KnnCollector knnCollector, AcceptDocs acceptDocs, RandomVectorScorer scorer) + throws IOException { + OrdinalTranslatedKnnCollector collector = new OrdinalTranslatedKnnCollector(knnCollector, scorer::ordToDoc); + Bits acceptedOrds = scorer.getAcceptOrds(acceptDocs.bits()); + for (int i = 0; i < scorer.maxOrd(); i++) { + if (acceptedOrds == null || acceptedOrds.get(i)) { + collector.collect(i, scorer.score(i)); + collector.incVisitedCount(1); + } + } + assert collector.earlyTerminated() == false; + } + + @Override + public void search(String field, byte[] target, KnnCollector knnCollector, AcceptDocs acceptDocs) throws IOException { + collectAllMatchingDocs(knnCollector, acceptDocs, reader.getRandomVectorScorer(field, target)); + } + + @Override + public Map getOffHeapByteSize(FieldInfo fieldInfo) { + return reader.getOffHeapByteSize(fieldInfo); + } + + @Override + public void close() throws IOException { + reader.close(); + } + } + + static final class ESQuantizedFlatVectorsScorer 
implements FlatVectorsScorer { + + final FlatVectorsScorer delegate; + final VectorScorerFactory factory; + + ESQuantizedFlatVectorsScorer(FlatVectorsScorer delegate) { + this.delegate = delegate; + factory = VectorScorerFactory.instance().orElse(null); + } + + @Override + public String toString() { + return "ESFlatVectorsScorer(" + "delegate=" + delegate + ", factory=" + factory + ')'; + } + + @Override + public RandomVectorScorerSupplier getRandomVectorScorerSupplier(VectorSimilarityFunction sim, KnnVectorValues values) + throws IOException { + if (values instanceof QuantizedByteVectorValues qValues && qValues.getSlice() != null) { + // TODO: optimize int4 quantization + if (qValues.getScalarQuantizer().getBits() != 7) { + return delegate.getRandomVectorScorerSupplier(sim, values); + } + if (factory != null) { + var scorer = factory.getInt7SQVectorScorerSupplier( + VectorSimilarityType.of(sim), + qValues.getSlice(), + qValues, + qValues.getScalarQuantizer().getConstantMultiplier() + ); + if (scorer.isPresent()) { + return scorer.get(); + } + } + } + return delegate.getRandomVectorScorerSupplier(sim, values); + } + + @Override + public RandomVectorScorer getRandomVectorScorer(VectorSimilarityFunction sim, KnnVectorValues values, float[] query) + throws IOException { + if (values instanceof QuantizedByteVectorValues qValues && qValues.getSlice() != null) { + // TODO: optimize int4 quantization + if (qValues.getScalarQuantizer().getBits() != 7) { + return delegate.getRandomVectorScorer(sim, values, query); + } + if (factory != null) { + var scorer = factory.getInt7SQVectorScorer(sim, qValues, query); + if (scorer.isPresent()) { + return scorer.get(); + } + } + } + return delegate.getRandomVectorScorer(sim, values, query); + } + + @Override + public RandomVectorScorer getRandomVectorScorer(VectorSimilarityFunction sim, KnnVectorValues values, byte[] query) + throws IOException { + return delegate.getRandomVectorScorer(sim, values, query); + } + } } diff --git 
a/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93FlatBFloat16VectorFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93FlatBFloat16VectorFormatTests.java index 91d4054ae94ed..66ee2c038d8e0 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93FlatBFloat16VectorFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93FlatBFloat16VectorFormatTests.java @@ -24,6 +24,7 @@ import org.elasticsearch.common.logging.LogConfigurator; import org.elasticsearch.index.codec.vectors.BFloat16; import org.elasticsearch.index.codec.vectors.BaseBFloat16KnnVectorsFormatTestCase; +import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; import org.junit.AssumptionViolatedException; import java.io.IOException; @@ -41,7 +42,7 @@ public class ES93FlatBFloat16VectorFormatTests extends BaseBFloat16KnnVectorsFor @Override protected Codec getCodec() { - return TestUtil.alwaysKnnVectorsFormat(new ES93FlatVectorFormat(ES93GenericFlatVectorsFormat.ElementType.BFLOAT16)); + return TestUtil.alwaysKnnVectorsFormat(new ES93FlatVectorFormat(DenseVectorFieldMapper.ElementType.BFLOAT16)); } public void testSearchWithVisitedLimit() { diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93FlatVectorFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93FlatVectorFormatTests.java index 1ada03a70bed6..7cc4ddb949832 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93FlatVectorFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93FlatVectorFormatTests.java @@ -23,6 +23,7 @@ import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase; import org.apache.lucene.tests.util.TestUtil; import org.elasticsearch.common.logging.LogConfigurator; +import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; import org.junit.AssumptionViolatedException; 
import java.io.IOException; @@ -40,7 +41,7 @@ public class ES93FlatVectorFormatTests extends BaseKnnVectorsFormatTestCase { @Override protected Codec getCodec() { - return TestUtil.alwaysKnnVectorsFormat(new ES93FlatVectorFormat(ES93GenericFlatVectorsFormat.ElementType.STANDARD)); + return TestUtil.alwaysKnnVectorsFormat(new ES93FlatVectorFormat(DenseVectorFieldMapper.ElementType.FLOAT)); } public void testSearchWithVisitedLimit() { diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93HnswScalarQuantizedBFloat16VectorsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93HnswScalarQuantizedBFloat16VectorsFormatTests.java index a1bda3e4b2342..3c8bb8cc25d97 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93HnswScalarQuantizedBFloat16VectorsFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93HnswScalarQuantizedBFloat16VectorsFormatTests.java @@ -10,10 +10,11 @@ package org.elasticsearch.index.codec.vectors.es93; import org.apache.lucene.codecs.KnnVectorsFormat; -import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorsFormat; import org.apache.lucene.store.Directory; import org.elasticsearch.index.codec.vectors.BFloat16; import org.elasticsearch.index.codec.vectors.BaseHnswBFloat16VectorsFormatTestCase; +import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; +import org.junit.AssumptionViolatedException; import java.io.IOException; import java.util.concurrent.ExecutorService; @@ -33,8 +34,10 @@ protected KnnVectorsFormat createFormat() { return new ES93HnswScalarQuantizedVectorsFormat( DEFAULT_MAX_CONN, DEFAULT_BEAM_WIDTH, - Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding.SEVEN_BIT, - ES93GenericFlatVectorsFormat.ElementType.BFLOAT16, + DenseVectorFieldMapper.ElementType.BFLOAT16, + null, + 7, + false, random().nextBoolean() ); } @@ -44,8 +47,10 @@ protected KnnVectorsFormat createFormat(int maxConn, int 
beamWidth) { return new ES93HnswScalarQuantizedVectorsFormat( maxConn, beamWidth, - Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding.SEVEN_BIT, - ES93GenericFlatVectorsFormat.ElementType.BFLOAT16, + DenseVectorFieldMapper.ElementType.BFLOAT16, + null, + 7, + false, random().nextBoolean() ); } @@ -55,14 +60,21 @@ protected KnnVectorsFormat createFormat(int maxConn, int beamWidth, int numMerge return new ES93HnswScalarQuantizedVectorsFormat( maxConn, beamWidth, - Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding.SEVEN_BIT, - ES93GenericFlatVectorsFormat.ElementType.BFLOAT16, + DenseVectorFieldMapper.ElementType.BFLOAT16, + null, + 7, + false, random().nextBoolean(), numMergeWorkers, service ); } + @Override + public void testSingleVectorCase() throws Exception { + throw new AssumptionViolatedException("Scalar quantization changes the score significantly for MAXIMUM_INNER_PRODUCT"); + } + public void testSimpleOffHeapSize() throws IOException { float[] vector = randomVector(random().nextInt(12, 500)); try (Directory dir = newDirectory()) { diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93HnswScalarQuantizedVectorsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93HnswScalarQuantizedVectorsFormatTests.java index c2bf9e6352f15..d70eb94d3227e 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93HnswScalarQuantizedVectorsFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93HnswScalarQuantizedVectorsFormatTests.java @@ -10,9 +10,10 @@ package org.elasticsearch.index.codec.vectors.es93; import org.apache.lucene.codecs.KnnVectorsFormat; -import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorsFormat; import org.apache.lucene.store.Directory; import org.elasticsearch.index.codec.vectors.BaseHnswVectorsFormatTestCase; +import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; +import 
org.junit.AssumptionViolatedException; import java.io.IOException; import java.util.concurrent.ExecutorService; @@ -32,8 +33,10 @@ protected KnnVectorsFormat createFormat() { return new ES93HnswScalarQuantizedVectorsFormat( DEFAULT_MAX_CONN, DEFAULT_BEAM_WIDTH, - Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding.SEVEN_BIT, - ES93GenericFlatVectorsFormat.ElementType.STANDARD, + DenseVectorFieldMapper.ElementType.FLOAT, + null, + 7, + false, random().nextBoolean() ); } @@ -43,8 +46,10 @@ protected KnnVectorsFormat createFormat(int maxConn, int beamWidth) { return new ES93HnswScalarQuantizedVectorsFormat( maxConn, beamWidth, - Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding.SEVEN_BIT, - ES93GenericFlatVectorsFormat.ElementType.STANDARD, + DenseVectorFieldMapper.ElementType.FLOAT, + null, + 7, + false, random().nextBoolean() ); } @@ -54,14 +59,21 @@ protected KnnVectorsFormat createFormat(int maxConn, int beamWidth, int numMerge return new ES93HnswScalarQuantizedVectorsFormat( maxConn, beamWidth, - Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding.SEVEN_BIT, - ES93GenericFlatVectorsFormat.ElementType.STANDARD, + DenseVectorFieldMapper.ElementType.FLOAT, + null, + 7, + false, random().nextBoolean(), numMergeWorkers, service ); } + @Override + public void testSingleVectorCase() throws Exception { + throw new AssumptionViolatedException("Scalar quantization changes the score significantly for MAXIMUM_INNER_PRODUCT"); + } + public void testSimpleOffHeapSize() throws IOException { float[] vector = randomVector(random().nextInt(12, 500)); try (Directory dir = newDirectory()) { diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93ScalarQuantizedBFloat16VectorFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93ScalarQuantizedBFloat16VectorFormatTests.java index 57578097f2db4..8e4bc24ab6403 100644 --- 
a/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93ScalarQuantizedBFloat16VectorFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93ScalarQuantizedBFloat16VectorFormatTests.java @@ -25,6 +25,7 @@ import org.elasticsearch.common.logging.LogConfigurator; import org.elasticsearch.index.codec.vectors.BFloat16; import org.elasticsearch.index.codec.vectors.BaseBFloat16KnnVectorsFormatTestCase; +import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; import org.junit.AssumptionViolatedException; import java.io.IOException; @@ -46,7 +47,7 @@ public class ES93ScalarQuantizedBFloat16VectorFormatTests extends BaseBFloat16Kn @Override public void setUp() throws Exception { - format = new ES93ScalarQuantizedVectorsFormat(ES93GenericFlatVectorsFormat.ElementType.BFLOAT16); + format = new ES93ScalarQuantizedVectorsFormat(DenseVectorFieldMapper.ElementType.BFLOAT16); super.setUp(); } diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93ScalarQuantizedVectorsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93ScalarQuantizedVectorsFormatTests.java index a880852378d61..5b6b381945c41 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93ScalarQuantizedVectorsFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93ScalarQuantizedVectorsFormatTests.java @@ -23,6 +23,7 @@ import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase; import org.apache.lucene.tests.util.TestUtil; import org.elasticsearch.common.logging.LogConfigurator; +import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; import org.junit.AssumptionViolatedException; import java.io.IOException; @@ -42,7 +43,7 @@ public class ES93ScalarQuantizedVectorsFormatTests extends BaseKnnVectorsFormatT @Override protected Codec getCodec() { - return TestUtil.alwaysKnnVectorsFormat(new 
ES93ScalarQuantizedVectorsFormat(ES93GenericFlatVectorsFormat.ElementType.STANDARD)); + return TestUtil.alwaysKnnVectorsFormat(new ES93ScalarQuantizedVectorsFormat(DenseVectorFieldMapper.ElementType.FLOAT)); } public void testSearchWithVisitedLimit() { From e6c6b18d8deab1efa64f02b08454e59f0b506069 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Mon, 17 Nov 2025 14:28:53 +0000 Subject: [PATCH 3/8] [CI] Auto commit changes from spotless --- .../vectors/es93/ES93HnswScalarQuantizedVectorsFormat.java | 1 - .../codec/vectors/es93/ES93ScalarQuantizedVectorsFormat.java | 4 +++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93HnswScalarQuantizedVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93HnswScalarQuantizedVectorsFormat.java index daef9f0fa2364..ce5d2aeaf41b9 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93HnswScalarQuantizedVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93HnswScalarQuantizedVectorsFormat.java @@ -35,7 +35,6 @@ public class ES93HnswScalarQuantizedVectorsFormat extends AbstractHnswVectorsFor static final String NAME = "ES93HnswScalarQuantizedVectorsFormat"; private static final int ALLOWED_BITS = (1 << 8) | (1 << 7) | (1 << 4); - /** The minimum confidence interval */ private static final float MINIMUM_CONFIDENCE_INTERVAL = 0.9f; diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93ScalarQuantizedVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93ScalarQuantizedVectorsFormat.java index d9f881b9ba3a6..3750614da3698 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93ScalarQuantizedVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93ScalarQuantizedVectorsFormat.java @@ -121,7 +121,9 @@ public KnnVectorsWriter 
fieldsWriter(SegmentWriteState state) throws IOException @Override public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException { - return new ES93FlatVectorReader(new Lucene99ScalarQuantizedVectorsReader(state, rawVectorFormat.fieldsReader(state), flatVectorScorer)); + return new ES93FlatVectorReader( + new Lucene99ScalarQuantizedVectorsReader(state, rawVectorFormat.fieldsReader(state), flatVectorScorer) + ); } @Override From c7774d2e3ae5eb0864f6caf5e567a8befd0c255c Mon Sep 17 00:00:00 2001 From: Simon Cooper Date: Mon, 17 Nov 2025 16:22:34 +0000 Subject: [PATCH 4/8] Hook in new formats --- .../vectors/DenseVectorFieldMapper.java | 114 ++++++++++++------ .../vectors/DenseVectorFieldTypeTests.java | 5 + .../mapper/SemanticTextFieldMapperTests.java | 8 +- 3 files changed, 87 insertions(+), 40 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java index aa02950e460b9..9186374281947 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java @@ -59,9 +59,12 @@ import org.elasticsearch.index.codec.vectors.es818.ES818BinaryQuantizedVectorsFormat; import org.elasticsearch.index.codec.vectors.es818.ES818HnswBinaryQuantizedVectorsFormat; import org.elasticsearch.index.codec.vectors.es93.ES93BinaryQuantizedVectorsFormat; +import org.elasticsearch.index.codec.vectors.es93.ES93FlatVectorFormat; import org.elasticsearch.index.codec.vectors.es93.ES93GenericFlatVectorsFormat; import org.elasticsearch.index.codec.vectors.es93.ES93HnswBinaryQuantizedVectorsFormat; +import org.elasticsearch.index.codec.vectors.es93.ES93HnswScalarQuantizedVectorsFormat; import org.elasticsearch.index.codec.vectors.es93.ES93HnswVectorsFormat; +import 
org.elasticsearch.index.codec.vectors.es93.ES93ScalarQuantizedVectorsFormat; import org.elasticsearch.index.fielddata.FieldDataContext; import org.elasticsearch.index.fielddata.IndexFieldData; import org.elasticsearch.index.mapper.BlockLoader; @@ -406,6 +409,7 @@ private DenseVectorIndexOptions defaultIndexOptions(boolean defaultInt8Hnsw, boo Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN, Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH, null, + false, null ); } @@ -1512,14 +1516,14 @@ public DenseVectorIndexOptions parseIndexOptions(String fieldName, Map Date: Mon, 17 Nov 2025 16:51:44 +0000 Subject: [PATCH 5/8] Compile fix --- .../xpack/inference/integration/SemanticTextIndexOptionsIT.java | 1 + 1 file changed, 1 insertion(+) diff --git a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/SemanticTextIndexOptionsIT.java b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/SemanticTextIndexOptionsIT.java index 1bd79aab95a4f..eb187961225b9 100644 --- a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/SemanticTextIndexOptionsIT.java +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/SemanticTextIndexOptionsIT.java @@ -120,6 +120,7 @@ public void testValidateIndexOptionsWithBasicLicense() throws Exception { randomIntBetween(1, 100), randomIntBetween(1, 10_000), null, + randomBoolean(), null ); assertAcked( From 5d084d1a5523b22f479fa94e0c8e23e4633ae5e8 Mon Sep 17 00:00:00 2001 From: Simon Cooper Date: Tue, 18 Nov 2025 10:05:07 +0000 Subject: [PATCH 6/8] Add yaml tests --- .../41_knn_search_byte_quantized_bfloat16.yml | 895 +++++++++++++++++ ...nn_search_half_byte_quantized_bfloat16.yml | 932 ++++++++++++++++++ .../42_knn_search_flat_bfloat16.yml | 432 ++++++++ .../42_knn_search_int4_flat_bfloat16.yml | 619 ++++++++++++ .../42_knn_search_int8_flat_bfloat16.yml | 555 +++++++++++ 
.../ES93HnswScalarQuantizedVectorsFormat.java | 22 + .../ES93ScalarQuantizedVectorsFormat.java | 2 +- .../vectors/DenseVectorFieldMapperTests.java | 137 ++- 8 files changed, 3548 insertions(+), 46 deletions(-) create mode 100644 rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/41_knn_search_byte_quantized_bfloat16.yml create mode 100644 rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/41_knn_search_half_byte_quantized_bfloat16.yml create mode 100644 rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/42_knn_search_flat_bfloat16.yml create mode 100644 rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/42_knn_search_int4_flat_bfloat16.yml create mode 100644 rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/42_knn_search_int8_flat_bfloat16.yml diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/41_knn_search_byte_quantized_bfloat16.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/41_knn_search_byte_quantized_bfloat16.yml new file mode 100644 index 0000000000000..9912dab51f4cd --- /dev/null +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/41_knn_search_byte_quantized_bfloat16.yml @@ -0,0 +1,895 @@ +setup: + - requires: + cluster_features: [ "mapper.vectors.generic_vector_format" ] + reason: Needs generic vector support + - do: + indices.create: + index: hnsw_byte_quantized + body: + settings: + index: + number_of_shards: 1 + mappings: + properties: + name: + type: keyword + vector: + type: dense_vector + element_type: bfloat16 + dims: 5 + index: true + similarity: l2_norm + index_options: + type: int8_hnsw + another_vector: + type: dense_vector + element_type: bfloat16 + dims: 5 + index: true + similarity: l2_norm + index_options: + type: int8_hnsw + + - do: + index: + index: hnsw_byte_quantized + id: "1" + body: + name: cow.jpg + vector: [230.0, 
300.33, -34.8988, 15.555, -200.0] + another_vector: [130.0, 115.0, -1.02, 15.555, -100.0] + # Flush in order to provoke a merge later + - do: + indices.flush: + index: hnsw_byte_quantized + + - do: + index: + index: hnsw_byte_quantized + id: "2" + body: + name: moose.jpg + vector: [-0.5, 100.0, -13, 14.8, -156.0] + another_vector: [-0.5, 50.0, -1, 1, 120] + # Flush in order to provoke a merge later + - do: + indices.flush: + index: hnsw_byte_quantized + + - do: + index: + index: hnsw_byte_quantized + id: "3" + body: + name: rabbit.jpg + vector: [0.5, 111.3, -13.0, 14.8, -156.0] + another_vector: [-0.5, 11.0, 0, 12, 111.0] + # Flush in order to provoke a merge later + - do: + indices.flush: + index: hnsw_byte_quantized + + - do: + indices.forcemerge: + index: hnsw_byte_quantized + max_num_segments: 1 + + - do: + indices.refresh: { } +--- +"kNN search only": + - do: + search: + index: hnsw_byte_quantized + body: + fields: [ "name" ] + knn: + field: vector + query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + k: 2 + num_candidates: 3 + + - match: {hits.hits.0._id: "2"} + - match: {hits.hits.0.fields.name.0: "moose.jpg"} + + - match: {hits.hits.1._id: "3"} + - match: {hits.hits.1.fields.name.0: "rabbit.jpg"} +--- +"kNN multi-field search only": + - do: + search: + index: hnsw_byte_quantized + body: + fields: [ "name" ] + knn: + - {field: vector, query_vector: [-0.5, 90.0, -10, 14.8, -156.0], k: 2, num_candidates: 3} + - {field: another_vector, query_vector: [-0.5, 11.0, 0, 12, 111.0], k: 2, num_candidates: 3} + + - match: {hits.hits.0._id: "3"} + - match: {hits.hits.0.fields.name.0: "rabbit.jpg"} + + - match: {hits.hits.1._id: "2"} + - match: {hits.hits.1.fields.name.0: "moose.jpg"} +--- +"kNN search plus query": + - do: + search: + index: hnsw_byte_quantized + body: + fields: [ "name" ] + knn: + field: vector + query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + k: 2 + num_candidates: 3 + query: + term: + name: cow.jpg + + - match: {hits.hits.0._id: "1"} + - match: 
{hits.hits.0.fields.name.0: "cow.jpg"} + + - match: {hits.hits.1._id: "2"} + - match: {hits.hits.1.fields.name.0: "moose.jpg"} + + - match: {hits.hits.2._id: "3"} + - match: {hits.hits.2.fields.name.0: "rabbit.jpg"} +--- +"kNN multi-field search with query": + - do: + search: + index: hnsw_byte_quantized + body: + fields: [ "name" ] + knn: + - {field: vector, query_vector: [-0.5, 90.0, -10, 14.8, -156.0], k: 2, num_candidates: 3} + - {field: another_vector, query_vector: [-0.5, 11.0, 0, 12, 111.0], k: 2, num_candidates: 3} + query: + term: + name: cow.jpg + + - match: {hits.hits.0._id: "3"} + - match: {hits.hits.0.fields.name.0: "rabbit.jpg"} + + - match: {hits.hits.1._id: "1"} + - match: {hits.hits.1.fields.name.0: "cow.jpg"} + + - match: {hits.hits.2._id: "2"} + - match: {hits.hits.2.fields.name.0: "moose.jpg"} +--- +"kNN search with filter": + - do: + search: + index: hnsw_byte_quantized + body: + fields: [ "name" ] + knn: + field: vector + query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + k: 2 + num_candidates: 3 + filter: + term: + name: "rabbit.jpg" + + - match: {hits.total.value: 1} + - match: {hits.hits.0._id: "3"} + - match: {hits.hits.0.fields.name.0: "rabbit.jpg"} + + - do: + search: + index: hnsw_byte_quantized + body: + fields: [ "name" ] + knn: + field: vector + query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + k: 2 + num_candidates: 3 + filter: + - term: + name: "rabbit.jpg" + - term: + _id: 2 + + - match: {hits.total.value: 0} + +--- +"KNN Vector similarity search only": + - do: + search: + index: hnsw_byte_quantized + body: + fields: [ "name" ] + knn: + num_candidates: 3 + k: 3 + field: vector + similarity: 10.3 + query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + + - length: {hits.hits: 1} + + - match: {hits.hits.0._id: "2"} + - match: {hits.hits.0.fields.name.0: "moose.jpg"} +--- +"Vector similarity with filter only": + - do: + search: + index: hnsw_byte_quantized + body: + fields: [ "name" ] + knn: + num_candidates: 3 + k: 3 + field: vector + 
similarity: 11 + query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + filter: {"term": {"name": "moose.jpg"}} + + - length: {hits.hits: 1} + + - match: {hits.hits.0._id: "2"} + - match: {hits.hits.0.fields.name.0: "moose.jpg"} + + - do: + search: + index: hnsw_byte_quantized + body: + fields: [ "name" ] + knn: + num_candidates: 3 + k: 3 + field: vector + similarity: 110 + query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + filter: {"term": {"name": "cow.jpg"}} + + - length: {hits.hits: 0} +--- +"Knn search with mip": + - do: + indices.create: + index: mip + body: + settings: + index: + number_of_shards: 1 + mappings: + properties: + name: + type: keyword + vector: + type: dense_vector + element_type: bfloat16 + dims: 5 + index: true + similarity: max_inner_product + index_options: + type: int8_hnsw + + - do: + index: + index: mip + id: "1" + body: + name: cow.jpg + vector: [1, 2, 3, 4, 5] + + # Flush in order to provoke a merge later + - do: + indices.flush: { } + + - do: + index: + index: mip + id: "2" + body: + name: moose.jpg + vector: [1, 1, 1, 1, 1] + + # Flush in order to provoke a merge later + - do: + indices.flush: { } + + - do: + index: + index: mip + id: "3" + body: + name: rabbit.jpg + vector: [1, 2, 2, 2, 2] + + # We force merge into a single segment to make sure scores are more uniform + # Each segment can have a different quantization error, which can affect scores and mip is especially sensitive to this + - do: + indices.forcemerge: + index: mip + max_num_segments: 1 + + - do: + indices.refresh: {} + + - do: + search: + index: mip + body: + fields: [ "name" ] + knn: + num_candidates: 3 + k: 3 + field: vector + query_vector: [1, 2, 3, 4, 5] + + + - length: {hits.hits: 3} + - match: {hits.hits.0._id: "1"} + - match: {hits.hits.1._id: "3"} + - match: {hits.hits.2._id: "2"} + + - do: + search: + index: mip + body: + fields: [ "name" ] + knn: + num_candidates: 3 + k: 3 + field: vector + query_vector: [1, 2, 3, 4, 5] + filter: { "term": { "name": "moose.jpg" } } + 
+ + + - length: {hits.hits: 1} + - match: {hits.hits.0._id: "2"} +--- +"Cosine similarity with indexed vector": + - skip: + features: "headers" + - do: + headers: + Content-Type: application/json + search: + rest_total_hits_as_int: true + body: + query: + script_score: + query: {match_all: {} } + script: + source: "cosineSimilarity(params.query_vector, 'vector')" + params: + query_vector: [0.5, 111.3, -13.0, 14.8, -156.0] + + - match: {hits.total: 3} + + - match: {hits.hits.0._id: "3"} + - gte: {hits.hits.0._score: 0.999} + - lte: {hits.hits.0._score: 1.001} + + - match: {hits.hits.1._id: "2"} + - gte: {hits.hits.1._score: 0.998} + - lte: {hits.hits.1._score: 1.0} + + - match: {hits.hits.2._id: "1"} + - gte: {hits.hits.2._score: 0.78} + - lte: {hits.hits.2._score: 0.791} + +--- +# Won't be true for larger datasets, but this helps checking kNN vs rescoring vs exact search +"Vector rescoring has the same scoring as exact search for kNN section": + - requires: + reason: 'Quantized vector rescoring is required' + test_runner_features: [capabilities] + capabilities: + - method: GET + path: /_search + capabilities: [knn_quantized_vector_rescore_oversample] + - skip: + features: "headers" + + # Rescore + - do: + headers: + Content-Type: application/json + search: + rest_total_hits_as_int: true + index: hnsw_byte_quantized + body: + size: 3 + query: + knn: + k: 3 + num_candidates: 3 + field: vector + query_vector: [0.5, 111.3, -13.0, 14.8, -156.0] + rescore_vector: + oversample: 1.5 + + # Get rescoring scores - hit ordering may change depending on how things are distributed + - match: { hits.total: 3 } + - set: { hits.hits.0._score: rescore_score0 } + - set: { hits.hits.1._score: rescore_score1 } + - set: { hits.hits.2._score: rescore_score2 } + + # Exact knn via script score + - do: + headers: + Content-Type: application/json + search: + rest_total_hits_as_int: true + body: + query: + script_score: + query: {match_all: {} } + script: + source: "1.0 / (1.0 + 
Math.pow(l2norm(params.query_vector, 'vector'), 2.0))" + params: + query_vector: [0.5, 111.3, -13.0, 14.8, -156.0] + + # Compare scores as hit IDs may change depending on how things are distributed + - match: { hits.total: 3 } + - match: { hits.hits.0._score: $rescore_score0 } + - match: { hits.hits.1._score: $rescore_score1 } + - match: { hits.hits.2._score: $rescore_score2 } + +--- +"Test bad quantization parameters": + - do: + catch: bad_request + indices.create: + index: bad_hnsw_quantized + body: + mappings: + properties: + vector: + type: dense_vector + dims: 5 + element_type: byte + index: true + index_options: + type: int8_hnsw + + - do: + catch: bad_request + indices.create: + index: bad_hnsw_quantized + body: + mappings: + properties: + vector: + type: dense_vector + dims: 5 + index: false + index_options: + type: int8_hnsw +--- +"Test create, merge, and search cosine": + - do: + indices.create: + index: hnsw_byte_quantized_merge_cosine + body: + settings: + index: + number_of_shards: 1 + + - do: + indices.put_mapping: + index: hnsw_byte_quantized_merge_cosine + body: + properties: + embedding: + type: dense_vector + element_type: bfloat16 + similarity: cosine + index_options: + type: int8_hnsw + + - do: + index: + index: hnsw_byte_quantized_merge_cosine + id: "1" + body: + embedding: [1.0, 1.0, 1.0, 1.0] + + # Flush in order to provoke a merge later + - do: + indices.flush: { } + + - do: + index: + index: hnsw_byte_quantized_merge_cosine + id: "2" + body: + embedding: [1.0, 1.0, 1.0, 2.0] + + # Flush in order to provoke a merge later + - do: + indices.flush: { } + + - do: + index: + index: hnsw_byte_quantized_merge_cosine + id: "3" + body: + embedding: [1.0, 1.0, 1.0, 3.0] + + - do: + indices.forcemerge: + index: hnsw_byte_quantized_merge_cosine + max_num_segments: 1 + + - do: + indices.refresh: {} + + - do: + search: + index: hnsw_byte_quantized_merge_cosine + body: + size: 3 + query: + knn: + field: embedding + query_vector: [1.0, 1.0, 1.0, 1.0] + 
num_candidates: 10 + + - length: { hits.hits: 3 } + - match: { hits.hits.0._id: "1"} + - match: { hits.hits.1._id: "2"} + - match: { hits.hits.2._id: "3"} +--- +"Test create, merge, and search dot_product": + - requires: + cluster_features: "gte_v8.12.0" + reason: 'kNN float to byte quantization added in 8.12' + - do: + indices.create: + index: hnsw_byte_quantized_merge_dot_product + body: + settings: + index: + number_of_shards: 1 + + - do: + indices.put_mapping: + index: hnsw_byte_quantized_merge_dot_product + body: + properties: + embedding: + type: dense_vector + element_type: bfloat16 + similarity: dot_product + index_options: + type: int8_hnsw + + - do: + index: + index: hnsw_byte_quantized_merge_dot_product + id: "1" + body: + embedding: [0.6, 0.8] + + # Flush in order to provoke a merge later + - do: + indices.flush: { } + + - do: + index: + index: hnsw_byte_quantized_merge_dot_product + id: "2" + body: + embedding: [0.8, 0.6] + + # Flush in order to provoke a merge later + - do: + indices.flush: { } + + - do: + index: + index: hnsw_byte_quantized_merge_dot_product + id: "3" + body: + embedding: [-0.6, -0.8] + + - do: + indices.forcemerge: + index: hnsw_byte_quantized_merge_dot_product + max_num_segments: 1 + + - do: + indices.refresh: {} + + - do: + search: + index: hnsw_byte_quantized_merge_dot_product + body: + size: 3 + query: + knn: + field: embedding + query_vector: [0.6, 0.8] + num_candidates: 10 + + - length: { hits.hits: 3 } + - match: { hits.hits.0._id: "1"} + - match: { hits.hits.1._id: "2"} + - match: { hits.hits.2._id: "3"} +--- +"Test index configured rescore vector": + - requires: + cluster_features: ["mapper.dense_vector.rescore_vector"] + reason: Needs rescore_vector feature + - skip: + features: "headers" + - do: + indices.create: + index: int8_rescore_hnsw + body: + settings: + index: + number_of_shards: 1 + mappings: + properties: + vector: + type: dense_vector + element_type: bfloat16 + dims: 4 + similarity: max_inner_product + 
index_options: + type: int8_hnsw + rescore_vector: + oversample: 1.5 + + - do: + bulk: + index: int8_rescore_hnsw + refresh: true + body: | + { "index": {"_id": "1"}} + { "vector": [230.0, 300.33, -34.8988, 15.555] } + { "index": {"_id": "2"}} + { "vector": [-0.5, 100.0, -13, 14.8] } + { "index": {"_id": "3"}} + { "vector": [0.5, 111.3, -13.0, 14.8] } + - do: + headers: + Content-Type: application/json + search: + rest_total_hits_as_int: true + index: int8_rescore_hnsw + body: + knn: + field: vector + query_vector: [-0.5, 90.0, -10, 14.8] + k: 3 + num_candidates: 3 + + - match: { hits.total: 3 } + - set: { hits.hits.0._score: rescore_score0 } + - set: { hits.hits.1._score: rescore_score1 } + - set: { hits.hits.2._score: rescore_score2 } + + - do: + headers: + Content-Type: application/json + search: + rest_total_hits_as_int: true + index: int8_rescore_hnsw + body: + query: + script_score: + query: {match_all: {} } + script: + source: "double similarity = dotProduct(params.query_vector, 'vector'); return similarity < 0 ? 
1 / (1 + -1 * similarity) : similarity + 1" + params: + query_vector: [-0.5, 90.0, -10, 14.8] + + # Compare scores as hit IDs may change depending on how things are distributed + - match: { hits.total: 3 } + - match: { hits.hits.0._score: $rescore_score0 } + - match: { hits.hits.1._score: $rescore_score1 } + - match: { hits.hits.2._score: $rescore_score2 } +--- +"Test index configured rescore vector updateable and settable to 0": + - requires: + cluster_features: ["mapper.dense_vector.rescore_zero_vector"] + reason: Needs rescore_zero_vector feature + + - do: + indices.create: + index: int8_rescore_0_hnsw + body: + settings: + index: + number_of_shards: 1 + mappings: + properties: + vector: + type: dense_vector + element_type: bfloat16 + index_options: + type: int8_hnsw + rescore_vector: + oversample: 0 + + - do: + indices.create: + index: int8_rescore_update_hnsw + body: + settings: + index: + number_of_shards: 1 + mappings: + properties: + vector: + type: dense_vector + element_type: bfloat16 + index_options: + type: int8_hnsw + rescore_vector: + oversample: 1 + + - do: + indices.put_mapping: + index: int8_rescore_update_hnsw + body: + properties: + vector: + type: dense_vector + index_options: + type: int8_hnsw + rescore_vector: + oversample: 0 + + - do: + indices.get_mapping: + index: int8_rescore_update_hnsw + + - match: { .int8_rescore_update_hnsw.mappings.properties.vector.index_options.rescore_vector.oversample: 0 } +--- +"Test index configured rescore vector score consistency": + - requires: + cluster_features: ["mapper.dense_vector.rescore_zero_vector"] + reason: Needs rescore_zero_vector feature + - skip: + features: "headers" + - do: + indices.create: + index: int8_rescore_zero_hnsw + body: + settings: + index: + number_of_shards: 1 + mappings: + properties: + vector: + type: dense_vector + element_type: bfloat16 + similarity: max_inner_product + dims: 4 + index_options: + type: int8_hnsw + rescore_vector: + oversample: 0 + + - do: + bulk: + index: 
int8_rescore_zero_hnsw + refresh: true + body: | + { "index": {"_id": "1"}} + { "vector": [230.0, 300.33, -34.8988, 15.555] } + { "index": {"_id": "2"}} + { "vector": [-0.5, 100.0, -13, 14.8] } + { "index": {"_id": "3"}} + { "vector": [0.5, 111.3, -13.0, 14.8] } + - do: + headers: + Content-Type: application/json + search: + rest_total_hits_as_int: true + index: int8_rescore_zero_hnsw + body: + knn: + field: vector + query_vector: [-0.5, 90.0, -10, 14.8] + k: 3 + num_candidates: 3 + + - match: { hits.total: 3 } + - set: { hits.hits.0._score: raw_score0 } + - set: { hits.hits.1._score: raw_score1 } + - set: { hits.hits.2._score: raw_score2 } + + + - do: + headers: + Content-Type: application/json + search: + rest_total_hits_as_int: true + index: int8_rescore_zero_hnsw + body: + knn: + field: vector + query_vector: [-0.5, 90.0, -10, 14.8] + k: 3 + num_candidates: 3 + rescore_vector: + oversample: 2 + + - match: { hits.total: 3 } + - set: { hits.hits.0._score: override_score0 } + - set: { hits.hits.1._score: override_score1 } + - set: { hits.hits.2._score: override_score2 } + + - do: + indices.put_mapping: + index: int8_rescore_zero_hnsw + body: + properties: + vector: + type: dense_vector + element_type: bfloat16 + similarity: max_inner_product + dims: 4 + index_options: + type: int8_hnsw + rescore_vector: + oversample: 2 + + - do: + headers: + Content-Type: application/json + search: + rest_total_hits_as_int: true + index: int8_rescore_zero_hnsw + body: + knn: + field: vector + query_vector: [-0.5, 90.0, -10, 14.8] + k: 3 + num_candidates: 3 + + - match: { hits.total: 3 } + - set: { hits.hits.0._score: default_rescore0 } + - set: { hits.hits.1._score: default_rescore1 } + - set: { hits.hits.2._score: default_rescore2 } + + - do: + indices.put_mapping: + index: int8_rescore_zero_hnsw + body: + properties: + vector: + type: dense_vector + element_type: bfloat16 + similarity: max_inner_product + dims: 4 + index_options: + type: int8_hnsw + rescore_vector: + oversample: 
0 + + - do: + headers: + Content-Type: application/json + search: + rest_total_hits_as_int: true + index: int8_rescore_zero_hnsw + body: + query: + script_score: + query: {match_all: {} } + script: + source: "double similarity = dotProduct(params.query_vector, 'vector'); return similarity < 0 ? 1 / (1 + -1 * similarity) : similarity + 1" + params: + query_vector: [-0.5, 90.0, -10, 14.8] + + # Compare scores as hit IDs may change depending on how things are distributed + - match: { hits.total: 3 } + - match: { hits.hits.0._score: $override_score0 } + - match: { hits.hits.0._score: $default_rescore0 } + - match: { hits.hits.1._score: $override_score1 } + - match: { hits.hits.1._score: $default_rescore1 } + - match: { hits.hits.2._score: $override_score2 } + - match: { hits.hits.2._score: $default_rescore2 } diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/41_knn_search_half_byte_quantized_bfloat16.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/41_knn_search_half_byte_quantized_bfloat16.yml new file mode 100644 index 0000000000000..68272ec7b1815 --- /dev/null +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/41_knn_search_half_byte_quantized_bfloat16.yml @@ -0,0 +1,932 @@ +setup: + - requires: + cluster_features: [ "mapper.vectors.generic_vector_format" ] + reason: Needs generic vector support + - do: + indices.create: + index: hnsw_byte_quantized + body: + settings: + index: + number_of_shards: 1 + mappings: + properties: + name: + type: keyword + vector: + type: dense_vector + element_type: bfloat16 + dims: 4 + index: true + similarity: l2_norm + index_options: + type: int4_hnsw + another_vector: + type: dense_vector + element_type: bfloat16 + dims: 4 + index: true + similarity: l2_norm + index_options: + type: int4_hnsw + + - do: + index: + index: hnsw_byte_quantized + id: "1" + body: + name: cow.jpg + vector: [230.0, 300.33, -34.8988, 15.555] + another_vector: 
[130.0, 115.0, -1.02, 15.555] + + # Flush in order to provoke a merge later + - do: + indices.flush: { } + + - do: + index: + index: hnsw_byte_quantized + id: "2" + body: + name: moose.jpg + vector: [-0.5, 100.0, -13, 14.8] + another_vector: [-0.5, 50.0, -1, 1] + + # Flush in order to provoke a merge later + - do: + indices.flush: { } + + - do: + index: + index: hnsw_byte_quantized + id: "3" + body: + name: rabbit.jpg + vector: [0.5, 111.3, -13.0, 14.8] + another_vector: [-0.5, 11.0, 0, 12] + + - do: + indices.forcemerge: + index: hnsw_byte_quantized + max_num_segments: 1 + + - do: + indices.refresh: {} + +--- +"kNN search only": + - do: + search: + index: hnsw_byte_quantized + body: + fields: [ "name" ] + knn: + field: vector + query_vector: [-0.5, 90.0, -10, 14.8] + k: 2 + num_candidates: 3 + + - match: {hits.hits.0._id: "2"} + - match: {hits.hits.0.fields.name.0: "moose.jpg"} + + - match: {hits.hits.1._id: "3"} + - match: {hits.hits.1.fields.name.0: "rabbit.jpg"} +--- +"kNN multi-field search only": + - do: + search: + index: hnsw_byte_quantized + body: + fields: [ "name" ] + knn: + - {field: vector, query_vector: [-0.5, 90.0, -10, 14.8], k: 2, num_candidates: 3} + - {field: another_vector, query_vector: [-0.5, 11.0, 0, 12], k: 2, num_candidates: 3} + + - match: {hits.hits.0._id: "3"} + - match: {hits.hits.0.fields.name.0: "rabbit.jpg"} + + - match: {hits.hits.1._id: "2"} + - match: {hits.hits.1.fields.name.0: "moose.jpg"} +--- +"kNN search plus query": + - do: + search: + index: hnsw_byte_quantized + body: + fields: [ "name" ] + knn: + field: vector + query_vector: [-0.5, 90.0, -10, 14.8] + k: 2 + num_candidates: 3 + query: + term: + name: + value: cow.jpg + boost: 1.5 + + - match: {hits.hits.0._id: "1"} + - match: {hits.hits.0.fields.name.0: "cow.jpg"} + + - match: {hits.hits.1._id: "2"} + - match: {hits.hits.1.fields.name.0: "moose.jpg"} + + - match: {hits.hits.2._id: "3"} + - match: {hits.hits.2.fields.name.0: "rabbit.jpg"} +--- +"kNN multi-field search with 
query": + - do: + search: + index: hnsw_byte_quantized + body: + fields: [ "name" ] + knn: + - {field: vector, query_vector: [-0.5, 90.0, -10, 14.8], k: 2, num_candidates: 3} + - {field: another_vector, query_vector: [-0.5, 11.0, 0, 12], k: 2, num_candidates: 3, boost: 2.0} + query: + term: + name: + value: cow.jpg + boost: 2.0 + + - match: {hits.hits.0._id: "3"} + - match: {hits.hits.0.fields.name.0: "rabbit.jpg"} + + - match: {hits.hits.1._id: "1"} + - match: {hits.hits.1.fields.name.0: "cow.jpg"} + + - match: {hits.hits.2._id: "2"} + - match: {hits.hits.2.fields.name.0: "moose.jpg"} +--- +"kNN search with filter": + - do: + search: + index: hnsw_byte_quantized + body: + fields: [ "name" ] + knn: + field: vector + query_vector: [-0.5, 90.0, -10, 14.8] + k: 2 + num_candidates: 3 + filter: + term: + name: "rabbit.jpg" + + - match: {hits.total.value: 1} + - match: {hits.hits.0._id: "3"} + - match: {hits.hits.0.fields.name.0: "rabbit.jpg"} + + - do: + search: + index: hnsw_byte_quantized + body: + fields: [ "name" ] + knn: + field: vector + query_vector: [-0.5, 90.0, -10, 14.8] + k: 2 + num_candidates: 3 + filter: + - term: + name: "rabbit.jpg" + - term: + _id: 2 + + - match: {hits.total.value: 0} + +--- +"KNN Vector similarity search only": + - do: + search: + index: hnsw_byte_quantized + body: + fields: [ "name" ] + knn: + num_candidates: 3 + k: 3 + field: vector + similarity: 17 + query_vector: [-0.5, 90.0, -10, 14.8] + + - length: {hits.hits: 1} + + - match: {hits.hits.0._id: "2"} + - match: {hits.hits.0.fields.name.0: "moose.jpg"} +--- +"Vector similarity with filter only": + - do: + search: + index: hnsw_byte_quantized + body: + fields: [ "name" ] + knn: + num_candidates: 3 + k: 3 + field: vector + similarity: 17 + query_vector: [-0.5, 90.0, -10, 14.8] + filter: {"term": {"name": "moose.jpg"}} + + - length: {hits.hits: 1} + + - match: {hits.hits.0._id: "2"} + - match: {hits.hits.0.fields.name.0: "moose.jpg"} + + - do: + search: + index: hnsw_byte_quantized + 
body: + fields: [ "name" ] + knn: + num_candidates: 3 + k: 3 + field: vector + similarity: 110 + query_vector: [-0.5, 90.0, -10, 14.8] + filter: {"term": {"name": "cow.jpg"}} + + - length: {hits.hits: 0} +--- +"Knn search with mip": + - do: + indices.create: + index: mip + body: + settings: + index: + number_of_shards: 1 + mappings: + properties: + name: + type: keyword + vector: + type: dense_vector + element_type: bfloat16 + dims: 6 + index: true + similarity: max_inner_product + index_options: + type: int4_hnsw + + - do: + index: + index: mip + id: "1" + body: + name: cow.jpg + vector: [1, 2, 3, 4, 5, 0] + + # Flush in order to provoke a merge later + - do: + indices.flush: { } + + - do: + index: + index: mip + id: "2" + body: + name: moose.jpg + vector: [1, 1, 1, 1, 1, 0] + + # Flush in order to provoke a merge later + - do: + indices.flush: { } + + - do: + index: + index: mip + id: "3" + body: + name: rabbit.jpg + vector: [1, 2, 2, 2, 2, 0] + + # We force merge into a single segment to make sure scores are more uniform + # Each segment can have a different quantization error, which can affect scores and mip is especially sensitive to this + - do: + indices.forcemerge: + index: mip + max_num_segments: 1 + + - do: + indices.refresh: {} + + - do: + search: + index: mip + body: + fields: [ "name" ] + knn: + num_candidates: 3 + k: 3 + field: vector + query_vector: [1, 2, 3, 4, 5, 0] + + + - length: {hits.hits: 3} + - match: {hits.hits.0._id: "1"} + - match: {hits.hits.1._id: "3"} + - match: {hits.hits.2._id: "2"} + + - do: + search: + index: mip + body: + fields: [ "name" ] + knn: + num_candidates: 3 + k: 3 + field: vector + query_vector: [1, 2, 3, 4, 5, 0] + filter: { "term": { "name": "moose.jpg" } } + + + + - length: {hits.hits: 1} + - match: {hits.hits.0._id: "2"} +--- +"Cosine similarity with indexed vector": + - skip: + features: "headers" + - do: + headers: + Content-Type: application/json + search: + rest_total_hits_as_int: true + body: + query: + 
script_score: + query: {match_all: {} } + script: + source: "cosineSimilarity(params.query_vector, 'vector')" + params: + query_vector: [0.5, 111.3, -13.0, 14.8] + + - match: {hits.total: 3} + + - match: {hits.hits.0._id: "3"} + - gte: {hits.hits.0._score: 0.999} + - lte: {hits.hits.0._score: 1.001} + + - match: {hits.hits.1._id: "2"} + - gte: {hits.hits.1._score: 0.998} + - lte: {hits.hits.1._score: 1.0} + + - match: {hits.hits.2._id: "1"} + - gte: {hits.hits.2._score: 0.78} + - lte: {hits.hits.2._score: 0.80} +--- +"Test bad quantization parameters": + - do: + catch: bad_request + indices.create: + index: bad_hnsw_quantized + body: + mappings: + properties: + vector: + type: dense_vector + dims: 6 + element_type: byte + index: true + index_options: + type: int4_hnsw + + - do: + catch: bad_request + indices.create: + index: bad_hnsw_quantized + body: + mappings: + properties: + vector: + type: dense_vector + dims: 6 + index: false + index_options: + type: int4_hnsw +--- +"Test create, merge, and search cosine": + - do: + indices.create: + index: hnsw_byte_quantized_merge_cosine + body: + settings: + index: + number_of_shards: 1 + - do: + indices.put_mapping: + index: hnsw_byte_quantized_merge_cosine + body: + properties: + embedding: + type: dense_vector + element_type: bfloat16 + similarity: cosine + index_options: + type: int4_hnsw + + - do: + index: + index: hnsw_byte_quantized_merge_cosine + id: "1" + body: + embedding: [0.5, 0.5, 0.5, 0.5, 0.5, 1.0] + + # Flush in order to provoke a merge later + - do: + indices.flush: { } + + - do: + index: + index: hnsw_byte_quantized_merge_cosine + id: "2" + body: + embedding: [0.0, 0.0, 0.0, 1.0, 1.0, 0.5] + + # Flush in order to provoke a merge later + - do: + indices.flush: { } + + - do: + index: + index: hnsw_byte_quantized_merge_cosine + id: "3" + body: + embedding: [0.0, 0.0, 0.0, 0.0, 0.0, 10.5] + + - do: + indices.forcemerge: + index: hnsw_byte_quantized_merge_cosine + max_num_segments: 1 + + - do: + 
indices.refresh: {} + + - do: + search: + index: hnsw_byte_quantized_merge_cosine + body: + size: 3 + query: + knn: + field: embedding + query_vector: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + num_candidates: 10 + + - length: { hits.hits: 3 } + - match: { hits.hits.0._id: "1"} + - match: { hits.hits.1._id: "2"} + - match: { hits.hits.2._id: "3"} +--- +"Test create, merge, and search dot_product": + - do: + indices.create: + index: hnsw_byte_quantized_merge_dot_product + body: + settings: + index: + number_of_shards: 1 + - do: + indices.put_mapping: + index: hnsw_byte_quantized_merge_dot_product + body: + properties: + embedding: + type: dense_vector + element_type: bfloat16 + similarity: dot_product + index_options: + type: int4_hnsw + + - do: + index: + index: hnsw_byte_quantized_merge_dot_product + id: "1" + body: + embedding: [0.6, 0.8] + + # Flush in order to provoke a merge later + - do: + indices.flush: { } + + - do: + index: + index: hnsw_byte_quantized_merge_dot_product + id: "2" + body: + embedding: [0.8, 0.6] + + # Flush in order to provoke a merge later + - do: + indices.flush: { } + + - do: + index: + index: hnsw_byte_quantized_merge_dot_product + id: "3" + body: + embedding: [-0.6, -0.8] + + - do: + indices.forcemerge: + index: hnsw_byte_quantized_merge_dot_product + max_num_segments: 1 + + - do: + indices.refresh: {} + + - do: + search: + index: hnsw_byte_quantized_merge_dot_product + body: + size: 3 + query: + knn: + field: embedding + query_vector: [0.6, 0.8] + num_candidates: 10 + + - length: { hits.hits: 3 } + - match: { hits.hits.0._id: "1"} + - match: { hits.hits.1._id: "2"} + - match: { hits.hits.2._id: "3"} +--- +"Vector rescoring has the same scoring as exact search for kNN section": + - requires: + reason: 'Quantized vector rescoring is required' + test_runner_features: [capabilities] + capabilities: + - method: GET + path: /_search + capabilities: [knn_quantized_vector_rescore_oversample] + - skip: + features: "headers" + + # Rescore + - do: + 
headers: + Content-Type: application/json + search: + index: hnsw_byte_quantized + rest_total_hits_as_int: true + body: + fields: [ "name" ] + knn: + field: vector + query_vector: [-0.5, 90.0, -10, 14.8] + k: 3 + num_candidates: 3 + rescore_vector: + oversample: 1.5 + + # Get rescoring scores - hit ordering may change depending on how things are distributed + - match: { hits.total: 3 } + - set: { hits.hits.0._score: rescore_score0 } + - set: { hits.hits.1._score: rescore_score1 } + - set: { hits.hits.2._score: rescore_score2 } + + # Exact knn via script score + - do: + headers: + Content-Type: application/json + search: + rest_total_hits_as_int: true + body: + query: + script_score: + query: {match_all: {} } + script: + source: "1.0 / (1.0 + Math.pow(l2norm(params.query_vector, 'vector'), 2.0))" + params: + query_vector: [-0.5, 90.0, -10, 14.8] + + # Compare scores as hit IDs may change depending on how things are distributed + - match: { hits.total: 3 } + - match: { hits.hits.0._score: $rescore_score0 } + - match: { hits.hits.1._score: $rescore_score1 } + - match: { hits.hits.2._score: $rescore_score2 } + +--- +"Test odd dimensions fail indexing": + - do: + catch: bad_request + indices.create: + index: bad_hnsw_quantized + body: + mappings: + properties: + vector: + type: dense_vector + element_type: bfloat16 + dims: 5 + index: true + index_options: + type: int4_hnsw + + - do: + indices.create: + index: dynamic_dim_hnsw_quantized + body: + mappings: + properties: + vector: + type: dense_vector + element_type: bfloat16 + index: true + similarity: l2_norm + index_options: + type: int4_hnsw + + - do: + catch: bad_request + index: + index: dynamic_dim_hnsw_quantized + body: + vector: [1.0, 2.0, 3.0, 4.0, 5.0] + + - do: + index: + index: dynamic_dim_hnsw_quantized + body: + vector: [1.0, 2.0, 3.0, 4.0, 5.0, 6.0] +--- +"Test index configured rescore vector": + - requires: + cluster_features: ["mapper.dense_vector.rescore_vector"] + reason: Needs rescore_vector feature + 
- skip: + features: "headers" + - do: + indices.create: + index: int4_rescore_hnsw + body: + settings: + index: + number_of_shards: 1 + mappings: + properties: + vector: + type: dense_vector + element_type: bfloat16 + dims: 4 + similarity: max_inner_product + index_options: + type: int4_hnsw + rescore_vector: + oversample: 1.5 + + - do: + bulk: + index: int4_rescore_hnsw + refresh: true + body: | + { "index": {"_id": "1"}} + { "vector": [230.0, 300.33, -34.8988, 15.555] } + { "index": {"_id": "2"}} + { "vector": [-0.5, 100.0, -13, 14.8] } + { "index": {"_id": "3"}} + { "vector": [0.5, 111.3, -13.0, 14.8] } + + - do: + headers: + Content-Type: application/json + search: + rest_total_hits_as_int: true + index: int4_rescore_hnsw + body: + knn: + field: vector + query_vector: [-0.5, 90.0, -10, 14.8] + k: 3 + num_candidates: 3 + + - match: { hits.total: 3 } + - set: { hits.hits.0._score: rescore_score0 } + - set: { hits.hits.1._score: rescore_score1 } + - set: { hits.hits.2._score: rescore_score2 } + + - do: + headers: + Content-Type: application/json + search: + rest_total_hits_as_int: true + index: int4_rescore_hnsw + body: + query: + script_score: + query: {match_all: {} } + script: + source: "double similarity = dotProduct(params.query_vector, 'vector'); return similarity < 0 ? 
1 / (1 + -1 * similarity) : similarity + 1" + params: + query_vector: [-0.5, 90.0, -10, 14.8] + + # Compare scores as hit IDs may change depending on how things are distributed + - match: { hits.total: 3 } + - match: { hits.hits.0._score: $rescore_score0 } + - match: { hits.hits.1._score: $rescore_score1 } + - match: { hits.hits.2._score: $rescore_score2 } +--- +"Test index configured rescore vector updateable and settable to 0": + - requires: + cluster_features: ["mapper.dense_vector.rescore_zero_vector"] + reason: Needs rescore_zero_vector feature + + - do: + indices.create: + index: int4_rescore_0_hnsw + body: + settings: + index: + number_of_shards: 1 + mappings: + properties: + vector: + type: dense_vector + element_type: bfloat16 + index_options: + type: int4_hnsw + rescore_vector: + oversample: 0 + + - do: + indices.create: + index: int4_rescore_update_hnsw + body: + settings: + index: + number_of_shards: 1 + mappings: + properties: + vector: + type: dense_vector + element_type: bfloat16 + index_options: + type: int4_hnsw + rescore_vector: + oversample: 1 + + - do: + indices.put_mapping: + index: int4_rescore_update_hnsw + body: + properties: + vector: + type: dense_vector + element_type: bfloat16 + index_options: + type: int4_hnsw + rescore_vector: + oversample: 0 + + - do: + indices.get_mapping: + index: int4_rescore_update_hnsw + + - match: { .int4_rescore_update_hnsw.mappings.properties.vector.index_options.rescore_vector.oversample: 0 } +--- +"Test index configured rescore vector score consistency": + - requires: + cluster_features: ["mapper.dense_vector.rescore_zero_vector"] + reason: Needs rescore_zero_vector feature + - skip: + features: "headers" + - do: + indices.create: + index: int4_rescore_zero_hnsw + body: + settings: + index: + number_of_shards: 1 + mappings: + properties: + vector: + type: dense_vector + similarity: max_inner_product + dims: 4 + index_options: + type: int4_hnsw + rescore_vector: + oversample: 0 + + - do: + bulk: + index: 
int4_rescore_zero_hnsw + refresh: true + body: | + { "index": {"_id": "1"}} + { "vector": [230.0, 300.33, -34.8988, 15.555] } + { "index": {"_id": "2"}} + { "vector": [-0.5, 100.0, -13, 14.8] } + { "index": {"_id": "3"}} + { "vector": [0.5, 111.3, -13.0, 14.8] } + - do: + headers: + Content-Type: application/json + search: + rest_total_hits_as_int: true + index: int4_rescore_zero_hnsw + body: + knn: + field: vector + query_vector: [-0.5, 90.0, -10, 14.8] + k: 3 + num_candidates: 3 + + - match: { hits.total: 3 } + - set: { hits.hits.0._score: raw_score0 } + - set: { hits.hits.1._score: raw_score1 } + - set: { hits.hits.2._score: raw_score2 } + + + - do: + headers: + Content-Type: application/json + search: + rest_total_hits_as_int: true + index: int4_rescore_zero_hnsw + body: + knn: + field: vector + query_vector: [-0.5, 90.0, -10, 14.8] + k: 3 + num_candidates: 3 + rescore_vector: + oversample: 2 + + - match: { hits.total: 3 } + - set: { hits.hits.0._score: override_score0 } + - set: { hits.hits.1._score: override_score1 } + - set: { hits.hits.2._score: override_score2 } + + - do: + indices.put_mapping: + index: int4_rescore_zero_hnsw + body: + properties: + vector: + type: dense_vector + element_type: bfloat16 + similarity: max_inner_product + dims: 4 + index_options: + type: int4_hnsw + rescore_vector: + oversample: 2 + + - do: + headers: + Content-Type: application/json + search: + rest_total_hits_as_int: true + index: int4_rescore_zero_hnsw + body: + knn: + field: vector + query_vector: [-0.5, 90.0, -10, 14.8] + k: 3 + num_candidates: 3 + + - match: { hits.total: 3 } + - set: { hits.hits.0._score: default_rescore0 } + - set: { hits.hits.1._score: default_rescore1 } + - set: { hits.hits.2._score: default_rescore2 } + + - do: + indices.put_mapping: + index: int4_rescore_zero_hnsw + body: + properties: + vector: + type: dense_vector + element_type: bfloat16 + similarity: max_inner_product + dims: 4 + index_options: + type: int4_hnsw + rescore_vector: + oversample: 
0 + + - do: + headers: + Content-Type: application/json + search: + rest_total_hits_as_int: true + index: int4_rescore_zero_hnsw + body: + query: + script_score: + query: {match_all: {} } + script: + source: "double similarity = dotProduct(params.query_vector, 'vector'); return similarity < 0 ? 1 / (1 + -1 * similarity) : similarity + 1" + params: + query_vector: [-0.5, 90.0, -10, 14.8] + + # Compare scores as hit IDs may change depending on how things are distributed + - match: { hits.total: 3 } + - match: { hits.hits.0._score: $override_score0 } + - match: { hits.hits.0._score: $default_rescore0 } + - match: { hits.hits.1._score: $override_score1 } + - match: { hits.hits.1._score: $default_rescore1 } + - match: { hits.hits.2._score: $override_score2 } + - match: { hits.hits.2._score: $default_rescore2 } diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/42_knn_search_flat_bfloat16.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/42_knn_search_flat_bfloat16.yml new file mode 100644 index 0000000000000..4d4d20f9dcb4e --- /dev/null +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/42_knn_search_flat_bfloat16.yml @@ -0,0 +1,432 @@ +setup: + - requires: + cluster_features: [ "mapper.vectors.generic_vector_format" ] + reason: Needs generic vector support + - do: + indices.create: + index: flat + body: + mappings: + properties: + name: + type: keyword + vector: + type: dense_vector + element_type: bfloat16 + dims: 5 + index: true + similarity: l2_norm + index_options: + type: flat + another_vector: + type: dense_vector + element_type: bfloat16 + dims: 5 + index: true + similarity: l2_norm + index_options: + type: flat + + - do: + index: + index: flat + id: "1" + body: + name: cow.jpg + vector: [230.0, 300.33, -34.8988, 15.555, -200.0] + another_vector: [130.0, 115.0, -1.02, 15.555, -100.0] + + - do: + index: + index: flat + id: "2" + body: + name: moose.jpg + vector: 
[-0.5, 100.0, -13, 14.8, -156.0] + another_vector: [-0.5, 50.0, -1, 1, 120] + + - do: + index: + index: flat + id: "3" + body: + name: rabbit.jpg + vector: [0.5, 111.3, -13.0, 14.8, -156.0] + another_vector: [-0.5, 11.0, 0, 12, 111.0] + + - do: + indices.refresh: {} + +--- +"kNN search only": + - do: + search: + index: flat + body: + fields: [ "name" ] + knn: + field: vector + query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + k: 2 + num_candidates: 3 + + - match: {hits.hits.0._id: "2"} + - match: {hits.hits.0.fields.name.0: "moose.jpg"} + + - match: {hits.hits.1._id: "3"} + - match: {hits.hits.1.fields.name.0: "rabbit.jpg"} +--- +"kNN multi-field search only": + - do: + search: + index: flat + body: + fields: [ "name" ] + knn: + - {field: vector, query_vector: [-0.5, 90.0, -10, 14.8, -156.0], k: 2, num_candidates: 3} + - {field: another_vector, query_vector: [-0.5, 11.0, 0, 12, 111.0], k: 2, num_candidates: 3} + + - match: {hits.hits.0._id: "3"} + - match: {hits.hits.0.fields.name.0: "rabbit.jpg"} + + - match: {hits.hits.1._id: "2"} + - match: {hits.hits.1.fields.name.0: "moose.jpg"} +--- +"kNN search plus query": + - do: + search: + index: flat + body: + fields: [ "name" ] + knn: + field: vector + query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + k: 2 + num_candidates: 3 + query: + term: + name: cow.jpg + + - match: {hits.hits.0._id: "1"} + - match: {hits.hits.0.fields.name.0: "cow.jpg"} + + - match: {hits.hits.1._id: "2"} + - match: {hits.hits.1.fields.name.0: "moose.jpg"} + + - match: {hits.hits.2._id: "3"} + - match: {hits.hits.2.fields.name.0: "rabbit.jpg"} +--- +"kNN multi-field search with query": + - do: + search: + index: flat + body: + fields: [ "name" ] + knn: + - {field: vector, query_vector: [-0.5, 90.0, -10, 14.8, -156.0], k: 2, num_candidates: 3} + - {field: another_vector, query_vector: [-0.5, 11.0, 0, 12, 111.0], k: 2, num_candidates: 3} + query: + term: + name: cow.jpg + + - match: {hits.hits.0._id: "3"} + - match: {hits.hits.0.fields.name.0: 
"rabbit.jpg"} + + - match: {hits.hits.1._id: "1"} + - match: {hits.hits.1.fields.name.0: "cow.jpg"} + + - match: {hits.hits.2._id: "2"} + - match: {hits.hits.2.fields.name.0: "moose.jpg"} +--- +"kNN search with filter": + - do: + search: + index: flat + body: + fields: [ "name" ] + knn: + field: vector + query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + k: 2 + num_candidates: 3 + filter: + term: + name: "rabbit.jpg" + + - match: {hits.total.value: 1} + - match: {hits.hits.0._id: "3"} + - match: {hits.hits.0.fields.name.0: "rabbit.jpg"} + + - do: + search: + index: flat + body: + fields: [ "name" ] + knn: + field: vector + query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + k: 2 + num_candidates: 3 + filter: + - term: + name: "rabbit.jpg" + - term: + _id: 2 + + - match: {hits.total.value: 0} + +--- +"KNN Vector similarity search only": + - do: + search: + index: flat + body: + fields: [ "name" ] + knn: + num_candidates: 3 + k: 3 + field: vector + similarity: 11 + query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + + - length: {hits.hits: 1} + + - match: {hits.hits.0._id: "2"} + - match: {hits.hits.0.fields.name.0: "moose.jpg"} +--- +"Vector similarity with filter only": + - do: + search: + index: flat + body: + fields: [ "name" ] + knn: + num_candidates: 3 + k: 3 + field: vector + similarity: 11 + query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + filter: {"term": {"name": "moose.jpg"}} + + - length: {hits.hits: 1} + + - match: {hits.hits.0._id: "2"} + - match: {hits.hits.0.fields.name.0: "moose.jpg"} + + - do: + search: + index: flat + body: + fields: [ "name" ] + knn: + num_candidates: 3 + k: 3 + field: vector + similarity: 110 + query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + filter: {"term": {"name": "cow.jpg"}} + + - length: {hits.hits: 0} +--- +"Cosine similarity with indexed vector": + - skip: + features: "headers" + - do: + headers: + Content-Type: application/json + search: + rest_total_hits_as_int: true + body: + query: + script_score: + query: {match_all: {} } + 
script: + source: "cosineSimilarity(params.query_vector, 'vector')" + params: + query_vector: [0.5, 111.3, -13.0, 14.8, -156.0] + + - match: {hits.total: 3} + + - match: {hits.hits.0._id: "3"} + - gte: {hits.hits.0._score: 0.999} + - lte: {hits.hits.0._score: 1.001} + + - match: {hits.hits.1._id: "2"} + - gte: {hits.hits.1._score: 0.998} + - lte: {hits.hits.1._score: 1.0} + + - match: {hits.hits.2._id: "1"} + - gte: {hits.hits.2._score: 0.78} + - lte: {hits.hits.2._score: 0.791} +--- +"Vector rescoring has no effect for non-quantized vectors and provides same results as non-rescored knn": + - requires: + reason: 'Quantized vector rescoring is required' + test_runner_features: [capabilities] + capabilities: + - method: GET + path: /_search + capabilities: [knn_quantized_vector_rescore_oversample] + - skip: + features: "headers" + + # Non-rescored knn + - do: + headers: + Content-Type: application/json + search: + rest_total_hits_as_int: true + index: flat + body: + fields: [ "name" ] + knn: + field: vector + query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + k: 3 + num_candidates: 3 + + # Get scores - hit ordering may change depending on how things are distributed + - match: { hits.total: 3 } + - set: { hits.hits.0._score: knn_score0 } + - set: { hits.hits.1._score: knn_score1 } + - set: { hits.hits.2._score: knn_score2 } + + # Rescored knn + - do: + headers: + Content-Type: application/json + search: + rest_total_hits_as_int: true + index: flat + body: + fields: [ "name" ] + knn: + field: vector + query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + k: 3 + num_candidates: 3 + rescore_vector: + oversample: 1.5 + + # Compare scores as hit IDs may change depending on how things are distributed + - match: { hits.total: 3 } + - match: { hits.hits.0._score: $knn_score0 } + - match: { hits.hits.1._score: $knn_score1 } + - match: { hits.hits.2._score: $knn_score2 } +--- +"Test bad parameters": + - do: + catch: bad_request + indices.create: + index: bad_flat + body: + mappings: + 
properties: + vector: + type: dense_vector + dims: 5 + index: true + index_options: + type: flat + m: 42 +--- +"Nested flat search": + - do: + indices.create: + index: flat_nested + body: + settings: + index: + number_of_shards: 1 + mappings: + properties: + name: + type: keyword + nested: + type: nested + properties: + paragraph_id: + type: keyword + vector: + type: dense_vector + element_type: bfloat16 + dims: 5 + index: true + similarity: l2_norm + index_options: + type: flat + - do: + index: + index: flat_nested + id: "1" + body: + name: cow.jpg + nested: + - paragraph_id: 0 + vector: [230.0, 300.33, -34.8988, 15.555, -200.0] + - paragraph_id: 1 + vector: [240.0, 300, -3, 1, -20] + + - do: + index: + index: flat_nested + id: "2" + body: + name: moose.jpg + nested: + - paragraph_id: 0 + vector: [-0.5, 100.0, -13, 14.8, -156.0] + - paragraph_id: 2 + vector: [0, 100.0, 0, 14.8, -156.0] + - paragraph_id: 3 + vector: [0, 1.0, 0, 1.8, -15.0] + + - do: + index: + index: flat_nested + id: "3" + body: + name: rabbit.jpg + nested: + - paragraph_id: 0 + vector: [0.5, 111.3, -13.0, 14.8, -156.0] + + - do: + indices.refresh: {} + - do: + search: + index: flat_nested + body: + fields: [ "name" ] + query: + nested: + path: nested + query: + knn: + field: nested.vector + query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ] + num_candidates: 3 + k: 2 + + - match: { hits.hits.0._id: "2" } + - match: { hits.hits.0.fields.name.0: "moose.jpg" } + + - match: { hits.hits.1._id: "3" } + - match: { hits.hits.1.fields.name.0: "rabbit.jpg" } + + - do: + search: + index: flat_nested + body: + fields: [ "name" ] + knn: + field: nested.vector + query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ] + num_candidates: 3 + k: 2 + + - match: { hits.hits.0._id: "2" } + - match: { hits.hits.0.fields.name.0: "moose.jpg" } + + - match: { hits.hits.1._id: "3" } + - match: { hits.hits.1.fields.name.0: "rabbit.jpg" } diff --git 
a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/42_knn_search_int4_flat_bfloat16.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/42_knn_search_int4_flat_bfloat16.yml new file mode 100644 index 0000000000000..e60ea690664e5 --- /dev/null +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/42_knn_search_int4_flat_bfloat16.yml @@ -0,0 +1,619 @@ +setup: + - requires: + cluster_features: [ "mapper.vectors.generic_vector_format" ] + reason: Needs generic vector support + - do: + indices.create: + index: int4_flat + body: + settings: + index: + number_of_shards: 1 + mappings: + properties: + name: + type: keyword + vector: + type: dense_vector + element_type: bfloat16 + dims: 4 + index: true + similarity: l2_norm + index_options: + type: int4_flat + another_vector: + type: dense_vector + element_type: bfloat16 + dims: 4 + index: true + similarity: l2_norm + index_options: + type: int4_flat + + - do: + index: + index: int4_flat + id: "1" + body: + name: cow.jpg + vector: [230.0, 300.33, -34.8988, 15.555] + another_vector: [130.0, 115.0, -1.02, 15.555] + # Flush in order to provoke a merge later & ensure replicas have same doc order + - do: + indices.flush: { } + - do: + index: + index: int4_flat + id: "2" + body: + name: moose.jpg + vector: [-0.5, 100.0, -13, 14.8] + another_vector: [-0.5, 50.0, -1, 1] + # Flush in order to provoke a merge later & ensure replicas have same doc order + - do: + indices.flush: { } + - do: + index: + index: int4_flat + id: "3" + body: + name: rabbit.jpg + vector: [0.5, 111.3, -13.0, 14.8] + another_vector: [-0.5, 11.0, 0, 12] + + - do: + indices.flush: { } + + # For added test reliability, pending the resolution of https://github.com/elastic/elasticsearch/issues/109416. 
+ - do: + indices.forcemerge: + max_num_segments: 1 + index: int4_flat + - do: + indices.refresh: {} +--- +"kNN search only": + - do: + search: + index: int4_flat + body: + fields: [ "name" ] + knn: + field: vector + query_vector: [-0.5, 90.0, -10, 14.8] + k: 2 + num_candidates: 3 + + - match: {hits.hits.0._id: "2"} + - match: {hits.hits.0.fields.name.0: "moose.jpg"} + + - match: {hits.hits.1._id: "3"} + - match: {hits.hits.1.fields.name.0: "rabbit.jpg"} +--- +"kNN multi-field search only": + - do: + search: + index: int4_flat + body: + fields: [ "name" ] + knn: + - {field: vector, query_vector: [-0.5, 90.0, -10, 14.8], k: 2, num_candidates: 3} + - {field: another_vector, query_vector: [-0.5, 11.0, 0, 12], k: 2, num_candidates: 3} + + - match: {hits.hits.0._id: "3"} + - match: {hits.hits.0.fields.name.0: "rabbit.jpg"} + + - match: {hits.hits.1._id: "2"} + - match: {hits.hits.1.fields.name.0: "moose.jpg"} +--- +"kNN search plus query": + - do: + search: + index: int4_flat + body: + fields: [ "name" ] + knn: + field: vector + query_vector: [-0.5, 90.0, -10, 14.8] + k: 2 + num_candidates: 3 + query: + term: + name: + value: cow.jpg + boost: 1.5 + + - match: {hits.hits.0._id: "1"} + - match: {hits.hits.0.fields.name.0: "cow.jpg"} + + - match: {hits.hits.1._id: "2"} + - match: {hits.hits.1.fields.name.0: "moose.jpg"} + + - match: {hits.hits.2._id: "3"} + - match: {hits.hits.2.fields.name.0: "rabbit.jpg"} +--- +"kNN multi-field search with query": + - do: + search: + index: int4_flat + body: + fields: [ "name" ] + knn: + - {field: vector, query_vector: [-0.5, 90.0, -10, 14.8], k: 2, num_candidates: 3} + - {field: another_vector, query_vector: [-0.5, 11.0, 0, 12], k: 2, num_candidates: 3, boost: 2.0} + query: + term: + name: + value: cow.jpg + boost: 2.0 + + - match: {hits.hits.0._id: "3"} + - match: {hits.hits.0.fields.name.0: "rabbit.jpg"} + + - match: {hits.hits.1._id: "1"} + - match: {hits.hits.1.fields.name.0: "cow.jpg"} + + - match: {hits.hits.2._id: "2"} + - match: 
{hits.hits.2.fields.name.0: "moose.jpg"} +--- +"kNN search with filter": + - do: + search: + index: int4_flat + body: + fields: [ "name" ] + knn: + field: vector + query_vector: [-0.5, 90.0, -10, 14.8] + k: 2 + num_candidates: 3 + filter: + term: + name: "rabbit.jpg" + + - match: {hits.total.value: 1} + - match: {hits.hits.0._id: "3"} + - match: {hits.hits.0.fields.name.0: "rabbit.jpg"} + + - do: + search: + index: int4_flat + body: + fields: [ "name" ] + knn: + field: vector + query_vector: [-0.5, 90.0, -10, 14.8] + k: 2 + num_candidates: 3 + filter: + - term: + name: "rabbit.jpg" + - term: + _id: 2 + + - match: {hits.total.value: 0} + +--- +"KNN Vector similarity search only": + - do: + search: + index: int4_flat + body: + fields: [ "name" ] + knn: + num_candidates: 3 + k: 3 + field: vector + # Set high allowed similarity, reduce once we can update underlying quantization algo + similarity: 110 + query_vector: [-0.5, 90.0, -10, 14.8] + + - is_true: hits.hits.0 + + #- match: {hits.hits.0._id: "2"} + #- match: {hits.hits.0.fields.name.0: "moose.jpg"} +--- +"Vector similarity with filter only": + - do: + search: + index: int4_flat + body: + fields: [ "name" ] + knn: + num_candidates: 3 + k: 3 + field: vector + # Set high allowed similarity, reduce once we can update underlying quantization algo + similarity: 110 + query_vector: [-0.5, 90.0, -10, 14.8] + filter: {"term": {"name": "moose.jpg"}} + + - length: {hits.hits: 1} + + - match: {hits.hits.0._id: "2"} + - match: {hits.hits.0.fields.name.0: "moose.jpg"} + + - do: + search: + index: int4_flat + body: + fields: [ "name" ] + knn: + num_candidates: 3 + k: 3 + field: vector + similarity: 110 + query_vector: [-0.5, 90.0, -10, 14.8] + filter: {"term": {"name": "cow.jpg"}} + + - length: {hits.hits: 0} +--- +"Cosine similarity with indexed vector": + - skip: + features: "headers" + - do: + headers: + Content-Type: application/json + search: + rest_total_hits_as_int: true + body: + query: + script_score: + query: 
{match_all: {} } + script: + source: "cosineSimilarity(params.query_vector, 'vector')" + params: + query_vector: [0.5, 111.3, -13.0, 14.8] + + - match: {hits.total: 3} + + - match: {hits.hits.0._id: "3"} + - gte: {hits.hits.0._score: 0.999} + - lte: {hits.hits.0._score: 1.001} + + - match: {hits.hits.1._id: "2"} + - gte: {hits.hits.1._score: 0.998} + - lte: {hits.hits.1._score: 1.0} + + - match: {hits.hits.2._id: "1"} + - gte: {hits.hits.2._score: 0.78} + - lte: {hits.hits.2._score: 0.80} +--- +"Test bad parameters": + - do: + catch: bad_request + indices.create: + index: bad_int4_flat + body: + mappings: + properties: + vector: + type: dense_vector + element_type: bfloat16 + dims: 6 + index: true + index_options: + type: int4_flat + m: 42 + + - do: + catch: bad_request + indices.create: + index: bad_int4_flat + body: + mappings: + properties: + vector: + type: dense_vector + dims: 6 + element_type: byte + index: true + index_options: + type: int4_flat +--- +"Test odd dimensions fail indexing": + # verify index creation fails + - do: + catch: bad_request + indices.create: + index: bad_hnsw_quantized + body: + mappings: + properties: + vector: + type: dense_vector + element_type: bfloat16 + dims: 5 + index: true + similarity: l2_norm + index_options: + type: int4_flat + + # verify dynamic dimension fails + - do: + indices.create: + index: dynamic_dim_hnsw_quantized + body: + mappings: + properties: + vector: + type: dense_vector + element_type: bfloat16 + index: true + similarity: l2_norm + index_options: + type: int4_hnsw + + # verify index fails for odd dim vector + - do: + catch: bad_request + index: + index: dynamic_dim_hnsw_quantized + body: + vector: [1.0, 2.0, 3.0, 4.0, 5.0] + + # verify that we can index an even dim vector after the odd dim vector failure + - do: + index: + index: dynamic_dim_hnsw_quantized + body: + vector: [1.0, 2.0, 3.0, 4.0, 5.0, 6.0] +--- +"Vector rescoring has the same scoring as exact search for kNN section": + - requires: + reason: 
'Quantized vector rescoring is required' + test_runner_features: [capabilities] + capabilities: + - method: GET + path: /_search + capabilities: [knn_quantized_vector_rescore_oversample] + - skip: + features: "headers" + + # Rescore + - do: + headers: + Content-Type: application/json + search: + index: int4_flat + rest_total_hits_as_int: true + body: + fields: [ "name" ] + knn: + field: vector + query_vector: [-0.5, 90.0, -10, 14.8] + k: 3 + num_candidates: 3 + rescore_vector: + oversample: 1.5 + + # Get rescoring scores - hit ordering may change depending on how things are distributed + - match: { hits.total: 3 } + - set: { hits.hits.0._score: rescore_score0 } + - set: { hits.hits.1._score: rescore_score1 } + - set: { hits.hits.2._score: rescore_score2 } + + # Exact knn via script score + - do: + headers: + Content-Type: application/json + search: + rest_total_hits_as_int: true + body: + query: + script_score: + query: {match_all: {} } + script: + source: "1.0 / (1.0 + Math.pow(l2norm(params.query_vector, 'vector'), 2.0))" + params: + query_vector: [-0.5, 90.0, -10, 14.8] + + # Compare scores as hit IDs may change depending on how things are distributed + - match: { hits.total: 3 } + - match: { hits.hits.0._score: $rescore_score0 } + - match: { hits.hits.1._score: $rescore_score1 } + - match: { hits.hits.2._score: $rescore_score2 } +--- +"Test index configured rescore vector": + - requires: + cluster_features: ["mapper.dense_vector.rescore_vector"] + reason: Needs rescore_vector feature + - skip: + features: "headers" + - do: + indices.create: + index: int4_rescore_flat + body: + settings: + index: + number_of_shards: 1 + mappings: + properties: + vector: + type: dense_vector + element_type: bfloat16 + dims: 64 + index: true + similarity: max_inner_product + index_options: + type: int4_flat + rescore_vector: + oversample: 1.5 + + - do: + bulk: + index: int4_rescore_flat + refresh: true + body: | + { "index": {"_id": "1"}} + { "vector": [0.077, 0.32 , 
-0.205, 0.63 , 0.032, 0.201, 0.167, -0.313, 0.176, 0.531, -0.375, 0.334, -0.046, 0.078, -0.349, 0.272, 0.307, -0.083, 0.504, 0.255, -0.404, 0.289, -0.226, -0.132, -0.216, 0.49 , 0.039, 0.507, -0.307, 0.107, 0.09 , -0.265, -0.285, 0.336, -0.272, 0.369, -0.282, 0.086, -0.132, 0.475, -0.224, 0.203, 0.439, 0.064, 0.246, -0.396, 0.297, 0.242, -0.028, 0.321, -0.022, -0.009, -0.001 , 0.031, -0.533, 0.45, -0.683, 1.331, 0.194, -0.157, -0.1 , -0.279, -0.098, -0.176] } + { "index": {"_id": "2"}} + { "vector": [0.196, 0.514, 0.039, 0.555, -0.042, 0.242, 0.463, -0.348, -0.08 , 0.442, -0.067, -0.05 , -0.001, 0.298, -0.377, 0.048, 0.307, 0.159, 0.278, 0.119, -0.057, 0.333, -0.289, -0.438, -0.014, 0.361, -0.169, 0.292, -0.229, 0.123, 0.031, -0.138, -0.139, 0.315, -0.216, 0.322, -0.445, -0.059, 0.071, 0.429, -0.602, -0.142, 0.11 , 0.192, 0.259, -0.241, 0.181, -0.166, 0.082, 0.107, -0.05 , 0.155, 0.011, 0.161, -0.486, 0.569, -0.489, 0.901, 0.208, 0.011, -0.209, -0.153, -0.27 , -0.013] } + { "index": {"_id": "3"}} + { "vector": [0.196, 0.514, 0.039, 0.555, -0.042, 0.242, 0.463, -0.348, -0.08 , 0.442, -0.067, -0.05 , -0.001, 0.298, -0.377, 0.048, 0.307, 0.159, 0.278, 0.119, -0.057, 0.333, -0.289, -0.438, -0.014, 0.361, -0.169, 0.292, -0.229, 0.123, 0.031, -0.138, -0.139, 0.315, -0.216, 0.322, -0.445, -0.059, 0.071, 0.429, -0.602, -0.142, 0.11 , 0.192, 0.259, -0.241, 0.181, -0.166, 0.082, 0.107, -0.05 , 0.155, 0.011, 0.161, -0.486, 0.569, -0.489, 0.901, 0.208, 0.011, -0.209, -0.153, -0.27 , -0.013] } + + - do: + headers: + Content-Type: application/json + search: + rest_total_hits_as_int: true + index: int4_rescore_flat + body: + knn: + field: vector + query_vector: [0.128, 0.067, -0.08 , 0.395, -0.11 , -0.259, 0.473, -0.393, + 0.292, 0.571, -0.491, 0.444, -0.288, 0.198, -0.343, 0.015, + 0.232, 0.088, 0.228, 0.151, -0.136, 0.236, -0.273, -0.259, + -0.217, 0.359, -0.207, 0.352, -0.142, 0.192, -0.061, -0.17 , + -0.343, 0.189, -0.221, 0.32 , -0.301, -0.1 , 0.005, 0.232, + -0.344, 0.136, 
0.252, 0.157, -0.13 , -0.244, 0.193, -0.034, + -0.12 , -0.193, -0.102, 0.252, -0.185, -0.167, -0.575, 0.582, + -0.426, 0.983, 0.212, 0.204, 0.03 , -0.276, -0.425, -0.158] + k: 3 + num_candidates: 3 + + - match: { hits.total: 3 } + - set: { hits.hits.0._score: rescore_score0 } + - set: { hits.hits.1._score: rescore_score1 } + - set: { hits.hits.2._score: rescore_score2 } + + - do: + headers: + Content-Type: application/json + search: + rest_total_hits_as_int: true + index: int4_rescore_flat + body: + query: + script_score: + query: {match_all: {} } + script: + source: "double similarity = dotProduct(params.query_vector, 'vector'); return similarity < 0 ? 1 / (1 + -1 * similarity) : similarity + 1" + params: + query_vector: [0.128, 0.067, -0.08 , 0.395, -0.11 , -0.259, 0.473, -0.393, + 0.292, 0.571, -0.491, 0.444, -0.288, 0.198, -0.343, 0.015, + 0.232, 0.088, 0.228, 0.151, -0.136, 0.236, -0.273, -0.259, + -0.217, 0.359, -0.207, 0.352, -0.142, 0.192, -0.061, -0.17 , + -0.343, 0.189, -0.221, 0.32 , -0.301, -0.1 , 0.005, 0.232, + -0.344, 0.136, 0.252, 0.157, -0.13 , -0.244, 0.193, -0.034, + -0.12 , -0.193, -0.102, 0.252, -0.185, -0.167, -0.575, 0.582, + -0.426, 0.983, 0.212, 0.204, 0.03 , -0.276, -0.425, -0.158] + + # Compare scores as hit IDs may change depending on how things are distributed + - match: { hits.total: 3 } + - match: { hits.hits.0._score: $rescore_score0 } + - match: { hits.hits.1._score: $rescore_score1 } + - match: { hits.hits.2._score: $rescore_score2 } + +--- +"no default oversample value": + - requires: + cluster_features: ["mapper.dense_vector.default_oversample_value_for_bbq"] + reason: "Needs default_oversample_value_for_bbq feature" + - do: + indices.get_mapping: + index: int4_flat + + - not_exists: int4_flat.mappings.properties.vector.index_options.rescore_vector +--- +"Nested flat search": + - do: + indices.create: + index: int4_flat_nested + body: + settings: + index: + number_of_shards: 1 + mappings: + properties: + name: + type: keyword + 
nested: + type: nested + properties: + paragraph_id: + type: keyword + vector: + type: dense_vector + element_type: bfloat16 + dims: 4 + index: true + similarity: l2_norm + index_options: + type: int4_flat + - do: + index: + index: int4_flat_nested + id: "1" + body: + name: cow.jpg + nested: + - paragraph_id: 0 + vector: [230.0, 300.33, -34.8988, 15.555 ] + - paragraph_id: 1 + vector: [240.0, 300, -3, 1 ] + + - do: + index: + index: int4_flat_nested + id: "2" + body: + name: moose.jpg + nested: + - paragraph_id: 0 + vector: [-0.5, 100.0, -13, 14.8] + - paragraph_id: 2 + vector: [0, 100.0, 0, 14.8] + - paragraph_id: 3 + vector: [0, 1.0, 0, 1.8] + + - do: + index: + index: int4_flat_nested + id: "3" + body: + name: rabbit.jpg + nested: + - paragraph_id: 0 + vector: [0.5, 111.3, -13.0, 14.8] + + - do: + indices.refresh: {} + - do: + search: + index: int4_flat_nested + body: + fields: [ "name" ] + query: + nested: + path: nested + query: + knn: + field: nested.vector + query_vector: [ -0.5, 90.0, -10, 14.8] + num_candidates: 3 + k: 2 + + - match: { hits.hits.0._id: "2" } + - match: { hits.hits.0.fields.name.0: "moose.jpg" } + + - match: { hits.hits.1._id: "3" } + - match: { hits.hits.1.fields.name.0: "rabbit.jpg" } + + - do: + search: + index: int4_flat_nested + body: + fields: [ "name" ] + knn: + field: nested.vector + query_vector: [ -0.5, 90.0, -10, 14.8] + num_candidates: 3 + k: 2 + + - match: { hits.hits.0._id: "2" } + - match: { hits.hits.0.fields.name.0: "moose.jpg" } + + - match: { hits.hits.1._id: "3" } + - match: { hits.hits.1.fields.name.0: "rabbit.jpg" } + diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/42_knn_search_int8_flat_bfloat16.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/42_knn_search_int8_flat_bfloat16.yml new file mode 100644 index 0000000000000..065593282b751 --- /dev/null +++ 
b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/42_knn_search_int8_flat_bfloat16.yml @@ -0,0 +1,555 @@ +setup: + - requires: + cluster_features: [ "mapper.vectors.generic_vector_format" ] + reason: Needs generic vector support + - do: + indices.create: + index: int8_flat + body: + settings: + index: + number_of_shards: 1 + mappings: + properties: + name: + type: keyword + vector: + type: dense_vector + element_type: bfloat16 + dims: 5 + index: true + similarity: l2_norm + index_options: + type: int8_flat + confidence_interval: 0.9 + another_vector: + type: dense_vector + element_type: bfloat16 + dims: 5 + index: true + similarity: l2_norm + index_options: + type: int8_flat + confidence_interval: 0.9 + + - do: + index: + index: int8_flat + id: "1" + body: + name: cow.jpg + vector: [230.0, 300.33, -34.8988, 15.555, -200.0] + another_vector: [130.0, 115.0, -1.02, 15.555, -100.0] + + - do: + index: + index: int8_flat + id: "2" + body: + name: moose.jpg + vector: [-0.5, 100.0, -13, 14.8, -156.0] + another_vector: [-0.5, 50.0, -1, 1, 120] + + - do: + index: + index: int8_flat + id: "3" + body: + name: rabbit.jpg + vector: [0.5, 111.3, -13.0, 14.8, -156.0] + another_vector: [-0.5, 11.0, 0, 12, 111.0] + + - do: + indices.refresh: {} + +--- +"kNN search only": + - do: + search: + index: int8_flat + body: + fields: [ "name" ] + knn: + field: vector + query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + k: 2 + num_candidates: 3 + + - match: {hits.hits.0._id: "2"} + - match: {hits.hits.0.fields.name.0: "moose.jpg"} + + - match: {hits.hits.1._id: "3"} + - match: {hits.hits.1.fields.name.0: "rabbit.jpg"} +--- +"kNN multi-field search only": + - do: + search: + index: int8_flat + body: + fields: [ "name" ] + knn: + - {field: vector, query_vector: [-0.5, 90.0, -10, 14.8, -156.0], k: 2, num_candidates: 3} + - {field: another_vector, query_vector: [-0.5, 11.0, 0, 12, 111.0], k: 2, num_candidates: 3} + + - match: {hits.hits.0._id: "3"} + - match: 
{hits.hits.0.fields.name.0: "rabbit.jpg"} + + - match: {hits.hits.1._id: "2"} + - match: {hits.hits.1.fields.name.0: "moose.jpg"} +--- +"kNN search plus query": + - do: + search: + index: int8_flat + body: + fields: [ "name" ] + knn: + field: vector + query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + k: 2 + num_candidates: 3 + query: + term: + name: cow.jpg + + - match: {hits.hits.0._id: "1"} + - match: {hits.hits.0.fields.name.0: "cow.jpg"} + + - match: {hits.hits.1._id: "2"} + - match: {hits.hits.1.fields.name.0: "moose.jpg"} + + - match: {hits.hits.2._id: "3"} + - match: {hits.hits.2.fields.name.0: "rabbit.jpg"} +--- +"kNN multi-field search with query": + - do: + search: + index: int8_flat + body: + fields: [ "name" ] + knn: + - {field: vector, query_vector: [-0.5, 90.0, -10, 14.8, -156.0], k: 2, num_candidates: 3} + - {field: another_vector, query_vector: [-0.5, 11.0, 0, 12, 111.0], k: 2, num_candidates: 3} + query: + term: + name: cow.jpg + + - match: {hits.hits.0._id: "3"} + - match: {hits.hits.0.fields.name.0: "rabbit.jpg"} + + - match: {hits.hits.1._id: "1"} + - match: {hits.hits.1.fields.name.0: "cow.jpg"} + + - match: {hits.hits.2._id: "2"} + - match: {hits.hits.2.fields.name.0: "moose.jpg"} +--- +"kNN search with filter": + - do: + search: + index: int8_flat + body: + fields: [ "name" ] + knn: + field: vector + query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + k: 2 + num_candidates: 3 + filter: + term: + name: "rabbit.jpg" + + - match: {hits.total.value: 1} + - match: {hits.hits.0._id: "3"} + - match: {hits.hits.0.fields.name.0: "rabbit.jpg"} + + - do: + search: + index: int8_flat + body: + fields: [ "name" ] + knn: + field: vector + query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + k: 2 + num_candidates: 3 + filter: + - term: + name: "rabbit.jpg" + - term: + _id: 2 + + - match: {hits.total.value: 0} + +--- +"KNN Vector similarity search only": + - do: + search: + index: int8_flat + body: + fields: [ "name" ] + knn: + num_candidates: 3 + k: 3 + field: vector 
+ similarity: 10.3 + query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + + - length: {hits.hits: 1} + + - match: {hits.hits.0._id: "2"} + - match: {hits.hits.0.fields.name.0: "moose.jpg"} +--- +"Vector similarity with filter only": + - do: + search: + index: int8_flat + body: + fields: [ "name" ] + knn: + num_candidates: 3 + k: 3 + field: vector + similarity: 11 + query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + filter: {"term": {"name": "moose.jpg"}} + + - length: {hits.hits: 1} + + - match: {hits.hits.0._id: "2"} + - match: {hits.hits.0.fields.name.0: "moose.jpg"} + + - do: + search: + index: int8_flat + body: + fields: [ "name" ] + knn: + num_candidates: 3 + k: 3 + field: vector + similarity: 110 + query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + filter: {"term": {"name": "cow.jpg"}} + + - length: {hits.hits: 0} +--- +"Cosine similarity with indexed vector": + - skip: + features: "headers" + - do: + headers: + Content-Type: application/json + search: + rest_total_hits_as_int: true + body: + query: + script_score: + query: {match_all: {} } + script: + source: "cosineSimilarity(params.query_vector, 'vector')" + params: + query_vector: [0.5, 111.3, -13.0, 14.8, -156.0] + + - match: {hits.total: 3} + + - match: {hits.hits.0._id: "3"} + - gte: {hits.hits.0._score: 0.999} + - lte: {hits.hits.0._score: 1.001} + + - match: {hits.hits.1._id: "2"} + - gte: {hits.hits.1._score: 0.998} + - lte: {hits.hits.1._score: 1.0} + + - match: {hits.hits.2._id: "1"} + - gte: {hits.hits.2._score: 0.78} + - lte: {hits.hits.2._score: 0.791} +--- +"Vector rescoring has the same scoring as exact search for kNN section": + - requires: + reason: 'Quantized vector rescoring is required' + test_runner_features: [capabilities] + capabilities: + - method: GET + path: /_search + capabilities: [knn_quantized_vector_rescore_oversample] + - skip: + features: "headers" + + # Rescore + - do: + headers: + Content-Type: application/json + search: + index: int8_flat + rest_total_hits_as_int: true + body: + 
fields: [ "name" ] + knn: + field: vector + query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + k: 3 + num_candidates: 3 + rescore_vector: + oversample: 1.5 + + # Get rescoring scores - hit ordering may change depending on how things are distributed + - match: { hits.total: 3 } + - set: { hits.hits.0._score: rescore_score0 } + - set: { hits.hits.1._score: rescore_score1 } + - set: { hits.hits.2._score: rescore_score2 } + + - do: + headers: + Content-Type: application/json + search: + rest_total_hits_as_int: true + body: + query: + script_score: + query: {match_all: {} } + script: + source: "1.0 / (1.0 + Math.pow(l2norm(params.query_vector, 'vector'), 2.0))" + params: + query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + + # Get rescoring scores - hit ordering may change depending on how things are distributed + - match: { hits.total: 3 } + - match: { hits.hits.0._score: $rescore_score0 } + - match: { hits.hits.1._score: $rescore_score1 } + - match: { hits.hits.2._score: $rescore_score2 } +--- +"Test bad parameters": + - do: + catch: bad_request + indices.create: + index: bad_int8_flat + body: + mappings: + properties: + vector: + type: dense_vector + element_type: bfloat16 + dims: 5 + index: true + index_options: + type: int8_flat + m: 42 + + - do: + catch: bad_request + indices.create: + index: bad_int8_flat + body: + mappings: + properties: + vector: + type: dense_vector + dims: 5 + element_type: byte + index: true + index_options: + type: int8_flat +--- +"Test index configured rescore vector": + - requires: + cluster_features: ["mapper.dense_vector.rescore_vector"] + reason: Needs rescore_vector feature + - skip: + features: "headers" + - do: + indices.create: + index: int8_rescore_flat + body: + settings: + index: + number_of_shards: 1 + mappings: + properties: + vector: + type: dense_vector + element_type: bfloat16 + dims: 64 + index: true + similarity: max_inner_product + index_options: + type: int8_flat + rescore_vector: + oversample: 1.5 + + - do: + bulk: + index: 
int8_rescore_flat + refresh: true + body: | + { "index": {"_id": "1"}} + { "vector": [0.077, 0.32 , -0.205, 0.63 , 0.032, 0.201, 0.167, -0.313, 0.176, 0.531, -0.375, 0.334, -0.046, 0.078, -0.349, 0.272, 0.307, -0.083, 0.504, 0.255, -0.404, 0.289, -0.226, -0.132, -0.216, 0.49 , 0.039, 0.507, -0.307, 0.107, 0.09 , -0.265, -0.285, 0.336, -0.272, 0.369, -0.282, 0.086, -0.132, 0.475, -0.224, 0.203, 0.439, 0.064, 0.246, -0.396, 0.297, 0.242, -0.028, 0.321, -0.022, -0.009, -0.001 , 0.031, -0.533, 0.45, -0.683, 1.331, 0.194, -0.157, -0.1 , -0.279, -0.098, -0.176] } + { "index": {"_id": "2"}} + { "vector": [0.196, 0.514, 0.039, 0.555, -0.042, 0.242, 0.463, -0.348, -0.08 , 0.442, -0.067, -0.05 , -0.001, 0.298, -0.377, 0.048, 0.307, 0.159, 0.278, 0.119, -0.057, 0.333, -0.289, -0.438, -0.014, 0.361, -0.169, 0.292, -0.229, 0.123, 0.031, -0.138, -0.139, 0.315, -0.216, 0.322, -0.445, -0.059, 0.071, 0.429, -0.602, -0.142, 0.11 , 0.192, 0.259, -0.241, 0.181, -0.166, 0.082, 0.107, -0.05 , 0.155, 0.011, 0.161, -0.486, 0.569, -0.489, 0.901, 0.208, 0.011, -0.209, -0.153, -0.27 , -0.013] } + { "index": {"_id": "3"}} + { "vector": [0.196, 0.514, 0.039, 0.555, -0.042, 0.242, 0.463, -0.348, -0.08 , 0.442, -0.067, -0.05 , -0.001, 0.298, -0.377, 0.048, 0.307, 0.159, 0.278, 0.119, -0.057, 0.333, -0.289, -0.438, -0.014, 0.361, -0.169, 0.292, -0.229, 0.123, 0.031, -0.138, -0.139, 0.315, -0.216, 0.322, -0.445, -0.059, 0.071, 0.429, -0.602, -0.142, 0.11 , 0.192, 0.259, -0.241, 0.181, -0.166, 0.082, 0.107, -0.05 , 0.155, 0.011, 0.161, -0.486, 0.569, -0.489, 0.901, 0.208, 0.011, -0.209, -0.153, -0.27 , -0.013] } + + - do: + headers: + Content-Type: application/json + search: + rest_total_hits_as_int: true + index: int8_rescore_flat + body: + knn: + field: vector + query_vector: [0.128, 0.067, -0.08 , 0.395, -0.11 , -0.259, 0.473, -0.393, + 0.292, 0.571, -0.491, 0.444, -0.288, 0.198, -0.343, 0.015, + 0.232, 0.088, 0.228, 0.151, -0.136, 0.236, -0.273, -0.259, + -0.217, 0.359, -0.207, 0.352, -0.142, 
0.192, -0.061, -0.17 , + -0.343, 0.189, -0.221, 0.32 , -0.301, -0.1 , 0.005, 0.232, + -0.344, 0.136, 0.252, 0.157, -0.13 , -0.244, 0.193, -0.034, + -0.12 , -0.193, -0.102, 0.252, -0.185, -0.167, -0.575, 0.582, + -0.426, 0.983, 0.212, 0.204, 0.03 , -0.276, -0.425, -0.158] + k: 3 + num_candidates: 3 + + - match: { hits.total: 3 } + - set: { hits.hits.0._score: rescore_score0 } + - set: { hits.hits.1._score: rescore_score1 } + - set: { hits.hits.2._score: rescore_score2 } + + - do: + headers: + Content-Type: application/json + search: + rest_total_hits_as_int: true + index: int8_rescore_flat + body: + query: + script_score: + query: {match_all: {} } + script: + source: "double similarity = dotProduct(params.query_vector, 'vector'); return similarity < 0 ? 1 / (1 + -1 * similarity) : similarity + 1" + params: + query_vector: [0.128, 0.067, -0.08 , 0.395, -0.11 , -0.259, 0.473, -0.393, + 0.292, 0.571, -0.491, 0.444, -0.288, 0.198, -0.343, 0.015, + 0.232, 0.088, 0.228, 0.151, -0.136, 0.236, -0.273, -0.259, + -0.217, 0.359, -0.207, 0.352, -0.142, 0.192, -0.061, -0.17 , + -0.343, 0.189, -0.221, 0.32 , -0.301, -0.1 , 0.005, 0.232, + -0.344, 0.136, 0.252, 0.157, -0.13 , -0.244, 0.193, -0.034, + -0.12 , -0.193, -0.102, 0.252, -0.185, -0.167, -0.575, 0.582, + -0.426, 0.983, 0.212, 0.204, 0.03 , -0.276, -0.425, -0.158] + + # Compare scores as hit IDs may change depending on how things are distributed + - match: { hits.total: 3 } + - match: { hits.hits.0._score: $rescore_score0 } + - match: { hits.hits.1._score: $rescore_score1 } + - match: { hits.hits.2._score: $rescore_score2 } + +--- +"no default oversample value": + - requires: + cluster_features: ["mapper.dense_vector.default_oversample_value_for_bbq"] + reason: "Needs default_oversample_value_for_bbq feature" + - do: + indices.get_mapping: + index: int8_flat + + - not_exists: int8_flat.mappings.properties.vector.index_options.rescore_vector +--- +"Nested flat search": + - do: + indices.create: + index: int8_flat_nested + 
body: + settings: + index: + number_of_shards: 1 + mappings: + properties: + name: + type: keyword + nested: + type: nested + properties: + paragraph_id: + type: keyword + vector: + type: dense_vector + element_type: bfloat16 + dims: 5 + index: true + similarity: l2_norm + index_options: + type: int8_flat + - do: + index: + index: int8_flat_nested + id: "1" + body: + name: cow.jpg + nested: + - paragraph_id: 0 + vector: [230.0, 300.33, -34.8988, 15.555, -200.0] + - paragraph_id: 1 + vector: [240.0, 300, -3, 1, -20] + + - do: + index: + index: int8_flat_nested + id: "2" + body: + name: moose.jpg + nested: + - paragraph_id: 0 + vector: [-0.5, 100.0, -13, 14.8, -156.0] + - paragraph_id: 2 + vector: [0, 100.0, 0, 14.8, -156.0] + - paragraph_id: 3 + vector: [0, 1.0, 0, 1.8, -15.0] + + - do: + index: + index: int8_flat_nested + id: "3" + body: + name: rabbit.jpg + nested: + - paragraph_id: 0 + vector: [0.5, 111.3, -13.0, 14.8, -156.0] + + - do: + indices.refresh: {} + - do: + search: + index: int8_flat_nested + body: + fields: [ "name" ] + query: + nested: + path: nested + query: + knn: + field: nested.vector + query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ] + num_candidates: 3 + k: 2 + + - match: { hits.hits.0._id: "2" } + - match: { hits.hits.0.fields.name.0: "moose.jpg" } + + - match: { hits.hits.1._id: "3" } + - match: { hits.hits.1.fields.name.0: "rabbit.jpg" } + + - do: + search: + index: int8_flat_nested + body: + fields: [ "name" ] + knn: + field: nested.vector + query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ] + num_candidates: 3 + k: 2 + + - match: { hits.hits.0._id: "2" } + - match: { hits.hits.0.fields.name.0: "moose.jpg" } + + - match: { hits.hits.1._id: "3" } + - match: { hits.hits.1.fields.name.0: "rabbit.jpg" } + diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93HnswScalarQuantizedVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93HnswScalarQuantizedVectorsFormat.java index 
ce5d2aeaf41b9..3c3f1fa48d2a8 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93HnswScalarQuantizedVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93HnswScalarQuantizedVectorsFormat.java @@ -143,4 +143,26 @@ public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException new Lucene99ScalarQuantizedVectorsReader(state, rawVectorFormat.fieldsReader(state), flatVectorScorer) ); } + + @Override + public String toString() { + return NAME + + "(name=" + + NAME + + ", maxConn=" + + maxConn + + ", beamWidth=" + + beamWidth + + ", confidenceInterval=" + + confidenceInterval + + ", bits=" + + bits + + ", compressed=" + + compress + + ", flatVectorScorer=" + + flatVectorScorer + + ", flatVectorFormat=" + + rawVectorFormat + + ")"; + } } diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93ScalarQuantizedVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93ScalarQuantizedVectorsFormat.java index 3750614da3698..1c143c8ec5a36 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93ScalarQuantizedVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93ScalarQuantizedVectorsFormat.java @@ -218,7 +218,7 @@ static final class ESQuantizedFlatVectorsScorer implements FlatVectorsScorer { @Override public String toString() { - return "ESFlatVectorsScorer(" + "delegate=" + delegate + ", factory=" + factory + ')'; + return "ESQuantizedFlatVectorsScorer(" + "delegate=" + delegate + ", factory=" + factory + ')'; } @Override diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java index aaf01aef48217..46eff4acc80f1 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java +++ 
b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java @@ -1976,21 +1976,40 @@ public void testKnnQuantizedFlatVectorsFormat() throws IOException { knnVectorsFormat = ((LegacyPerFieldMapperCodec) codec).getKnnVectorsFormatForField("field"); } VectorScorerFactory factory = VectorScorerFactory.instance().orElse(null); - String expectedString = "ES813Int8FlatVectorFormat(name=ES813Int8FlatVectorFormat, innerFormat=" - + "ES814ScalarQuantizedVectorsFormat(name=ES814ScalarQuantizedVectorsFormat," - + " confidenceInterval=" - + (setConfidenceInterval ? Float.toString(confidenceInterval) : (quantizedFlatFormat.equals("int4_flat") ? "0.0" : null)) - + ", bits=" - + (quantizedFlatFormat.equals("int4_flat") ? 4 : 7) - + ", compressed=" - + quantizedFlatFormat.equals("int4_flat") - + ", flatVectorScorer=ESFlatVectorsScorer(" - + "delegate=ScalarQuantizedVectorScorer(nonQuantizedDelegate=DefaultFlatVectorScorer())" - + ", factory=" - + (factory != null ? factory : "null") - + "), " - + "rawVectorFormat=Lucene99FlatVectorsFormat(vectorsScorer=DefaultFlatVectorScorer())))"; - assertEquals(expectedString, knnVectorsFormat.toString()); + String expectedString = ES93GenericFlatVectorsFormat.GENERIC_VECTOR_FORMAT.isEnabled() + ? "ES93ScalarQuantizedVectorsFormat(name=ES93ScalarQuantizedVectorsFormat," + + " confidenceInterval=" + + (setConfidenceInterval + ? Float.toString(confidenceInterval) + : (quantizedFlatFormat.equals("int4_flat") ? "0.0" : null)) + + ", bits=" + + (quantizedFlatFormat.equals("int4_flat") ? 4 : 7) + + ", compressed=" + + quantizedFlatFormat.equals("int4_flat") + + ", flatVectorScorer=ESQuantizedFlatVectorsScorer(" + + "delegate=ScalarQuantizedVectorScorer(nonQuantizedDelegate=DefaultFlatVectorScorer())" + + ", factory=" + + (factory != null ? 
factory : "null") + + "), " + + "rawVectorFormat=ES93GenericFlatVectorsFormat(name=ES93GenericFlatVectorsFormat" + + ", format=Lucene99FlatVectorsFormat(name=Lucene99FlatVectorsFormat, flatVectorScorer=DefaultFlatVectorScorer())))" + : "ES813Int8FlatVectorFormat(name=ES813Int8FlatVectorFormat, innerFormat=" + + "ES814ScalarQuantizedVectorsFormat(name=ES814ScalarQuantizedVectorsFormat," + + " confidenceInterval=" + + (setConfidenceInterval + ? Float.toString(confidenceInterval) + : (quantizedFlatFormat.equals("int4_flat") ? "0.0" : null)) + + ", bits=" + + (quantizedFlatFormat.equals("int4_flat") ? 4 : 7) + + ", compressed=" + + quantizedFlatFormat.equals("int4_flat") + + ", flatVectorScorer=ESQuantizedFlatVectorsScorer(" + + "delegate=ScalarQuantizedVectorScorer(nonQuantizedDelegate=DefaultFlatVectorScorer())" + + ", factory=" + + (factory != null ? factory : "null") + + "), " + + "rawVectorFormat=Lucene99FlatVectorsFormat(vectorsScorer=DefaultFlatVectorScorer())))"; + assertThat(knnVectorsFormat, hasToString(expectedString)); } } @@ -2027,21 +2046,36 @@ public void testKnnQuantizedHNSWVectorsFormat() throws IOException { knnVectorsFormat = ((LegacyPerFieldMapperCodec) codec).getKnnVectorsFormatForField("field"); } VectorScorerFactory factory = VectorScorerFactory.instance().orElse(null); - String expectedString = "ES814HnswScalarQuantizedVectorsFormat(name=ES814HnswScalarQuantizedVectorsFormat, maxConn=" - + m - + ", beamWidth=" - + efConstruction - + ", flatVectorFormat=ES814ScalarQuantizedVectorsFormat(" - + "name=ES814ScalarQuantizedVectorsFormat, confidenceInterval=" - + (setConfidenceInterval ? confidenceInterval : null) - + ", bits=7, compressed=false, " - + "flatVectorScorer=ESFlatVectorsScorer(delegate=ScalarQuantizedVectorScorer(nonQuantizedDelegate=DefaultFlatVectorScorer()), " - + "factory=" - + (factory != null ? 
factory : "null") - + "), " - + "rawVectorFormat=Lucene99FlatVectorsFormat(vectorsScorer=DefaultFlatVectorScorer())" - + "))"; - assertEquals(expectedString, knnVectorsFormat.toString()); + String expectedString = ES93GenericFlatVectorsFormat.GENERIC_VECTOR_FORMAT.isEnabled() + ? "ES93HnswScalarQuantizedVectorsFormat(name=ES93HnswScalarQuantizedVectorsFormat, maxConn=" + + m + + ", beamWidth=" + + efConstruction + + ", confidenceInterval=" + + (setConfidenceInterval ? confidenceInterval : null) + + ", bits=7, compressed=false, " + + "flatVectorScorer=ESQuantizedFlatVectorsScorer(delegate=" + + "ScalarQuantizedVectorScorer(nonQuantizedDelegate=DefaultFlatVectorScorer()), " + + "factory=" + + (factory != null ? factory : "null") + + "), flatVectorFormat=ES93GenericFlatVectorsFormat(name=ES93GenericFlatVectorsFormat" + + ", format=Lucene99FlatVectorsFormat(name=Lucene99FlatVectorsFormat, flatVectorScorer=DefaultFlatVectorScorer())))" + : "ES814HnswScalarQuantizedVectorsFormat(name=ES814HnswScalarQuantizedVectorsFormat, maxConn=" + + m + + ", beamWidth=" + + efConstruction + + ", flatVectorFormat=ES814ScalarQuantizedVectorsFormat(" + + "name=ES814ScalarQuantizedVectorsFormat, confidenceInterval=" + + (setConfidenceInterval ? confidenceInterval : null) + + ", bits=7, compressed=false, " + + "flatVectorScorer=ESFlatVectorsScorer(delegate=ScalarQuantizedVectorScorer(" + + "nonQuantizedDelegate=DefaultFlatVectorScorer()), " + + "factory=" + + (factory != null ? 
factory : "null") + + "), " + + "rawVectorFormat=Lucene99FlatVectorsFormat(vectorsScorer=DefaultFlatVectorScorer())" + + "))"; + assertThat(knnVectorsFormat, hasToString(expectedString)); } public void testKnnBBQHNSWVectorsFormat() throws IOException { @@ -2170,21 +2204,34 @@ public void testKnnHalfByteQuantizedHNSWVectorsFormat() throws IOException { knnVectorsFormat = ((LegacyPerFieldMapperCodec) codec).getKnnVectorsFormatForField("field"); } VectorScorerFactory factory = VectorScorerFactory.instance().orElse(null); - String expectedString = "ES814HnswScalarQuantizedVectorsFormat(name=ES814HnswScalarQuantizedVectorsFormat, maxConn=" - + m - + ", beamWidth=" - + efConstruction - + ", flatVectorFormat=ES814ScalarQuantizedVectorsFormat(" - + "name=ES814ScalarQuantizedVectorsFormat, confidenceInterval=" - + (setConfidenceInterval ? confidenceInterval : 0.0f) - + ", bits=4, compressed=true, " - + "flatVectorScorer=ESFlatVectorsScorer(delegate=ScalarQuantizedVectorScorer(nonQuantizedDelegate=DefaultFlatVectorScorer()), " - + "factory=" - + (factory != null ? factory : "null") - + "), " - + "rawVectorFormat=Lucene99FlatVectorsFormat(vectorsScorer=DefaultFlatVectorScorer())" - + "))"; - assertEquals(expectedString, knnVectorsFormat.toString()); + String expectedString = ES93GenericFlatVectorsFormat.GENERIC_VECTOR_FORMAT.isEnabled() + ? "ES93HnswScalarQuantizedVectorsFormat(name=ES93HnswScalarQuantizedVectorsFormat, maxConn=" + + m + + ", beamWidth=" + + efConstruction + + ", confidenceInterval=" + + (setConfidenceInterval ? confidenceInterval : 0.0f) + + ", bits=4, compressed=true, flatVectorScorer=ESQuantizedFlatVectorsScorer(delegate=" + + "ScalarQuantizedVectorScorer(nonQuantizedDelegate=DefaultFlatVectorScorer()), factory=" + + (factory != null ? 
factory : "null") + + "), flatVectorFormat=ES93GenericFlatVectorsFormat(name=ES93GenericFlatVectorsFormat" + + ", format=Lucene99FlatVectorsFormat(name=Lucene99FlatVectorsFormat, flatVectorScorer=DefaultFlatVectorScorer())))" + : "ES814HnswScalarQuantizedVectorsFormat(name=ES814HnswScalarQuantizedVectorsFormat, maxConn=" + + m + + ", beamWidth=" + + efConstruction + + ", flatVectorFormat=ES814ScalarQuantizedVectorsFormat(" + + "name=ES814ScalarQuantizedVectorsFormat, confidenceInterval=" + + (setConfidenceInterval ? confidenceInterval : 0.0f) + + ", bits=4, compressed=true, " + + "flatVectorScorer=ESFlatVectorsScorer(delegate=ScalarQuantizedVectorScorer(" + + "nonQuantizedDelegate=DefaultFlatVectorScorer()), " + + "factory=" + + (factory != null ? factory : "null") + + "), " + + "rawVectorFormat=Lucene99FlatVectorsFormat(vectorsScorer=DefaultFlatVectorScorer())" + + "))"; + assertThat(knnVectorsFormat, hasToString(expectedString)); } public void testInvalidVectorDimensions() { From 3641e251726e23e96632fb5765f32fdb0160279a Mon Sep 17 00:00:00 2001 From: Simon Cooper Date: Tue, 18 Nov 2025 10:45:39 +0000 Subject: [PATCH 7/8] Test fixes --- .../search.vectors/41_knn_search_byte_quantized_bfloat16.yml | 1 + .../41_knn_search_half_byte_quantized_bfloat16.yml | 1 + 2 files changed, 2 insertions(+) diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/41_knn_search_byte_quantized_bfloat16.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/41_knn_search_byte_quantized_bfloat16.yml index 9912dab51f4cd..92141ba8e220a 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/41_knn_search_byte_quantized_bfloat16.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/41_knn_search_byte_quantized_bfloat16.yml @@ -736,6 +736,7 @@ setup: properties: vector: type: dense_vector + element_type: bfloat16 index_options: type: int8_hnsw rescore_vector: diff --git 
a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/41_knn_search_half_byte_quantized_bfloat16.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/41_knn_search_half_byte_quantized_bfloat16.yml index 68272ec7b1815..8b9467f0d97e1 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/41_knn_search_half_byte_quantized_bfloat16.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/41_knn_search_half_byte_quantized_bfloat16.yml @@ -802,6 +802,7 @@ setup: properties: vector: type: dense_vector + element_type: bfloat16 similarity: max_inner_product dims: 4 index_options: From f7ecb04a09c546e0522293757f5415f38df1011a Mon Sep 17 00:00:00 2001 From: Simon Cooper Date: Tue, 18 Nov 2025 12:36:18 +0000 Subject: [PATCH 8/8] Only 4 or 7 bits Add DirectIOIT test cases --- .../elasticsearch/index/store/DirectIOIT.java | 3 ++- .../ES93HnswScalarQuantizedVectorsFormat.java | 4 ++-- .../es93/ES93ScalarQuantizedVectorsFormat.java | 4 ++-- .../mapper/vectors/DenseVectorFieldMapper.java | 16 ++++++++-------- .../vectors/DenseVectorFieldTypeTests.java | 4 ++-- 5 files changed, 16 insertions(+), 15 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/index/store/DirectIOIT.java b/server/src/internalClusterTest/java/org/elasticsearch/index/store/DirectIOIT.java index 578182b7e5aec..a85a435408402 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/index/store/DirectIOIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/index/store/DirectIOIT.java @@ -38,6 +38,7 @@ import java.util.Map; import java.util.OptionalLong; import java.util.stream.IntStream; +import java.util.stream.Stream; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount; @@ -76,7 +77,7 @@ protected boolean useDirectIO(String name, IOContext 
context, OptionalLong fileL @ParametersFactory public static Iterable parameters() { - return List.of(new Object[] { "bbq_hnsw" }, new Object[] { "bbq_disk" }); + return Stream.of("int4_hnsw", "int8_hnsw", "bbq_hnsw", "bbq_disk").map(s -> new Object[] { s }).toList(); } public DirectIOIT(String type) { diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93HnswScalarQuantizedVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93HnswScalarQuantizedVectorsFormat.java index 3c3f1fa48d2a8..cd861954ddc02 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93HnswScalarQuantizedVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93HnswScalarQuantizedVectorsFormat.java @@ -33,7 +33,7 @@ public class ES93HnswScalarQuantizedVectorsFormat extends AbstractHnswVectorsFormat { static final String NAME = "ES93HnswScalarQuantizedVectorsFormat"; - private static final int ALLOWED_BITS = (1 << 8) | (1 << 7) | (1 << 4); + private static final int ALLOWED_BITS = (1 << 7) | (1 << 4); /** The minimum confidence interval */ private static final float MINIMUM_CONFIDENCE_INTERVAL = 0.9f; @@ -102,7 +102,7 @@ public ES93HnswScalarQuantizedVectorsFormat( ); } if (bits < 1 || bits > 8 || (ALLOWED_BITS & (1 << bits)) == 0) { - throw new IllegalArgumentException("bits must be one of: 4, 7, 8; bits=" + bits); + throw new IllegalArgumentException("bits must be one of: 4, 7; bits=" + bits); } assert elementType != DenseVectorFieldMapper.ElementType.BIT : "BIT should not be used with scalar quantization"; diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93ScalarQuantizedVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93ScalarQuantizedVectorsFormat.java index 1c143c8ec5a36..e69cbbd331872 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93ScalarQuantizedVectorsFormat.java +++ 
b/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93ScalarQuantizedVectorsFormat.java @@ -46,7 +46,7 @@ public class ES93ScalarQuantizedVectorsFormat extends KnnVectorsFormat { static final String NAME = "ES93ScalarQuantizedVectorsFormat"; - private static final int ALLOWED_BITS = (1 << 8) | (1 << 7) | (1 << 4); + private static final int ALLOWED_BITS = (1 << 7) | (1 << 4); static final FlatVectorsScorer flatVectorScorer = new ESQuantizedFlatVectorsScorer( new ScalarQuantizedVectorScorer(FlatVectorScorerUtil.getLucene99FlatVectorsScorer()) @@ -97,7 +97,7 @@ public ES93ScalarQuantizedVectorsFormat( ); } if (bits < 1 || bits > 8 || (ALLOWED_BITS & (1 << bits)) == 0) { - throw new IllegalArgumentException("bits must be one of: 4, 7, 8; bits=" + bits); + throw new IllegalArgumentException("bits must be one of: 4, 7; bits=" + bits); } assert elementType != DenseVectorFieldMapper.ElementType.BIT : "BIT should not be used with scalar quantization"; diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java index 9186374281947..112ca683ef078 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java @@ -1755,7 +1755,7 @@ public DenseVectorIndexOptions parseIndexOptions(String fieldName, Map