From e352345c233fc977212d2d6e386ac346c1f9603f Mon Sep 17 00:00:00 2001 From: Benjamin Trent Date: Mon, 13 May 2024 10:59:01 -0400 Subject: [PATCH] Ensure we return non-negative scores when scoring scalar dot-products (#108522) closes: https://github.com/elastic/elasticsearch/issues/108339 --- docs/changelog/108522.yaml | 5 ++ .../vec/internal/Int7DotProduct.java | 5 +- .../vec/VectorScorerFactoryTests.java | 46 +++++++++++++++++++ 3 files changed, 54 insertions(+), 2 deletions(-) create mode 100644 docs/changelog/108522.yaml diff --git a/docs/changelog/108522.yaml b/docs/changelog/108522.yaml new file mode 100644 index 0000000000000..5bc064d7995e9 --- /dev/null +++ b/docs/changelog/108522.yaml @@ -0,0 +1,5 @@ +pr: 108522 +summary: Ensure we return non-negative scores when scoring scalar dot-products +area: Vector Search +type: bug +issues: [] diff --git a/libs/vec/src/main21/java/org/elasticsearch/vec/internal/Int7DotProduct.java b/libs/vec/src/main21/java/org/elasticsearch/vec/internal/Int7DotProduct.java index 9b452219bd635..5231bb8e3c67f 100644 --- a/libs/vec/src/main21/java/org/elasticsearch/vec/internal/Int7DotProduct.java +++ b/libs/vec/src/main21/java/org/elasticsearch/vec/internal/Int7DotProduct.java @@ -47,10 +47,11 @@ public float score(int firstOrd, int secondOrd) throws IOException { if (firstSeg != null && secondSeg != null) { int dotProduct = dotProduct7u(firstSeg, secondSeg, length); + assert dotProduct >= 0; float adjustedDistance = dotProduct * scoreCorrectionConstant + firstOffset + secondOffset; - return (1 + adjustedDistance) / 2; + return Math.max((1 + adjustedDistance) / 2, 0f); } else { - return fallbackScore(firstByteOffset, secondByteOffset); + return Math.max(fallbackScore(firstByteOffset, secondByteOffset), 0f); } } } diff --git a/libs/vec/src/test/java/org/elasticsearch/vec/VectorScorerFactoryTests.java b/libs/vec/src/test/java/org/elasticsearch/vec/VectorScorerFactoryTests.java index 246ddaeb2ebcf..07d30a887c683 100644 --- a/libs/vec/src/test/java/org/elasticsearch/vec/VectorScorerFactoryTests.java +++ b/libs/vec/src/test/java/org/elasticsearch/vec/VectorScorerFactoryTests.java @@ -28,6 +28,7 @@ import static org.elasticsearch.vec.VectorSimilarityType.EUCLIDEAN; import static org.elasticsearch.vec.VectorSimilarityType.MAXIMUM_INNER_PRODUCT; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThanOrEqualTo; // @com.carrotsearch.randomizedtesting.annotations.Repeat(iterations = 100) public class VectorScorerFactoryTests extends AbstractVectorTestCase { @@ -96,6 +97,51 @@ void testSimpleImpl(long maxChunkSize) throws IOException { } } + public void testNonNegativeDotProduct() throws IOException { + assumeTrue(notSupportedMsg(), supported()); + var factory = AbstractVectorTestCase.factory.get(); + + try (Directory dir = new MMapDirectory(createTempDir(getTestName()), MMapDirectory.DEFAULT_MAX_CHUNK_SIZE)) { + // keep vecs `0` so dot product is `0` + byte[] vec1 = new byte[32]; + byte[] vec2 = new byte[32]; + String fileName = getTestName() + "-32"; + try (IndexOutput out = dir.createOutput(fileName, IOContext.DEFAULT)) { + var negativeOffset = floatToByteArray(-5f); + byte[] bytes = concat(vec1, negativeOffset, vec2, negativeOffset); + out.writeBytes(bytes, 0, bytes.length); + } + try (IndexInput in = dir.openInput(fileName, IOContext.DEFAULT)) { + // dot product + float expected = 0f; // TODO fix in Lucene: https://github.com/apache/lucene/pull/13356 luceneScore(DOT_PRODUCT, vec1, vec2, + // 1, -5, -5); + var scorer = factory.getInt7ScalarQuantizedVectorScorer(32, 2, 1, DOT_PRODUCT, in).get(); + assertThat(scorer.score(0, 1), equalTo(expected)); + assertThat(scorer.score(0, 1), greaterThanOrEqualTo(0f)); + assertThat((new VectorScorerSupplierAdapter(scorer)).scorer(0).score(1), equalTo(expected)); + // max inner product + expected = luceneScore(MAXIMUM_INNER_PRODUCT, vec1, vec2, 1, -5, -5); + scorer = factory.getInt7ScalarQuantizedVectorScorer(32, 2, 1, MAXIMUM_INNER_PRODUCT, in).get(); + assertThat(scorer.score(0, 1), greaterThanOrEqualTo(0f)); + assertThat(scorer.score(0, 1), equalTo(expected)); + assertThat((new VectorScorerSupplierAdapter(scorer)).scorer(0).score(1), equalTo(expected)); + // cosine + expected = 0f; // TODO fix in Lucene: https://github.com/apache/lucene/pull/13356 luceneScore(COSINE, vec1, vec2, 1, -5, + // -5); + scorer = factory.getInt7ScalarQuantizedVectorScorer(32, 2, 1, COSINE, in).get(); + assertThat(scorer.score(0, 1), equalTo(expected)); + assertThat(scorer.score(0, 1), greaterThanOrEqualTo(0f)); + assertThat((new VectorScorerSupplierAdapter(scorer)).scorer(0).score(1), equalTo(expected)); + // euclidean + expected = luceneScore(EUCLIDEAN, vec1, vec2, 1, -5, -5); + scorer = factory.getInt7ScalarQuantizedVectorScorer(32, 2, 1, EUCLIDEAN, in).get(); + assertThat(scorer.score(0, 1), equalTo(expected)); + assertThat(scorer.score(0, 1), greaterThanOrEqualTo(0f)); + assertThat((new VectorScorerSupplierAdapter(scorer)).scorer(0).score(1), equalTo(expected)); + } + } + } + public void testRandom() throws IOException { assumeTrue(notSupportedMsg(), supported()); testRandom(MMapDirectory.DEFAULT_MAX_CHUNK_SIZE, BYTE_ARRAY_RANDOM_INT7_FUNC);