Prevent humongous allocations when calculating scalar quantiles (#13090)
The initial release of scalar quantization would periodically create a humongous allocation, which can put unwarranted pressure on the GC & on the heap usage as a whole.

This commit addresses this by allocating only a float array of 20*dimensions and averaging the quantiles discovered from each such batch (the idea is sketched in code after the list below).

Why does this work?

 - Quantiles based on confidence intervals are (generally) unbiased, so averaging them gives statistically good results
 - The selector algorithm scales linearly, so the cost is just about the same
 - We need to process more than `1` vector at a time to prevent extreme confidence intervals from interacting strangely with edge cases
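
To make the batching concrete, the following is a minimal, self-contained sketch of the idea, not the Lucene code: every name in it is invented for illustration, and a sort stands in for the linear-time selector that `getUpperAndLowerQuantile` actually uses.

import java.util.Arrays;
import java.util.Random;

// Standalone sketch of the batched-quantile idea; not the Lucene implementation.
class BatchedQuantileSketch {
  static final int SCRATCH_SIZE = 20; // vectors per batch, as in the commit

  // Returns {lower, upper} quantiles for the given confidence interval.
  // Lucene uses a linear-time selector; sorting stands in here for brevity.
  static float[] upperAndLowerQuantile(float[] values, float confidenceInterval) {
    float[] copy = values.clone();
    Arrays.sort(copy);
    int skip = (int) ((1f - confidenceInterval) * 0.5f * copy.length);
    return new float[] {copy[skip], copy[copy.length - 1 - skip]};
  }

  public static void main(String[] args) {
    int dim = 64;
    int numVectors = 200;
    Random rnd = new Random(42);

    // Fixed-size scratch: SCRATCH_SIZE * dim floats, no matter how many vectors exist.
    float[] scratch = new float[SCRATCH_SIZE * dim];
    double lowerSum = 0;
    double upperSum = 0;
    int batches = 0;
    int filled = 0;
    for (int v = 0; v < numVectors; v++) {
      float[] vector = new float[dim];
      for (int j = 0; j < dim; j++) {
        vector[j] = (float) rnd.nextGaussian();
      }
      System.arraycopy(vector, 0, scratch, filled * dim, dim);
      if (++filled == SCRATCH_SIZE) {
        float[] q = upperAndLowerQuantile(scratch, 0.9f);
        lowerSum += q[0];
        upperSum += q[1];
        batches++;
        filled = 0;
      }
    }
    // Per-batch quantiles are (roughly) unbiased, so their average tracks the
    // quantiles that one huge allocation over all vectors would have produced.
    System.out.printf("lower=%.4f upper=%.4f%n", lowerSum / batches, upperSum / batches);
  }
}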
benwtrent committed Feb 8, 2024
1 parent c70c946 commit 477cd56
Showing 3 changed files with 60 additions and 90 deletions.
1 change: 1 addition & 0 deletions lucene/CHANGES.txt
@@ -86,6 +86,7 @@ Optimizations
 
 * GITHUB#12962: Speedup concurrent multi-segment HNSW graph search (Mayya Sharipova, Tom Veasey)
 
+* GITHUB#13090: Prevent humongous allocations in ScalarQuantizer when building quantiles. (Ben Trent)
 
 Bug Fixes
 ---------------------
lucene/core/src/java/org/apache/lucene/util/quantization/ScalarQuantizer.java
@@ -68,6 +68,9 @@
 public class ScalarQuantizer {
 
   public static final int SCALAR_QUANTIZATION_SAMPLE_SIZE = 25_000;
+  // 20*dimension provides protection from extreme confidence intervals
+  // and also prevents humongous allocations
+  static final int SCRATCH_SIZE = 20;
 
   private final float alpha;
   private final float scale;
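
To put rough numbers on that comment (the dimension count here is chosen purely for illustration): with the G1 collector, an allocation is "humongous" once it reaches half a region, and regions are at most 32 MB, so the threshold is at most 16 MB. Gathering 25,000 sampled vectors of 768 dimensions into one float[] needs 25,000 × 768 × 4 B ≈ 77 MB, always humongous; the scratch buffer needs only 20 × 768 × 4 B ≈ 61 KB.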
@@ -206,41 +209,6 @@ static int[] reservoirSampleIndices(int numFloatVecs, int sampleSize) {
     return vectorsToTake;
   }
 
-  static float[] sampleVectors(FloatVectorValues floatVectorValues, int[] vectorsToTake)
-      throws IOException {
-    int dim = floatVectorValues.dimension();
-    float[] values = new float[vectorsToTake.length * dim];
-    int copyOffset = 0;
-    int index = 0;
-    for (int i : vectorsToTake) {
-      while (index <= i) {
-        // We cannot use `advance(docId)` as MergedVectorValues does not support it
-        floatVectorValues.nextDoc();
-        index++;
-      }
-      assert floatVectorValues.docID() != NO_MORE_DOCS;
-      float[] floatVector = floatVectorValues.vectorValue();
-      System.arraycopy(floatVector, 0, values, copyOffset, floatVector.length);
-      copyOffset += dim;
-    }
-    return values;
-  }
-
-  /**
-   * See {@link #fromVectors(FloatVectorValues, float, int)} for details on how the quantiles are
-   * calculated. NOTE: If there are deleted vectors in the index, do not use this method, but
-   * instead use {@link #fromVectors(FloatVectorValues, float, int)}. This is because the
-   * totalVectorCount is used to account for deleted documents when sampling.
-   */
-  public static ScalarQuantizer fromVectors(
-      FloatVectorValues floatVectorValues, float confidenceInterval) throws IOException {
-    return fromVectors(
-        floatVectorValues,
-        confidenceInterval,
-        floatVectorValues.size(),
-        SCALAR_QUANTIZATION_SAMPLE_SIZE);
-  }
-
   /**
    * This will read the float vector values and calculate the quantiles. If the number of float
    * vectors is less than {@link #SCALAR_QUANTIZATION_SAMPLE_SIZE} then all the values will be read
@@ -269,6 +237,7 @@ static ScalarQuantizer fromVectors(
       int quantizationSampleSize)
       throws IOException {
     assert 0.9f <= confidenceInterval && confidenceInterval <= 1f;
+    assert quantizationSampleSize > SCRATCH_SIZE;
     if (totalVectorCount == 0) {
       return new ScalarQuantizer(0f, 0f, confidenceInterval);
     }
@@ -283,24 +252,60 @@ static ScalarQuantizer fromVectors(
       }
       return new ScalarQuantizer(min, max, confidenceInterval);
     }
-    int dim = floatVectorValues.dimension();
+    final float[] quantileGatheringScratch =
+        new float[floatVectorValues.dimension() * Math.min(SCRATCH_SIZE, totalVectorCount)];
+    int count = 0;
+    double upperSum = 0;
+    double lowerSum = 0;
     if (totalVectorCount <= quantizationSampleSize) {
-      int copyOffset = 0;
-      float[] values = new float[totalVectorCount * dim];
+      int scratchSize = Math.min(SCRATCH_SIZE, totalVectorCount);
+      int i = 0;
       while (floatVectorValues.nextDoc() != NO_MORE_DOCS) {
-        float[] floatVector = floatVectorValues.vectorValue();
-        System.arraycopy(floatVector, 0, values, copyOffset, floatVector.length);
-        copyOffset += dim;
+        float[] vectorValue = floatVectorValues.vectorValue();
+        System.arraycopy(
+            vectorValue, 0, quantileGatheringScratch, i * vectorValue.length, vectorValue.length);
+        i++;
+        if (i == scratchSize) {
+          float[] upperAndLower =
+              getUpperAndLowerQuantile(quantileGatheringScratch, confidenceInterval);
+          upperSum += upperAndLower[1];
+          lowerSum += upperAndLower[0];
+          i = 0;
+          count++;
+        }
       }
-      float[] upperAndLower = getUpperAndLowerQuantile(values, confidenceInterval);
-      return new ScalarQuantizer(upperAndLower[0], upperAndLower[1], confidenceInterval);
+      // Note, we purposefully don't use the rest of the scratch state if we have fewer than
+      // `SCRATCH_SIZE` vectors, mainly because if we are sampling so few vectors then we don't
+      // want to be adversely affected by the extreme confidence intervals over small sample sizes
+      return new ScalarQuantizer(
+          (float) lowerSum / count, (float) upperSum / count, confidenceInterval);
     }
-    int numFloatVecs = totalVectorCount;
     // Reservoir sample the vector ordinals we want to read
-    int[] vectorsToTake = reservoirSampleIndices(numFloatVecs, quantizationSampleSize);
-    float[] values = sampleVectors(floatVectorValues, vectorsToTake);
-    float[] upperAndLower = getUpperAndLowerQuantile(values, confidenceInterval);
-    return new ScalarQuantizer(upperAndLower[0], upperAndLower[1], confidenceInterval);
+    int[] vectorsToTake = reservoirSampleIndices(totalVectorCount, quantizationSampleSize);
+    int index = 0;
+    int idx = 0;
+    for (int i : vectorsToTake) {
+      while (index <= i) {
+        // We cannot use `advance(docId)` as MergedVectorValues does not support it
+        floatVectorValues.nextDoc();
+        index++;
+      }
+      assert floatVectorValues.docID() != NO_MORE_DOCS;
+      float[] vectorValue = floatVectorValues.vectorValue();
+      System.arraycopy(
+          vectorValue, 0, quantileGatheringScratch, idx * vectorValue.length, vectorValue.length);
+      idx++;
+      if (idx == SCRATCH_SIZE) {
+        float[] upperAndLower =
+            getUpperAndLowerQuantile(quantileGatheringScratch, confidenceInterval);
+        upperSum += upperAndLower[1];
+        lowerSum += upperAndLower[0];
+        count++;
+        idx = 0;
+      }
+    }
+    return new ScalarQuantizer(
+        (float) lowerSum / count, (float) upperSum / count, confidenceInterval);
   }

/**
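`reservoirSampleIndices` itself is unchanged by this commit, so its body does not appear in the diff. As a hedge for readers following along, the sketch below shows the standard technique its name implies (Vitter's Algorithm R, with the result sorted so the caller can only ever move forward via `nextDoc()`); the method shape and the explicit `Random` parameter are assumptions for illustration, not the Lucene implementation. The removed `testVectorSampling` test below asserted exactly this sorted-and-unique property.

import java.util.Arrays;
import java.util.Random;

// Sketch of reservoir sampling of vector ordinals (Vitter's Algorithm R).
// Not the Lucene implementation; the signature and Random parameter are assumed.
class ReservoirSketch {
  static int[] sampleIndices(int numFloatVecs, int sampleSize, Random rnd) {
    assert sampleSize <= numFloatVecs;
    int[] reservoir = new int[sampleSize];
    // Seed the reservoir with the first sampleSize ordinals.
    for (int i = 0; i < sampleSize; i++) {
      reservoir[i] = i;
    }
    // Each later ordinal replaces a random slot with decreasing probability,
    // leaving every ordinal sampled with probability sampleSize / numFloatVecs.
    for (int i = sampleSize; i < numFloatVecs; i++) {
      int slot = rnd.nextInt(i + 1);
      if (slot < sampleSize) {
        reservoir[slot] = i;
      }
    }
    // Sorted (and inherently unique), so the consumer can walk forward with nextDoc().
    Arrays.sort(reservoir);
    return reservoir;
  }

  public static void main(String[] args) {
    System.out.println(Arrays.toString(sampleIndices(1_000, 10, new Random(7))));
  }
}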
lucene/core/src/test/org/apache/lucene/util/quantization/TestScalarQuantizer.java
@@ -16,6 +16,8 @@
  */
 package org.apache.lucene.util.quantization;
 
+import static org.apache.lucene.util.quantization.ScalarQuantizer.SCRATCH_SIZE;
+
 import java.io.IOException;
 import java.util.HashSet;
 import java.util.Set;
@@ -73,44 +75,6 @@ public void testEdgeCase() {
     assertEquals(1f, upperAndLower[1], 1e-7f);
   }
 
-  public void testSamplingEdgeCases() throws IOException {
-    int numVecs = 65;
-    int dims = 64;
-    float[][] floats = randomFloats(numVecs, dims);
-    FloatVectorValues floatVectorValues = fromFloats(floats);
-    int[] vectorsToTake = new int[] {0, floats.length - 1};
-    float[] sampled = ScalarQuantizer.sampleVectors(floatVectorValues, vectorsToTake);
-    int i = 0;
-    for (; i < dims; i++) {
-      assertEquals(floats[vectorsToTake[0]][i], sampled[i], 0.0f);
-    }
-    for (; i < dims * 2; i++) {
-      assertEquals(floats[vectorsToTake[1]][i - dims], sampled[i], 0.0f);
-    }
-  }
-
-  public void testVectorSampling() throws IOException {
-    int numVecs = random().nextInt(123) + 5;
-    int dims = 4;
-    float[][] floats = randomFloats(numVecs, dims);
-    FloatVectorValues floatVectorValues = fromFloats(floats);
-    int[] vectorsToTake =
-        ScalarQuantizer.reservoirSampleIndices(numVecs, random().nextInt(numVecs - 1) + 1);
-    int prev = vectorsToTake[0];
-    // ensure sorted & unique
-    for (int i = 1; i < vectorsToTake.length; i++) {
-      assertTrue(vectorsToTake[i] > prev);
-      prev = vectorsToTake[i];
-    }
-    float[] sampled = ScalarQuantizer.sampleVectors(floatVectorValues, vectorsToTake);
-    // ensure we got the right vectors
-    for (int i = 0; i < vectorsToTake.length; i++) {
-      for (int j = 0; j < dims; j++) {
-        assertEquals(floats[vectorsToTake[i]][j], sampled[i * dims + j], 0.0f);
-      }
-    }
-  }
-
   public void testScalarWithSampling() throws IOException {
     int numVecs = random().nextInt(128) + 5;
     int dims = 64;
@@ -123,7 +87,7 @@ public void testScalarWithSampling() throws IOException {
           floatVectorValues,
           0.99f,
           floatVectorValues.numLiveVectors,
-          floatVectorValues.numLiveVectors - 1);
+          Math.max(floatVectorValues.numLiveVectors - 1, SCRATCH_SIZE + 1));
     }
     {
       TestSimpleFloatVectorValues floatVectorValues =
@@ -132,7 +96,7 @@ public void testScalarWithSampling() throws IOException {
           floatVectorValues,
           0.99f,
           floatVectorValues.numLiveVectors,
-          floatVectorValues.numLiveVectors + 1);
+          Math.max(floatVectorValues.numLiveVectors - 1, SCRATCH_SIZE + 1));
     }
     {
       TestSimpleFloatVectorValues floatVectorValues =
@@ -141,7 +105,7 @@ public void testScalarWithSampling() throws IOException {
           floatVectorValues,
           0.99f,
           floatVectorValues.numLiveVectors,
-          floatVectorValues.numLiveVectors);
+          Math.max(floatVectorValues.numLiveVectors - 1, SCRATCH_SIZE + 1));
     }
     {
       TestSimpleFloatVectorValues floatVectorValues =
@@ -150,7 +114,7 @@ public void testScalarWithSampling() throws IOException {
           floatVectorValues,
           0.99f,
           floatVectorValues.numLiveVectors,
-          random().nextInt(floatVectorValues.floats.length - 1) + 1);
+          Math.max(random().nextInt(floatVectorValues.floats.length - 1) + 1, SCRATCH_SIZE + 1));
     }
   }

