Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LUCENE-9583: Remove RandomAccessVectorValuesProducer #1071

Merged
merged 7 commits into from Aug 20, 2022
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Expand Up @@ -22,7 +22,6 @@
import java.util.Objects;
import java.util.SplittableRandom;
import org.apache.lucene.index.RandomAccessVectorValues;
import org.apache.lucene.index.RandomAccessVectorValuesProducer;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.hnsw.NeighborQueue;
Expand Down Expand Up @@ -72,14 +71,14 @@ public final class Lucene90HnswGraphBuilder {
* to ensure repeatable construction.
*/
public Lucene90HnswGraphBuilder(
RandomAccessVectorValuesProducer vectors,
RandomAccessVectorValues vectors,
VectorSimilarityFunction similarityFunction,
int maxConn,
int beamWidth,
long seed)
throws IOException {
vectorValues = vectors.randomAccess();
buildVectors = vectors.randomAccess();
vectorValues = vectors.copy();
buildVectors = vectors.copy();
this.similarityFunction = Objects.requireNonNull(similarityFunction);
if (maxConn <= 0) {
throw new IllegalArgumentException("maxConn must be positive");
Expand Down
Expand Up @@ -33,7 +33,6 @@
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.RandomAccessVectorValues;
import org.apache.lucene.index.RandomAccessVectorValuesProducer;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.index.VectorValues;
Expand Down Expand Up @@ -381,8 +380,7 @@ int size() {
}

/** Read the vector values from the index input. This supports both iterated and random access. */
static class OffHeapVectorValues extends VectorValues
implements RandomAccessVectorValues, RandomAccessVectorValuesProducer {
static class OffHeapVectorValues extends VectorValues implements RandomAccessVectorValues {

final int dimension;
final int[] ordToDoc;
Expand Down Expand Up @@ -468,7 +466,7 @@ public long cost() {
}

@Override
public RandomAccessVectorValues randomAccess() {
public RandomAccessVectorValues copy() {
return new OffHeapVectorValues(dimension, ordToDoc, dataIn.clone());
}

Expand Down
Expand Up @@ -33,7 +33,6 @@
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.RandomAccessVectorValues;
import org.apache.lucene.index.RandomAccessVectorValuesProducer;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.VectorEncoding;
import org.apache.lucene.index.VectorSimilarityFunction;
Expand Down Expand Up @@ -422,8 +421,7 @@ int ordToDoc(int ord) {
}

/** Read the vector values from the index input. This supports both iterated and random access. */
static class OffHeapVectorValues extends VectorValues
implements RandomAccessVectorValues, RandomAccessVectorValuesProducer {
static class OffHeapVectorValues extends VectorValues implements RandomAccessVectorValues {

private final int dimension;
private final int size;
Expand Down Expand Up @@ -516,7 +514,7 @@ public long cost() {
}

@Override
public RandomAccessVectorValues randomAccess() {
public RandomAccessVectorValues copy() {
return new OffHeapVectorValues(dimension, size, ordToDoc, dataIn.clone());
}

Expand Down
Expand Up @@ -21,7 +21,6 @@
import java.nio.ByteBuffer;
import org.apache.lucene.codecs.lucene90.IndexedDISI;
import org.apache.lucene.index.RandomAccessVectorValues;
import org.apache.lucene.index.RandomAccessVectorValuesProducer;
import org.apache.lucene.index.VectorValues;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.RandomAccessInput;
Expand All @@ -30,8 +29,7 @@
import org.apache.lucene.util.packed.DirectMonotonicReader;

/** Read the vector values from the index input. This supports both iterated and random access. */
abstract class OffHeapVectorValues extends VectorValues
implements RandomAccessVectorValues, RandomAccessVectorValuesProducer {
abstract class OffHeapVectorValues extends VectorValues implements RandomAccessVectorValues {

protected final int dimension;
protected final int size;
Expand Down Expand Up @@ -144,7 +142,7 @@ public int advance(int target) throws IOException {
}

@Override
public RandomAccessVectorValues randomAccess() throws IOException {
public RandomAccessVectorValues copy() throws IOException {
return new DenseOffHeapVectorValues(dimension, size, slice.clone());
}

Expand Down Expand Up @@ -217,7 +215,7 @@ public int advance(int target) throws IOException {
}

@Override
public RandomAccessVectorValues randomAccess() throws IOException {
public RandomAccessVectorValues copy() throws IOException {
return new SparseOffHeapVectorValues(fieldEntry, dataIn, slice.clone());
}

Expand Down Expand Up @@ -294,7 +292,7 @@ public long cost() {
}

@Override
public RandomAccessVectorValues randomAccess() throws IOException {
public RandomAccessVectorValues copy() throws IOException {
throw new UnsupportedOperationException();
}

Expand Down
Expand Up @@ -26,7 +26,7 @@
import org.apache.lucene.index.BufferingKnnVectorsWriter;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.RandomAccessVectorValuesProducer;
import org.apache.lucene.index.RandomAccessVectorValues;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.index.VectorValues;
Expand Down Expand Up @@ -224,7 +224,7 @@ private void writeGraphOffsets(IndexOutput out, long[] offsets) throws IOExcepti

private void writeGraph(
IndexOutput graphData,
RandomAccessVectorValuesProducer vectorValues,
RandomAccessVectorValues vectorValues,
VectorSimilarityFunction similarityFunction,
long graphDataOffset,
long[] offsets,
Expand All @@ -239,7 +239,7 @@ private void writeGraph(
beamWidth,
Lucene90HnswGraphBuilder.randSeed);
hnswGraphBuilder.setInfoStream(segmentWriteState.infoStream);
Lucene90OnHeapHnswGraph graph = hnswGraphBuilder.build(vectorValues.randomAccess());
Lucene90OnHeapHnswGraph graph = hnswGraphBuilder.build(vectorValues.copy());

for (int ord = 0; ord < offsets.length; ord++) {
// write graph
Expand Down
Expand Up @@ -24,7 +24,6 @@
import java.util.Objects;
import java.util.SplittableRandom;
import org.apache.lucene.index.RandomAccessVectorValues;
import org.apache.lucene.index.RandomAccessVectorValuesProducer;
import org.apache.lucene.index.VectorEncoding;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.util.FixedBitSet;
Expand Down Expand Up @@ -79,14 +78,14 @@ public final class Lucene91HnswGraphBuilder {
* to ensure repeatable construction.
*/
public Lucene91HnswGraphBuilder(
RandomAccessVectorValuesProducer vectors,
RandomAccessVectorValues vectors,
VectorSimilarityFunction similarityFunction,
int maxConn,
int beamWidth,
long seed)
throws IOException {
vectorValues = vectors.randomAccess();
buildVectors = vectors.randomAccess();
vectorValues = vectors.copy();
buildVectors = vectors.copy();
this.similarityFunction = Objects.requireNonNull(similarityFunction);
if (maxConn <= 0) {
throw new IllegalArgumentException("maxConn must be positive");
Expand Down
Expand Up @@ -27,7 +27,7 @@
import org.apache.lucene.index.DocsWithFieldSet;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.RandomAccessVectorValuesProducer;
import org.apache.lucene.index.RandomAccessVectorValues;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.index.VectorValues;
Expand Down Expand Up @@ -233,7 +233,7 @@ private void writeMeta(
}

private Lucene91OnHeapHnswGraph writeGraph(
RandomAccessVectorValuesProducer vectorValues, VectorSimilarityFunction similarityFunction)
RandomAccessVectorValues vectorValues, VectorSimilarityFunction similarityFunction)
throws IOException {

// build graph
Expand All @@ -245,7 +245,7 @@ private Lucene91OnHeapHnswGraph writeGraph(
beamWidth,
Lucene91HnswGraphBuilder.randSeed);
hnswGraphBuilder.setInfoStream(segmentWriteState.infoStream);
Lucene91OnHeapHnswGraph graph = hnswGraphBuilder.build(vectorValues.randomAccess());
Lucene91OnHeapHnswGraph graph = hnswGraphBuilder.build(vectorValues.copy());

// write vectors' neighbours on each level into the vectorIndex file
int countOnLevel0 = graph.size();
Expand Down
Expand Up @@ -29,7 +29,7 @@
import org.apache.lucene.index.DocsWithFieldSet;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.RandomAccessVectorValuesProducer;
import org.apache.lucene.index.RandomAccessVectorValues;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.VectorEncoding;
import org.apache.lucene.index.VectorSimilarityFunction;
Expand Down Expand Up @@ -268,7 +268,7 @@ private void writeMeta(
}

private OnHeapHnswGraph writeGraph(
RandomAccessVectorValuesProducer vectorValues,
RandomAccessVectorValues vectorValues,
VectorEncoding vectorEncoding,
VectorSimilarityFunction similarityFunction)
throws IOException {
Expand All @@ -283,7 +283,7 @@ private OnHeapHnswGraph writeGraph(
beamWidth,
HnswGraphBuilder.randSeed);
hnswGraphBuilder.setInfoStream(segmentWriteState.infoStream);
OnHeapHnswGraph graph = hnswGraphBuilder.build(vectorValues.randomAccess());
OnHeapHnswGraph graph = hnswGraphBuilder.build(vectorValues.copy());

// write vectors' neighbours on each level into the vectorIndex file
int countOnLevel0 = graph.size();
Expand Down
Expand Up @@ -29,7 +29,6 @@
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.RandomAccessVectorValues;
import org.apache.lucene.index.RandomAccessVectorValuesProducer;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.index.VectorValues;
Expand Down Expand Up @@ -268,7 +267,7 @@ int size() {
}

private static class SimpleTextVectorValues extends VectorValues
implements RandomAccessVectorValues, RandomAccessVectorValuesProducer {
implements RandomAccessVectorValues {

private final BytesRefBuilder scratch = new BytesRefBuilder();
private final FieldEntry entry;
Expand Down Expand Up @@ -310,7 +309,7 @@ public BytesRef binaryValue() {
}

@Override
public RandomAccessVectorValues randomAccess() {
public RandomAccessVectorValues copy() {
return this;
}

Expand Down
Expand Up @@ -422,7 +422,7 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE
beamWidth,
HnswGraphBuilder.randSeed);
hnswGraphBuilder.setInfoStream(segmentWriteState.infoStream);
graph = hnswGraphBuilder.build(offHeapVectors.randomAccess());
graph = hnswGraphBuilder.build(offHeapVectors.copy());
writeGraph(graph);
}
long vectorIndexLength = vectorIndex.getFilePointer() - vectorIndexOffset;
Expand Down Expand Up @@ -617,7 +617,7 @@ public float[] copyValue(float[] value) {
hnswGraphBuilder =
(HnswGraphBuilder<T>)
HnswGraphBuilder.create(
() -> raVectorValues,
raVectorValues,
fieldInfo.getVectorEncoding(),
fieldInfo.getVectorSimilarityFunction(),
M,
Expand Down Expand Up @@ -694,5 +694,10 @@ public float[] vectorValue(int targetOrd) throws IOException {
public BytesRef binaryValue(int targetOrd) throws IOException {
return (BytesRef) vectors.get(targetOrd);
}

/**
 * Returns this same instance rather than allocating a separate object.
 *
 * <p>NOTE(review): presumably safe because values are looked up by ordinal in the {@code vectors}
 * collection rather than read into a shared scratch buffer -- confirm against the enclosing
 * class before relying on it.
 */
@Override
public RandomAccessVectorValues copy() throws IOException {
return this;
}
}
}
Expand Up @@ -21,7 +21,6 @@
import java.nio.ByteBuffer;
import org.apache.lucene.codecs.lucene90.IndexedDISI;
import org.apache.lucene.index.RandomAccessVectorValues;
import org.apache.lucene.index.RandomAccessVectorValuesProducer;
import org.apache.lucene.index.VectorValues;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.RandomAccessInput;
Expand All @@ -30,8 +29,7 @@
import org.apache.lucene.util.packed.DirectMonotonicReader;

/** Read the vector values from the index input. This supports both iterated and random access. */
abstract class OffHeapVectorValues extends VectorValues
implements RandomAccessVectorValues, RandomAccessVectorValuesProducer {
abstract class OffHeapVectorValues extends VectorValues implements RandomAccessVectorValues {

protected final int dimension;
protected final int size;
Expand Down Expand Up @@ -150,7 +148,7 @@ public int advance(int target) throws IOException {
}

@Override
public RandomAccessVectorValues randomAccess() throws IOException {
public RandomAccessVectorValues copy() throws IOException {
return new DenseOffHeapVectorValues(dimension, size, slice.clone(), byteSize);
}

Expand Down Expand Up @@ -226,7 +224,7 @@ public int advance(int target) throws IOException {
}

@Override
public RandomAccessVectorValues randomAccess() throws IOException {
public RandomAccessVectorValues copy() throws IOException {
return new SparseOffHeapVectorValues(fieldEntry, dataIn, slice.clone(), byteSize);
}

Expand Down Expand Up @@ -303,7 +301,7 @@ public long cost() {
}

@Override
public RandomAccessVectorValues randomAccess() throws IOException {
public RandomAccessVectorValues copy() throws IOException {
throw new UnsupportedOperationException();
}

Expand Down
Expand Up @@ -208,7 +208,7 @@ public long ramBytesUsed() {
}

private static class BufferedVectorValues extends VectorValues
implements RandomAccessVectorValues, RandomAccessVectorValuesProducer {
implements RandomAccessVectorValues {

final DocsWithFieldSet docsWithField;

Expand Down Expand Up @@ -236,7 +236,7 @@ private static class BufferedVectorValues extends VectorValues
}

@Override
public RandomAccessVectorValues randomAccess() {
public RandomAccessVectorValues copy() {
return new BufferedVectorValues(docsWithField, vectors, dimension);
}

Expand Down
Expand Up @@ -49,4 +49,11 @@ public interface RandomAccessVectorValues {
* @param targetOrd a valid ordinal, &ge; 0 and &lt; {@link #size()}.
*/
BytesRef binaryValue(int targetOrd) throws IOException;

/**
 * Creates a new copy of this {@link RandomAccessVectorValues}. This is helpful when you need to
 * access different values at once, to avoid overwriting the underlying float vector returned by
 * {@link RandomAccessVectorValues#vectorValue}.
 *
 * <p>NOTE(review): not every implementation touched by this change returns a distinct instance;
 * some return {@code this} and others throw {@link UnsupportedOperationException}. Callers that
 * depend on independent buffers should check the concrete implementation they are given.
 *
 * @return a {@link RandomAccessVectorValues} for reading the same values as this one
 * @throws IOException if the implementation fails while creating the copy
 */
RandomAccessVectorValues copy() throws IOException;
}

This file was deleted.