Skip to content

Commit

Permalink
improve ramBytesUsed estimates
Browse files Browse the repository at this point in the history
  • Loading branch information
jbellis committed May 11, 2024
1 parent 624e4e5 commit 005e202
Show file tree
Hide file tree
Showing 9 changed files with 63 additions and 36 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import io.github.jbellis.jvector.util.Bits;
import io.github.jbellis.jvector.util.DocIdSetIterator;
import io.github.jbellis.jvector.util.FixedBitSet;
import io.github.jbellis.jvector.util.RamUsageEstimator;

import java.util.concurrent.atomic.AtomicReference;
import java.util.function.IntFunction;
Expand Down Expand Up @@ -361,6 +362,22 @@ public void insert(int neighborId, float score, float overflow) {
});
}

/**
 * Returns a formula-based estimate, in bytes, built from object-header and reference sizes,
 * a handful of primitive fields, and the estimate reported by {@code NodeArray.ramBytesUsed(nodes)}.
 *
 * @param nodes forwarded unchanged to {@code NodeArray.ramBytesUsed}
 * @return the estimated number of bytes
 */
public static long ramBytesUsed(int nodes) {
    final int refBytes = RamUsageEstimator.NUM_BYTES_OBJECT_REF;
    final int headerBytes = RamUsageEstimator.NUM_BYTES_OBJECT_HEADER;
    // an AtomicReference is counted as a header plus two references
    final int atomicRefBytes = headerBytes + 2 * refBytes;

    // header of the enclosing object
    long total = headerBytes;
    // Neighbors AtomicReference
    total += refBytes + atomicRefBytes;
    // Neighbors
    total += headerBytes + refBytes + Integer.BYTES;
    // NodeArray
    total += NodeArray.ramBytesUsed(nodes);
    // alpha and shortEdges
    total += 2 * Float.BYTES;
    // BSP
    total += refBytes;
    // maxDegree and maxOverflowDegree
    total += 2 * Integer.BYTES;
    return total;
}

/** Only for testing; this is a linear search */
boolean contains(int i) {
var it = this.iterator();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import io.github.jbellis.jvector.annotations.VisibleForTesting;
import io.github.jbellis.jvector.util.ArrayUtil;
import io.github.jbellis.jvector.util.Bits;
import io.github.jbellis.jvector.util.RamUsageEstimator;
import org.agrona.collections.IntHashSet;

import java.util.Arrays;
Expand Down Expand Up @@ -287,6 +288,18 @@ protected final int descSortFindRightMostInsertionPoint(float newScore) {
return start;
}

/**
 * Returns a formula-based estimate, in bytes, for an instance holding {@code size} entries:
 * a fixed part (object header, the size field, and a reference plus array header for each of
 * the nodes and scores arrays) and a variable part of one int and one float per entry.
 *
 * @param size number of entries to account for in the array contents
 * @return the estimated number of bytes
 */
public static long ramBytesUsed(int size) {
    final int headerBytes = RamUsageEstimator.NUM_BYTES_OBJECT_HEADER;
    // each array costs the field reference plus the array's own header
    final int perArrayBytes = RamUsageEstimator.NUM_BYTES_OBJECT_REF + RamUsageEstimator.NUM_BYTES_ARRAY_HEADER;

    // object header + size field + nodes array + scores array
    final long fixedBytes = headerBytes + Integer.BYTES + 2 * perArrayBytes;
    // array contents; widened to long before multiplying
    final long contentBytes = (long) size * (Integer.BYTES + Float.BYTES);
    return fixedBytes + contentBytes;
}

/**
* Caution! This performs a linear scan.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,32 +146,18 @@ public NodesIterator getNodes() {

// NOTE(review): this hunk is a diff rendered without +/- markers, so pre- and post-commit lines
// appear interleaved and the text does not compile as shown — confirm against the commit before editing.
@Override
public long ramBytesUsed() {
// the main graph structure
long total = (long) size() * RamUsageEstimator.NUM_BYTES_OBJECT_REF;
long neighborSize = neighborsRamUsed(maxOverflowDegree) * size();
return total + neighborSize + RamUsageEstimator.NUM_BYTES_ARRAY_HEADER;
}
// NOTE(review): the three declarations below sit outside any method in this view; presumably they
// open the replacement ramBytesUsed() body whose last two statements appear further down — TODO confirm.
int OH_BYTES = RamUsageEstimator.NUM_BYTES_OBJECT_HEADER;
var REF_BYTES = RamUsageEstimator.NUM_BYTES_OBJECT_REF;
var AH_BYTES = RamUsageEstimator.NUM_BYTES_ARRAY_HEADER;

// NOTE(review): this header and the four lines after it look like the earlier ramBytesUsedOneNode();
// a second definition with the same signature appears at the end of the hunk — TODO confirm.
public long ramBytesUsedOneNode() {
var graphBytesUsed =
neighborsRamUsed(maxOverflowDegree);
var clockBytesUsed = Integer.BYTES;
return graphBytesUsed + clockBytesUsed;
// per-node estimate times size(), plus an object header, two references and an array header
long neighborSize = ramBytesUsedOneNode() * size();
return OH_BYTES + REF_BYTES * 2L + AH_BYTES + neighborSize;
}

// NOTE(review): no closing brace for this helper is visible before the next method header;
// presumably the helper was deleted by the commit — TODO confirm.
private static long neighborsRamUsed(int count) {
long REF_BYTES = RamUsageEstimator.NUM_BYTES_OBJECT_REF;
long AH_BYTES = RamUsageEstimator.NUM_BYTES_ARRAY_HEADER;
long neighborSetBytes =
REF_BYTES // atomicreference
+ Integer.BYTES
+ Integer.BYTES
+ REF_BYTES // NeighborArray
+ AH_BYTES * 2 // NeighborArray internals
+ REF_BYTES * 2
+ Integer.BYTES
+ 1;
return neighborSetBytes + (long) count * (Integer.BYTES + Float.BYTES);
// Estimate for one node: one object reference plus
// ConcurrentNeighborSet.ramBytesUsed(maxOverflowDegree + 1).
public long ramBytesUsedOneNode() {
// we include the REF_BYTES for the CNS reference here to make it self-contained for addGraphNode()
int REF_BYTES = RamUsageEstimator.NUM_BYTES_OBJECT_REF;
return REF_BYTES + ConcurrentNeighborSet.ramBytesUsed(maxOverflowDegree + 1);
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -219,13 +219,14 @@ public ProductQuantization getCompressor() {

// NOTE(review): diff view without +/- markers. codebooksSize is declared twice and statements follow
// a return, so presumably the first block (through the first unconditional return) is the pre-commit
// body and the rest is its replacement — confirm against the commit.
@Override
public long ramBytesUsed() {
long codebooksSize = pq.memorySize();
if (compressedVectors.isEmpty()) {
return codebooksSize;
}

// sizes the first element with RamUsageEstimator.sizeOf and multiplies by the element count
long compressedVectorSize = RamUsageEstimator.sizeOf(compressedVectors.get(0));
return codebooksSize + (compressedVectorSize * compressedVectors.size());
int REF_BYTES = RamUsageEstimator.NUM_BYTES_OBJECT_REF;
int OH_BYTES = RamUsageEstimator.NUM_BYTES_OBJECT_HEADER;
int AH_BYTES = RamUsageEstimator.NUM_BYTES_ARRAY_HEADER;

long codebooksSize = pq.ramBytesUsed();
// one reference per element of compressedVectors, plus one more
long listSize = (long) REF_BYTES * (1 + compressedVectors.size());
// per element: object header + array header + pq.compressedVectorSize(); the int sum is widened to long before multiplying
long dataSize = (long) (OH_BYTES + AH_BYTES + pq.compressedVectorSize()) * compressedVectors.size();
return codebooksSize + listSize + dataSize;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import io.github.jbellis.jvector.disk.RandomAccessReader;
import io.github.jbellis.jvector.graph.RandomAccessVectorValues;
import io.github.jbellis.jvector.graph.disk.OnDiskGraphIndex;
import io.github.jbellis.jvector.util.Accountable;
import io.github.jbellis.jvector.util.PhysicalCoreExecutor;
import io.github.jbellis.jvector.vector.VectorUtil;
import io.github.jbellis.jvector.vector.VectorizationProvider;
Expand Down Expand Up @@ -49,7 +50,7 @@
* Product Quantization for float vectors. Supports arbitrary source and target dimensionality;
* in particular, the source does not need to be evenly divisible by the target.
*/
public class ProductQuantization implements VectorCompressor<ByteSequence<?>> {
public class ProductQuantization implements VectorCompressor<ByteSequence<?>>, Accountable {
private static final int MAGIC = 0x75EC4012; // JVECTOR, with some imagination

private static final VectorTypeSupport vectorTypeSupport = VectorizationProvider.getInstance().getVectorTypeSupport();
Expand Down Expand Up @@ -685,7 +686,8 @@ public int compressedVectorSize() {
return codebooks.length;
}

public long memorySize() {
@Override
public long ramBytesUsed() {
long size = 0;
for (VectorFloat<?> codebook : codebooks) {
size += codebook.ramBytesUsed();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,8 @@ public ArrayByteSequence copy() {

// NOTE(review): diff view without +/- markers; presumably the first return is the pre-commit line
// and the two lines after it are its replacement — confirm against the commit.
@Override
public long ramBytesUsed() {
return RamUsageEstimator.sizeOf(data) + RamUsageEstimator.shallowSizeOfInstance(ByteSequence.class);
// one object header plus RamUsageEstimator.sizeOf(data)
int OH_BYTES = RamUsageEstimator.NUM_BYTES_OBJECT_HEADER;
return OH_BYTES + RamUsageEstimator.sizeOf(data);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,8 @@ public void copyFrom(VectorFloat<?> src, int srcOffset, int destOffset, int leng
// NOTE(review): diff view without +/- markers; presumably the first return is the pre-commit line
// and the two lines after it are its replacement — confirm against the commit.
@Override
public long ramBytesUsed()
{
return RamUsageEstimator.sizeOf(data) + RamUsageEstimator.shallowSizeOfInstance(ArrayVectorFloat.class);
// one object header plus RamUsageEstimator.sizeOf(data)
int OH_BYTES = RamUsageEstimator.NUM_BYTES_OBJECT_HEADER;
return OH_BYTES + RamUsageEstimator.sizeOf(data);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

package io.github.jbellis.jvector.vector;

import io.github.jbellis.jvector.util.RamUsageEstimator;
import io.github.jbellis.jvector.vector.types.ByteSequence;

import java.lang.foreign.MemoryLayout;
Expand Down Expand Up @@ -50,7 +51,9 @@ public class MemorySegmentByteSequence implements ByteSequence<MemorySegment> {

// NOTE(review): diff view without +/- markers; presumably the first return is the pre-commit line
// and the three lines after it are its replacement — confirm against the commit.
@Override
public long ramBytesUsed() {
return MemoryLayout.sequenceLayout(length, ValueLayout.JAVA_BYTE).byteSize();
// object header + one reference + one int, plus the byte size of a JAVA_BYTE sequence layout of `length` elements
int OH_BYTES = RamUsageEstimator.NUM_BYTES_OBJECT_HEADER;
int REF_BYTES = RamUsageEstimator.NUM_BYTES_OBJECT_REF;
return OH_BYTES + REF_BYTES + Integer.BYTES + MemoryLayout.sequenceLayout(length, ValueLayout.JAVA_BYTE).byteSize();
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

package io.github.jbellis.jvector.vector;

import io.github.jbellis.jvector.util.RamUsageEstimator;
import io.github.jbellis.jvector.vector.types.VectorFloat;

import java.lang.foreign.MemorySegment;
Expand Down Expand Up @@ -44,7 +45,9 @@ final public class MemorySegmentVectorFloat implements VectorFloat<MemorySegment
// NOTE(review): diff view without +/- markers; presumably the first return is the pre-commit line
// and the three lines after it are its replacement — confirm against the commit.
@Override
public long ramBytesUsed()
{
return segment.byteSize();
// object header + one reference, plus segment.byteSize()
int OH_BYTES = RamUsageEstimator.NUM_BYTES_OBJECT_HEADER;
int REF_BYTES = RamUsageEstimator.NUM_BYTES_OBJECT_REF;
return OH_BYTES + REF_BYTES + segment.byteSize();
}

@Override
Expand Down

0 comments on commit 005e202

Please sign in to comment.