Skip to content

Commit

Permalink
expose min and max docID from vectorDocID map
Browse files: browse the repository at this point in the history
  • Loading branch information
moshaad7 committed Apr 2, 2024
1 parent dde3ac9 commit eea92c4
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 4 deletions.
10 changes: 8 additions & 2 deletions faiss_vector_posting.go
Original file line number Diff line number Diff line change
Expand Up @@ -409,8 +409,14 @@ func (sb *SegmentBase) UpdateFieldStats(stats segment.FieldStats) {
continue
}

_, _, numVecs, _, indexBytesLen, _ := getVectorSectionContentOffsets(sb, pos)
_, _, numVecs, _, minDocID, maxDocID, indexBytesLen, _ := getVectorSectionContentOffsets(sb, pos)

stats.Store("num_vectors", fieldName, numVecs)
stats.Store("vectors_index_size", fieldName, indexBytesLen)
stats.Store("vector_index_bytes", fieldName, indexBytesLen)

if minDocID != math.MaxUint64 && maxDocID != 0 {
stats.Store("vector_min_doc_id", fieldName, minDocID)
stats.Store("vector_max_doc_id", fieldName, maxDocID)
}
}
}
2 changes: 1 addition & 1 deletion faiss_vector_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,7 @@ func TestVectorSegment(t *testing.T) {

fieldsSectionsMap := vecSegBase.fieldsSectionsMap
stubVecFieldStartAddr := fieldsSectionsMap[vecSegBase.fieldsMap["stubVec"]-1][SectionFaissVectorIndex]
docValueStart, docValueEnd, numVecs, _, indexBytesLen,
docValueStart, docValueEnd, numVecs, _, _, _, indexBytesLen,
_ := getVectorSectionContentOffsets(vecSegBase, stubVecFieldStartAddr)

if docValueStart != fieldNotUninverted {
Expand Down
13 changes: 12 additions & 1 deletion section_faiss_vector_index.go
Original file line number Diff line number Diff line change
Expand Up @@ -771,6 +771,8 @@ func getVectorSectionContentOffsets(sb *SegmentBase, offset uint64) (
docValueEnd,
numVecs,
vecDocIDsMappingOffset,
minDocID,
maxDocID,
indexBytesLen,
indexBytesOffset uint64,
) {
Expand All @@ -789,10 +791,19 @@ func getVectorSectionContentOffsets(sb *SegmentBase, offset uint64) (
pos += uint64(n)

vecDocIDsMappingOffset = pos
minDocID = uint64(math.MaxUint64)
maxDocID = uint64(0)
var docID uint64
for i := 0; i < int(numVecs); i++ {
_, n := binary.Varint(sb.mem[pos : pos+binary.MaxVarintLen64])
pos += uint64(n)
_, n = binary.Uvarint(sb.mem[pos : pos+binary.MaxVarintLen64])
docID, n = binary.Uvarint(sb.mem[pos : pos+binary.MaxVarintLen64])
if docID < minDocID {
minDocID = docID
}
if docID > maxDocID {
maxDocID = docID
}
pos += uint64(n)
}

Expand Down

0 comments on commit eea92c4

Please sign in to comment.