Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions bindings/python/src/svs/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,10 @@ def read_vecs(filename: str):

* `bvecs`: 8-bit unsigned integers.
* `fvecs`: 32-bit floating point numbers.
* `ivecs`: 32-bit signed integers.
* `ivecs`: 32-bit unsigned integers.

*Note*: The format differs from the IRISA format.
Both vector dimensionality and `ivecs` values are unsigned.

Args:
filename: The file to read.
Expand All @@ -93,24 +96,21 @@ def read_vecs(filename: str):
file_type = filename[-5:]
if file_type == 'bvecs':
dtype = np.uint8
struct_format = 'B'
n_bytes = 1
padding = 4
elif file_type == 'fvecs':
dtype = np.float32
struct_format = 'f'
n_bytes = 4
padding = 1
elif file_type == 'ivecs':
dtype = np.uint32
struct_format = 'i'
n_bytes = 4
padding = 1
else:
raise ValueError('Can only open bvecs, fvecs, and ivecs.')

with open(filename, 'rb') as fin:
vec_size = struct.unpack('i', fin.read(4))[0]
vec_size = struct.unpack('I', fin.read(4))[0]

X = np.fromfile(filename, dtype=dtype)
X = X.reshape((-1, vec_size + padding))
Expand Down
3 changes: 2 additions & 1 deletion examples/cpp/shared/example_vamana_with_compression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include "svs/orchestrators/dynamic_vamana.h"
#include "svs/orchestrators/exhaustive.h"
#include "svs/orchestrators/vamana.h"
#include <cstdint>

int main() {
// STEP 1: Compress Data with LeanVec, reducing dimensionality to leanvec_dim dimensions
Expand Down Expand Up @@ -69,7 +70,7 @@ int main() {
//! [Perform Queries]

//! [Recall]
auto groundtruth = svs::load_data<int>(
auto groundtruth = svs::load_data<uint32_t>(
std::filesystem::path(SVS_DATA_DIR) / "groundtruth_euclidean.ivecs"
);
double recall = svs::k_recall_at_n(groundtruth, results, n_neighbors, n_neighbors);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include "svs/orchestrators/dynamic_vamana.h"
#include "svs/orchestrators/exhaustive.h"
#include "svs/orchestrators/vamana.h"
#include <cstdint>

// Alias for blocked Lean dataset that supports resize/compact
using BlockedLean = svs::leanvec::LeanDataset<
Expand Down Expand Up @@ -113,7 +114,7 @@ int main() {
//! [Perform Queries]

//! [Recall]
auto groundtruth = svs::load_data<int>(
auto groundtruth = svs::load_data<uint32_t>(
std::filesystem::path(SVS_DATA_DIR) / "groundtruth_euclidean.ivecs"
);
double recall = svs::k_recall_at_n(groundtruth, results, n_neighbors, n_neighbors);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include "svs/orchestrators/dynamic_vamana.h"
#include "svs/orchestrators/exhaustive.h"
#include "svs/orchestrators/vamana.h"
#include <cstdint>

int main() {
// STEP 1: Compress Data with LVQ
Expand Down Expand Up @@ -57,7 +58,7 @@ int main() {
//! [Perform Queries]

//! [Recall]
auto groundtruth = svs::load_data<int>(
auto groundtruth = svs::load_data<uint32_t>(
std::filesystem::path(SVS_DATA_DIR) / "groundtruth_euclidean.ivecs"
);
double recall = svs::k_recall_at_n(groundtruth, results, n_neighbors, n_neighbors);
Expand Down
3 changes: 2 additions & 1 deletion examples/cpp/shared/shared.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include "svs/orchestrators/dynamic_vamana.h"
#include "svs/orchestrators/exhaustive.h"
#include "svs/orchestrators/vamana.h"
#include <cstdint>

#include "utils.h"

Expand Down Expand Up @@ -96,7 +97,7 @@ void vamana_search(Data& data, Distance distance) {

index.set_search_window_size(search_window_size);
const auto query_data = svs::load_data<float>(qfname);
const auto groundtruth = svs::load_data<int>(gtfname);
const auto groundtruth = svs::load_data<uint32_t>(gtfname);

auto tic = svs::lib::now();
auto query_result = index.search(query_data, n_neighbors);
Expand Down
25 changes: 13 additions & 12 deletions examples/cpp/shared/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,15 @@
* I/O functions for fvecs, ivecs and xVecs
*****************************************************/

#include <cstdint>
#include <random>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

int fvec_fwrite(FILE* fo, const float* v, int d) {
int fvec_fwrite(FILE* fo, const float* v, uint32_t d) {
int ret;
ret = fwrite(&d, sizeof(int), 1, fo);
ret = fwrite(&d, sizeof(uint32_t), 1, fo);
if (ret != 1) {
perror("fvec_fwrite: write error 1");
return -1;
Expand All @@ -38,7 +39,7 @@ int fvec_fwrite(FILE* fo, const float* v, int d) {
return 0;
}

int fvecs_write(const char* fname, int d, int n, const float* vf) {
int fvecs_write(const char* fname, uint32_t d, int n, const float* vf) {
FILE* fo = fopen(fname, "w");
if (!fo) {
perror("fvecs_write: cannot open file");
Expand All @@ -55,22 +56,22 @@ int fvecs_write(const char* fname, int d, int n, const float* vf) {
return n;
}

int ivec_iwrite(FILE* fo, const int* v, int d) {
int ivec_iwrite(FILE* fo, const uint32_t* v, uint32_t d) {
int ret;
ret = fwrite(&d, sizeof(int), 1, fo);
ret = fwrite(&d, sizeof(uint32_t), 1, fo);
if (ret != 1) {
perror("fvec_fwrite: write error 1");
perror("ivec_iwrite: write error 1");
return -1;
}
ret = fwrite(v, sizeof(float), d, fo);
ret = fwrite(v, sizeof(uint32_t), d, fo);
if (ret != d) {
perror("fvec_fwrite: write error 2");
perror("ivec_iwrite: write error 2");
return -1;
}
return 0;
}

int ivecs_write(const char* fname, int d, int n, const int* vf) {
int ivecs_write(const char* fname, uint32_t d, int n, const uint32_t* vf) {
FILE* fo = fopen(fname, "w");
if (!fo) {
perror("fvecs_write: cannot open file");
Expand All @@ -93,7 +94,7 @@ void generate_random_data(size_t data_dim, size_t dataset_size, size_t query_siz
std::default_random_engine generator;
std::normal_distribution<float> dataset_dist(0.0f, dataset_std);
std::normal_distribution<float> query_dist(0.0f, query_std);
std::uniform_int_distribution<> uni_dist(0, dataset_size - 1);
std::uniform_int_distribution<uint32_t> uni_dist(0, dataset_size - 1);

generator.seed(100);
std::vector<float> dataset(dataset_size * data_dim);
Expand All @@ -102,9 +103,9 @@ void generate_random_data(size_t data_dim, size_t dataset_size, size_t query_siz
}

std::vector<float> queries(query_size * data_dim);
std::vector<int> gt(query_size);
std::vector<uint32_t> gt(query_size);
for (size_t i = 0; i < query_size; ++i) {
int e = uni_dist(generator);
uint32_t e = uni_dist(generator);
for (size_t j = 0; j < data_dim; ++j) {
queries[i * data_dim + j] = dataset[e * data_dim + j] + query_dist(generator);
}
Expand Down
Loading