diff --git a/bindings/python/src/svs/common.py b/bindings/python/src/svs/common.py index 48802dd04..69c4f5d40 100644 --- a/bindings/python/src/svs/common.py +++ b/bindings/python/src/svs/common.py @@ -81,7 +81,10 @@ def read_vecs(filename: str): * `bvecs`: 8-bit unsigned integers. * `fvecs`: 32-bit floating point numbers. - * `ivecs`: 32-bit signed integers. + * `ivecs`: 32-bit unsigned integers. + + *Note*: The format differs from the IRISA format. + Both vector dimensionality and `ivecs` values are unsigned. Args: filename: The file to read. @@ -93,24 +96,21 @@ def read_vecs(filename: str): file_type = filename[-5:] if file_type == 'bvecs': dtype = np.uint8 - struct_format = 'B' n_bytes = 1 padding = 4 elif file_type == 'fvecs': dtype = np.float32 - struct_format = 'f' n_bytes = 4 padding = 1 elif file_type == 'ivecs': dtype = np.uint32 - struct_format = 'i' n_bytes = 4 padding = 1 else: raise ValueError('Can only open bvecs, fvecs, and ivecs.') with open(filename, 'rb') as fin: - vec_size = struct.unpack('i', fin.read(4))[0] + vec_size = struct.unpack('I', fin.read(4))[0] X = np.fromfile(filename, dtype=dtype) X = X.reshape((-1, vec_size + padding)) diff --git a/examples/cpp/shared/example_vamana_with_compression.cpp b/examples/cpp/shared/example_vamana_with_compression.cpp index 362abee8a..ebcfc32f7 100644 --- a/examples/cpp/shared/example_vamana_with_compression.cpp +++ b/examples/cpp/shared/example_vamana_with_compression.cpp @@ -23,6 +23,7 @@ #include "svs/orchestrators/dynamic_vamana.h" #include "svs/orchestrators/exhaustive.h" #include "svs/orchestrators/vamana.h" +#include int main() { // STEP 1: Compress Data with LeanVec, reducing dimensionality to leanvec_dim dimensions @@ -69,7 +70,7 @@ int main() { //! [Perform Queries] //! [Recall] - auto groundtruth = svs::load_data( + auto groundtruth = svs::load_data( std::filesystem::path(SVS_DATA_DIR) / "groundtruth_euclidean.ivecs" ); double recall = svs::k_recall_at_n(groundtruth, results, n_neighbors, n_neighbors); diff --git a/examples/cpp/shared/example_vamana_with_compression_dynamic.cpp b/examples/cpp/shared/example_vamana_with_compression_dynamic.cpp index ce71ccd48..2b134c151 100644 --- a/examples/cpp/shared/example_vamana_with_compression_dynamic.cpp +++ b/examples/cpp/shared/example_vamana_with_compression_dynamic.cpp @@ -23,6 +23,7 @@ #include "svs/orchestrators/dynamic_vamana.h" #include "svs/orchestrators/exhaustive.h" #include "svs/orchestrators/vamana.h" +#include // Alias for blocked Lean dataset that supports resize/compact using BlockedLean = svs::leanvec::LeanDataset< @@ -113,7 +114,7 @@ int main() { //! [Perform Queries] //! [Recall] - auto groundtruth = svs::load_data( + auto groundtruth = svs::load_data( std::filesystem::path(SVS_DATA_DIR) / "groundtruth_euclidean.ivecs" ); double recall = svs::k_recall_at_n(groundtruth, results, n_neighbors, n_neighbors); diff --git a/examples/cpp/shared/example_vamana_with_compression_lvq.cpp b/examples/cpp/shared/example_vamana_with_compression_lvq.cpp index 7a253d955..8d377d919 100644 --- a/examples/cpp/shared/example_vamana_with_compression_lvq.cpp +++ b/examples/cpp/shared/example_vamana_with_compression_lvq.cpp @@ -23,6 +23,7 @@ #include "svs/orchestrators/dynamic_vamana.h" #include "svs/orchestrators/exhaustive.h" #include "svs/orchestrators/vamana.h" +#include int main() { // STEP 1: Compress Data with LVQ @@ -57,7 +58,7 @@ int main() { //! [Perform Queries] //! [Recall] - auto groundtruth = svs::load_data( + auto groundtruth = svs::load_data( std::filesystem::path(SVS_DATA_DIR) / "groundtruth_euclidean.ivecs" ); double recall = svs::k_recall_at_n(groundtruth, results, n_neighbors, n_neighbors); diff --git a/examples/cpp/shared/shared.cpp b/examples/cpp/shared/shared.cpp index 67d611835..41df8e8f2 100644 --- a/examples/cpp/shared/shared.cpp +++ b/examples/cpp/shared/shared.cpp @@ -23,6 +23,7 @@ #include "svs/orchestrators/dynamic_vamana.h" #include "svs/orchestrators/exhaustive.h" #include "svs/orchestrators/vamana.h" +#include #include "utils.h" @@ -96,7 +97,7 @@ void vamana_search(Data& data, Distance distance) { index.set_search_window_size(search_window_size); const auto query_data = svs::load_data(qfname); - const auto groundtruth = svs::load_data(gtfname); + const auto groundtruth = svs::load_data(gtfname); auto tic = svs::lib::now(); auto query_result = index.search(query_data, n_neighbors); diff --git a/examples/cpp/shared/utils.h b/examples/cpp/shared/utils.h index 40dbbd7f3..f5f9f6619 100644 --- a/examples/cpp/shared/utils.h +++ b/examples/cpp/shared/utils.h @@ -18,14 +18,15 @@ * I/O functions for fvecs, ivecs and xVecs *****************************************************/ +#include #include #include #include #include -int fvec_fwrite(FILE* fo, const float* v, int d) { +int fvec_fwrite(FILE* fo, const float* v, uint32_t d) { int ret; - ret = fwrite(&d, sizeof(int), 1, fo); + ret = fwrite(&d, sizeof(uint32_t), 1, fo); if (ret != 1) { perror("fvec_fwrite: write error 1"); return -1; @@ -38,7 +39,7 @@ int fvec_fwrite(FILE* fo, const float* v, int d) { return 0; } -int fvecs_write(const char* fname, int d, int n, const float* vf) { +int fvecs_write(const char* fname, uint32_t d, int n, const float* vf) { FILE* fo = fopen(fname, "w"); if (!fo) { perror("fvecs_write: cannot open file"); @@ -55,22 +56,22 @@ int fvecs_write(const char* fname, int d, int n, const float* vf) { return n; } -int ivec_iwrite(FILE* fo, const int* v, int d) { +int ivec_iwrite(FILE* fo, const uint32_t* v, uint32_t d) { int ret; - ret = fwrite(&d, sizeof(int), 1, fo); + ret = fwrite(&d, sizeof(uint32_t), 1, fo); if (ret != 1) { - perror("fvec_fwrite: write error 1"); + perror("ivec_iwrite: write error 1"); return -1; } - ret = fwrite(v, sizeof(float), d, fo); + ret = fwrite(v, sizeof(uint32_t), d, fo); if (ret != d) { - perror("fvec_fwrite: write error 2"); + perror("ivec_iwrite: write error 2"); return -1; } return 0; } -int ivecs_write(const char* fname, int d, int n, const int* vf) { +int ivecs_write(const char* fname, uint32_t d, int n, const uint32_t* vf) { FILE* fo = fopen(fname, "w"); if (!fo) { perror("fvecs_write: cannot open file"); @@ -93,7 +94,7 @@ void generate_random_data(size_t data_dim, size_t dataset_size, size_t query_siz std::default_random_engine generator; std::normal_distribution dataset_dist(0.0f, dataset_std); std::normal_distribution query_dist(0.0f, query_std); - std::uniform_int_distribution<> uni_dist(0, dataset_size - 1); + std::uniform_int_distribution uni_dist(0, dataset_size - 1); generator.seed(100); std::vector dataset(dataset_size * data_dim); @@ -102,9 +103,9 @@ void generate_random_data(size_t data_dim, size_t dataset_size, size_t query_siz } std::vector queries(query_size * data_dim); - std::vector gt(query_size); + std::vector gt(query_size); for (size_t i = 0; i < query_size; ++i) { - int e = uni_dist(generator); + uint32_t e = uni_dist(generator); for (size_t j = 0; j < data_dim; ++j) { queries[i * data_dim + j] = dataset[e * data_dim + j] + query_dist(generator); }