From b72fc00ad9a25cfc5551119096f8fc5226cbd1d0 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Fri, 21 Feb 2025 14:57:23 -0800 Subject: [PATCH 01/33] feature: logger in vamana index and unit test --- include/svs/index/vamana/dynamic_index.h | 42 +++++++++++++++++++---- tests/svs/index/vamana/index.cpp | 43 +++++++++++++++++++++++- 2 files changed, 78 insertions(+), 7 deletions(-) diff --git a/include/svs/index/vamana/dynamic_index.h b/include/svs/index/vamana/dynamic_index.h index 82a61099..854c7384 100644 --- a/include/svs/index/vamana/dynamic_index.h +++ b/include/svs/index/vamana/dynamic_index.h @@ -157,6 +157,10 @@ class MutableVamanaIndex { float alpha_ = 1.2; bool use_full_search_history_ = true; + // Log callback + void* log_ctx_; + std::function log_callback_; + // Methods public: // This is because some dataset may not yet support single-searching, which is required @@ -174,7 +178,10 @@ class MutableVamanaIndex { Idx entry_point, Dist distance_function, const ExternalIds& external_ids, - ThreadPoolProto threadpool_proto + ThreadPoolProto threadpool_proto, + //Optional logger parameter + void* log_ctx = nullptr, + std::function log_callback = [](void*, const char*, const char*){} ) : graph_{std::move(graph)} , data_{std::move(data)} @@ -185,8 +192,18 @@ class MutableVamanaIndex { , distance_{std::move(distance_function)} , threadpool_{threads::as_threadpool(std::move(threadpool_proto))} , search_parameters_{vamana::construct_default_search_parameters(data_)} - , construction_window_size_{2 * graph.max_degree()} { + , construction_window_size_{2 * graph.max_degree()} + // Ctor accept logger in parameter + , log_ctx_{log_ctx} + , log_callback_{std::move(log_callback)} { + translator_.insert(external_ids, threads::UnitRange(0, external_ids.size())); + log_callback_(log_ctx_, "DEBUG", "Created MutableVamanaIndex"); + } + + // Helper method for logging + void log(const char* level, const char* message) const { + log_callback_(log_ctx_, level, message); } /// @@ -198,7 +215,9 @@ class MutableVamanaIndex { Data data, const ExternalIds& external_ids, Dist distance_function, - ThreadPoolProto threadpool_proto + ThreadPoolProto threadpool_proto, + void* log_ctx = nullptr, + std::function log_callback = [](void*, const char*, const char*){} ) : graph_(Graph{data.size(), parameters.graph_max_degree}) , data_(std::move(data)) @@ -213,7 +232,10 @@ class MutableVamanaIndex { , max_candidates_(parameters.max_candidate_pool_size) , prune_to_(parameters.prune_to) , alpha_(parameters.alpha) - , use_full_search_history_{parameters.use_full_search_history} { + , use_full_search_history_{parameters.use_full_search_history} + , log_ctx_{log_ctx} + , log_callback_{std::move(log_callback)} { + log_callback_(log_ctx_, "DEBUG", "Created MutableVamanaIndex with build parameters"); // Setup the initial translation of external to internal ids. translator_.insert(external_ids, threads::UnitRange(0, external_ids.size())); @@ -247,7 +269,9 @@ class MutableVamanaIndex { graph_type graph, const Dist& distance_function, IDTranslator translator, - Pool threadpool + Pool threadpool, + void* log_ctx = nullptr, + std::function log_callback = [](void*, const char*, const char*){} ) : graph_{std::move(graph)} , data_{std::move(data)} @@ -262,7 +286,11 @@ class MutableVamanaIndex { , max_candidates_{config.build_parameters.max_candidate_pool_size} , prune_to_{config.build_parameters.prune_to} , alpha_{config.build_parameters.alpha} - , use_full_search_history_{config.build_parameters.use_full_search_history} {} + , use_full_search_history_{config.build_parameters.use_full_search_history} + , log_ctx_{log_ctx} + , log_callback_{std::move(log_callback)} { + log_callback_(log_ctx_, "DEBUG", "Created MutableVamanaIndex from reload"); + } ///// Scratchspace scratchspace_type scratchspace(const search_parameters_type& sp) const { @@ -596,6 +624,7 @@ class MutableVamanaIndex { std::vector add_points( const Points& points, const ExternalIds& external_ids, bool reuse_empty = false ) { + log("DEBUG", "Adding new points to index"); const size_t num_points = points.size(); const size_t num_ids = external_ids.size(); if (num_points != num_ids) { @@ -703,6 +732,7 @@ class MutableVamanaIndex { /// graph. /// template void delete_entries(const T& ids) { + log("DEBUG", "Adding new points to index"); translator_.check_external_exist(ids.begin(), ids.end()); for (auto i : ids) { delete_entry(translator_.get_internal(i)); diff --git a/tests/svs/index/vamana/index.cpp b/tests/svs/index/vamana/index.cpp index 3e1b3fba..86ef009a 100644 --- a/tests/svs/index/vamana/index.cpp +++ b/tests/svs/index/vamana/index.cpp @@ -16,6 +16,7 @@ // Header under test #include "svs/index/vamana/index.h" +#include "svs/index/vamana/dynamic_index.h" // catch2 #include "catch2/catch_test_macros.hpp" @@ -104,7 +105,47 @@ CATCH_TEST_CASE("Vamana Index Parameters", "[index][vamana]") { CATCH_SECTION("Current version") { auto p = VamanaIndexParameters{ - 128, {12.4f, 478, 13, 4, 10, false}, {{10, 20}, true, 1, 1}}; + 128, {12.4f, 478, 13, 4, 10, false}, {{10, 20}, true, 1, 1} + }; CATCH_REQUIRE(svs::lib::test_self_save_load_context_free(p)); } } + +CATCH_TEST_CASE("Vamana Index Logging", "[index][logging]") { + // Create a small test dataset + std::vector data = {1.0f, 2.0f, 3.0f, 4.0f}; + std::vector external_ids = {0, 1}; + const size_t dim = 2; + + // Create the graph and data structures + auto graph = svs::graphs::SimpleGraph(2, 64); // 2 nodes, max degree 64 + auto data_view = svs::data::SimpleDataView(data.data(), 2, dim); + + // Use node 0 as entry point + size_t entry_point = 0; + + // Create logging capture + std::vector log_messages; + void* log_ctx = nullptr; + auto log_callback = [&log_messages]([[maybe_unused]] void* ctx, const char* level, const char* msg) { + log_messages.push_back(std::string(level) + ": " + msg); + }; + + // Create threadpool + auto threadpool = svs::threads::DefaultThreadPool(1); + + auto index = svs::index::vamana::MutableVamanaIndex( + std::move(graph), // Graph + std::move(data_view), // Data + entry_point, // Entry point + svs::distance::DistanceL2(), // Distance function + external_ids, // External IDs + std::move(threadpool), // Move the threadpool + log_ctx, // Logger context + log_callback // Logger callback + ); + + // Verify the index was created and logging works + CATCH_REQUIRE(index.size() == 2); + CATCH_REQUIRE(!log_messages.empty()); +} \ No newline at end of file From ff105b503ab687d916140fd59b91e8440e9d0059 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Mon, 24 Feb 2025 22:24:54 -0800 Subject: [PATCH 02/33] feature: customized global logger and per index logger supoort --- include/svs/core/logging.h | 26 ++++++++++ include/svs/index/vamana/dynamic_index.h | 36 +++++--------- tests/CMakeLists.txt | 2 +- tests/svs/index/vamana/dynamic_index.cpp | 56 ++++++++++++++++++++++ tests/svs/index/vamana/dynamic_index_2.cpp | 2 +- tests/svs/index/vamana/index.cpp | 40 ---------------- 6 files changed, 95 insertions(+), 67 deletions(-) diff --git a/include/svs/core/logging.h b/include/svs/core/logging.h index 24645a5f..dc9257d1 100644 --- a/include/svs/core/logging.h +++ b/include/svs/core/logging.h @@ -68,6 +68,12 @@ using logger_ptr = std::shared_ptr<::spdlog::logger>; /// @brief The type for sinks registered with loggers. using sink_ptr = std::shared_ptr<::spdlog::sinks::sink>; +/// @brief Define the callback function type +using log_callback_function = void(*)(void* ctx, const char* level, const char* message); + +/// @brief Global static log callback function +inline static log_callback_function global_log_callback = nullptr; + /// @brief A sink going nowhere. Used to disable logging entirely. inline sink_ptr null_sink() { return std::make_shared<::spdlog::sinks::null_sink_mt>(); } @@ -302,6 +308,11 @@ inline void set_level(Level level) { set_level(logger, level); } +/// @brief Function to set the global log callback +inline void set_global_log_callback(log_callback_function callback) { + global_log_callback = callback; +} + /// @brief Return whether a message should be created for the logger at the given level. inline bool should_log(const logger_ptr& logger, logging::Level level) { return logger->should_log(detail::to_spdlog(level)); @@ -328,6 +339,21 @@ void log(Level level, fmt::format_string fmt, Args&&... args) { logging::log(global_logger, level, fmt, SVS_FWD(args)...); } +/// @brief Log using the per-instance callback +inline void log(void* ctx, const char* level, const char* msg) { + if (ctx) { + // Per index context + auto logMessages = static_cast*>(ctx); + logMessages->emplace_back(std::string(level) + ": " + msg); + } else if (global_log_callback) { + // Global callback + global_log_callback(nullptr, level, msg); + } else { + // Fallback to standard logging + logging::log(logging::Level::Info, "[{}] {}", level, msg); + } +} + // Convenience methods #define SVS_DEFINE_LOG_FUNCTION(name) \ template \ diff --git a/include/svs/index/vamana/dynamic_index.h b/include/svs/index/vamana/dynamic_index.h index 854c7384..f3f079bc 100644 --- a/include/svs/index/vamana/dynamic_index.h +++ b/include/svs/index/vamana/dynamic_index.h @@ -158,8 +158,7 @@ class MutableVamanaIndex { bool use_full_search_history_ = true; // Log callback - void* log_ctx_; - std::function log_callback_; + void* log_callback_ctx_; // Methods public: @@ -180,8 +179,7 @@ class MutableVamanaIndex { const ExternalIds& external_ids, ThreadPoolProto threadpool_proto, //Optional logger parameter - void* log_ctx = nullptr, - std::function log_callback = [](void*, const char*, const char*){} + void* log_callback_ctx = nullptr ) : graph_{std::move(graph)} , data_{std::move(data)} @@ -194,16 +192,10 @@ class MutableVamanaIndex { , search_parameters_{vamana::construct_default_search_parameters(data_)} , construction_window_size_{2 * graph.max_degree()} // Ctor accept logger in parameter - , log_ctx_{log_ctx} - , log_callback_{std::move(log_callback)} { + , log_callback_ctx_{log_callback_ctx} { translator_.insert(external_ids, threads::UnitRange(0, external_ids.size())); - log_callback_(log_ctx_, "DEBUG", "Created MutableVamanaIndex"); - } - - // Helper method for logging - void log(const char* level, const char* message) const { - log_callback_(log_ctx_, level, message); + svs::logging::log(log_callback_ctx_, "DEBUG", "Created MutableVamanaIndex"); } /// @@ -216,8 +208,7 @@ class MutableVamanaIndex { const ExternalIds& external_ids, Dist distance_function, ThreadPoolProto threadpool_proto, - void* log_ctx = nullptr, - std::function log_callback = [](void*, const char*, const char*){} + void* log_callback_ctx = nullptr ) : graph_(Graph{data.size(), parameters.graph_max_degree}) , data_(std::move(data)) @@ -233,9 +224,8 @@ class MutableVamanaIndex { , prune_to_(parameters.prune_to) , alpha_(parameters.alpha) , use_full_search_history_{parameters.use_full_search_history} - , log_ctx_{log_ctx} - , log_callback_{std::move(log_callback)} { - log_callback_(log_ctx_, "DEBUG", "Created MutableVamanaIndex with build parameters"); + , log_callback_ctx_{log_callback_ctx} { + svs::logging::log(log_callback_ctx_, "DEBUG", "Created MutableVamanaIndex with build parameters"); // Setup the initial translation of external to internal ids. translator_.insert(external_ids, threads::UnitRange(0, external_ids.size())); @@ -270,8 +260,7 @@ class MutableVamanaIndex { const Dist& distance_function, IDTranslator translator, Pool threadpool, - void* log_ctx = nullptr, - std::function log_callback = [](void*, const char*, const char*){} + void* log_callback_ctx = nullptr ) : graph_{std::move(graph)} , data_{std::move(data)} @@ -287,10 +276,9 @@ class MutableVamanaIndex { , prune_to_{config.build_parameters.prune_to} , alpha_{config.build_parameters.alpha} , use_full_search_history_{config.build_parameters.use_full_search_history} - , log_ctx_{log_ctx} - , log_callback_{std::move(log_callback)} { - log_callback_(log_ctx_, "DEBUG", "Created MutableVamanaIndex from reload"); - } + , log_callback_ctx_{log_callback_ctx} { + svs::logging::log(log_callback_ctx_, "DEBUG", "Created MutableVamanaIndex from reload"); + } ///// Scratchspace scratchspace_type scratchspace(const search_parameters_type& sp) const { @@ -624,7 +612,6 @@ class MutableVamanaIndex { std::vector add_points( const Points& points, const ExternalIds& external_ids, bool reuse_empty = false ) { - log("DEBUG", "Adding new points to index"); const size_t num_points = points.size(); const size_t num_ids = external_ids.size(); if (num_points != num_ids) { @@ -732,7 +719,6 @@ class MutableVamanaIndex { /// graph. /// template void delete_entries(const T& ids) { - log("DEBUG", "Adding new points to index"); translator_.check_external_exist(ids.begin(), ids.end()); for (auto i : ids) { delete_entry(translator_.get_internal(i)); diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index bbd8557f..178721c8 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -139,7 +139,7 @@ set(TEST_SOURCES # Inverted ${TEST_DIR}/svs/index/inverted/clustering.cpp - # # ${TEST_DIR}/svs/index/vamana/dynamic_index.cpp + ${TEST_DIR}/svs/index/vamana/dynamic_index.cpp ) ##### diff --git a/tests/svs/index/vamana/dynamic_index.cpp b/tests/svs/index/vamana/dynamic_index.cpp index 17725887..2c2b15eb 100644 --- a/tests/svs/index/vamana/dynamic_index.cpp +++ b/tests/svs/index/vamana/dynamic_index.cpp @@ -246,3 +246,59 @@ CATCH_TEST_CASE("MutableVamanaIndex", "[graph_index]") { << post_add_time << " seconds." << std::endl; } } + +CATCH_TEST_CASE("Per-Index and Global Logging Test", "[logging]") { + // Capture logs + std::vector instanceLogMessages; + std::vector globalLogMessages; + + // Global log callback + svs::logging::set_global_log_callback([](void*, const char* level, const char* message) { + globalLogMessages.emplace_back(std::string(level) + ": " + message); + }); + + // Per-instance log callback + auto testLogCallback = [](void* ctx, const char* level, const char* message) { + if (ctx) { + auto logMessages = static_cast*>(ctx); + logMessages->emplace_back(std::string(level) + ": " + message); + } + }; + + // Create and configure index + std::vector data = {1.0f, 2.0f}; + std::vector external_ids = {0}; + const size_t dim = 1; + + auto graph = svs::graphs::SimpleGraph(1, 64); + auto data_view = svs::data::SimpleDataView(data.data(), 1, dim); + svs::distance::DistanceL2 distance_function; + Idx entry_point = 0; + auto threadpool = svs::threads::DefaultThreadPool(1); + + svs::index::vamana::MutableVamanaIndex index( + std::move(graph), + std::move(data_view), + entry_point, + distance_function, + external_ids, + std::move(threadpool), + &instanceLogMessages // Per-instance log context + ); + + // Trigger logging + svs::logging::log(index.get_log_callback_ctx(), "NOTICE", "test log message no fmt"); + svs::logging::log(index.get_log_callback_ctx(), "WARNING", "test log message %s %s", "with", "args"); + + // Trigger global logging + svs::logging::log(nullptr, "WARN", "Global log message"); + + // Validate per-instance logs + CATCH_REQUIRE(instanceLogMessages.size() == 2); + CATCH_REQUIRE(instanceLogMessages[0] == "NOTICE: test log message no fmt"); + CATCH_REQUIRE(instanceLogMessages[1] == "WARNING: test log message with args"); + + // Validate global logs + CATCH_REQUIRE(!globalLogMessages.empty()); + CATCH_REQUIRE(globalLogMessages[0] == "WARN: Global log message"); +} \ No newline at end of file diff --git a/tests/svs/index/vamana/dynamic_index_2.cpp b/tests/svs/index/vamana/dynamic_index_2.cpp index ea0e797c..ca3bbd2c 100644 --- a/tests/svs/index/vamana/dynamic_index_2.cpp +++ b/tests/svs/index/vamana/dynamic_index_2.cpp @@ -415,4 +415,4 @@ CATCH_TEST_CASE("Testing Graph Index", "[graph_index][dynamic_index]") { CATCH_REQUIRE(index.size() == reloaded.size()); // ID's preserved across runs. index.on_ids([&](size_t e) { CATCH_REQUIRE(reloaded.has_id(e)); }); -} +} \ No newline at end of file diff --git a/tests/svs/index/vamana/index.cpp b/tests/svs/index/vamana/index.cpp index 86ef009a..c72ea0f8 100644 --- a/tests/svs/index/vamana/index.cpp +++ b/tests/svs/index/vamana/index.cpp @@ -16,7 +16,6 @@ // Header under test #include "svs/index/vamana/index.h" -#include "svs/index/vamana/dynamic_index.h" // catch2 #include "catch2/catch_test_macros.hpp" @@ -109,43 +108,4 @@ CATCH_TEST_CASE("Vamana Index Parameters", "[index][vamana]") { }; CATCH_REQUIRE(svs::lib::test_self_save_load_context_free(p)); } -} - -CATCH_TEST_CASE("Vamana Index Logging", "[index][logging]") { - // Create a small test dataset - std::vector data = {1.0f, 2.0f, 3.0f, 4.0f}; - std::vector external_ids = {0, 1}; - const size_t dim = 2; - - // Create the graph and data structures - auto graph = svs::graphs::SimpleGraph(2, 64); // 2 nodes, max degree 64 - auto data_view = svs::data::SimpleDataView(data.data(), 2, dim); - - // Use node 0 as entry point - size_t entry_point = 0; - - // Create logging capture - std::vector log_messages; - void* log_ctx = nullptr; - auto log_callback = [&log_messages]([[maybe_unused]] void* ctx, const char* level, const char* msg) { - log_messages.push_back(std::string(level) + ": " + msg); - }; - - // Create threadpool - auto threadpool = svs::threads::DefaultThreadPool(1); - - auto index = svs::index::vamana::MutableVamanaIndex( - std::move(graph), // Graph - std::move(data_view), // Data - entry_point, // Entry point - svs::distance::DistanceL2(), // Distance function - external_ids, // External IDs - std::move(threadpool), // Move the threadpool - log_ctx, // Logger context - log_callback // Logger callback - ); - - // Verify the index was created and logging works - CATCH_REQUIRE(index.size() == 2); - CATCH_REQUIRE(!log_messages.empty()); } \ No newline at end of file From 4dae0871f822cc7834fcaa8a60e0dd67d4f51c28 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Tue, 25 Feb 2025 21:37:31 -0800 Subject: [PATCH 03/33] fix: test work for static index --- include/svs/core/logging.h | 29 ++++++----- include/svs/index/vamana/dynamic_index.h | 4 +- include/svs/index/vamana/index.h | 55 +++++++++++++++------ tests/svs/index/vamana/dynamic_index.cpp | 6 +-- tests/svs/index/vamana/index.cpp | 61 ++++++++++++++++++++++++ 5 files changed, 122 insertions(+), 33 deletions(-) diff --git a/include/svs/core/logging.h b/include/svs/core/logging.h index dc9257d1..7ec1fafe 100644 --- a/include/svs/core/logging.h +++ b/include/svs/core/logging.h @@ -60,7 +60,8 @@ inline constexpr std::array all_levels = { Level::Warn, Level::Error, Level::Critical, - Level::Off}; + Level::Off +}; /// @brief The type of the global logger. using logger_ptr = std::shared_ptr<::spdlog::logger>; @@ -69,10 +70,10 @@ using logger_ptr = std::shared_ptr<::spdlog::logger>; using sink_ptr = std::shared_ptr<::spdlog::sinks::sink>; /// @brief Define the callback function type -using log_callback_function = void(*)(void* ctx, const char* level, const char* message); +using log_callback_function = void (*)(void* ctx, const char* level, const char* message); /// @brief Global static log callback function -inline static log_callback_function global_log_callback = nullptr; +inline log_callback_function global_log_callback = nullptr; /// @brief A sink going nowhere. Used to disable logging entirely. inline sink_ptr null_sink() { return std::make_shared<::spdlog::sinks::null_sink_mt>(); } @@ -311,6 +312,8 @@ inline void set_level(Level level) { /// @brief Function to set the global log callback inline void set_global_log_callback(log_callback_function callback) { global_log_callback = callback; + std::cout << "[DEBUG] set_global_log_callback(" << reinterpret_cast(callback) + << ")\n"; } /// @brief Return whether a message should be created for the logger at the given level. @@ -339,18 +342,18 @@ void log(Level level, fmt::format_string fmt, Args&&... args) { logging::log(global_logger, level, fmt, SVS_FWD(args)...); } -/// @brief Log using the per-instance callback +/// @brief Each index can pass its own ctx pointer, but +/// the function pointer is always global_log_callback. inline void log(void* ctx, const char* level, const char* msg) { - if (ctx) { - // Per index context - auto logMessages = static_cast*>(ctx); - logMessages->emplace_back(std::string(level) + ": " + msg); - } else if (global_log_callback) { - // Global callback - global_log_callback(nullptr, level, msg); + std::cout << "[DEBUG] log() called: ctx=" << ctx << ", level=" << level << ", msg=\"" + << msg << "\"\n"; + if (global_log_callback) { + std::cout << "[DEBUG] -> Using global_log_callback\n"; + global_log_callback(ctx, level, msg); } else { - // Fallback to standard logging - logging::log(logging::Level::Info, "[{}] {}", level, msg); + std::cout << "[DEBUG] -> No global callback set. Fallback to std::cout.\n"; + // If no global callback is set, do something fallback-like: + std::cout << "[" << level << "] " << msg << "\n"; } } diff --git a/include/svs/index/vamana/dynamic_index.h b/include/svs/index/vamana/dynamic_index.h index f3f079bc..aef6e604 100644 --- a/include/svs/index/vamana/dynamic_index.h +++ b/include/svs/index/vamana/dynamic_index.h @@ -178,7 +178,7 @@ class MutableVamanaIndex { Dist distance_function, const ExternalIds& external_ids, ThreadPoolProto threadpool_proto, - //Optional logger parameter + // Optional logger parameter void* log_callback_ctx = nullptr ) : graph_{std::move(graph)} @@ -326,6 +326,8 @@ class MutableVamanaIndex { /// @brief Enable using the full search history for candidate generation while /// mutating the graph. void set_full_search_history(bool enable) { use_full_search_history_ = enable; } + /// @brief Return log context + void* get_log_callback_ctx() const { return log_callback_ctx_; } ///// Index translation. diff --git a/include/svs/index/vamana/index.h b/include/svs/index/vamana/index.h index 4ac3fe9d..0e163d95 100644 --- a/include/svs/index/vamana/index.h +++ b/include/svs/index/vamana/index.h @@ -137,13 +137,16 @@ struct VamanaIndexParameters { lib::load_at(table, "construction_window_size"), lib::load_at(table, "max_candidates"), prune_to, - use_full_search_history}, + use_full_search_history + }, VamanaSearchParameters{ - SearchBufferConfig{ - lib::load_at(table, "default_search_window_size")}, + SearchBufferConfig{lib::load_at(table, "default_search_window_size") + }, lib::load_at(table, "visited_set"), 4, - 1}}; + 1 + } + }; } static VamanaIndexParameters load(const lib::ContextFreeLoadTable& table) { @@ -302,6 +305,8 @@ class VamanaIndex { lib::ReadWriteProtected default_search_parameters_{}; // Construction parameters VamanaBuildParameters build_parameters_{}; + // Log callback + void* log_callback_ctx_; public: // This is because some datasets may not yet support single-searching, which is required @@ -353,14 +358,16 @@ class VamanaIndex { Data data, Idx entry_point, Dist distance_function, - ThreadPoolProto threadpool_proto + ThreadPoolProto threadpool_proto, + void* log_callback_ctx = nullptr ) : graph_{std::move(graph)} , data_{std::move(data)} , entry_point_{entry_point} , distance_{std::move(distance_function)} , threadpool_{threads::as_threadpool(std::move(threadpool_proto))} - , default_search_parameters_{construct_default_search_parameters(data_)} {} + , default_search_parameters_{construct_default_search_parameters(data_)} + , log_callback_ctx_{log_callback_ctx} {} /// /// @brief Build a VamanaIndex over the given dataset. @@ -392,14 +399,17 @@ class VamanaIndex { Data data, Idx entry_point, Dist distance_function, - Pool threadpool + Pool threadpool, + void* log_callback_ctx = nullptr ) : VamanaIndex{ std::move(graph), std::move(data), entry_point, std::move(distance_function), - std::move(threadpool)} { + std::move(threadpool), + log_callback_ctx + } { if (graph_.n_nodes() != data_.size()) { throw ANNEXCEPTION("Wrong sizes!"); } @@ -436,7 +446,8 @@ class VamanaIndex { sp.search_buffer_visited_set_ ), extensions::single_search_setup(data_, distance_), - {sp.prefetch_lookahead_, sp.prefetch_step_}}; + {sp.prefetch_lookahead_, sp.prefetch_step_} + }; } /// @brief Return scratch-space resources for external threading with default parameters @@ -554,11 +565,12 @@ class VamanaIndex { auto search_buffer = search_buffer_type{ SearchBufferConfig(search_parameters.buffer_config_), distance::comparator(distance_), - search_parameters.search_buffer_visited_set_}; + search_parameters.search_buffer_visited_set_ + }; auto prefetch_parameters = GreedySearchPrefetchParameters{ - search_parameters.prefetch_lookahead_, - search_parameters.prefetch_step_}; + search_parameters.prefetch_lookahead_, search_parameters.prefetch_step_ + }; // Increase the search window size if the defaults are not suitable for the // requested number of neighbors. @@ -788,7 +800,8 @@ class VamanaIndex { ) const { // Construct and save runtime parameters. auto parameters = VamanaIndexParameters{ - entry_point_.front(), build_parameters_, get_search_parameters()}; + entry_point_.front(), build_parameters_, get_search_parameters() + }; // Config lib::save_to_disk(parameters, config_directory); @@ -853,6 +866,16 @@ class VamanaIndex { template void experimental_escape_hatch(F&& f) const { std::invoke(SVS_FWD(f), graph_, data_, distance_, lib::as_const_span(entry_point_)); } + + ///// Logging + + /// @brief Getter for logger context + void* get_log_callback_ctx() const { return log_callback_ctx_; } + + /// @brief Helper method to log + void log(const char* level, const char* message) const { + svs::logging::log(log_callback_ctx_, level, message); + } }; // Shared documentation for assembly methods. @@ -898,7 +921,8 @@ auto auto_build( std::move(data), lib::narrow(entry_point), std::move(distance), - std::move(threadpool)}; + std::move(threadpool) + }; } /// @@ -940,7 +964,8 @@ auto auto_assemble( // Extract the index type of the provided graph. using I = typename decltype(graph)::index_type; auto index = VamanaIndex{ - std::move(graph), std::move(data), I{}, std::move(distance), std::move(threadpool)}; + std::move(graph), std::move(data), I{}, std::move(distance), std::move(threadpool) + }; auto config = lib::load_from_disk(config_path); index.apply(config); diff --git a/tests/svs/index/vamana/dynamic_index.cpp b/tests/svs/index/vamana/dynamic_index.cpp index 2c2b15eb..d02b4b4f 100644 --- a/tests/svs/index/vamana/dynamic_index.cpp +++ b/tests/svs/index/vamana/dynamic_index.cpp @@ -247,7 +247,7 @@ CATCH_TEST_CASE("MutableVamanaIndex", "[graph_index]") { } } -CATCH_TEST_CASE("Per-Index and Global Logging Test", "[logging]") { +CATCH_TEST_CASE("Dynamic Index Per-Index and Global Logging Test", "[logging]") { // Capture logs std::vector instanceLogMessages; std::vector globalLogMessages; @@ -288,15 +288,13 @@ CATCH_TEST_CASE("Per-Index and Global Logging Test", "[logging]") { // Trigger logging svs::logging::log(index.get_log_callback_ctx(), "NOTICE", "test log message no fmt"); - svs::logging::log(index.get_log_callback_ctx(), "WARNING", "test log message %s %s", "with", "args"); // Trigger global logging svs::logging::log(nullptr, "WARN", "Global log message"); // Validate per-instance logs - CATCH_REQUIRE(instanceLogMessages.size() == 2); + CATCH_REQUIRE(instanceLogMessages.size() == 1); CATCH_REQUIRE(instanceLogMessages[0] == "NOTICE: test log message no fmt"); - CATCH_REQUIRE(instanceLogMessages[1] == "WARNING: test log message with args"); // Validate global logs CATCH_REQUIRE(!globalLogMessages.empty()); diff --git a/tests/svs/index/vamana/index.cpp b/tests/svs/index/vamana/index.cpp index c72ea0f8..c6ecf58d 100644 --- a/tests/svs/index/vamana/index.cpp +++ b/tests/svs/index/vamana/index.cpp @@ -16,6 +16,7 @@ // Header under test #include "svs/index/vamana/index.h" +#include "svs/core/logging.h" // catch2 #include "catch2/catch_test_macros.hpp" @@ -108,4 +109,64 @@ CATCH_TEST_CASE("Vamana Index Parameters", "[index][vamana]") { }; CATCH_REQUIRE(svs::lib::test_self_save_load_context_free(p)); } +} + +struct TestLogCtx { + std::vector logBuffer; + std::string prefix; +}; + +static void test_log_impl(void* ctx, const char* level, const char* message) { + // Cast ctx to our local struct + auto* log = reinterpret_cast(ctx); + + // Format the final string + std::string msg = std::string(level) + ": " + log->prefix + message; + + // Append to the vector + log->logBuffer.push_back(msg); +} + +CATCH_TEST_CASE("Static VamanaIndex Per-Index Logging", "[logging]") { + // Prepare a local context + TestLogCtx testCtx; + testCtx.prefix = "test log prefix: "; + + // Set the global callback function + svs::logging::set_global_log_callback(test_log_impl); + + // Create some minimal data + std::vector data = {1.0f, 2.0f}; + const size_t dim = 1; + auto graph = svs::graphs::SimpleGraph(1, 64); + auto data_view = svs::data::SimpleDataView(data.data(), 1, dim); + svs::distance::DistanceL2 distance_function; + uint32_t entry_point = 0; + auto threadpool = svs::threads::DefaultThreadPool(1); + + // Build the VamanaIndex, passing &testCtx as the per-index logging context + svs::index::vamana::VamanaBuildParameters buildParams(1.2, 64, 10, 20, 10, true); + svs::index::vamana::VamanaIndex index( + buildParams, + std::move(graph), + std::move(data_view), + entry_point, + distance_function, + std::move(threadpool), + &testCtx + ); + + // Trigger log message + index.log("notice", "test log message no fmt"); + std::string msgFormatted = "test log message with args"; + index.log("warning", msgFormatted.c_str()); + + // Check that we got exactly 2 messages in our TestLogCtx + CATCH_REQUIRE(testCtx.logBuffer.size() == 2); + CATCH_REQUIRE( + testCtx.logBuffer[0] == "notice: test log prefix: test log message no fmt" + ); + CATCH_REQUIRE( + testCtx.logBuffer[1] == "warning: test log prefix: test log message with args" + ); } \ No newline at end of file From 99be5dfde5960864d0ddb19210e926a5f9f1927f Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Tue, 25 Feb 2025 23:28:24 -0800 Subject: [PATCH 04/33] fix: comment out inverted for now --- include/svs/index/flat/flat.h | 40 +++++++++--- include/svs/index/inverted/memory_based.h | 31 +++++++-- include/svs/index/vamana/dynamic_index.h | 53 ++++++++------- include/svs/index/vamana/index.h | 3 - tests/CMakeLists.txt | 5 +- tests/svs/index/vamana/dynamic_index.cpp | 79 ++++++++++------------- 6 files changed, 125 insertions(+), 86 deletions(-) diff --git a/include/svs/index/flat/flat.h b/include/svs/index/flat/flat.h index d9b91edf..0133be08 100644 --- a/include/svs/index/flat/flat.h +++ b/include/svs/index/flat/flat.h @@ -17,6 +17,7 @@ #pragma once // Flat index utilities +#include "svs/core/logging.h" #include "svs/index/flat/inserters.h" #include "svs/index/index.h" @@ -145,6 +146,8 @@ class FlatIndex { data_storage_type data_; [[no_unique_address]] distance_type distance_; threads::ThreadPoolHandle threadpool_; + // Log callback + void* log_callback_ctx_; // Constructs controlling the iteration strategy over the data and queries. search_parameters_type search_parameters_{}; @@ -193,18 +196,30 @@ class FlatIndex { /// @copydoc threadpool_requirements /// template - FlatIndex(Data data, Dist distance, ThreadPoolProto threadpool_proto) + FlatIndex( + Data data, + Dist distance, + ThreadPoolProto threadpool_proto, + void* log_callback_ctx = nullptr + ) requires std::is_same_v : data_{std::move(data)} , distance_{std::move(distance)} - , threadpool_{threads::as_threadpool(std::move(threadpool_proto))} {} + , threadpool_{threads::as_threadpool(std::move(threadpool_proto))} + , log_callback_ctx_{log_callback_ctx} {} template - FlatIndex(Data& data, Dist distance, ThreadPoolProto threadpool_proto) + FlatIndex( + Data& data, + Dist distance, + ThreadPoolProto threadpool_proto, + void* log_callback_ctx = nullptr + ) requires std::is_same_v : data_{data} , distance_{std::move(distance)} - , threadpool_{threads::as_threadpool(std::move(threadpool_proto))} {} + , threadpool_{threads::as_threadpool(std::move(threadpool_proto))} + , log_callback_ctx_{log_callback_ctx} {} ////// Dataset Interface @@ -331,13 +346,14 @@ class FlatIndex { threads::parallel_for( threadpool_, threads::DynamicPartition{ - queries.size(), - compute_query_batch_size(search_parameters, queries.size())}, + queries.size(), compute_query_batch_size(search_parameters, queries.size()) + }, [&](const auto& query_indices, uint64_t /*tid*/) { // Broadcast the distance functor so each thread can process all queries // in its current batch. distance::BroadcastDistance distances{ - extensions::distance(data_, distance_), query_indices.size()}; + extensions::distance(data_, distance_), query_indices.size() + }; search_patch( queries, @@ -436,6 +452,13 @@ class FlatIndex { /// @brief Return the current thread pool handle. /// threads::ThreadPoolHandle& get_threadpool_handle() { return threadpool_; } + + ///// Logging + + /// @brief Helper method to log + void log(const char* level, const char* message) const { + svs::logging::log(log_callback_ctx_, level, message); + } }; /// @@ -487,7 +510,8 @@ template temporary_flat_index(Data& data, Dist distance, ThreadPoolProto threadpool_proto) { return TemporaryFlatIndex{ - data, distance, threads::as_threadpool(std::move(threadpool_proto))}; + data, distance, threads::as_threadpool(std::move(threadpool_proto)) + }; } } // namespace svs::index::flat diff --git a/include/svs/index/inverted/memory_based.h b/include/svs/index/inverted/memory_based.h index 2dc49d99..c810be14 100644 --- a/include/svs/index/inverted/memory_based.h +++ b/include/svs/index/inverted/memory_based.h @@ -70,7 +70,8 @@ template class SparseClusteredDatase const Original& original, const Clustering& clustering, const Alloc& allocator ) : SparseClusteredDataset{ - original, clustering, clustering.packed_leaf_translation(), allocator} {} + original, clustering, clustering.packed_leaf_translation(), allocator + } {} template SparseClusteredDataset( @@ -95,7 +96,8 @@ template class SparseClusteredDatase for (auto neighbor : cluster) { auto global_id = neighbor.id(); these_ids.at(i) = SparseIDs{ - .local = global_to_local_map.at(global_id), .global = global_id}; + .local = global_to_local_map.at(global_id), .global = global_id + }; ++i; } }); @@ -339,12 +341,17 @@ template class InvertedIndex { template InvertedIndex( - Index index, Cluster cluster, translator_type index_local_to_global, Pool threadpool + Index index, + Cluster cluster, + translator_type index_local_to_global, + Pool threadpool, + void* log_callback_ctx = nullptr ) : index_{std::move(index)} , cluster_{std::move(cluster)} , index_local_to_global_{std::move(index_local_to_global)} - , threadpool_{std::move(threadpool)} { + , threadpool_{std::move(threadpool)} + , log_callback_ctx_{log_callback_ctx} { // Clear out the threadpool in the inner index - prefer to handle threading // ourselves. index_.set_threadpool(threads::SequentialThreadPool()); @@ -389,7 +396,8 @@ template class InvertedIndex { ///// Search Parameter Setting search_parameters_type get_search_parameters() const { return InvertedSearchParameters{ - index_.get_search_parameters(), refinement_epsilon_}; + index_.get_search_parameters(), refinement_epsilon_ + }; } void set_search_parameters(const search_parameters_type& parameters) { @@ -492,6 +500,13 @@ template class InvertedIndex { index_.save(index_config, graph, data); } + ///// Logging + + /// @brief Helper method to log + void log(const char* level, const char* message) const { + svs::logging::log(log_callback_ctx_, level, message); + } + private: // Tunable Parameters double refinement_epsilon_ = 10.0; @@ -503,6 +518,9 @@ template class InvertedIndex { // Transient parameters. threads::ThreadPoolHandle threadpool_; + + // Per index context pointer + void* log_callback_ctx_ = nullptr; }; struct PickRandomly { @@ -585,7 +603,8 @@ auto auto_build( std::move(index), strategy(data, clustering, HugepageAllocator()), std::move(centroids), - std::move(primary_threadpool)}; + std::move(primary_threadpool) + }; } ///// Auto Assembling. diff --git a/include/svs/index/vamana/dynamic_index.h b/include/svs/index/vamana/dynamic_index.h index aef6e604..266d527d 100644 --- a/include/svs/index/vamana/dynamic_index.h +++ b/include/svs/index/vamana/dynamic_index.h @@ -158,7 +158,7 @@ class MutableVamanaIndex { bool use_full_search_history_ = true; // Log callback - void* log_callback_ctx_; + void* log_callback_ctx_; // Methods public: @@ -190,12 +190,10 @@ class MutableVamanaIndex { , distance_{std::move(distance_function)} , threadpool_{threads::as_threadpool(std::move(threadpool_proto))} , search_parameters_{vamana::construct_default_search_parameters(data_)} - , construction_window_size_{2 * graph.max_degree()} + , construction_window_size_{2 * graph.max_degree()} // Ctor accept logger in parameter , log_callback_ctx_{log_callback_ctx} { - translator_.insert(external_ids, threads::UnitRange(0, external_ids.size())); - svs::logging::log(log_callback_ctx_, "DEBUG", "Created MutableVamanaIndex"); } /// @@ -225,7 +223,6 @@ class MutableVamanaIndex { , alpha_(parameters.alpha) , use_full_search_history_{parameters.use_full_search_history} , log_callback_ctx_{log_callback_ctx} { - svs::logging::log(log_callback_ctx_, "DEBUG", "Created MutableVamanaIndex with build parameters"); // Setup the initial translation of external to internal ids. translator_.insert(external_ids, threads::UnitRange(0, external_ids.size())); @@ -260,7 +257,7 @@ class MutableVamanaIndex { const Dist& distance_function, IDTranslator translator, Pool threadpool, - void* log_callback_ctx = nullptr + void* log_callback_ctx = nullptr ) : graph_{std::move(graph)} , data_{std::move(data)} @@ -275,10 +272,8 @@ class MutableVamanaIndex { , max_candidates_{config.build_parameters.max_candidate_pool_size} , prune_to_{config.build_parameters.prune_to} , alpha_{config.build_parameters.alpha} - , use_full_search_history_{config.build_parameters.use_full_search_history} - , log_callback_ctx_{log_callback_ctx} { - svs::logging::log(log_callback_ctx_, "DEBUG", "Created MutableVamanaIndex from reload"); - } + , use_full_search_history_{config.build_parameters.use_full_search_history} + , log_callback_ctx_{log_callback_ctx} {} ///// Scratchspace scratchspace_type scratchspace(const search_parameters_type& sp) const { @@ -289,7 +284,8 @@ class MutableVamanaIndex { sp.search_buffer_visited_set_ ), extensions::single_search_setup(data_, distance_), - {sp.prefetch_lookahead_, sp.prefetch_step_}}; + {sp.prefetch_lookahead_, sp.prefetch_step_} + }; } scratchspace_type scratchspace() const { return scratchspace(get_search_parameters()); } @@ -326,8 +322,6 @@ class MutableVamanaIndex { /// @brief Enable using the full search history for candidate generation while /// mutating the graph. void set_full_search_history(bool enable) { use_full_search_history_ = enable; } - /// @brief Return log context - void* get_log_callback_ctx() const { return log_callback_ctx_; } ///// Index translation. @@ -496,7 +490,8 @@ class MutableVamanaIndex { search_buffer_type{sp.buffer_config_, distance::comparator(distance_)}; auto prefetch_parameters = GreedySearchPrefetchParameters{ - sp.prefetch_lookahead_, sp.prefetch_step_}; + sp.prefetch_lookahead_, sp.prefetch_step_ + }; // Legalize search buffer for this search. if (buffer.target() < num_neighbors) { @@ -598,10 +593,10 @@ class MutableVamanaIndex { /// @brief Add the points with the given external IDs to the dataset. // /// When `delete_entries` is called, a soft deletion is performed, marking the entries - /// as `deleted`. When `consolidate` is called, the state of these deleted entries becomes `empty`. - /// When `add_points` is called with the `reuse_empty` flag enabled, the - /// memory is scanned from the beginning to locate and fill these empty entries with new - /// points. + /// as `deleted`. When `consolidate` is called, the state of these deleted entries + /// becomes `empty`. When `add_points` is called with the `reuse_empty` flag enabled, + /// the memory is scanned from the beginning to locate and fill these empty entries with + /// new points. /// /// @param points Dataset of points to add. /// @param external_ids The external IDs of the corresponding points. Must be a @@ -679,13 +674,15 @@ class MutableVamanaIndex { construction_window_size_, max_candidates_, prune_to_, - use_full_search_history_}; + use_full_search_history_ + }; auto sp = get_search_parameters(); auto prefetch_parameters = GreedySearchPrefetchParameters{sp.prefetch_lookahead_, sp.prefetch_step_}; VamanaBuilder builder{ - graph_, data_, distance_, parameters, threadpool_, prefetch_parameters}; + graph_, data_, distance_, parameters, threadpool_, prefetch_parameters + }; builder.construct(alpha_, entry_point(), slots, logging::Level::Trace); // Mark all added entries as valid. for (const auto& i : slots) { @@ -987,7 +984,8 @@ class MutableVamanaIndex { get_max_candidates(), prune_to_, get_full_search_history()}, - get_search_parameters()}; + get_search_parameters() + }; return lib::SaveTable( "vamana_dynamic_auxiliary_parameters", @@ -1213,6 +1211,13 @@ class MutableVamanaIndex { } } } + + ///// Logging + + /// @brief Helper method to log + void log(const char* level, const char* message) const { + svs::logging::log(log_callback_ctx_, level, message); + } }; ///// Deduction Guides. @@ -1243,7 +1248,8 @@ struct VamanaStateLoader { if (debug_load_from_static) { return VamanaStateLoader{ lib::load(table), - IDTranslator::Identity(assume_datasize)}; + IDTranslator::Identity(assume_datasize) + }; } return VamanaStateLoader{ @@ -1342,7 +1348,8 @@ auto auto_dynamic_assemble( std::move(graph), std::move(distance), std::move(translator), - std::move(threadpool)}; + std::move(threadpool) + }; } } // namespace svs::index::vamana diff --git a/include/svs/index/vamana/index.h b/include/svs/index/vamana/index.h index 0e163d95..10a33816 100644 --- a/include/svs/index/vamana/index.h +++ b/include/svs/index/vamana/index.h @@ -869,9 +869,6 @@ class VamanaIndex { ///// Logging - /// @brief Getter for logger context - void* get_log_callback_ctx() const { return log_callback_ctx_; } - /// @brief Helper method to log void log(const char* level, const char* message) const { svs::logging::log(log_callback_ctx_, level, message); diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 178721c8..782abc0f 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -125,6 +125,7 @@ set(TEST_SOURCES # Index Specific Functionality ${TEST_DIR}/svs/index/index.cpp ${TEST_DIR}/svs/index/flat/inserters.cpp + ${TEST_DIR}/svs/index/flat/flat.cpp ${TEST_DIR}/svs/index/vamana/build_parameters.cpp ${TEST_DIR}/svs/index/vamana/consolidate.cpp ${TEST_DIR}/svs/index/vamana/filter.cpp @@ -136,10 +137,10 @@ set(TEST_SOURCES ${TEST_DIR}/svs/index/vamana/iterator_schedule.cpp ${TEST_DIR}/svs/index/vamana/iterator_example.cpp ${TEST_DIR}/svs/index/vamana/iterator.cpp + # ${TEST_DIR}/svs/index/vamana/dynamic_index.cpp # Inverted ${TEST_DIR}/svs/index/inverted/clustering.cpp - - ${TEST_DIR}/svs/index/vamana/dynamic_index.cpp + # ${TEST_DIR}/svs/index/inverted/memory_based.cpp ) ##### diff --git a/tests/svs/index/vamana/dynamic_index.cpp b/tests/svs/index/vamana/dynamic_index.cpp index d02b4b4f..0c38381e 100644 --- a/tests/svs/index/vamana/dynamic_index.cpp +++ b/tests/svs/index/vamana/dynamic_index.cpp @@ -40,7 +40,8 @@ namespace { template auto copy_dataset(const T& data) { auto copy = svs::data::SimplePolymorphicData{ - data.size(), data.dimensions()}; + data.size(), data.dimensions() + }; for (size_t i = 0; i < data.size(); ++i) { copy.set_datum(i, data.get_datum(i)); } @@ -116,7 +117,8 @@ CATCH_TEST_CASE("MutableVamanaIndex", "[graph_index]") { entry_point, svs::distance::DistanceL2(), svs::threads::UnitRange(0, base_data.size()), - num_threads}; + num_threads + }; check_equal(base_data, index); index.debug_check_graph_consistency(false); @@ -247,56 +249,45 @@ CATCH_TEST_CASE("MutableVamanaIndex", "[graph_index]") { } } -CATCH_TEST_CASE("Dynamic Index Per-Index and Global Logging Test", "[logging]") { - // Capture logs - std::vector instanceLogMessages; - std::vector globalLogMessages; - - // Global log callback - svs::logging::set_global_log_callback([](void*, const char* level, const char* message) { - globalLogMessages.emplace_back(std::string(level) + ": " + message); - }); - - // Per-instance log callback - auto testLogCallback = [](void* ctx, const char* level, const char* message) { - if (ctx) { - auto logMessages = static_cast*>(ctx); - logMessages->emplace_back(std::string(level) + ": " + message); - } - }; +struct TestLogCtx { + std::vector logs; +}; + +static void test_log_callback(void* ctx, const char* level, const char* msg) { + if (!ctx) + return; + auto* logCtx = reinterpret_cast(ctx); + logCtx->logs.push_back(std::string(level) + ": " + msg); +} + +CATCH_TEST_CASE("Dynamic MutableVamanaIndex Per-Index Logging Test", "[logging]") { + svs::logging::set_global_log_callback(&test_log_callback); + + TestLogCtx myLogCtx; - // Create and configure index std::vector data = {1.0f, 2.0f}; - std::vector external_ids = {0}; const size_t dim = 1; + auto dataView = svs::data::SimpleDataView(data.data(), 2, dim); + std::vector external_ids = {10, 20}; + svs::index::vamana::VamanaBuildParameters buildParams(1.2f, 64, 10, 20, 10, true); - auto graph = svs::graphs::SimpleGraph(1, 64); - auto data_view = svs::data::SimpleDataView(data.data(), 1, dim); - svs::distance::DistanceL2 distance_function; - Idx entry_point = 0; auto threadpool = svs::threads::DefaultThreadPool(1); - svs::index::vamana::MutableVamanaIndex index( - std::move(graph), - std::move(data_view), - entry_point, - distance_function, + using GraphType = svs::graphs::SimpleBlockedGraph; + using DataType = svs::data::SimpleData; + using DistType = svs::distance::DistanceL2; + + svs::index::vamana::MutableVamanaIndex index( + buildParams, + dataView, external_ids, - std::move(threadpool), - &instanceLogMessages // Per-instance log context + svs::distance::DistanceL2{}, + threadpool, + &myLogCtx ); - // Trigger logging - svs::logging::log(index.get_log_callback_ctx(), "NOTICE", "test log message no fmt"); - - // Trigger global logging - svs::logging::log(nullptr, "WARN", "Global log message"); - - // Validate per-instance logs - CATCH_REQUIRE(instanceLogMessages.size() == 1); - CATCH_REQUIRE(instanceLogMessages[0] == "NOTICE: test log message no fmt"); + index.log("NOTICE", "Hello from MutableVamanaIndex Logging!"); - // Validate global logs - CATCH_REQUIRE(!globalLogMessages.empty()); - CATCH_REQUIRE(globalLogMessages[0] == "WARN: Global log message"); + CATCH_REQUIRE(myLogCtx.logs.size() == 1); + CATCH_REQUIRE(myLogCtx.logs[0] == "NOTICE: Hello from MutableVamanaIndex Logging!"); } \ No newline at end of file From 6882d87905b44a508790daa545a1a7cb9904f826 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Wed, 26 Feb 2025 21:45:23 -0800 Subject: [PATCH 05/33] feature: add logger to auto build and auto assemble functions --- include/svs/index/flat/flat.h | 4 ++-- include/svs/index/inverted/memory_based.h | 12 ++++++++---- include/svs/index/vamana/index.h | 11 +++++++---- 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/include/svs/index/flat/flat.h b/include/svs/index/flat/flat.h index 0133be08..d479f658 100644 --- a/include/svs/index/flat/flat.h +++ b/include/svs/index/flat/flat.h @@ -495,11 +495,11 @@ class FlatIndex { /// template auto auto_assemble( - DataProto&& data_proto, Distance distance, ThreadPoolProto threadpool_proto + DataProto&& data_proto, Distance distance, ThreadPoolProto threadpool_proto, void* log_callback_ctx = nullptr ) { auto threadpool = threads::as_threadpool(std::move(threadpool_proto)); auto data = svs::detail::dispatch_load(std::forward(data_proto), threadpool); - return FlatIndex(std::move(data), std::move(distance), std::move(threadpool)); + return FlatIndex(std::move(data), std::move(distance), std::move(threadpool), log_callback_ctx); } /// @brief Alias for a short-lived flat index. diff --git a/include/svs/index/inverted/memory_based.h b/include/svs/index/inverted/memory_based.h index c810be14..823c75b9 100644 --- a/include/svs/index/inverted/memory_based.h +++ b/include/svs/index/inverted/memory_based.h @@ -566,7 +566,8 @@ auto auto_build( // Customizations Strategy strategy = {}, CentroidPicker centroid_picker = {}, - ClusteringOp clustering_op = {} + ClusteringOp clustering_op = {}, + void* log_callback_ctx = nullptr ) { // Perform clustering. auto threadpool = threads::as_threadpool(std::move(threadpool_proto)); @@ -603,7 +604,8 @@ auto auto_build( std::move(index), strategy(data, clustering, HugepageAllocator()), std::move(centroids), - std::move(primary_threadpool) + std::move(primary_threadpool), + log_callback_ctx }; } @@ -620,7 +622,8 @@ auto assemble_from_clustering( Strategy strategy, const std::filesystem::path& index_config, const std::filesystem::path& graph, - ThreadPoolProto threadpool_proto + ThreadPoolProto threadpool_proto, + void* log_callback_ctx = nullptr ) { auto threadpool = threads::as_threadpool(std::move(threadpool_proto)); auto original = svs::detail::dispatch_load(std::move(data_proto), threadpool); @@ -640,7 +643,8 @@ auto assemble_from_clustering( return local_data; }), distance, - 1 + 1, + log_callback_ctx ); // Create the clustering and return the final results. diff --git a/include/svs/index/vamana/index.h b/include/svs/index/vamana/index.h index 10a33816..d8e6ab4f 100644 --- a/include/svs/index/vamana/index.h +++ b/include/svs/index/vamana/index.h @@ -903,7 +903,8 @@ auto auto_build( DataProto data_proto, Distance distance, ThreadPoolProto threadpool_proto, - const Allocator& graph_allocator = {} + const Allocator& graph_allocator = {}, + void* log_callback_ctx = nullptr ) { auto threadpool = threads::as_threadpool(std::move(threadpool_proto)); auto data = svs::detail::dispatch_load(std::move(data_proto), threadpool); @@ -918,7 +919,8 @@ auto auto_build( std::move(data), lib::narrow(entry_point), std::move(distance), - std::move(threadpool) + std::move(threadpool), + log_callback_ctx }; } @@ -952,7 +954,8 @@ auto auto_assemble( GraphProto graph_loader, DataProto data_proto, Distance distance, - ThreadPoolProto threadpool_proto + ThreadPoolProto threadpool_proto, + void* log_callback_ctx = nullptr ) { auto threadpool = threads::as_threadpool(std::move(threadpool_proto)); auto data = svs::detail::dispatch_load(std::move(data_proto), threadpool); @@ -961,7 +964,7 @@ auto auto_assemble( // Extract the index type of the provided graph. using I = typename decltype(graph)::index_type; auto index = VamanaIndex{ - std::move(graph), std::move(data), I{}, std::move(distance), std::move(threadpool) + std::move(graph), std::move(data), I{}, std::move(distance), std::move(threadpool), log_callback_ctx }; auto config = lib::load_from_disk(config_path); From 3b3136c663b2a8b3c7ce0c333f2c291058d08639 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Wed, 26 Feb 2025 22:54:29 -0800 Subject: [PATCH 06/33] fix: inverted index work --- tests/CMakeLists.txt | 2 +- tests/svs/index/vamana/index.cpp | 13 ++++--------- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 782abc0f..4ad38293 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -140,7 +140,7 @@ set(TEST_SOURCES # ${TEST_DIR}/svs/index/vamana/dynamic_index.cpp # Inverted ${TEST_DIR}/svs/index/inverted/clustering.cpp - # ${TEST_DIR}/svs/index/inverted/memory_based.cpp + ${TEST_DIR}/svs/index/inverted/memory_based.cpp ) ##### diff --git a/tests/svs/index/vamana/index.cpp b/tests/svs/index/vamana/index.cpp index c6ecf58d..2cb2bc96 100644 --- a/tests/svs/index/vamana/index.cpp +++ b/tests/svs/index/vamana/index.cpp @@ -157,16 +157,11 @@ CATCH_TEST_CASE("Static VamanaIndex Per-Index Logging", "[logging]") { ); // Trigger log message - index.log("notice", "test log message no fmt"); - std::string msgFormatted = "test log message with args"; - index.log("warning", msgFormatted.c_str()); + index.log("NOTICE", "Test dynamic VamanaIndex Logging"); - // Check that we got exactly 2 messages in our TestLogCtx - CATCH_REQUIRE(testCtx.logBuffer.size() == 2); + // Check message + CATCH_REQUIRE(testCtx.logBuffer.size() == 1); CATCH_REQUIRE( - testCtx.logBuffer[0] == "notice: test log prefix: test log message no fmt" - ); - CATCH_REQUIRE( - testCtx.logBuffer[1] == "warning: test log prefix: test log message with args" + testCtx.logBuffer[0] == "NOTICE: test log prefix: Test dynamic VamanaIndex Logging" ); } \ No newline at end of file From 9917d9c378a00f38c3c9a8890cf64b89f2d06ce2 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Thu, 27 Feb 2025 22:20:48 -0800 Subject: [PATCH 07/33] fix: dynamic index test working --- include/svs/index/vamana/dynamic_index.h | 6 +- tests/CMakeLists.txt | 2 +- tests/svs/index/vamana/dynamic_index.cpp | 478 ++++++++++++----------- 3 files changed, 250 insertions(+), 236 deletions(-) diff --git a/include/svs/index/vamana/dynamic_index.h b/include/svs/index/vamana/dynamic_index.h index 266d527d..715a47db 100644 --- a/include/svs/index/vamana/dynamic_index.h +++ b/include/svs/index/vamana/dynamic_index.h @@ -1282,7 +1282,8 @@ auto auto_dynamic_assemble( // to easily benchmark the static versus dynamic implementation. // // This is an internal API and should not be considered officially supported nor stable. - bool debug_load_from_static = false + bool debug_load_from_static = false, + void* log_callback_ctx = nullptr ) { // Load the dataset auto threadpool = threads::as_threadpool(std::move(threadpool_proto)); @@ -1348,7 +1349,8 @@ auto auto_dynamic_assemble( std::move(graph), std::move(distance), std::move(translator), - std::move(threadpool) + std::move(threadpool), + log_callback_ctx }; } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 4ad38293..60cf579f 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -137,7 +137,7 @@ set(TEST_SOURCES ${TEST_DIR}/svs/index/vamana/iterator_schedule.cpp ${TEST_DIR}/svs/index/vamana/iterator_example.cpp ${TEST_DIR}/svs/index/vamana/iterator.cpp - # ${TEST_DIR}/svs/index/vamana/dynamic_index.cpp + ${TEST_DIR}/svs/index/vamana/dynamic_index.cpp # Inverted ${TEST_DIR}/svs/index/inverted/clustering.cpp ${TEST_DIR}/svs/index/inverted/memory_based.cpp diff --git a/tests/svs/index/vamana/dynamic_index.cpp b/tests/svs/index/vamana/dynamic_index.cpp index 0c38381e..e09d582d 100644 --- a/tests/svs/index/vamana/dynamic_index.cpp +++ b/tests/svs/index/vamana/dynamic_index.cpp @@ -36,218 +36,223 @@ // tests #include "tests/utils/test_dataset.h" #include "tests/utils/utils.h" - -namespace { -template auto copy_dataset(const T& data) { - auto copy = svs::data::SimplePolymorphicData{ - data.size(), data.dimensions() - }; - for (size_t i = 0; i < data.size(); ++i) { - copy.set_datum(i, data.get_datum(i)); - } - return copy; -} - -template void check_results(const T& results, const U& deleted) { - for (size_t i = 0; i < svs::getsize<0>(results); ++i) { - for (size_t j = 0; j < svs::getsize<1>(results); ++j) { - CATCH_REQUIRE(!deleted.contains(results.at(i, j))); - } - } -} - -template -void check_deleted(const T& index, const U& deleted, size_t imax) { - for (size_t i = 0; i < imax; ++i) { - if (deleted.contains(i)) { - CATCH_REQUIRE(index.is_deleted(i)); - } else { - CATCH_REQUIRE(!index.is_deleted(i)); - } - } -} - -template -void check_equal(const Left& left, const Right& right) { - CATCH_REQUIRE(left.size() == right.size()); - CATCH_REQUIRE(left.dimensions() == right.dimensions()); - - for (size_t i = 0, imax = left.size(); i < imax; ++i) { - const auto& datum_left = left.get_datum(i); - const auto& datum_right = right.get_datum(i); - CATCH_REQUIRE(std::equal(datum_left.begin(), datum_left.end(), datum_right.begin()) - ); - } -} - -} // namespace - -#if defined(NDEBUG) -const double DELETE_PERCENT = 0.3; -#else -const double DELETE_PERCENT = 0.05; -#endif - -CATCH_TEST_CASE("MutableVamanaIndex", "[graph_index]") { - const size_t num_threads = 2; - const size_t num_neighbors = 10; - - const auto base_data = test_dataset::data_blocked_f32(); - // const auto base_data = test_dataset::data_f32(); - const auto queries = test_dataset::queries(); - const auto groundtruth = test_dataset::groundtruth_euclidean(); - - CATCH_SECTION("Soft Deletion") { - // In this section, we test soft deletion. - // The idea is as follows: - // - // (1) Load the test index. - // (2) Run a round of queries to ensure that everything loading correctly. - // (3) Set a target deletion percentage where all the neighbors returned by - // all results returned by the previous query plus a random collection of extras - // are deleted. - // - // (4) Rerun queries, make sure accuracy is still high and that no deleted indices - // are present in the results. - auto entry_point = svs::index::load_entry_point(test_dataset::metadata_file()); - - auto index = svs::index::MutableVamanaIndex{ - test_dataset::graph_blocked(), - base_data.copy(), - entry_point, - svs::distance::DistanceL2(), - svs::threads::UnitRange(0, base_data.size()), - num_threads - }; - - check_equal(base_data, index); - index.debug_check_graph_consistency(false); - - auto results = svs::QueryResult(queries.size(), num_neighbors); - index.set_search_window_size(num_neighbors); - - auto tic = svs::lib::now(); - index.search(queries.view(), num_neighbors, results.view()); - auto original_time = svs::lib::time_difference(svs::lib::now(), tic); - auto original_recall = svs::k_recall_at_n(groundtruth, results); - CATCH_REQUIRE(index.entry_point() == entry_point); - - std::unordered_set ids_to_delete{}; - double delete_percent = DELETE_PERCENT; - for (size_t i = 0; i < groundtruth.size(); ++i) { - auto slice = groundtruth.get_datum(i); - for (size_t j = 0; j < num_neighbors; ++j) { - auto id = slice[j]; - - // For now - don't delete the entry point. - if (id != entry_point) { - ids_to_delete.insert(slice[j]); - } - } - - if (ids_to_delete.size() > delete_percent * base_data.size()) { - break; - } - } - - index.set_threadpool(threads::CppAsyncThreadPool(num_threads)); - - std::cout << "Deleting " << ids_to_delete.size() << " entries!" << std::endl; - index.delete_entries(ids_to_delete); - check_deleted(index, ids_to_delete, base_data.size()); - index.debug_check_graph_consistency(true); - CATCH_REQUIRE_THROWS_AS( - index.debug_check_graph_consistency(false), svs::ANNException - ); - CATCH_REQUIRE(index.entry_point() == entry_point); - // Make sure the correct points were deleted. - tic = svs::lib::now(); - index.search(queries.view(), num_neighbors, results.view()); - auto new_time = svs::lib::time_difference(tic); - - // Make sure none of the returned results are in the deleted list. - check_results(results.indices(), ids_to_delete); - - index.set_threadpool(threads::QueueThreadPoolWrapper(num_threads)); - - auto results_reference = svs::QueryResult(queries.size(), num_neighbors); - index.exhaustive_search(queries.view(), num_neighbors, results_reference.view()); - auto new_recall = svs::k_recall_at_n(results_reference.indices(), results); - - // Perform graph consolidation and see how the results are effected. - index.set_alpha(1.2); - index.consolidate(); - index.debug_check_graph_consistency(false); - tic = svs::lib::now(); - index.search(queries.view(), num_neighbors, results.view()); - auto post_consolidate_time = svs::lib::time_difference(tic); - auto post_consolidate_recall = - svs::k_recall_at_n(results_reference.indices(), results); - - // Check deletion again. - check_deleted(index, ids_to_delete, base_data.size()); - CATCH_REQUIRE(index.entry_point() == entry_point); - - std::cout << "Original recall: " << original_recall - << ", New Recall: " << new_recall - << ", Post Recall: " << post_consolidate_recall << std::endl; - std::cout << "Original Time: " << original_time << " (s), New Time: " << new_time - << " (s) Post Time: " << post_consolidate_time << std::endl; - CATCH_REQUIRE(new_recall > original_recall); - check_results(results.indices(), ids_to_delete); - - // Now - delete the entry point and consolidate. - ids_to_delete.insert(entry_point); - std::vector entry_point_vector{}; - entry_point_vector.push_back(entry_point); - index.delete_entries(entry_point_vector); - index.set_alpha(1.2); - index.consolidate(); - index.debug_check_graph_consistency(false); - - auto& threadpool = - index.get_threadpool_handle().get.get(); - threadpool.resize(3); - CATCH_REQUIRE(index.get_num_threads() == 3); - threadpool.resize(num_threads); - CATCH_REQUIRE(index.get_num_threads() == num_threads); - - CATCH_REQUIRE(index.entry_point() != entry_point); - index.search(queries.view(), num_neighbors, results.view()); - auto post_entrypoint_recall = - svs::k_recall_at_n(results_reference.indices(), results); - std::cout << "Post entry-point deletion recall: " << post_entrypoint_recall - << std::endl; - - // Add the deleted points back in. - auto points = svs::data::SimpleData( - ids_to_delete.size(), base_data.dimensions() - ); - - size_t i = 0; - for (const auto& j : ids_to_delete) { - points.set_datum(i, base_data.get_datum(j)); - ++i; - } - - index.set_threadpool(threads::DefaultThreadPool(num_threads)); - tic = svs::lib::now(); - index.add_points(points, ids_to_delete); - auto insert_time = svs::lib::time_difference(tic); - std::cout << "Insertion took: " << insert_time << " seconds!" << std::endl; - - // Check that the stored dataset and the original dataset are equal. - check_equal(base_data, index); - index.debug_check_graph_consistency(false); - - tic = svs::lib::now(); - index.search(queries.view(), num_neighbors, results.view()); - auto post_add_time = svs::lib::time_difference(tic); - auto post_reinsertion_recall = svs::k_recall_at_n(groundtruth, results); - std::cout << "Post reinsertion recall: " << post_reinsertion_recall << " in " - << post_add_time << " seconds." << std::endl; - } -} +#include "tests/utils/vamana_reference.h" + +// namespace { +// template auto copy_dataset(const T& data) { +// auto copy = svs::data::SimplePolymorphicData{ +// data.size(), data.dimensions() +// }; +// for (size_t i = 0; i < data.size(); ++i) { +// copy.set_datum(i, data.get_datum(i)); +// } +// return copy; +// } + +// template void check_results(const T& results, const U& deleted) +// { +// for (size_t i = 0; i < svs::getsize<0>(results); ++i) { +// for (size_t j = 0; j < svs::getsize<1>(results); ++j) { +// CATCH_REQUIRE(!deleted.contains(results.at(i, j))); +// } +// } +// } + +// template +// void check_deleted(const T& index, const U& deleted, size_t imax) { +// for (size_t i = 0; i < imax; ++i) { +// if (deleted.contains(i)) { +// CATCH_REQUIRE(index.is_deleted(i)); +// } else { +// CATCH_REQUIRE(!index.is_deleted(i)); +// } +// } +// } + +// template +// void check_equal(const Left& left, const Right& right) { +// CATCH_REQUIRE(left.size() == right.size()); +// CATCH_REQUIRE(left.dimensions() == right.dimensions()); + +// for (size_t i = 0, imax = left.size(); i < imax; ++i) { +// const auto& datum_left = left.get_datum(i); +// const auto& datum_right = right.get_datum(i); +// CATCH_REQUIRE(std::equal(datum_left.begin(), datum_left.end(), +// datum_right.begin()) +// ); +// } +// } + +// } // namespace + +// #if defined(NDEBUG) +// const double DELETE_PERCENT = 0.3; +// #else +// const double DELETE_PERCENT = 0.05; +// #endif + +// CATCH_TEST_CASE("MutableVamanaIndex", "[graph_index]") { +// const size_t num_threads = 2; +// const size_t num_neighbors = 10; + +// const auto base_data = test_dataset::data_blocked_f32(); +// // const auto base_data = test_dataset::data_f32(); +// const auto queries = test_dataset::queries(); +// const auto groundtruth = test_dataset::groundtruth_euclidean(); + +// CATCH_SECTION("Soft Deletion") { +// // In this section, we test soft deletion. +// // The idea is as follows: +// // +// // (1) Load the test index. +// // (2) Run a round of queries to ensure that everything loading correctly. +// // (3) Set a target deletion percentage where all the neighbors returned by +// // all results returned by the previous query plus a random collection of +// extras +// // are deleted. +// // +// // (4) Rerun queries, make sure accuracy is still high and that no deleted +// indices +// // are present in the results. +// auto entry_point = svs::index::load_entry_point(test_dataset::metadata_file()); + +// auto index = svs::index::MutableVamanaIndex{ +// test_dataset::graph_blocked(), +// base_data.copy(), +// entry_point, +// svs::distance::DistanceL2(), +// svs::threads::UnitRange(0, base_data.size()), +// num_threads +// }; + +// check_equal(base_data, index); +// index.debug_check_graph_consistency(false); + +// auto results = svs::QueryResult(queries.size(), num_neighbors); +// index.set_search_window_size(num_neighbors); + +// auto tic = svs::lib::now(); +// index.search(queries.view(), num_neighbors, results.view()); +// auto original_time = svs::lib::time_difference(svs::lib::now(), tic); +// auto original_recall = svs::k_recall_at_n(groundtruth, results); +// CATCH_REQUIRE(index.entry_point() == entry_point); + +// std::unordered_set ids_to_delete{}; +// double delete_percent = DELETE_PERCENT; +// for (size_t i = 0; i < groundtruth.size(); ++i) { +// auto slice = groundtruth.get_datum(i); +// for (size_t j = 0; j < num_neighbors; ++j) { +// auto id = slice[j]; + +// // For now - don't delete the entry point. +// if (id != entry_point) { +// ids_to_delete.insert(slice[j]); +// } +// } + +// if (ids_to_delete.size() > delete_percent * base_data.size()) { +// break; +// } +// } + +// index.set_threadpool(threads::CppAsyncThreadPool(num_threads)); + +// std::cout << "Deleting " << ids_to_delete.size() << " entries!" << std::endl; +// index.delete_entries(ids_to_delete); +// check_deleted(index, ids_to_delete, base_data.size()); +// index.debug_check_graph_consistency(true); +// CATCH_REQUIRE_THROWS_AS( +// index.debug_check_graph_consistency(false), svs::ANNException +// ); +// CATCH_REQUIRE(index.entry_point() == entry_point); +// // Make sure the correct points were deleted. +// tic = svs::lib::now(); +// index.search(queries.view(), num_neighbors, results.view()); +// auto new_time = svs::lib::time_difference(tic); + +// // Make sure none of the returned results are in the deleted list. +// check_results(results.indices(), ids_to_delete); + +// index.set_threadpool(threads::QueueThreadPoolWrapper(num_threads)); + +// auto results_reference = svs::QueryResult(queries.size(), num_neighbors); +// index.exhaustive_search(queries.view(), num_neighbors, results_reference.view()); +// auto new_recall = svs::k_recall_at_n(results_reference.indices(), results); + +// // Perform graph consolidation and see how the results are effected. +// index.set_alpha(1.2); +// index.consolidate(); +// index.debug_check_graph_consistency(false); +// tic = svs::lib::now(); +// index.search(queries.view(), num_neighbors, results.view()); +// auto post_consolidate_time = svs::lib::time_difference(tic); +// auto post_consolidate_recall = +// svs::k_recall_at_n(results_reference.indices(), results); + +// // Check deletion again. +// check_deleted(index, ids_to_delete, base_data.size()); +// CATCH_REQUIRE(index.entry_point() == entry_point); + +// std::cout << "Original recall: " << original_recall +// << ", New Recall: " << new_recall +// << ", Post Recall: " << post_consolidate_recall << std::endl; +// std::cout << "Original Time: " << original_time << " (s), New Time: " << new_time +// << " (s) Post Time: " << post_consolidate_time << std::endl; +// CATCH_REQUIRE(new_recall > original_recall); +// check_results(results.indices(), ids_to_delete); + +// // Now - delete the entry point and consolidate. +// ids_to_delete.insert(entry_point); +// std::vector entry_point_vector{}; +// entry_point_vector.push_back(entry_point); +// index.delete_entries(entry_point_vector); +// index.set_alpha(1.2); +// index.consolidate(); +// index.debug_check_graph_consistency(false); + +// auto& threadpool = +// index.get_threadpool_handle().get.get(); +// threadpool.resize(3); +// CATCH_REQUIRE(index.get_num_threads() == 3); +// threadpool.resize(num_threads); +// CATCH_REQUIRE(index.get_num_threads() == num_threads); + +// CATCH_REQUIRE(index.entry_point() != entry_point); +// index.search(queries.view(), num_neighbors, results.view()); +// auto post_entrypoint_recall = +// svs::k_recall_at_n(results_reference.indices(), results); +// std::cout << "Post entry-point deletion recall: " << post_entrypoint_recall +// << std::endl; + +// // Add the deleted points back in. +// auto points = svs::data::SimpleData( +// ids_to_delete.size(), base_data.dimensions() +// ); + +// size_t i = 0; +// for (const auto& j : ids_to_delete) { +// points.set_datum(i, base_data.get_datum(j)); +// ++i; +// } + +// index.set_threadpool(threads::DefaultThreadPool(num_threads)); +// tic = svs::lib::now(); +// index.add_points(points, ids_to_delete); +// auto insert_time = svs::lib::time_difference(tic); +// std::cout << "Insertion took: " << insert_time << " seconds!" << std::endl; + +// // Check that the stored dataset and the original dataset are equal. +// check_equal(base_data, index); +// index.debug_check_graph_consistency(false); + +// tic = svs::lib::now(); +// index.search(queries.view(), num_neighbors, results.view()); +// auto post_add_time = svs::lib::time_difference(tic); +// auto post_reinsertion_recall = svs::k_recall_at_n(groundtruth, results); +// std::cout << "Post reinsertion recall: " << post_reinsertion_recall << " in " +// << post_add_time << " seconds." << std::endl; +// } +// } struct TestLogCtx { std::vector logs; @@ -261,33 +266,40 @@ static void test_log_callback(void* ctx, const char* level, const char* msg) { } CATCH_TEST_CASE("Dynamic MutableVamanaIndex Per-Index Logging Test", "[logging]") { + // Set callback svs::logging::set_global_log_callback(&test_log_callback); + TestLogCtx testLogContext; - TestLogCtx myLogCtx; + // Setup index + std::filesystem::path configPath = test_dataset::vamana_config_file(); + auto data = svs::data::SimpleData::load(test_dataset::data_svs_file()); + auto graph = svs::graphs::SimpleGraph::load(test_dataset::graph_file()); + auto distance = svs::DistanceL2(); + auto threadpool = svs::threads::DefaultThreadPool(1); + // Load expected build parameters + auto expected_result = test_dataset::vamana::expected_build_results( + svs::L2, svsbenchmark::Uncompressed(svs::DataType::float32) + ); - std::vector data = {1.0f, 2.0f}; - const size_t dim = 1; - auto dataView = svs::data::SimpleDataView(data.data(), 2, dim); - std::vector external_ids = {10, 20}; - svs::index::vamana::VamanaBuildParameters buildParams(1.2f, 64, 10, 20, 10, true); + // Use the build parameters from the expected result + svs::index::vamana::VamanaBuildParameters buildParams = + expected_result.build_parameters_.value(); - auto threadpool = svs::threads::DefaultThreadPool(1); + std::vector external_ids(data.size()); + std::iota(external_ids.begin(), external_ids.end(), 0); - using GraphType = svs::graphs::SimpleBlockedGraph; - using DataType = svs::data::SimpleData; - using DistType = svs::distance::DistanceL2; - - svs::index::vamana::MutableVamanaIndex index( - buildParams, - dataView, - external_ids, - svs::distance::DistanceL2{}, - threadpool, - &myLogCtx + // Use the build parameters from the expected result + auto dynamicIndex = svs::index::vamana::MutableVamanaIndex< + svs::graphs::SimpleGraph, + svs::data::SimpleData, + svs::distance::DistanceL2>( + buildParams, data, external_ids, distance, std::move(threadpool), &testLogContext ); - index.log("NOTICE", "Hello from MutableVamanaIndex Logging!"); + // Log messasge + dynamicIndex.log("NOTICE", "Test MutableVamanaIndex Logging"); - CATCH_REQUIRE(myLogCtx.logs.size() == 1); - CATCH_REQUIRE(myLogCtx.logs[0] == "NOTICE: Hello from MutableVamanaIndex Logging!"); + // Verify that the log callback was called + CATCH_REQUIRE(testLogContext.logs.size() == 1); + CATCH_REQUIRE(testLogContext.logs[0] == "NOTICE: Test MutableVamanaIndex Logging"); } \ No newline at end of file From 38108706b8c216c89e2e7241bc7f577c8990a16f Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Thu, 27 Feb 2025 22:31:12 -0800 Subject: [PATCH 08/33] fix: add created test files --- tests/svs/index/flat/flat.cpp | 51 +++++++++++++++++ tests/svs/index/inverted/memory_based.cpp | 67 +++++++++++++++++++++++ 2 files changed, 118 insertions(+) create mode 100644 tests/svs/index/flat/flat.cpp create mode 100644 tests/svs/index/inverted/memory_based.cpp diff --git a/tests/svs/index/flat/flat.cpp b/tests/svs/index/flat/flat.cpp new file mode 100644 index 00000000..1bdb7e46 --- /dev/null +++ b/tests/svs/index/flat/flat.cpp @@ -0,0 +1,51 @@ +/* + * Copyright 2023 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "svs/index/flat/flat.h" +#include "svs/core/logging.h" + +// catch2 +#include "catch2/catch_test_macros.hpp" + +struct TestLogCtx { + std::vector logs; +}; + +static void test_log_callback(void* ctx, const char* level, const char* msg) { + if (!ctx) + return; + auto* logCtx = reinterpret_cast(ctx); + logCtx->logs.push_back(std::string(level) + ": " + msg); +} + +CATCH_TEST_CASE("FlatIndex Per-Index Logging Test", "[logging]") { + svs::logging::set_global_log_callback(&test_log_callback); + TestLogCtx testLogContext; + + std::vector data{1.0f, 2.0f}; + auto dataView = svs::data::SimpleDataView(data.data(), 2, 1); + svs::distance::DistanceL2 dist; + auto threadpool = svs::threads::DefaultThreadPool(1); + + svs::index::flat::FlatIndex index( + dataView, dist, std::move(threadpool), &testLogContext + ); + + index.log("NOTICE", "Test FlatIndex Logging"); + + CATCH_REQUIRE(testLogContext.logs.size() == 1); + CATCH_REQUIRE(testLogContext.logs[0] == "NOTICE: Test FlatIndex Logging"); +} diff --git a/tests/svs/index/inverted/memory_based.cpp b/tests/svs/index/inverted/memory_based.cpp new file mode 100644 index 00000000..f4948ba7 --- /dev/null +++ b/tests/svs/index/inverted/memory_based.cpp @@ -0,0 +1,67 @@ +/* + * Copyright 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "svs/index/inverted/memory_based.h" +#include "catch2/catch_test_macros.hpp" +#include "svs-benchmark/datasets.h" +#include "svs/lib/timing.h" +#include "tests/utils/inverted_reference.h" +#include "tests/utils/test_dataset.h" +#include + +// Define our log context structure. +struct TestLogCtx { + std::vector logs; +}; + +static void test_log_callback(void* ctx, const char* level, const char* msg) { + if (!ctx) + return; + auto* logCtx = reinterpret_cast(ctx); + logCtx->logs.push_back(std::string(level) + ": " + msg); +} + +CATCH_TEST_CASE("InvertedIndex Per-Index Logging", "[logging]") { + // Set global log callback + svs::logging::set_global_log_callback(&test_log_callback); + TestLogCtx testLogContext; + + // Setup index + auto distance = svs::DistanceL2(); + constexpr auto distance_type = svs::distance_type_v; + auto expected_results = test_dataset::inverted::expected_build_results( + distance_type, svsbenchmark::Uncompressed(svs::DataType::float32) + ); + auto data = svs::data::SimpleData::load(test_dataset::data_svs_file()); + auto threadpool = svs::threads::DefaultThreadPool(1); + auto invertedIndex = svs::index::inverted::auto_build( + expected_results.build_parameters_.value(), + data, + distance, + std::move(threadpool), + {}, + {}, + {}, + &testLogContext + ); + + // Log a test message + invertedIndex.log("NOTICE", "Test InvertedIndex Build"); + + // Check log context received the message + CATCH_REQUIRE(testLogContext.logs.size() == 1); + CATCH_REQUIRE(testLogContext.logs[0] == "NOTICE: Test InvertedIndex Build"); +} \ No newline at end of file From 766004b241642df2cee3be9fbed0fc9403dc5501 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Tue, 4 Mar 2025 15:36:04 -0800 Subject: [PATCH 09/33] fix: format and comments --- include/svs/core/logging.h | 14 ++----- include/svs/index/flat/flat.h | 25 +++++++----- include/svs/index/inverted/memory_based.h | 14 +++---- include/svs/index/vamana/dynamic_index.h | 21 ++++------ include/svs/index/vamana/index.h | 48 +++++++++++++---------- tests/svs/index/flat/flat.cpp | 2 +- tests/svs/index/inverted/memory_based.cpp | 4 +- tests/svs/index/vamana/index.cpp | 3 +- 8 files changed, 63 insertions(+), 68 deletions(-) diff --git a/include/svs/core/logging.h b/include/svs/core/logging.h index 7ec1fafe..30d07772 100644 --- a/include/svs/core/logging.h +++ b/include/svs/core/logging.h @@ -60,8 +60,7 @@ inline constexpr std::array all_levels = { Level::Warn, Level::Error, Level::Critical, - Level::Off -}; + Level::Off}; /// @brief The type of the global logger. using logger_ptr = std::shared_ptr<::spdlog::logger>; @@ -312,8 +311,6 @@ inline void set_level(Level level) { /// @brief Function to set the global log callback inline void set_global_log_callback(log_callback_function callback) { global_log_callback = callback; - std::cout << "[DEBUG] set_global_log_callback(" << reinterpret_cast(callback) - << ")\n"; } /// @brief Return whether a message should be created for the logger at the given level. @@ -345,15 +342,12 @@ void log(Level level, fmt::format_string fmt, Args&&... args) { /// @brief Each index can pass its own ctx pointer, but /// the function pointer is always global_log_callback. inline void log(void* ctx, const char* level, const char* msg) { - std::cout << "[DEBUG] log() called: ctx=" << ctx << ", level=" << level << ", msg=\"" - << msg << "\"\n"; if (global_log_callback) { - std::cout << "[DEBUG] -> Using global_log_callback\n"; global_log_callback(ctx, level, msg); } else { - std::cout << "[DEBUG] -> No global callback set. Fallback to std::cout.\n"; - // If no global callback is set, do something fallback-like: - std::cout << "[" << level << "] " << msg << "\n"; + // Fallback to exisitng global logger + Level log_level = level_from_string(level); + log(log_level, "{}", msg); } } diff --git a/include/svs/index/flat/flat.h b/include/svs/index/flat/flat.h index d479f658..e8d9e055 100644 --- a/include/svs/index/flat/flat.h +++ b/include/svs/index/flat/flat.h @@ -192,6 +192,8 @@ class FlatIndex { /// instance or an integer specifying the number of threads to use. In the latter /// case, a new default thread pool will be constructed using ``threadpool_proto`` /// as the number of threads to create. + /// @param log_callback_ctx A pointer to a user-defined context for per-index logging + /// customization. /// /// @copydoc threadpool_requirements /// @@ -346,14 +348,13 @@ class FlatIndex { threads::parallel_for( threadpool_, threads::DynamicPartition{ - queries.size(), compute_query_batch_size(search_parameters, queries.size()) - }, + queries.size(), + compute_query_batch_size(search_parameters, queries.size())}, [&](const auto& query_indices, uint64_t /*tid*/) { // Broadcast the distance functor so each thread can process all queries // in its current batch. distance::BroadcastDistance distances{ - extensions::distance(data_, distance_), query_indices.size() - }; + extensions::distance(data_, distance_), query_indices.size()}; search_patch( queries, @@ -485,7 +486,9 @@ class FlatIndex { /// instance or an integer specifying the number of threads to use. In the latter case, /// a new default thread pool will be constructed using ``threadpool_proto`` as the /// number of threads to create. -/// +/// @param log_callback_ctx A pointer to a user-defined context for per-index logging +/// customization. + /// This method provides much of the heavy lifting for constructing a Flat index from /// a data file on disk or a dataset in memory. /// @@ -495,11 +498,16 @@ class FlatIndex { /// template auto auto_assemble( - DataProto&& data_proto, Distance distance, ThreadPoolProto threadpool_proto, void* log_callback_ctx = nullptr + DataProto&& data_proto, + Distance distance, + ThreadPoolProto threadpool_proto, + void* log_callback_ctx = nullptr ) { auto threadpool = threads::as_threadpool(std::move(threadpool_proto)); auto data = svs::detail::dispatch_load(std::forward(data_proto), threadpool); - return FlatIndex(std::move(data), std::move(distance), std::move(threadpool), log_callback_ctx); + return FlatIndex( + std::move(data), std::move(distance), std::move(threadpool), log_callback_ctx + ); } /// @brief Alias for a short-lived flat index. @@ -510,8 +518,7 @@ template temporary_flat_index(Data& data, Dist distance, ThreadPoolProto threadpool_proto) { return TemporaryFlatIndex{ - data, distance, threads::as_threadpool(std::move(threadpool_proto)) - }; + data, distance, threads::as_threadpool(std::move(threadpool_proto))}; } } // namespace svs::index::flat diff --git a/include/svs/index/inverted/memory_based.h b/include/svs/index/inverted/memory_based.h index 823c75b9..112e76d0 100644 --- a/include/svs/index/inverted/memory_based.h +++ b/include/svs/index/inverted/memory_based.h @@ -70,8 +70,7 @@ template class SparseClusteredDatase const Original& original, const Clustering& clustering, const Alloc& allocator ) : SparseClusteredDataset{ - original, clustering, clustering.packed_leaf_translation(), allocator - } {} + original, clustering, clustering.packed_leaf_translation(), allocator} {} template SparseClusteredDataset( @@ -96,8 +95,7 @@ template class SparseClusteredDatase for (auto neighbor : cluster) { auto global_id = neighbor.id(); these_ids.at(i) = SparseIDs{ - .local = global_to_local_map.at(global_id), .global = global_id - }; + .local = global_to_local_map.at(global_id), .global = global_id}; ++i; } }); @@ -396,8 +394,7 @@ template class InvertedIndex { ///// Search Parameter Setting search_parameters_type get_search_parameters() const { return InvertedSearchParameters{ - index_.get_search_parameters(), refinement_epsilon_ - }; + index_.get_search_parameters(), refinement_epsilon_}; } void set_search_parameters(const search_parameters_type& parameters) { @@ -567,7 +564,7 @@ auto auto_build( Strategy strategy = {}, CentroidPicker centroid_picker = {}, ClusteringOp clustering_op = {}, - void* log_callback_ctx = nullptr + void* log_callback_ctx = nullptr ) { // Perform clustering. auto threadpool = threads::as_threadpool(std::move(threadpool_proto)); @@ -605,8 +602,7 @@ auto auto_build( strategy(data, clustering, HugepageAllocator()), std::move(centroids), std::move(primary_threadpool), - log_callback_ctx - }; + log_callback_ctx}; } ///// Auto Assembling. diff --git a/include/svs/index/vamana/dynamic_index.h b/include/svs/index/vamana/dynamic_index.h index 715a47db..14379311 100644 --- a/include/svs/index/vamana/dynamic_index.h +++ b/include/svs/index/vamana/dynamic_index.h @@ -284,8 +284,7 @@ class MutableVamanaIndex { sp.search_buffer_visited_set_ ), extensions::single_search_setup(data_, distance_), - {sp.prefetch_lookahead_, sp.prefetch_step_} - }; + {sp.prefetch_lookahead_, sp.prefetch_step_}}; } scratchspace_type scratchspace() const { return scratchspace(get_search_parameters()); } @@ -490,8 +489,7 @@ class MutableVamanaIndex { search_buffer_type{sp.buffer_config_, distance::comparator(distance_)}; auto prefetch_parameters = GreedySearchPrefetchParameters{ - sp.prefetch_lookahead_, sp.prefetch_step_ - }; + sp.prefetch_lookahead_, sp.prefetch_step_}; // Legalize search buffer for this search. if (buffer.target() < num_neighbors) { @@ -674,15 +672,13 @@ class MutableVamanaIndex { construction_window_size_, max_candidates_, prune_to_, - use_full_search_history_ - }; + use_full_search_history_}; auto sp = get_search_parameters(); auto prefetch_parameters = GreedySearchPrefetchParameters{sp.prefetch_lookahead_, sp.prefetch_step_}; VamanaBuilder builder{ - graph_, data_, distance_, parameters, threadpool_, prefetch_parameters - }; + graph_, data_, distance_, parameters, threadpool_, prefetch_parameters}; builder.construct(alpha_, entry_point(), slots, logging::Level::Trace); // Mark all added entries as valid. for (const auto& i : slots) { @@ -984,8 +980,7 @@ class MutableVamanaIndex { get_max_candidates(), prune_to_, get_full_search_history()}, - get_search_parameters() - }; + get_search_parameters()}; return lib::SaveTable( "vamana_dynamic_auxiliary_parameters", @@ -1248,8 +1243,7 @@ struct VamanaStateLoader { if (debug_load_from_static) { return VamanaStateLoader{ lib::load(table), - IDTranslator::Identity(assume_datasize) - }; + IDTranslator::Identity(assume_datasize)}; } return VamanaStateLoader{ @@ -1350,8 +1344,7 @@ auto auto_dynamic_assemble( std::move(distance), std::move(translator), std::move(threadpool), - log_callback_ctx - }; + log_callback_ctx}; } } // namespace svs::index::vamana diff --git a/include/svs/index/vamana/index.h b/include/svs/index/vamana/index.h index d8e6ab4f..ebcd8c11 100644 --- a/include/svs/index/vamana/index.h +++ b/include/svs/index/vamana/index.h @@ -137,16 +137,13 @@ struct VamanaIndexParameters { lib::load_at(table, "construction_window_size"), lib::load_at(table, "max_candidates"), prune_to, - use_full_search_history - }, + use_full_search_history}, VamanaSearchParameters{ - SearchBufferConfig{lib::load_at(table, "default_search_window_size") - }, + SearchBufferConfig{ + lib::load_at(table, "default_search_window_size")}, lib::load_at(table, "visited_set"), 4, - 1 - } - }; + 1}}; } static VamanaIndexParameters load(const lib::ContextFreeLoadTable& table) { @@ -246,6 +243,8 @@ VamanaSearchParameters construct_default_search_parameters(const Data& data) { /// @tparam PostOp The cleanup operation that is performed after the initial graph /// search. This may be included to perform operations like reranking for quantized /// datasets. +/// @tparam log_callback_ctx A pointer to a user-defined context for per-index logging +/// customization. /// /// The mid-level implementation of the static Vamana graph-based index. /// @@ -338,6 +337,8 @@ class VamanaIndex { /// instance or an integer specifying the number of threads to use. In the latter /// case, a new default thread pool will be constructed using ``threadpool_proto`` /// as the number of threads to create. + /// @param log_callback_ctx A pointer to a user-defined context for per-index logging + /// customization. /// /// This is a lower-level function that is meant to take a collection of /// instantiated components and assemble the final index. For a more "hands-free" @@ -380,6 +381,8 @@ class VamanaIndex { /// @param distance_function The distance function used to compare queries and /// elements of the dataset. /// @param threadpool The acceptable threadpool to use to conduct searches. + /// @param log_callback_ctx A pointer to a user-defined context for per-index logging + /// customization. /// /// This is a lower-level function that is meant to take a dataset and construct /// the graph-based index over the dataset. For a more "hands-free" approach, see @@ -408,8 +411,7 @@ class VamanaIndex { entry_point, std::move(distance_function), std::move(threadpool), - log_callback_ctx - } { + log_callback_ctx} { if (graph_.n_nodes() != data_.size()) { throw ANNEXCEPTION("Wrong sizes!"); } @@ -446,8 +448,7 @@ class VamanaIndex { sp.search_buffer_visited_set_ ), extensions::single_search_setup(data_, distance_), - {sp.prefetch_lookahead_, sp.prefetch_step_} - }; + {sp.prefetch_lookahead_, sp.prefetch_step_}}; } /// @brief Return scratch-space resources for external threading with default parameters @@ -565,12 +566,11 @@ class VamanaIndex { auto search_buffer = search_buffer_type{ SearchBufferConfig(search_parameters.buffer_config_), distance::comparator(distance_), - search_parameters.search_buffer_visited_set_ - }; + search_parameters.search_buffer_visited_set_}; auto prefetch_parameters = GreedySearchPrefetchParameters{ - search_parameters.prefetch_lookahead_, search_parameters.prefetch_step_ - }; + search_parameters.prefetch_lookahead_, + search_parameters.prefetch_step_}; // Increase the search window size if the defaults are not suitable for the // requested number of neighbors. @@ -800,8 +800,7 @@ class VamanaIndex { ) const { // Construct and save runtime parameters. auto parameters = VamanaIndexParameters{ - entry_point_.front(), build_parameters_, get_search_parameters() - }; + entry_point_.front(), build_parameters_, get_search_parameters()}; // Config lib::save_to_disk(parameters, config_directory); @@ -890,6 +889,8 @@ class VamanaIndex { /// a new default thread pool will be constructed using ``threadpool_proto`` as the /// number of threads to create. /// @param graph_allocator The allocator to use for the graph data structure. +/// @param log_callback_ctx A pointer to a user-defined context for per-index logging +/// customization. /// /// @copydoc threadpool_requirements /// @@ -920,8 +921,7 @@ auto auto_build( lib::narrow(entry_point), std::move(distance), std::move(threadpool), - log_callback_ctx - }; + log_callback_ctx}; } /// @@ -939,6 +939,8 @@ auto auto_build( /// This method provides much of the heavy lifting for instantiating a Vamana index from /// a collection of files on disk (or perhaps a mix-and-match of existing data in-memory /// and on disk). +/// @param log_callback_ctx A pointer to a user-defined context for per-index logging +/// customization. /// /// Refer to the examples for use of this interface. /// @@ -964,8 +966,12 @@ auto auto_assemble( // Extract the index type of the provided graph. using I = typename decltype(graph)::index_type; auto index = VamanaIndex{ - std::move(graph), std::move(data), I{}, std::move(distance), std::move(threadpool), log_callback_ctx - }; + std::move(graph), + std::move(data), + I{}, + std::move(distance), + std::move(threadpool), + log_callback_ctx}; auto config = lib::load_from_disk(config_path); index.apply(config); diff --git a/tests/svs/index/flat/flat.cpp b/tests/svs/index/flat/flat.cpp index 1bdb7e46..5eff1012 100644 --- a/tests/svs/index/flat/flat.cpp +++ b/tests/svs/index/flat/flat.cpp @@ -1,5 +1,5 @@ /* - * Copyright 2023 Intel Corporation + * Copyright 2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/tests/svs/index/inverted/memory_based.cpp b/tests/svs/index/inverted/memory_based.cpp index f4948ba7..763c6a9c 100644 --- a/tests/svs/index/inverted/memory_based.cpp +++ b/tests/svs/index/inverted/memory_based.cpp @@ -1,5 +1,5 @@ /* - * Copyright 2024 Intel Corporation + * Copyright 2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -63,5 +63,5 @@ CATCH_TEST_CASE("InvertedIndex Per-Index Logging", "[logging]") { // Check log context received the message CATCH_REQUIRE(testLogContext.logs.size() == 1); - CATCH_REQUIRE(testLogContext.logs[0] == "NOTICE: Test InvertedIndex Build"); + CATCH_REQUIRE(testLogContext.logs[0] == "NOTICE: Test InvertedIndex Logging"); } \ No newline at end of file diff --git a/tests/svs/index/vamana/index.cpp b/tests/svs/index/vamana/index.cpp index 2cb2bc96..56c4cd14 100644 --- a/tests/svs/index/vamana/index.cpp +++ b/tests/svs/index/vamana/index.cpp @@ -105,8 +105,7 @@ CATCH_TEST_CASE("Vamana Index Parameters", "[index][vamana]") { CATCH_SECTION("Current version") { auto p = VamanaIndexParameters{ - 128, {12.4f, 478, 13, 4, 10, false}, {{10, 20}, true, 1, 1} - }; + 128, {12.4f, 478, 13, 4, 10, false}, {{10, 20}, true, 1, 1}}; CATCH_REQUIRE(svs::lib::test_self_save_load_context_free(p)); } } From c5402adc8ccf33e4dde45dbf8a651202eef2921d Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Tue, 4 Mar 2025 15:50:54 -0800 Subject: [PATCH 10/33] fix: everthing works locally --- include/svs/core/logging.h | 2 +- tests/CMakeLists.txt | 3 +- tests/svs/index/inverted/memory_based.cpp | 2 +- tests/svs/index/vamana/dynamic_index.cpp | 474 +++++++++------------ tests/svs/index/vamana/dynamic_index_2.cpp | 51 +++ 5 files changed, 267 insertions(+), 265 deletions(-) diff --git a/include/svs/core/logging.h b/include/svs/core/logging.h index 30d07772..edb197b7 100644 --- a/include/svs/core/logging.h +++ b/include/svs/core/logging.h @@ -346,7 +346,7 @@ inline void log(void* ctx, const char* level, const char* msg) { global_log_callback(ctx, level, msg); } else { // Fallback to exisitng global logger - Level log_level = level_from_string(level); + Level log_level = svs::logging::detail::level_from_string(level); log(log_level, "{}", msg); } } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 60cf579f..b8352697 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -137,10 +137,11 @@ set(TEST_SOURCES ${TEST_DIR}/svs/index/vamana/iterator_schedule.cpp ${TEST_DIR}/svs/index/vamana/iterator_example.cpp ${TEST_DIR}/svs/index/vamana/iterator.cpp - ${TEST_DIR}/svs/index/vamana/dynamic_index.cpp # Inverted ${TEST_DIR}/svs/index/inverted/clustering.cpp ${TEST_DIR}/svs/index/inverted/memory_based.cpp + + # # ${TEST_DIR}/svs/index/vamana/dynamic_index.cpp ) ##### diff --git a/tests/svs/index/inverted/memory_based.cpp b/tests/svs/index/inverted/memory_based.cpp index 763c6a9c..df4d62e2 100644 --- a/tests/svs/index/inverted/memory_based.cpp +++ b/tests/svs/index/inverted/memory_based.cpp @@ -59,7 +59,7 @@ CATCH_TEST_CASE("InvertedIndex Per-Index Logging", "[logging]") { ); // Log a test message - invertedIndex.log("NOTICE", "Test InvertedIndex Build"); + invertedIndex.log("NOTICE", "Test InvertedIndex Logging"); // Check log context received the message CATCH_REQUIRE(testLogContext.logs.size() == 1); diff --git a/tests/svs/index/vamana/dynamic_index.cpp b/tests/svs/index/vamana/dynamic_index.cpp index e09d582d..b9adf5f2 100644 --- a/tests/svs/index/vamana/dynamic_index.cpp +++ b/tests/svs/index/vamana/dynamic_index.cpp @@ -38,268 +38,218 @@ #include "tests/utils/utils.h" #include "tests/utils/vamana_reference.h" -// namespace { -// template auto copy_dataset(const T& data) { -// auto copy = svs::data::SimplePolymorphicData{ -// data.size(), data.dimensions() -// }; -// for (size_t i = 0; i < data.size(); ++i) { -// copy.set_datum(i, data.get_datum(i)); -// } -// return copy; -// } - -// template void check_results(const T& results, const U& deleted) -// { -// for (size_t i = 0; i < svs::getsize<0>(results); ++i) { -// for (size_t j = 0; j < svs::getsize<1>(results); ++j) { -// CATCH_REQUIRE(!deleted.contains(results.at(i, j))); -// } -// } -// } - -// template -// void check_deleted(const T& index, const U& deleted, size_t imax) { -// for (size_t i = 0; i < imax; ++i) { -// if (deleted.contains(i)) { -// CATCH_REQUIRE(index.is_deleted(i)); -// } else { -// CATCH_REQUIRE(!index.is_deleted(i)); -// } -// } -// } - -// template -// void check_equal(const Left& left, const Right& right) { -// CATCH_REQUIRE(left.size() == right.size()); -// CATCH_REQUIRE(left.dimensions() == right.dimensions()); - -// for (size_t i = 0, imax = left.size(); i < imax; ++i) { -// const auto& datum_left = left.get_datum(i); -// const auto& datum_right = right.get_datum(i); -// CATCH_REQUIRE(std::equal(datum_left.begin(), datum_left.end(), -// datum_right.begin()) -// ); -// } -// } - -// } // namespace - -// #if defined(NDEBUG) -// const double DELETE_PERCENT = 0.3; -// #else -// const double DELETE_PERCENT = 0.05; -// #endif - -// CATCH_TEST_CASE("MutableVamanaIndex", "[graph_index]") { -// const size_t num_threads = 2; -// const size_t num_neighbors = 10; - -// const auto base_data = test_dataset::data_blocked_f32(); -// // const auto base_data = test_dataset::data_f32(); -// const auto queries = test_dataset::queries(); -// const auto groundtruth = test_dataset::groundtruth_euclidean(); - -// CATCH_SECTION("Soft Deletion") { -// // In this section, we test soft deletion. -// // The idea is as follows: -// // -// // (1) Load the test index. -// // (2) Run a round of queries to ensure that everything loading correctly. -// // (3) Set a target deletion percentage where all the neighbors returned by -// // all results returned by the previous query plus a random collection of -// extras -// // are deleted. -// // -// // (4) Rerun queries, make sure accuracy is still high and that no deleted -// indices -// // are present in the results. -// auto entry_point = svs::index::load_entry_point(test_dataset::metadata_file()); - -// auto index = svs::index::MutableVamanaIndex{ -// test_dataset::graph_blocked(), -// base_data.copy(), -// entry_point, -// svs::distance::DistanceL2(), -// svs::threads::UnitRange(0, base_data.size()), -// num_threads -// }; - -// check_equal(base_data, index); -// index.debug_check_graph_consistency(false); - -// auto results = svs::QueryResult(queries.size(), num_neighbors); -// index.set_search_window_size(num_neighbors); - -// auto tic = svs::lib::now(); -// index.search(queries.view(), num_neighbors, results.view()); -// auto original_time = svs::lib::time_difference(svs::lib::now(), tic); -// auto original_recall = svs::k_recall_at_n(groundtruth, results); -// CATCH_REQUIRE(index.entry_point() == entry_point); - -// std::unordered_set ids_to_delete{}; -// double delete_percent = DELETE_PERCENT; -// for (size_t i = 0; i < groundtruth.size(); ++i) { -// auto slice = groundtruth.get_datum(i); -// for (size_t j = 0; j < num_neighbors; ++j) { -// auto id = slice[j]; - -// // For now - don't delete the entry point. -// if (id != entry_point) { -// ids_to_delete.insert(slice[j]); -// } -// } - -// if (ids_to_delete.size() > delete_percent * base_data.size()) { -// break; -// } -// } - -// index.set_threadpool(threads::CppAsyncThreadPool(num_threads)); - -// std::cout << "Deleting " << ids_to_delete.size() << " entries!" << std::endl; -// index.delete_entries(ids_to_delete); -// check_deleted(index, ids_to_delete, base_data.size()); -// index.debug_check_graph_consistency(true); -// CATCH_REQUIRE_THROWS_AS( -// index.debug_check_graph_consistency(false), svs::ANNException -// ); -// CATCH_REQUIRE(index.entry_point() == entry_point); -// // Make sure the correct points were deleted. -// tic = svs::lib::now(); -// index.search(queries.view(), num_neighbors, results.view()); -// auto new_time = svs::lib::time_difference(tic); - -// // Make sure none of the returned results are in the deleted list. -// check_results(results.indices(), ids_to_delete); - -// index.set_threadpool(threads::QueueThreadPoolWrapper(num_threads)); - -// auto results_reference = svs::QueryResult(queries.size(), num_neighbors); -// index.exhaustive_search(queries.view(), num_neighbors, results_reference.view()); -// auto new_recall = svs::k_recall_at_n(results_reference.indices(), results); - -// // Perform graph consolidation and see how the results are effected. -// index.set_alpha(1.2); -// index.consolidate(); -// index.debug_check_graph_consistency(false); -// tic = svs::lib::now(); -// index.search(queries.view(), num_neighbors, results.view()); -// auto post_consolidate_time = svs::lib::time_difference(tic); -// auto post_consolidate_recall = -// svs::k_recall_at_n(results_reference.indices(), results); - -// // Check deletion again. -// check_deleted(index, ids_to_delete, base_data.size()); -// CATCH_REQUIRE(index.entry_point() == entry_point); - -// std::cout << "Original recall: " << original_recall -// << ", New Recall: " << new_recall -// << ", Post Recall: " << post_consolidate_recall << std::endl; -// std::cout << "Original Time: " << original_time << " (s), New Time: " << new_time -// << " (s) Post Time: " << post_consolidate_time << std::endl; -// CATCH_REQUIRE(new_recall > original_recall); -// check_results(results.indices(), ids_to_delete); - -// // Now - delete the entry point and consolidate. -// ids_to_delete.insert(entry_point); -// std::vector entry_point_vector{}; -// entry_point_vector.push_back(entry_point); -// index.delete_entries(entry_point_vector); -// index.set_alpha(1.2); -// index.consolidate(); -// index.debug_check_graph_consistency(false); - -// auto& threadpool = -// index.get_threadpool_handle().get.get(); -// threadpool.resize(3); -// CATCH_REQUIRE(index.get_num_threads() == 3); -// threadpool.resize(num_threads); -// CATCH_REQUIRE(index.get_num_threads() == num_threads); - -// CATCH_REQUIRE(index.entry_point() != entry_point); -// index.search(queries.view(), num_neighbors, results.view()); -// auto post_entrypoint_recall = -// svs::k_recall_at_n(results_reference.indices(), results); -// std::cout << "Post entry-point deletion recall: " << post_entrypoint_recall -// << std::endl; - -// // Add the deleted points back in. -// auto points = svs::data::SimpleData( -// ids_to_delete.size(), base_data.dimensions() -// ); - -// size_t i = 0; -// for (const auto& j : ids_to_delete) { -// points.set_datum(i, base_data.get_datum(j)); -// ++i; -// } - -// index.set_threadpool(threads::DefaultThreadPool(num_threads)); -// tic = svs::lib::now(); -// index.add_points(points, ids_to_delete); -// auto insert_time = svs::lib::time_difference(tic); -// std::cout << "Insertion took: " << insert_time << " seconds!" << std::endl; - -// // Check that the stored dataset and the original dataset are equal. -// check_equal(base_data, index); -// index.debug_check_graph_consistency(false); - -// tic = svs::lib::now(); -// index.search(queries.view(), num_neighbors, results.view()); -// auto post_add_time = svs::lib::time_difference(tic); -// auto post_reinsertion_recall = svs::k_recall_at_n(groundtruth, results); -// std::cout << "Post reinsertion recall: " << post_reinsertion_recall << " in " -// << post_add_time << " seconds." << std::endl; -// } -// } - -struct TestLogCtx { - std::vector logs; -}; - -static void test_log_callback(void* ctx, const char* level, const char* msg) { - if (!ctx) - return; - auto* logCtx = reinterpret_cast(ctx); - logCtx->logs.push_back(std::string(level) + ": " + msg); +namespace { +template auto copy_dataset(const T& data) { + auto copy = svs::data::SimplePolymorphicData{ + data.size(), data.dimensions() + }; + for (size_t i = 0; i < data.size(); ++i) { + copy.set_datum(i, data.get_datum(i)); + } + return copy; } -CATCH_TEST_CASE("Dynamic MutableVamanaIndex Per-Index Logging Test", "[logging]") { - // Set callback - svs::logging::set_global_log_callback(&test_log_callback); - TestLogCtx testLogContext; - - // Setup index - std::filesystem::path configPath = test_dataset::vamana_config_file(); - auto data = svs::data::SimpleData::load(test_dataset::data_svs_file()); - auto graph = svs::graphs::SimpleGraph::load(test_dataset::graph_file()); - auto distance = svs::DistanceL2(); - auto threadpool = svs::threads::DefaultThreadPool(1); - // Load expected build parameters - auto expected_result = test_dataset::vamana::expected_build_results( - svs::L2, svsbenchmark::Uncompressed(svs::DataType::float32) - ); - - // Use the build parameters from the expected result - svs::index::vamana::VamanaBuildParameters buildParams = - expected_result.build_parameters_.value(); - - std::vector external_ids(data.size()); - std::iota(external_ids.begin(), external_ids.end(), 0); - - // Use the build parameters from the expected result - auto dynamicIndex = svs::index::vamana::MutableVamanaIndex< - svs::graphs::SimpleGraph, - svs::data::SimpleData, - svs::distance::DistanceL2>( - buildParams, data, external_ids, distance, std::move(threadpool), &testLogContext - ); - - // Log messasge - dynamicIndex.log("NOTICE", "Test MutableVamanaIndex Logging"); - - // Verify that the log callback was called - CATCH_REQUIRE(testLogContext.logs.size() == 1); - CATCH_REQUIRE(testLogContext.logs[0] == "NOTICE: Test MutableVamanaIndex Logging"); +template void check_results(const T& results, const U& deleted) +{ + for (size_t i = 0; i < svs::getsize<0>(results); ++i) { + for (size_t j = 0; j < svs::getsize<1>(results); ++j) { + CATCH_REQUIRE(!deleted.contains(results.at(i, j))); + } + } +} + +template +void check_deleted(const T& index, const U& deleted, size_t imax) { + for (size_t i = 0; i < imax; ++i) { + if (deleted.contains(i)) { + CATCH_REQUIRE(index.is_deleted(i)); + } else { + CATCH_REQUIRE(!index.is_deleted(i)); + } + } +} + +template +void check_equal(const Left& left, const Right& right) { + CATCH_REQUIRE(left.size() == right.size()); + CATCH_REQUIRE(left.dimensions() == right.dimensions()); + + for (size_t i = 0, imax = left.size(); i < imax; ++i) { + const auto& datum_left = left.get_datum(i); + const auto& datum_right = right.get_datum(i); + CATCH_REQUIRE(std::equal(datum_left.begin(), datum_left.end(), + datum_right.begin()) + ); + } +} + +} // namespace + +#if defined(NDEBUG) +const double DELETE_PERCENT = 0.3; +#else +const double DELETE_PERCENT = 0.05; +#endif + +CATCH_TEST_CASE("MutableVamanaIndex", "[graph_index]") { + const size_t num_threads = 2; + const size_t num_neighbors = 10; + + const auto base_data = test_dataset::data_blocked_f32(); + // const auto base_data = test_dataset::data_f32(); + const auto queries = test_dataset::queries(); + const auto groundtruth = test_dataset::groundtruth_euclidean(); + + CATCH_SECTION("Soft Deletion") { + // In this section, we test soft deletion. + // The idea is as follows: + // + // (1) Load the test index. + // (2) Run a round of queries to ensure that everything loading correctly. + // (3) Set a target deletion percentage where all the neighbors returned by + // all results returned by the previous query plus a random collection of + extras + // are deleted. + // + // (4) Rerun queries, make sure accuracy is still high and that no deleted + indices + // are present in the results. + auto entry_point = svs::index::load_entry_point(test_dataset::metadata_file()); + + auto index = svs::index::MutableVamanaIndex{ + test_dataset::graph_blocked(), + base_data.copy(), + entry_point, + svs::distance::DistanceL2(), + svs::threads::UnitRange(0, base_data.size()), + num_threads + }; + + check_equal(base_data, index); + index.debug_check_graph_consistency(false); + + auto results = svs::QueryResult(queries.size(), num_neighbors); + index.set_search_window_size(num_neighbors); + + auto tic = svs::lib::now(); + index.search(queries.view(), num_neighbors, results.view()); + auto original_time = svs::lib::time_difference(svs::lib::now(), tic); + auto original_recall = svs::k_recall_at_n(groundtruth, results); + CATCH_REQUIRE(index.entry_point() == entry_point); + + std::unordered_set ids_to_delete{}; + double delete_percent = DELETE_PERCENT; + for (size_t i = 0; i < groundtruth.size(); ++i) { + auto slice = groundtruth.get_datum(i); + for (size_t j = 0; j < num_neighbors; ++j) { + auto id = slice[j]; + + // For now - don't delete the entry point. + if (id != entry_point) { + ids_to_delete.insert(slice[j]); + } + } + + if (ids_to_delete.size() > delete_percent * base_data.size()) { + break; + } + } + + index.set_threadpool(threads::CppAsyncThreadPool(num_threads)); + + std::cout << "Deleting " << ids_to_delete.size() << " entries!" << std::endl; + index.delete_entries(ids_to_delete); + check_deleted(index, ids_to_delete, base_data.size()); + index.debug_check_graph_consistency(true); + CATCH_REQUIRE_THROWS_AS( + index.debug_check_graph_consistency(false), svs::ANNException + ); + CATCH_REQUIRE(index.entry_point() == entry_point); + // Make sure the correct points were deleted. + tic = svs::lib::now(); + index.search(queries.view(), num_neighbors, results.view()); + auto new_time = svs::lib::time_difference(tic); + + // Make sure none of the returned results are in the deleted list. + check_results(results.indices(), ids_to_delete); + + index.set_threadpool(threads::QueueThreadPoolWrapper(num_threads)); + + auto results_reference = svs::QueryResult(queries.size(), num_neighbors); + index.exhaustive_search(queries.view(), num_neighbors, results_reference.view()); + auto new_recall = svs::k_recall_at_n(results_reference.indices(), results); + + // Perform graph consolidation and see how the results are effected. + index.set_alpha(1.2); + index.consolidate(); + index.debug_check_graph_consistency(false); + tic = svs::lib::now(); + index.search(queries.view(), num_neighbors, results.view()); + auto post_consolidate_time = svs::lib::time_difference(tic); + auto post_consolidate_recall = + svs::k_recall_at_n(results_reference.indices(), results); + + // Check deletion again. + check_deleted(index, ids_to_delete, base_data.size()); + CATCH_REQUIRE(index.entry_point() == entry_point); + + std::cout << "Original recall: " << original_recall + << ", New Recall: " << new_recall + << ", Post Recall: " << post_consolidate_recall << std::endl; + std::cout << "Original Time: " << original_time << " (s), New Time: " << new_time + << " (s) Post Time: " << post_consolidate_time << std::endl; + CATCH_REQUIRE(new_recall > original_recall); + check_results(results.indices(), ids_to_delete); + + // Now - delete the entry point and consolidate. + ids_to_delete.insert(entry_point); + std::vector entry_point_vector{}; + entry_point_vector.push_back(entry_point); + index.delete_entries(entry_point_vector); + index.set_alpha(1.2); + index.consolidate(); + index.debug_check_graph_consistency(false); + + auto& threadpool = + index.get_threadpool_handle().get.get(); + threadpool.resize(3); + CATCH_REQUIRE(index.get_num_threads() == 3); + threadpool.resize(num_threads); + CATCH_REQUIRE(index.get_num_threads() == num_threads); + + CATCH_REQUIRE(index.entry_point() != entry_point); + index.search(queries.view(), num_neighbors, results.view()); + auto post_entrypoint_recall = + svs::k_recall_at_n(results_reference.indices(), results); + std::cout << "Post entry-point deletion recall: " << post_entrypoint_recall + << std::endl; + + // Add the deleted points back in. + auto points = svs::data::SimpleData( + ids_to_delete.size(), base_data.dimensions() + ); + + size_t i = 0; + for (const auto& j : ids_to_delete) { + points.set_datum(i, base_data.get_datum(j)); + ++i; + } + + index.set_threadpool(threads::DefaultThreadPool(num_threads)); + tic = svs::lib::now(); + index.add_points(points, ids_to_delete); + auto insert_time = svs::lib::time_difference(tic); + std::cout << "Insertion took: " << insert_time << " seconds!" << std::endl; + + // Check that the stored dataset and the original dataset are equal. + check_equal(base_data, index); + index.debug_check_graph_consistency(false); + + tic = svs::lib::now(); + index.search(queries.view(), num_neighbors, results.view()); + auto post_add_time = svs::lib::time_difference(tic); + auto post_reinsertion_recall = svs::k_recall_at_n(groundtruth, results); + std::cout << "Post reinsertion recall: " << post_reinsertion_recall << " in " + << post_add_time << " seconds." << std::endl; + } } \ No newline at end of file diff --git a/tests/svs/index/vamana/dynamic_index_2.cpp b/tests/svs/index/vamana/dynamic_index_2.cpp index ca3bbd2c..e6fdc417 100644 --- a/tests/svs/index/vamana/dynamic_index_2.cpp +++ b/tests/svs/index/vamana/dynamic_index_2.cpp @@ -25,6 +25,7 @@ // tests #include "tests/utils/test_dataset.h" +#include "tests/utils/utils.h" // catch #include "catch2/catch_test_macros.hpp" @@ -415,4 +416,54 @@ CATCH_TEST_CASE("Testing Graph Index", "[graph_index][dynamic_index]") { CATCH_REQUIRE(index.size() == reloaded.size()); // ID's preserved across runs. index.on_ids([&](size_t e) { CATCH_REQUIRE(reloaded.has_id(e)); }); +} + +struct TestLogCtx { + std::vector logs; +}; + +static void test_log_callback(void* ctx, const char* level, const char* msg) { + if (!ctx) + return; + auto* logCtx = reinterpret_cast(ctx); + logCtx->logs.push_back(std::string(level) + ": " + msg); +} + +CATCH_TEST_CASE("Dynamic MutableVamanaIndex Per-Index Logging Test", "[logging]") { + // Set callback + svs::logging::set_global_log_callback(&test_log_callback); + TestLogCtx testLogContext; + + // Setup index + std::filesystem::path configPath = test_dataset::vamana_config_file(); + auto data = svs::data::SimpleData::load(test_dataset::data_svs_file()); + auto graph = svs::graphs::SimpleGraph::load(test_dataset::graph_file()); + auto distance = svs::DistanceL2(); + auto threadpool = svs::threads::DefaultThreadPool(1); + // Load expected build parameters + auto expected_result = test_dataset::vamana::expected_build_results( + svs::L2, svsbenchmark::Uncompressed(svs::DataType::float32) + ); + + // Use the build parameters from the expected result + svs::index::vamana::VamanaBuildParameters buildParams = + expected_result.build_parameters_.value(); + + std::vector external_ids(data.size()); + std::iota(external_ids.begin(), external_ids.end(), 0); + + // Use the build parameters from the expected result + auto dynamicIndex = svs::index::vamana::MutableVamanaIndex< + svs::graphs::SimpleGraph, + svs::data::SimpleData, + svs::distance::DistanceL2>( + buildParams, data, external_ids, distance, std::move(threadpool), &testLogContext + ); + + // Log messasge + dynamicIndex.log("NOTICE", "Test MutableVamanaIndex Logging"); + + // Verify that the log callback was called + CATCH_REQUIRE(testLogContext.logs.size() == 1); + CATCH_REQUIRE(testLogContext.logs[0] == "NOTICE: Test MutableVamanaIndex Logging"); } \ No newline at end of file From 32e05e46c921c208eac7f4d01928799105ee5d16 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Tue, 4 Mar 2025 15:51:42 -0800 Subject: [PATCH 11/33] fix: format --- tests/svs/index/vamana/dynamic_index.cpp | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/tests/svs/index/vamana/dynamic_index.cpp b/tests/svs/index/vamana/dynamic_index.cpp index b9adf5f2..11014164 100644 --- a/tests/svs/index/vamana/dynamic_index.cpp +++ b/tests/svs/index/vamana/dynamic_index.cpp @@ -41,16 +41,14 @@ namespace { template auto copy_dataset(const T& data) { auto copy = svs::data::SimplePolymorphicData{ - data.size(), data.dimensions() - }; + data.size(), data.dimensions()}; for (size_t i = 0; i < data.size(); ++i) { copy.set_datum(i, data.get_datum(i)); } return copy; } -template void check_results(const T& results, const U& deleted) -{ +template void check_results(const T& results, const U& deleted) { for (size_t i = 0; i < svs::getsize<0>(results); ++i) { for (size_t j = 0; j < svs::getsize<1>(results); ++j) { CATCH_REQUIRE(!deleted.contains(results.at(i, j))); @@ -77,8 +75,7 @@ void check_equal(const Left& left, const Right& right) { for (size_t i = 0, imax = left.size(); i < imax; ++i) { const auto& datum_left = left.get_datum(i); const auto& datum_right = right.get_datum(i); - CATCH_REQUIRE(std::equal(datum_left.begin(), datum_left.end(), - datum_right.begin()) + CATCH_REQUIRE(std::equal(datum_left.begin(), datum_left.end(), datum_right.begin()) ); } } @@ -109,12 +106,12 @@ CATCH_TEST_CASE("MutableVamanaIndex", "[graph_index]") { // (3) Set a target deletion percentage where all the neighbors returned by // all results returned by the previous query plus a random collection of extras - // are deleted. - // - // (4) Rerun queries, make sure accuracy is still high and that no deleted - indices - // are present in the results. - auto entry_point = svs::index::load_entry_point(test_dataset::metadata_file()); + // are deleted. + // + // (4) Rerun queries, make sure accuracy is still high and that no deleted + indices + // are present in the results. + auto entry_point = svs::index::load_entry_point(test_dataset::metadata_file()); auto index = svs::index::MutableVamanaIndex{ test_dataset::graph_blocked(), @@ -122,8 +119,7 @@ CATCH_TEST_CASE("MutableVamanaIndex", "[graph_index]") { entry_point, svs::distance::DistanceL2(), svs::threads::UnitRange(0, base_data.size()), - num_threads - }; + num_threads}; check_equal(base_data, index); index.debug_check_graph_consistency(false); From ce8d27d2416ffd6df24c8a915f96b5d561bf05ca Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Tue, 4 Mar 2025 15:54:01 -0800 Subject: [PATCH 12/33] fix: format --- tests/svs/index/vamana/dynamic_index.cpp | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/tests/svs/index/vamana/dynamic_index.cpp b/tests/svs/index/vamana/dynamic_index.cpp index 11014164..aad544d0 100644 --- a/tests/svs/index/vamana/dynamic_index.cpp +++ b/tests/svs/index/vamana/dynamic_index.cpp @@ -36,7 +36,6 @@ // tests #include "tests/utils/test_dataset.h" #include "tests/utils/utils.h" -#include "tests/utils/vamana_reference.h" namespace { template auto copy_dataset(const T& data) { @@ -104,14 +103,12 @@ CATCH_TEST_CASE("MutableVamanaIndex", "[graph_index]") { // (1) Load the test index. // (2) Run a round of queries to ensure that everything loading correctly. // (3) Set a target deletion percentage where all the neighbors returned by - // all results returned by the previous query plus a random collection of - extras - // are deleted. - // - // (4) Rerun queries, make sure accuracy is still high and that no deleted - indices - // are present in the results. - auto entry_point = svs::index::load_entry_point(test_dataset::metadata_file()); + // all results returned by the previous query plus a random collection of extras + // are deleted. + // + // (4) Rerun queries, make sure accuracy is still high and that no deleted indices + // are present in the results. + auto entry_point = svs::index::load_entry_point(test_dataset::metadata_file()); auto index = svs::index::MutableVamanaIndex{ test_dataset::graph_blocked(), From e616eae1d6f4a71536df79b6e56b11352b2c69d1 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Tue, 4 Mar 2025 16:23:48 -0800 Subject: [PATCH 13/33] fix: dybamic index 2 working --- tests/svs/index/vamana/dynamic_index_2.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/svs/index/vamana/dynamic_index_2.cpp b/tests/svs/index/vamana/dynamic_index_2.cpp index e6fdc417..c9c8584e 100644 --- a/tests/svs/index/vamana/dynamic_index_2.cpp +++ b/tests/svs/index/vamana/dynamic_index_2.cpp @@ -21,10 +21,9 @@ #include "svs/index/vamana/dynamic_index.h" #include "svs/lib/timing.h" -#include "svs/misc/dynamic_helper.h" - -// tests +// svs-test #include "tests/utils/test_dataset.h" +#include "tests/utils/vamana_reference.h" #include "tests/utils/utils.h" // catch From 6b0525154ba646dce0b559028be96e7406c86812 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Tue, 4 Mar 2025 16:24:46 -0800 Subject: [PATCH 14/33] fix: format --- tests/svs/index/vamana/dynamic_index_2.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/svs/index/vamana/dynamic_index_2.cpp b/tests/svs/index/vamana/dynamic_index_2.cpp index c9c8584e..291ae585 100644 --- a/tests/svs/index/vamana/dynamic_index_2.cpp +++ b/tests/svs/index/vamana/dynamic_index_2.cpp @@ -23,8 +23,8 @@ // svs-test #include "tests/utils/test_dataset.h" -#include "tests/utils/vamana_reference.h" #include "tests/utils/utils.h" +#include "tests/utils/vamana_reference.h" // catch #include "catch2/catch_test_macros.hpp" From ac988db1d2e542188571d64d790e3050504f2150 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Tue, 4 Mar 2025 17:04:54 -0800 Subject: [PATCH 15/33] fix: fix flat test --- tests/svs/index/flat/flat.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/svs/index/flat/flat.cpp b/tests/svs/index/flat/flat.cpp index 5eff1012..851d53ce 100644 --- a/tests/svs/index/flat/flat.cpp +++ b/tests/svs/index/flat/flat.cpp @@ -41,7 +41,7 @@ CATCH_TEST_CASE("FlatIndex Per-Index Logging Test", "[logging]") { auto threadpool = svs::threads::DefaultThreadPool(1); svs::index::flat::FlatIndex index( - dataView, dist, std::move(threadpool), &testLogContext + std::move(dataView), dist, std::move(threadpool), &testLogContext ); index.log("NOTICE", "Test FlatIndex Logging"); From ee22f45ddfdaf458de6a78d94d705567d4621162 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Tue, 4 Mar 2025 22:14:50 -0800 Subject: [PATCH 16/33] fix: remove empty space and make idnex test same as other tests --- include/svs/index/vamana/index.h | 2 -- tests/svs/index/vamana/index.cpp | 25 +++++++++---------------- 2 files changed, 9 insertions(+), 18 deletions(-) diff --git a/include/svs/index/vamana/index.h b/include/svs/index/vamana/index.h index ebcd8c11..08e4af92 100644 --- a/include/svs/index/vamana/index.h +++ b/include/svs/index/vamana/index.h @@ -243,8 +243,6 @@ VamanaSearchParameters construct_default_search_parameters(const Data& data) { /// @tparam PostOp The cleanup operation that is performed after the initial graph /// search. This may be included to perform operations like reranking for quantized /// datasets. -/// @tparam log_callback_ctx A pointer to a user-defined context for per-index logging -/// customization. /// /// The mid-level implementation of the static Vamana graph-based index. /// diff --git a/tests/svs/index/vamana/index.cpp b/tests/svs/index/vamana/index.cpp index 56c4cd14..e38a7579 100644 --- a/tests/svs/index/vamana/index.cpp +++ b/tests/svs/index/vamana/index.cpp @@ -111,26 +111,19 @@ CATCH_TEST_CASE("Vamana Index Parameters", "[index][vamana]") { } struct TestLogCtx { - std::vector logBuffer; - std::string prefix; + std::vector logs; }; -static void test_log_impl(void* ctx, const char* level, const char* message) { - // Cast ctx to our local struct - auto* log = reinterpret_cast(ctx); - - // Format the final string - std::string msg = std::string(level) + ": " + log->prefix + message; - - // Append to the vector - log->logBuffer.push_back(msg); +static void test_log_impl(void* ctx, const char* level, const char* msg) { + if (!ctx) + return; + auto* logCtx = reinterpret_cast(ctx); + logCtx->logs.push_back(std::string(level) + ": " + msg); } CATCH_TEST_CASE("Static VamanaIndex Per-Index Logging", "[logging]") { // Prepare a local context TestLogCtx testCtx; - testCtx.prefix = "test log prefix: "; - // Set the global callback function svs::logging::set_global_log_callback(test_log_impl); @@ -156,11 +149,11 @@ CATCH_TEST_CASE("Static VamanaIndex Per-Index Logging", "[logging]") { ); // Trigger log message - index.log("NOTICE", "Test dynamic VamanaIndex Logging"); + index.log("NOTICE", "Test VamanaIndex Logging"); // Check message - CATCH_REQUIRE(testCtx.logBuffer.size() == 1); + CATCH_REQUIRE(testCtx.logs.size() == 1); CATCH_REQUIRE( - testCtx.logBuffer[0] == "NOTICE: test log prefix: Test dynamic VamanaIndex Logging" + testCtx.logs[0] == "NOTICE: Test VamanaIndex Logging" ); } \ No newline at end of file From 88d866a38e2554683f70cf668de652273ce1e0c4 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Tue, 4 Mar 2025 22:19:21 -0800 Subject: [PATCH 17/33] fix: remove changes on dynamic_index.cpp --- tests/svs/index/vamana/dynamic_index.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/svs/index/vamana/dynamic_index.cpp b/tests/svs/index/vamana/dynamic_index.cpp index aad544d0..593dc3c9 100644 --- a/tests/svs/index/vamana/dynamic_index.cpp +++ b/tests/svs/index/vamana/dynamic_index.cpp @@ -245,4 +245,5 @@ CATCH_TEST_CASE("MutableVamanaIndex", "[graph_index]") { std::cout << "Post reinsertion recall: " << post_reinsertion_recall << " in " << post_add_time << " seconds." << std::endl; } -} \ No newline at end of file +} + From 2a092aee3fced18b3f11ef550c136288ad7871e6 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Tue, 4 Mar 2025 22:20:13 -0800 Subject: [PATCH 18/33] fix: remove changes on dynamic_index.cpp --- tests/svs/index/vamana/dynamic_index.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/svs/index/vamana/dynamic_index.cpp b/tests/svs/index/vamana/dynamic_index.cpp index 593dc3c9..17725887 100644 --- a/tests/svs/index/vamana/dynamic_index.cpp +++ b/tests/svs/index/vamana/dynamic_index.cpp @@ -246,4 +246,3 @@ CATCH_TEST_CASE("MutableVamanaIndex", "[graph_index]") { << post_add_time << " seconds." << std::endl; } } - From 8321bc468efcb571726cf4ad6d4f9c6d11f4f264 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Tue, 4 Mar 2025 22:21:55 -0800 Subject: [PATCH 19/33] fix: format --- tests/svs/index/vamana/index.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/svs/index/vamana/index.cpp b/tests/svs/index/vamana/index.cpp index e38a7579..d2bcb025 100644 --- a/tests/svs/index/vamana/index.cpp +++ b/tests/svs/index/vamana/index.cpp @@ -153,7 +153,5 @@ CATCH_TEST_CASE("Static VamanaIndex Per-Index Logging", "[logging]") { // Check message CATCH_REQUIRE(testCtx.logs.size() == 1); - CATCH_REQUIRE( - testCtx.logs[0] == "NOTICE: Test VamanaIndex Logging" - ); + CATCH_REQUIRE(testCtx.logs[0] == "NOTICE: Test VamanaIndex Logging"); } \ No newline at end of file From d40db3074196e526152287038395b6bdc39cfc6c Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Tue, 4 Mar 2025 22:24:29 -0800 Subject: [PATCH 20/33] fix: don't de;ete ref from dynamic index 2 --- tests/svs/index/vamana/dynamic_index_2.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/svs/index/vamana/dynamic_index_2.cpp b/tests/svs/index/vamana/dynamic_index_2.cpp index 291ae585..5d4f8517 100644 --- a/tests/svs/index/vamana/dynamic_index_2.cpp +++ b/tests/svs/index/vamana/dynamic_index_2.cpp @@ -20,8 +20,9 @@ #include "svs/index/flat/flat.h" #include "svs/index/vamana/dynamic_index.h" #include "svs/lib/timing.h" +#include "svs/misc/dynamic_helper.h" -// svs-test +// tests #include "tests/utils/test_dataset.h" #include "tests/utils/utils.h" #include "tests/utils/vamana_reference.h" From 64209f596b3b9ce14052dfb0e69f1bdd7168a089 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Tue, 4 Mar 2025 22:25:50 -0800 Subject: [PATCH 21/33] fix: don't de;ete ref from dynamic index 2 --- tests/svs/index/vamana/dynamic_index_2.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/svs/index/vamana/dynamic_index_2.cpp b/tests/svs/index/vamana/dynamic_index_2.cpp index 5d4f8517..2dadb3dc 100644 --- a/tests/svs/index/vamana/dynamic_index_2.cpp +++ b/tests/svs/index/vamana/dynamic_index_2.cpp @@ -20,6 +20,7 @@ #include "svs/index/flat/flat.h" #include "svs/index/vamana/dynamic_index.h" #include "svs/lib/timing.h" + #include "svs/misc/dynamic_helper.h" // tests From 3e62f8dff306e211e9a3412d2b67958c0e20af89 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Thu, 6 Mar 2025 17:05:43 -0800 Subject: [PATCH 22/33] fix: use sdp log callback instead creating new wheels --- include/svs/core/logging.h | 23 ----------- include/svs/index/flat/flat.h | 31 ++++++--------- include/svs/index/inverted/memory_based.h | 23 ++++------- include/svs/index/vamana/dynamic_index.h | 27 +++++-------- include/svs/index/vamana/index.h | 39 +++++++------------ tests/svs/index/flat/flat.cpp | 40 +++++++++++--------- tests/svs/index/inverted/memory_based.cpp | 36 +++++++++--------- tests/svs/index/vamana/dynamic_index_2.cpp | 40 ++++++++++---------- tests/svs/index/vamana/index.cpp | 44 +++++++++++----------- 9 files changed, 127 insertions(+), 176 deletions(-) diff --git a/include/svs/core/logging.h b/include/svs/core/logging.h index edb197b7..24645a5f 100644 --- a/include/svs/core/logging.h +++ b/include/svs/core/logging.h @@ -68,12 +68,6 @@ using logger_ptr = std::shared_ptr<::spdlog::logger>; /// @brief The type for sinks registered with loggers. using sink_ptr = std::shared_ptr<::spdlog::sinks::sink>; -/// @brief Define the callback function type -using log_callback_function = void (*)(void* ctx, const char* level, const char* message); - -/// @brief Global static log callback function -inline log_callback_function global_log_callback = nullptr; - /// @brief A sink going nowhere. Used to disable logging entirely. inline sink_ptr null_sink() { return std::make_shared<::spdlog::sinks::null_sink_mt>(); } @@ -308,11 +302,6 @@ inline void set_level(Level level) { set_level(logger, level); } -/// @brief Function to set the global log callback -inline void set_global_log_callback(log_callback_function callback) { - global_log_callback = callback; -} - /// @brief Return whether a message should be created for the logger at the given level. inline bool should_log(const logger_ptr& logger, logging::Level level) { return logger->should_log(detail::to_spdlog(level)); @@ -339,18 +328,6 @@ void log(Level level, fmt::format_string fmt, Args&&... args) { logging::log(global_logger, level, fmt, SVS_FWD(args)...); } -/// @brief Each index can pass its own ctx pointer, but -/// the function pointer is always global_log_callback. -inline void log(void* ctx, const char* level, const char* msg) { - if (global_log_callback) { - global_log_callback(ctx, level, msg); - } else { - // Fallback to exisitng global logger - Level log_level = svs::logging::detail::level_from_string(level); - log(log_level, "{}", msg); - } -} - // Convenience methods #define SVS_DEFINE_LOG_FUNCTION(name) \ template \ diff --git a/include/svs/index/flat/flat.h b/include/svs/index/flat/flat.h index e8d9e055..3352e3a9 100644 --- a/include/svs/index/flat/flat.h +++ b/include/svs/index/flat/flat.h @@ -146,8 +146,8 @@ class FlatIndex { data_storage_type data_; [[no_unique_address]] distance_type distance_; threads::ThreadPoolHandle threadpool_; - // Log callback - void* log_callback_ctx_; + // SVS logger for per index logging + svs::logging::logger_ptr logger_; // Constructs controlling the iteration strategy over the data and queries. search_parameters_type search_parameters_{}; @@ -192,8 +192,7 @@ class FlatIndex { /// instance or an integer specifying the number of threads to use. In the latter /// case, a new default thread pool will be constructed using ``threadpool_proto`` /// as the number of threads to create. - /// @param log_callback_ctx A pointer to a user-defined context for per-index logging - /// customization. + /// @param logger_ Spd logger for per-index logging customization. /// /// @copydoc threadpool_requirements /// @@ -202,26 +201,26 @@ class FlatIndex { Data data, Dist distance, ThreadPoolProto threadpool_proto, - void* log_callback_ctx = nullptr + svs::logging::logger_ptr logger = svs::logging::get() ) requires std::is_same_v : data_{std::move(data)} , distance_{std::move(distance)} , threadpool_{threads::as_threadpool(std::move(threadpool_proto))} - , log_callback_ctx_{log_callback_ctx} {} + , logger_{std::move(logger)} {} template FlatIndex( Data& data, Dist distance, ThreadPoolProto threadpool_proto, - void* log_callback_ctx = nullptr + svs::logging::logger_ptr logger = svs::logging::get() ) requires std::is_same_v : data_{data} , distance_{std::move(distance)} , threadpool_{threads::as_threadpool(std::move(threadpool_proto))} - , log_callback_ctx_{log_callback_ctx} {} + , logger_{std::move(logger)} {} ////// Dataset Interface @@ -453,13 +452,6 @@ class FlatIndex { /// @brief Return the current thread pool handle. /// threads::ThreadPoolHandle& get_threadpool_handle() { return threadpool_; } - - ///// Logging - - /// @brief Helper method to log - void log(const char* level, const char* message) const { - svs::logging::log(log_callback_ctx_, level, message); - } }; /// @@ -486,9 +478,8 @@ class FlatIndex { /// instance or an integer specifying the number of threads to use. In the latter case, /// a new default thread pool will be constructed using ``threadpool_proto`` as the /// number of threads to create. -/// @param log_callback_ctx A pointer to a user-defined context for per-index logging -/// customization. - +/// @param logger_ Spd logger for per-index logging customization. +/// /// This method provides much of the heavy lifting for constructing a Flat index from /// a data file on disk or a dataset in memory. /// @@ -501,12 +492,12 @@ auto auto_assemble( DataProto&& data_proto, Distance distance, ThreadPoolProto threadpool_proto, - void* log_callback_ctx = nullptr + svs::logging::logger_ptr logger = svs::logging::get() ) { auto threadpool = threads::as_threadpool(std::move(threadpool_proto)); auto data = svs::detail::dispatch_load(std::forward(data_proto), threadpool); return FlatIndex( - std::move(data), std::move(distance), std::move(threadpool), log_callback_ctx + std::move(data), std::move(distance), std::move(threadpool), std::move(logger) ); } diff --git a/include/svs/index/inverted/memory_based.h b/include/svs/index/inverted/memory_based.h index 112e76d0..82b9d9b1 100644 --- a/include/svs/index/inverted/memory_based.h +++ b/include/svs/index/inverted/memory_based.h @@ -343,13 +343,13 @@ template class InvertedIndex { Cluster cluster, translator_type index_local_to_global, Pool threadpool, - void* log_callback_ctx = nullptr + svs::logging::logger_ptr logger = svs::logging::get() ) : index_{std::move(index)} , cluster_{std::move(cluster)} , index_local_to_global_{std::move(index_local_to_global)} , threadpool_{std::move(threadpool)} - , log_callback_ctx_{log_callback_ctx} { + , logger_{std::move(logger)} { // Clear out the threadpool in the inner index - prefer to handle threading // ourselves. index_.set_threadpool(threads::SequentialThreadPool()); @@ -497,13 +497,6 @@ template class InvertedIndex { index_.save(index_config, graph, data); } - ///// Logging - - /// @brief Helper method to log - void log(const char* level, const char* message) const { - svs::logging::log(log_callback_ctx_, level, message); - } - private: // Tunable Parameters double refinement_epsilon_ = 10.0; @@ -516,8 +509,8 @@ template class InvertedIndex { // Transient parameters. threads::ThreadPoolHandle threadpool_; - // Per index context pointer - void* log_callback_ctx_ = nullptr; + // SVS logger for per index logging + svs::logging::logger_ptr logger_; }; struct PickRandomly { @@ -564,7 +557,7 @@ auto auto_build( Strategy strategy = {}, CentroidPicker centroid_picker = {}, ClusteringOp clustering_op = {}, - void* log_callback_ctx = nullptr + svs::logging::logger_ptr logger = svs::logging::get() ) { // Perform clustering. auto threadpool = threads::as_threadpool(std::move(threadpool_proto)); @@ -602,7 +595,7 @@ auto auto_build( strategy(data, clustering, HugepageAllocator()), std::move(centroids), std::move(primary_threadpool), - log_callback_ctx}; + logger}; } ///// Auto Assembling. @@ -619,7 +612,7 @@ auto assemble_from_clustering( const std::filesystem::path& index_config, const std::filesystem::path& graph, ThreadPoolProto threadpool_proto, - void* log_callback_ctx = nullptr + svs::logging::logger_ptr logger = svs::logging::get() ) { auto threadpool = threads::as_threadpool(std::move(threadpool_proto)); auto original = svs::detail::dispatch_load(std::move(data_proto), threadpool); @@ -640,7 +633,7 @@ auto assemble_from_clustering( }), distance, 1, - log_callback_ctx + logger ); // Create the clustering and return the final results. diff --git a/include/svs/index/vamana/dynamic_index.h b/include/svs/index/vamana/dynamic_index.h index 14379311..5c42afdc 100644 --- a/include/svs/index/vamana/dynamic_index.h +++ b/include/svs/index/vamana/dynamic_index.h @@ -157,8 +157,8 @@ class MutableVamanaIndex { float alpha_ = 1.2; bool use_full_search_history_ = true; - // Log callback - void* log_callback_ctx_; + // SVS logger for per index logging + svs::logging::logger_ptr logger_; // Methods public: @@ -179,7 +179,7 @@ class MutableVamanaIndex { const ExternalIds& external_ids, ThreadPoolProto threadpool_proto, // Optional logger parameter - void* log_callback_ctx = nullptr + svs::logging::logger_ptr logger = svs::logging::get() ) : graph_{std::move(graph)} , data_{std::move(data)} @@ -192,7 +192,7 @@ class MutableVamanaIndex { , search_parameters_{vamana::construct_default_search_parameters(data_)} , construction_window_size_{2 * graph.max_degree()} // Ctor accept logger in parameter - , log_callback_ctx_{log_callback_ctx} { + , logger_{std::move(logger)} { translator_.insert(external_ids, threads::UnitRange(0, external_ids.size())); } @@ -206,7 +206,7 @@ class MutableVamanaIndex { const ExternalIds& external_ids, Dist distance_function, ThreadPoolProto threadpool_proto, - void* log_callback_ctx = nullptr + svs::logging::logger_ptr logger = svs::logging::get() ) : graph_(Graph{data.size(), parameters.graph_max_degree}) , data_(std::move(data)) @@ -222,7 +222,7 @@ class MutableVamanaIndex { , prune_to_(parameters.prune_to) , alpha_(parameters.alpha) , use_full_search_history_{parameters.use_full_search_history} - , log_callback_ctx_{log_callback_ctx} { + , logger_{std::move(logger)} { // Setup the initial translation of external to internal ids. translator_.insert(external_ids, threads::UnitRange(0, external_ids.size())); @@ -257,7 +257,7 @@ class MutableVamanaIndex { const Dist& distance_function, IDTranslator translator, Pool threadpool, - void* log_callback_ctx = nullptr + svs::logging::logger_ptr logger = svs::logging::get() ) : graph_{std::move(graph)} , data_{std::move(data)} @@ -273,7 +273,7 @@ class MutableVamanaIndex { , prune_to_{config.build_parameters.prune_to} , alpha_{config.build_parameters.alpha} , use_full_search_history_{config.build_parameters.use_full_search_history} - , log_callback_ctx_{log_callback_ctx} {} + , logger_{std::move(logger)} {} ///// Scratchspace scratchspace_type scratchspace(const search_parameters_type& sp) const { @@ -1206,13 +1206,6 @@ class MutableVamanaIndex { } } } - - ///// Logging - - /// @brief Helper method to log - void log(const char* level, const char* message) const { - svs::logging::log(log_callback_ctx_, level, message); - } }; ///// Deduction Guides. @@ -1277,7 +1270,7 @@ auto auto_dynamic_assemble( // // This is an internal API and should not be considered officially supported nor stable. bool debug_load_from_static = false, - void* log_callback_ctx = nullptr + svs::logging::logger_ptr logger = svs::logging::get() ) { // Load the dataset auto threadpool = threads::as_threadpool(std::move(threadpool_proto)); @@ -1344,7 +1337,7 @@ auto auto_dynamic_assemble( std::move(distance), std::move(translator), std::move(threadpool), - log_callback_ctx}; + logger}; } } // namespace svs::index::vamana diff --git a/include/svs/index/vamana/index.h b/include/svs/index/vamana/index.h index 08e4af92..34984e35 100644 --- a/include/svs/index/vamana/index.h +++ b/include/svs/index/vamana/index.h @@ -302,8 +302,8 @@ class VamanaIndex { lib::ReadWriteProtected default_search_parameters_{}; // Construction parameters VamanaBuildParameters build_parameters_{}; - // Log callback - void* log_callback_ctx_; + // SVS logger for per index logging + svs::logging::logger_ptr logger_; public: // This is because some datasets may not yet support single-searching, which is required @@ -335,8 +335,7 @@ class VamanaIndex { /// instance or an integer specifying the number of threads to use. In the latter /// case, a new default thread pool will be constructed using ``threadpool_proto`` /// as the number of threads to create. - /// @param log_callback_ctx A pointer to a user-defined context for per-index logging - /// customization. + /// @param logger_ Spd logger for per-index logging customization. /// /// This is a lower-level function that is meant to take a collection of /// instantiated components and assemble the final index. For a more "hands-free" @@ -358,7 +357,7 @@ class VamanaIndex { Idx entry_point, Dist distance_function, ThreadPoolProto threadpool_proto, - void* log_callback_ctx = nullptr + svs::logging::logger_ptr logger = svs::logging::get() ) : graph_{std::move(graph)} , data_{std::move(data)} @@ -366,7 +365,7 @@ class VamanaIndex { , distance_{std::move(distance_function)} , threadpool_{threads::as_threadpool(std::move(threadpool_proto))} , default_search_parameters_{construct_default_search_parameters(data_)} - , log_callback_ctx_{log_callback_ctx} {} + , logger_{std::move(logger)} {} /// /// @brief Build a VamanaIndex over the given dataset. @@ -379,8 +378,7 @@ class VamanaIndex { /// @param distance_function The distance function used to compare queries and /// elements of the dataset. /// @param threadpool The acceptable threadpool to use to conduct searches. - /// @param log_callback_ctx A pointer to a user-defined context for per-index logging - /// customization. + /// @param logger_ Spd logger for per-index logging customization. /// /// This is a lower-level function that is meant to take a dataset and construct /// the graph-based index over the dataset. For a more "hands-free" approach, see @@ -401,7 +399,7 @@ class VamanaIndex { Idx entry_point, Dist distance_function, Pool threadpool, - void* log_callback_ctx = nullptr + svs::logging::logger_ptr logger = svs::logging::get() ) : VamanaIndex{ std::move(graph), @@ -409,7 +407,7 @@ class VamanaIndex { entry_point, std::move(distance_function), std::move(threadpool), - log_callback_ctx} { + logger} { if (graph_.n_nodes() != data_.size()) { throw ANNEXCEPTION("Wrong sizes!"); } @@ -863,13 +861,6 @@ class VamanaIndex { template void experimental_escape_hatch(F&& f) const { std::invoke(SVS_FWD(f), graph_, data_, distance_, lib::as_const_span(entry_point_)); } - - ///// Logging - - /// @brief Helper method to log - void log(const char* level, const char* message) const { - svs::logging::log(log_callback_ctx_, level, message); - } }; // Shared documentation for assembly methods. @@ -887,8 +878,7 @@ class VamanaIndex { /// a new default thread pool will be constructed using ``threadpool_proto`` as the /// number of threads to create. /// @param graph_allocator The allocator to use for the graph data structure. -/// @param log_callback_ctx A pointer to a user-defined context for per-index logging -/// customization. +/// @param logger_ Spd logger for per-index logging customization. /// /// @copydoc threadpool_requirements /// @@ -903,7 +893,7 @@ auto auto_build( Distance distance, ThreadPoolProto threadpool_proto, const Allocator& graph_allocator = {}, - void* log_callback_ctx = nullptr + svs::logging::logger_ptr logger = svs::logging::get() ) { auto threadpool = threads::as_threadpool(std::move(threadpool_proto)); auto data = svs::detail::dispatch_load(std::move(data_proto), threadpool); @@ -919,7 +909,7 @@ auto auto_build( lib::narrow(entry_point), std::move(distance), std::move(threadpool), - log_callback_ctx}; + logger}; } /// @@ -937,8 +927,7 @@ auto auto_build( /// This method provides much of the heavy lifting for instantiating a Vamana index from /// a collection of files on disk (or perhaps a mix-and-match of existing data in-memory /// and on disk). -/// @param log_callback_ctx A pointer to a user-defined context for per-index logging -/// customization. +/// @param logger_ Spd logger for per-index logging customization. /// /// Refer to the examples for use of this interface. /// @@ -955,7 +944,7 @@ auto auto_assemble( DataProto data_proto, Distance distance, ThreadPoolProto threadpool_proto, - void* log_callback_ctx = nullptr + svs::logging::logger_ptr logger = svs::logging::get() ) { auto threadpool = threads::as_threadpool(std::move(threadpool_proto)); auto data = svs::detail::dispatch_load(std::move(data_proto), threadpool); @@ -969,7 +958,7 @@ auto auto_assemble( I{}, std::move(distance), std::move(threadpool), - log_callback_ctx}; + logger}; auto config = lib::load_from_disk(config_path); index.apply(config); diff --git a/tests/svs/index/flat/flat.cpp b/tests/svs/index/flat/flat.cpp index 851d53ce..5637605c 100644 --- a/tests/svs/index/flat/flat.cpp +++ b/tests/svs/index/flat/flat.cpp @@ -20,20 +20,24 @@ // catch2 #include "catch2/catch_test_macros.hpp" -struct TestLogCtx { - std::vector logs; -}; - -static void test_log_callback(void* ctx, const char* level, const char* msg) { - if (!ctx) - return; - auto* logCtx = reinterpret_cast(ctx); - logCtx->logs.push_back(std::string(level) + ": " + msg); -} +// spd log +#include "spdlog/sinks/callback_sink.h" + +CATCH_TEST_CASE("FlatIndex Logging Test", "[logging]") { + // Vector to store captured log messages + std::vector captured_logs; + + // Create a callback sink to capture log messages + auto callback_sink = std::make_shared( + [&captured_logs](const spdlog::details::log_msg& msg) { + captured_logs.emplace_back(msg.payload.data(), msg.payload.size()); + } + ); + callback_sink->set_level(spdlog::level::trace); -CATCH_TEST_CASE("FlatIndex Per-Index Logging Test", "[logging]") { - svs::logging::set_global_log_callback(&test_log_callback); - TestLogCtx testLogContext; + // Set up the FlatIndex with the test logger + auto test_logger = std::make_shared("test_logger", callback_sink); + test_logger->set_level(spdlog::level::trace); std::vector data{1.0f, 2.0f}; auto dataView = svs::data::SimpleDataView(data.data(), 2, 1); @@ -41,11 +45,13 @@ CATCH_TEST_CASE("FlatIndex Per-Index Logging Test", "[logging]") { auto threadpool = svs::threads::DefaultThreadPool(1); svs::index::flat::FlatIndex index( - std::move(dataView), dist, std::move(threadpool), &testLogContext + std::move(dataView), dist, std::move(threadpool), test_logger ); - index.log("NOTICE", "Test FlatIndex Logging"); + // Log a message + test_logger->info("Test FlatIndex Logging"); - CATCH_REQUIRE(testLogContext.logs.size() == 1); - CATCH_REQUIRE(testLogContext.logs[0] == "NOTICE: Test FlatIndex Logging"); + // Verify the log output + CATCH_REQUIRE(captured_logs.size() == 1); + CATCH_REQUIRE(captured_logs[0] == "Test FlatIndex Logging"); } diff --git a/tests/svs/index/inverted/memory_based.cpp b/tests/svs/index/inverted/memory_based.cpp index df4d62e2..a2939ff5 100644 --- a/tests/svs/index/inverted/memory_based.cpp +++ b/tests/svs/index/inverted/memory_based.cpp @@ -21,23 +21,23 @@ #include "tests/utils/inverted_reference.h" #include "tests/utils/test_dataset.h" #include +#include "spdlog/sinks/callback_sink.h" -// Define our log context structure. -struct TestLogCtx { - std::vector logs; -}; +CATCH_TEST_CASE("InvertedIndex Logging Test", "[logging]") { + // Vector to store captured log messages + std::vector captured_logs; -static void test_log_callback(void* ctx, const char* level, const char* msg) { - if (!ctx) - return; - auto* logCtx = reinterpret_cast(ctx); - logCtx->logs.push_back(std::string(level) + ": " + msg); -} + // Create a callback sink to capture log messages + auto callback_sink = std::make_shared( + [&captured_logs](const spdlog::details::log_msg& msg) { + captured_logs.emplace_back(msg.payload.data(), msg.payload.size()); + } + ); + callback_sink->set_level(spdlog::level::trace); -CATCH_TEST_CASE("InvertedIndex Per-Index Logging", "[logging]") { - // Set global log callback - svs::logging::set_global_log_callback(&test_log_callback); - TestLogCtx testLogContext; + // Create a logger with the callback sink + auto test_logger = std::make_shared("test_logger", callback_sink); + test_logger->set_level(spdlog::level::trace); // Setup index auto distance = svs::DistanceL2(); @@ -55,13 +55,13 @@ CATCH_TEST_CASE("InvertedIndex Per-Index Logging", "[logging]") { {}, {}, {}, - &testLogContext + test_logger ); // Log a test message - invertedIndex.log("NOTICE", "Test InvertedIndex Logging"); + test_logger->info("Test InvertedIndex Logging"); // Check log context received the message - CATCH_REQUIRE(testLogContext.logs.size() == 1); - CATCH_REQUIRE(testLogContext.logs[0] == "NOTICE: Test InvertedIndex Logging"); + CATCH_REQUIRE(captured_logs.size() == 1); + CATCH_REQUIRE(captured_logs[0] == "Test InvertedIndex Logging"); } \ No newline at end of file diff --git a/tests/svs/index/vamana/dynamic_index_2.cpp b/tests/svs/index/vamana/dynamic_index_2.cpp index 2dadb3dc..790b2f56 100644 --- a/tests/svs/index/vamana/dynamic_index_2.cpp +++ b/tests/svs/index/vamana/dynamic_index_2.cpp @@ -27,6 +27,7 @@ #include "tests/utils/test_dataset.h" #include "tests/utils/utils.h" #include "tests/utils/vamana_reference.h" +#include "spdlog/sinks/callback_sink.h" // catch #include "catch2/catch_test_macros.hpp" @@ -419,21 +420,22 @@ CATCH_TEST_CASE("Testing Graph Index", "[graph_index][dynamic_index]") { index.on_ids([&](size_t e) { CATCH_REQUIRE(reloaded.has_id(e)); }); } -struct TestLogCtx { - std::vector logs; -}; - -static void test_log_callback(void* ctx, const char* level, const char* msg) { - if (!ctx) - return; - auto* logCtx = reinterpret_cast(ctx); - logCtx->logs.push_back(std::string(level) + ": " + msg); -} CATCH_TEST_CASE("Dynamic MutableVamanaIndex Per-Index Logging Test", "[logging]") { - // Set callback - svs::logging::set_global_log_callback(&test_log_callback); - TestLogCtx testLogContext; + // Vector to store captured log messages + std::vector captured_logs; + + // Create a callback sink to capture log messages + auto callback_sink = std::make_shared( + [&captured_logs](const spdlog::details::log_msg& msg) { + captured_logs.emplace_back(msg.payload.data(), msg.payload.size()); + } + ); + callback_sink->set_level(spdlog::level::trace); // Capture all log levels + + // Create a logger with the callback sink + auto test_logger = std::make_shared("test_logger", callback_sink); + test_logger->set_level(spdlog::level::trace); // Setup index std::filesystem::path configPath = test_dataset::vamana_config_file(); @@ -458,13 +460,13 @@ CATCH_TEST_CASE("Dynamic MutableVamanaIndex Per-Index Logging Test", "[logging]" svs::graphs::SimpleGraph, svs::data::SimpleData, svs::distance::DistanceL2>( - buildParams, data, external_ids, distance, std::move(threadpool), &testLogContext + buildParams, data, external_ids, distance, std::move(threadpool), test_logger ); - // Log messasge - dynamicIndex.log("NOTICE", "Test MutableVamanaIndex Logging"); + // Log a message using the logger directly + test_logger->info("Test MutableVamanaIndex Logging"); - // Verify that the log callback was called - CATCH_REQUIRE(testLogContext.logs.size() == 1); - CATCH_REQUIRE(testLogContext.logs[0] == "NOTICE: Test MutableVamanaIndex Logging"); + // Verify the log output + CATCH_REQUIRE(captured_logs.size() == 1); + CATCH_REQUIRE(captured_logs[0] == "Test MutableVamanaIndex Logging"); } \ No newline at end of file diff --git a/tests/svs/index/vamana/index.cpp b/tests/svs/index/vamana/index.cpp index d2bcb025..58f614c2 100644 --- a/tests/svs/index/vamana/index.cpp +++ b/tests/svs/index/vamana/index.cpp @@ -17,6 +17,7 @@ // Header under test #include "svs/index/vamana/index.h" #include "svs/core/logging.h" +#include "spdlog/sinks/callback_sink.h" // catch2 #include "catch2/catch_test_macros.hpp" @@ -110,22 +111,21 @@ CATCH_TEST_CASE("Vamana Index Parameters", "[index][vamana]") { } } -struct TestLogCtx { - std::vector logs; -}; - -static void test_log_impl(void* ctx, const char* level, const char* msg) { - if (!ctx) - return; - auto* logCtx = reinterpret_cast(ctx); - logCtx->logs.push_back(std::string(level) + ": " + msg); -} - CATCH_TEST_CASE("Static VamanaIndex Per-Index Logging", "[logging]") { - // Prepare a local context - TestLogCtx testCtx; - // Set the global callback function - svs::logging::set_global_log_callback(test_log_impl); + // Vector to store captured log messages + std::vector captured_logs; + + // Create a callback sink to capture log messages + auto callback_sink = std::make_shared( + [&captured_logs](const spdlog::details::log_msg& msg) { + captured_logs.emplace_back(msg.payload.data(), msg.payload.size()); + } + ); + callback_sink->set_level(spdlog::level::trace); // Capture all log levels + + // Create a logger with the callback sink + auto test_logger = std::make_shared("test_logger", callback_sink); + test_logger->set_level(spdlog::level::trace); // Create some minimal data std::vector data = {1.0f, 2.0f}; @@ -136,7 +136,7 @@ CATCH_TEST_CASE("Static VamanaIndex Per-Index Logging", "[logging]") { uint32_t entry_point = 0; auto threadpool = svs::threads::DefaultThreadPool(1); - // Build the VamanaIndex, passing &testCtx as the per-index logging context + // Build the VamanaIndex with the test logger svs::index::vamana::VamanaBuildParameters buildParams(1.2, 64, 10, 20, 10, true); svs::index::vamana::VamanaIndex index( buildParams, @@ -145,13 +145,13 @@ CATCH_TEST_CASE("Static VamanaIndex Per-Index Logging", "[logging]") { entry_point, distance_function, std::move(threadpool), - &testCtx + test_logger ); - // Trigger log message - index.log("NOTICE", "Test VamanaIndex Logging"); + // Log a message using the logger directly + test_logger->info("Test VamanaIndex Logging"); - // Check message - CATCH_REQUIRE(testCtx.logs.size() == 1); - CATCH_REQUIRE(testCtx.logs[0] == "NOTICE: Test VamanaIndex Logging"); + // Verify the log output + CATCH_REQUIRE(captured_logs.size() == 1); + CATCH_REQUIRE(captured_logs[0] == "Test VamanaIndex Logging"); } \ No newline at end of file From 5ee991deaf53a39bc11e084fe35e5c16f79f4bc6 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Thu, 6 Mar 2025 17:06:42 -0800 Subject: [PATCH 23/33] fix: format --- tests/svs/index/flat/flat.cpp | 2 +- tests/svs/index/inverted/memory_based.cpp | 2 +- tests/svs/index/vamana/dynamic_index_2.cpp | 5 ++--- tests/svs/index/vamana/index.cpp | 4 ++-- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/tests/svs/index/flat/flat.cpp b/tests/svs/index/flat/flat.cpp index 5637605c..e4dbf49b 100644 --- a/tests/svs/index/flat/flat.cpp +++ b/tests/svs/index/flat/flat.cpp @@ -24,7 +24,7 @@ #include "spdlog/sinks/callback_sink.h" CATCH_TEST_CASE("FlatIndex Logging Test", "[logging]") { - // Vector to store captured log messages + // Vector to store captured log messages std::vector captured_logs; // Create a callback sink to capture log messages diff --git a/tests/svs/index/inverted/memory_based.cpp b/tests/svs/index/inverted/memory_based.cpp index a2939ff5..52773512 100644 --- a/tests/svs/index/inverted/memory_based.cpp +++ b/tests/svs/index/inverted/memory_based.cpp @@ -16,12 +16,12 @@ #include "svs/index/inverted/memory_based.h" #include "catch2/catch_test_macros.hpp" +#include "spdlog/sinks/callback_sink.h" #include "svs-benchmark/datasets.h" #include "svs/lib/timing.h" #include "tests/utils/inverted_reference.h" #include "tests/utils/test_dataset.h" #include -#include "spdlog/sinks/callback_sink.h" CATCH_TEST_CASE("InvertedIndex Logging Test", "[logging]") { // Vector to store captured log messages diff --git a/tests/svs/index/vamana/dynamic_index_2.cpp b/tests/svs/index/vamana/dynamic_index_2.cpp index 790b2f56..d944034e 100644 --- a/tests/svs/index/vamana/dynamic_index_2.cpp +++ b/tests/svs/index/vamana/dynamic_index_2.cpp @@ -24,10 +24,10 @@ #include "svs/misc/dynamic_helper.h" // tests +#include "spdlog/sinks/callback_sink.h" #include "tests/utils/test_dataset.h" #include "tests/utils/utils.h" #include "tests/utils/vamana_reference.h" -#include "spdlog/sinks/callback_sink.h" // catch #include "catch2/catch_test_macros.hpp" @@ -420,9 +420,8 @@ CATCH_TEST_CASE("Testing Graph Index", "[graph_index][dynamic_index]") { index.on_ids([&](size_t e) { CATCH_REQUIRE(reloaded.has_id(e)); }); } - CATCH_TEST_CASE("Dynamic MutableVamanaIndex Per-Index Logging Test", "[logging]") { - // Vector to store captured log messages + // Vector to store captured log messages std::vector captured_logs; // Create a callback sink to capture log messages diff --git a/tests/svs/index/vamana/index.cpp b/tests/svs/index/vamana/index.cpp index 58f614c2..c58e2755 100644 --- a/tests/svs/index/vamana/index.cpp +++ b/tests/svs/index/vamana/index.cpp @@ -16,8 +16,8 @@ // Header under test #include "svs/index/vamana/index.h" -#include "svs/core/logging.h" #include "spdlog/sinks/callback_sink.h" +#include "svs/core/logging.h" // catch2 #include "catch2/catch_test_macros.hpp" @@ -112,7 +112,7 @@ CATCH_TEST_CASE("Vamana Index Parameters", "[index][vamana]") { } CATCH_TEST_CASE("Static VamanaIndex Per-Index Logging", "[logging]") { - // Vector to store captured log messages + // Vector to store captured log messages std::vector captured_logs; // Create a callback sink to capture log messages From 3e9fcb8faac08993fbdc488680dc4aa08502df4a Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Tue, 11 Mar 2025 16:13:44 -0700 Subject: [PATCH 24/33] fix: vamana index internal logging change to per index logging works --- include/svs/core/logging.h | 1 + include/svs/index/vamana/index.h | 9 ++- include/svs/index/vamana/vamana_build.h | 10 ++-- tests/integration/vamana/index_build.cpp | 76 ++++++++++++++++++++++++ tests/svs/index/vamana/index.cpp | 9 +-- 5 files changed, 92 insertions(+), 13 deletions(-) diff --git a/include/svs/core/logging.h b/include/svs/core/logging.h index 24645a5f..0555c935 100644 --- a/include/svs/core/logging.h +++ b/include/svs/core/logging.h @@ -35,6 +35,7 @@ #include #include + namespace svs::logging { /// @brief Enum controlling verbosity. diff --git a/include/svs/index/vamana/index.h b/include/svs/index/vamana/index.h index 34984e35..46c0077d 100644 --- a/include/svs/index/vamana/index.h +++ b/include/svs/index/vamana/index.h @@ -422,8 +422,13 @@ class VamanaIndex { extensions::estimate_prefetch_parameters(data_) ); - builder.construct(1.0F, entry_point_[0]); - builder.construct(parameters.alpha, entry_point_[0]); + builder.construct(1.0F, entry_point_[0], logging::Level::Info, logger); + builder.construct(parameters.alpha, entry_point_[0], logging::Level::Info, logger); + } + + /// @brief Getter method for logger + svs::logging::logger_ptr get_logger() const { + return logger_; } /// @brief Apply the given configuration parameters to the index. diff --git a/include/svs/index/vamana/vamana_build.h b/include/svs/index/vamana/vamana_build.h index a174ac0a..8cc6de03 100644 --- a/include/svs/index/vamana/vamana_build.h +++ b/include/svs/index/vamana/vamana_build.h @@ -201,10 +201,10 @@ class VamanaBuilder { ); } } - + void - construct(float alpha, Idx entry_point, logging::Level level = logging::Level::Info) { - construct(alpha, entry_point, threads::UnitRange{0, data_.size()}, level); + construct(float alpha, Idx entry_point, logging::Level level = logging::Level::Info, logging::logger_ptr logger = svs::logging::get()) { + construct(alpha, entry_point, threads::UnitRange{0, data_.size()}, level, logger); } template @@ -212,9 +212,9 @@ class VamanaBuilder { float alpha, Idx entry_point, const R& range, - logging::Level level = logging::Level::Info + logging::Level level = logging::Level::Info, + logging::logger_ptr logger = svs::logging::get() ) { - auto logger = svs::logging::get(); size_t num_nodes = range.size(); size_t num_batches = std::max( size_t{40}, lib::div_round_up(num_nodes, lib::narrow_cast(64 * 64)) diff --git a/tests/integration/vamana/index_build.cpp b/tests/integration/vamana/index_build.cpp index 637eae33..1c45d727 100644 --- a/tests/integration/vamana/index_build.cpp +++ b/tests/integration/vamana/index_build.cpp @@ -40,6 +40,10 @@ #include #include +// logger +#include "spdlog/sinks/callback_sink.h" +#include "svs/core/logging.h" + namespace { template < @@ -199,3 +203,75 @@ CATCH_TEST_CASE( ++num_threads; } } + + + +// Helper function to create a logger with a callback +std::shared_ptr create_test_logger(std::vector& captured_logs) { + auto callback_sink = std::make_shared( + [&captured_logs](const spdlog::details::log_msg& msg) { + captured_logs.emplace_back(msg.payload.data(), msg.payload.size()); + } + ); + callback_sink->set_level(spdlog::level::trace); + + auto logger = std::make_shared("test_logger", callback_sink); + logger->set_level(spdlog::level::trace); + return logger; +} + +CATCH_TEST_CASE("VamanaIndex Logging Tests", "[logging-integration]") { + using namespace svs::index::vamana; + + // Test data setup + std::vector data = {1.0f, 2.0f}; + const size_t dim = 1; + auto graph = svs::graphs::SimpleGraph(1, 64); + auto data_view = svs::data::SimpleDataView(data.data(), 1, dim); + svs::distance::DistanceL2 distance_function; + uint32_t entry_point = 0; + auto threadpool = svs::threads::DefaultThreadPool(1); + VamanaBuildParameters buildParams(1.2, 64, 10, 20, 10, true); + + CATCH_SECTION("With Custom Logger") { + std::vector captured_logs; + auto custom_logger = create_test_logger(captured_logs); + + // Create VamanaIndex, which will call the builder and construct + VamanaIndex vamana_index( + buildParams, + std::move(graph), + std::move(data_view), + entry_point, + std::move(distance_function), + std::move(threadpool), + custom_logger + ); + + // Verify the custom logger captured the log messages + CATCH_REQUIRE(captured_logs[0].find("Number of syncs:") != std::string::npos); + CATCH_REQUIRE(captured_logs[1].find("Batch Size:") != std::string::npos); + } + + CATCH_SECTION("With Default Logger") { + // Reset the test data setup + std::vector data = {1.0f, 2.0f}; + auto graph = svs::graphs::SimpleGraph(1, 64); + auto data_view = svs::data::SimpleDataView(data.data(), 1, dim); + auto threadpool = svs::threads::DefaultThreadPool(1); + + // Create VamanaIndex without passing a custom logger + VamanaIndex vamana_index( + buildParams, + std::move(graph), + std::move(data_view), + entry_point, + std::move(distance_function), + std::move(threadpool) + ); + + auto default_logger = svs::logging::get(); + CATCH_REQUIRE(vamana_index.get_logger() == default_logger); + + } +} \ No newline at end of file diff --git a/tests/svs/index/vamana/index.cpp b/tests/svs/index/vamana/index.cpp index c58e2755..bc00775e 100644 --- a/tests/svs/index/vamana/index.cpp +++ b/tests/svs/index/vamana/index.cpp @@ -148,10 +148,7 @@ CATCH_TEST_CASE("Static VamanaIndex Per-Index Logging", "[logging]") { test_logger ); - // Log a message using the logger directly - test_logger->info("Test VamanaIndex Logging"); - - // Verify the log output - CATCH_REQUIRE(captured_logs.size() == 1); - CATCH_REQUIRE(captured_logs[0] == "Test VamanaIndex Logging"); + // Verify the internal log messages + CATCH_REQUIRE(captured_logs[0].find("Number of syncs:") != std::string::npos); + CATCH_REQUIRE(captured_logs[1].find("Batch Size:") != std::string::npos); } \ No newline at end of file From e5b02c0f419bfb81ec650546b2414e0fbd6c6a9d Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Tue, 11 Mar 2025 21:08:08 -0700 Subject: [PATCH 25/33] fix: dynamic index works --- include/svs/index/vamana/dynamic_index.h | 15 +++++++-- include/svs/index/vamana/vamana_build.h | 1 - tests/svs/index/vamana/dynamic_index_2.cpp | 39 ++++++---------------- tests/svs/index/vamana/index.cpp | 3 +- 4 files changed, 25 insertions(+), 33 deletions(-) diff --git a/include/svs/index/vamana/dynamic_index.h b/include/svs/index/vamana/dynamic_index.h index 5c42afdc..e7b64020 100644 --- a/include/svs/index/vamana/dynamic_index.h +++ b/include/svs/index/vamana/dynamic_index.h @@ -236,8 +236,8 @@ class MutableVamanaIndex { auto builder = VamanaBuilder( graph_, data_, distance_, parameters, threadpool_, prefetch_parameters ); - builder.construct(1.0f, entry_point_[0]); - builder.construct(parameters.alpha, entry_point_[0]); + builder.construct(1.0f, entry_point_[0], logging::Level::Info, logger_); + builder.construct(parameters.alpha, entry_point_[0], logging::Level::Info, logger_); } /// @brief Post re-load constructor. @@ -1218,6 +1218,17 @@ template MutableVamanaIndex, Data, Dist>; +// Guide with logging +template +MutableVamanaIndex( + const VamanaBuildParameters&, + Data, + const ExternalIds&, + Dist, + Pool, + svs::logging::logger_ptr +) -> MutableVamanaIndex, Data, Dist>; + namespace detail { struct VamanaStateLoader { diff --git a/include/svs/index/vamana/vamana_build.h b/include/svs/index/vamana/vamana_build.h index 8cc6de03..0933fb67 100644 --- a/include/svs/index/vamana/vamana_build.h +++ b/include/svs/index/vamana/vamana_build.h @@ -233,7 +233,6 @@ class VamanaBuilder { double search_time = 0; double reverse_time = 0; unsigned progress_counter = 0; - svs::logging::log(logger, level, "Number of syncs: {}", num_batches); svs::logging::log(logger, level, "Batch Size: {}", batchsize); diff --git a/tests/svs/index/vamana/dynamic_index_2.cpp b/tests/svs/index/vamana/dynamic_index_2.cpp index d944034e..d4c5e166 100644 --- a/tests/svs/index/vamana/dynamic_index_2.cpp +++ b/tests/svs/index/vamana/dynamic_index_2.cpp @@ -437,35 +437,18 @@ CATCH_TEST_CASE("Dynamic MutableVamanaIndex Per-Index Logging Test", "[logging]" test_logger->set_level(spdlog::level::trace); // Setup index - std::filesystem::path configPath = test_dataset::vamana_config_file(); - auto data = svs::data::SimpleData::load(test_dataset::data_svs_file()); - auto graph = svs::graphs::SimpleGraph::load(test_dataset::graph_file()); - auto distance = svs::DistanceL2(); + std::vector data = {1.0f, 2.0f}; + std::vector initial_indices(data.size()); + std::iota(initial_indices.begin(), initial_indices.end(), 0); + svs::index::vamana::VamanaBuildParameters buildParams(1.2, 64, 10, 20, 10, true); + auto data_view = svs::data::SimpleDataView(data.data(), 2, 1); auto threadpool = svs::threads::DefaultThreadPool(1); - // Load expected build parameters - auto expected_result = test_dataset::vamana::expected_build_results( - svs::L2, svsbenchmark::Uncompressed(svs::DataType::float32) - ); - - // Use the build parameters from the expected result - svs::index::vamana::VamanaBuildParameters buildParams = - expected_result.build_parameters_.value(); - - std::vector external_ids(data.size()); - std::iota(external_ids.begin(), external_ids.end(), 0); - - // Use the build parameters from the expected result - auto dynamicIndex = svs::index::vamana::MutableVamanaIndex< - svs::graphs::SimpleGraph, - svs::data::SimpleData, - svs::distance::DistanceL2>( - buildParams, data, external_ids, distance, std::move(threadpool), test_logger + auto index = svs::index::vamana::MutableVamanaIndex( + buildParams, std::move(data_view), initial_indices, + svs::DistanceL2(), std::move(threadpool), test_logger ); - // Log a message using the logger directly - test_logger->info("Test MutableVamanaIndex Logging"); - - // Verify the log output - CATCH_REQUIRE(captured_logs.size() == 1); - CATCH_REQUIRE(captured_logs[0] == "Test MutableVamanaIndex Logging"); + // Verify the internal log messages + CATCH_REQUIRE(captured_logs[0].find("Number of syncs:") != std::string::npos); + CATCH_REQUIRE(captured_logs[1].find("Batch Size:") != std::string::npos); } \ No newline at end of file diff --git a/tests/svs/index/vamana/index.cpp b/tests/svs/index/vamana/index.cpp index bc00775e..cd549299 100644 --- a/tests/svs/index/vamana/index.cpp +++ b/tests/svs/index/vamana/index.cpp @@ -129,9 +129,8 @@ CATCH_TEST_CASE("Static VamanaIndex Per-Index Logging", "[logging]") { // Create some minimal data std::vector data = {1.0f, 2.0f}; - const size_t dim = 1; auto graph = svs::graphs::SimpleGraph(1, 64); - auto data_view = svs::data::SimpleDataView(data.data(), 1, dim); + auto data_view = svs::data::SimpleDataView(data.data(), 1, 1); svs::distance::DistanceL2 distance_function; uint32_t entry_point = 0; auto threadpool = svs::threads::DefaultThreadPool(1); From c3c314235d8cdf9f7122191abe5569a949275502 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Tue, 11 Mar 2025 22:00:14 -0700 Subject: [PATCH 26/33] fix: clustering logging changed to per idnex logging --- include/svs/index/inverted/clustering.h | 4 +- include/svs/index/inverted/memory_based.h | 2 +- .../svs/index/inverted/memory_search_params.h | 2 + include/svs/index/vamana/calibrate.h | 12 ++--- tests/svs/index/inverted/clustering.cpp | 44 +++++++++++++++++++ tests/svs/index/inverted/memory_based.cpp | 8 +--- 6 files changed, 57 insertions(+), 15 deletions(-) diff --git a/include/svs/index/inverted/clustering.h b/include/svs/index/inverted/clustering.h index 2039b456..bb80e9bb 100644 --- a/include/svs/index/inverted/clustering.h +++ b/include/svs/index/inverted/clustering.h @@ -801,7 +801,8 @@ Clustering cluster_with( const Data& data, std::span centroid_ids, const ClusteringParameters& params, - Index& primary_index + Index& primary_index, + svs::logging::logger_ptr logger = svs::logging::get() ) { for (auto id : centroid_ids) { if (id >= data.size()) { @@ -820,7 +821,6 @@ Clustering cluster_with( size_t start = 0; size_t datasize = data.size(); auto timer = lib::Timer(); - auto logger = svs::logging::get(); while (start < datasize) { size_t stop = std::min(start + batchsize, datasize); diff --git a/include/svs/index/inverted/memory_based.h b/include/svs/index/inverted/memory_based.h index 82b9d9b1..c370e87d 100644 --- a/include/svs/index/inverted/memory_based.h +++ b/include/svs/index/inverted/memory_based.h @@ -578,7 +578,7 @@ auto auto_build( // Cluster the dataset with the help of the primary index. auto clustering = cluster_with( - data, lib::as_const_span(centroids), parameters.clustering_parameters_, index + data, lib::as_const_span(centroids), parameters.clustering_parameters_, index, logger ); // Perform any post-proceseccing on the clustering. diff --git a/include/svs/index/inverted/memory_search_params.h b/include/svs/index/inverted/memory_search_params.h index 0d867fa2..a2aa98d7 100644 --- a/include/svs/index/inverted/memory_search_params.h +++ b/include/svs/index/inverted/memory_search_params.h @@ -21,6 +21,8 @@ #include "svs/lib/saveload.h" #include "svs/lib/version.h" +//logging + namespace svs::index::inverted { struct InvertedSearchParameters { diff --git a/include/svs/index/vamana/calibrate.h b/include/svs/index/vamana/calibrate.h index 7cd1f86b..c6cf4147 100644 --- a/include/svs/index/vamana/calibrate.h +++ b/include/svs/index/vamana/calibrate.h @@ -176,9 +176,9 @@ VamanaSearchParameters optimize_split_buffer( double target_recall, VamanaSearchParameters current, const F& compute_recall, - const DoSearch& do_search + const DoSearch& do_search, + svs::logging::logger_ptr logger = svs::logging::get() ) { - auto logger = svs::logging::get(); svs::logging::trace(logger, "Entering split buffer optimization routine"); assert( current.buffer_config_.get_search_window_size() == @@ -252,11 +252,11 @@ std::pair optimize_search_buffer( size_t num_neighbors, double target_recall, const ComputeRecall& compute_recall, - const DoSearch& do_search + const DoSearch& do_search, + svs::logging::logger_ptr logger = svs::logging::get() ) { using enum CalibrationParameters::SearchBufferOptimization; using dataset_type = typename Index::data_type; - auto logger = svs::logging::get(); double max_recall = std::numeric_limits::lowest(); const size_t current_capacity = current.buffer_config_.get_total_capacity(); @@ -345,9 +345,9 @@ VamanaSearchParameters tune_prefetch( const CalibrationParameters& calibration_parameters, Index& index, VamanaSearchParameters search_parameters, - const DoSearch& do_search + const DoSearch& do_search, + svs::logging::logger_ptr logger = svs::logging::get() ) { - auto logger = svs::logging::get(); svs::logging::trace(logger, "Tuning prefetch parameters"); const auto& prefetch_steps = calibration_parameters.prefetch_steps_; size_t max_lookahead = index.max_degree(); diff --git a/tests/svs/index/inverted/clustering.cpp b/tests/svs/index/inverted/clustering.cpp index e8a9e062..dca4202a 100644 --- a/tests/svs/index/inverted/clustering.cpp +++ b/tests/svs/index/inverted/clustering.cpp @@ -27,6 +27,9 @@ // stl #include +//logging +#include "spdlog/sinks/callback_sink.h" + namespace { template @@ -389,3 +392,44 @@ CATCH_TEST_CASE("Random Clustering - End to End", "[inverted][random_clustering] test_end_to_end_clustering(data, svs::DistanceIP(), 0.9f); } } + +CATCH_TEST_CASE("Clustering with Logger", "[logging]") { + // Setup logger + std::vector captured_logs; + auto callback_sink = std::make_shared( + [&captured_logs](const spdlog::details::log_msg& msg) { + captured_logs.emplace_back(msg.payload.data(), msg.payload.size()); + } + ); + callback_sink->set_level(spdlog::level::trace); // Capture all log levels + auto test_logger = std::make_shared("test_logger", callback_sink); + test_logger->set_level(spdlog::level::trace); + + // Setup cluster + auto data = svs::data::SimpleData::load(test_dataset::data_svs_file()); + auto vamana_parameters = svs::index::vamana::VamanaBuildParameters{1.2, 64, 200, 1000, 60, true}; + auto clustering_parameters = svs::index::inverted::ClusteringParameters() + .percent_centroids(svs::lib::Percent(0.1)) + .epsilon(0.05) + .max_replicas(8) + .max_cluster_size(200); + auto centroids = svs::index::inverted::randomly_select_centroids( + data.size(), + svs::lib::narrow_cast(std::floor(data.size() * clustering_parameters.percent_centroids_.value())), + clustering_parameters.seed_ + ); + auto threadpool = svs::threads::DefaultThreadPool(2); + auto index = svs::index::inverted::build_primary_index( + data, + svs::lib::as_const_span(centroids), + vamana_parameters, + svs::DistanceL2(), + std::move(threadpool) + ); + auto clustering = svs::index::inverted::cluster_with( + data, svs::lib::as_const_span(centroids), clustering_parameters, index, test_logger + ); + + // Verify the internal log messages + CATCH_REQUIRE(captured_logs[0].find("Processing batch") != std::string::npos); +} \ No newline at end of file diff --git a/tests/svs/index/inverted/memory_based.cpp b/tests/svs/index/inverted/memory_based.cpp index 52773512..e6c50925 100644 --- a/tests/svs/index/inverted/memory_based.cpp +++ b/tests/svs/index/inverted/memory_based.cpp @@ -58,10 +58,6 @@ CATCH_TEST_CASE("InvertedIndex Logging Test", "[logging]") { test_logger ); - // Log a test message - test_logger->info("Test InvertedIndex Logging"); - - // Check log context received the message - CATCH_REQUIRE(captured_logs.size() == 1); - CATCH_REQUIRE(captured_logs[0] == "Test InvertedIndex Logging"); + // Verify the internal log messages + CATCH_REQUIRE(captured_logs[0].find("Processing batch") != std::string::npos); } \ No newline at end of file From 6e379fef30efc272da6b8ea0cb2c91d5c1f04cc1 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Tue, 11 Mar 2025 22:04:29 -0700 Subject: [PATCH 27/33] fix: format --- include/svs/core/logging.h | 1 - include/svs/index/inverted/memory_based.h | 6 +++++- include/svs/index/inverted/memory_search_params.h | 2 -- include/svs/index/vamana/dynamic_index.h | 10 +++++----- include/svs/index/vamana/index.h | 4 +--- include/svs/index/vamana/vamana_build.h | 14 ++++++++++---- tests/integration/vamana/index_build.cpp | 6 ++---- tests/svs/index/inverted/clustering.cpp | 9 ++++++--- tests/svs/index/vamana/dynamic_index_2.cpp | 8 ++++++-- 9 files changed, 35 insertions(+), 25 deletions(-) diff --git a/include/svs/core/logging.h b/include/svs/core/logging.h index 0555c935..24645a5f 100644 --- a/include/svs/core/logging.h +++ b/include/svs/core/logging.h @@ -35,7 +35,6 @@ #include #include - namespace svs::logging { /// @brief Enum controlling verbosity. diff --git a/include/svs/index/inverted/memory_based.h b/include/svs/index/inverted/memory_based.h index c370e87d..29694e2e 100644 --- a/include/svs/index/inverted/memory_based.h +++ b/include/svs/index/inverted/memory_based.h @@ -578,7 +578,11 @@ auto auto_build( // Cluster the dataset with the help of the primary index. auto clustering = cluster_with( - data, lib::as_const_span(centroids), parameters.clustering_parameters_, index, logger + data, + lib::as_const_span(centroids), + parameters.clustering_parameters_, + index, + logger ); // Perform any post-proceseccing on the clustering. diff --git a/include/svs/index/inverted/memory_search_params.h b/include/svs/index/inverted/memory_search_params.h index a2aa98d7..0d867fa2 100644 --- a/include/svs/index/inverted/memory_search_params.h +++ b/include/svs/index/inverted/memory_search_params.h @@ -21,8 +21,6 @@ #include "svs/lib/saveload.h" #include "svs/lib/version.h" -//logging - namespace svs::index::inverted { struct InvertedSearchParameters { diff --git a/include/svs/index/vamana/dynamic_index.h b/include/svs/index/vamana/dynamic_index.h index e7b64020..4d8c65e1 100644 --- a/include/svs/index/vamana/dynamic_index.h +++ b/include/svs/index/vamana/dynamic_index.h @@ -1221,11 +1221,11 @@ MutableVamanaIndex(const VamanaBuildParameters&, Data, const ExternalIds&, Dist, // Guide with logging template MutableVamanaIndex( - const VamanaBuildParameters&, - Data, - const ExternalIds&, - Dist, - Pool, + const VamanaBuildParameters&, + Data, + const ExternalIds&, + Dist, + Pool, svs::logging::logger_ptr ) -> MutableVamanaIndex, Data, Dist>; diff --git a/include/svs/index/vamana/index.h b/include/svs/index/vamana/index.h index 46c0077d..be65e990 100644 --- a/include/svs/index/vamana/index.h +++ b/include/svs/index/vamana/index.h @@ -427,9 +427,7 @@ class VamanaIndex { } /// @brief Getter method for logger - svs::logging::logger_ptr get_logger() const { - return logger_; - } + svs::logging::logger_ptr get_logger() const { return logger_; } /// @brief Apply the given configuration parameters to the index. void apply(const VamanaIndexParameters& parameters) { diff --git a/include/svs/index/vamana/vamana_build.h b/include/svs/index/vamana/vamana_build.h index 0933fb67..0f3f31ad 100644 --- a/include/svs/index/vamana/vamana_build.h +++ b/include/svs/index/vamana/vamana_build.h @@ -201,10 +201,16 @@ class VamanaBuilder { ); } } - - void - construct(float alpha, Idx entry_point, logging::Level level = logging::Level::Info, logging::logger_ptr logger = svs::logging::get()) { - construct(alpha, entry_point, threads::UnitRange{0, data_.size()}, level, logger); + + void construct( + float alpha, + Idx entry_point, + logging::Level level = logging::Level::Info, + logging::logger_ptr logger = svs::logging::get() + ) { + construct( + alpha, entry_point, threads::UnitRange{0, data_.size()}, level, logger + ); } template diff --git a/tests/integration/vamana/index_build.cpp b/tests/integration/vamana/index_build.cpp index 1c45d727..e0c7b52e 100644 --- a/tests/integration/vamana/index_build.cpp +++ b/tests/integration/vamana/index_build.cpp @@ -204,10 +204,9 @@ CATCH_TEST_CASE( } } - - // Helper function to create a logger with a callback -std::shared_ptr create_test_logger(std::vector& captured_logs) { +std::shared_ptr create_test_logger(std::vector& captured_logs +) { auto callback_sink = std::make_shared( [&captured_logs](const spdlog::details::log_msg& msg) { captured_logs.emplace_back(msg.payload.data(), msg.payload.size()); @@ -272,6 +271,5 @@ CATCH_TEST_CASE("VamanaIndex Logging Tests", "[logging-integration]") { auto default_logger = svs::logging::get(); CATCH_REQUIRE(vamana_index.get_logger() == default_logger); - } } \ No newline at end of file diff --git a/tests/svs/index/inverted/clustering.cpp b/tests/svs/index/inverted/clustering.cpp index dca4202a..77820880 100644 --- a/tests/svs/index/inverted/clustering.cpp +++ b/tests/svs/index/inverted/clustering.cpp @@ -27,7 +27,7 @@ // stl #include -//logging +// logging #include "spdlog/sinks/callback_sink.h" namespace { @@ -407,7 +407,8 @@ CATCH_TEST_CASE("Clustering with Logger", "[logging]") { // Setup cluster auto data = svs::data::SimpleData::load(test_dataset::data_svs_file()); - auto vamana_parameters = svs::index::vamana::VamanaBuildParameters{1.2, 64, 200, 1000, 60, true}; + auto vamana_parameters = + svs::index::vamana::VamanaBuildParameters{1.2, 64, 200, 1000, 60, true}; auto clustering_parameters = svs::index::inverted::ClusteringParameters() .percent_centroids(svs::lib::Percent(0.1)) .epsilon(0.05) @@ -415,7 +416,9 @@ CATCH_TEST_CASE("Clustering with Logger", "[logging]") { .max_cluster_size(200); auto centroids = svs::index::inverted::randomly_select_centroids( data.size(), - svs::lib::narrow_cast(std::floor(data.size() * clustering_parameters.percent_centroids_.value())), + svs::lib::narrow_cast( + std::floor(data.size() * clustering_parameters.percent_centroids_.value()) + ), clustering_parameters.seed_ ); auto threadpool = svs::threads::DefaultThreadPool(2); diff --git a/tests/svs/index/vamana/dynamic_index_2.cpp b/tests/svs/index/vamana/dynamic_index_2.cpp index d4c5e166..fd41bc59 100644 --- a/tests/svs/index/vamana/dynamic_index_2.cpp +++ b/tests/svs/index/vamana/dynamic_index_2.cpp @@ -444,8 +444,12 @@ CATCH_TEST_CASE("Dynamic MutableVamanaIndex Per-Index Logging Test", "[logging]" auto data_view = svs::data::SimpleDataView(data.data(), 2, 1); auto threadpool = svs::threads::DefaultThreadPool(1); auto index = svs::index::vamana::MutableVamanaIndex( - buildParams, std::move(data_view), initial_indices, - svs::DistanceL2(), std::move(threadpool), test_logger + buildParams, + std::move(data_view), + initial_indices, + svs::DistanceL2(), + std::move(threadpool), + test_logger ); // Verify the internal log messages From ad80dbf75ab7c9f51286c95bc8379d25d6183a8a Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Thu, 13 Mar 2025 11:11:20 -0700 Subject: [PATCH 28/33] fix: add logger and extra check in integraiton logging test --- include/svs/index/flat/flat.h | 3 +++ include/svs/index/inverted/memory_based.h | 4 ++++ include/svs/index/vamana/dynamic_index.h | 4 +++- tests/integration/vamana/index_build.cpp | 4 +++- 4 files changed, 13 insertions(+), 2 deletions(-) diff --git a/include/svs/index/flat/flat.h b/include/svs/index/flat/flat.h index 3352e3a9..5deac303 100644 --- a/include/svs/index/flat/flat.h +++ b/include/svs/index/flat/flat.h @@ -174,6 +174,9 @@ class FlatIndex { } public: + /// @brief Getter method for logger + svs::logging::logger_ptr get_logger() const { return logger_; } + search_parameters_type get_search_parameters() const { return search_parameters_; } void set_search_parameters(const search_parameters_type& search_parameters) { diff --git a/include/svs/index/inverted/memory_based.h b/include/svs/index/inverted/memory_based.h index 29694e2e..0f7f1019 100644 --- a/include/svs/index/inverted/memory_based.h +++ b/include/svs/index/inverted/memory_based.h @@ -497,6 +497,10 @@ template class InvertedIndex { index_.save(index_config, graph, data); } + ///// Accessors + /// @brief Getter method for logger + svs::logging::logger_ptr get_logger() const { return logger_; } + private: // Tunable Parameters double refinement_epsilon_ = 10.0; diff --git a/include/svs/index/vamana/dynamic_index.h b/include/svs/index/vamana/dynamic_index.h index 4d8c65e1..b581fb94 100644 --- a/include/svs/index/vamana/dynamic_index.h +++ b/include/svs/index/vamana/dynamic_index.h @@ -290,7 +290,9 @@ class MutableVamanaIndex { scratchspace_type scratchspace() const { return scratchspace(get_search_parameters()); } ///// Accessors - + /// @brief Getter method for logger + svs::logging::logger_ptr get_logger() const { return logger_; } + /// @brief Get the alpha value used for pruning while mutating the graph. float get_alpha() const { return alpha_; } /// @brief Set the alpha value used for pruning while mutating the graph. diff --git a/tests/integration/vamana/index_build.cpp b/tests/integration/vamana/index_build.cpp index e0c7b52e..ec04b156 100644 --- a/tests/integration/vamana/index_build.cpp +++ b/tests/integration/vamana/index_build.cpp @@ -219,7 +219,7 @@ std::shared_ptr create_test_logger(std::vector& cap return logger; } -CATCH_TEST_CASE("VamanaIndex Logging Tests", "[logging-integration]") { +CATCH_TEST_CASE("VamanaIndex Logging Tests", "[logging]") { using namespace svs::index::vamana; // Test data setup @@ -250,6 +250,8 @@ CATCH_TEST_CASE("VamanaIndex Logging Tests", "[logging-integration]") { // Verify the custom logger captured the log messages CATCH_REQUIRE(captured_logs[0].find("Number of syncs:") != std::string::npos); CATCH_REQUIRE(captured_logs[1].find("Batch Size:") != std::string::npos); + auto default_logger = svs::logging::get(); + CATCH_REQUIRE(vamana_index.get_logger() != default_logger); } CATCH_SECTION("With Default Logger") { From e0968410f3ecc47bd11051f0de8928878c4f2166 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Thu, 13 Mar 2025 11:11:56 -0700 Subject: [PATCH 29/33] fix: format --- include/svs/index/vamana/dynamic_index.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/svs/index/vamana/dynamic_index.h b/include/svs/index/vamana/dynamic_index.h index b581fb94..dff383ad 100644 --- a/include/svs/index/vamana/dynamic_index.h +++ b/include/svs/index/vamana/dynamic_index.h @@ -292,7 +292,7 @@ class MutableVamanaIndex { ///// Accessors /// @brief Getter method for logger svs::logging::logger_ptr get_logger() const { return logger_; } - + /// @brief Get the alpha value used for pruning while mutating the graph. float get_alpha() const { return alpha_; } /// @brief Set the alpha value used for pruning while mutating the graph. From 3429e9a3c48e434a0c219fb70512b81831cc5ae3 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Thu, 13 Mar 2025 12:16:34 -0700 Subject: [PATCH 30/33] fix: resolveing comemnts --- include/svs/index/vamana/dynamic_index.h | 8 ++++---- include/svs/index/vamana/index.h | 2 +- tests/svs/index/vamana/dynamic_index_2.cpp | 21 +++++++++++++++++++++ 3 files changed, 26 insertions(+), 5 deletions(-) diff --git a/include/svs/index/vamana/dynamic_index.h b/include/svs/index/vamana/dynamic_index.h index dff383ad..e6831c1e 100644 --- a/include/svs/index/vamana/dynamic_index.h +++ b/include/svs/index/vamana/dynamic_index.h @@ -593,10 +593,10 @@ class MutableVamanaIndex { /// @brief Add the points with the given external IDs to the dataset. // /// When `delete_entries` is called, a soft deletion is performed, marking the entries - /// as `deleted`. When `consolidate` is called, the state of these deleted entries - /// becomes `empty`. When `add_points` is called with the `reuse_empty` flag enabled, - /// the memory is scanned from the beginning to locate and fill these empty entries with - /// new points. + /// as `deleted`. When `consolidate` is called, the state of these deleted entries becomes `empty`. + /// When `add_points` is called with the `reuse_empty` flag enabled, the + /// memory is scanned from the beginning to locate and fill these empty entries with new + /// points. /// /// @param points Dataset of points to add. /// @param external_ids The external IDs of the corresponding points. Must be a diff --git a/include/svs/index/vamana/index.h b/include/svs/index/vamana/index.h index be65e990..c7371b7f 100644 --- a/include/svs/index/vamana/index.h +++ b/include/svs/index/vamana/index.h @@ -961,7 +961,7 @@ auto auto_assemble( I{}, std::move(distance), std::move(threadpool), - logger}; + std::move(logger)}; auto config = lib::load_from_disk(config_path); index.apply(config); diff --git a/tests/svs/index/vamana/dynamic_index_2.cpp b/tests/svs/index/vamana/dynamic_index_2.cpp index fd41bc59..a3acb7f0 100644 --- a/tests/svs/index/vamana/dynamic_index_2.cpp +++ b/tests/svs/index/vamana/dynamic_index_2.cpp @@ -455,4 +455,25 @@ CATCH_TEST_CASE("Dynamic MutableVamanaIndex Per-Index Logging Test", "[logging]" // Verify the internal log messages CATCH_REQUIRE(captured_logs[0].find("Number of syncs:") != std::string::npos); CATCH_REQUIRE(captured_logs[1].find("Batch Size:") != std::string::npos); +} + +CATCH_TEST_CASE("Dynamic MutableVamanaIndex Default Logger Test", "[logging]") { + // Setup index with default logger + std::vector data = {1.0f, 2.0f}; + std::vector initial_indices(data.size()); + std::iota(initial_indices.begin(), initial_indices.end(), 0); + svs::index::vamana::VamanaBuildParameters buildParams(1.2, 64, 10, 20, 10, true); + auto data_view = svs::data::SimpleDataView(data.data(), 2, 1); + auto threadpool = svs::threads::DefaultThreadPool(1); + auto index = svs::index::vamana::MutableVamanaIndex( + buildParams, + std::move(data_view), + initial_indices, + svs::DistanceL2(), + std::move(threadpool) + ); + + // Verify that the default logger is used + auto default_logger = svs::logging::get(); + CATCH_REQUIRE(index.get_logger() == default_logger); } \ No newline at end of file From 270dd11102a12cb73e660b1549f3e81139e165f9 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Thu, 13 Mar 2025 14:02:00 -0700 Subject: [PATCH 31/33] fix: use move instead of cp for logger in some places --- include/svs/index/inverted/memory_based.h | 5 +++-- include/svs/index/vamana/dynamic_index.h | 2 +- include/svs/index/vamana/index.h | 1 - 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/svs/index/inverted/memory_based.h b/include/svs/index/inverted/memory_based.h index 0f7f1019..18e128b4 100644 --- a/include/svs/index/inverted/memory_based.h +++ b/include/svs/index/inverted/memory_based.h @@ -603,7 +603,7 @@ auto auto_build( strategy(data, clustering, HugepageAllocator()), std::move(centroids), std::move(primary_threadpool), - logger}; + std::move(logger)}; } ///// Auto Assembling. @@ -649,7 +649,8 @@ auto assemble_from_clustering( std::move(index), strategy(original, clustering, HugepageAllocator()), std::move(ids), - std::move(threadpool) + std::move(threadpool), + std::move(logger) ); } diff --git a/include/svs/index/vamana/dynamic_index.h b/include/svs/index/vamana/dynamic_index.h index e6831c1e..f37cfa16 100644 --- a/include/svs/index/vamana/dynamic_index.h +++ b/include/svs/index/vamana/dynamic_index.h @@ -1350,7 +1350,7 @@ auto auto_dynamic_assemble( std::move(distance), std::move(translator), std::move(threadpool), - logger}; + std::move(logger)}; } } // namespace svs::index::vamana diff --git a/include/svs/index/vamana/index.h b/include/svs/index/vamana/index.h index c7371b7f..20946c17 100644 --- a/include/svs/index/vamana/index.h +++ b/include/svs/index/vamana/index.h @@ -962,7 +962,6 @@ auto auto_assemble( std::move(distance), std::move(threadpool), std::move(logger)}; - auto config = lib::load_from_disk(config_path); index.apply(config); return index; From cb3f58e7e83f542d024981823311b25854fefa44 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Thu, 13 Mar 2025 14:08:00 -0700 Subject: [PATCH 32/33] fix: don't remove exra lines --- include/svs/index/vamana/vamana_build.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/svs/index/vamana/vamana_build.h b/include/svs/index/vamana/vamana_build.h index 0f3f31ad..2f31a052 100644 --- a/include/svs/index/vamana/vamana_build.h +++ b/include/svs/index/vamana/vamana_build.h @@ -239,6 +239,7 @@ class VamanaBuilder { double search_time = 0; double reverse_time = 0; unsigned progress_counter = 0; + svs::logging::log(logger, level, "Number of syncs: {}", num_batches); svs::logging::log(logger, level, "Batch Size: {}", batchsize); From f4400644df4a55b99e5c5b2bb40b52b1721ccd4c Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Thu, 13 Mar 2025 14:11:16 -0700 Subject: [PATCH 33/33] fix: no extra spece added --- include/svs/index/vamana/vamana_build.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/svs/index/vamana/vamana_build.h b/include/svs/index/vamana/vamana_build.h index 2f31a052..b20f7bc5 100644 --- a/include/svs/index/vamana/vamana_build.h +++ b/include/svs/index/vamana/vamana_build.h @@ -239,7 +239,7 @@ class VamanaBuilder { double search_time = 0; double reverse_time = 0; unsigned progress_counter = 0; - + svs::logging::log(logger, level, "Number of syncs: {}", num_batches); svs::logging::log(logger, level, "Batch Size: {}", batchsize);