Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
b72fc00
feature: logger in vamana index and unit test
yuejiaointel Feb 21, 2025
ff105b5
feature: customized global logger and per index logger support
yuejiaointel Feb 25, 2025
4dae087
fix: test work for static index
yuejiaointel Feb 26, 2025
99be5df
fix: comment out inverted for now
yuejiaointel Feb 26, 2025
6882d87
feature: add logger to auto build and auto assemble functions
yuejiaointel Feb 27, 2025
3b3136c
fix: inverted index work
yuejiaointel Feb 27, 2025
9917d9c
fix: dynamic index test working
yuejiaointel Feb 28, 2025
3810870
fix: add created test files
yuejiaointel Feb 28, 2025
766004b
fix: format and comments
yuejiaointel Mar 4, 2025
c5402ad
fix: everything works locally
yuejiaointel Mar 4, 2025
32e05e4
fix: format
yuejiaointel Mar 4, 2025
ce8d27d
fix: format
yuejiaointel Mar 4, 2025
e616eae
fix: dynamic index 2 working
yuejiaointel Mar 5, 2025
6b05251
fix: format
yuejiaointel Mar 5, 2025
ac988db
fix: fix flat test
yuejiaointel Mar 5, 2025
ee22f45
fix: remove empty space and make index test same as other tests
yuejiaointel Mar 5, 2025
88d866a
fix: remove changes on dynamic_index.cpp
yuejiaointel Mar 5, 2025
2a092ae
fix: remove changes on dynamic_index.cpp
yuejiaointel Mar 5, 2025
8321bc4
fix: format
yuejiaointel Mar 5, 2025
d40db30
fix: don't delete ref from dynamic index 2
yuejiaointel Mar 5, 2025
64209f5
fix: don't delete ref from dynamic index 2
yuejiaointel Mar 5, 2025
3e62f8d
fix: use spdlog callback instead of reinventing the wheel
yuejiaointel Mar 7, 2025
5ee991d
fix: format
yuejiaointel Mar 7, 2025
3e9fcb8
fix: vamana index internal logging change to per index logging works
yuejiaointel Mar 11, 2025
e5b02c0
fix: dynamic index works
yuejiaointel Mar 12, 2025
c3c3142
fix: clustering logging changed to per index logging
yuejiaointel Mar 12, 2025
6e379fe
fix: format
yuejiaointel Mar 12, 2025
ad80dbf
fix: add logger and extra check in integration logging test
yuejiaointel Mar 13, 2025
e096841
fix: format
yuejiaointel Mar 13, 2025
3429e9a
fix: resolving comments
yuejiaointel Mar 13, 2025
270dd11
fix: use move instead of cp for logger in some places
yuejiaointel Mar 13, 2025
cb3f58e
fix: don't remove extra lines
yuejiaointel Mar 13, 2025
f440064
fix: no extra space added
yuejiaointel Mar 13, 2025
69b9da9
Merge branch 'main' into dev/yue/fix_per_index_logging
ibhati Mar 13, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 31 additions & 6 deletions include/svs/index/flat/flat.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#pragma once

// Flat index utilities
#include "svs/core/logging.h"
#include "svs/index/flat/inserters.h"
#include "svs/index/index.h"

Expand Down Expand Up @@ -145,6 +146,8 @@ class FlatIndex {
data_storage_type data_;
[[no_unique_address]] distance_type distance_;
threads::ThreadPoolHandle threadpool_;
// SVS logger for per index logging
svs::logging::logger_ptr logger_;

// Constructs controlling the iteration strategy over the data and queries.
search_parameters_type search_parameters_{};
Expand All @@ -171,6 +174,9 @@ class FlatIndex {
}

public:
/// @brief Return the logger this index uses for per-index logging.
svs::logging::logger_ptr get_logger() const {
    return logger_;
}

/// @brief Return a copy of the search parameters currently stored in the index.
search_parameters_type get_search_parameters() const {
    return search_parameters_;
}

void set_search_parameters(const search_parameters_type& search_parameters) {
Expand All @@ -189,22 +195,35 @@ class FlatIndex {
/// instance or an integer specifying the number of threads to use. In the latter
/// case, a new default thread pool will be constructed using ``threadpool_proto``
/// as the number of threads to create.
/// @param logger spdlog logger for per-index logging customization.
///
/// @copydoc threadpool_requirements
///
template <typename ThreadPoolProto>
FlatIndex(Data data, Dist distance, ThreadPoolProto threadpool_proto)
FlatIndex(
Data data,
Dist distance,
ThreadPoolProto threadpool_proto,
svs::logging::logger_ptr logger = svs::logging::get()
)
requires std::is_same_v<Ownership, OwnsMembers>
: data_{std::move(data)}
, distance_{std::move(distance)}
, threadpool_{threads::as_threadpool(std::move(threadpool_proto))} {}
, threadpool_{threads::as_threadpool(std::move(threadpool_proto))}
, logger_{std::move(logger)} {}

template <typename ThreadPoolProto>
FlatIndex(Data& data, Dist distance, ThreadPoolProto threadpool_proto)
FlatIndex(
Data& data,
Dist distance,
ThreadPoolProto threadpool_proto,
svs::logging::logger_ptr logger = svs::logging::get()
)
requires std::is_same_v<Ownership, ReferencesMembers>
: data_{data}
, distance_{std::move(distance)}
, threadpool_{threads::as_threadpool(std::move(threadpool_proto))} {}
, threadpool_{threads::as_threadpool(std::move(threadpool_proto))}
, logger_{std::move(logger)} {}

////// Dataset Interface

Expand Down Expand Up @@ -462,6 +481,7 @@ class FlatIndex {
/// instance or an integer specifying the number of threads to use. In the latter case,
/// a new default thread pool will be constructed using ``threadpool_proto`` as the
/// number of threads to create.
/// @param logger spdlog logger for per-index logging customization.
///
/// This method provides much of the heavy lifting for constructing a Flat index from
/// a data file on disk or a dataset in memory.
Expand All @@ -472,11 +492,16 @@ class FlatIndex {
///
template <typename DataProto, typename Distance, typename ThreadPoolProto>
auto auto_assemble(
DataProto&& data_proto, Distance distance, ThreadPoolProto threadpool_proto
DataProto&& data_proto,
Distance distance,
ThreadPoolProto threadpool_proto,
svs::logging::logger_ptr logger = svs::logging::get()
) {
auto threadpool = threads::as_threadpool(std::move(threadpool_proto));
auto data = svs::detail::dispatch_load(std::forward<DataProto>(data_proto), threadpool);
return FlatIndex(std::move(data), std::move(distance), std::move(threadpool));
return FlatIndex(
std::move(data), std::move(distance), std::move(threadpool), std::move(logger)
);
}

/// @brief Alias for a short-lived flat index.
Expand Down
4 changes: 2 additions & 2 deletions include/svs/index/inverted/clustering.h
Original file line number Diff line number Diff line change
Expand Up @@ -801,7 +801,8 @@ Clustering<I> cluster_with(
const Data& data,
std::span<const I> centroid_ids,
const ClusteringParameters& params,
Index& primary_index
Index& primary_index,
svs::logging::logger_ptr logger = svs::logging::get()
) {
for (auto id : centroid_ids) {
if (id >= data.size()) {
Expand All @@ -820,7 +821,6 @@ Clustering<I> cluster_with(
size_t start = 0;
size_t datasize = data.size();
auto timer = lib::Timer();
auto logger = svs::logging::get();

while (start < datasize) {
size_t stop = std::min(start + batchsize, datasize);
Expand Down
37 changes: 29 additions & 8 deletions include/svs/index/inverted/memory_based.h
Original file line number Diff line number Diff line change
Expand Up @@ -339,12 +339,17 @@ template <typename Index, typename Cluster> class InvertedIndex {

template <threads::ThreadPool Pool>
InvertedIndex(
Index index, Cluster cluster, translator_type index_local_to_global, Pool threadpool
Index index,
Cluster cluster,
translator_type index_local_to_global,
Pool threadpool,
svs::logging::logger_ptr logger = svs::logging::get()
)
: index_{std::move(index)}
, cluster_{std::move(cluster)}
, index_local_to_global_{std::move(index_local_to_global)}
, threadpool_{std::move(threadpool)} {
, threadpool_{std::move(threadpool)}
, logger_{std::move(logger)} {
// Clear out the threadpool in the inner index - prefer to handle threading
// ourselves.
index_.set_threadpool(threads::SequentialThreadPool());
Expand Down Expand Up @@ -492,6 +497,10 @@ template <typename Index, typename Cluster> class InvertedIndex {
index_.save(index_config, graph, data);
}

///// Accessors
/// @brief Return the logger this index uses for per-index logging.
svs::logging::logger_ptr get_logger() const {
    return logger_;
}

private:
// Tunable Parameters
double refinement_epsilon_ = 10.0;
Expand All @@ -503,6 +512,9 @@ template <typename Index, typename Cluster> class InvertedIndex {

// Transient parameters.
threads::ThreadPoolHandle threadpool_;

// SVS logger for per index logging
svs::logging::logger_ptr logger_;
};

struct PickRandomly {
Expand Down Expand Up @@ -548,7 +560,8 @@ auto auto_build(
// Customizations
Strategy strategy = {},
CentroidPicker centroid_picker = {},
ClusteringOp clustering_op = {}
ClusteringOp clustering_op = {},
svs::logging::logger_ptr logger = svs::logging::get()
) {
// Perform clustering.
auto threadpool = threads::as_threadpool(std::move(threadpool_proto));
Expand All @@ -569,7 +582,11 @@ auto auto_build(

// Cluster the dataset with the help of the primary index.
auto clustering = cluster_with(
data, lib::as_const_span(centroids), parameters.clustering_parameters_, index
data,
lib::as_const_span(centroids),
parameters.clustering_parameters_,
index,
logger
);

// Perform any post-processing on the clustering.
Expand All @@ -585,7 +602,8 @@ auto auto_build(
std::move(index),
strategy(data, clustering, HugepageAllocator<std::byte>()),
std::move(centroids),
std::move(primary_threadpool)};
std::move(primary_threadpool),
std::move(logger)};
}

///// Auto Assembling.
Expand All @@ -601,7 +619,8 @@ auto assemble_from_clustering(
Strategy strategy,
const std::filesystem::path& index_config,
const std::filesystem::path& graph,
ThreadPoolProto threadpool_proto
ThreadPoolProto threadpool_proto,
svs::logging::logger_ptr logger = svs::logging::get()
) {
auto threadpool = threads::as_threadpool(std::move(threadpool_proto));
auto original = svs::detail::dispatch_load(std::move(data_proto), threadpool);
Expand All @@ -621,15 +640,17 @@ auto assemble_from_clustering(
return local_data;
}),
distance,
1
1,
logger
);

// Create the clustering and return the final results.
return InvertedIndex(
std::move(index),
strategy(original, clustering, HugepageAllocator<std::byte>()),
std::move(ids),
std::move(threadpool)
std::move(threadpool),
std::move(logger)
);
}

Expand Down
12 changes: 6 additions & 6 deletions include/svs/index/vamana/calibrate.h
Original file line number Diff line number Diff line change
Expand Up @@ -176,9 +176,9 @@ VamanaSearchParameters optimize_split_buffer(
double target_recall,
VamanaSearchParameters current,
const F& compute_recall,
const DoSearch& do_search
const DoSearch& do_search,
svs::logging::logger_ptr logger = svs::logging::get()
) {
auto logger = svs::logging::get();
svs::logging::trace(logger, "Entering split buffer optimization routine");
assert(
current.buffer_config_.get_search_window_size() ==
Expand Down Expand Up @@ -252,11 +252,11 @@ std::pair<VamanaSearchParameters, bool> optimize_search_buffer(
size_t num_neighbors,
double target_recall,
const ComputeRecall& compute_recall,
const DoSearch& do_search
const DoSearch& do_search,
svs::logging::logger_ptr logger = svs::logging::get()
) {
using enum CalibrationParameters::SearchBufferOptimization;
using dataset_type = typename Index::data_type;
auto logger = svs::logging::get();

double max_recall = std::numeric_limits<double>::lowest();
const size_t current_capacity = current.buffer_config_.get_total_capacity();
Expand Down Expand Up @@ -345,9 +345,9 @@ VamanaSearchParameters tune_prefetch(
const CalibrationParameters& calibration_parameters,
Index& index,
VamanaSearchParameters search_parameters,
const DoSearch& do_search
const DoSearch& do_search,
svs::logging::logger_ptr logger = svs::logging::get()
) {
auto logger = svs::logging::get();
svs::logging::trace(logger, "Tuning prefetch parameters");
const auto& prefetch_steps = calibration_parameters.prefetch_steps_;
size_t max_lookahead = index.max_degree();
Expand Down
46 changes: 36 additions & 10 deletions include/svs/index/vamana/dynamic_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,9 @@ class MutableVamanaIndex {
float alpha_ = 1.2;
bool use_full_search_history_ = true;

// SVS logger for per index logging
svs::logging::logger_ptr logger_;

// Methods
public:
// Constructors
Expand All @@ -167,7 +170,9 @@ class MutableVamanaIndex {
Idx entry_point,
Dist distance_function,
const ExternalIds& external_ids,
ThreadPoolProto threadpool_proto
ThreadPoolProto threadpool_proto,
// Optional logger parameter
svs::logging::logger_ptr logger = svs::logging::get()
)
: graph_{std::move(graph)}
, data_{std::move(data)}
Expand All @@ -178,7 +183,9 @@ class MutableVamanaIndex {
, distance_{std::move(distance_function)}
, threadpool_{threads::as_threadpool(std::move(threadpool_proto))}
, search_parameters_{vamana::construct_default_search_parameters(data_)}
, construction_window_size_{2 * graph.max_degree()} {
, construction_window_size_{2 * graph.max_degree()}
// The logger is accepted as a constructor parameter and moved into place.
, logger_{std::move(logger)} {
translator_.insert(external_ids, threads::UnitRange<Idx>(0, external_ids.size()));
}

Expand All @@ -191,7 +198,8 @@ class MutableVamanaIndex {
Data data,
const ExternalIds& external_ids,
Dist distance_function,
ThreadPoolProto threadpool_proto
ThreadPoolProto threadpool_proto,
svs::logging::logger_ptr logger = svs::logging::get()
)
: graph_(Graph{data.size(), parameters.graph_max_degree})
, data_(std::move(data))
Expand All @@ -206,7 +214,8 @@ class MutableVamanaIndex {
, max_candidates_(parameters.max_candidate_pool_size)
, prune_to_(parameters.prune_to)
, alpha_(parameters.alpha)
, use_full_search_history_{parameters.use_full_search_history} {
, use_full_search_history_{parameters.use_full_search_history}
, logger_{std::move(logger)} {
// Setup the initial translation of external to internal ids.
translator_.insert(external_ids, threads::UnitRange<Idx>(0, external_ids.size()));

Expand All @@ -220,8 +229,8 @@ class MutableVamanaIndex {
auto builder = VamanaBuilder(
graph_, data_, distance_, parameters, threadpool_, prefetch_parameters
);
builder.construct(1.0f, entry_point_[0]);
builder.construct(parameters.alpha, entry_point_[0]);
builder.construct(1.0f, entry_point_[0], logging::Level::Info, logger_);
builder.construct(parameters.alpha, entry_point_[0], logging::Level::Info, logger_);
}

/// @brief Post re-load constructor.
Expand All @@ -240,7 +249,8 @@ class MutableVamanaIndex {
graph_type graph,
const Dist& distance_function,
IDTranslator translator,
Pool threadpool
Pool threadpool,
svs::logging::logger_ptr logger = svs::logging::get()
)
: graph_{std::move(graph)}
, data_{std::move(data)}
Expand All @@ -255,7 +265,8 @@ class MutableVamanaIndex {
, max_candidates_{config.build_parameters.max_candidate_pool_size}
, prune_to_{config.build_parameters.prune_to}
, alpha_{config.build_parameters.alpha}
, use_full_search_history_{config.build_parameters.use_full_search_history} {}
, use_full_search_history_{config.build_parameters.use_full_search_history}
, logger_{std::move(logger)} {}

///// Scratchspace
scratchspace_type scratchspace(const search_parameters_type& sp) const {
Expand All @@ -272,6 +283,8 @@ class MutableVamanaIndex {
/// @brief Return the scratch space produced by the parameterized overload when it is
/// given the result of ``get_search_parameters()``.
scratchspace_type scratchspace() const {
    return scratchspace(get_search_parameters());
}

///// Accessors
/// @brief Return the logger this index uses for per-index logging.
svs::logging::logger_ptr get_logger() const {
    return logger_;
}

/// @brief Return the alpha value used for pruning while the graph is mutated.
float get_alpha() const {
    return alpha_;
}
Expand Down Expand Up @@ -1200,6 +1213,17 @@ template <typename Data, typename Dist, typename ExternalIds, threads::ThreadPoo
MutableVamanaIndex(const VamanaBuildParameters&, Data, const ExternalIds&, Dist, Pool)
-> MutableVamanaIndex<graphs::SimpleBlockedGraph<uint32_t>, Data, Dist>;

// Deduction guide for construction from build parameters when a logger is passed as
// the trailing argument. The graph type is fixed to
// ``graphs::SimpleBlockedGraph<uint32_t>``; ``Data`` and ``Dist`` are taken from the
// corresponding arguments.
template <typename Data, typename Dist, typename ExternalIds, threads::ThreadPool Pool>
MutableVamanaIndex(
const VamanaBuildParameters&,
Data,
const ExternalIds&,
Dist,
Pool,
svs::logging::logger_ptr
) -> MutableVamanaIndex<graphs::SimpleBlockedGraph<uint32_t>, Data, Dist>;

namespace detail {

struct VamanaStateLoader {
Expand Down Expand Up @@ -1251,7 +1275,8 @@ auto auto_dynamic_assemble(
// to easily benchmark the static versus dynamic implementation.
//
// This is an internal API and should not be considered officially supported nor stable.
bool debug_load_from_static = false
bool debug_load_from_static = false,
svs::logging::logger_ptr logger = svs::logging::get()
) {
// Load the dataset
auto threadpool = threads::as_threadpool(std::move(threadpool_proto));
Expand Down Expand Up @@ -1317,7 +1342,8 @@ auto auto_dynamic_assemble(
std::move(graph),
std::move(distance),
std::move(translator),
std::move(threadpool)};
std::move(threadpool),
std::move(logger)};
}

} // namespace svs::index::vamana
Loading
Loading