Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions be/src/olap/rowset/beta_rowset_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,9 @@ Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context
_read_options.column_predicates.insert(_read_options.column_predicates.end(),
_read_context->predicates->begin(),
_read_context->predicates->end());
LOG_INFO("Rowset reader, read options column predicates size: {}",
_read_options.column_predicates.size());

for (auto pred : *(_read_context->predicates)) {
if (_read_options.col_id_to_predicates.count(pred->column_id()) < 1) {
_read_options.col_id_to_predicates.insert(
Expand Down Expand Up @@ -185,6 +188,10 @@ Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context
_read_options.column_predicates.insert(_read_options.column_predicates.end(),
_read_context->value_predicates->begin(),
_read_context->value_predicates->end());
LOG_INFO(
"Rowset reader, read options add value predicates, column predicates size now: "
"{}",
_read_options.column_predicates.size());
for (auto pred : *(_read_context->value_predicates)) {
if (_read_options.col_id_to_predicates.count(pred->column_id()) < 1) {
_read_options.col_id_to_predicates.insert(
Expand Down
2 changes: 1 addition & 1 deletion be/src/olap/rowset/segment_v2/ann_index_iterator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ Status AnnIndexIterator::read_from_index(const IndexParam& param) {
}

Status AnnIndexIterator::range_search(const RangeSearchParams& params,
const CustomSearchParams& custom_params,
const VectorSearchUserParams& custom_params,
RangeSearchResult* result) {
if (_ann_reader == nullptr) {
return Status::Error<ErrorCode::INDEX_INVALID_PARAMETERS>("_ann_reader is null");
Expand Down
8 changes: 3 additions & 5 deletions be/src/olap/rowset/segment_v2/ann_index_iterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,15 @@
#include "gutil/integral_types.h"
#include "olap/rowset/segment_v2/ann_index_reader.h"
#include "olap/rowset/segment_v2/index_iterator.h"
#include "runtime/runtime_state.h"

namespace doris::segment_v2 {

struct AnnIndexParam {
const float* query_value;
const size_t query_value_size;
size_t limit;
doris::VectorSearchUserParams _user_params;
roaring::Roaring* roaring;
std::unique_ptr<std::vector<float>> distance = nullptr;
std::unique_ptr<std::vector<uint64_t>> row_ids = nullptr;
Expand All @@ -48,10 +50,6 @@ struct RangeSearchParams {
virtual ~RangeSearchParams() = default;
};

struct CustomSearchParams {
int ef_search = 16;
};

struct RangeSearchResult {
std::shared_ptr<roaring::Roaring> roaring;
std::unique_ptr<std::vector<uint64_t>> row_ids;
Expand Down Expand Up @@ -80,7 +78,7 @@ class AnnIndexIterator : public IndexIterator {
bool has_null() override { return true; }

MOCK_FUNCTION Status range_search(const RangeSearchParams& params,
const CustomSearchParams& custom_params,
const VectorSearchUserParams& custom_params,
RangeSearchResult* result);

private:
Expand Down
31 changes: 25 additions & 6 deletions be/src/olap/rowset/segment_v2/ann_index_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include "common/config.h"
#include "olap/rowset/segment_v2/index_file_reader.h"
#include "olap/rowset/segment_v2/inverted_index_compound_reader.h"
#include "runtime/runtime_state.h"
#include "vector/faiss_vector_index.h"
#include "vector/vector_index.h"

Expand All @@ -49,6 +50,9 @@ AnnIndexReader::AnnIndexReader(const TabletIndex* index_meta,
auto it = index_properties.find("index_type");
DCHECK(it != index_properties.end());
_index_type = it->second;
it = index_properties.find("metric_type");
DCHECK(it != index_properties.end());
_metric_type = VectorIndex::string_to_metric(it->second);
}

Status AnnIndexReader::new_iterator(const io::IOContext& io_ctx, OlapReaderStatistics* stats,
Expand All @@ -71,16 +75,27 @@ Status AnnIndexReader::load_index(io::IOContext* io_ctx) {
}

Status AnnIndexReader::query(io::IOContext* io_ctx, AnnIndexParam* param) {
#ifndef BE_TEST
RETURN_IF_ERROR(_index_file_reader->init(config::inverted_index_read_buffer_size, io_ctx));
RETURN_IF_ERROR(load_index(io_ctx));
#endif
DCHECK(_vector_index != nullptr);
const float* query_vec = param->query_value;
const int limit = param->limit;
IndexSearchParameters index_search_params;
IndexSearchResult index_search_result;
index_search_params.roaring = param->roaring;
RETURN_IF_ERROR(_vector_index->ann_topn_search(query_vec, limit, index_search_params,
index_search_result));
if (_index_type == "hnsw") {
HNSWSearchParameters hnsw_search_params;
hnsw_search_params.roaring = param->roaring;
hnsw_search_params.ef_search = param->_user_params.hnsw_ef_search;
hnsw_search_params.check_relative_distance =
param->_user_params.hnsw_check_relative_distance;
hnsw_search_params.bounded_queue = param->_user_params.hnsw_bounded_queue;
RETURN_IF_ERROR(_vector_index->ann_topn_search(query_vec, limit, hnsw_search_params,
index_search_result));
} else {
throw Status::NotSupported("Unsupported index type: {}", _index_type);
}

DCHECK(index_search_result.roaring != nullptr);
DCHECK(index_search_result.distances != nullptr);
DCHECK(index_search_result.row_ids != nullptr);
Expand All @@ -92,17 +107,21 @@ Status AnnIndexReader::query(io::IOContext* io_ctx, AnnIndexParam* param) {
}

Status AnnIndexReader::range_search(const RangeSearchParams& params,
const CustomSearchParams& custom_params,
const VectorSearchUserParams& custom_params,
RangeSearchResult* result, io::IOContext* io_ctx) {
#ifndef BE_TEST
RETURN_IF_ERROR(_index_file_reader->init(config::inverted_index_read_buffer_size, io_ctx));
RETURN_IF_ERROR(load_index(io_ctx));
#endif
DCHECK(_vector_index != nullptr);
IndexSearchResult search_result;
std::unique_ptr<IndexSearchParameters> search_param = nullptr;

if (_index_type == "hnsw") {
auto hnsw_param = std::make_unique<HNSWSearchParameters>();
hnsw_param->ef_search = custom_params.ef_search;
hnsw_param->ef_search = custom_params.hnsw_ef_search;
hnsw_param->check_relative_distance = custom_params.hnsw_check_relative_distance;
hnsw_param->bounded_queue = custom_params.hnsw_bounded_queue;
search_param = std::move(hnsw_param);
} else {
throw Status::NotSupported("Unsupported index type: {}", _index_type);
Expand Down
9 changes: 6 additions & 3 deletions be/src/olap/rowset/segment_v2/ann_index_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,13 @@

#include "olap/rowset/segment_v2/index_reader.h"
#include "olap/tablet_schema.h"
#include "runtime/runtime_state.h"
#include "vector/vector_index.h"

namespace doris::segment_v2 {

struct AnnIndexParam;
struct RangeSearchParams;
struct CustomSearchParams;
struct RangeSearchResult;

class IndexFileReader;
Expand All @@ -44,21 +44,24 @@ class AnnIndexReader : public IndexReader {

Status query(io::IOContext* io_ctx, AnnIndexParam* param);

Status range_search(const RangeSearchParams& params, const CustomSearchParams& custom_params,
RangeSearchResult* result, io::IOContext* io_ctx = nullptr);
Status range_search(const RangeSearchParams& params,
const VectorSearchUserParams& custom_params, RangeSearchResult* result,
io::IOContext* io_ctx = nullptr);

uint64_t get_index_id() const override { return _index_meta.index_id(); }

Status new_iterator(const io::IOContext& io_ctx, OlapReaderStatistics* stats,
RuntimeState* runtime_state,
std::unique_ptr<IndexIterator>* iterator) override;
VectorIndex::Metric get_metric_type() const { return _metric_type; }

private:
TabletIndex _index_meta;
std::shared_ptr<IndexFileReader> _index_file_reader;
std::unique_ptr<VectorIndex> _vector_index;
// TODO: Use integer.
std::string _index_type;
VectorIndex::Metric _metric_type;
};

using AnnIndexReaderPtr = std::shared_ptr<AnnIndexReader>;
Expand Down
32 changes: 17 additions & 15 deletions be/src/olap/rowset/segment_v2/ann_index_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

#include <cstddef>
#include <memory>
#include <string>

#include "olap/rowset/segment_v2/inverted_index_fs_directory.h"

Expand Down Expand Up @@ -57,21 +58,22 @@ Status AnnIndexColumnWriter::init() {

_vector_index = nullptr;
const auto& properties = _index_meta->properties();
std::string index_type = get_or_default(properties, INDEX_TYPE, "");
if (index_type == "hnsw") {
std::shared_ptr<FaissVectorIndex> faiss_index = std::make_shared<FaissVectorIndex>();
FaissBuildParameter builderParameter;
builderParameter.index_type = FaissBuildParameter::string_to_index_type("hnsw");
builderParameter.d = std::stoi(get_or_default(properties, DIM, "512"));
builderParameter.m = std::stoi(get_or_default(properties, MAX_DEGREE, "32"));
builderParameter.quantilizer = FaissBuildParameter::string_to_quantilizer(
get_or_default(properties, QUANTILIZER, "flat"));
faiss_index->set_build_params(builderParameter);
_vector_index = faiss_index;
} else {
return Status::NotSupported("Unsupported index type: " + index_type);
}

const std::string index_type = get_or_default(properties, INDEX_TYPE, "hnsw");
const std::string metric_type = get_or_default(properties, METRIC_TYPE, "l2");
const std::string quantilizer = get_or_default(properties, QUANTILIZER, "flat");
FaissBuildParameter builderParameter;
std::shared_ptr<FaissVectorIndex> faiss_index = std::make_shared<FaissVectorIndex>();
builderParameter.index_type = FaissBuildParameter::string_to_index_type(index_type);
builderParameter.d = std::stoi(get_or_default(properties, DIM, "512"));
builderParameter.m = std::stoi(get_or_default(properties, MAX_DEGREE, "32"));
builderParameter.pq_m = std::stoi(get_or_default(properties, PQ_M, "-1")); // -1 means not set

builderParameter.metric_type = FaissBuildParameter::string_to_metric_type(metric_type);
builderParameter.quantilizer = FaissBuildParameter::string_to_quantilizer(quantilizer);

faiss_index->set_build_params(builderParameter);

_vector_index = faiss_index;
return Status::OK();
}

Expand Down
1 change: 1 addition & 0 deletions be/src/olap/rowset/segment_v2/ann_index_writer.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ class AnnIndexColumnWriter : public IndexColumnWriter {
static constexpr const char* INDEX_TYPE = "index_type";
static constexpr const char* METRIC_TYPE = "metric_type";
static constexpr const char* QUANTILIZER = "quantilizer";
static constexpr const char* PQ_M = "pq_m";
static constexpr const char* DIM = "dim";
static constexpr const char* MAX_DEGREE = "max_degree";

Expand Down
Loading