From f5090d7780974c79313ac1aae65738480d6eddf7 Mon Sep 17 00:00:00 2001 From: zzzxl1993 <474696115@qq.com> Date: Tue, 2 Jan 2024 17:55:58 +0800 Subject: [PATCH] [opt](position) add position iterator interface --- src/core/CLucene/index/SegmentTermDocs.cpp | 4 +-- src/core/CLucene/index/_SegmentHeader.h | 4 +-- .../CLucene/search/query/DcoIdSetIterator.h | 16 ---------- src/core/CLucene/search/query/TermIterator.h | 29 ++++++++++--------- .../search/query/TermPositionIterator.h | 23 +++++++++++++++ src/core/CMakeLists.txt | 2 +- 6 files changed, 44 insertions(+), 34 deletions(-) delete mode 100644 src/core/CLucene/search/query/DcoIdSetIterator.h create mode 100644 src/core/CLucene/search/query/TermPositionIterator.h diff --git a/src/core/CLucene/index/SegmentTermDocs.cpp b/src/core/CLucene/index/SegmentTermDocs.cpp index 9108f1dfd52..e346dc0ca24 100644 --- a/src/core/CLucene/index/SegmentTermDocs.cpp +++ b/src/core/CLucene/index/SegmentTermDocs.cpp @@ -19,7 +19,7 @@ CL_NS_DEF(index) SegmentTermDocs::SegmentTermDocs(const SegmentReader *_parent) : parent(_parent), freqStream(_parent->freqStream->clone()), - count(0), df(0), deletedDocs(_parent->deletedDocs), _doc(0), _freq(0), skipInterval(_parent->tis->getSkipInterval()), + count(0), df(0), deletedDocs(_parent->deletedDocs), _doc(-1), _freq(0), skipInterval(_parent->tis->getSkipInterval()), maxSkipLevels(_parent->tis->getMaxSkipLevels()), skipListReader(NULL), freqBasePointer(0), proxBasePointer(0), skipPointer(0), haveSkipped(false), pointer(0), pointerMax(0), indexVersion_(_parent->_fieldInfos->getIndexVersion()), hasProx(_parent->_fieldInfos->hasProx()), buffer_(freqStream, hasProx, indexVersion_) { @@ -73,7 +73,7 @@ void SegmentTermDocs::seek(const TermInfo *ti, Term *term) { df = 0; } else {// punt case df = ti->docFreq; - _doc = 0; + _doc = -1; freqBasePointer = ti->freqPointer; proxBasePointer = ti->proxPointer; skipPointer = freqBasePointer + ti->skipOffset; diff --git a/src/core/CLucene/index/_SegmentHeader.h b/src/core/CLucene/index/_SegmentHeader.h index bf988a2f273..c1f01e7cecb 100644 --- a/src/core/CLucene/index/_SegmentHeader.h +++ b/src/core/CLucene/index/_SegmentHeader.h @@ -93,8 +93,8 @@ class SegmentTermDocs:public virtual TermDocs { int32_t count; int32_t df; CL_NS(util)::BitSet* deletedDocs; - int32_t _doc; - int32_t _freq; + int32_t _doc = -1; + int32_t _freq = 0; int32_t docs[PFOR_BLOCK_SIZE]; // buffered doc numbers int32_t freqs[PFOR_BLOCK_SIZE]; // buffered term freqs int32_t pointer; diff --git a/src/core/CLucene/search/query/DcoIdSetIterator.h b/src/core/CLucene/search/query/DcoIdSetIterator.h deleted file mode 100644 index 88aa4313576..00000000000 --- a/src/core/CLucene/search/query/DcoIdSetIterator.h +++ /dev/null @@ -1,16 +0,0 @@ -#pragma once - -#include "CLucene/index/DocRange.h" - -class DocIdSetIterator { -public: - DocIdSetIterator() = default; - virtual ~DocIdSetIterator() = default; - - virtual int32_t docID() = 0; - virtual int32_t nextDoc() = 0; - virtual int32_t advance(int32_t target) = 0; - - virtual int32_t docFreq() const = 0; - virtual bool readRange(DocRange* docRange) const = 0; -}; \ No newline at end of file diff --git a/src/core/CLucene/search/query/TermIterator.h b/src/core/CLucene/search/query/TermIterator.h index e0cf23a4fb6..3eb22a254de 100644 --- a/src/core/CLucene/search/query/TermIterator.h +++ b/src/core/CLucene/search/query/TermIterator.h @@ -1,51 +1,54 @@ #pragma once -#include "CLucene/search/query/DcoIdSetIterator.h" #include "CLucene/index/Terms.h" #include +#include CL_NS_USE(index) -class TermIterator : public DocIdSetIterator { +class TermIterator { public: TermIterator() = default; - TermIterator(TermDocs* termDocs) : termDocs_(termDocs) { + TermIterator(TermDocs* termDocs) + : termDocs_(termDocs) { } - virtual ~TermIterator() = default; - - bool isEmpty() { + inline bool isEmpty() const { return termDocs_ == nullptr; } - int32_t docID() override { - uint32_t docId = termDocs_->doc(); + inline int32_t docID() const { + int32_t docId = termDocs_->doc(); return docId >= INT_MAX ? INT_MAX : docId; } - int32_t nextDoc() override { + inline int32_t freq() const { + return termDocs_->freq(); + } + + inline int32_t nextDoc() const { if (termDocs_->next()) { return termDocs_->doc(); } return INT_MAX; } - int32_t advance(int32_t target) override { + inline int32_t advance(int32_t target) const { if (termDocs_->skipTo(target)) { return termDocs_->doc(); } return INT_MAX; } - int32_t docFreq() const override { + inline int32_t docFreq() const { return termDocs_->docFreq(); } - bool readRange(DocRange* docRange) const override { + inline bool readRange(DocRange* docRange) const { return termDocs_->readRange(docRange); } -private: +protected: TermDocs* termDocs_ = nullptr; }; \ No newline at end of file diff --git a/src/core/CLucene/search/query/TermPositionIterator.h b/src/core/CLucene/search/query/TermPositionIterator.h new file mode 100644 index 00000000000..d64af4098fb --- /dev/null +++ b/src/core/CLucene/search/query/TermPositionIterator.h @@ -0,0 +1,23 @@ +#pragma once + +#include "CLucene/search/query/TermIterator.h" +#include "CLucene/index/Terms.h" + +#include + +CL_NS_USE(index) + +class TermPositionIterator : public TermIterator { +public: + TermPositionIterator() = default; + TermPositionIterator(TermPositions* termPositions) + : TermIterator(termPositions), termPositions_(termPositions) { + } + + inline int32_t nextPosition() const { + return termPositions_->nextPosition(); + } + +private: + TermPositions* termPositions_ = nullptr; +}; \ No newline at end of file diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index e1c13305aa5..b9a09bb3065 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -199,8 +199,8 @@ SET(clucene_core_Files ./CLucene/search/spans/SpanWeight.cpp ./CLucene/search/spans/SpanWeight.h ./CLucene/search/spans/TermSpans.cpp - ./CLucene/search/query/DcoIdSetIterator.h ./CLucene/search/query/TermIterator.h + ./CLucene/search/query/TermPositionIterator.h ) #if USE_SHARED_OBJECT_FILES then we link directly to the object files (means rebuilding them for the core)