From 7ee46851aee2f89df1abd7bc9408e6a7ace729b9 Mon Sep 17 00:00:00 2001 From: amory Date: Mon, 18 Mar 2024 10:25:36 +0800 Subject: [PATCH] support multi add different field for one doc (#200) --- src/core/CLucene/document/Document.cpp | 7 ++++++ src/core/CLucene/document/Document.h | 7 +++++- src/core/CLucene/index/SDocumentWriter.cpp | 25 ++++++++++++++++++++++ src/core/CLucene/index/SDocumentWriter.h | 2 ++ 4 files changed, 40 insertions(+), 1 deletion(-) diff --git a/src/core/CLucene/document/Document.cpp b/src/core/CLucene/document/Document.cpp index 69ba2af1201..d3da684b155 100644 --- a/src/core/CLucene/document/Document.cpp +++ b/src/core/CLucene/document/Document.cpp @@ -85,6 +85,13 @@ CL_NS_DEF(document) return boost; } + void Document::setNeedResetFieldData(bool needResetFieldData) { + this->needResetFieldData = needResetFieldData; + } + + bool Document::getNeedResetFieldData() const { + return needResetFieldData; + } Field* Document::getField(const TCHAR* name) const{ CND_PRECONDITION(name != NULL, "name is NULL"); diff --git a/src/core/CLucene/document/Document.h b/src/core/CLucene/document/Document.h index 400ec591229..2dc59cb9937 100644 --- a/src/core/CLucene/document/Document.h +++ b/src/core/CLucene/document/Document.h @@ -35,6 +35,7 @@ class CLUCENE_EXPORT Document:LUCENE_BASE { private: FieldsType* _fields; float_t boost; + bool needResetFieldData = false; public: /** Constructs a new document with no fields. */ Document(); @@ -66,7 +67,11 @@ class CLUCENE_EXPORT Document:LUCENE_BASE { * * @see #setBoost(float_t) */ - float_t getBoost() const; + float_t getBoost() const; + + void setNeedResetFieldData(bool needResetFieldData); + + bool getNeedResetFieldData() const; /** *

Adds a field to a document. Several fields may be added with diff --git a/src/core/CLucene/index/SDocumentWriter.cpp b/src/core/CLucene/index/SDocumentWriter.cpp index c757ba1a42a..fa0f992349c 100644 --- a/src/core/CLucene/index/SDocumentWriter.cpp +++ b/src/core/CLucene/index/SDocumentWriter.cpp @@ -126,6 +126,29 @@ SDocumentsWriter::ThreadState::~ThreadState() { _CLDELETE(allFieldDataArray.values[i]); } +template +void SDocumentsWriter::ThreadState::resetCurrentFieldData(Document *doc) { + const Document::FieldsType &docFields = *doc->getFields(); + const int32_t numDocFields = docFields.size(); + + if (FieldData* fp = fieldDataArray.values[0]; fp && numDocFields > 0) { + numFieldData = 1; + // reset fp for new fields + fp->fieldCount = 0; + fp->docFields.deleteValues(); + fp->docFields.resize(1); + for (int32_t i = 0; i < numDocFields; i++) { + Field *field = docFields[i]; + if (fp->fieldCount == fp->docFields.length) { + fp->docFields.resize(fp->docFields.length * 2); + } + + fp->docFields.values[fp->fieldCount++] = field; + } + } + return; +} + template typename SDocumentsWriter::ThreadState *SDocumentsWriter::getThreadState(Document *doc) { if (threadState == nullptr) { @@ -135,6 +158,8 @@ typename SDocumentsWriter::ThreadState *SDocumentsWriter::getThreadState(D if (segment.empty()) { segment = writer->newSegmentName(); threadState->init(doc, nextDocID); + } else if (doc->getNeedResetFieldData()) { + threadState->resetCurrentFieldData(doc); } threadState->docID = nextDocID; diff --git a/src/core/CLucene/index/SDocumentWriter.h b/src/core/CLucene/index/SDocumentWriter.h index b1217ba18b7..3dd98186635 100644 --- a/src/core/CLucene/index/SDocumentWriter.h +++ b/src/core/CLucene/index/SDocumentWriter.h @@ -390,6 +390,8 @@ class SDocumentsWriter : public IDocumentsWriter { /** Tokenizes the fields of a document into Postings */ void processDocument(CL_NS(analysis)::Analyzer *sanalyzer); + void resetCurrentFieldData(CL_NS(document)::Document *doc); + /** If there are fields we've seen but did not see again * in the last run, then free them up. Also reduce * postings hash size. */