Skip to content

Commit

Permalink
[Feature]Support array reuse field (#219)
Browse files Browse the repository at this point in the history
* Support adding different fields to the same document multiple times (#200)

* Fix field data reset: deleting the values did not reset the array length (#208)
  • Loading branch information
amorynan authored Jun 5, 2024
1 parent 692fdf7 commit a28adab
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 1 deletion.
7 changes: 7 additions & 0 deletions src/core/CLucene/document/Document.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,13 @@ CL_NS_DEF(document)
return boost;
}

/**
 * Stores the "need reset field data" flag on this document.
 *
 * @param needResetFieldData new value of the flag; it is later reported
 *        unchanged by getNeedResetFieldData().
 */
void Document::setNeedResetFieldData(bool needResetFieldData) {
// The parameter shadows the member of the same name, hence the explicit this->.
this->needResetFieldData = needResetFieldData;
}

bool Document::getNeedResetFieldData() const {
return needResetFieldData;
}

Field* Document::getField(const TCHAR* name) const{
CND_PRECONDITION(name != NULL, "name is NULL");
Expand Down
7 changes: 6 additions & 1 deletion src/core/CLucene/document/Document.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ class CLUCENE_EXPORT Document:LUCENE_BASE {
private:
FieldsType* _fields;
float_t boost;
/** Flag accessed through setNeedResetFieldData()/getNeedResetFieldData(); off by default. */
bool needResetFieldData = false;
public:
/** Constructs a new document with no fields. */
Document();
Expand Down Expand Up @@ -66,7 +67,11 @@ class CLUCENE_EXPORT Document:LUCENE_BASE {
*
* @see #setBoost(float_t)
*/
float_t getBoost() const;
float_t getBoost() const;

/**
 * Sets the "need reset field data" flag of this document.
 *
 * @param needResetFieldData the new value of the flag
 * @see #getNeedResetFieldData()
 */
void setNeedResetFieldData(bool needResetFieldData);

/**
 * Returns the "need reset field data" flag of this document.
 *
 * @see #setNeedResetFieldData(bool)
 */
bool getNeedResetFieldData() const;

/**
* <p>Adds a field to a document. Several fields may be added with
Expand Down
27 changes: 27 additions & 0 deletions src/core/CLucene/index/SDocumentWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,31 @@ SDocumentsWriter<T>::ThreadState::~ThreadState() {
_CLDELETE(allFieldDataArray.values[i]);
}

/**
 * Refills the first entry of fieldDataArray with the fields of the given document.
 *
 * Nothing happens when that first entry is null or when the document has no
 * fields. Otherwise numFieldData is set to 1, the entry's docFields array is
 * emptied and then repopulated with every field of doc, doubling its capacity
 * whenever it is full; fieldCount ends up equal to the number of fields copied.
 *
 * @param doc document whose fields are copied in; must not be null
 */
template<typename T>
void SDocumentsWriter<T>::ThreadState::resetCurrentFieldData(Document *doc) {
    const Document::FieldsType &incoming = *doc->getFields();
    const int32_t incomingCount = incoming.size();

    FieldData *target = fieldDataArray.values[0];
    if (target == nullptr || incomingCount <= 0) {
        return;
    }

    numFieldData = 1;

    // Start over with an empty field list for the new document.
    target->fieldCount = 0;
    // Per the original author's note, deleteValues() does not bring length
    // back to 0, so it is zeroed by hand before the array is resized.
    target->docFields.deleteValues();
    target->docFields.length = 0;
    target->docFields.resize(1);

    for (int32_t idx = 0; idx < incomingCount; ++idx) {
        Field *current = incoming[idx];
        // Grow geometrically once every slot is in use.
        if (target->fieldCount == target->docFields.length) {
            target->docFields.resize(target->docFields.length * 2);
        }
        target->docFields.values[target->fieldCount] = current;
        target->fieldCount++;
    }
}

template<typename T>
typename SDocumentsWriter<T>::ThreadState *SDocumentsWriter<T>::getThreadState(Document *doc) {
if (threadState == nullptr) {
Expand All @@ -135,6 +160,8 @@ typename SDocumentsWriter<T>::ThreadState *SDocumentsWriter<T>::getThreadState(D
if (segment.empty()) {
segment = writer->newSegmentName();
threadState->init(doc, nextDocID);
} else if (doc->getNeedResetFieldData()) {
threadState->resetCurrentFieldData(doc);
}

threadState->docID = nextDocID;
Expand Down
2 changes: 2 additions & 0 deletions src/core/CLucene/index/SDocumentWriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,8 @@ class SDocumentsWriter : public IDocumentsWriter {
/** Tokenizes the fields of a document into Postings */
void processDocument(CL_NS(analysis)::Analyzer *sanalyzer);

/** Resets the current field data so that it holds the fields of the
 * given document. */
void resetCurrentFieldData(CL_NS(document)::Document *doc);

/** If there are fields we've seen but did not see again
* in the last run, then free them up. Also reduce
* postings hash size. */
Expand Down

0 comments on commit a28adab

Please sign in to comment.