From 778a3db8825012e7fc61f39ae0f5ff4fb712b298 Mon Sep 17 00:00:00 2001 From: Ma-cat <1054638297@qq.com> Date: Fri, 10 May 2024 16:55:29 +0800 Subject: [PATCH 01/14] optimize external sort comparison and reduce index creation time by 10% --- src/storage/invertedindex/column_inverter.cpp | 6 ++- .../common/external_sort_merger.cppm | 38 +++++++++++++++++++ src/storage/invertedindex/memory_indexer.cpp | 24 ++++++++---- 3 files changed, 58 insertions(+), 10 deletions(-) diff --git a/src/storage/invertedindex/column_inverter.cpp b/src/storage/invertedindex/column_inverter.cpp index 50d1e1c2c2..9550e8f82c 100644 --- a/src/storage/invertedindex/column_inverter.cpp +++ b/src/storage/invertedindex/column_inverter.cpp @@ -60,8 +60,9 @@ SizeT ColumnInverter::InvertColumn(SharedPtr column_vector, u32 ro SizeT term_count_sum = 0; for (SizeT i = 0; i < row_count; ++i) { String data = column_vector->ToString(row_offset + i); - if (data.empty()) + if (data.empty()) { continue; + } SizeT term_count = InvertColumn(begin_doc_id + i, data); column_lengths[i] = term_count; term_count_sum += term_count; @@ -246,8 +247,9 @@ void ColumnInverter::SortForOfflineDump() { // Data within each group void ColumnInverter::SpillSortResults(FILE *spill_file, u64 &tuple_count) { // spill sort results for external merge sort - if (positions_.empty()) + if (positions_.empty()) { return; + } // size of this Run in bytes u32 data_size = 0; u64 data_size_pos = ftell(spill_file); diff --git a/src/storage/invertedindex/common/external_sort_merger.cppm b/src/storage/invertedindex/common/external_sort_merger.cppm index 0142cc3b14..1217f04e90 100644 --- a/src/storage/invertedindex/common/external_sort_merger.cppm +++ b/src/storage/invertedindex/common/external_sort_merger.cppm @@ -169,6 +169,44 @@ struct KeyAddress 0; } }; +template +struct KeyAddress { + char *data{nullptr}; + u64 addr; + u32 idx; + + KeyAddress(char *p, u64 ad, u32 i) { + data = p; + addr = ad; + idx = i; + } + + KeyAddress() { + data = nullptr; + addr = -1; + idx = -1; + } + + TermTuple KEY() { return TermTuple(data + sizeof(LenType), LEN()); } + TermTuple KEY() const { return TermTuple(data + sizeof(LenType), LEN()); } + LenType LEN() const { return *(LenType *)data; } + u64 &ADDR() { return addr; } + u64 ADDR() const { return addr; } + u32 IDX() const { return idx; } + u32 &IDX() { return idx; } + + int Compare(const KeyAddress &p) const { + return KEY().Compare(p.KEY()); + } + + bool operator==(const KeyAddress &other) const { return Compare(other) == 0; } + + bool operator>(const KeyAddress &other) const { return Compare(other) < 0; } + + bool operator<(const KeyAddress &other) const { return Compare(other) > 0; } +}; + + export template class SortMerger { typedef SortMerger self_t; diff --git a/src/storage/invertedindex/memory_indexer.cpp b/src/storage/invertedindex/memory_indexer.cpp index d80376f3e5..2a43a996f2 100644 --- a/src/storage/invertedindex/memory_indexer.cpp +++ b/src/storage/invertedindex/memory_indexer.cpp @@ -91,8 +91,9 @@ MemoryIndexer::~MemoryIndexer() { } void MemoryIndexer::Insert(SharedPtr column_vector, u32 row_offset, u32 row_count, bool offline) { - if (is_spilled_) + if (is_spilled_) { Load(); + } u64 seq_inserted(0); u32 doc_count(0); @@ -121,8 +122,9 @@ void MemoryIndexer::Insert(SharedPtr column_vector, u32 row_offset auto func = [this, task, inverter](int id) { SizeT column_length_sum = inverter->InvertColumn(task->column_vector_, task->row_offset_, task->row_count_, task->start_doc_id_); column_length_sum_ += column_length_sum; - 
if (column_length_sum > 0) + if (column_length_sum > 0) { inverter->SortForOfflineDump(); + } this->ring_sorted_.Put(task->task_seq_, inverter); }; inverting_thread_pool_.push(std::move(func)); @@ -145,8 +147,9 @@ void MemoryIndexer::Insert(SharedPtr column_vector, u32 row_offset } void MemoryIndexer::InsertGap(u32 row_count) { - if (is_spilled_) + if (is_spilled_) { Load(); + } std::unique_lock lock(mutex_); doc_count_ += row_count; @@ -155,14 +158,16 @@ void MemoryIndexer::InsertGap(u32 row_count) { void MemoryIndexer::Commit(bool offline) { if (offline) { commiting_thread_pool_.push([this](int id) { this->CommitOffline(); }); - } else + } else { commiting_thread_pool_.push([this](int id) { this->CommitSync(); }); + } } SizeT MemoryIndexer::CommitOffline(SizeT wait_if_empty_ms) { std::unique_lock lock(mutex_commit_, std::defer_lock); - if (!lock.try_lock()) + if (!lock.try_lock()) { return 0; + } if (nullptr == spill_file_handle_) { PrepareSpillFile(); @@ -200,14 +205,16 @@ SizeT MemoryIndexer::CommitSync(SizeT wait_if_empty_ms) { }; std::unique_lock lock(mutex_commit_, std::defer_lock); - if (!lock.try_lock()) + if (!lock.try_lock()) { return 0; + } while (1) { this->ring_sorted_.GetBatch(inverters, wait_if_empty_ms); // num_merged = inverters.size(); - if (inverters.empty()) + if (inverters.empty()) { break; + } for (auto &inverter : inverters) { inverter->GeneratePosting(); num_generated += inverter->GetMerged(); @@ -353,8 +360,9 @@ void MemoryIndexer::OfflineDump() { // 2. Generate posting // 3. Dump disk segment data // LOG_INFO(fmt::format("MemoryIndexer::OfflineDump begin, num_runs_ {}", num_runs_)); - if (tuple_count_ == 0) + if (tuple_count_ == 0) { return; + } FinalSpillFile(); constexpr u32 buffer_size_of_each_run = 2 * 1024 * 1024; SortMerger *merger = new SortMerger(spill_full_path_.c_str(), num_runs_, buffer_size_of_each_run * num_runs_, 2); From ff3bbc4c6560e9e7e9532955561d69917eb4495c Mon Sep 17 00:00:00 2001 From: Ma-cat <1054638297@qq.com> Date: Mon, 13 May 2024 13:32:32 +0800 Subject: [PATCH 02/14] implement the loser tree and use the loser tree to replace the heap to achieve multi-way merge time and improve performance --- .../common/external_sort_merger.cpp | 41 ++++- .../common/external_sort_merger.cppm | 4 +- .../invertedindex/common/loser_tree.cppm | 164 ++++++++++++++++++ src/unit_test/storage/common/loser_tree.cpp | 131 ++++++++++++++ 4 files changed, 334 insertions(+), 6 deletions(-) create mode 100644 src/storage/invertedindex/common/loser_tree.cppm create mode 100644 src/unit_test/storage/common/loser_tree.cpp diff --git a/src/storage/invertedindex/common/external_sort_merger.cpp b/src/storage/invertedindex/common/external_sort_merger.cpp index 108c2a36aa..b207c54ec3 100644 --- a/src/storage/invertedindex/common/external_sort_merger.cpp +++ b/src/storage/invertedindex/common/external_sort_merger.cpp @@ -31,6 +31,7 @@ namespace infinity { assert(false); \ } \ } +#define USE_LOSER_TREE template SortMerger::SortMerger(const char *filenm, u32 group_size, u32 bs, u32 output_num) @@ -58,6 +59,9 @@ SortMerger::SortMerger(const char *filenm, u32 group_size, u32 out_buf_size_ = new u32[OUT_BUF_NUM_]; out_buf_full_ = new bool[OUT_BUF_NUM_]; +#ifdef USE_LOSER_TREE + merge_loser_tree_ = MakeShared>(MAX_GROUP_SIZE_); +#endif } template @@ -166,17 +170,29 @@ void SortMerger::Init(DirectIO &io_stream) { size_micro_run_[i] = s; io_stream.Read(micro_buf_[i], s); } - if (flag) - continue; +#ifdef USE_LOSER_TREE + if (flag) { + merge_loser_tree_->InsertStart(nullptr, 
static_cast::Source>(i), true); + continue; + } + auto key = KeyAddr(micro_buf_[i], -1, i); + merge_loser_tree_->InsertStart(&key, static_cast::Source>(i), false); +#else + if (flag) { + continue; + } merge_heap_.push(KeyAddr(micro_buf_[i], -1, i)); +#endif micro_run_idx_[i] = 1; micro_run_pos_[i] = KeyAddr(micro_buf_[i], -1, i).LEN() + sizeof(LenType); num_micro_run_[i] = 0; io_stream.Seek(next_run_pos); } - +#ifdef USE_LOSER_TREE + merge_loser_tree_->Init(); +#endif // initialize predict heap and records number of every microrun for (u32 i = 0; i < group_size_; ++i) { u32 pos = 0; @@ -278,9 +294,14 @@ void SortMerger::Predict(DirectIO &io_stream) { template void SortMerger::Merge() { +#ifdef USE_LOSER_TREE + while (merge_loser_tree_->TopSource() != LoserTree::invalid_) { + auto top = merge_loser_tree_->TopKey(); +#else while (merge_heap_.size() > 0) { KeyAddr top = merge_heap_.top(); merge_heap_.pop(); +#endif u32 idx = top.IDX(); // output @@ -316,8 +337,12 @@ void SortMerger::Merge() { while (pre_buf_size_ == 0) pre_buf_con_.wait(lock); - if (pre_buf_size_ == (u32)-1) + if (pre_buf_size_ == (u32)-1) { +#ifdef USE_LOSER_TREE + merge_loser_tree_->DeleteTopInsert(nullptr, true); +#endif continue; + } assert(idx < MAX_GROUP_SIZE_); memcpy(micro_buf_[idx], pre_buf_, pre_buf_size_); @@ -328,7 +353,12 @@ void SortMerger::Merge() { } assert(idx < MAX_GROUP_SIZE_); +#ifdef USE_LOSER_TREE + auto key = KeyAddr(micro_buf_[idx] + micro_run_pos_[idx], -1, idx); + merge_loser_tree_->DeleteTopInsert(&key, false); +#else merge_heap_.push(KeyAddr(micro_buf_[idx] + micro_run_pos_[idx], -1, idx)); +#endif ++micro_run_idx_[idx]; micro_run_pos_[idx] += KeyAddr(micro_buf_[idx] + micro_run_pos_[idx], -1, idx).LEN() + sizeof(LenType); } @@ -400,8 +430,9 @@ void SortMerger::Run() { IASSERT(fwrite(&count_, sizeof(u64), 1, out_f) == 1); Vector out_thread(OUT_BUF_NUM_); - for (u32 i = 0; i < OUT_BUF_NUM_; ++i) + for (u32 i = 0; i < OUT_BUF_NUM_; ++i) { out_thread[i] = new Thread(std::bind(&self_t::Output, this, out_f, i)); + } predict_thread.join(); merge_thread.join(); diff --git a/src/storage/invertedindex/common/external_sort_merger.cppm b/src/storage/invertedindex/common/external_sort_merger.cppm index 1217f04e90..61cd1a5615 100644 --- a/src/storage/invertedindex/common/external_sort_merger.cppm +++ b/src/storage/invertedindex/common/external_sort_merger.cppm @@ -19,6 +19,7 @@ module; export module external_sort_merger; import stl; +import loser_tree; namespace infinity { @@ -221,6 +222,7 @@ class SortMerger { std::priority_queue pre_heap_; //!< predict heap std::priority_queue merge_heap_; //!< merge heap + SharedPtr> merge_loser_tree_; u32 *micro_run_idx_{nullptr}; //!< the access index of each microruns u32 *micro_run_pos_{nullptr}; //!< the access position within each microruns @@ -250,7 +252,7 @@ class SortMerger { u32 pre_buf_size_; //!< the current size of microrun that has been loaded onto prediect buffer u32 pre_buf_num_; //!< the current records number of microrun that has been loaded onto prediect buffer - // u32 pre_idx_;//!< the index of microrun channel right in the predict buffer + //u32 pre_idx_; //!< the index of microrun channel right in the predict buffer u32 out_buf_in_idx_; //!< used by merge to get the current available output buffer u32 out_buf_out_idx_; //!< used by output threads to get the index of the turn of outputting u32 *out_buf_size_{nullptr}; //!< data size of each output buffer diff --git a/src/storage/invertedindex/common/loser_tree.cppm 
b/src/storage/invertedindex/common/loser_tree.cppm new file mode 100644 index 0000000000..14032f58d0 --- /dev/null +++ b/src/storage/invertedindex/common/loser_tree.cppm @@ -0,0 +1,164 @@ +module; + +#include +#include + +export module loser_tree; + +import stl; + +namespace infinity { + +//! LoserTreeBase class definition +export template +class LoserTreeBase { +public: + using Source = u32; + static constexpr Source invalid_ = Source(-1); + static inline Source round_up_to_power_of_two(Source n) { + --n; + for (SizeT k = 1; k != 8 * sizeof(n); k <<= 1) { + n |= n >> k; + } + ++n; + return n; + } +protected: + struct Loser { + //! flag, true if is a virtual maximum sentinel + bool sup; + Source source; + ValueType key; + }; + + //! The number of nodes in the tree. + const Source ik_; + //! The next greater power of two of ik_. + const Source k_; + //! Array containing loser tree nodes. + Vector losers_; + //! Function object for comparing ValueTypes. + std::function cmp_; + + bool first_insert_; +public: + explicit LoserTreeBase(const Source& k, + std::function cmp = std::greater()) + : ik_(k), k_(round_up_to_power_of_two(k)), + losers_(2 * k_), cmp_(cmp), first_insert_(true) { + // : ik_(k), k_(static_cast(1) << static_cast(std::ceil(std::log2(static_cast(k))))), + + for (Source i = ik_ - 1; i < k_; i++) { + losers_[i + k_].sup = true; + losers_[i + k_].source = invalid_; + } +// for (Source i = 0; i < k_; ++i) { +// losers_[i].source = invalid_; +// losers_[i].keyp = &sentinel; +// } + } + + //! Return the index of the player with the smallest element. + Source TopSource() { + // if (losers_[0].sup) return invalid_; + return losers_[0].source; + } + + ValueType TopKey() { + return losers_[0].key; + } + + //! Initializes the player source with the element key. + void InsertStart(const ValueType* keyp, const Source& source, bool sup) { + Source pos = k_ + source; + // assert(pos < losers_.size()); + losers_[pos].source = source; + losers_[pos].sup = sup; + + if (first_insert_) { + for (Source i = 0; i < 2 * k_; ++i) { + // losers_[i].keyp = keyp; + if (keyp) { + losers_[i].key = *keyp; + } else { + losers_[i].key = ValueType(); + } + } + first_insert_ = false; + } else { + // losers_[pos].keyp = keyp; + losers_[pos].key = (keyp ? *keyp : ValueType()); + } + } + + //! Recursively compute the winner of the competition at player root. + Source InitWinner(const Source& root) { + if (root >= k_) { + return root; + } + + Source left = InitWinner(2 * root); + Source right = InitWinner(2 * root + 1); + if (losers_[right].sup || + (!losers_[left].sup && !cmp_(losers_[right].key, losers_[left].key))) { + losers_[root] = losers_[right]; + return left; + } else { + losers_[root] = losers_[left]; + return right; + } + } + + void Init() { + if (k_ == 0) { + return; + } + losers_[0] = losers_[InitWinner(1)]; + } +}; + +//! Unguarded loser tree, keeping only pointers to the elements in the tree structure. +export template +class LoserTree : public LoserTreeBase { +public: + using Super = LoserTreeBase; + using Source = typename Super::Source; + +public: + //! Constructor. + explicit LoserTree(const Source& k, + std::function cmp = std::greater()) + : Super(k, cmp) {} + + //! Delete the current minimum and insert a new element. + void DeleteTopInsert(const ValueType* keyp, bool sup) { + assert(sup == (keyp == nullptr)); + Source source = Super::losers_[0].source; + ValueType key = (keyp ? 
*keyp : ValueType()); + + for (Source pos = (Super::k_ + source) / 2; pos > 0; pos /= 2) { + if (sup) { + std::swap(Super::losers_[pos].sup, sup); + std::swap(Super::losers_[pos].source, source); + std::swap(Super::losers_[pos].key, key); + } else if (Super::losers_[pos].sup) { + // do nothing + } else if (Super::cmp_(Super::losers_[pos].key, key)) { + // std::swap(Super::losers_[pos].sup, sup); + std::swap(Super::losers_[pos].source, source); + std::swap(Super::losers_[pos].key, key); + } else { + // do nothing + } + } + if (sup) { + Super::losers_[0].source = Super::invalid_; + } else { + Super::losers_[0].source = source; + } + Super::losers_[0].sup = sup; + Super::losers_[0].key = key; + } +}; + +} // namespace infinity \ No newline at end of file diff --git a/src/unit_test/storage/common/loser_tree.cpp b/src/unit_test/storage/common/loser_tree.cpp new file mode 100644 index 0000000000..b257ee07b4 --- /dev/null +++ b/src/unit_test/storage/common/loser_tree.cpp @@ -0,0 +1,131 @@ +#include "unit_test/base_test.h" +import stl; +import random; +import third_party; +import loser_tree; + +using namespace infinity; + +class LoserTreeTest : public BaseTest { +public: + LoserTreeTest() = default; + ~LoserTreeTest() = default; + + void SetUp() override { + numbers.clear(); + loser.clear(); + } + void TearDown() override {} + + u64 GetRandom(u64 max_val) { + return static_cast(random() % max_val) * random() % max_val; + } + + void GenerateData(SizeT num_size, SizeT loser_num, u64 max_val); + + void MultiWayMerge(SizeT num_size, SizeT loser_num, u64 max_val); + +protected: + Vector numbers; + Vector num_idx; + Vector> loser; +}; + +void LoserTreeTest::GenerateData(infinity::SizeT num_size, infinity::SizeT loser_num, infinity::u64 max_val) { + numbers.clear(); + loser.clear(); + num_idx.clear(); + + loser.resize(loser_num); + num_idx.resize(loser_num, 0); + + for (SizeT i = 0; i < num_size; ++i) { + auto val = GetRandom(max_val); + numbers.emplace_back(val); + loser[GetRandom(loser_num)].emplace_back(val); + } + for (SizeT i = 0; i < loser_num; ++i) { + std::sort(loser[i].begin(), loser[i].end()); +// for (SizeT j = 0; j < loser[i].size(); ++j) { +// fmt::print("{} ", loser[i][j]); +// } +// fmt::print("\n"); + } +} + +void LoserTreeTest::MultiWayMerge(infinity::SizeT num_size, infinity::SizeT loser_num, infinity::u64 max_val) { + auto loser_tree = MakeShared>(loser_num, std::less()); + for (SizeT i = 0; i < loser_num; ++i) { + if (!loser[i].empty()) { + loser_tree->InsertStart(&loser[i][num_idx[i]], static_cast::Source>(i), false); + ++num_idx[i]; + } else { + loser_tree->InsertStart(nullptr, static_cast::Source>(i), true); + ++num_idx[i]; + } + } + loser_tree->Init(); + Vector merge_res; + while (loser_tree->TopSource() != LoserTree::invalid_) { + auto min_value = loser_tree->TopKey(); + auto min_source = loser_tree->TopSource(); + merge_res.push_back(min_value); + // fmt::print("min val = {}, min source = {}\n", min_value, min_source); + auto& min_seq = num_idx[min_source]; + + if (min_seq < loser[min_source].size()) { + loser_tree->DeleteTopInsert(&(loser[min_source][min_seq]), false); + } else { + loser_tree->DeleteTopInsert(nullptr, true); + } + min_seq++; + } + sort(numbers.begin(), numbers.end()); + EXPECT_EQ(numbers.size(), merge_res.size()); + for (SizeT i = 0; i < merge_res.size(); ++i) { + EXPECT_EQ(merge_res[i], numbers[i]); + } + return ; + for (SizeT i = 0; i < merge_res.size(); ++i) { + fmt::print("{} ", merge_res[i]); + // EXPECT_EQ(merge_res[i], numbers[i]); + } + 
fmt::print("\n"); + for (SizeT i = 0; i < numbers.size(); ++i) { + fmt::print("{} ", numbers[i]); + // EXPECT_EQ(merge_res[i], numbers[i]); + } + fmt::print("\n"); +} + +TEST_F(LoserTreeTest, BasicMerge1) { + const SizeT num_size = 10; + const SizeT loser_num = 3; + const u64 max_val = 30; + GenerateData(num_size, loser_num, max_val); + MultiWayMerge(num_size, loser_num, max_val); +} + +TEST_F(LoserTreeTest, BasicMerge2) { + const SizeT num_size = 10; + const SizeT loser_num = 12; + const u64 max_val = 30; + GenerateData(num_size, loser_num, max_val); + MultiWayMerge(num_size, loser_num, max_val); +} + +TEST_F(LoserTreeTest, BasicMerge3) { + const SizeT num_size = 1000; + const SizeT loser_num = 200; + const u64 max_val = 10000; + GenerateData(num_size, loser_num, max_val); + MultiWayMerge(num_size, loser_num, max_val); +} + +TEST_F(LoserTreeTest, BasicMerge4) { + const SizeT num_size = 1000; + const SizeT loser_num = 2000; + const u64 max_val = 10000; + GenerateData(num_size, loser_num, max_val); + MultiWayMerge(num_size, loser_num, max_val); +} \ No newline at end of file From 435ce8953c53323daf92f945029f649d5416ef59 Mon Sep 17 00:00:00 2001 From: Ma-cat <1054638297@qq.com> Date: Mon, 13 May 2024 15:44:06 +0800 Subject: [PATCH 03/14] refactor the loser tree to reduce the impact of function and copy on performance --- src/storage/invertedindex/common/loser_tree.cppm | 16 +++++++++------- src/unit_test/storage/common/loser_tree.cpp | 2 +- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/storage/invertedindex/common/loser_tree.cppm b/src/storage/invertedindex/common/loser_tree.cppm index 14032f58d0..762e512629 100644 --- a/src/storage/invertedindex/common/loser_tree.cppm +++ b/src/storage/invertedindex/common/loser_tree.cppm @@ -10,7 +10,7 @@ import stl; namespace infinity { //! LoserTreeBase class definition -export template +export template > class LoserTreeBase { public: using Source = u32; @@ -38,12 +38,13 @@ protected: //! Array containing loser tree nodes. Vector losers_; //! Function object for comparing ValueTypes. - std::function cmp_; + // std::function cmp_; + Comparator cmp_; bool first_insert_; public: explicit LoserTreeBase(const Source& k, - std::function cmp = std::greater()) + const Comparator& cmp = Comparator()) : ik_(k), k_(round_up_to_power_of_two(k)), losers_(2 * k_), cmp_(cmp), first_insert_(true) { // : ik_(k), k_(static_cast(1) << static_cast(std::ceil(std::log2(static_cast(k))))), @@ -118,16 +119,17 @@ public: }; //! Unguarded loser tree, keeping only pointers to the elements in the tree structure. -export template -class LoserTree : public LoserTreeBase { +export template > +class LoserTree : public LoserTreeBase { public: - using Super = LoserTreeBase; + using Super = LoserTreeBase; using Source = typename Super::Source; public: //! Constructor. explicit LoserTree(const Source& k, - std::function cmp = std::greater()) + const Comparator& cmp = Comparator()) + // const std::function& cmp = std::greater()) : Super(k, cmp) {} //! Delete the current minimum and insert a new element. 
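// Illustrative sketch (not part of the patch series): the unit-test diff that follows
// drives the LoserTree exactly the way the merger does -- one InsertStart() per source,
// a single Init(), then repeated TopSource()/TopKey()/DeleteTopInsert() until every
// source reports the sentinel. A minimal k-way merge against the interface defined
// above (template arguments follow the unit test below; helper name is assumed):
//
// template <typename T>
// void KWayMerge(const Vector<Vector<T>> &runs, Vector<T> &out) {
//     using Tree = LoserTree<T, std::less<T>>;
//     Tree tree(static_cast<typename Tree::Source>(runs.size()));
//     Vector<SizeT> cursor(runs.size(), 0);
//     for (SizeT i = 0; i < runs.size(); ++i) {
//         // sup == true marks an exhausted (virtual maximum) source
//         if (runs[i].empty()) {
//             tree.InsertStart(nullptr, static_cast<typename Tree::Source>(i), true);
//         } else {
//             tree.InsertStart(&runs[i][0], static_cast<typename Tree::Source>(i), false);
//             cursor[i] = 1;
//         }
//     }
//     tree.Init();
//     while (tree.TopSource() != Tree::invalid_) {
//         auto src = tree.TopSource();
//         out.push_back(tree.TopKey());
//         if (cursor[src] < runs[src].size()) {
//             tree.DeleteTopInsert(&runs[src][cursor[src]++], false);
//         } else {
//             tree.DeleteTopInsert(nullptr, true);  // source exhausted
//         }
//     }
// }
//
// Compared with the std::priority_queue it replaces, replacing the current winner costs
// one comparison per tree level instead of a pop() followed by a push(), which is the
// usual reason a loser tree outperforms a binary heap for multi-way merging.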
diff --git a/src/unit_test/storage/common/loser_tree.cpp b/src/unit_test/storage/common/loser_tree.cpp index b257ee07b4..6b1e52293e 100644 --- a/src/unit_test/storage/common/loser_tree.cpp +++ b/src/unit_test/storage/common/loser_tree.cpp @@ -54,7 +54,7 @@ void LoserTreeTest::GenerateData(infinity::SizeT num_size, infinity::SizeT loser } void LoserTreeTest::MultiWayMerge(infinity::SizeT num_size, infinity::SizeT loser_num, infinity::u64 max_val) { - auto loser_tree = MakeShared>(loser_num, std::less()); + auto loser_tree = MakeShared>>(loser_num); for (SizeT i = 0; i < loser_num; ++i) { if (!loser[i].empty()) { loser_tree->InsertStart(&loser[i][num_idx[i]], static_cast::Source>(i), false); From 45a5dcb27f78fa62a1b0acd8b28f3063bd0d50a3 Mon Sep 17 00:00:00 2001 From: Ma-cat <1054638297@qq.com> Date: Tue, 14 May 2024 16:33:39 +0800 Subject: [PATCH 04/14] Optimize io for creating index --- src/storage/invertedindex/column_inverter.cpp | 86 +++++++++++ .../invertedindex/column_inverter.cppm | 2 + .../common/external_sort_merger.cpp | 133 +++++++++++++++++- .../common/external_sort_merger.cppm | 6 + src/storage/invertedindex/common/mmap.cppm | 59 +++++++- src/storage/invertedindex/memory_indexer.cpp | 40 +++++- src/storage/invertedindex/memory_indexer.cppm | 8 ++ 7 files changed, 327 insertions(+), 7 deletions(-) diff --git a/src/storage/invertedindex/column_inverter.cpp b/src/storage/invertedindex/column_inverter.cpp index 9550e8f82c..aa1ff31f29 100644 --- a/src/storage/invertedindex/column_inverter.cpp +++ b/src/storage/invertedindex/column_inverter.cpp @@ -291,4 +291,90 @@ void ColumnInverter::SpillSortResults(FILE *spill_file, u64 &tuple_count) { fseek(spill_file, next_start_offset, SEEK_SET); } +void ColumnInverter::SpillSortResults(FILE *spill_file, u64 &tuple_count, UniquePtr& spill_buffer, SizeT spill_buf_size) { + // spill sort results for external merge sort + if (positions_.empty()) { + return; + } + SizeT spill_buf_idx = 0; + SizeT spill_file_tell = ftell(spill_file); + // size of this Run in bytes + u32 data_size = 0; + u64 data_size_pos = spill_file_tell; + // fwrite(&data_size, sizeof(u32), 1, spill_file); + memcpy(spill_buffer.get() + spill_buf_idx, &data_size, sizeof(u32)); + spill_buf_idx += sizeof(u32); + spill_file_tell += sizeof(u32); + + // number of tuples + u32 num_of_tuples = positions_.size(); + tuple_count += num_of_tuples; + // fwrite(&num_of_tuples, sizeof(u32), 1, spill_file); + memcpy(spill_buffer.get() + spill_buf_idx, &num_of_tuples, sizeof(u32)); + spill_buf_idx += sizeof(u32); + spill_file_tell += sizeof(u32); + + // start offset for next spill + u64 next_start_offset = 0; + u64 next_start_offset_pos = spill_file_tell; + // u64 next_start_offset_pos = ftell(spill_file); + // fwrite(&next_start_offset, sizeof(u64), 1, spill_file); + memcpy(spill_buffer.get() + spill_buf_idx, &next_start_offset, sizeof(u64)); + spill_buf_idx += sizeof(u64); + spill_file_tell += sizeof(u64); + + assert(spill_buf_idx < spill_buf_size); + fwrite(spill_buffer.get(), spill_buf_idx, 1, spill_file); + spill_buf_idx = 0; + + // u64 data_start_offset = ftell(spill_file); + u64 data_start_offset = spill_file_tell; + assert((SizeT)ftell(spill_file) == spill_file_tell); + // sorted data + u32 last_term_num = std::numeric_limits::max(); + StringRef term; + u32 record_length = 0; + char str_null = '\0'; + for (auto &i : positions_) { + if (last_term_num != i.term_num_) { + last_term_num = i.term_num_; + term = GetTermFromNum(last_term_num); + } + record_length = term.size() + sizeof(docid_t) + 
sizeof(u32) + 1; +// fwrite(&record_length, sizeof(u32), 1, spill_file); +// fwrite(term.data(), term.size(), 1, spill_file); +// fwrite(&str_null, sizeof(char), 1, spill_file); +// fwrite(&i.doc_id_, sizeof(docid_t), 1, spill_file); +// fwrite(&i.term_pos_, sizeof(u32), 1, spill_file); + memcpy(spill_buffer.get() + spill_buf_idx, &record_length, sizeof(u32)); + spill_buf_idx += sizeof(u32); + + memcpy(spill_buffer.get() + spill_buf_idx, term.data(), term.size()); + spill_buf_idx += term.size(); + + memcpy(spill_buffer.get() + spill_buf_idx, &str_null, sizeof(char)); + spill_buf_idx += sizeof(char); + + memcpy(spill_buffer.get() + spill_buf_idx, &i.doc_id_, sizeof(docid_t)); + spill_buf_idx += sizeof(docid_t); + + memcpy(spill_buffer.get() + spill_buf_idx, &i.term_pos_, sizeof(u32)); + spill_buf_idx += sizeof(u32); + + assert(spill_buf_idx < spill_buf_size); + fwrite(spill_buffer.get(), spill_buf_idx, 1, spill_file); + spill_buf_idx = 0; + } + + // update data size + next_start_offset = ftell(spill_file); + data_size = next_start_offset - data_start_offset; + fseek(spill_file, data_size_pos, SEEK_SET); + fwrite(&data_size, sizeof(u32), 1, spill_file); // update offset for next spill + fseek(spill_file, next_start_offset_pos, SEEK_SET); + fwrite(&next_start_offset, sizeof(u64), 1, spill_file); + fseek(spill_file, next_start_offset, SEEK_SET); +} + + } // namespace infinity \ No newline at end of file diff --git a/src/storage/invertedindex/column_inverter.cppm b/src/storage/invertedindex/column_inverter.cppm index 830fed9b55..e9b97919d6 100644 --- a/src/storage/invertedindex/column_inverter.cppm +++ b/src/storage/invertedindex/column_inverter.cppm @@ -73,6 +73,8 @@ public: void SpillSortResults(FILE *spill_file, u64 &tuple_count); + void SpillSortResults(FILE *spill_file, u64 &tuple_count, UniquePtr& spill_buffer, SizeT spill_buf_size); + private: using TermBuffer = Vector; using PosInfoVec = Vector; diff --git a/src/storage/invertedindex/common/external_sort_merger.cpp b/src/storage/invertedindex/common/external_sort_merger.cpp index b207c54ec3..677a5127a9 100644 --- a/src/storage/invertedindex/common/external_sort_merger.cpp +++ b/src/storage/invertedindex/common/external_sort_merger.cpp @@ -31,7 +31,9 @@ namespace infinity { assert(false); \ } \ } + #define USE_LOSER_TREE +// #define USE_MMAP_IO template SortMerger::SortMerger(const char *filenm, u32 group_size, u32 bs, u32 output_num) @@ -221,6 +223,123 @@ void SortMerger::Init(DirectIO &io_stream) { size_micro_run_[i] = pos; } } +#ifdef USE_MMAP_IO +template +void SortMerger::Init(MmapIO &io_stream) { + // initialize three buffers + NewBuffer(); + + // initiate output buffers + out_buf_size_[0] = 0; + sub_out_buf_[0] = out_buf_; + out_buf_full_[0] = false; + for (u32 i = 1; i < OUT_BUF_NUM_; ++i) { + sub_out_buf_[i] = sub_out_buf_[i - 1] + OUT_BUF_SIZE_ / OUT_BUF_NUM_; + out_buf_size_[i] = 0; + out_buf_full_[i] = false; + } + out_buf_in_idx_ = 0; + out_buf_out_idx_ = 0; + + // initiate the microrun buffer + micro_buf_[0] = run_buf_; + for (u32 i = 1; i < MAX_GROUP_SIZE_; ++i) { + micro_buf_[i] = micro_buf_[i - 1] + PRE_BUF_SIZE_; + } + + group_size_ = 0; + u64 next_run_pos = 0; + for (u32 i = 0; i < MAX_GROUP_SIZE_ && (u64)io_stream.Tell() < FILE_LEN_; ++i, ++group_size_) { + // get the size of run + io_stream.ReadU32(size_run_[i]); + // get the records number of a run + io_stream.ReadU32(num_run_[i]); + io_stream.ReadU64(next_run_pos); + + run_addr_[i] = io_stream.Tell(); + + // loading size of a microrun + u32 s = size_run_[i] > 
PRE_BUF_SIZE_ ? PRE_BUF_SIZE_ : size_run_[i]; + size_t ret = io_stream.ReadBuf(micro_buf_[i], s); + size_micro_run_[i] = ret; + size_loaded_run_[i] = ret; + run_curr_addr_[i] = io_stream.Tell(); + // std::cout << "num_run_[" << i << "] " << num_run_[i] << " size_run_ " << size_run_[i] << " size_micro_run " << size_micro_run_[i] + // << std::endl; + + /// it is not needed for compression, validation will be made within IOStream in that case + // if a record can fit in microrun buffer + bool flag = false; + while (*(LenType *)(micro_buf_[i]) + sizeof(LenType) > s) { + size_micro_run_[i] = 0; + --count_; + // LOG_WARN("[Warning]: A record is too long, it will be ignored"); + + io_stream.Seek(*(LenType *)(micro_buf_[i]) + sizeof(LenType) - s); + // io_stream.Seek(*(LenType *)(micro_buf_[i]) + sizeof(LenType) - s, SEEK_CUR); + + if (io_stream.Tell() - run_addr_[i] >= (u64)size_run_[i]) { + flag = true; + break; + } + + s = (u32)((u64)size_run_[i] - (io_stream.Tell() - run_addr_[i]) > PRE_BUF_SIZE_ ? PRE_BUF_SIZE_ + : (u64)size_run_[i] - (io_stream.Tell() - run_addr_[i])); + size_micro_run_[i] = s; + io_stream.Read(micro_buf_[i], s); + } + +#ifdef USE_LOSER_TREE + if (flag) { + merge_loser_tree_->InsertStart(nullptr, static_cast::Source>(i), true); + continue; + } + auto key = KeyAddr(micro_buf_[i], -1, i); + merge_loser_tree_->InsertStart(&key, static_cast::Source>(i), false); +#else + if (flag) { + continue; + } + merge_heap_.push(KeyAddr(micro_buf_[i], -1, i)); +#endif + micro_run_idx_[i] = 1; + micro_run_pos_[i] = KeyAddr(micro_buf_[i], -1, i).LEN() + sizeof(LenType); + num_micro_run_[i] = 0; + + io_stream.Seek(next_run_pos); + } +#ifdef USE_LOSER_TREE + merge_loser_tree_->Init(); +#endif + // initialize predict heap and records number of every microrun + for (u32 i = 0; i < group_size_; ++i) { + u32 pos = 0; + u32 last_pos = -1; + assert(i < MAX_GROUP_SIZE_); + if (size_micro_run_[i] <= 0) + continue; + while (pos + sizeof(LenType) <= size_micro_run_[i]) { + LenType len = *(LenType *)(micro_buf_[i] + pos); + if (pos + sizeof(LenType) + len <= size_micro_run_[i]) { + num_micro_run_[i]++; + last_pos = pos; + pos += sizeof(LenType) + len; + } else { + break; + } + } + // std::cout << "len " << len << " size_micro_run_[" << i << "] " << size_micro_run_[i] << std::endl; + assert(last_pos != (u32)-1); // buffer too small that can't hold one record + assert(last_pos + sizeof(LenType) <= size_micro_run_[i]); + assert(pos <= size_micro_run_[i]); + LenType len = (LenType)(pos - last_pos); + char *tmp = (char *)malloc(len); + memcpy(tmp, micro_buf_[i] + last_pos, len); + pre_heap_.push(KeyAddr(tmp, run_addr_[i] + pos, i)); + size_micro_run_[i] = pos; + } +} +#endif template void SortMerger::Predict(DirectIO &io_stream) { @@ -413,6 +532,12 @@ void SortMerger::Output(FILE *f, u32 idx) { template void SortMerger::Run() { +#ifdef USE_MMAP_IO + MMappedIO io_stream(filenm_); + FILE_LEN_ = io_stream.DataLen(); + io_stream.ReadU64(count_); + Init(io_stream); +#else FILE *f = fopen(filenm_.c_str(), "r"); DirectIO io_stream(f); @@ -423,6 +548,8 @@ void SortMerger::Run() { Init(io_stream); Thread predict_thread(std::bind(&self_t::Predict, this, io_stream)); +#endif + Thread merge_thread(std::bind(&self_t::Merge, this)); FILE *out_f = fopen((filenm_ + ".out").c_str(), "w+"); @@ -433,15 +560,17 @@ void SortMerger::Run() { for (u32 i = 0; i < OUT_BUF_NUM_; ++i) { out_thread[i] = new Thread(std::bind(&self_t::Output, this, out_f, i)); } - +#ifndef USE_MMAP_IO predict_thread.join(); +#endif merge_thread.join(); for 
(u32 i = 0; i < OUT_BUF_NUM_; ++i) { out_thread[i]->join(); delete out_thread[i]; } - +#ifndef USE_MMAP_IO fclose(f); +#endif fclose(out_f); if (std::filesystem::exists(filenm_)) diff --git a/src/storage/invertedindex/common/external_sort_merger.cppm b/src/storage/invertedindex/common/external_sort_merger.cppm index 61cd1a5615..eec6017787 100644 --- a/src/storage/invertedindex/common/external_sort_merger.cppm +++ b/src/storage/invertedindex/common/external_sort_merger.cppm @@ -16,10 +16,14 @@ module; #include #include +#include + export module external_sort_merger; import stl; import loser_tree; +import mmap; +import infinity_exception; namespace infinity { @@ -273,6 +277,8 @@ class SortMerger { void Output(FILE *f, u32 idx); + // void Init(MmapIO &io_stream); + public: SortMerger(const char *filenm, u32 group_size = 4, u32 bs = 100000000, u32 output_num = 2); diff --git a/src/storage/invertedindex/common/mmap.cppm b/src/storage/invertedindex/common/mmap.cppm index 9d0f6f1497..3056b8dc20 100644 --- a/src/storage/invertedindex/common/mmap.cppm +++ b/src/storage/invertedindex/common/mmap.cppm @@ -5,8 +5,10 @@ module; #include #include #include -import stl; +#include +import stl; +import infinity_exception; export module mmap; using namespace infinity; @@ -14,7 +16,7 @@ namespace fs = std::filesystem; namespace infinity { -export int MmapFile(const String &fp, u8 *&data_ptr, SizeT &data_len) { +export int MmapFile(const String &fp, u8 *&data_ptr, SizeT &data_len, int advice = (MADV_RANDOM | MADV_DONTDUMP)) { data_ptr = nullptr; data_len = 0; long len_f = fs::file_size(fp); @@ -25,7 +27,7 @@ export int MmapFile(const String &fp, u8 *&data_ptr, SizeT &data_len) { if (tmpd == MAP_FAILED) return -1; close(f); - int rc = madvise(tmpd, len_f, MADV_RANDOM | MADV_DONTDUMP); + int rc = madvise(tmpd, len_f, advice); if (rc < 0) return -1; data_ptr = (u8 *)tmpd; @@ -44,4 +46,55 @@ export int MunmapFile(u8 *&data_ptr, SizeT &data_len) { return 0; } +export struct MmapReader { + MmapReader(const String &filename, int advice = MADV_SEQUENTIAL) { + int rc = MmapFile(filename, data_ptr_, data_len_, advice); + idx_ = 0; + if (rc < 0) { + throw UnrecoverableException("MmapFile failed"); + } + } + + ~MmapReader() { + MunmapFile(data_ptr_, data_len_); + } + + void Seek(SizeT diff) { + idx_ += diff; + } + + void ReadU64(u64 &val) { + val = *(u64 *)(data_ptr_ + idx_); + idx_ += sizeof(u64); + } + + void ReadU32(u32 &val) { + val = *(u32 *)(data_ptr_ + idx_); + idx_ += sizeof(u32); + } + + SizeT ReadBuf(char* buf, SizeT len) { + if (idx_ + len < data_len_) { + memcpy(buf, data_ptr_ + idx_, len); + idx_ += len; + return len; + } else { + SizeT left = data_len_ - idx_; + memcpy(buf, data_ptr_ + idx_, left); + idx_ = data_len_; + return left; + } + } + + SizeT Tell() { return idx_; } + + SizeT DataLen() { return data_len_; } + + u8 *data_ptr_ = nullptr; + + SizeT data_len_{0}; + + SizeT idx_{0}; +}; + } // namespace infinity diff --git a/src/storage/invertedindex/memory_indexer.cpp b/src/storage/invertedindex/memory_indexer.cpp index 2a43a996f2..0f03af6170 100644 --- a/src/storage/invertedindex/memory_indexer.cpp +++ b/src/storage/invertedindex/memory_indexer.cpp @@ -27,7 +27,8 @@ module; #include #include #include -#include +#include + module memory_indexer; import stl; @@ -54,9 +55,12 @@ import logger; import file_system; import file_system_type; import vector_with_lock; +import infinity_exception; +import mmap; namespace infinity { constexpr int MAX_TUPLE_LENGTH = 1024; // we assume that analyzed term, together 
with docid/offset info, will never exceed such length +#define USE_MMAP bool MemoryIndexer::KeyComp::operator()(const String &lhs, const String &rhs) const { int ret = strcmp(lhs.c_str(), rhs.c_str()); @@ -81,6 +85,8 @@ MemoryIndexer::MemoryIndexer(const String &index_dir, prepared_posting_ = MakeShared(nullptr, nullptr, PostingFormatOption(flag_), column_lengths_); Path path = Path(index_dir) / (base_name + ".tmp.merge"); spill_full_path_ = path.string(); + spill_buffer_size_ = MAX_TUPLE_LENGTH * 2; + spill_buffer_ = MakeUnique(spill_buffer_size_); } MemoryIndexer::~MemoryIndexer() { @@ -177,7 +183,8 @@ SizeT MemoryIndexer::CommitOffline(SizeT wait_if_empty_ms) { SizeT num = inverters.size(); if (num > 0) { for (auto &inverter : inverters) { - inverter->SpillSortResults(this->spill_file_handle_, this->tuple_count_); + // inverter->SpillSortResults(this->spill_file_handle_, this->tuple_count_); + inverter->SpillSortResults(this->spill_file_handle_, this->tuple_count_, spill_buffer_, spill_buffer_size_); num_runs_++; } } @@ -368,9 +375,21 @@ void MemoryIndexer::OfflineDump() { SortMerger *merger = new SortMerger(spill_full_path_.c_str(), num_runs_, buffer_size_of_each_run * num_runs_, 2); merger->Run(); delete merger; +#ifdef USE_MMAP + u8 *data_ptr = nullptr; + SizeT data_len = (SizeT)-1; + auto rc = MmapFile(spill_full_path_, data_ptr, data_len); + if (rc < 0) { + throw UnrecoverableException("MmapFile failed"); + } + SizeT idx = 0; + u64 count = ReadU64LE(data_ptr + idx); + idx += sizeof(u64); +#else FILE *f = fopen(spill_full_path_.c_str(), "r"); u64 count; fread((char *)&count, sizeof(u64), 1, f); +#endif Path path = Path(index_dir_) / base_name_; String index_prefix = path.string(); LocalFileSystem fs; @@ -392,16 +411,30 @@ void MemoryIndexer::OfflineDump() { UniquePtr posting; for (u64 i = 0; i < count; ++i) { +#ifdef USE_MMAP + record_length = ReadU32LE(data_ptr + idx); + idx += sizeof(u32); +#else fread(&record_length, sizeof(u32), 1, f); +#endif if (record_length >= MAX_TUPLE_LENGTH) { +#ifdef USE_MMAP + idx += record_length; +#else // rubbish tuple, abandoned char *buffer = new char[record_length]; fread(buffer, record_length, 1, f); // TermTuple tuple(buffer, record_length); delete[] buffer; +#endif continue; } +#ifdef USE_MMAP + memcpy(buf, data_ptr + idx, record_length); + idx += record_length; +#else fread(buf, record_length, 1, f); +#endif TermTuple tuple(buf, record_length); if (tuple.term_ != last_term) { assert(last_term < tuple.term_); @@ -431,6 +464,9 @@ void MemoryIndexer::OfflineDump() { posting->AddPosition(tuple.term_pos_); // printf(" pos-%u", tuple.term_pos_); } +#ifdef USE_MMAP + MunmapFile(data_ptr, data_len); +#endif if (last_doc_id != INVALID_DOCID) { posting->EndDocument(last_doc_id, 0); // printf(" EndDocument3-%u\n", last_doc_id); diff --git a/src/storage/invertedindex/memory_indexer.cppm b/src/storage/invertedindex/memory_indexer.cppm index 00e586119b..c3c3d0b360 100644 --- a/src/storage/invertedindex/memory_indexer.cppm +++ b/src/storage/invertedindex/memory_indexer.cppm @@ -120,6 +120,10 @@ private: void PrepareSpillFile(); + u32 ReadU32LE(const u8 *ptr) { return *(u32 *)ptr; } + + u64 ReadU64LE(const u8 *ptr) { return *(u64 *)ptr; } + private: String index_dir_; String base_name_; @@ -151,5 +155,9 @@ private: // for column length info VectorWithLock column_lengths_; Atomic column_length_sum_{0}; + + // spill file write buf + UniquePtr spill_buffer_{}; + SizeT spill_buffer_size_{0}; }; } // namespace infinity From 8cbb344c1a0f0a8caedc8df96882a85a8bb76625 
Mon Sep 17 00:00:00 2001 From: Ma-cat <1054638297@qq.com> Date: Wed, 15 May 2024 10:50:22 +0800 Subject: [PATCH 05/14] spill file write with more buf for creating index --- src/storage/invertedindex/column_inverter.cpp | 104 +++++++++++++++++- .../invertedindex/column_inverter.cppm | 4 + .../invertedindex/common/buf_writer.cppm | 40 +++++++ src/storage/invertedindex/common/mmap.cppm | 2 +- src/storage/invertedindex/memory_indexer.cpp | 40 ++++--- src/storage/invertedindex/memory_indexer.cppm | 2 + 6 files changed, 170 insertions(+), 22 deletions(-) create mode 100644 src/storage/invertedindex/common/buf_writer.cppm diff --git a/src/storage/invertedindex/column_inverter.cpp b/src/storage/invertedindex/column_inverter.cpp index aa1ff31f29..125c0f3c42 100644 --- a/src/storage/invertedindex/column_inverter.cpp +++ b/src/storage/invertedindex/column_inverter.cpp @@ -34,6 +34,7 @@ import infinity_exception; import third_party; import status; import logger; +import buf_writer; namespace infinity { @@ -341,11 +342,11 @@ void ColumnInverter::SpillSortResults(FILE *spill_file, u64 &tuple_count, Unique term = GetTermFromNum(last_term_num); } record_length = term.size() + sizeof(docid_t) + sizeof(u32) + 1; -// fwrite(&record_length, sizeof(u32), 1, spill_file); -// fwrite(term.data(), term.size(), 1, spill_file); -// fwrite(&str_null, sizeof(char), 1, spill_file); -// fwrite(&i.doc_id_, sizeof(docid_t), 1, spill_file); -// fwrite(&i.term_pos_, sizeof(u32), 1, spill_file); + // fwrite(&record_length, sizeof(u32), 1, spill_file); + // fwrite(term.data(), term.size(), 1, spill_file); + // fwrite(&str_null, sizeof(char), 1, spill_file); + // fwrite(&i.doc_id_, sizeof(docid_t), 1, spill_file); + // fwrite(&i.term_pos_, sizeof(u32), 1, spill_file); memcpy(spill_buffer.get() + spill_buf_idx, &record_length, sizeof(u32)); spill_buf_idx += sizeof(u32); @@ -376,5 +377,98 @@ void ColumnInverter::SpillSortResults(FILE *spill_file, u64 &tuple_count, Unique fseek(spill_file, next_start_offset, SEEK_SET); } +void ColumnInverter::SpillSortResults(FILE *spill_file, u64 &tuple_count, UniquePtr& buf_writer) { + // spill sort results for external merge sort + if (positions_.empty()) { + return; + } + // SizeT spill_buf_idx = 0; + SizeT spill_file_tell = ftell(spill_file); + // size of this Run in bytes + u32 data_size = 0; + u64 data_size_pos = spill_file_tell; + // fwrite(&data_size, sizeof(u32), 1, spill_file); +// memcpy(spill_buffer.get() + spill_buf_idx, &data_size, sizeof(u32)); +// spill_buf_idx += sizeof(u32); + buf_writer->Write((const char*)&data_size, sizeof(u32)); + spill_file_tell += sizeof(u32); + + // number of tuples + u32 num_of_tuples = positions_.size(); + tuple_count += num_of_tuples; + // fwrite(&num_of_tuples, sizeof(u32), 1, spill_file); +// memcpy(spill_buffer.get() + spill_buf_idx, &num_of_tuples, sizeof(u32)); +// spill_buf_idx += sizeof(u32); + buf_writer->Write((const char*)&num_of_tuples, sizeof(u32)); + spill_file_tell += sizeof(u32); + + // start offset for next spill + u64 next_start_offset = 0; + u64 next_start_offset_pos = spill_file_tell; + // u64 next_start_offset_pos = ftell(spill_file); + // fwrite(&next_start_offset, sizeof(u64), 1, spill_file); +// memcpy(spill_buffer.get() + spill_buf_idx, &next_start_offset, sizeof(u64)); +// spill_buf_idx += sizeof(u64); + buf_writer->Write((const char*)&next_start_offset, sizeof(u64)); + spill_file_tell += sizeof(u64); + + // assert(spill_buf_idx < spill_buf_size); +// fwrite(spill_buffer.get(), spill_buf_idx, 1, spill_file); +// spill_buf_idx 
= 0; + + // u64 data_start_offset = ftell(spill_file); + u64 data_start_offset = spill_file_tell; + // assert((SizeT)ftell(spill_file) == spill_file_tell); + // sorted data + u32 last_term_num = std::numeric_limits::max(); + StringRef term; + u32 record_length = 0; + char str_null = '\0'; + for (auto &i : positions_) { + if (last_term_num != i.term_num_) { + last_term_num = i.term_num_; + term = GetTermFromNum(last_term_num); + } + record_length = term.size() + sizeof(docid_t) + sizeof(u32) + 1; +// fwrite(&record_length, sizeof(u32), 1, spill_file); +// fwrite(term.data(), term.size(), 1, spill_file); +// fwrite(&str_null, sizeof(char), 1, spill_file); +// fwrite(&i.doc_id_, sizeof(docid_t), 1, spill_file); +// fwrite(&i.term_pos_, sizeof(u32), 1, spill_file); +// memcpy(spill_buffer.get() + spill_buf_idx, &record_length, sizeof(u32)); +// spill_buf_idx += sizeof(u32); +// +// memcpy(spill_buffer.get() + spill_buf_idx, term.data(), term.size()); +// spill_buf_idx += term.size(); +// +// memcpy(spill_buffer.get() + spill_buf_idx, &str_null, sizeof(char)); +// spill_buf_idx += sizeof(char); +// +// memcpy(spill_buffer.get() + spill_buf_idx, &i.doc_id_, sizeof(docid_t)); +// spill_buf_idx += sizeof(docid_t); +// +// memcpy(spill_buffer.get() + spill_buf_idx, &i.term_pos_, sizeof(u32)); +// spill_buf_idx += sizeof(u32); + buf_writer->Write((const char*)&record_length, sizeof(u32)); + buf_writer->Write(term.data(), term.size()); + buf_writer->Write((const char*)&str_null, sizeof(char)); + buf_writer->Write((const char*)&(i.doc_id_), sizeof(docid_t)); + buf_writer->Write((const char*)&(i.term_pos_), sizeof(u32)); + // assert(spill_buf_idx < spill_buf_size); + // fwrite(spill_buffer.get(), spill_buf_idx, 1, spill_file); + // spill_buf_idx = 0; + } + buf_writer->Flush(); + // update data size + // next_start_offset = ftell(spill_file); + next_start_offset = buf_writer->Tell(); + data_size = next_start_offset - data_start_offset; + fseek(spill_file, data_size_pos, SEEK_SET); + fwrite(&data_size, sizeof(u32), 1, spill_file); // update offset for next spill + fseek(spill_file, next_start_offset_pos, SEEK_SET); + fwrite(&next_start_offset, sizeof(u64), 1, spill_file); + fseek(spill_file, next_start_offset, SEEK_SET); +} + } // namespace infinity \ No newline at end of file diff --git a/src/storage/invertedindex/column_inverter.cppm b/src/storage/invertedindex/column_inverter.cppm index e9b97919d6..5a545e5db8 100644 --- a/src/storage/invertedindex/column_inverter.cppm +++ b/src/storage/invertedindex/column_inverter.cppm @@ -27,6 +27,7 @@ import string_ref; import internal_types; import posting_writer; import vector_with_lock; +import buf_writer; namespace infinity { @@ -73,8 +74,11 @@ public: void SpillSortResults(FILE *spill_file, u64 &tuple_count); + // fast void SpillSortResults(FILE *spill_file, u64 &tuple_count, UniquePtr& spill_buffer, SizeT spill_buf_size); + void SpillSortResults(FILE *spill_file, u64 &tuple_count, UniquePtr& buf_writer); + private: using TermBuffer = Vector; using PosInfoVec = Vector; diff --git a/src/storage/invertedindex/common/buf_writer.cppm b/src/storage/invertedindex/common/buf_writer.cppm new file mode 100644 index 0000000000..6e60077db6 --- /dev/null +++ b/src/storage/invertedindex/common/buf_writer.cppm @@ -0,0 +1,40 @@ +module; + +#include +#include + +export module buf_writer; +import stl; + +namespace infinity { + +export struct BufWriter { + BufWriter(FILE *spill_file, SizeT spill_buf_size) : spill_file_(spill_file), spill_buf_size_(spill_buf_size) { + 
spill_buffer_ = MakeUnique(spill_buf_size_); + } + + void Write(const char* data, SizeT data_size) { + if (spill_buf_idx_ + data_size >= spill_buf_size_) { + Flush(); + } + memcpy(spill_buffer_.get() + spill_buf_idx_, data, data_size); + spill_buf_idx_ += data_size; + } + + void Flush() { + if (spill_buf_idx_) { + fwrite(spill_buffer_.get(), spill_buf_idx_, 1, spill_file_); + spill_buf_idx_ = 0; + } + } + + SizeT Tell() { + return ftell(spill_file_); + } + + FILE *spill_file_{nullptr}; + SizeT spill_buf_idx_{0}; + UniquePtr spill_buffer_{}; + SizeT spill_buf_size_{0}; +}; +} \ No newline at end of file diff --git a/src/storage/invertedindex/common/mmap.cppm b/src/storage/invertedindex/common/mmap.cppm index 3056b8dc20..ff2202baef 100644 --- a/src/storage/invertedindex/common/mmap.cppm +++ b/src/storage/invertedindex/common/mmap.cppm @@ -74,7 +74,7 @@ export struct MmapReader { } SizeT ReadBuf(char* buf, SizeT len) { - if (idx_ + len < data_len_) { + if (idx_ + len <= data_len_) { memcpy(buf, data_ptr_ + idx_, len); idx_ += len; return len; diff --git a/src/storage/invertedindex/memory_indexer.cpp b/src/storage/invertedindex/memory_indexer.cpp index 0f03af6170..0f1c3b1d64 100644 --- a/src/storage/invertedindex/memory_indexer.cpp +++ b/src/storage/invertedindex/memory_indexer.cpp @@ -57,11 +57,13 @@ import file_system_type; import vector_with_lock; import infinity_exception; import mmap; +import buf_writer; namespace infinity { constexpr int MAX_TUPLE_LENGTH = 1024; // we assume that analyzed term, together with docid/offset info, will never exceed such length #define USE_MMAP - +#define USE_BUF +//#define USE_MORE_BUF bool MemoryIndexer::KeyComp::operator()(const String &lhs, const String &rhs) const { int ret = strcmp(lhs.c_str(), rhs.c_str()); return ret < 0; @@ -85,8 +87,10 @@ MemoryIndexer::MemoryIndexer(const String &index_dir, prepared_posting_ = MakeShared(nullptr, nullptr, PostingFormatOption(flag_), column_lengths_); Path path = Path(index_dir) / (base_name + ".tmp.merge"); spill_full_path_ = path.string(); +#ifdef USE_BUF spill_buffer_size_ = MAX_TUPLE_LENGTH * 2; spill_buffer_ = MakeUnique(spill_buffer_size_); +#endif } MemoryIndexer::~MemoryIndexer() { @@ -184,7 +188,12 @@ SizeT MemoryIndexer::CommitOffline(SizeT wait_if_empty_ms) { if (num > 0) { for (auto &inverter : inverters) { // inverter->SpillSortResults(this->spill_file_handle_, this->tuple_count_); +#ifdef USE_BUF inverter->SpillSortResults(this->spill_file_handle_, this->tuple_count_, spill_buffer_, spill_buffer_size_); + // inverter->SpillSortResults(this->spill_file_handle_, this->tuple_count_, buf_writer_); +#else + inverter->SpillSortResults(this->spill_file_handle_, this->tuple_count_); +#endif num_runs_++; } } @@ -376,15 +385,10 @@ void MemoryIndexer::OfflineDump() { merger->Run(); delete merger; #ifdef USE_MMAP - u8 *data_ptr = nullptr; - SizeT data_len = (SizeT)-1; - auto rc = MmapFile(spill_full_path_, data_ptr, data_len); - if (rc < 0) { - throw UnrecoverableException("MmapFile failed"); - } - SizeT idx = 0; - u64 count = ReadU64LE(data_ptr + idx); - idx += sizeof(u64); + MmapReader reader(spill_full_path_); + u64 count; + reader.ReadU64(count); + // idx += sizeof(u64); #else FILE *f = fopen(spill_full_path_.c_str(), "r"); u64 count; @@ -412,14 +416,14 @@ void MemoryIndexer::OfflineDump() { for (u64 i = 0; i < count; ++i) { #ifdef USE_MMAP - record_length = ReadU32LE(data_ptr + idx); - idx += sizeof(u32); + reader.ReadU32(record_length); #else fread(&record_length, sizeof(u32), 1, f); #endif if (record_length 
>= MAX_TUPLE_LENGTH) { #ifdef USE_MMAP - idx += record_length; + reader.Seek(record_length); + // idx += record_length; #else // rubbish tuple, abandoned char *buffer = new char[record_length]; @@ -430,8 +434,7 @@ void MemoryIndexer::OfflineDump() { continue; } #ifdef USE_MMAP - memcpy(buf, data_ptr + idx, record_length); - idx += record_length; + reader.ReadBuf(buf, record_length); #else fread(buf, record_length, 1, f); #endif @@ -465,7 +468,8 @@ void MemoryIndexer::OfflineDump() { // printf(" pos-%u", tuple.term_pos_); } #ifdef USE_MMAP - MunmapFile(data_ptr, data_len); + // MunmapFile(data_ptr, data_len); + // reader.MunmapFile(); #endif if (last_doc_id != INVALID_DOCID) { posting->EndDocument(last_doc_id, 0); @@ -504,6 +508,10 @@ void MemoryIndexer::FinalSpillFile() { void MemoryIndexer::PrepareSpillFile() { spill_file_handle_ = fopen(spill_full_path_.c_str(), "w"); fwrite(&tuple_count_, sizeof(u64), 1, spill_file_handle_); +#ifdef USE_MORE_BUF + const SizeT spill_buf_size = 128000; + buf_writer_ = MakeUnique(spill_file_handle_, spill_buf_size); +#endif } } // namespace infinity \ No newline at end of file diff --git a/src/storage/invertedindex/memory_indexer.cppm b/src/storage/invertedindex/memory_indexer.cppm index c3c3d0b360..f280d0ea4f 100644 --- a/src/storage/invertedindex/memory_indexer.cppm +++ b/src/storage/invertedindex/memory_indexer.cppm @@ -30,6 +30,7 @@ import skiplist; import internal_types; import map_with_lock; import vector_with_lock; +import buf_writer; namespace infinity { @@ -159,5 +160,6 @@ private: // spill file write buf UniquePtr spill_buffer_{}; SizeT spill_buffer_size_{0}; + UniquePtr buf_writer_; }; } // namespace infinity From bf8fac9d51b5f030037a9073d364fe568fca2582 Mon Sep 17 00:00:00 2001 From: Ma-cat <1054638297@qq.com> Date: Fri, 17 May 2024 15:59:48 +0800 Subject: [PATCH 06/14] remove the predict thread and output thread, use mmap for sequential reading and merge execution, and try three mmap reading methods --- src/storage/invertedindex/column_inverter.cpp | 10 + .../common/external_sort_merger.cpp | 246 +++++++++++------- .../common/external_sort_merger.cppm | 14 +- src/storage/invertedindex/common/mmap.cppm | 62 ++++- src/storage/invertedindex/memory_indexer.cpp | 10 + .../invertedindex/common/external_sort.cpp | 9 +- 6 files changed, 242 insertions(+), 109 deletions(-) diff --git a/src/storage/invertedindex/column_inverter.cpp b/src/storage/invertedindex/column_inverter.cpp index 125c0f3c42..438dbf750b 100644 --- a/src/storage/invertedindex/column_inverter.cpp +++ b/src/storage/invertedindex/column_inverter.cpp @@ -35,6 +35,8 @@ import third_party; import status; import logger; import buf_writer; +import profiler; +import third_party; namespace infinity { @@ -55,6 +57,8 @@ ColumnInverter::~ColumnInverter() = default; bool ColumnInverter::CompareTermRef::operator()(const u32 lhs, const u32 rhs) const { return std::strcmp(GetTerm(lhs), GetTerm(rhs)) < 0; } SizeT ColumnInverter::InvertColumn(SharedPtr column_vector, u32 row_offset, u32 row_count, u32 begin_doc_id) { + // BaseProfiler profiler; + // profiler.Begin(); begin_doc_id_ = begin_doc_id; doc_count_ = row_count; Vector column_lengths(row_count); @@ -69,6 +73,8 @@ SizeT ColumnInverter::InvertColumn(SharedPtr column_vector, u32 ro term_count_sum += term_count; } column_lengths_.SetBatch(begin_doc_id, column_lengths); + // LOG_INFO(fmt::format("ColumnInverter::InvertColumn time cost: {}", profiler.ElapsedToString())); + // profiler.End(); return term_count_sum; } @@ -234,8 +240,12 @@ void 
ColumnInverter::GeneratePosting() { } void ColumnInverter::SortForOfflineDump() { + // BaseProfiler profiler; + // profiler.Begin(); MergePrepare(); Sort(); + // LOG_INFO(fmt::format("ColumnInverter::SortForOfflineDump time cost: {}", profiler.ElapsedToString())); + // profiler.End(); } /// Layout of the input of external sort file diff --git a/src/storage/invertedindex/common/external_sort_merger.cpp b/src/storage/invertedindex/common/external_sort_merger.cpp index 677a5127a9..3536682428 100644 --- a/src/storage/invertedindex/common/external_sort_merger.cpp +++ b/src/storage/invertedindex/common/external_sort_merger.cpp @@ -17,11 +17,18 @@ module; #include #include #include -#include +#include +#include module external_sort_merger; import stl; +import mmap; +import third_party; +import file_writer; +import local_file_system; +import profiler; +import logger; namespace infinity { @@ -33,16 +40,16 @@ namespace infinity { } #define USE_LOSER_TREE -// #define USE_MMAP_IO +#define USE_MMAP_IO template SortMerger::SortMerger(const char *filenm, u32 group_size, u32 bs, u32 output_num) : filenm_(filenm), MAX_GROUP_SIZE_(group_size), BS_SIZE_(bs), PRE_BUF_SIZE_((u32)(1. * bs * 0.8 / (group_size + 1))), RUN_BUF_SIZE_(PRE_BUF_SIZE_ * group_size), OUT_BUF_SIZE_(bs - RUN_BUF_SIZE_ - PRE_BUF_SIZE_), OUT_BUF_NUM_(output_num) { pre_buf_ = run_buf_ = out_buf_ = nullptr; + count_ = 0; - pre_buf_size_ = pre_buf_num_ = count_ = 0; - + pre_buf_size_ = pre_buf_num_ = 0; micro_run_idx_ = new u32[MAX_GROUP_SIZE_]; micro_run_pos_ = new u32[MAX_GROUP_SIZE_]; num_micro_run_ = new u32[MAX_GROUP_SIZE_]; @@ -61,6 +68,16 @@ SortMerger::SortMerger(const char *filenm, u32 group_size, u32 out_buf_size_ = new u32[OUT_BUF_NUM_]; out_buf_full_ = new bool[OUT_BUF_NUM_]; + + curr_addr_.resize(MAX_GROUP_SIZE_, 0); + end_addr_.resize(MAX_GROUP_SIZE_, 0); + key_buf_.resize(MAX_GROUP_SIZE_); + key_buf_ptr_.resize(MAX_GROUP_SIZE_, nullptr); + mmap_io_streams_.resize(MAX_GROUP_SIZE_, nullptr); + for (u32 i = 0; i < MAX_GROUP_SIZE_; ++i) { + key_buf_[i] = MakeUnique(MAX_TUPLE_LENGTH + 100); + // mmap_io_streams_[i] = MakeShared(filenm_); + } #ifdef USE_LOSER_TREE merge_loser_tree_ = MakeShared>(MAX_GROUP_SIZE_); #endif @@ -223,30 +240,53 @@ void SortMerger::Init(DirectIO &io_stream) { size_micro_run_[i] = pos; } } -#ifdef USE_MMAP_IO -template -void SortMerger::Init(MmapIO &io_stream) { - // initialize three buffers - NewBuffer(); - // initiate output buffers - out_buf_size_[0] = 0; - sub_out_buf_[0] = out_buf_; - out_buf_full_[0] = false; - for (u32 i = 1; i < OUT_BUF_NUM_; ++i) { - sub_out_buf_[i] = sub_out_buf_[i - 1] + OUT_BUF_SIZE_ / OUT_BUF_NUM_; - out_buf_size_[i] = 0; - out_buf_full_[i] = false; - } - out_buf_in_idx_ = 0; - out_buf_out_idx_ = 0; +template +void SortMerger::ReadKeyAt(MmapReader &io_stream, u64 pos) { + auto file_pos = curr_addr_[pos]; +// if (file_pos != io_stream.Tell()) { +// io_stream.Seek(file_pos, true); +// } + // fmt::print("begin tell = {}\n", file_pos); + io_stream.Seek(file_pos, true); + LenType len; + io_stream.ReadBuf((char_t*)&len, sizeof(LenType)); + io_stream.Seek(file_pos, true); + io_stream.ReadBuf(key_buf_[pos].get(), len + sizeof(LenType)); +// fmt::print("len = {}, sizeof len_type = {}, key buf get len = {}, tell = {}\n", len, sizeof(LenType), *(LenType *)key_buf_[pos].get(), io_stream.Tell()); +// fmt::print("data = "); +// for (u32 i = 0; i < len; ++i) { +// fmt::print("{}", key_buf_[pos].get()[i + sizeof(LenType)]); +// } +// fmt::print("\n"); + curr_addr_[pos] = io_stream.Tell(); +} - 
// initiate the microrun buffer - micro_buf_[0] = run_buf_; - for (u32 i = 1; i < MAX_GROUP_SIZE_; ++i) { - micro_buf_[i] = micro_buf_[i - 1] + PRE_BUF_SIZE_; - } +template +void SortMerger::ReadKeyAtNonCopy(MmapReader &io_stream, u64 pos) { +// auto file_pos = curr_addr_[pos]; +// +// io_stream.Seek(file_pos, true); + // assert(curr_addr_[pos] == io_stream.Tell()); + LenType len; +// io_stream.ReadBuf((char_t*)&len, sizeof(LenType)); +// io_stream.Seek(file_pos, true); + key_buf_ptr_[pos] = io_stream.ReadBufNonCopy(sizeof(LenType)); + len = *(LenType *)key_buf_ptr_[pos]; + io_stream.ReadBufNonCopy(len); +// fmt::print("len = {}, key = ", len); +// for (u32 i = 0; i < len; ++i) { +// fmt::print("{}", key_buf_ptr_[pos][i + sizeof(LenType)]); +// } +// fmt::print("\n"); + + // io_stream.ReadBuf(key_buf_[pos].get(), len + sizeof(LenType)); + curr_addr_[pos] = io_stream.Tell(); +} +#ifdef USE_MMAP_IO +template +void SortMerger::Init(MmapReader &io_stream) { group_size_ = 0; u64 next_run_pos = 0; for (u32 i = 0; i < MAX_GROUP_SIZE_ && (u64)io_stream.Tell() < FILE_LEN_; ++i, ++group_size_) { @@ -255,90 +295,72 @@ void SortMerger::Init(MmapIO &io_stream) { // get the records number of a run io_stream.ReadU32(num_run_[i]); io_stream.ReadU64(next_run_pos); - - run_addr_[i] = io_stream.Tell(); - - // loading size of a microrun - u32 s = size_run_[i] > PRE_BUF_SIZE_ ? PRE_BUF_SIZE_ : size_run_[i]; - size_t ret = io_stream.ReadBuf(micro_buf_[i], s); - size_micro_run_[i] = ret; - size_loaded_run_[i] = ret; - run_curr_addr_[i] = io_stream.Tell(); - // std::cout << "num_run_[" << i << "] " << num_run_[i] << " size_run_ " << size_run_[i] << " size_micro_run " << size_micro_run_[i] - // << std::endl; - - /// it is not needed for compression, validation will be made within IOStream in that case - // if a record can fit in microrun buffer - bool flag = false; - while (*(LenType *)(micro_buf_[i]) + sizeof(LenType) > s) { - size_micro_run_[i] = 0; - --count_; - // LOG_WARN("[Warning]: A record is too long, it will be ignored"); - - io_stream.Seek(*(LenType *)(micro_buf_[i]) + sizeof(LenType) - s); - // io_stream.Seek(*(LenType *)(micro_buf_[i]) + sizeof(LenType) - s, SEEK_CUR); - - if (io_stream.Tell() - run_addr_[i] >= (u64)size_run_[i]) { - flag = true; - break; - } - - s = (u32)((u64)size_run_[i] - (io_stream.Tell() - run_addr_[i]) > PRE_BUF_SIZE_ ? 
PRE_BUF_SIZE_ - : (u64)size_run_[i] - (io_stream.Tell() - run_addr_[i])); - size_micro_run_[i] = s; - io_stream.Read(micro_buf_[i], s); - } - +// fmt::print("i = {}, size_run = {}, num_run = {}, next run pos:{}\n", i, size_run_[i], num_run_[i], next_run_pos); + assert(next_run_pos <= FILE_LEN_); + end_addr_[i] = next_run_pos; + curr_addr_[i] = io_stream.Tell(); + // fmt::print("curr_addr_[{}] = {}, end_addr_[{}] = {}\n", i, curr_addr_[i], i, end_addr_[i]); + mmap_io_streams_[i] = MakeShared(filenm_, curr_addr_[i], end_addr_[i] - curr_addr_[i]); + // mmap_io_streams_[i]->Seek(curr_addr_[i], true); #ifdef USE_LOSER_TREE - if (flag) { + if (curr_addr_[i] >= end_addr_[i]) { merge_loser_tree_->InsertStart(nullptr, static_cast::Source>(i), true); continue; } - auto key = KeyAddr(micro_buf_[i], -1, i); +// ReadKeyAt(io_stream, i); +// auto key = KeyAddr(key_buf_[i].get(), -1, i); + // read block use mmap, need update end_addr + end_addr_[i] = mmap_io_streams_[i]->DataLen(); + ReadKeyAtNonCopy(*mmap_io_streams_[i], i); + + auto key = KeyAddr(key_buf_ptr_[i], -1, i); merge_loser_tree_->InsertStart(&key, static_cast::Source>(i), false); #else - if (flag) { + if (curr_addr_[i] >= end_addr_[i]) { continue; } - merge_heap_.push(KeyAddr(micro_buf_[i], -1, i)); + ReadKeyAtNonCopy(io_stream, i); + merge_heap_.push(KeyAddr(key_buf_ptr_[i].get(), -1, i)); #endif - micro_run_idx_[i] = 1; - micro_run_pos_[i] = KeyAddr(micro_buf_[i], -1, i).LEN() + sizeof(LenType); - num_micro_run_[i] = 0; - - io_stream.Seek(next_run_pos); + io_stream.Seek(next_run_pos, true); } #ifdef USE_LOSER_TREE merge_loser_tree_->Init(); #endif - // initialize predict heap and records number of every microrun - for (u32 i = 0; i < group_size_; ++i) { - u32 pos = 0; - u32 last_pos = -1; - assert(i < MAX_GROUP_SIZE_); - if (size_micro_run_[i] <= 0) +} + +template +void SortMerger::MergeMmap(MmapReader &io_stream, SharedPtr out_file_writer) { +#ifdef USE_LOSER_TREE + while (merge_loser_tree_->TopSource() != LoserTree::invalid_) { + auto top = merge_loser_tree_->TopKey(); +#else + while (merge_heap_.size() > 0) { + KeyAddr top = merge_heap_.top(); + merge_heap_.pop(); +#endif + u32 idx = top.IDX(); + // fmt::print("idx = {}\n", idx); + out_file_writer->Write(top.data, top.LEN() + sizeof(LenType)); + assert(idx < MAX_GROUP_SIZE_); + // reach the end of a microrun + if (curr_addr_[idx] >= end_addr_[idx]) { + merge_loser_tree_->DeleteTopInsert(nullptr, true); + --count_; continue; - while (pos + sizeof(LenType) <= size_micro_run_[i]) { - LenType len = *(LenType *)(micro_buf_[i] + pos); - if (pos + sizeof(LenType) + len <= size_micro_run_[i]) { - num_micro_run_[i]++; - last_pos = pos; - pos += sizeof(LenType) + len; - } else { - break; - } } - // std::cout << "len " << len << " size_micro_run_[" << i << "] " << size_micro_run_[i] << std::endl; - assert(last_pos != (u32)-1); // buffer too small that can't hold one record - assert(last_pos + sizeof(LenType) <= size_micro_run_[i]); - assert(pos <= size_micro_run_[i]); - LenType len = (LenType)(pos - last_pos); - char *tmp = (char *)malloc(len); - memcpy(tmp, micro_buf_[i] + last_pos, len); - pre_heap_.push(KeyAddr(tmp, run_addr_[i] + pos, i)); - size_micro_run_[i] = pos; + assert(idx < MAX_GROUP_SIZE_); + +// ReadKeyAt(io_stream, idx); +// auto key = KeyAddr(key_buf_[idx].get(), -1, idx); + // ReadKeyAtNonCopy(io_stream, idx); + ReadKeyAtNonCopy(*mmap_io_streams_[idx], idx); + auto key = KeyAddr(key_buf_ptr_[idx], -1, idx); + merge_loser_tree_->DeleteTopInsert(&key, false); } + 
out_file_writer->Sync(); } + #endif template @@ -532,11 +554,38 @@ void SortMerger::Output(FILE *f, u32 idx) { template void SortMerger::Run() { + BaseProfiler profiler; + profiler.Begin(); #ifdef USE_MMAP_IO - MMappedIO io_stream(filenm_); + MmapReader io_stream(filenm_); FILE_LEN_ = io_stream.DataLen(); io_stream.ReadU64(count_); + // fmt::print("FILE LEN: {}, count: {}, read begin tell = {}\n", FILE_LEN_, count_, io_stream.Tell()); Init(io_stream); + +// FILE *out_f = fopen((filenm_ + ".out").c_str(), "w+"); +// IASSERT(out_f); +// IASSERT(fwrite(&count_, sizeof(u64), 1, out_f) == 1); + String out_file = filenm_ + ".out"; + LocalFileSystem fs; + SharedPtr out_file_writer = MakeShared(fs, out_file, 128000); + out_file_writer->Write((char*)&count_, sizeof(u64)); + + MergeMmap(io_stream, out_file_writer); + // out_file_writer->Sync(); +// Thread merge_thread(std::bind(&self_t::MergeMmap, this, std::ref(io_stream))); +// FILE *out_f = fopen((filenm_ + ".out").c_str(), "w+"); +// IASSERT(out_f); +// IASSERT(fwrite(&count_, sizeof(u64), 1, out_f) == 1); +// Vector out_thread(OUT_BUF_NUM_); +// for (u32 i = 0; i < OUT_BUF_NUM_; ++i) { +// out_thread[i] = new Thread(std::bind(&self_t::Output, this, out_f, i)); +// } +// merge_thread.join(); +// for (u32 i = 0; i < OUT_BUF_NUM_; ++i) { +// out_thread[i]->join(); +// delete out_thread[i]; +// } #else FILE *f = fopen(filenm_.c_str(), "r"); @@ -548,10 +597,7 @@ void SortMerger::Run() { Init(io_stream); Thread predict_thread(std::bind(&self_t::Predict, this, io_stream)); -#endif - Thread merge_thread(std::bind(&self_t::Merge, this)); - FILE *out_f = fopen((filenm_ + ".out").c_str(), "w+"); IASSERT(out_f); IASSERT(fwrite(&count_, sizeof(u64), 1, out_f) == 1); @@ -560,23 +606,23 @@ void SortMerger::Run() { for (u32 i = 0; i < OUT_BUF_NUM_; ++i) { out_thread[i] = new Thread(std::bind(&self_t::Output, this, out_f, i)); } -#ifndef USE_MMAP_IO + predict_thread.join(); -#endif merge_thread.join(); for (u32 i = 0; i < OUT_BUF_NUM_; ++i) { out_thread[i]->join(); delete out_thread[i]; } -#ifndef USE_MMAP_IO fclose(f); -#endif fclose(out_f); +#endif if (std::filesystem::exists(filenm_)) std::filesystem::remove(filenm_); if (std::filesystem::exists(filenm_ + ".out")) std::filesystem::rename(filenm_ + ".out", filenm_); + LOG_INFO(fmt::format("SortMerger::Run() time cost: {}", profiler.ElapsedToString())); + profiler.End(); } template class SortMerger; diff --git a/src/storage/invertedindex/common/external_sort_merger.cppm b/src/storage/invertedindex/common/external_sort_merger.cppm index eec6017787..462a531261 100644 --- a/src/storage/invertedindex/common/external_sort_merger.cppm +++ b/src/storage/invertedindex/common/external_sort_merger.cppm @@ -24,6 +24,7 @@ import stl; import loser_tree; import mmap; import infinity_exception; +import file_writer; namespace infinity { @@ -216,6 +217,7 @@ export template class SortMerger { typedef SortMerger self_t; typedef KeyAddress KeyAddr; + static constexpr SizeT MAX_TUPLE_LENGTH = 1024; String filenm_; const u32 MAX_GROUP_SIZE_; //!< max group size const u32 BS_SIZE_; //!< in fact it equals to memory size @@ -262,6 +264,11 @@ class SortMerger { u32 *out_buf_size_{nullptr}; //!< data size of each output buffer bool *out_buf_full_{nullptr}; //!< a flag to ensure if the output buffer is full or not + Vector curr_addr_; + Vector end_addr_; + Vector> key_buf_; + Vector key_buf_ptr_; + Vector> mmap_io_streams_; u64 count_; //!< records number u32 group_size_; //!< the real run number that can get from the input file. 
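// For reference, the zero-copy read pattern behind key_buf_ptr_ / ReadKeyAtNonCopy boils
// down to the sketch below. It assumes each spilled tuple is stored as a LenType length
// prefix followed by the payload; `base` and `idx` stand in for the mapped region and the
// read cursor, and the function name is illustrative, not the project's API.
#include <cstddef>
#include <cstring>

template <typename LenType>
const char *ReadTupleNonCopy(const char *base, std::size_t &idx) {
    const char *tuple = base + idx;             // points at the LenType prefix
    LenType len;
    std::memcpy(&len, tuple, sizeof(LenType));  // payload length stored just before the key bytes
    idx += sizeof(LenType) + len;               // advance the cursor past prefix + payload
    return tuple;                               // caller parses the key in place, nothing is copied out
}
// Compared with the DirectIO path, no bytes are copied into micro_buf_; the curr_addr_ /
// end_addr_ members added above only track how far each run's mmap window has been consumed.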
@@ -275,10 +282,15 @@ class SortMerger { void Merge(); + void MergeMmap(MmapReader &io_stream, SharedPtr out_file_writer); + void Output(FILE *f, u32 idx); - // void Init(MmapIO &io_stream); + void Init(MmapReader &io_stream); + + void ReadKeyAt(MmapReader &io_stream, u64 pos); + void ReadKeyAtNonCopy(MmapReader &io_stream, u64 pos); public: SortMerger(const char *filenm, u32 group_size = 4, u32 bs = 100000000, u32 output_num = 2); diff --git a/src/storage/invertedindex/common/mmap.cppm b/src/storage/invertedindex/common/mmap.cppm index ff2202baef..3826e3f366 100644 --- a/src/storage/invertedindex/common/mmap.cppm +++ b/src/storage/invertedindex/common/mmap.cppm @@ -35,9 +35,9 @@ export int MmapFile(const String &fp, u8 *&data_ptr, SizeT &data_len, int advice return 0; } -export int MunmapFile(u8 *&data_ptr, SizeT &data_len) { +export int MunmapFile(u8 *&data_ptr, SizeT &data_len, SizeT offset_diff = 0) { if (data_ptr != nullptr) { - int rc = munmap(data_ptr, data_len); + int rc = munmap(data_ptr - offset_diff, data_len + offset_diff); if (rc < 0) return -1; data_ptr = nullptr; @@ -47,20 +47,27 @@ export int MunmapFile(u8 *&data_ptr, SizeT &data_len) { } export struct MmapReader { - MmapReader(const String &filename, int advice = MADV_SEQUENTIAL) { - int rc = MmapFile(filename, data_ptr_, data_len_, advice); + MmapReader(const String &filename, SizeT offset = 0, SizeT len = SizeT(-1), int advice = MADV_SEQUENTIAL) { + // int rc = MmapFile(filename, data_ptr_, data_len_, advice); + // fmt::print("filename = {}, offset = {}, len = {}\n", filename, offset, len); + int rc = MmapPartFile(filename, data_ptr_, len, advice, offset); idx_ = 0; + data_len_ = len; if (rc < 0) { throw UnrecoverableException("MmapFile failed"); } } ~MmapReader() { - MunmapFile(data_ptr_, data_len_); + MunmapFile(data_ptr_, data_len_, offset_diff_); } - void Seek(SizeT diff) { - idx_ += diff; + void Seek(SizeT pos, bool set = false) { + if (set) { + idx_ = pos; + } else { + idx_ += pos; + } } void ReadU64(u64 &val) { @@ -86,6 +93,45 @@ export struct MmapReader { } } + char* ReadBufNonCopy(SizeT len) { + char* buf = (char*)(data_ptr_ + idx_); + idx_ = std::min(idx_ + len, data_len_); + return buf; + } + + int MmapPartFile(const String &fp, u8 *&data_ptr, SizeT &data_len, int advice = (MADV_RANDOM | MADV_DONTDUMP), SizeT offset = 0) { + data_ptr = nullptr; + long len_f = fs::file_size(fp); + if (len_f == 0) { + return -1; + } + if (data_len == SizeT(-1)) { + data_len = len_f; + } else if (data_len > (SizeT)len_f) { + return -1; + } + + SizeT page_size = getpagesize(); + + SizeT aligned_offset = offset & ~(page_size - 1); + offset_diff_ = offset - aligned_offset; + + SizeT mapped_length = data_len + offset_diff_; + // void* mapped = mmap(NULL, mapped_length, PROT_READ, MAP_SHARED, fd, aligned_offset); + + int f = open(fp.c_str(), O_RDONLY); + void *tmpd = mmap(NULL, mapped_length, PROT_READ, MAP_SHARED, f, aligned_offset); + if (tmpd == MAP_FAILED) + return -1; + close(f); + int rc = madvise(tmpd, data_len, advice); + if (rc < 0) + return -1; + data_ptr = (u8 *)tmpd + offset_diff_; + // data_len = len_f; + return 0; + } + SizeT Tell() { return idx_; } SizeT DataLen() { return data_len_; } @@ -95,6 +141,8 @@ export struct MmapReader { SizeT data_len_{0}; SizeT idx_{0}; + + SizeT offset_diff_{0}; }; } // namespace infinity diff --git a/src/storage/invertedindex/memory_indexer.cpp b/src/storage/invertedindex/memory_indexer.cpp index 0f1c3b1d64..1e818e5043 100644 --- a/src/storage/invertedindex/memory_indexer.cpp +++ 
b/src/storage/invertedindex/memory_indexer.cpp @@ -58,6 +58,8 @@ import vector_with_lock; import infinity_exception; import mmap; import buf_writer; +import profiler; +import third_party; namespace infinity { constexpr int MAX_TUPLE_LENGTH = 1024; // we assume that analyzed term, together with docid/offset info, will never exceed such length @@ -174,6 +176,8 @@ void MemoryIndexer::Commit(bool offline) { } SizeT MemoryIndexer::CommitOffline(SizeT wait_if_empty_ms) { + // BaseProfiler profiler; + // profiler.Begin(); std::unique_lock lock(mutex_commit_, std::defer_lock); if (!lock.try_lock()) { return 0; @@ -204,6 +208,8 @@ SizeT MemoryIndexer::CommitOffline(SizeT wait_if_empty_ms) { cv_.notify_all(); } } + // LOG_INFO(fmt::format("MemoryIndexer::CommitOffline time cost: {}", profiler.ElapsedToString())); + // profiler.End(); return num; } @@ -259,7 +265,11 @@ void MemoryIndexer::Dump(bool offline, bool spill) { while (GetInflightTasks() > 0) { CommitOffline(100); } + BaseProfiler profiler; + profiler.Begin(); OfflineDump(); + LOG_INFO(fmt::format("MemoryIndexer::OfflineDump() time cost: {}", profiler.ElapsedToString())); + profiler.End(); return; } diff --git a/src/unit_test/storage/invertedindex/common/external_sort.cpp b/src/unit_test/storage/invertedindex/common/external_sort.cpp index 6413826554..5439ec4a59 100644 --- a/src/unit_test/storage/invertedindex/common/external_sort.cpp +++ b/src/unit_test/storage/invertedindex/common/external_sort.cpp @@ -69,7 +69,7 @@ class ExternalSortTest : public BaseTest { u32 run_num = rand() % 300; while (run_num < 100 || SIZE % run_num != 0) run_num = rand() % 300; - + // fmt::print("begin tell = {}\n", ftell(f)); for (u32 i = 0; i < run_num; ++i) { u64 pos = ftell(f); fseek(f, 2 * sizeof(u32) + sizeof(u64), SEEK_CUR); @@ -77,8 +77,14 @@ class ExternalSortTest : public BaseTest { for (u32 j = 0; j < SIZE / run_num; ++j) { str = RandStr(i * SIZE / run_num + j); LenType len = str.size(); + // fmt::print("begin tell = {}\n", ftell(f)); fwrite(&len, sizeof(LenType), 1, f); fwrite(str.data(), len, 1, f); +// fmt::print("len: {}, str.size() = {}, size len_type = {}, tell = {}, str: ", len, str.size(), sizeof(LenType), ftell(f)); +// for (auto c : str) { +// fmt::print("{}",c); +// } +// fmt::print("\n"); s += len + sizeof(LenType); } u64 next_run_pos = ftell(f); @@ -87,6 +93,7 @@ class ExternalSortTest : public BaseTest { s = SIZE / run_num; fwrite(&s, sizeof(u32), 1, f); fwrite(&next_run_pos, sizeof(u64), 1, f); + // fmt::print("next_pos: {}\n", next_run_pos); fseek(f, 0, SEEK_END); } fclose(f); From f9b3cb3fd1c9cd0177aafceca7f1b82f8727fe0b Mon Sep 17 00:00:00 2001 From: Ma-cat <1054638297@qq.com> Date: Mon, 20 May 2024 13:19:52 +0800 Subject: [PATCH 07/14] refactor merge and predict, use CycleBuffer read more buf for merge and perdict --- .../common/external_sort_merger.cpp | 211 +++++++++++++++++- .../common/external_sort_merger.cppm | 118 ++++++++++ src/storage/invertedindex/memory_indexer.cpp | 6 +- 3 files changed, 331 insertions(+), 4 deletions(-) diff --git a/src/storage/invertedindex/common/external_sort_merger.cpp b/src/storage/invertedindex/common/external_sort_merger.cpp index 3536682428..53893800d9 100644 --- a/src/storage/invertedindex/common/external_sort_merger.cpp +++ b/src/storage/invertedindex/common/external_sort_merger.cpp @@ -40,7 +40,7 @@ namespace infinity { } #define USE_LOSER_TREE -#define USE_MMAP_IO +//#define USE_MMAP_IO template SortMerger::SortMerger(const char *filenm, u32 group_size, u32 bs, u32 output_num) @@ -74,10 +74,16 
@@ SortMerger::SortMerger(const char *filenm, u32 group_size, u32 key_buf_.resize(MAX_GROUP_SIZE_); key_buf_ptr_.resize(MAX_GROUP_SIZE_, nullptr); mmap_io_streams_.resize(MAX_GROUP_SIZE_, nullptr); + for (u32 i = 0; i < MAX_GROUP_SIZE_; ++i) { key_buf_[i] = MakeUnique(MAX_TUPLE_LENGTH + 100); // mmap_io_streams_[i] = MakeShared(filenm_); } + CYCLE_BUF_SIZE_ = MAX_GROUP_SIZE_ * 4; + CYCLE_BUF_THRESHOLD_ = MAX_GROUP_SIZE_ * 3; + // fmt::print("cycle buf size = {}, buf threshold = {}\n", CYCLE_BUF_SIZE_, CYCLE_BUF_THRESHOLD_); + assert(CYCLE_BUF_THRESHOLD_ <= CYCLE_BUF_SIZE_); + cycle_buffer_ = MakeUnique(CYCLE_BUF_SIZE_, PRE_BUF_SIZE_); #ifdef USE_LOSER_TREE merge_loser_tree_ = MakeShared>(MAX_GROUP_SIZE_); #endif @@ -433,6 +439,197 @@ void SortMerger::Predict(DirectIO &io_stream) { // LOG_INFO("Predicting is over..."); } +template +void SortMerger::PredictByQueue(DirectIO &io_stream) { + while (pre_heap_.size() > 0) { + KeyAddr top = pre_heap_.top(); + pre_heap_.pop(); + u64 addr = top.ADDR(); + u32 idx = top.IDX(); + free(top.data); + + std::unique_lock lock(cycle_buf_mtx_); + cycle_buf_con_.wait(lock, [this]() { return !this->cycle_buffer_->IsFull(); }); +// while (pre_buf_size_ != 0) +// pre_buf_con_.wait(lock); + +// while (cycle_buffer_->Size() >= 2 * MAX_GROUP_SIZE_) { +// pre_buf_con_.wait(lock); +// } + + assert(idx < MAX_GROUP_SIZE_); + // get loading size of a microrun + u32 s; + s = (u32)((u64)size_run_[idx] - (addr - run_addr_[idx])); + + if (s == 0) { + continue; + } + s = s > PRE_BUF_SIZE_ ? PRE_BUF_SIZE_ : s; + + // load microrun + io_stream.Seek(addr); + // s = io_stream.Read(pre_buf_, s); + auto data_ptr = cycle_buffer_->PutByRead(io_stream, s); + +// fmt::print("[Predict] cycle buffer idx = {}, read data: ", idx); +// for (SizeT i = 0; i < s; ++i) { +// fmt::print("{}", data_ptr[i]); +// } +// fmt::print("\n"); + + size_loaded_run_[idx] += s; + run_curr_addr_[idx] = io_stream.Tell(); + + u32 pos = 0; + u32 last_pos = -1; + pre_buf_num_ = 0; + while (1) { + if (pos + sizeof(LenType) > s) { + // the last record of this microrun + IASSERT(last_pos != (u32)-1); // buffer too small that can't hold one record + LenType len = *(LenType *)(data_ptr + last_pos) + sizeof(LenType); + char *tmp = (char *)malloc(len); + memcpy(tmp, data_ptr + last_pos, len); + pre_heap_.push(KeyAddr(tmp, addr + (u64)pos, idx)); + break; + } + LenType len = *(LenType *)(data_ptr + pos); + if (pos + sizeof(LenType) + len > s) { + // the last record of this microrun + IASSERT(last_pos != (u32)-1); // buffer too small that can't hold one record + len = *(LenType *)(data_ptr + last_pos) + sizeof(LenType); + char *tmp = (char *)malloc(len); + memcpy(tmp, data_ptr + last_pos, len); + pre_heap_.push(KeyAddr(tmp, addr + (u64)pos, idx)); + break; + } + + ++pre_buf_num_; + last_pos = pos; + pos += sizeof(LenType) + len; + } + pre_buf_size_ = pos; + cycle_buffer_->PutReal(pre_buf_size_, pre_buf_num_); + cycle_buf_con_.notify_one(); + } + // fmt::print("1-read finish\n"); + { + std::unique_lock lock(cycle_buf_mtx_); + // fmt::print("read finish\n"); + // pre_buf_size_ = -1; + read_finish_ = true; + cycle_buf_con_.notify_one(); + } + + // LOG_INFO("Predicting is over..."); +} + +template +void SortMerger::MergeByQueue() { +#ifdef USE_LOSER_TREE + while (merge_loser_tree_->TopSource() != LoserTree::invalid_) { + auto top = merge_loser_tree_->TopKey(); +#else + while (merge_heap_.size() > 0) { + KeyAddr top = merge_heap_.top(); + merge_heap_.pop(); +#endif + u32 idx = top.IDX(); + // fmt::print("loser tree pop idx = 
{}\n", idx); + // output + while (1) { + assert(out_buf_in_idx_ < OUT_BUF_NUM_); + std::unique_lock lock(in_out_mtx_[out_buf_in_idx_]); + while (out_buf_full_[out_buf_in_idx_]) + in_out_con_[out_buf_in_idx_].wait(lock); + + // if buffer is full + if (top.LEN() + sizeof(LenType) + out_buf_size_[out_buf_in_idx_] > OUT_BUF_SIZE_ / OUT_BUF_NUM_) { + IASSERT(out_buf_size_[out_buf_in_idx_] != 0); // output buffer chanel is smaller than size of a record + out_buf_full_[out_buf_in_idx_] = true; + u32 tmp = out_buf_in_idx_; + ++out_buf_in_idx_; + out_buf_in_idx_ %= OUT_BUF_NUM_; + in_out_con_[tmp].notify_one(); + continue; + } + + assert(out_buf_in_idx_ < OUT_BUF_NUM_); + memcpy(sub_out_buf_[out_buf_in_idx_] + out_buf_size_[out_buf_in_idx_], top.data, top.LEN() + sizeof(LenType)); + out_buf_size_[out_buf_in_idx_] += top.LEN() + sizeof(LenType); + + break; + } + + assert(idx < MAX_GROUP_SIZE_); + // reach the end of a microrun + if (micro_run_idx_[idx] == num_micro_run_[idx]) { + IASSERT(micro_run_pos_[idx] <= size_micro_run_[idx]); + std::unique_lock lock(cycle_buf_mtx_); + // fmt::print("cycle buffer size = {}\n", cycle_buffer_->Size()); + + cycle_buf_con_.wait(lock, [this]() { + return !this->cycle_buffer_->IsEmpty() || (this->read_finish_ && this->cycle_buffer_->IsEmpty()); + }); + + if (cycle_buffer_->IsEmpty() && read_finish_) { + merge_loser_tree_->DeleteTopInsert(nullptr, true); + continue; + } + + assert(idx < MAX_GROUP_SIZE_); + // fmt::print("cycle buffer size = {}, read_finish_ = {}\n", cycle_buffer_->Size(), read_finish_); + auto res = cycle_buffer_->Get(); + // micro_buf_[idx] = res.get<0>(); + pre_buf_size_ = std::get<1>(res); + pre_buf_num_ = std::get<2>(res); + memcpy(micro_buf_[idx], std::get<0>(res), pre_buf_size_); + +// fmt::print("[Merge] loser tree add idx = {}, pre_buf_size = {}, pre_buf_num = {}\n", idx, pre_buf_size_, pre_buf_num_); +// for (u32 i = 0; i < pre_buf_size_; ++i) { +// fmt::print("{}", micro_buf_[idx][i]); +// } +// fmt::print("\n"); + + size_micro_run_[idx] = pre_buf_size_; + num_micro_run_[idx] = pre_buf_num_; + micro_run_pos_[idx] = micro_run_idx_[idx] = pre_buf_num_ = pre_buf_size_ = 0; + + if (cycle_buffer_->Size() < CYCLE_BUF_THRESHOLD_) { + // fmt::print("cycle buffer size = {}\n", cycle_buffer_->Size()); + cycle_buf_con_.notify_one(); + } + // cycle_buf_con_.notify_one(); + // pre_buf_con_.notify_one(); + } + + assert(idx < MAX_GROUP_SIZE_); +#ifdef USE_LOSER_TREE + auto key = KeyAddr(micro_buf_[idx] + micro_run_pos_[idx], -1, idx); +// fmt::print("[Merge] add key idx = {}, key = ", idx); +// for (u32 i = 0; i < key.LEN() + sizeof(LenType); ++i) { +// fmt::print("{}", micro_buf_[idx][micro_run_pos_[idx] + i]); +// } +// fmt::print("\n"); + + merge_loser_tree_->DeleteTopInsert(&key, false); +#else + merge_heap_.push(KeyAddr(micro_buf_[idx] + micro_run_pos_[idx], -1, idx)); +#endif + ++micro_run_idx_[idx]; + micro_run_pos_[idx] += KeyAddr(micro_buf_[idx] + micro_run_pos_[idx], -1, idx).LEN() + sizeof(LenType); + } + { + assert(out_buf_in_idx_ < OUT_BUF_NUM_); + std::unique_lock lock(in_out_mtx_[out_buf_in_idx_]); + if (!out_buf_full_[out_buf_in_idx_] && out_buf_size_[out_buf_in_idx_] > 0) { + out_buf_full_[out_buf_in_idx_] = true; + in_out_con_[out_buf_in_idx_].notify_one(); + } + } +} + template void SortMerger::Merge() { #ifdef USE_LOSER_TREE @@ -552,10 +749,14 @@ void SortMerger::Output(FILE *f, u32 idx) { } } +#define PRINT_TIME_COST + template void SortMerger::Run() { +#ifdef PRINT_TIME_COST BaseProfiler profiler; profiler.Begin(); +#endif #ifdef 
USE_MMAP_IO MmapReader io_stream(filenm_); FILE_LEN_ = io_stream.DataLen(); @@ -596,8 +797,10 @@ void SortMerger::Run() { Init(io_stream); - Thread predict_thread(std::bind(&self_t::Predict, this, io_stream)); - Thread merge_thread(std::bind(&self_t::Merge, this)); + // Thread predict_thread(std::bind(&self_t::Predict, this, io_stream)); + // Thread merge_thread(std::bind(&self_t::Merge, this)); + Thread predict_thread(std::bind(&self_t::PredictByQueue, this, io_stream)); + Thread merge_thread(std::bind(&self_t::MergeByQueue, this)); FILE *out_f = fopen((filenm_ + ".out").c_str(), "w+"); IASSERT(out_f); IASSERT(fwrite(&count_, sizeof(u64), 1, out_f) == 1); @@ -621,8 +824,10 @@ void SortMerger::Run() { std::filesystem::remove(filenm_); if (std::filesystem::exists(filenm_ + ".out")) std::filesystem::rename(filenm_ + ".out", filenm_); +#ifdef PRINT_TIME_COST LOG_INFO(fmt::format("SortMerger::Run() time cost: {}", profiler.ElapsedToString())); profiler.End(); +#endif } template class SortMerger; diff --git a/src/storage/invertedindex/common/external_sort_merger.cppm b/src/storage/invertedindex/common/external_sort_merger.cppm index 462a531261..fedeed8d3f 100644 --- a/src/storage/invertedindex/common/external_sort_merger.cppm +++ b/src/storage/invertedindex/common/external_sort_merger.cppm @@ -212,6 +212,114 @@ struct KeyAddress { bool operator<(const KeyAddress &other) const { return Compare(other) > 0; } }; +class CycleBuffer { +public: + CycleBuffer(SizeT total_buffers, SizeT buffer_size) + : total_buffers_(total_buffers), buffer_size_(buffer_size), head_(0), tail_(0), full_(false) { + buffer_array_.resize(total_buffers); + buffer_real_size_.resize(total_buffers); + buffer_real_num_.resize(total_buffers); + for (auto& buf : buffer_array_) { + buf = MakeUnique(buffer_size); + } + } + + void Put(const char* data, SizeT length) { + if (length > buffer_size_) { + throw std::runtime_error("Data length exceeds buffer capacity"); + } + if (IsFull()) { + throw std::runtime_error("Buffer is full"); + } + + // Copy data into the current buffer + std::memcpy(buffer_array_[head_].get(), data, length); + head_ = (head_ + 1) % total_buffers_; + + if (head_ == tail_) { + full_ = true; + } + } + + void PutReal(const u32& real_size, const u32& real_num) { + buffer_real_size_[head_] = real_size; + buffer_real_num_[head_] = real_num; + + head_ = (head_ + 1) % total_buffers_; + + if (head_ == tail_) { + full_ = true; + } + } + + char* PutByRead(DirectIO &io_stream, u32& length) { + if (length > buffer_size_) { + throw std::runtime_error("Data length exceeds buffer capacity"); + } + if (IsFull()) { + throw std::runtime_error("Buffer is full"); + } + + // Read data into the current buffer + length = io_stream.Read(buffer_array_[head_].get(), length); + return buffer_array_[head_].get(); +// auto res_ptr = buffer_array_[head_].get(); +// head_ = (head_ + 1) % total_buffers_; +// +// if (head_ == tail_) { +// full_ = true; +// } +// return res_ptr; + } + + Tuple Get() { + // std::cout << "CycleBuffer::Get" << std::endl; + if (IsEmpty()) { + throw std::runtime_error("Buffer is empty"); + } + + const char* result_data = buffer_array_[tail_].get(); + auto result_real_size = buffer_real_size_[tail_]; + auto result_real_num = buffer_real_num_[tail_]; + tail_ = (tail_ + 1) % total_buffers_; + full_ = false; + + return std::make_tuple(result_data, result_real_size, result_real_num); + } + + void Reset() { + head_ = tail_ = 0; + full_ = false; + } + + bool IsEmpty() const { + return (!full_ && (head_ == tail_)); + } + + 
bool IsFull() const { + return full_; + } + + SizeT Size() const { + if (full_) { + return total_buffers_; + } + if (head_ >= tail_) { + return head_ - tail_; + } + return total_buffers_ + head_ - tail_; + } + +private: + Vector> buffer_array_; + Vector buffer_real_size_; + Vector buffer_real_num_; + SizeT total_buffers_; + SizeT buffer_size_; + SizeT head_; + SizeT tail_; + bool full_; +}; export template class SortMerger { @@ -269,6 +377,12 @@ class SortMerger { Vector> key_buf_; Vector key_buf_ptr_; Vector> mmap_io_streams_; + UniquePtr cycle_buffer_; + std::mutex cycle_buf_mtx_; + std::condition_variable cycle_buf_con_; + bool read_finish_{false}; + u32 CYCLE_BUF_SIZE_; + u32 CYCLE_BUF_THRESHOLD_; u64 count_; //!< records number u32 group_size_; //!< the real run number that can get from the input file. @@ -280,10 +394,14 @@ class SortMerger { void Predict(DirectIO &io_stream); + void PredictByQueue(DirectIO &io_stream); + void Merge(); void MergeMmap(MmapReader &io_stream, SharedPtr out_file_writer); + void MergeByQueue(); + void Output(FILE *f, u32 idx); void Init(MmapReader &io_stream); diff --git a/src/storage/invertedindex/memory_indexer.cpp b/src/storage/invertedindex/memory_indexer.cpp index 1e818e5043..5fd42c2edb 100644 --- a/src/storage/invertedindex/memory_indexer.cpp +++ b/src/storage/invertedindex/memory_indexer.cpp @@ -258,18 +258,22 @@ SizeT MemoryIndexer::CommitSync(SizeT wait_if_empty_ms) { return num_generated; } - +#define PRINT_TIME_COST void MemoryIndexer::Dump(bool offline, bool spill) { if (offline) { assert(!spill); while (GetInflightTasks() > 0) { CommitOffline(100); } +#ifdef PRINT_TIME_COST BaseProfiler profiler; profiler.Begin(); +#endif OfflineDump(); +#ifdef PRINT_TIME_COST LOG_INFO(fmt::format("MemoryIndexer::OfflineDump() time cost: {}", profiler.ElapsedToString())); profiler.End(); +#endif return; } From 93b0362a3a850ebb11aadae3246aa7fb019eab7d Mon Sep 17 00:00:00 2001 From: Ma-cat <1054638297@qq.com> Date: Wed, 22 May 2024 16:28:09 +0800 Subject: [PATCH 08/14] use queue to output merge results and add TermTupleList to reduce the output temporary data size --- .../common/external_sort_merger.cpp | 269 +++++++++++++++++- .../common/external_sort_merger.cppm | 177 +++++++++++- src/storage/invertedindex/memory_indexer.cpp | 149 +++++++++- src/storage/invertedindex/memory_indexer.cppm | 2 + .../storage/invertedindex/memory_indexer.cpp | 9 + 5 files changed, 595 insertions(+), 11 deletions(-) diff --git a/src/storage/invertedindex/common/external_sort_merger.cpp b/src/storage/invertedindex/common/external_sort_merger.cpp index 53893800d9..dbde03e607 100644 --- a/src/storage/invertedindex/common/external_sort_merger.cpp +++ b/src/storage/invertedindex/common/external_sort_merger.cpp @@ -19,6 +19,9 @@ module; #include #include #include +#include +#include +#include module external_sort_merger; @@ -81,9 +84,14 @@ SortMerger::SortMerger(const char *filenm, u32 group_size, u32 } CYCLE_BUF_SIZE_ = MAX_GROUP_SIZE_ * 4; CYCLE_BUF_THRESHOLD_ = MAX_GROUP_SIZE_ * 3; + OUT_BATCH_SIZE_ = 10240; // fmt::print("cycle buf size = {}, buf threshold = {}\n", CYCLE_BUF_SIZE_, CYCLE_BUF_THRESHOLD_); assert(CYCLE_BUF_THRESHOLD_ <= CYCLE_BUF_SIZE_); cycle_buffer_ = MakeUnique(CYCLE_BUF_SIZE_, PRE_BUF_SIZE_); + + const SizeT term_tuple_list_cycle_buf_size = MAX_GROUP_SIZE_ * 2; + const SizeT term_tuple_list_buf_size = 1024 + 2 * 1024 * 1024 + 100; + cycle_term_tuple_list_queue_ = MakeUnique(term_tuple_list_cycle_buf_size, term_tuple_list_buf_size); #ifdef USE_LOSER_TREE 
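    // Note: under USE_LOSER_TREE the k-way merge selects the next smallest tuple with a
    // tournament (loser) tree over MAX_GROUP_SIZE_ runs, so each DeleteTopInsert costs
    // O(log MAX_GROUP_SIZE_) comparisons; the plain merge_heap_ path is used when the
    // macro is not defined.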
merge_loser_tree_ = MakeShared>(MAX_GROUP_SIZE_); #endif @@ -536,8 +544,20 @@ void SortMerger::MergeByQueue() { merge_heap_.pop(); #endif u32 idx = top.IDX(); +// memcpy(sub_out_buf_[out_buf_in_idx_] + out_buf_size_[out_buf_in_idx_], top.data, top.LEN() + sizeof(LenType)); // fmt::print("loser tree pop idx = {}\n", idx); // output +// { +// std::unique_lock lock(out_queue_mtx_); +// auto data_len = top.LEN() + sizeof(LenType); +// auto top_data = MakeUnique(data_len + 10); +// memcpy(top_data.get(), top.data, data_len); +// out_queue_.push(std::move(top_data)); +// out_size_queue_.push(data_len); +// if (out_queue_.size() >= OUT_BATCH_SIZE_ / 10) { +// out_queue_con_.notify_one(); +// } +// } while (1) { assert(out_buf_in_idx_ < OUT_BUF_NUM_); std::unique_lock lock(in_out_mtx_[out_buf_in_idx_]); @@ -580,7 +600,7 @@ void SortMerger::MergeByQueue() { assert(idx < MAX_GROUP_SIZE_); // fmt::print("cycle buffer size = {}, read_finish_ = {}\n", cycle_buffer_->Size(), read_finish_); - auto res = cycle_buffer_->Get(); + auto res = cycle_buffer_->GetTuple(); // micro_buf_[idx] = res.get<0>(); pre_buf_size_ = std::get<1>(res); pre_buf_num_ = std::get<2>(res); @@ -596,6 +616,7 @@ void SortMerger::MergeByQueue() { num_micro_run_[idx] = pre_buf_num_; micro_run_pos_[idx] = micro_run_idx_[idx] = pre_buf_num_ = pre_buf_size_ = 0; +// cycle_buf_con_.notify_one(); if (cycle_buffer_->Size() < CYCLE_BUF_THRESHOLD_) { // fmt::print("cycle buffer size = {}\n", cycle_buffer_->Size()); cycle_buf_con_.notify_one(); @@ -621,6 +642,8 @@ void SortMerger::MergeByQueue() { micro_run_pos_[idx] += KeyAddr(micro_buf_[idx] + micro_run_pos_[idx], -1, idx).LEN() + sizeof(LenType); } { +// std::unique_lock lock(out_queue_mtx_); +// out_queue_con_.notify_one(); assert(out_buf_in_idx_ < OUT_BUF_NUM_); std::unique_lock lock(in_out_mtx_[out_buf_in_idx_]); if (!out_buf_full_[out_buf_in_idx_] && out_buf_size_[out_buf_in_idx_] > 0) { @@ -710,6 +733,47 @@ void SortMerger::Merge() { } } +template +void SortMerger::OutputByQueue(FILE *f) { + DirectIO io_stream(f, "w"); + while (count_ > 0) { + // wait its turn to output + Queue> temp_out_queue; + Queue temp_out_size_queue; + { + std::unique_lock out_lock(out_queue_mtx_); + out_queue_con_.wait(out_lock, [this]() { return !this->out_queue_.empty(); }); + + if (count_ == 0) { + break; + } + + auto write_cnt = OUT_BATCH_SIZE_; + + while (count_ > 0 && write_cnt > 0 && !out_queue_.empty()) { + // auto top_data = std::move(out_queue_.front()); + // auto data_len = out_size_queue_.front(); + temp_out_queue.push(std::move(out_queue_.front())); + temp_out_size_queue.push(out_size_queue_.front()); + out_queue_.pop(); + out_size_queue_.pop(); + + // io_stream.Write(top_data.get(), data_len); + --count_; + --write_cnt; + } + } + assert(temp_out_queue.size() == temp_out_size_queue.size()); + while(temp_out_queue.size()) { + auto top_data = std::move(temp_out_queue.front()); + auto data_len = temp_out_size_queue.front(); + temp_out_queue.pop(); + temp_out_size_queue.pop(); + io_stream.Write(top_data.get(), data_len); + } + } +} + template void SortMerger::Output(FILE *f, u32 idx) { DirectIO io_stream(f, "w"); @@ -805,6 +869,7 @@ void SortMerger::Run() { IASSERT(out_f); IASSERT(fwrite(&count_, sizeof(u64), 1, out_f) == 1); +// Thread out_thread(std::bind(&self_t::OutputByQueue, this, out_f)); Vector out_thread(OUT_BUF_NUM_); for (u32 i = 0; i < OUT_BUF_NUM_; ++i) { out_thread[i] = new Thread(std::bind(&self_t::Output, this, out_f, i)); @@ -812,24 +877,220 @@ void SortMerger::Run() { 
predict_thread.join(); merge_thread.join(); +// out_thread.join(); for (u32 i = 0; i < OUT_BUF_NUM_; ++i) { out_thread[i]->join(); delete out_thread[i]; } fclose(f); fclose(out_f); + +#endif + + if (std::filesystem::exists(filenm_)) { +// std::filesystem::remove(filenm_); + std::filesystem::rename(filenm_, filenm_ + ".backup"); + } + if (std::filesystem::exists(filenm_ + ".out")) { + std::filesystem::rename(filenm_ + ".out", filenm_); + } +#ifdef PRINT_TIME_COST + // LOG_INFO(fmt::format("SortMerger::Run() time cost: {}", profiler.ElapsedToString())); + fmt::print("SortMerger::Run() time cost: {}\n", profiler.ElapsedToString()); + profiler.End(); +#endif +} + +template +requires std::same_as +void SortMergerTerm::MergeByQueueTerm() { + UniquePtr tuple_list = nullptr; + u32 last_idx = -1; +#ifdef USE_LOSER_TREE + while (merge_loser_tree_->TopSource() != LoserTree::invalid_) { + auto top = merge_loser_tree_->TopKey(); +#else + while (merge_heap_.size() > 0) { + KeyAddr top = merge_heap_.top(); + merge_heap_.pop(); #endif + u32 idx = top.IDX(); + auto out_key = top.KEY(); + if (tuple_list == nullptr) { + tuple_list = MakeUnique(out_key.term_); + tuple_list->Add(out_key.doc_id_, out_key.term_pos_); + } else if (idx != last_idx) { + if (tuple_list->IsFull() || out_key.term_ != tuple_list->term_) { + // output + { + std::unique_lock lock(out_queue_mtx_); + term_tuple_list_queue_.push(std::move(tuple_list)); + out_queue_con_.notify_one(); + } + tuple_list = MakeUnique(out_key.term_); + tuple_list->Add(out_key.doc_id_, out_key.term_pos_); + } else { + tuple_list->Add(out_key.doc_id_, out_key.term_pos_); + } + } + + assert(idx < MAX_GROUP_SIZE_); + // reach the end of a microrun + if (micro_run_idx_[idx] == num_micro_run_[idx]) { + IASSERT(micro_run_pos_[idx] <= size_micro_run_[idx]); + std::unique_lock lock(cycle_buf_mtx_); + + cycle_buf_con_.wait(lock, [this]() { + return !this->cycle_buffer_->IsEmpty() || (this->read_finish_ && this->cycle_buffer_->IsEmpty()); + }); - if (std::filesystem::exists(filenm_)) + if (cycle_buffer_->IsEmpty() && read_finish_) { + merge_loser_tree_->DeleteTopInsert(nullptr, true); + continue; + } + + assert(idx < MAX_GROUP_SIZE_); + auto res = cycle_buffer_->GetTuple(); + pre_buf_size_ = std::get<1>(res); + pre_buf_num_ = std::get<2>(res); + memcpy(micro_buf_[idx], std::get<0>(res), pre_buf_size_); + + + size_micro_run_[idx] = pre_buf_size_; + num_micro_run_[idx] = pre_buf_num_; + micro_run_pos_[idx] = micro_run_idx_[idx] = pre_buf_num_ = pre_buf_size_ = 0; + + if (cycle_buffer_->Size() < CYCLE_BUF_THRESHOLD_) { + // fmt::print("cycle buffer size = {}\n", cycle_buffer_->Size()); + cycle_buf_con_.notify_one(); + } + // cycle_buf_con_.notify_one(); + } + + assert(idx < MAX_GROUP_SIZE_); + auto key = KeyAddr(micro_buf_[idx] + micro_run_pos_[idx], -1, idx); + merge_loser_tree_->DeleteTopInsert(&key, false); + + ++micro_run_idx_[idx]; + micro_run_pos_[idx] += KeyAddr(micro_buf_[idx] + micro_run_pos_[idx], -1, idx).LEN() + sizeof(LenType); + } + { + std::unique_lock lock(out_queue_mtx_); + if (tuple_list != nullptr) { + term_tuple_list_queue_.push(std::move(tuple_list)); + } + out_queue_con_.notify_one(); + } + fmt::print("MergeByQueueTerm finish\n"); +} + +template +requires std::same_as +void SortMergerTerm::OutputByQueueTerm(FILE *f) { + DirectIO io_stream(f, "w"); + while (count_ > 0) { + // wait its turn to output + UniquePtr temp_term_tuple; + // SizeT queue_size = 0; + { + std::unique_lock out_lock(out_queue_mtx_); + out_queue_con_.wait(out_lock, [this]() { return 
!this->term_tuple_list_queue_.empty(); }); + + if (count_ == 0) { + break; + } + + temp_term_tuple = std::move(term_tuple_list_queue_.front()); + ++term_list_count_; + term_tuple_list_queue_.pop(); + // queue_size = term_tuple_list_queue_.size(); + } + count_ -= temp_term_tuple->Size(); + // fmt::print("term = {}, count_ = {}, term queue size = {}\n", temp_term_tuple->term_, count_, queue_size); + /* + * data_len, term_len, doc_list_size, term, [doc_id, term_pos]... + */ + u32 term_len = temp_term_tuple->term_.size(); + u32 doc_list_size = temp_term_tuple->Size(); + u32 data_len = sizeof(u32) + sizeof(u32) + term_len + 2 * sizeof(u32) * doc_list_size; + + char buf[20]; + auto SIZE_U32 = sizeof(u32); + memcpy(buf, &data_len, SIZE_U32); + memcpy(buf + SIZE_U32, &term_len, SIZE_U32); + memcpy(buf + SIZE_U32 + SIZE_U32, &doc_list_size, SIZE_U32); + io_stream.Write(buf, SIZE_U32 * 3); + +// io_stream.Write((char*)(&data_len), sizeof(u32)); +// io_stream.Write((char*)(&term_len), sizeof(u32)); +// io_stream.Write((char*)(&term_len), sizeof(u32)); + io_stream.Write(temp_term_tuple->term_.data(), term_len); + io_stream.Write((char*)temp_term_tuple->doc_pos_list_.data(), SIZE_U32 * 2 * doc_list_size); + if (count_ == 0) { + io_stream.Seek(0, SEEK_SET); + io_stream.Write((char*)(&term_list_count_), sizeof(u64)); + term_list_count_ = 0; + break; + } + } + fmt::print("OutputByQueueTerm finish\n"); +} + +template +requires std::same_as +void SortMergerTerm::RunTerm() { +#ifdef PRINT_TIME_COST + BaseProfiler profiler; + profiler.Begin(); +#endif + FILE *f = fopen(filenm_.c_str(), "r"); + + DirectIO io_stream(f); + FILE_LEN_ = io_stream.Length(); + + term_list_count_ = 0; + io_stream.Read((char *)(&count_), sizeof(u64)); + + Super::Init(io_stream); + + Thread predict_thread(std::bind(&Super::PredictByQueue, this, io_stream)); + Thread merge_thread(std::bind(&self_t::MergeByQueueTerm, this)); + FILE *out_f = fopen((filenm_ + ".out").c_str(), "w+"); + IASSERT(out_f); + IASSERT(fwrite(&count_, sizeof(u64), 1, out_f) == 1); + + Thread out_thread(std::bind(&self_t::OutputByQueueTerm, this, out_f)); +// Vector out_thread(OUT_BUF_NUM_); +// for (u32 i = 0; i < OUT_BUF_NUM_; ++i) { +// out_thread[i] = new Thread(std::bind(&self_t::Output, this, out_f, i)); +// } + + predict_thread.join(); + merge_thread.join(); + out_thread.join(); +// for (u32 i = 0; i < OUT_BUF_NUM_; ++i) { +// out_thread[i]->join(); +// delete out_thread[i]; +// } + fclose(f); + fclose(out_f); + + if (std::filesystem::exists(filenm_)) { std::filesystem::remove(filenm_); - if (std::filesystem::exists(filenm_ + ".out")) +// std::filesystem::rename(filenm_, filenm_ + ".backup"); + } + if (std::filesystem::exists(filenm_ + ".out")) { std::filesystem::rename(filenm_ + ".out", filenm_); + } #ifdef PRINT_TIME_COST - LOG_INFO(fmt::format("SortMerger::Run() time cost: {}", profiler.ElapsedToString())); + // LOG_INFO(fmt::format("SortMerger::Run() time cost: {}", profiler.ElapsedToString())); + fmt::print("SortMergerTerm::RunTerm() time cost: {}\n", profiler.ElapsedToString()); profiler.End(); #endif } + template class SortMerger; template class SortMerger; +template class SortMergerTerm; } // namespace infinity \ No newline at end of file diff --git a/src/storage/invertedindex/common/external_sort_merger.cppm b/src/storage/invertedindex/common/external_sort_merger.cppm index fedeed8d3f..a04d933f0e 100644 --- a/src/storage/invertedindex/common/external_sort_merger.cppm +++ b/src/storage/invertedindex/common/external_sort_merger.cppm @@ -17,6 +17,8 @@ 
module; #include #include #include +#include +#include export module external_sort_merger; @@ -74,8 +76,9 @@ export struct TermTuple { } return 0; } - } else + } else { return ret < 0 ? -1 : 1; + } } bool operator==(const TermTuple &other) const { return Compare(other) == 0; } @@ -90,6 +93,30 @@ export struct TermTuple { } }; +export struct TermTupleList { + TermTupleList(std::string_view term, u32 list_size = 2 * 1024 * 1024) : term_(term) { + doc_pos_list_.reserve(list_size); + max_tuple_num_ = list_size / (sizeof(u32) * 2); + } + + bool IsFull() { + return doc_pos_list_.size() >= max_tuple_num_; + } + + void Add(u32 doc_id, u32 term_pos) { + doc_pos_list_.emplace_back(doc_id, term_pos); + } + + SizeT Size() const { + return doc_pos_list_.size(); + } + + String term_; + // + Vector> doc_pos_list_; + u32 max_tuple_num_{0}; +}; + template struct KeyAddress { char *data{nullptr}; @@ -224,6 +251,36 @@ public: } } + void Put(const TermTupleList& tuple_list) { + /* + * data_len, term_len, doc_list_size, term, [doc_id, term_pos]... + */ + u32 term_len = tuple_list.term_.size(); + u32 doc_list_size = tuple_list.Size(); + auto SIZE_U32 = sizeof(u32); + u32 data_len = SIZE_U32 + SIZE_U32 + term_len + 2 * SIZE_U32 * doc_list_size; + if (data_len > buffer_size_) { + throw std::runtime_error("Data length exceeds buffer capacity"); + } + SizeT idx = 0; + std::memcpy(buffer_array_[head_].get() + idx, &data_len, SIZE_U32); + idx += SIZE_U32; + std::memcpy(buffer_array_[head_].get() + idx, &term_len, SIZE_U32); + idx += SIZE_U32; + std::memcpy(buffer_array_[head_].get() + idx, &doc_list_size, SIZE_U32); + idx += SIZE_U32; + std::memcpy(buffer_array_[head_].get() + idx, tuple_list.term_.data(), term_len); + idx += term_len; + std::memcpy(buffer_array_[head_].get() + idx, tuple_list.doc_pos_list_.data(), SIZE_U32 * 2 * doc_list_size); + idx += SIZE_U32 * 2 * doc_list_size; + + head_ = (head_ + 1) % total_buffers_; + + if (head_ == tail_) { + full_ = true; + } + } + void Put(const char* data, SizeT length) { if (length > buffer_size_) { throw std::runtime_error("Data length exceeds buffer capacity"); @@ -272,7 +329,7 @@ public: // return res_ptr; } - Tuple Get() { + Tuple GetTuple() { // std::cout << "CycleBuffer::Get" << std::endl; if (IsEmpty()) { throw std::runtime_error("Buffer is empty"); @@ -287,6 +344,20 @@ public: return std::make_tuple(result_data, result_real_size, result_real_num); } + const char* Get() { + // std::cout << "CycleBuffer::Get" << std::endl; + if (IsEmpty()) { + throw std::runtime_error("Buffer is empty"); + } + + const char* result_data = buffer_array_[tail_].get(); + tail_ = (tail_ + 1) % total_buffers_; + full_ = false; + + return result_data; + } + + void Reset() { head_ = tail_ = 0; full_ = false; @@ -323,6 +394,7 @@ private: export template class SortMerger { +public: typedef SortMerger self_t; typedef KeyAddress KeyAddr; static constexpr SizeT MAX_TUPLE_LENGTH = 1024; @@ -380,6 +452,16 @@ class SortMerger { UniquePtr cycle_buffer_; std::mutex cycle_buf_mtx_; std::condition_variable cycle_buf_con_; + + std::mutex out_queue_mtx_; + std::condition_variable out_queue_con_; + Queue> out_queue_; + Queue out_size_queue_; + SizeT OUT_BATCH_SIZE_; + Queue> term_tuple_list_queue_; + + UniquePtr cycle_term_tuple_list_queue_; + bool read_finish_{false}; u32 CYCLE_BUF_SIZE_; u32 CYCLE_BUF_THRESHOLD_; @@ -404,15 +486,23 @@ class SortMerger { void Output(FILE *f, u32 idx); + void OutputByQueue(FILE* f); + + // void OutputByQueueTerm(FILE *f); + + // void MergeByQueueTerm(); + void 
Init(MmapReader &io_stream); void ReadKeyAt(MmapReader &io_stream, u64 pos); void ReadKeyAtNonCopy(MmapReader &io_stream, u64 pos); + + // void MergeByQueue(); public: SortMerger(const char *filenm, u32 group_size = 4, u32 bs = 100000000, u32 output_num = 2); - ~SortMerger(); + virtual ~SortMerger(); void SetParams(u32 max_record_len) { if (max_record_len > PRE_BUF_SIZE_) { @@ -433,7 +523,86 @@ public: OUT_BUF_SIZE_ = min_buff_size_required; } - void Run(); + virtual void Run(); +}; + +//export template +//class SortMergerTerm; + +export template +requires std::same_as +class SortMergerTerm : public SortMerger { +protected: + typedef SortMergerTerm self_t; + using Super = SortMerger; + using Super::filenm_; + using Super::MAX_GROUP_SIZE_; + using Super::BS_SIZE_; + using Super::PRE_BUF_SIZE_; + using Super::RUN_BUF_SIZE_; + using Super::OUT_BUF_SIZE_; + using Super::OUT_BUF_NUM_; + using Super::pre_heap_; + using Super::merge_heap_; + using Super::merge_loser_tree_; + using Super::micro_run_idx_; + using Super::micro_run_pos_; + using Super::num_micro_run_; + using Super::size_micro_run_; + using Super::num_run_; + using Super::size_run_; + using Super::size_loaded_run_; + using Super::run_addr_; + using Super::run_curr_addr_; + using Super::micro_buf_; + using Super::sub_out_buf_; + using Super::pre_buf_; + using Super::run_buf_; + using Super::out_buf_; + using Super::pre_buf_mtx_; + using Super::pre_buf_con_; + using Super::in_out_mtx_; + using Super::in_out_con_; + using Super::out_out_mtx_; + using Super::out_out_con_; + using Super::pre_buf_size_; + using Super::pre_buf_num_; + using Super::out_buf_in_idx_; + using Super::out_buf_out_idx_; + using Super::out_buf_size_; + using Super::out_buf_full_; + using Super::curr_addr_; + using Super::end_addr_; + using Super::key_buf_; + using Super::key_buf_ptr_; + using Super::mmap_io_streams_; + using Super::cycle_buffer_; + using Super::cycle_buf_mtx_; + using Super::cycle_buf_con_; + using Super::out_queue_mtx_; + using Super::out_queue_con_; + using Super::out_queue_; + using Super::out_size_queue_; + using Super::OUT_BATCH_SIZE_; + using Super::term_tuple_list_queue_; + using Super::read_finish_; + using Super::CYCLE_BUF_SIZE_; + using Super::CYCLE_BUF_THRESHOLD_; + using Super::count_; + using Super::group_size_; + using Super::FILE_LEN_; + using typename Super::KeyAddr; + using Super::MAX_TUPLE_LENGTH; + u64 term_list_count_{0}; + + void OutputByQueueTerm(FILE *f); + void MergeByQueueTerm(); + +public: + SortMergerTerm(const char *filenm, u32 group_size = 4, u32 bs = 100000000, u32 output_num = 2) + : Super(filenm, group_size, bs, output_num) {} + + void RunTerm(); }; } // namespace infinity diff --git a/src/storage/invertedindex/memory_indexer.cpp b/src/storage/invertedindex/memory_indexer.cpp index 5fd42c2edb..f8a7da43f0 100644 --- a/src/storage/invertedindex/memory_indexer.cpp +++ b/src/storage/invertedindex/memory_indexer.cpp @@ -269,9 +269,11 @@ void MemoryIndexer::Dump(bool offline, bool spill) { BaseProfiler profiler; profiler.Begin(); #endif - OfflineDump(); + OfflineDumpTermTupleList(); +// OfflineDump(); #ifdef PRINT_TIME_COST - LOG_INFO(fmt::format("MemoryIndexer::OfflineDump() time cost: {}", profiler.ElapsedToString())); +// LOG_INFO(fmt::format("MemoryIndexer::OfflineDump() time cost: {}", profiler.ElapsedToString())); + fmt::print("MemoryIndexer::OfflineDumpTermTupleList() time cost: {}\n", profiler.ElapsedToString()); profiler.End(); #endif return; @@ -389,13 +391,15 @@ void MemoryIndexer::OfflineDump() { // 1. 
External sort merge // 2. Generate posting // 3. Dump disk segment data - // LOG_INFO(fmt::format("MemoryIndexer::OfflineDump begin, num_runs_ {}", num_runs_)); + // LOG_INFO(fmt::format("MemoryIndexer::OfflineDump begin, num_runs_ {}\n", num_runs_)); if (tuple_count_ == 0) { return; } FinalSpillFile(); constexpr u32 buffer_size_of_each_run = 2 * 1024 * 1024; SortMerger *merger = new SortMerger(spill_full_path_.c_str(), num_runs_, buffer_size_of_each_run * num_runs_, 2); +// SortMergerTerm *merger = new SortMergerTerm(spill_full_path_.c_str(), num_runs_, buffer_size_of_each_run * num_runs_, 2); +// merger->RunTerm(); merger->Run(); delete merger; #ifdef USE_MMAP @@ -449,6 +453,7 @@ void MemoryIndexer::OfflineDump() { } #ifdef USE_MMAP reader.ReadBuf(buf, record_length); + // char* tuple_data = reader.ReadBufNonCopy(record_length); #else fread(buf, record_length, 1, f); #endif @@ -511,6 +516,144 @@ void MemoryIndexer::OfflineDump() { num_runs_ = 0; } +void MemoryIndexer::OfflineDumpTermTupleList() { + // Steps of offline dump: + // 1. External sort merge + // 2. Generate posting + // 3. Dump disk segment data + // LOG_INFO(fmt::format("MemoryIndexer::OfflineDump begin, num_runs_ {}\n", num_runs_)); + if (tuple_count_ == 0) { + return; + } + FinalSpillFile(); + constexpr u32 buffer_size_of_each_run = 2 * 1024 * 1024; + // SortMerger *merger = new SortMerger(spill_full_path_.c_str(), num_runs_, buffer_size_of_each_run * num_runs_, 2); + SortMergerTerm *merger = new SortMergerTerm(spill_full_path_.c_str(), num_runs_, buffer_size_of_each_run * num_runs_, 2); + merger->RunTerm(); + // merger->Run(); + delete merger; + + MmapReader reader(spill_full_path_); + u64 term_list_count; + reader.ReadU64(term_list_count); + + Path path = Path(index_dir_) / base_name_; + String index_prefix = path.string(); + LocalFileSystem fs; + String posting_file = index_prefix + POSTING_SUFFIX; + SharedPtr posting_file_writer = MakeShared(fs, posting_file, 128000); + String dict_file = index_prefix + DICT_SUFFIX; + SharedPtr dict_file_writer = MakeShared(fs, dict_file, 128000); + TermMetaDumper term_meta_dumpler((PostingFormatOption(flag_))); + String fst_file = index_prefix + DICT_SUFFIX + ".fst"; + std::ofstream ofs(fst_file.c_str(), std::ios::binary | std::ios::trunc); + OstreamWriter wtr(ofs); + FstBuilder fst_builder(wtr); + + u32 record_length = 0; + u32 term_length = 0; + u32 doc_pos_list_size = 0; + const u32 MAX_TUPLE_LIST_LENGTH = MAX_TUPLE_LENGTH + 2 * 1024 * 1024; + auto buf = MakeUnique(MAX_TUPLE_LIST_LENGTH); + // char buf[MAX_TUPLE_LENGTH]; + String last_term_str; + std::string_view last_term; + u32 last_doc_id = INVALID_DOCID; + UniquePtr posting; + + assert(record_length < MAX_TUPLE_LIST_LENGTH); + + for (u64 i = 0; i < term_list_count; ++i) { + + reader.ReadU32(record_length); + reader.ReadU32(term_length); + + if (term_length >= MAX_TUPLE_LENGTH) { + reader.Seek(record_length - sizeof(u32)); + continue; + } + + + reader.ReadBuf(buf.get(), record_length - sizeof(u32)); + u32 buf_idx = 0; + + doc_pos_list_size = *(u32 *)(buf.get() + buf_idx); + buf_idx += sizeof(u32); + + std::string_view term = std::string_view(buf.get() + buf_idx, term_length); + buf_idx += term_length; + + // TermTuple tuple(buf, record_length); + if (term != last_term) { + assert(last_term < term); + if (last_doc_id != INVALID_DOCID) { + posting->EndDocument(last_doc_id, 0); + // printf(" EndDocument1-%u\n", last_doc_id); + } + if (posting.get()) { + TermMeta term_meta(posting->GetDF(), posting->GetTotalTF()); + 
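                // Term boundary: flush the finished posting list to the posting file, append
                // its TermMeta to the dictionary file, and insert the term into the FST with
                // that dictionary offset as its value, so a lookup can map
                // term -> meta offset -> posting data.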
posting->Dump(posting_file_writer, term_meta); + SizeT term_meta_offset = dict_file_writer->TotalWrittenBytes(); + term_meta_dumpler.Dump(dict_file_writer, term_meta); + fst_builder.Insert((u8 *)last_term.data(), last_term.length(), term_meta_offset); + } + posting = MakeUnique(nullptr, nullptr, PostingFormatOption(flag_), column_lengths_); + // printf("\nswitched-term-%d-<%s>\n", i.term_num_, term.data()); + last_term_str = String(term); + last_term = std::string_view(last_term_str); + last_doc_id = INVALID_DOCID; + } + for (SizeT i = 0; i < doc_pos_list_size; ++i) { + u32& doc_id = *(u32 *)(buf.get() + buf_idx); + buf_idx += sizeof(u32); + u32& term_pos = *(u32 *)(buf.get() + buf_idx); + buf_idx += sizeof(u32); + + if (last_doc_id != INVALID_DOCID && last_doc_id != doc_id) { + // assert(last_doc_id != INVALID_DOCID); + assert(last_doc_id < doc_id); + assert(posting.get() != nullptr); + posting->EndDocument(last_doc_id, 0); + // printf(" EndDocument2-%u\n", last_doc_id); + } + last_doc_id = doc_id; + posting->AddPosition(term_pos); + } + +// last_doc_id = doc_id; +// posting->AddPosition(tuple.term_pos_); + // printf(" pos-%u", tuple.term_pos_); + } +#ifdef USE_MMAP + // MunmapFile(data_ptr, data_len); + // reader.MunmapFile(); +#endif + if (last_doc_id != INVALID_DOCID) { + posting->EndDocument(last_doc_id, 0); + // printf(" EndDocument3-%u\n", last_doc_id); + TermMeta term_meta(posting->GetDF(), posting->GetTotalTF()); + posting->Dump(posting_file_writer, term_meta); + SizeT term_meta_offset = dict_file_writer->TotalWrittenBytes(); + term_meta_dumpler.Dump(dict_file_writer, term_meta); + fst_builder.Insert((u8 *)last_term.data(), last_term.length(), term_meta_offset); + } + posting_file_writer->Sync(); + dict_file_writer->Sync(); + fst_builder.Finish(); + fs.AppendFile(dict_file, fst_file); + fs.DeleteFile(fst_file); + + String column_length_file = index_prefix + LENGTH_SUFFIX; + UniquePtr file_handler = fs.OpenFile(column_length_file, FileFlags::WRITE_FLAG | FileFlags::TRUNCATE_CREATE, FileLockType::kNoLock); + Vector &unsafe_column_lengths = column_lengths_.UnsafeVec(); + fs.Write(*file_handler, &unsafe_column_lengths[0], sizeof(unsafe_column_lengths[0]) * unsafe_column_lengths.size()); + fs.Close(*file_handler); + + std::filesystem::remove(spill_full_path_); + // LOG_INFO(fmt::format("MemoryIndexer::OfflineDump done, num_runs_ {}", num_runs_)); + num_runs_ = 0; +} + void MemoryIndexer::FinalSpillFile() { fseek(spill_file_handle_, 0, SEEK_SET); fwrite(&tuple_count_, sizeof(u64), 1, spill_file_handle_); diff --git a/src/storage/invertedindex/memory_indexer.cppm b/src/storage/invertedindex/memory_indexer.cppm index f280d0ea4f..730826996b 100644 --- a/src/storage/invertedindex/memory_indexer.cppm +++ b/src/storage/invertedindex/memory_indexer.cppm @@ -117,6 +117,8 @@ private: void OfflineDump(); + void OfflineDumpTermTupleList(); + void FinalSpillFile(); void PrepareSpillFile(); diff --git a/src/unit_test/storage/invertedindex/memory_indexer.cpp b/src/unit_test/storage/invertedindex/memory_indexer.cpp index d19ee3327a..3c7c2342b8 100644 --- a/src/unit_test/storage/invertedindex/memory_indexer.cpp +++ b/src/unit_test/storage/invertedindex/memory_indexer.cpp @@ -70,6 +70,15 @@ class MemoryIndexerTest : public BaseTest { R"#(An automaton can be said to recognize a string if we view the content of its tape as input. In other words, the automaton computes a function that maps strings into the set {0,1}. 
Alternatively, we can say that an automaton generates strings, which means viewing its tape as an output tape. On this view, the automaton generates a formal language, which is a set of strings. The two views of automata are equivalent: the function that the automaton computes is precisely the indicator function of the set of strings it generates. The class of languages generated by finite automata is known as the class of regular languages.)#", R"#(The two tapes of a transducer are typically viewed as an input tape and an output tape. On this view, a transducer is said to transduce (i.e., translate) the contents of its input tape to its output tape, by accepting a string on its input tape and generating another string on its output tape. It may do so nondeterministically and it may produce more than one output for each input string. A transducer may also produce no output for a given input string, in which case it is said to reject the input. In general, a transducer computes a relation between two formal languages.)#", }; + +// const char *paragraphs[] = { +// R"#(a, b, c)#", +// R"#(a, b)#", +// R"#(c, d)#", +// R"#(e, d)#", +// R"#(a, c)#", +// }; + const SizeT num_paragraph = sizeof(paragraphs) / sizeof(char *); column_ = ColumnVector::Make(MakeShared(LogicalType::kVarchar)); column_->Initialize(); From 41a84908d82964fa19b4f7d0dcf35194c094031c Mon Sep 17 00:00:00 2001 From: Ma-cat <1054638297@qq.com> Date: Thu, 23 May 2024 11:22:20 +0800 Subject: [PATCH 09/14] optimize predict thread lock granularity --- .../common/external_sort_merger.cpp | 62 ++++++++----------- .../common/external_sort_merger.cppm | 3 +- 2 files changed, 29 insertions(+), 36 deletions(-) diff --git a/src/storage/invertedindex/common/external_sort_merger.cpp b/src/storage/invertedindex/common/external_sort_merger.cpp index dbde03e607..cd2b5d4ff0 100644 --- a/src/storage/invertedindex/common/external_sort_merger.cpp +++ b/src/storage/invertedindex/common/external_sort_merger.cpp @@ -449,22 +449,13 @@ void SortMerger::Predict(DirectIO &io_stream) { template void SortMerger::PredictByQueue(DirectIO &io_stream) { + UniquePtr data_buf = MakeUnique(PRE_BUF_SIZE_); while (pre_heap_.size() > 0) { KeyAddr top = pre_heap_.top(); pre_heap_.pop(); u64 addr = top.ADDR(); u32 idx = top.IDX(); free(top.data); - - std::unique_lock lock(cycle_buf_mtx_); - cycle_buf_con_.wait(lock, [this]() { return !this->cycle_buffer_->IsFull(); }); -// while (pre_buf_size_ != 0) -// pre_buf_con_.wait(lock); - -// while (cycle_buffer_->Size() >= 2 * MAX_GROUP_SIZE_) { -// pre_buf_con_.wait(lock); -// } - assert(idx < MAX_GROUP_SIZE_); // get loading size of a microrun u32 s; @@ -477,14 +468,8 @@ void SortMerger::PredictByQueue(DirectIO &io_stream) { // load microrun io_stream.Seek(addr); - // s = io_stream.Read(pre_buf_, s); - auto data_ptr = cycle_buffer_->PutByRead(io_stream, s); - -// fmt::print("[Predict] cycle buffer idx = {}, read data: ", idx); -// for (SizeT i = 0; i < s; ++i) { -// fmt::print("{}", data_ptr[i]); -// } -// fmt::print("\n"); + io_stream.Read(data_buf.get(), s); + auto data_ptr = data_buf.get(); size_loaded_run_[idx] += s; run_curr_addr_[idx] = io_stream.Tell(); @@ -518,7 +503,12 @@ void SortMerger::PredictByQueue(DirectIO &io_stream) { pos += sizeof(LenType) + len; } pre_buf_size_ = pos; - cycle_buffer_->PutReal(pre_buf_size_, pre_buf_num_); + + std::unique_lock lock(cycle_buf_mtx_); + cycle_buf_con_.wait(lock, [this]() { return !this->cycle_buffer_->IsFull(); }); + // auto data_ptr = cycle_buffer_->PutByRead(io_stream, s); 
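        // Lock-granularity change: the microrun is now read and parsed into the thread-local
        // data_buf entirely outside cycle_buf_mtx_; the critical section shrinks to the
        // PutReal call below, which swaps data_buf into the ring-buffer slot and records
        // the parsed byte size and tuple count.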
+ + cycle_buffer_->PutReal(data_buf, pre_buf_size_, pre_buf_num_); cycle_buf_con_.notify_one(); } // fmt::print("1-read finish\n"); @@ -602,9 +592,9 @@ void SortMerger::MergeByQueue() { // fmt::print("cycle buffer size = {}, read_finish_ = {}\n", cycle_buffer_->Size(), read_finish_); auto res = cycle_buffer_->GetTuple(); // micro_buf_[idx] = res.get<0>(); - pre_buf_size_ = std::get<1>(res); - pre_buf_num_ = std::get<2>(res); - memcpy(micro_buf_[idx], std::get<0>(res), pre_buf_size_); + auto pre_buf_size = std::get<1>(res); + auto pre_buf_num = std::get<2>(res); + memcpy(micro_buf_[idx], std::get<0>(res), pre_buf_size); // fmt::print("[Merge] loser tree add idx = {}, pre_buf_size = {}, pre_buf_num = {}\n", idx, pre_buf_size_, pre_buf_num_); // for (u32 i = 0; i < pre_buf_size_; ++i) { @@ -612,9 +602,9 @@ void SortMerger::MergeByQueue() { // } // fmt::print("\n"); - size_micro_run_[idx] = pre_buf_size_; - num_micro_run_[idx] = pre_buf_num_; - micro_run_pos_[idx] = micro_run_idx_[idx] = pre_buf_num_ = pre_buf_size_ = 0; + size_micro_run_[idx] = pre_buf_size; + num_micro_run_[idx] = pre_buf_num; + micro_run_pos_[idx] = micro_run_idx_[idx] = 0; // cycle_buf_con_.notify_one(); if (cycle_buffer_->Size() < CYCLE_BUF_THRESHOLD_) { @@ -888,8 +878,8 @@ void SortMerger::Run() { #endif if (std::filesystem::exists(filenm_)) { -// std::filesystem::remove(filenm_); - std::filesystem::rename(filenm_, filenm_ + ".backup"); + std::filesystem::remove(filenm_); +// std::filesystem::rename(filenm_, filenm_ + ".backup"); } if (std::filesystem::exists(filenm_ + ".out")) { std::filesystem::rename(filenm_ + ".out", filenm_); @@ -951,14 +941,14 @@ void SortMergerTerm::MergeByQueueTerm() { assert(idx < MAX_GROUP_SIZE_); auto res = cycle_buffer_->GetTuple(); - pre_buf_size_ = std::get<1>(res); - pre_buf_num_ = std::get<2>(res); - memcpy(micro_buf_[idx], std::get<0>(res), pre_buf_size_); + auto pre_buf_size = std::get<1>(res); + auto pre_buf_num = std::get<2>(res); + memcpy(micro_buf_[idx], std::get<0>(res), pre_buf_size); - size_micro_run_[idx] = pre_buf_size_; - num_micro_run_[idx] = pre_buf_num_; - micro_run_pos_[idx] = micro_run_idx_[idx] = pre_buf_num_ = pre_buf_size_ = 0; + size_micro_run_[idx] = pre_buf_size; + num_micro_run_[idx] = pre_buf_num; + micro_run_pos_[idx] = micro_run_idx_[idx] = 0; if (cycle_buffer_->Size() < CYCLE_BUF_THRESHOLD_) { // fmt::print("cycle buffer size = {}\n", cycle_buffer_->Size()); @@ -1039,6 +1029,8 @@ void SortMergerTerm::OutputByQueueTerm(FILE *f) { template requires std::same_as void SortMergerTerm::RunTerm() { + LOG_INFO(fmt::format("begin run")); +// fmt::print("begin run\n"); #ifdef PRINT_TIME_COST BaseProfiler profiler; profiler.Begin(); @@ -1083,8 +1075,8 @@ void SortMergerTerm::RunTerm() { std::filesystem::rename(filenm_ + ".out", filenm_); } #ifdef PRINT_TIME_COST - // LOG_INFO(fmt::format("SortMerger::Run() time cost: {}", profiler.ElapsedToString())); - fmt::print("SortMergerTerm::RunTerm() time cost: {}\n", profiler.ElapsedToString()); + LOG_INFO(fmt::format("SortMerger::Run() time cost: {}", profiler.ElapsedToString())); +// fmt::print("SortMergerTerm::RunTerm() time cost: {}\n", profiler.ElapsedToString()); profiler.End(); #endif } diff --git a/src/storage/invertedindex/common/external_sort_merger.cppm b/src/storage/invertedindex/common/external_sort_merger.cppm index a04d933f0e..cbf9be6ce0 100644 --- a/src/storage/invertedindex/common/external_sort_merger.cppm +++ b/src/storage/invertedindex/common/external_sort_merger.cppm @@ -298,9 +298,10 @@ public: } } - void 
PutReal(const u32& real_size, const u32& real_num) { + void PutReal(UniquePtr& data_buf, const u32& real_size, const u32& real_num) { buffer_real_size_[head_] = real_size; buffer_real_num_[head_] = real_num; + std::swap(data_buf, buffer_array_[head_]); head_ = (head_ + 1) % total_buffers_; From 4aabc7576de18c00958bdf596f9efb5ace0b4387 Mon Sep 17 00:00:00 2001 From: Ma-cat <1054638297@qq.com> Date: Thu, 23 May 2024 13:57:05 +0800 Subject: [PATCH 10/14] delete some useless code --- src/storage/invertedindex/column_inverter.cpp | 60 +- .../common/external_sort_merger.cpp | 533 ++---------------- .../common/external_sort_merger.cppm | 101 +--- .../invertedindex/common/loser_tree.cppm | 31 +- src/storage/invertedindex/common/mmap.cppm | 6 +- src/storage/invertedindex/memory_indexer.cpp | 185 +----- src/storage/invertedindex/memory_indexer.cppm | 2 - src/unit_test/storage/common/loser_tree.cpp | 16 - .../invertedindex/common/external_sort.cpp | 8 - .../storage/invertedindex/memory_indexer.cpp | 8 - 10 files changed, 67 insertions(+), 883 deletions(-) diff --git a/src/storage/invertedindex/column_inverter.cpp b/src/storage/invertedindex/column_inverter.cpp index 438dbf750b..6fcb26d875 100644 --- a/src/storage/invertedindex/column_inverter.cpp +++ b/src/storage/invertedindex/column_inverter.cpp @@ -57,8 +57,6 @@ ColumnInverter::~ColumnInverter() = default; bool ColumnInverter::CompareTermRef::operator()(const u32 lhs, const u32 rhs) const { return std::strcmp(GetTerm(lhs), GetTerm(rhs)) < 0; } SizeT ColumnInverter::InvertColumn(SharedPtr column_vector, u32 row_offset, u32 row_count, u32 begin_doc_id) { - // BaseProfiler profiler; - // profiler.Begin(); begin_doc_id_ = begin_doc_id; doc_count_ = row_count; Vector column_lengths(row_count); @@ -73,8 +71,6 @@ SizeT ColumnInverter::InvertColumn(SharedPtr column_vector, u32 ro term_count_sum += term_count; } column_lengths_.SetBatch(begin_doc_id, column_lengths); - // LOG_INFO(fmt::format("ColumnInverter::InvertColumn time cost: {}", profiler.ElapsedToString())); - // profiler.End(); return term_count_sum; } @@ -240,12 +236,8 @@ void ColumnInverter::GeneratePosting() { } void ColumnInverter::SortForOfflineDump() { - // BaseProfiler profiler; - // profiler.Begin(); MergePrepare(); Sort(); - // LOG_INFO(fmt::format("ColumnInverter::SortForOfflineDump time cost: {}", profiler.ElapsedToString())); - // profiler.End(); } /// Layout of the input of external sort file @@ -312,7 +304,6 @@ void ColumnInverter::SpillSortResults(FILE *spill_file, u64 &tuple_count, Unique // size of this Run in bytes u32 data_size = 0; u64 data_size_pos = spill_file_tell; - // fwrite(&data_size, sizeof(u32), 1, spill_file); memcpy(spill_buffer.get() + spill_buf_idx, &data_size, sizeof(u32)); spill_buf_idx += sizeof(u32); spill_file_tell += sizeof(u32); @@ -320,7 +311,6 @@ void ColumnInverter::SpillSortResults(FILE *spill_file, u64 &tuple_count, Unique // number of tuples u32 num_of_tuples = positions_.size(); tuple_count += num_of_tuples; - // fwrite(&num_of_tuples, sizeof(u32), 1, spill_file); memcpy(spill_buffer.get() + spill_buf_idx, &num_of_tuples, sizeof(u32)); spill_buf_idx += sizeof(u32); spill_file_tell += sizeof(u32); @@ -328,8 +318,6 @@ void ColumnInverter::SpillSortResults(FILE *spill_file, u64 &tuple_count, Unique // start offset for next spill u64 next_start_offset = 0; u64 next_start_offset_pos = spill_file_tell; - // u64 next_start_offset_pos = ftell(spill_file); - // fwrite(&next_start_offset, sizeof(u64), 1, spill_file); memcpy(spill_buffer.get() + spill_buf_idx, 
&next_start_offset, sizeof(u64)); spill_buf_idx += sizeof(u64); spill_file_tell += sizeof(u64); @@ -338,7 +326,6 @@ void ColumnInverter::SpillSortResults(FILE *spill_file, u64 &tuple_count, Unique fwrite(spill_buffer.get(), spill_buf_idx, 1, spill_file); spill_buf_idx = 0; - // u64 data_start_offset = ftell(spill_file); u64 data_start_offset = spill_file_tell; assert((SizeT)ftell(spill_file) == spill_file_tell); // sorted data @@ -352,11 +339,6 @@ void ColumnInverter::SpillSortResults(FILE *spill_file, u64 &tuple_count, Unique term = GetTermFromNum(last_term_num); } record_length = term.size() + sizeof(docid_t) + sizeof(u32) + 1; - // fwrite(&record_length, sizeof(u32), 1, spill_file); - // fwrite(term.data(), term.size(), 1, spill_file); - // fwrite(&str_null, sizeof(char), 1, spill_file); - // fwrite(&i.doc_id_, sizeof(docid_t), 1, spill_file); - // fwrite(&i.term_pos_, sizeof(u32), 1, spill_file); memcpy(spill_buffer.get() + spill_buf_idx, &record_length, sizeof(u32)); spill_buf_idx += sizeof(u32); @@ -392,43 +374,26 @@ void ColumnInverter::SpillSortResults(FILE *spill_file, u64 &tuple_count, Unique if (positions_.empty()) { return; } - // SizeT spill_buf_idx = 0; SizeT spill_file_tell = ftell(spill_file); // size of this Run in bytes u32 data_size = 0; u64 data_size_pos = spill_file_tell; - // fwrite(&data_size, sizeof(u32), 1, spill_file); -// memcpy(spill_buffer.get() + spill_buf_idx, &data_size, sizeof(u32)); -// spill_buf_idx += sizeof(u32); buf_writer->Write((const char*)&data_size, sizeof(u32)); spill_file_tell += sizeof(u32); // number of tuples u32 num_of_tuples = positions_.size(); tuple_count += num_of_tuples; - // fwrite(&num_of_tuples, sizeof(u32), 1, spill_file); -// memcpy(spill_buffer.get() + spill_buf_idx, &num_of_tuples, sizeof(u32)); -// spill_buf_idx += sizeof(u32); buf_writer->Write((const char*)&num_of_tuples, sizeof(u32)); spill_file_tell += sizeof(u32); // start offset for next spill u64 next_start_offset = 0; u64 next_start_offset_pos = spill_file_tell; - // u64 next_start_offset_pos = ftell(spill_file); - // fwrite(&next_start_offset, sizeof(u64), 1, spill_file); -// memcpy(spill_buffer.get() + spill_buf_idx, &next_start_offset, sizeof(u64)); -// spill_buf_idx += sizeof(u64); buf_writer->Write((const char*)&next_start_offset, sizeof(u64)); spill_file_tell += sizeof(u64); - // assert(spill_buf_idx < spill_buf_size); -// fwrite(spill_buffer.get(), spill_buf_idx, 1, spill_file); -// spill_buf_idx = 0; - - // u64 data_start_offset = ftell(spill_file); u64 data_start_offset = spill_file_tell; - // assert((SizeT)ftell(spill_file) == spill_file_tell); // sorted data u32 last_term_num = std::numeric_limits::max(); StringRef term; @@ -440,37 +405,15 @@ void ColumnInverter::SpillSortResults(FILE *spill_file, u64 &tuple_count, Unique term = GetTermFromNum(last_term_num); } record_length = term.size() + sizeof(docid_t) + sizeof(u32) + 1; -// fwrite(&record_length, sizeof(u32), 1, spill_file); -// fwrite(term.data(), term.size(), 1, spill_file); -// fwrite(&str_null, sizeof(char), 1, spill_file); -// fwrite(&i.doc_id_, sizeof(docid_t), 1, spill_file); -// fwrite(&i.term_pos_, sizeof(u32), 1, spill_file); -// memcpy(spill_buffer.get() + spill_buf_idx, &record_length, sizeof(u32)); -// spill_buf_idx += sizeof(u32); -// -// memcpy(spill_buffer.get() + spill_buf_idx, term.data(), term.size()); -// spill_buf_idx += term.size(); -// -// memcpy(spill_buffer.get() + spill_buf_idx, &str_null, sizeof(char)); -// spill_buf_idx += sizeof(char); -// -// memcpy(spill_buffer.get() + 
spill_buf_idx, &i.doc_id_, sizeof(docid_t)); -// spill_buf_idx += sizeof(docid_t); -// -// memcpy(spill_buffer.get() + spill_buf_idx, &i.term_pos_, sizeof(u32)); -// spill_buf_idx += sizeof(u32); + buf_writer->Write((const char*)&record_length, sizeof(u32)); buf_writer->Write(term.data(), term.size()); buf_writer->Write((const char*)&str_null, sizeof(char)); buf_writer->Write((const char*)&(i.doc_id_), sizeof(docid_t)); buf_writer->Write((const char*)&(i.term_pos_), sizeof(u32)); - // assert(spill_buf_idx < spill_buf_size); - // fwrite(spill_buffer.get(), spill_buf_idx, 1, spill_file); - // spill_buf_idx = 0; } buf_writer->Flush(); // update data size - // next_start_offset = ftell(spill_file); next_start_offset = buf_writer->Tell(); data_size = next_start_offset - data_start_offset; fseek(spill_file, data_size_pos, SEEK_SET); @@ -480,5 +423,4 @@ void ColumnInverter::SpillSortResults(FILE *spill_file, u64 &tuple_count, Unique fseek(spill_file, next_start_offset, SEEK_SET); } - } // namespace infinity \ No newline at end of file diff --git a/src/storage/invertedindex/common/external_sort_merger.cpp b/src/storage/invertedindex/common/external_sort_merger.cpp index cd2b5d4ff0..a72c426afe 100644 --- a/src/storage/invertedindex/common/external_sort_merger.cpp +++ b/src/storage/invertedindex/common/external_sort_merger.cpp @@ -42,26 +42,19 @@ namespace infinity { } \ } -#define USE_LOSER_TREE -//#define USE_MMAP_IO - template SortMerger::SortMerger(const char *filenm, u32 group_size, u32 bs, u32 output_num) : filenm_(filenm), MAX_GROUP_SIZE_(group_size), BS_SIZE_(bs), PRE_BUF_SIZE_((u32)(1. * bs * 0.8 / (group_size + 1))), RUN_BUF_SIZE_(PRE_BUF_SIZE_ * group_size), OUT_BUF_SIZE_(bs - RUN_BUF_SIZE_ - PRE_BUF_SIZE_), OUT_BUF_NUM_(output_num) { - pre_buf_ = run_buf_ = out_buf_ = nullptr; + run_buf_ = out_buf_ = nullptr; count_ = 0; - pre_buf_size_ = pre_buf_num_ = 0; micro_run_idx_ = new u32[MAX_GROUP_SIZE_]; micro_run_pos_ = new u32[MAX_GROUP_SIZE_]; num_micro_run_ = new u32[MAX_GROUP_SIZE_]; size_micro_run_ = new u32[MAX_GROUP_SIZE_]; - num_run_ = new u32[MAX_GROUP_SIZE_]; size_run_ = new u32[MAX_GROUP_SIZE_]; - size_loaded_run_ = new u32[MAX_GROUP_SIZE_]; run_addr_ = new u64[MAX_GROUP_SIZE_]; - run_curr_addr_ = new u64[MAX_GROUP_SIZE_]; micro_buf_ = new char *[MAX_GROUP_SIZE_]; sub_out_buf_ = new char *[OUT_BUF_NUM_]; @@ -72,51 +65,32 @@ SortMerger::SortMerger(const char *filenm, u32 group_size, u32 out_buf_size_ = new u32[OUT_BUF_NUM_]; out_buf_full_ = new bool[OUT_BUF_NUM_]; - curr_addr_.resize(MAX_GROUP_SIZE_, 0); - end_addr_.resize(MAX_GROUP_SIZE_, 0); - key_buf_.resize(MAX_GROUP_SIZE_); - key_buf_ptr_.resize(MAX_GROUP_SIZE_, nullptr); - mmap_io_streams_.resize(MAX_GROUP_SIZE_, nullptr); - for (u32 i = 0; i < MAX_GROUP_SIZE_; ++i) { - key_buf_[i] = MakeUnique(MAX_TUPLE_LENGTH + 100); - // mmap_io_streams_[i] = MakeShared(filenm_); - } CYCLE_BUF_SIZE_ = MAX_GROUP_SIZE_ * 4; CYCLE_BUF_THRESHOLD_ = MAX_GROUP_SIZE_ * 3; OUT_BATCH_SIZE_ = 10240; - // fmt::print("cycle buf size = {}, buf threshold = {}\n", CYCLE_BUF_SIZE_, CYCLE_BUF_THRESHOLD_); assert(CYCLE_BUF_THRESHOLD_ <= CYCLE_BUF_SIZE_); cycle_buffer_ = MakeUnique(CYCLE_BUF_SIZE_, PRE_BUF_SIZE_); - const SizeT term_tuple_list_cycle_buf_size = MAX_GROUP_SIZE_ * 2; - const SizeT term_tuple_list_buf_size = 1024 + 2 * 1024 * 1024 + 100; - cycle_term_tuple_list_queue_ = MakeUnique(term_tuple_list_cycle_buf_size, term_tuple_list_buf_size); -#ifdef USE_LOSER_TREE merge_loser_tree_ = MakeShared>(MAX_GROUP_SIZE_); -#endif } template 
SortMerger::~SortMerger() { - if (pre_buf_) - free(pre_buf_); - - if (run_buf_) + if (run_buf_) { free(run_buf_); + } - if (out_buf_) + if (out_buf_) { free(out_buf_); + } delete[] micro_run_idx_; delete[] micro_run_pos_; delete[] num_micro_run_; delete[] size_micro_run_; - delete[] num_run_; delete[] size_run_; - delete[] size_loaded_run_; delete[] run_addr_; - delete[] run_curr_addr_; delete[] micro_buf_; delete[] sub_out_buf_; @@ -130,14 +104,13 @@ SortMerger::~SortMerger() { template void SortMerger::NewBuffer() { - if (!pre_buf_) - pre_buf_ = (char *)malloc(PRE_BUF_SIZE_); - - if (!run_buf_) + if (!run_buf_) { run_buf_ = (char *)malloc(RUN_BUF_SIZE_); + } - if (!out_buf_) + if (!out_buf_) { out_buf_ = (char *)malloc(OUT_BUF_SIZE_); + } } template @@ -159,17 +132,19 @@ void SortMerger::Init(DirectIO &io_stream) { // initiate the microrun buffer micro_buf_[0] = run_buf_; - for (u32 i = 1; i < MAX_GROUP_SIZE_; ++i) + for (u32 i = 1; i < MAX_GROUP_SIZE_; ++i) { micro_buf_[i] = micro_buf_[i - 1] + PRE_BUF_SIZE_; + } // group_size_ = 0; u64 next_run_pos = 0; + u32 num_run = 0; for (u32 i = 0; i < MAX_GROUP_SIZE_ && (u64)io_stream.Tell() < FILE_LEN_; ++i, ++group_size_) { // get the size of run io_stream.Read((char *)(size_run_ + i), sizeof(u32)); // get the records number of a run - io_stream.Read((char *)(num_run_ + i), sizeof(u32)); + io_stream.Read((char *)(&num_run), sizeof(u32)); io_stream.Read((char *)(&next_run_pos), sizeof(u64)); run_addr_[i] = io_stream.Tell(); // ftell(f); @@ -178,10 +153,6 @@ void SortMerger::Init(DirectIO &io_stream) { u32 s = size_run_[i] > PRE_BUF_SIZE_ ? PRE_BUF_SIZE_ : size_run_[i]; size_t ret = io_stream.Read(micro_buf_[i], s); size_micro_run_[i] = ret; - size_loaded_run_[i] = ret; - run_curr_addr_[i] = io_stream.Tell(); - // std::cout << "num_run_[" << i << "] " << num_run_[i] << " size_run_ " << size_run_[i] << " size_micro_run " << size_micro_run_[i] - // << std::endl; /// it is not needed for compression, validation will be made within IOStream in that case // if a record can fit in microrun buffer @@ -189,7 +160,6 @@ void SortMerger::Init(DirectIO &io_stream) { while (*(LenType *)(micro_buf_[i]) + sizeof(LenType) > s) { size_micro_run_[i] = 0; --count_; - // LOG_WARN("[Warning]: A record is too long, it will be ignored"); io_stream.Seek(*(LenType *)(micro_buf_[i]) + sizeof(LenType) - s, SEEK_CUR); @@ -204,28 +174,23 @@ void SortMerger::Init(DirectIO &io_stream) { io_stream.Read(micro_buf_[i], s); } -#ifdef USE_LOSER_TREE if (flag) { merge_loser_tree_->InsertStart(nullptr, static_cast::Source>(i), true); continue; } + auto key = KeyAddr(micro_buf_[i], -1, i); merge_loser_tree_->InsertStart(&key, static_cast::Source>(i), false); -#else - if (flag) { - continue; - } - merge_heap_.push(KeyAddr(micro_buf_[i], -1, i)); -#endif + micro_run_idx_[i] = 1; micro_run_pos_[i] = KeyAddr(micro_buf_[i], -1, i).LEN() + sizeof(LenType); num_micro_run_[i] = 0; io_stream.Seek(next_run_pos); } -#ifdef USE_LOSER_TREE + merge_loser_tree_->Init(); -#endif + // initialize predict heap and records number of every microrun for (u32 i = 0; i < group_size_; ++i) { u32 pos = 0; @@ -243,7 +208,6 @@ void SortMerger::Init(DirectIO &io_stream) { break; } } - // std::cout << "len " << len << " size_micro_run_[" << i << "] " << size_micro_run_[i] << std::endl; assert(last_pos != (u32)-1); // buffer too small that can't hold one record assert(last_pos + sizeof(LenType) <= size_micro_run_[i]); assert(pos <= size_micro_run_[i]); @@ -255,200 +219,8 @@ void SortMerger::Init(DirectIO 
&io_stream) { } } -template -void SortMerger::ReadKeyAt(MmapReader &io_stream, u64 pos) { - auto file_pos = curr_addr_[pos]; -// if (file_pos != io_stream.Tell()) { -// io_stream.Seek(file_pos, true); -// } - // fmt::print("begin tell = {}\n", file_pos); - io_stream.Seek(file_pos, true); - LenType len; - io_stream.ReadBuf((char_t*)&len, sizeof(LenType)); - io_stream.Seek(file_pos, true); - io_stream.ReadBuf(key_buf_[pos].get(), len + sizeof(LenType)); -// fmt::print("len = {}, sizeof len_type = {}, key buf get len = {}, tell = {}\n", len, sizeof(LenType), *(LenType *)key_buf_[pos].get(), io_stream.Tell()); -// fmt::print("data = "); -// for (u32 i = 0; i < len; ++i) { -// fmt::print("{}", key_buf_[pos].get()[i + sizeof(LenType)]); -// } -// fmt::print("\n"); - curr_addr_[pos] = io_stream.Tell(); -} - -template -void SortMerger::ReadKeyAtNonCopy(MmapReader &io_stream, u64 pos) { -// auto file_pos = curr_addr_[pos]; -// -// io_stream.Seek(file_pos, true); - // assert(curr_addr_[pos] == io_stream.Tell()); - LenType len; -// io_stream.ReadBuf((char_t*)&len, sizeof(LenType)); -// io_stream.Seek(file_pos, true); - key_buf_ptr_[pos] = io_stream.ReadBufNonCopy(sizeof(LenType)); - len = *(LenType *)key_buf_ptr_[pos]; - io_stream.ReadBufNonCopy(len); -// fmt::print("len = {}, key = ", len); -// for (u32 i = 0; i < len; ++i) { -// fmt::print("{}", key_buf_ptr_[pos][i + sizeof(LenType)]); -// } -// fmt::print("\n"); - - // io_stream.ReadBuf(key_buf_[pos].get(), len + sizeof(LenType)); - curr_addr_[pos] = io_stream.Tell(); -} - -#ifdef USE_MMAP_IO -template -void SortMerger::Init(MmapReader &io_stream) { - group_size_ = 0; - u64 next_run_pos = 0; - for (u32 i = 0; i < MAX_GROUP_SIZE_ && (u64)io_stream.Tell() < FILE_LEN_; ++i, ++group_size_) { - // get the size of run - io_stream.ReadU32(size_run_[i]); - // get the records number of a run - io_stream.ReadU32(num_run_[i]); - io_stream.ReadU64(next_run_pos); -// fmt::print("i = {}, size_run = {}, num_run = {}, next run pos:{}\n", i, size_run_[i], num_run_[i], next_run_pos); - assert(next_run_pos <= FILE_LEN_); - end_addr_[i] = next_run_pos; - curr_addr_[i] = io_stream.Tell(); - // fmt::print("curr_addr_[{}] = {}, end_addr_[{}] = {}\n", i, curr_addr_[i], i, end_addr_[i]); - mmap_io_streams_[i] = MakeShared(filenm_, curr_addr_[i], end_addr_[i] - curr_addr_[i]); - // mmap_io_streams_[i]->Seek(curr_addr_[i], true); -#ifdef USE_LOSER_TREE - if (curr_addr_[i] >= end_addr_[i]) { - merge_loser_tree_->InsertStart(nullptr, static_cast::Source>(i), true); - continue; - } -// ReadKeyAt(io_stream, i); -// auto key = KeyAddr(key_buf_[i].get(), -1, i); - // read block use mmap, need update end_addr - end_addr_[i] = mmap_io_streams_[i]->DataLen(); - ReadKeyAtNonCopy(*mmap_io_streams_[i], i); - - auto key = KeyAddr(key_buf_ptr_[i], -1, i); - merge_loser_tree_->InsertStart(&key, static_cast::Source>(i), false); -#else - if (curr_addr_[i] >= end_addr_[i]) { - continue; - } - ReadKeyAtNonCopy(io_stream, i); - merge_heap_.push(KeyAddr(key_buf_ptr_[i].get(), -1, i)); -#endif - io_stream.Seek(next_run_pos, true); - } -#ifdef USE_LOSER_TREE - merge_loser_tree_->Init(); -#endif -} - -template -void SortMerger::MergeMmap(MmapReader &io_stream, SharedPtr out_file_writer) { -#ifdef USE_LOSER_TREE - while (merge_loser_tree_->TopSource() != LoserTree::invalid_) { - auto top = merge_loser_tree_->TopKey(); -#else - while (merge_heap_.size() > 0) { - KeyAddr top = merge_heap_.top(); - merge_heap_.pop(); -#endif - u32 idx = top.IDX(); - // fmt::print("idx = {}\n", idx); - 
out_file_writer->Write(top.data, top.LEN() + sizeof(LenType)); - assert(idx < MAX_GROUP_SIZE_); - // reach the end of a microrun - if (curr_addr_[idx] >= end_addr_[idx]) { - merge_loser_tree_->DeleteTopInsert(nullptr, true); - --count_; - continue; - } - assert(idx < MAX_GROUP_SIZE_); - -// ReadKeyAt(io_stream, idx); -// auto key = KeyAddr(key_buf_[idx].get(), -1, idx); - // ReadKeyAtNonCopy(io_stream, idx); - ReadKeyAtNonCopy(*mmap_io_streams_[idx], idx); - auto key = KeyAddr(key_buf_ptr_[idx], -1, idx); - merge_loser_tree_->DeleteTopInsert(&key, false); - } - out_file_writer->Sync(); -} - -#endif - template void SortMerger::Predict(DirectIO &io_stream) { - while (pre_heap_.size() > 0) { - KeyAddr top = pre_heap_.top(); - pre_heap_.pop(); - u64 addr = top.ADDR(); - u32 idx = top.IDX(); - free(top.data); - - std::unique_lock lock(pre_buf_mtx_); - - while (pre_buf_size_ != 0) - pre_buf_con_.wait(lock); - - assert(idx < MAX_GROUP_SIZE_); - // get loading size of a microrun - u32 s; - s = (u32)((u64)size_run_[idx] - (addr - run_addr_[idx])); - - if (s == 0) { - continue; - } - s = s > PRE_BUF_SIZE_ ? PRE_BUF_SIZE_ : s; - - // load microrun - io_stream.Seek(addr); - s = io_stream.Read(pre_buf_, s); - size_loaded_run_[idx] += s; - run_curr_addr_[idx] = io_stream.Tell(); - - u32 pos = 0; - u32 last_pos = -1; - pre_buf_num_ = 0; - while (1) { - if (pos + sizeof(LenType) > s) { - // the last record of this microrun - IASSERT(last_pos != (u32)-1); // buffer too small that can't hold one record - LenType len = *(LenType *)(pre_buf_ + last_pos) + sizeof(LenType); - char *tmp = (char *)malloc(len); - memcpy(tmp, pre_buf_ + last_pos, len); - pre_heap_.push(KeyAddr(tmp, addr + (u64)pos, idx)); - break; - } - LenType len = *(LenType *)(pre_buf_ + pos); - if (pos + sizeof(LenType) + len > s) { - // the last record of this microrun - IASSERT(last_pos != (u32)-1); // buffer too small that can't hold one record - len = *(LenType *)(pre_buf_ + last_pos) + sizeof(LenType); - char *tmp = (char *)malloc(len); - memcpy(tmp, pre_buf_ + last_pos, len); - pre_heap_.push(KeyAddr(tmp, addr + (u64)pos, idx)); - break; - } - - ++pre_buf_num_; - last_pos = pos; - pos += sizeof(LenType) + len; - } - pre_buf_size_ = pos; - pre_buf_con_.notify_one(); - } - { - std::unique_lock lock(pre_buf_mtx_); - pre_buf_size_ = -1; - pre_buf_con_.notify_one(); - } - - // LOG_INFO("Predicting is over..."); -} - -template -void SortMerger::PredictByQueue(DirectIO &io_stream) { UniquePtr data_buf = MakeUnique(PRE_BUF_SIZE_); while (pre_heap_.size() > 0) { KeyAddr top = pre_heap_.top(); @@ -471,12 +243,10 @@ void SortMerger::PredictByQueue(DirectIO &io_stream) { io_stream.Read(data_buf.get(), s); auto data_ptr = data_buf.get(); - size_loaded_run_[idx] += s; - run_curr_addr_[idx] = io_stream.Tell(); - u32 pos = 0; u32 last_pos = -1; - pre_buf_num_ = 0; + u32 pre_buf_num = 0; + u32 pre_buf_size = 0; while (1) { if (pos + sizeof(LenType) > s) { // the last record of this microrun @@ -489,7 +259,6 @@ void SortMerger::PredictByQueue(DirectIO &io_stream) { } LenType len = *(LenType *)(data_ptr + pos); if (pos + sizeof(LenType) + len > s) { - // the last record of this microrun IASSERT(last_pos != (u32)-1); // buffer too small that can't hold one record len = *(LenType *)(data_ptr + last_pos) + sizeof(LenType); char *tmp = (char *)malloc(len); @@ -498,56 +267,30 @@ void SortMerger::PredictByQueue(DirectIO &io_stream) { break; } - ++pre_buf_num_; + ++pre_buf_num; last_pos = pos; pos += sizeof(LenType) + len; } - pre_buf_size_ = pos; + pre_buf_size = 
pos; std::unique_lock lock(cycle_buf_mtx_); cycle_buf_con_.wait(lock, [this]() { return !this->cycle_buffer_->IsFull(); }); - // auto data_ptr = cycle_buffer_->PutByRead(io_stream, s); - cycle_buffer_->PutReal(data_buf, pre_buf_size_, pre_buf_num_); + cycle_buffer_->PutReal(data_buf, pre_buf_size, pre_buf_num); cycle_buf_con_.notify_one(); } - // fmt::print("1-read finish\n"); { std::unique_lock lock(cycle_buf_mtx_); - // fmt::print("read finish\n"); - // pre_buf_size_ = -1; read_finish_ = true; cycle_buf_con_.notify_one(); } - - // LOG_INFO("Predicting is over..."); } template -void SortMerger::MergeByQueue() { -#ifdef USE_LOSER_TREE +void SortMerger::Merge() { while (merge_loser_tree_->TopSource() != LoserTree::invalid_) { auto top = merge_loser_tree_->TopKey(); -#else - while (merge_heap_.size() > 0) { - KeyAddr top = merge_heap_.top(); - merge_heap_.pop(); -#endif u32 idx = top.IDX(); -// memcpy(sub_out_buf_[out_buf_in_idx_] + out_buf_size_[out_buf_in_idx_], top.data, top.LEN() + sizeof(LenType)); - // fmt::print("loser tree pop idx = {}\n", idx); - // output -// { -// std::unique_lock lock(out_queue_mtx_); -// auto data_len = top.LEN() + sizeof(LenType); -// auto top_data = MakeUnique(data_len + 10); -// memcpy(top_data.get(), top.data, data_len); -// out_queue_.push(std::move(top_data)); -// out_size_queue_.push(data_len); -// if (out_queue_.size() >= OUT_BATCH_SIZE_ / 10) { -// out_queue_con_.notify_one(); -// } -// } while (1) { assert(out_buf_in_idx_ < OUT_BUF_NUM_); std::unique_lock lock(in_out_mtx_[out_buf_in_idx_]); @@ -577,7 +320,6 @@ void SortMerger::MergeByQueue() { if (micro_run_idx_[idx] == num_micro_run_[idx]) { IASSERT(micro_run_pos_[idx] <= size_micro_run_[idx]); std::unique_lock lock(cycle_buf_mtx_); - // fmt::print("cycle buffer size = {}\n", cycle_buffer_->Size()); cycle_buf_con_.wait(lock, [this]() { return !this->cycle_buffer_->IsEmpty() || (this->read_finish_ && this->cycle_buffer_->IsEmpty()); @@ -589,127 +331,23 @@ void SortMerger::MergeByQueue() { } assert(idx < MAX_GROUP_SIZE_); - // fmt::print("cycle buffer size = {}, read_finish_ = {}\n", cycle_buffer_->Size(), read_finish_); auto res = cycle_buffer_->GetTuple(); - // micro_buf_[idx] = res.get<0>(); auto pre_buf_size = std::get<1>(res); auto pre_buf_num = std::get<2>(res); memcpy(micro_buf_[idx], std::get<0>(res), pre_buf_size); -// fmt::print("[Merge] loser tree add idx = {}, pre_buf_size = {}, pre_buf_num = {}\n", idx, pre_buf_size_, pre_buf_num_); -// for (u32 i = 0; i < pre_buf_size_; ++i) { -// fmt::print("{}", micro_buf_[idx][i]); -// } -// fmt::print("\n"); - size_micro_run_[idx] = pre_buf_size; num_micro_run_[idx] = pre_buf_num; micro_run_pos_[idx] = micro_run_idx_[idx] = 0; -// cycle_buf_con_.notify_one(); if (cycle_buffer_->Size() < CYCLE_BUF_THRESHOLD_) { - // fmt::print("cycle buffer size = {}\n", cycle_buffer_->Size()); cycle_buf_con_.notify_one(); } - // cycle_buf_con_.notify_one(); - // pre_buf_con_.notify_one(); - } - - assert(idx < MAX_GROUP_SIZE_); -#ifdef USE_LOSER_TREE - auto key = KeyAddr(micro_buf_[idx] + micro_run_pos_[idx], -1, idx); -// fmt::print("[Merge] add key idx = {}, key = ", idx); -// for (u32 i = 0; i < key.LEN() + sizeof(LenType); ++i) { -// fmt::print("{}", micro_buf_[idx][micro_run_pos_[idx] + i]); -// } -// fmt::print("\n"); - - merge_loser_tree_->DeleteTopInsert(&key, false); -#else - merge_heap_.push(KeyAddr(micro_buf_[idx] + micro_run_pos_[idx], -1, idx)); -#endif - ++micro_run_idx_[idx]; - micro_run_pos_[idx] += KeyAddr(micro_buf_[idx] + micro_run_pos_[idx], -1, 
idx).LEN() + sizeof(LenType); - } - { -// std::unique_lock lock(out_queue_mtx_); -// out_queue_con_.notify_one(); - assert(out_buf_in_idx_ < OUT_BUF_NUM_); - std::unique_lock lock(in_out_mtx_[out_buf_in_idx_]); - if (!out_buf_full_[out_buf_in_idx_] && out_buf_size_[out_buf_in_idx_] > 0) { - out_buf_full_[out_buf_in_idx_] = true; - in_out_con_[out_buf_in_idx_].notify_one(); - } - } -} - -template -void SortMerger::Merge() { -#ifdef USE_LOSER_TREE - while (merge_loser_tree_->TopSource() != LoserTree::invalid_) { - auto top = merge_loser_tree_->TopKey(); -#else - while (merge_heap_.size() > 0) { - KeyAddr top = merge_heap_.top(); - merge_heap_.pop(); -#endif - u32 idx = top.IDX(); - - // output - while (1) { - assert(out_buf_in_idx_ < OUT_BUF_NUM_); - std::unique_lock lock(in_out_mtx_[out_buf_in_idx_]); - while (out_buf_full_[out_buf_in_idx_]) - in_out_con_[out_buf_in_idx_].wait(lock); - - // if buffer is full - if (top.LEN() + sizeof(LenType) + out_buf_size_[out_buf_in_idx_] > OUT_BUF_SIZE_ / OUT_BUF_NUM_) { - IASSERT(out_buf_size_[out_buf_in_idx_] != 0); // output buffer chanel is smaller than size of a record - out_buf_full_[out_buf_in_idx_] = true; - u32 tmp = out_buf_in_idx_; - ++out_buf_in_idx_; - out_buf_in_idx_ %= OUT_BUF_NUM_; - in_out_con_[tmp].notify_one(); - continue; - } - - assert(out_buf_in_idx_ < OUT_BUF_NUM_); - memcpy(sub_out_buf_[out_buf_in_idx_] + out_buf_size_[out_buf_in_idx_], top.data, top.LEN() + sizeof(LenType)); - out_buf_size_[out_buf_in_idx_] += top.LEN() + sizeof(LenType); - - break; - } - - assert(idx < MAX_GROUP_SIZE_); - // reach the end of a microrun - if (micro_run_idx_[idx] == num_micro_run_[idx]) { - IASSERT(micro_run_pos_[idx] <= size_micro_run_[idx]); - std::unique_lock lock(pre_buf_mtx_); - while (pre_buf_size_ == 0) - pre_buf_con_.wait(lock); - - if (pre_buf_size_ == (u32)-1) { -#ifdef USE_LOSER_TREE - merge_loser_tree_->DeleteTopInsert(nullptr, true); -#endif - continue; - } - - assert(idx < MAX_GROUP_SIZE_); - memcpy(micro_buf_[idx], pre_buf_, pre_buf_size_); - size_micro_run_[idx] = pre_buf_size_; - num_micro_run_[idx] = pre_buf_num_; - micro_run_pos_[idx] = micro_run_idx_[idx] = pre_buf_num_ = pre_buf_size_ = 0; - pre_buf_con_.notify_one(); } assert(idx < MAX_GROUP_SIZE_); -#ifdef USE_LOSER_TREE auto key = KeyAddr(micro_buf_[idx] + micro_run_pos_[idx], -1, idx); merge_loser_tree_->DeleteTopInsert(&key, false); -#else - merge_heap_.push(KeyAddr(micro_buf_[idx] + micro_run_pos_[idx], -1, idx)); -#endif ++micro_run_idx_[idx]; micro_run_pos_[idx] += KeyAddr(micro_buf_[idx] + micro_run_pos_[idx], -1, idx).LEN() + sizeof(LenType); } @@ -741,14 +379,11 @@ void SortMerger::OutputByQueue(FILE *f) { auto write_cnt = OUT_BATCH_SIZE_; while (count_ > 0 && write_cnt > 0 && !out_queue_.empty()) { - // auto top_data = std::move(out_queue_.front()); - // auto data_len = out_size_queue_.front(); temp_out_queue.push(std::move(out_queue_.front())); temp_out_size_queue.push(out_size_queue_.front()); out_queue_.pop(); out_size_queue_.pop(); - // io_stream.Write(top_data.get(), data_len); --count_; --write_cnt; } @@ -803,45 +438,8 @@ void SortMerger::Output(FILE *f, u32 idx) { } } -#define PRINT_TIME_COST - template void SortMerger::Run() { -#ifdef PRINT_TIME_COST - BaseProfiler profiler; - profiler.Begin(); -#endif -#ifdef USE_MMAP_IO - MmapReader io_stream(filenm_); - FILE_LEN_ = io_stream.DataLen(); - io_stream.ReadU64(count_); - // fmt::print("FILE LEN: {}, count: {}, read begin tell = {}\n", FILE_LEN_, count_, io_stream.Tell()); - Init(io_stream); - -// FILE *out_f 
= fopen((filenm_ + ".out").c_str(), "w+"); -// IASSERT(out_f); -// IASSERT(fwrite(&count_, sizeof(u64), 1, out_f) == 1); - String out_file = filenm_ + ".out"; - LocalFileSystem fs; - SharedPtr out_file_writer = MakeShared(fs, out_file, 128000); - out_file_writer->Write((char*)&count_, sizeof(u64)); - - MergeMmap(io_stream, out_file_writer); - // out_file_writer->Sync(); -// Thread merge_thread(std::bind(&self_t::MergeMmap, this, std::ref(io_stream))); -// FILE *out_f = fopen((filenm_ + ".out").c_str(), "w+"); -// IASSERT(out_f); -// IASSERT(fwrite(&count_, sizeof(u64), 1, out_f) == 1); -// Vector out_thread(OUT_BUF_NUM_); -// for (u32 i = 0; i < OUT_BUF_NUM_; ++i) { -// out_thread[i] = new Thread(std::bind(&self_t::Output, this, out_f, i)); -// } -// merge_thread.join(); -// for (u32 i = 0; i < OUT_BUF_NUM_; ++i) { -// out_thread[i]->join(); -// delete out_thread[i]; -// } -#else FILE *f = fopen(filenm_.c_str(), "r"); DirectIO io_stream(f); @@ -851,15 +449,12 @@ void SortMerger::Run() { Init(io_stream); - // Thread predict_thread(std::bind(&self_t::Predict, this, io_stream)); - // Thread merge_thread(std::bind(&self_t::Merge, this)); - Thread predict_thread(std::bind(&self_t::PredictByQueue, this, io_stream)); - Thread merge_thread(std::bind(&self_t::MergeByQueue, this)); + Thread predict_thread(std::bind(&self_t::Predict, this, io_stream)); + Thread merge_thread(std::bind(&self_t::Merge, this)); FILE *out_f = fopen((filenm_ + ".out").c_str(), "w+"); IASSERT(out_f); IASSERT(fwrite(&count_, sizeof(u64), 1, out_f) == 1); -// Thread out_thread(std::bind(&self_t::OutputByQueue, this, out_f)); Vector out_thread(OUT_BUF_NUM_); for (u32 i = 0; i < OUT_BUF_NUM_; ++i) { out_thread[i] = new Thread(std::bind(&self_t::Output, this, out_f, i)); @@ -867,7 +462,6 @@ void SortMerger::Run() { predict_thread.join(); merge_thread.join(); -// out_thread.join(); for (u32 i = 0; i < OUT_BUF_NUM_; ++i) { out_thread[i]->join(); delete out_thread[i]; @@ -875,35 +469,21 @@ void SortMerger::Run() { fclose(f); fclose(out_f); -#endif - if (std::filesystem::exists(filenm_)) { std::filesystem::remove(filenm_); -// std::filesystem::rename(filenm_, filenm_ + ".backup"); } if (std::filesystem::exists(filenm_ + ".out")) { std::filesystem::rename(filenm_ + ".out", filenm_); } -#ifdef PRINT_TIME_COST - // LOG_INFO(fmt::format("SortMerger::Run() time cost: {}", profiler.ElapsedToString())); - fmt::print("SortMerger::Run() time cost: {}\n", profiler.ElapsedToString()); - profiler.End(); -#endif } template requires std::same_as -void SortMergerTerm::MergeByQueueTerm() { +void SortMergerTermTuple::MergeImpl() { UniquePtr tuple_list = nullptr; u32 last_idx = -1; -#ifdef USE_LOSER_TREE while (merge_loser_tree_->TopSource() != LoserTree::invalid_) { auto top = merge_loser_tree_->TopKey(); -#else - while (merge_heap_.size() > 0) { - KeyAddr top = merge_heap_.top(); - merge_heap_.pop(); -#endif u32 idx = top.IDX(); auto out_key = top.KEY(); if (tuple_list == nullptr) { @@ -925,7 +505,7 @@ void SortMergerTerm::MergeByQueueTerm() { } assert(idx < MAX_GROUP_SIZE_); - // reach the end of a microrun + if (micro_run_idx_[idx] == num_micro_run_[idx]) { IASSERT(micro_run_pos_[idx] <= size_micro_run_[idx]); std::unique_lock lock(cycle_buf_mtx_); @@ -945,16 +525,13 @@ void SortMergerTerm::MergeByQueueTerm() { auto pre_buf_num = std::get<2>(res); memcpy(micro_buf_[idx], std::get<0>(res), pre_buf_size); - size_micro_run_[idx] = pre_buf_size; num_micro_run_[idx] = pre_buf_num; micro_run_pos_[idx] = micro_run_idx_[idx] = 0; if 
(cycle_buffer_->Size() < CYCLE_BUF_THRESHOLD_) { - // fmt::print("cycle buffer size = {}\n", cycle_buffer_->Size()); cycle_buf_con_.notify_one(); } - // cycle_buf_con_.notify_one(); } assert(idx < MAX_GROUP_SIZE_); @@ -971,17 +548,14 @@ void SortMergerTerm::MergeByQueueTerm() { } out_queue_con_.notify_one(); } - fmt::print("MergeByQueueTerm finish\n"); } template requires std::same_as -void SortMergerTerm::OutputByQueueTerm(FILE *f) { +void SortMergerTermTuple::OutputImpl(FILE *f) { DirectIO io_stream(f, "w"); while (count_ > 0) { - // wait its turn to output UniquePtr temp_term_tuple; - // SizeT queue_size = 0; { std::unique_lock out_lock(out_queue_mtx_); out_queue_con_.wait(out_lock, [this]() { return !this->term_tuple_list_queue_.empty(); }); @@ -993,13 +567,12 @@ void SortMergerTerm::OutputByQueueTerm(FILE *f) { temp_term_tuple = std::move(term_tuple_list_queue_.front()); ++term_list_count_; term_tuple_list_queue_.pop(); - // queue_size = term_tuple_list_queue_.size(); } count_ -= temp_term_tuple->Size(); - // fmt::print("term = {}, count_ = {}, term queue size = {}\n", temp_term_tuple->term_, count_, queue_size); - /* - * data_len, term_len, doc_list_size, term, [doc_id, term_pos]... - */ + + // output format + // | u32 | u32 | u32 | char [term_len] | pair [doc_list_size] + // | data_len | term_len | doc_list_size | term | [doc_id, term_pos]... u32 term_len = temp_term_tuple->term_.size(); u32 doc_list_size = temp_term_tuple->Size(); u32 data_len = sizeof(u32) + sizeof(u32) + term_len + 2 * sizeof(u32) * doc_list_size; @@ -1010,10 +583,6 @@ void SortMergerTerm::OutputByQueueTerm(FILE *f) { memcpy(buf + SIZE_U32, &term_len, SIZE_U32); memcpy(buf + SIZE_U32 + SIZE_U32, &doc_list_size, SIZE_U32); io_stream.Write(buf, SIZE_U32 * 3); - -// io_stream.Write((char*)(&data_len), sizeof(u32)); -// io_stream.Write((char*)(&term_len), sizeof(u32)); -// io_stream.Write((char*)(&term_len), sizeof(u32)); io_stream.Write(temp_term_tuple->term_.data(), term_len); io_stream.Write((char*)temp_term_tuple->doc_pos_list_.data(), SIZE_U32 * 2 * doc_list_size); if (count_ == 0) { @@ -1023,18 +592,17 @@ void SortMergerTerm::OutputByQueueTerm(FILE *f) { break; } } - fmt::print("OutputByQueueTerm finish\n"); } template requires std::same_as -void SortMergerTerm::RunTerm() { - LOG_INFO(fmt::format("begin run")); -// fmt::print("begin run\n"); -#ifdef PRINT_TIME_COST - BaseProfiler profiler; - profiler.Begin(); -#endif +void SortMergerTermTuple::PredictImpl(DirectIO &io_stream) { + this->Predict(io_stream); +} + +template +requires std::same_as +void SortMergerTermTuple::Run() { FILE *f = fopen(filenm_.c_str(), "r"); DirectIO io_stream(f); @@ -1045,44 +613,31 @@ void SortMergerTerm::RunTerm() { Super::Init(io_stream); - Thread predict_thread(std::bind(&Super::PredictByQueue, this, io_stream)); - Thread merge_thread(std::bind(&self_t::MergeByQueueTerm, this)); + Thread predict_thread(std::bind(&self_t::PredictImpl, this, io_stream)); + Thread merge_thread(std::bind(&self_t::MergeImpl, this)); FILE *out_f = fopen((filenm_ + ".out").c_str(), "w+"); IASSERT(out_f); IASSERT(fwrite(&count_, sizeof(u64), 1, out_f) == 1); - Thread out_thread(std::bind(&self_t::OutputByQueueTerm, this, out_f)); -// Vector out_thread(OUT_BUF_NUM_); -// for (u32 i = 0; i < OUT_BUF_NUM_; ++i) { -// out_thread[i] = new Thread(std::bind(&self_t::Output, this, out_f, i)); -// } + Thread out_thread(std::bind(&self_t::OutputImpl, this, out_f)); predict_thread.join(); merge_thread.join(); out_thread.join(); -// for (u32 i = 0; i < OUT_BUF_NUM_; 
++i) { -// out_thread[i]->join(); -// delete out_thread[i]; -// } + fclose(f); fclose(out_f); if (std::filesystem::exists(filenm_)) { std::filesystem::remove(filenm_); -// std::filesystem::rename(filenm_, filenm_ + ".backup"); } if (std::filesystem::exists(filenm_ + ".out")) { std::filesystem::rename(filenm_ + ".out", filenm_); } -#ifdef PRINT_TIME_COST - LOG_INFO(fmt::format("SortMerger::Run() time cost: {}", profiler.ElapsedToString())); -// fmt::print("SortMergerTerm::RunTerm() time cost: {}\n", profiler.ElapsedToString()); - profiler.End(); -#endif } template class SortMerger; template class SortMerger; -template class SortMergerTerm; +template class SortMergerTermTuple; } // namespace infinity \ No newline at end of file diff --git a/src/storage/invertedindex/common/external_sort_merger.cppm b/src/storage/invertedindex/common/external_sort_merger.cppm index cbf9be6ce0..ad2699d63c 100644 --- a/src/storage/invertedindex/common/external_sort_merger.cppm +++ b/src/storage/invertedindex/common/external_sort_merger.cppm @@ -310,28 +310,7 @@ public: } } - char* PutByRead(DirectIO &io_stream, u32& length) { - if (length > buffer_size_) { - throw std::runtime_error("Data length exceeds buffer capacity"); - } - if (IsFull()) { - throw std::runtime_error("Buffer is full"); - } - - // Read data into the current buffer - length = io_stream.Read(buffer_array_[head_].get(), length); - return buffer_array_[head_].get(); -// auto res_ptr = buffer_array_[head_].get(); -// head_ = (head_ + 1) % total_buffers_; -// -// if (head_ == tail_) { -// full_ = true; -// } -// return res_ptr; - } - Tuple GetTuple() { - // std::cout << "CycleBuffer::Get" << std::endl; if (IsEmpty()) { throw std::runtime_error("Buffer is empty"); } @@ -346,7 +325,6 @@ public: } const char* Get() { - // std::cout << "CycleBuffer::Get" << std::endl; if (IsEmpty()) { throw std::runtime_error("Buffer is empty"); } @@ -358,7 +336,6 @@ public: return result_data; } - void Reset() { head_ = tail_ = 0; full_ = false; @@ -395,7 +372,7 @@ private: export template class SortMerger { -public: +protected: typedef SortMerger self_t; typedef KeyAddress KeyAddr; static constexpr SizeT MAX_TUPLE_LENGTH = 1024; @@ -408,23 +385,18 @@ public: const u32 OUT_BUF_NUM_; //!< output threads number std::priority_queue pre_heap_; //!< predict heap - std::priority_queue merge_heap_; //!< merge heap SharedPtr> merge_loser_tree_; u32 *micro_run_idx_{nullptr}; //!< the access index of each microruns u32 *micro_run_pos_{nullptr}; //!< the access position within each microruns u32 *num_micro_run_{nullptr}; //!< the records number of each microruns u32 *size_micro_run_{nullptr}; //!< the size of entire microrun - u32 *num_run_{nullptr}; //!< records number of each runs u32 *size_run_{nullptr}; //!< size of each entire runs - u32 *size_loaded_run_{nullptr}; //!< size of data that have been read within each entire runs u64 *run_addr_{nullptr}; //!< start file address of each runs - u64 *run_curr_addr_{nullptr}; //!< current file address of each runs char **micro_buf_{nullptr}; //!< address of every microrun channel buffer char **sub_out_buf_{nullptr}; //!< addresses of each output buffer - char *pre_buf_{nullptr}; //!< predict buffer char *run_buf_{nullptr}; //!< entire run buffer char *out_buf_{nullptr}; //!< the entire output buffer @@ -437,19 +409,11 @@ public: std::mutex out_out_mtx_; //!< mutex and condition to ensure the seqence of file writing of all the output threads std::condition_variable out_out_con_; - u32 pre_buf_size_; //!< the current size of microrun 
that has been loaded onto prediect buffer - u32 pre_buf_num_; //!< the current records number of microrun that has been loaded onto prediect buffer - //u32 pre_idx_; //!< the index of microrun channel right in the predict buffer u32 out_buf_in_idx_; //!< used by merge to get the current available output buffer u32 out_buf_out_idx_; //!< used by output threads to get the index of the turn of outputting u32 *out_buf_size_{nullptr}; //!< data size of each output buffer bool *out_buf_full_{nullptr}; //!< a flag to ensure if the output buffer is full or not - Vector curr_addr_; - Vector end_addr_; - Vector> key_buf_; - Vector key_buf_ptr_; - Vector> mmap_io_streams_; UniquePtr cycle_buffer_; std::mutex cycle_buf_mtx_; std::condition_variable cycle_buf_con_; @@ -461,8 +425,6 @@ public: SizeT OUT_BATCH_SIZE_; Queue> term_tuple_list_queue_; - UniquePtr cycle_term_tuple_list_queue_; - bool read_finish_{false}; u32 CYCLE_BUF_SIZE_; u32 CYCLE_BUF_THRESHOLD_; @@ -477,64 +439,31 @@ public: void Predict(DirectIO &io_stream); - void PredictByQueue(DirectIO &io_stream); - void Merge(); - void MergeMmap(MmapReader &io_stream, SharedPtr out_file_writer); - - void MergeByQueue(); - void Output(FILE *f, u32 idx); void OutputByQueue(FILE* f); - // void OutputByQueueTerm(FILE *f); - - // void MergeByQueueTerm(); - void Init(MmapReader &io_stream); void ReadKeyAt(MmapReader &io_stream, u64 pos); void ReadKeyAtNonCopy(MmapReader &io_stream, u64 pos); - // void MergeByQueue(); public: SortMerger(const char *filenm, u32 group_size = 4, u32 bs = 100000000, u32 output_num = 2); virtual ~SortMerger(); - void SetParams(u32 max_record_len) { - if (max_record_len > PRE_BUF_SIZE_) { - PRE_BUF_SIZE_ = max_record_len; - RUN_BUF_SIZE_ = PRE_BUF_SIZE_ * MAX_GROUP_SIZE_; - // OUT_BUF_SIZE_ = BS_SIZE_ - RUN_BUF_SIZE_ - PRE_BUF_SIZE_; ///we do not change OUT_BUF_SIZE_ - } - } - - void SetParams(u32 max_record_len, u32 min_buff_size_required) { - if (max_record_len > PRE_BUF_SIZE_) - PRE_BUF_SIZE_ = max_record_len; - if (RUN_BUF_SIZE_ < min_buff_size_required) - RUN_BUF_SIZE_ = min_buff_size_required; - if (RUN_BUF_SIZE_ < PRE_BUF_SIZE_ * MAX_GROUP_SIZE_) - RUN_BUF_SIZE_ = PRE_BUF_SIZE_ * MAX_GROUP_SIZE_; - if (OUT_BUF_SIZE_ < min_buff_size_required) - OUT_BUF_SIZE_ = min_buff_size_required; - } - virtual void Run(); }; -//export template -//class SortMergerTerm; - export template requires std::same_as -class SortMergerTerm : public SortMerger { +class SortMergerTermTuple : public SortMerger { protected: - typedef SortMergerTerm self_t; + typedef SortMergerTermTuple self_t; using Super = SortMerger; using Super::filenm_; using Super::MAX_GROUP_SIZE_; @@ -544,20 +473,15 @@ protected: using Super::OUT_BUF_SIZE_; using Super::OUT_BUF_NUM_; using Super::pre_heap_; - using Super::merge_heap_; using Super::merge_loser_tree_; using Super::micro_run_idx_; using Super::micro_run_pos_; using Super::num_micro_run_; using Super::size_micro_run_; - using Super::num_run_; using Super::size_run_; - using Super::size_loaded_run_; using Super::run_addr_; - using Super::run_curr_addr_; using Super::micro_buf_; using Super::sub_out_buf_; - using Super::pre_buf_; using Super::run_buf_; using Super::out_buf_; using Super::pre_buf_mtx_; @@ -566,17 +490,10 @@ protected: using Super::in_out_con_; using Super::out_out_mtx_; using Super::out_out_con_; - using Super::pre_buf_size_; - using Super::pre_buf_num_; using Super::out_buf_in_idx_; using Super::out_buf_out_idx_; using Super::out_buf_size_; using Super::out_buf_full_; - using Super::curr_addr_; - 
using Super::end_addr_; - using Super::key_buf_; - using Super::key_buf_ptr_; - using Super::mmap_io_streams_; using Super::cycle_buffer_; using Super::cycle_buf_mtx_; using Super::cycle_buf_con_; @@ -596,14 +513,16 @@ protected: using Super::MAX_TUPLE_LENGTH; u64 term_list_count_{0}; - void OutputByQueueTerm(FILE *f); - void MergeByQueueTerm(); + void PredictImpl(DirectIO &io_stream); + + void OutputImpl(FILE *f); + void MergeImpl(); public: - SortMergerTerm(const char *filenm, u32 group_size = 4, u32 bs = 100000000, u32 output_num = 2) - : Super(filenm, group_size, bs, output_num) {} + SortMergerTermTuple(const char *filenm, u32 group_size = 4, u32 bs = 100000000, u32 output_num = 2) + : Super(filenm, group_size, bs, output_num) {} - void RunTerm(); + void Run() override; }; } // namespace infinity diff --git a/src/storage/invertedindex/common/loser_tree.cppm b/src/storage/invertedindex/common/loser_tree.cppm index 762e512629..700f470abd 100644 --- a/src/storage/invertedindex/common/loser_tree.cppm +++ b/src/storage/invertedindex/common/loser_tree.cppm @@ -9,7 +9,6 @@ import stl; namespace infinity { -//! LoserTreeBase class definition export template > class LoserTreeBase { public: @@ -25,20 +24,19 @@ public: } protected: struct Loser { - //! flag, true if is a virtual maximum sentinel + // flag, true if is a virtual maximum sentinel bool sup; Source source; ValueType key; }; - //! The number of nodes in the tree. + // The number of nodes in the tree. const Source ik_; - //! The next greater power of two of ik_. + // The next greater power of two of ik_. const Source k_; - //! Array containing loser tree nodes. + Vector losers_; - //! Function object for comparing ValueTypes. - // std::function cmp_; + Comparator cmp_; bool first_insert_; @@ -47,21 +45,14 @@ public: const Comparator& cmp = Comparator()) : ik_(k), k_(round_up_to_power_of_two(k)), losers_(2 * k_), cmp_(cmp), first_insert_(true) { - // : ik_(k), k_(static_cast(1) << static_cast(std::ceil(std::log2(static_cast(k))))), for (Source i = ik_ - 1; i < k_; i++) { losers_[i + k_].sup = true; losers_[i + k_].source = invalid_; } -// for (Source i = 0; i < k_; ++i) { -// losers_[i].source = invalid_; -// losers_[i].keyp = &sentinel; -// } } - //! Return the index of the player with the smallest element. Source TopSource() { - // if (losers_[0].sup) return invalid_; return losers_[0].source; } @@ -69,16 +60,13 @@ public: return losers_[0].key; } - //! Initializes the player source with the element key. void InsertStart(const ValueType* keyp, const Source& source, bool sup) { Source pos = k_ + source; - // assert(pos < losers_.size()); losers_[pos].source = source; losers_[pos].sup = sup; if (first_insert_) { for (Source i = 0; i < 2 * k_; ++i) { - // losers_[i].keyp = keyp; if (keyp) { losers_[i].key = *keyp; } else { @@ -87,17 +75,15 @@ public: } first_insert_ = false; } else { - // losers_[pos].keyp = keyp; losers_[pos].key = (keyp ? *keyp : ValueType()); } } - //! Recursively compute the winner of the competition at player root. + // Recursively compute the winner of the competition at player root. Source InitWinner(const Source& root) { if (root >= k_) { return root; } - Source left = InitWinner(2 * root); Source right = InitWinner(2 * root + 1); if (losers_[right].sup || @@ -118,7 +104,6 @@ public: } }; -//! Unguarded loser tree, keeping only pointers to the elements in the tree structure. 
export template > class LoserTree : public LoserTreeBase { public: @@ -126,13 +111,12 @@ public: using Source = typename Super::Source; public: - //! Constructor. explicit LoserTree(const Source& k, const Comparator& cmp = Comparator()) // const std::function& cmp = std::greater()) : Super(k, cmp) {} - //! Delete the current minimum and insert a new element. + // Delete the current minimum and insert a new element. void DeleteTopInsert(const ValueType* keyp, bool sup) { assert(sup == (keyp == nullptr)); Source source = Super::losers_[0].source; @@ -146,7 +130,6 @@ public: } else if (Super::losers_[pos].sup) { // do nothing } else if (Super::cmp_(Super::losers_[pos].key, key)) { - // std::swap(Super::losers_[pos].sup, sup); std::swap(Super::losers_[pos].source, source); std::swap(Super::losers_[pos].key, key); } else { diff --git a/src/storage/invertedindex/common/mmap.cppm b/src/storage/invertedindex/common/mmap.cppm index 3826e3f366..fcec640ad2 100644 --- a/src/storage/invertedindex/common/mmap.cppm +++ b/src/storage/invertedindex/common/mmap.cppm @@ -48,13 +48,11 @@ export int MunmapFile(u8 *&data_ptr, SizeT &data_len, SizeT offset_diff = 0) { export struct MmapReader { MmapReader(const String &filename, SizeT offset = 0, SizeT len = SizeT(-1), int advice = MADV_SEQUENTIAL) { - // int rc = MmapFile(filename, data_ptr_, data_len_, advice); - // fmt::print("filename = {}, offset = {}, len = {}\n", filename, offset, len); int rc = MmapPartFile(filename, data_ptr_, len, advice, offset); idx_ = 0; data_len_ = len; if (rc < 0) { - throw UnrecoverableException("MmapFile failed"); + UnrecoverableError("MmapFile failed"); } } @@ -117,7 +115,6 @@ export struct MmapReader { offset_diff_ = offset - aligned_offset; SizeT mapped_length = data_len + offset_diff_; - // void* mapped = mmap(NULL, mapped_length, PROT_READ, MAP_SHARED, fd, aligned_offset); int f = open(fp.c_str(), O_RDONLY); void *tmpd = mmap(NULL, mapped_length, PROT_READ, MAP_SHARED, f, aligned_offset); @@ -128,7 +125,6 @@ export struct MmapReader { if (rc < 0) return -1; data_ptr = (u8 *)tmpd + offset_diff_; - // data_len = len_f; return 0; } diff --git a/src/storage/invertedindex/memory_indexer.cpp b/src/storage/invertedindex/memory_indexer.cpp index f8a7da43f0..b5fdd6df2a 100644 --- a/src/storage/invertedindex/memory_indexer.cpp +++ b/src/storage/invertedindex/memory_indexer.cpp @@ -63,9 +63,6 @@ import third_party; namespace infinity { constexpr int MAX_TUPLE_LENGTH = 1024; // we assume that analyzed term, together with docid/offset info, will never exceed such length -#define USE_MMAP -#define USE_BUF -//#define USE_MORE_BUF bool MemoryIndexer::KeyComp::operator()(const String &lhs, const String &rhs) const { int ret = strcmp(lhs.c_str(), rhs.c_str()); return ret < 0; @@ -89,10 +86,9 @@ MemoryIndexer::MemoryIndexer(const String &index_dir, prepared_posting_ = MakeShared(nullptr, nullptr, PostingFormatOption(flag_), column_lengths_); Path path = Path(index_dir) / (base_name + ".tmp.merge"); spill_full_path_ = path.string(); -#ifdef USE_BUF + spill_buffer_size_ = MAX_TUPLE_LENGTH * 2; spill_buffer_ = MakeUnique(spill_buffer_size_); -#endif } MemoryIndexer::~MemoryIndexer() { @@ -176,8 +172,6 @@ void MemoryIndexer::Commit(bool offline) { } SizeT MemoryIndexer::CommitOffline(SizeT wait_if_empty_ms) { - // BaseProfiler profiler; - // profiler.Begin(); std::unique_lock lock(mutex_commit_, std::defer_lock); if (!lock.try_lock()) { return 0; @@ -191,13 +185,7 @@ SizeT MemoryIndexer::CommitOffline(SizeT wait_if_empty_ms) { SizeT num = 
inverters.size(); if (num > 0) { for (auto &inverter : inverters) { - // inverter->SpillSortResults(this->spill_file_handle_, this->tuple_count_); -#ifdef USE_BUF inverter->SpillSortResults(this->spill_file_handle_, this->tuple_count_, spill_buffer_, spill_buffer_size_); - // inverter->SpillSortResults(this->spill_file_handle_, this->tuple_count_, buf_writer_); -#else - inverter->SpillSortResults(this->spill_file_handle_, this->tuple_count_); -#endif num_runs_++; } } @@ -208,8 +196,6 @@ SizeT MemoryIndexer::CommitOffline(SizeT wait_if_empty_ms) { cv_.notify_all(); } } - // LOG_INFO(fmt::format("MemoryIndexer::CommitOffline time cost: {}", profiler.ElapsedToString())); - // profiler.End(); return num; } @@ -258,24 +244,13 @@ SizeT MemoryIndexer::CommitSync(SizeT wait_if_empty_ms) { return num_generated; } -#define PRINT_TIME_COST void MemoryIndexer::Dump(bool offline, bool spill) { if (offline) { assert(!spill); while (GetInflightTasks() > 0) { CommitOffline(100); } -#ifdef PRINT_TIME_COST - BaseProfiler profiler; - profiler.Begin(); -#endif - OfflineDumpTermTupleList(); -// OfflineDump(); -#ifdef PRINT_TIME_COST -// LOG_INFO(fmt::format("MemoryIndexer::OfflineDump() time cost: {}", profiler.ElapsedToString())); - fmt::print("MemoryIndexer::OfflineDumpTermTupleList() time cost: {}\n", profiler.ElapsedToString()); - profiler.End(); -#endif + OfflineDump(); return; } @@ -397,141 +372,9 @@ void MemoryIndexer::OfflineDump() { } FinalSpillFile(); constexpr u32 buffer_size_of_each_run = 2 * 1024 * 1024; - SortMerger *merger = new SortMerger(spill_full_path_.c_str(), num_runs_, buffer_size_of_each_run * num_runs_, 2); -// SortMergerTerm *merger = new SortMergerTerm(spill_full_path_.c_str(), num_runs_, buffer_size_of_each_run * num_runs_, 2); -// merger->RunTerm(); + SortMergerTermTuple *merger = new SortMergerTermTuple(spill_full_path_.c_str(), num_runs_, buffer_size_of_each_run * num_runs_, 2); merger->Run(); delete merger; -#ifdef USE_MMAP - MmapReader reader(spill_full_path_); - u64 count; - reader.ReadU64(count); - // idx += sizeof(u64); -#else - FILE *f = fopen(spill_full_path_.c_str(), "r"); - u64 count; - fread((char *)&count, sizeof(u64), 1, f); -#endif - Path path = Path(index_dir_) / base_name_; - String index_prefix = path.string(); - LocalFileSystem fs; - String posting_file = index_prefix + POSTING_SUFFIX; - SharedPtr posting_file_writer = MakeShared(fs, posting_file, 128000); - String dict_file = index_prefix + DICT_SUFFIX; - SharedPtr dict_file_writer = MakeShared(fs, dict_file, 128000); - TermMetaDumper term_meta_dumpler((PostingFormatOption(flag_))); - String fst_file = index_prefix + DICT_SUFFIX + ".fst"; - std::ofstream ofs(fst_file.c_str(), std::ios::binary | std::ios::trunc); - OstreamWriter wtr(ofs); - FstBuilder fst_builder(wtr); - - u32 record_length; - char buf[MAX_TUPLE_LENGTH]; - String last_term_str; - std::string_view last_term; - u32 last_doc_id = INVALID_DOCID; - UniquePtr posting; - - for (u64 i = 0; i < count; ++i) { -#ifdef USE_MMAP - reader.ReadU32(record_length); -#else - fread(&record_length, sizeof(u32), 1, f); -#endif - if (record_length >= MAX_TUPLE_LENGTH) { -#ifdef USE_MMAP - reader.Seek(record_length); - // idx += record_length; -#else - // rubbish tuple, abandoned - char *buffer = new char[record_length]; - fread(buffer, record_length, 1, f); - // TermTuple tuple(buffer, record_length); - delete[] buffer; -#endif - continue; - } -#ifdef USE_MMAP - reader.ReadBuf(buf, record_length); - // char* tuple_data = reader.ReadBufNonCopy(record_length); -#else - 
fread(buf, record_length, 1, f); -#endif - TermTuple tuple(buf, record_length); - if (tuple.term_ != last_term) { - assert(last_term < tuple.term_); - if (last_doc_id != INVALID_DOCID) { - posting->EndDocument(last_doc_id, 0); - // printf(" EndDocument1-%u\n", last_doc_id); - } - if (posting.get()) { - TermMeta term_meta(posting->GetDF(), posting->GetTotalTF()); - posting->Dump(posting_file_writer, term_meta); - SizeT term_meta_offset = dict_file_writer->TotalWrittenBytes(); - term_meta_dumpler.Dump(dict_file_writer, term_meta); - fst_builder.Insert((u8 *)last_term.data(), last_term.length(), term_meta_offset); - } - posting = MakeUnique(nullptr, nullptr, PostingFormatOption(flag_), column_lengths_); - // printf("\nswitched-term-%d-<%s>\n", i.term_num_, term.data()); - last_term_str = String(tuple.term_); - last_term = std::string_view(last_term_str); - } else if (last_doc_id != tuple.doc_id_) { - assert(last_doc_id != INVALID_DOCID); - assert(last_doc_id < tuple.doc_id_); - assert(posting.get() != nullptr); - posting->EndDocument(last_doc_id, 0); - // printf(" EndDocument2-%u\n", last_doc_id); - } - last_doc_id = tuple.doc_id_; - posting->AddPosition(tuple.term_pos_); - // printf(" pos-%u", tuple.term_pos_); - } -#ifdef USE_MMAP - // MunmapFile(data_ptr, data_len); - // reader.MunmapFile(); -#endif - if (last_doc_id != INVALID_DOCID) { - posting->EndDocument(last_doc_id, 0); - // printf(" EndDocument3-%u\n", last_doc_id); - TermMeta term_meta(posting->GetDF(), posting->GetTotalTF()); - posting->Dump(posting_file_writer, term_meta); - SizeT term_meta_offset = dict_file_writer->TotalWrittenBytes(); - term_meta_dumpler.Dump(dict_file_writer, term_meta); - fst_builder.Insert((u8 *)last_term.data(), last_term.length(), term_meta_offset); - } - posting_file_writer->Sync(); - dict_file_writer->Sync(); - fst_builder.Finish(); - fs.AppendFile(dict_file, fst_file); - fs.DeleteFile(fst_file); - - String column_length_file = index_prefix + LENGTH_SUFFIX; - UniquePtr file_handler = fs.OpenFile(column_length_file, FileFlags::WRITE_FLAG | FileFlags::TRUNCATE_CREATE, FileLockType::kNoLock); - Vector &unsafe_column_lengths = column_lengths_.UnsafeVec(); - fs.Write(*file_handler, &unsafe_column_lengths[0], sizeof(unsafe_column_lengths[0]) * unsafe_column_lengths.size()); - fs.Close(*file_handler); - - std::filesystem::remove(spill_full_path_); - // LOG_INFO(fmt::format("MemoryIndexer::OfflineDump done, num_runs_ {}", num_runs_)); - num_runs_ = 0; -} - -void MemoryIndexer::OfflineDumpTermTupleList() { - // Steps of offline dump: - // 1. External sort merge - // 2. Generate posting - // 3. 
Dump disk segment data - // LOG_INFO(fmt::format("MemoryIndexer::OfflineDump begin, num_runs_ {}\n", num_runs_)); - if (tuple_count_ == 0) { - return; - } - FinalSpillFile(); - constexpr u32 buffer_size_of_each_run = 2 * 1024 * 1024; - // SortMerger *merger = new SortMerger(spill_full_path_.c_str(), num_runs_, buffer_size_of_each_run * num_runs_, 2); - SortMergerTerm *merger = new SortMergerTerm(spill_full_path_.c_str(), num_runs_, buffer_size_of_each_run * num_runs_, 2); - merger->RunTerm(); - // merger->Run(); - delete merger; MmapReader reader(spill_full_path_); u64 term_list_count; @@ -555,7 +398,7 @@ void MemoryIndexer::OfflineDumpTermTupleList() { u32 doc_pos_list_size = 0; const u32 MAX_TUPLE_LIST_LENGTH = MAX_TUPLE_LENGTH + 2 * 1024 * 1024; auto buf = MakeUnique(MAX_TUPLE_LIST_LENGTH); - // char buf[MAX_TUPLE_LENGTH]; + String last_term_str; std::string_view last_term; u32 last_doc_id = INVALID_DOCID; @@ -564,7 +407,6 @@ void MemoryIndexer::OfflineDumpTermTupleList() { assert(record_length < MAX_TUPLE_LIST_LENGTH); for (u64 i = 0; i < term_list_count; ++i) { - reader.ReadU32(record_length); reader.ReadU32(term_length); @@ -573,7 +415,6 @@ void MemoryIndexer::OfflineDumpTermTupleList() { continue; } - reader.ReadBuf(buf.get(), record_length - sizeof(u32)); u32 buf_idx = 0; @@ -583,12 +424,10 @@ void MemoryIndexer::OfflineDumpTermTupleList() { std::string_view term = std::string_view(buf.get() + buf_idx, term_length); buf_idx += term_length; - // TermTuple tuple(buf, record_length); if (term != last_term) { assert(last_term < term); if (last_doc_id != INVALID_DOCID) { posting->EndDocument(last_doc_id, 0); - // printf(" EndDocument1-%u\n", last_doc_id); } if (posting.get()) { TermMeta term_meta(posting->GetDF(), posting->GetTotalTF()); @@ -598,7 +437,6 @@ void MemoryIndexer::OfflineDumpTermTupleList() { fst_builder.Insert((u8 *)last_term.data(), last_term.length(), term_meta_offset); } posting = MakeUnique(nullptr, nullptr, PostingFormatOption(flag_), column_lengths_); - // printf("\nswitched-term-%d-<%s>\n", i.term_num_, term.data()); last_term_str = String(term); last_term = std::string_view(last_term_str); last_doc_id = INVALID_DOCID; @@ -610,27 +448,17 @@ void MemoryIndexer::OfflineDumpTermTupleList() { buf_idx += sizeof(u32); if (last_doc_id != INVALID_DOCID && last_doc_id != doc_id) { - // assert(last_doc_id != INVALID_DOCID); assert(last_doc_id < doc_id); assert(posting.get() != nullptr); posting->EndDocument(last_doc_id, 0); - // printf(" EndDocument2-%u\n", last_doc_id); } last_doc_id = doc_id; posting->AddPosition(term_pos); } -// last_doc_id = doc_id; -// posting->AddPosition(tuple.term_pos_); - // printf(" pos-%u", tuple.term_pos_); } -#ifdef USE_MMAP - // MunmapFile(data_ptr, data_len); - // reader.MunmapFile(); -#endif if (last_doc_id != INVALID_DOCID) { posting->EndDocument(last_doc_id, 0); - // printf(" EndDocument3-%u\n", last_doc_id); TermMeta term_meta(posting->GetDF(), posting->GetTotalTF()); posting->Dump(posting_file_writer, term_meta); SizeT term_meta_offset = dict_file_writer->TotalWrittenBytes(); @@ -650,7 +478,6 @@ void MemoryIndexer::OfflineDumpTermTupleList() { fs.Close(*file_handler); std::filesystem::remove(spill_full_path_); - // LOG_INFO(fmt::format("MemoryIndexer::OfflineDump done, num_runs_ {}", num_runs_)); num_runs_ = 0; } @@ -665,10 +492,6 @@ void MemoryIndexer::FinalSpillFile() { void MemoryIndexer::PrepareSpillFile() { spill_file_handle_ = fopen(spill_full_path_.c_str(), "w"); fwrite(&tuple_count_, sizeof(u64), 1, spill_file_handle_); -#ifdef 
USE_MORE_BUF - const SizeT spill_buf_size = 128000; - buf_writer_ = MakeUnique(spill_file_handle_, spill_buf_size); -#endif } } // namespace infinity \ No newline at end of file diff --git a/src/storage/invertedindex/memory_indexer.cppm b/src/storage/invertedindex/memory_indexer.cppm index 730826996b..f280d0ea4f 100644 --- a/src/storage/invertedindex/memory_indexer.cppm +++ b/src/storage/invertedindex/memory_indexer.cppm @@ -117,8 +117,6 @@ private: void OfflineDump(); - void OfflineDumpTermTupleList(); - void FinalSpillFile(); void PrepareSpillFile(); diff --git a/src/unit_test/storage/common/loser_tree.cpp b/src/unit_test/storage/common/loser_tree.cpp index 6b1e52293e..033dcc582b 100644 --- a/src/unit_test/storage/common/loser_tree.cpp +++ b/src/unit_test/storage/common/loser_tree.cpp @@ -46,10 +46,6 @@ void LoserTreeTest::GenerateData(infinity::SizeT num_size, infinity::SizeT loser } for (SizeT i = 0; i < loser_num; ++i) { std::sort(loser[i].begin(), loser[i].end()); -// for (SizeT j = 0; j < loser[i].size(); ++j) { -// fmt::print("{} ", loser[i][j]); -// } -// fmt::print("\n"); } } @@ -70,7 +66,6 @@ void LoserTreeTest::MultiWayMerge(infinity::SizeT num_size, infinity::SizeT lose auto min_value = loser_tree->TopKey(); auto min_source = loser_tree->TopSource(); merge_res.push_back(min_value); - // fmt::print("min val = {}, min source = {}\n", min_value, min_source); auto& min_seq = num_idx[min_source]; if (min_seq < loser[min_source].size()) { @@ -85,17 +80,6 @@ void LoserTreeTest::MultiWayMerge(infinity::SizeT num_size, infinity::SizeT lose for (SizeT i = 0; i < merge_res.size(); ++i) { EXPECT_EQ(merge_res[i], numbers[i]); } - return ; - for (SizeT i = 0; i < merge_res.size(); ++i) { - fmt::print("{} ", merge_res[i]); - // EXPECT_EQ(merge_res[i], numbers[i]); - } - fmt::print("\n"); - for (SizeT i = 0; i < numbers.size(); ++i) { - fmt::print("{} ", numbers[i]); - // EXPECT_EQ(merge_res[i], numbers[i]); - } - fmt::print("\n"); } TEST_F(LoserTreeTest, BasicMerge1) { diff --git a/src/unit_test/storage/invertedindex/common/external_sort.cpp b/src/unit_test/storage/invertedindex/common/external_sort.cpp index 5439ec4a59..15f9ec63df 100644 --- a/src/unit_test/storage/invertedindex/common/external_sort.cpp +++ b/src/unit_test/storage/invertedindex/common/external_sort.cpp @@ -69,7 +69,6 @@ class ExternalSortTest : public BaseTest { u32 run_num = rand() % 300; while (run_num < 100 || SIZE % run_num != 0) run_num = rand() % 300; - // fmt::print("begin tell = {}\n", ftell(f)); for (u32 i = 0; i < run_num; ++i) { u64 pos = ftell(f); fseek(f, 2 * sizeof(u32) + sizeof(u64), SEEK_CUR); @@ -77,14 +76,8 @@ class ExternalSortTest : public BaseTest { for (u32 j = 0; j < SIZE / run_num; ++j) { str = RandStr(i * SIZE / run_num + j); LenType len = str.size(); - // fmt::print("begin tell = {}\n", ftell(f)); fwrite(&len, sizeof(LenType), 1, f); fwrite(str.data(), len, 1, f); -// fmt::print("len: {}, str.size() = {}, size len_type = {}, tell = {}, str: ", len, str.size(), sizeof(LenType), ftell(f)); -// for (auto c : str) { -// fmt::print("{}",c); -// } -// fmt::print("\n"); s += len + sizeof(LenType); } u64 next_run_pos = ftell(f); @@ -93,7 +86,6 @@ class ExternalSortTest : public BaseTest { s = SIZE / run_num; fwrite(&s, sizeof(u32), 1, f); fwrite(&next_run_pos, sizeof(u64), 1, f); - // fmt::print("next_pos: {}\n", next_run_pos); fseek(f, 0, SEEK_END); } fclose(f); diff --git a/src/unit_test/storage/invertedindex/memory_indexer.cpp b/src/unit_test/storage/invertedindex/memory_indexer.cpp index 
3c7c2342b8..44d7f94627 100644 --- a/src/unit_test/storage/invertedindex/memory_indexer.cpp +++ b/src/unit_test/storage/invertedindex/memory_indexer.cpp @@ -71,14 +71,6 @@ class MemoryIndexerTest : public BaseTest { R"#(The two tapes of a transducer are typically viewed as an input tape and an output tape. On this view, a transducer is said to transduce (i.e., translate) the contents of its input tape to its output tape, by accepting a string on its input tape and generating another string on its output tape. It may do so nondeterministically and it may produce more than one output for each input string. A transducer may also produce no output for a given input string, in which case it is said to reject the input. In general, a transducer computes a relation between two formal languages.)#", }; -// const char *paragraphs[] = { -// R"#(a, b, c)#", -// R"#(a, b)#", -// R"#(c, d)#", -// R"#(e, d)#", -// R"#(a, c)#", -// }; - const SizeT num_paragraph = sizeof(paragraphs) / sizeof(char *); column_ = ColumnVector::Make(MakeShared(LogicalType::kVarchar)); column_->Initialize(); From 5414da6b7fa55de84448b93cf8a01f9390d3e2e4 Mon Sep 17 00:00:00 2001 From: Ma-cat <1054638297@qq.com> Date: Thu, 23 May 2024 13:57:05 +0800 Subject: [PATCH 11/14] delete some useless code --- src/storage/invertedindex/column_inverter.cpp | 60 +- .../invertedindex/column_inverter.cppm | 1 - .../common/external_sort_merger.cpp | 533 ++---------------- .../common/external_sort_merger.cppm | 101 +--- .../invertedindex/common/loser_tree.cppm | 32 +- src/storage/invertedindex/common/mmap.cppm | 6 +- src/storage/invertedindex/memory_indexer.cpp | 185 +----- src/storage/invertedindex/memory_indexer.cppm | 2 - src/unit_test/storage/common/loser_tree.cpp | 16 - .../invertedindex/common/external_sort.cpp | 8 - .../storage/invertedindex/memory_indexer.cpp | 8 - 11 files changed, 67 insertions(+), 885 deletions(-) diff --git a/src/storage/invertedindex/column_inverter.cpp b/src/storage/invertedindex/column_inverter.cpp index 438dbf750b..6fcb26d875 100644 --- a/src/storage/invertedindex/column_inverter.cpp +++ b/src/storage/invertedindex/column_inverter.cpp @@ -57,8 +57,6 @@ ColumnInverter::~ColumnInverter() = default; bool ColumnInverter::CompareTermRef::operator()(const u32 lhs, const u32 rhs) const { return std::strcmp(GetTerm(lhs), GetTerm(rhs)) < 0; } SizeT ColumnInverter::InvertColumn(SharedPtr column_vector, u32 row_offset, u32 row_count, u32 begin_doc_id) { - // BaseProfiler profiler; - // profiler.Begin(); begin_doc_id_ = begin_doc_id; doc_count_ = row_count; Vector column_lengths(row_count); @@ -73,8 +71,6 @@ SizeT ColumnInverter::InvertColumn(SharedPtr column_vector, u32 ro term_count_sum += term_count; } column_lengths_.SetBatch(begin_doc_id, column_lengths); - // LOG_INFO(fmt::format("ColumnInverter::InvertColumn time cost: {}", profiler.ElapsedToString())); - // profiler.End(); return term_count_sum; } @@ -240,12 +236,8 @@ void ColumnInverter::GeneratePosting() { } void ColumnInverter::SortForOfflineDump() { - // BaseProfiler profiler; - // profiler.Begin(); MergePrepare(); Sort(); - // LOG_INFO(fmt::format("ColumnInverter::SortForOfflineDump time cost: {}", profiler.ElapsedToString())); - // profiler.End(); } /// Layout of the input of external sort file @@ -312,7 +304,6 @@ void ColumnInverter::SpillSortResults(FILE *spill_file, u64 &tuple_count, Unique // size of this Run in bytes u32 data_size = 0; u64 data_size_pos = spill_file_tell; - // fwrite(&data_size, sizeof(u32), 1, spill_file); memcpy(spill_buffer.get() + 
spill_buf_idx, &data_size, sizeof(u32)); spill_buf_idx += sizeof(u32); spill_file_tell += sizeof(u32); @@ -320,7 +311,6 @@ void ColumnInverter::SpillSortResults(FILE *spill_file, u64 &tuple_count, Unique // number of tuples u32 num_of_tuples = positions_.size(); tuple_count += num_of_tuples; - // fwrite(&num_of_tuples, sizeof(u32), 1, spill_file); memcpy(spill_buffer.get() + spill_buf_idx, &num_of_tuples, sizeof(u32)); spill_buf_idx += sizeof(u32); spill_file_tell += sizeof(u32); @@ -328,8 +318,6 @@ void ColumnInverter::SpillSortResults(FILE *spill_file, u64 &tuple_count, Unique // start offset for next spill u64 next_start_offset = 0; u64 next_start_offset_pos = spill_file_tell; - // u64 next_start_offset_pos = ftell(spill_file); - // fwrite(&next_start_offset, sizeof(u64), 1, spill_file); memcpy(spill_buffer.get() + spill_buf_idx, &next_start_offset, sizeof(u64)); spill_buf_idx += sizeof(u64); spill_file_tell += sizeof(u64); @@ -338,7 +326,6 @@ void ColumnInverter::SpillSortResults(FILE *spill_file, u64 &tuple_count, Unique fwrite(spill_buffer.get(), spill_buf_idx, 1, spill_file); spill_buf_idx = 0; - // u64 data_start_offset = ftell(spill_file); u64 data_start_offset = spill_file_tell; assert((SizeT)ftell(spill_file) == spill_file_tell); // sorted data @@ -352,11 +339,6 @@ void ColumnInverter::SpillSortResults(FILE *spill_file, u64 &tuple_count, Unique term = GetTermFromNum(last_term_num); } record_length = term.size() + sizeof(docid_t) + sizeof(u32) + 1; - // fwrite(&record_length, sizeof(u32), 1, spill_file); - // fwrite(term.data(), term.size(), 1, spill_file); - // fwrite(&str_null, sizeof(char), 1, spill_file); - // fwrite(&i.doc_id_, sizeof(docid_t), 1, spill_file); - // fwrite(&i.term_pos_, sizeof(u32), 1, spill_file); memcpy(spill_buffer.get() + spill_buf_idx, &record_length, sizeof(u32)); spill_buf_idx += sizeof(u32); @@ -392,43 +374,26 @@ void ColumnInverter::SpillSortResults(FILE *spill_file, u64 &tuple_count, Unique if (positions_.empty()) { return; } - // SizeT spill_buf_idx = 0; SizeT spill_file_tell = ftell(spill_file); // size of this Run in bytes u32 data_size = 0; u64 data_size_pos = spill_file_tell; - // fwrite(&data_size, sizeof(u32), 1, spill_file); -// memcpy(spill_buffer.get() + spill_buf_idx, &data_size, sizeof(u32)); -// spill_buf_idx += sizeof(u32); buf_writer->Write((const char*)&data_size, sizeof(u32)); spill_file_tell += sizeof(u32); // number of tuples u32 num_of_tuples = positions_.size(); tuple_count += num_of_tuples; - // fwrite(&num_of_tuples, sizeof(u32), 1, spill_file); -// memcpy(spill_buffer.get() + spill_buf_idx, &num_of_tuples, sizeof(u32)); -// spill_buf_idx += sizeof(u32); buf_writer->Write((const char*)&num_of_tuples, sizeof(u32)); spill_file_tell += sizeof(u32); // start offset for next spill u64 next_start_offset = 0; u64 next_start_offset_pos = spill_file_tell; - // u64 next_start_offset_pos = ftell(spill_file); - // fwrite(&next_start_offset, sizeof(u64), 1, spill_file); -// memcpy(spill_buffer.get() + spill_buf_idx, &next_start_offset, sizeof(u64)); -// spill_buf_idx += sizeof(u64); buf_writer->Write((const char*)&next_start_offset, sizeof(u64)); spill_file_tell += sizeof(u64); - // assert(spill_buf_idx < spill_buf_size); -// fwrite(spill_buffer.get(), spill_buf_idx, 1, spill_file); -// spill_buf_idx = 0; - - // u64 data_start_offset = ftell(spill_file); u64 data_start_offset = spill_file_tell; - // assert((SizeT)ftell(spill_file) == spill_file_tell); // sorted data u32 last_term_num = std::numeric_limits::max(); StringRef term; @@ -440,37 
+405,15 @@ void ColumnInverter::SpillSortResults(FILE *spill_file, u64 &tuple_count, Unique term = GetTermFromNum(last_term_num); } record_length = term.size() + sizeof(docid_t) + sizeof(u32) + 1; -// fwrite(&record_length, sizeof(u32), 1, spill_file); -// fwrite(term.data(), term.size(), 1, spill_file); -// fwrite(&str_null, sizeof(char), 1, spill_file); -// fwrite(&i.doc_id_, sizeof(docid_t), 1, spill_file); -// fwrite(&i.term_pos_, sizeof(u32), 1, spill_file); -// memcpy(spill_buffer.get() + spill_buf_idx, &record_length, sizeof(u32)); -// spill_buf_idx += sizeof(u32); -// -// memcpy(spill_buffer.get() + spill_buf_idx, term.data(), term.size()); -// spill_buf_idx += term.size(); -// -// memcpy(spill_buffer.get() + spill_buf_idx, &str_null, sizeof(char)); -// spill_buf_idx += sizeof(char); -// -// memcpy(spill_buffer.get() + spill_buf_idx, &i.doc_id_, sizeof(docid_t)); -// spill_buf_idx += sizeof(docid_t); -// -// memcpy(spill_buffer.get() + spill_buf_idx, &i.term_pos_, sizeof(u32)); -// spill_buf_idx += sizeof(u32); + buf_writer->Write((const char*)&record_length, sizeof(u32)); buf_writer->Write(term.data(), term.size()); buf_writer->Write((const char*)&str_null, sizeof(char)); buf_writer->Write((const char*)&(i.doc_id_), sizeof(docid_t)); buf_writer->Write((const char*)&(i.term_pos_), sizeof(u32)); - // assert(spill_buf_idx < spill_buf_size); - // fwrite(spill_buffer.get(), spill_buf_idx, 1, spill_file); - // spill_buf_idx = 0; } buf_writer->Flush(); // update data size - // next_start_offset = ftell(spill_file); next_start_offset = buf_writer->Tell(); data_size = next_start_offset - data_start_offset; fseek(spill_file, data_size_pos, SEEK_SET); @@ -480,5 +423,4 @@ void ColumnInverter::SpillSortResults(FILE *spill_file, u64 &tuple_count, Unique fseek(spill_file, next_start_offset, SEEK_SET); } - } // namespace infinity \ No newline at end of file diff --git a/src/storage/invertedindex/column_inverter.cppm b/src/storage/invertedindex/column_inverter.cppm index 5a545e5db8..8b64342add 100644 --- a/src/storage/invertedindex/column_inverter.cppm +++ b/src/storage/invertedindex/column_inverter.cppm @@ -74,7 +74,6 @@ public: void SpillSortResults(FILE *spill_file, u64 &tuple_count); - // fast void SpillSortResults(FILE *spill_file, u64 &tuple_count, UniquePtr& spill_buffer, SizeT spill_buf_size); void SpillSortResults(FILE *spill_file, u64 &tuple_count, UniquePtr& buf_writer); diff --git a/src/storage/invertedindex/common/external_sort_merger.cpp b/src/storage/invertedindex/common/external_sort_merger.cpp index cd2b5d4ff0..a72c426afe 100644 --- a/src/storage/invertedindex/common/external_sort_merger.cpp +++ b/src/storage/invertedindex/common/external_sort_merger.cpp @@ -42,26 +42,19 @@ namespace infinity { } \ } -#define USE_LOSER_TREE -//#define USE_MMAP_IO - template SortMerger::SortMerger(const char *filenm, u32 group_size, u32 bs, u32 output_num) : filenm_(filenm), MAX_GROUP_SIZE_(group_size), BS_SIZE_(bs), PRE_BUF_SIZE_((u32)(1. 
* bs * 0.8 / (group_size + 1))), RUN_BUF_SIZE_(PRE_BUF_SIZE_ * group_size), OUT_BUF_SIZE_(bs - RUN_BUF_SIZE_ - PRE_BUF_SIZE_), OUT_BUF_NUM_(output_num) { - pre_buf_ = run_buf_ = out_buf_ = nullptr; + run_buf_ = out_buf_ = nullptr; count_ = 0; - pre_buf_size_ = pre_buf_num_ = 0; micro_run_idx_ = new u32[MAX_GROUP_SIZE_]; micro_run_pos_ = new u32[MAX_GROUP_SIZE_]; num_micro_run_ = new u32[MAX_GROUP_SIZE_]; size_micro_run_ = new u32[MAX_GROUP_SIZE_]; - num_run_ = new u32[MAX_GROUP_SIZE_]; size_run_ = new u32[MAX_GROUP_SIZE_]; - size_loaded_run_ = new u32[MAX_GROUP_SIZE_]; run_addr_ = new u64[MAX_GROUP_SIZE_]; - run_curr_addr_ = new u64[MAX_GROUP_SIZE_]; micro_buf_ = new char *[MAX_GROUP_SIZE_]; sub_out_buf_ = new char *[OUT_BUF_NUM_]; @@ -72,51 +65,32 @@ SortMerger::SortMerger(const char *filenm, u32 group_size, u32 out_buf_size_ = new u32[OUT_BUF_NUM_]; out_buf_full_ = new bool[OUT_BUF_NUM_]; - curr_addr_.resize(MAX_GROUP_SIZE_, 0); - end_addr_.resize(MAX_GROUP_SIZE_, 0); - key_buf_.resize(MAX_GROUP_SIZE_); - key_buf_ptr_.resize(MAX_GROUP_SIZE_, nullptr); - mmap_io_streams_.resize(MAX_GROUP_SIZE_, nullptr); - for (u32 i = 0; i < MAX_GROUP_SIZE_; ++i) { - key_buf_[i] = MakeUnique(MAX_TUPLE_LENGTH + 100); - // mmap_io_streams_[i] = MakeShared(filenm_); - } CYCLE_BUF_SIZE_ = MAX_GROUP_SIZE_ * 4; CYCLE_BUF_THRESHOLD_ = MAX_GROUP_SIZE_ * 3; OUT_BATCH_SIZE_ = 10240; - // fmt::print("cycle buf size = {}, buf threshold = {}\n", CYCLE_BUF_SIZE_, CYCLE_BUF_THRESHOLD_); assert(CYCLE_BUF_THRESHOLD_ <= CYCLE_BUF_SIZE_); cycle_buffer_ = MakeUnique(CYCLE_BUF_SIZE_, PRE_BUF_SIZE_); - const SizeT term_tuple_list_cycle_buf_size = MAX_GROUP_SIZE_ * 2; - const SizeT term_tuple_list_buf_size = 1024 + 2 * 1024 * 1024 + 100; - cycle_term_tuple_list_queue_ = MakeUnique(term_tuple_list_cycle_buf_size, term_tuple_list_buf_size); -#ifdef USE_LOSER_TREE merge_loser_tree_ = MakeShared>(MAX_GROUP_SIZE_); -#endif } template SortMerger::~SortMerger() { - if (pre_buf_) - free(pre_buf_); - - if (run_buf_) + if (run_buf_) { free(run_buf_); + } - if (out_buf_) + if (out_buf_) { free(out_buf_); + } delete[] micro_run_idx_; delete[] micro_run_pos_; delete[] num_micro_run_; delete[] size_micro_run_; - delete[] num_run_; delete[] size_run_; - delete[] size_loaded_run_; delete[] run_addr_; - delete[] run_curr_addr_; delete[] micro_buf_; delete[] sub_out_buf_; @@ -130,14 +104,13 @@ SortMerger::~SortMerger() { template void SortMerger::NewBuffer() { - if (!pre_buf_) - pre_buf_ = (char *)malloc(PRE_BUF_SIZE_); - - if (!run_buf_) + if (!run_buf_) { run_buf_ = (char *)malloc(RUN_BUF_SIZE_); + } - if (!out_buf_) + if (!out_buf_) { out_buf_ = (char *)malloc(OUT_BUF_SIZE_); + } } template @@ -159,17 +132,19 @@ void SortMerger::Init(DirectIO &io_stream) { // initiate the microrun buffer micro_buf_[0] = run_buf_; - for (u32 i = 1; i < MAX_GROUP_SIZE_; ++i) + for (u32 i = 1; i < MAX_GROUP_SIZE_; ++i) { micro_buf_[i] = micro_buf_[i - 1] + PRE_BUF_SIZE_; + } // group_size_ = 0; u64 next_run_pos = 0; + u32 num_run = 0; for (u32 i = 0; i < MAX_GROUP_SIZE_ && (u64)io_stream.Tell() < FILE_LEN_; ++i, ++group_size_) { // get the size of run io_stream.Read((char *)(size_run_ + i), sizeof(u32)); // get the records number of a run - io_stream.Read((char *)(num_run_ + i), sizeof(u32)); + io_stream.Read((char *)(&num_run), sizeof(u32)); io_stream.Read((char *)(&next_run_pos), sizeof(u64)); run_addr_[i] = io_stream.Tell(); // ftell(f); @@ -178,10 +153,6 @@ void SortMerger::Init(DirectIO &io_stream) { u32 s = size_run_[i] > PRE_BUF_SIZE_ ? 
PRE_BUF_SIZE_ : size_run_[i]; size_t ret = io_stream.Read(micro_buf_[i], s); size_micro_run_[i] = ret; - size_loaded_run_[i] = ret; - run_curr_addr_[i] = io_stream.Tell(); - // std::cout << "num_run_[" << i << "] " << num_run_[i] << " size_run_ " << size_run_[i] << " size_micro_run " << size_micro_run_[i] - // << std::endl; /// it is not needed for compression, validation will be made within IOStream in that case // if a record can fit in microrun buffer @@ -189,7 +160,6 @@ void SortMerger::Init(DirectIO &io_stream) { while (*(LenType *)(micro_buf_[i]) + sizeof(LenType) > s) { size_micro_run_[i] = 0; --count_; - // LOG_WARN("[Warning]: A record is too long, it will be ignored"); io_stream.Seek(*(LenType *)(micro_buf_[i]) + sizeof(LenType) - s, SEEK_CUR); @@ -204,28 +174,23 @@ void SortMerger::Init(DirectIO &io_stream) { io_stream.Read(micro_buf_[i], s); } -#ifdef USE_LOSER_TREE if (flag) { merge_loser_tree_->InsertStart(nullptr, static_cast::Source>(i), true); continue; } + auto key = KeyAddr(micro_buf_[i], -1, i); merge_loser_tree_->InsertStart(&key, static_cast::Source>(i), false); -#else - if (flag) { - continue; - } - merge_heap_.push(KeyAddr(micro_buf_[i], -1, i)); -#endif + micro_run_idx_[i] = 1; micro_run_pos_[i] = KeyAddr(micro_buf_[i], -1, i).LEN() + sizeof(LenType); num_micro_run_[i] = 0; io_stream.Seek(next_run_pos); } -#ifdef USE_LOSER_TREE + merge_loser_tree_->Init(); -#endif + // initialize predict heap and records number of every microrun for (u32 i = 0; i < group_size_; ++i) { u32 pos = 0; @@ -243,7 +208,6 @@ void SortMerger::Init(DirectIO &io_stream) { break; } } - // std::cout << "len " << len << " size_micro_run_[" << i << "] " << size_micro_run_[i] << std::endl; assert(last_pos != (u32)-1); // buffer too small that can't hold one record assert(last_pos + sizeof(LenType) <= size_micro_run_[i]); assert(pos <= size_micro_run_[i]); @@ -255,200 +219,8 @@ void SortMerger::Init(DirectIO &io_stream) { } } -template -void SortMerger::ReadKeyAt(MmapReader &io_stream, u64 pos) { - auto file_pos = curr_addr_[pos]; -// if (file_pos != io_stream.Tell()) { -// io_stream.Seek(file_pos, true); -// } - // fmt::print("begin tell = {}\n", file_pos); - io_stream.Seek(file_pos, true); - LenType len; - io_stream.ReadBuf((char_t*)&len, sizeof(LenType)); - io_stream.Seek(file_pos, true); - io_stream.ReadBuf(key_buf_[pos].get(), len + sizeof(LenType)); -// fmt::print("len = {}, sizeof len_type = {}, key buf get len = {}, tell = {}\n", len, sizeof(LenType), *(LenType *)key_buf_[pos].get(), io_stream.Tell()); -// fmt::print("data = "); -// for (u32 i = 0; i < len; ++i) { -// fmt::print("{}", key_buf_[pos].get()[i + sizeof(LenType)]); -// } -// fmt::print("\n"); - curr_addr_[pos] = io_stream.Tell(); -} - -template -void SortMerger::ReadKeyAtNonCopy(MmapReader &io_stream, u64 pos) { -// auto file_pos = curr_addr_[pos]; -// -// io_stream.Seek(file_pos, true); - // assert(curr_addr_[pos] == io_stream.Tell()); - LenType len; -// io_stream.ReadBuf((char_t*)&len, sizeof(LenType)); -// io_stream.Seek(file_pos, true); - key_buf_ptr_[pos] = io_stream.ReadBufNonCopy(sizeof(LenType)); - len = *(LenType *)key_buf_ptr_[pos]; - io_stream.ReadBufNonCopy(len); -// fmt::print("len = {}, key = ", len); -// for (u32 i = 0; i < len; ++i) { -// fmt::print("{}", key_buf_ptr_[pos][i + sizeof(LenType)]); -// } -// fmt::print("\n"); - - // io_stream.ReadBuf(key_buf_[pos].get(), len + sizeof(LenType)); - curr_addr_[pos] = io_stream.Tell(); -} - -#ifdef USE_MMAP_IO -template -void SortMerger::Init(MmapReader &io_stream) { - 
group_size_ = 0; - u64 next_run_pos = 0; - for (u32 i = 0; i < MAX_GROUP_SIZE_ && (u64)io_stream.Tell() < FILE_LEN_; ++i, ++group_size_) { - // get the size of run - io_stream.ReadU32(size_run_[i]); - // get the records number of a run - io_stream.ReadU32(num_run_[i]); - io_stream.ReadU64(next_run_pos); -// fmt::print("i = {}, size_run = {}, num_run = {}, next run pos:{}\n", i, size_run_[i], num_run_[i], next_run_pos); - assert(next_run_pos <= FILE_LEN_); - end_addr_[i] = next_run_pos; - curr_addr_[i] = io_stream.Tell(); - // fmt::print("curr_addr_[{}] = {}, end_addr_[{}] = {}\n", i, curr_addr_[i], i, end_addr_[i]); - mmap_io_streams_[i] = MakeShared(filenm_, curr_addr_[i], end_addr_[i] - curr_addr_[i]); - // mmap_io_streams_[i]->Seek(curr_addr_[i], true); -#ifdef USE_LOSER_TREE - if (curr_addr_[i] >= end_addr_[i]) { - merge_loser_tree_->InsertStart(nullptr, static_cast::Source>(i), true); - continue; - } -// ReadKeyAt(io_stream, i); -// auto key = KeyAddr(key_buf_[i].get(), -1, i); - // read block use mmap, need update end_addr - end_addr_[i] = mmap_io_streams_[i]->DataLen(); - ReadKeyAtNonCopy(*mmap_io_streams_[i], i); - - auto key = KeyAddr(key_buf_ptr_[i], -1, i); - merge_loser_tree_->InsertStart(&key, static_cast::Source>(i), false); -#else - if (curr_addr_[i] >= end_addr_[i]) { - continue; - } - ReadKeyAtNonCopy(io_stream, i); - merge_heap_.push(KeyAddr(key_buf_ptr_[i].get(), -1, i)); -#endif - io_stream.Seek(next_run_pos, true); - } -#ifdef USE_LOSER_TREE - merge_loser_tree_->Init(); -#endif -} - -template -void SortMerger::MergeMmap(MmapReader &io_stream, SharedPtr out_file_writer) { -#ifdef USE_LOSER_TREE - while (merge_loser_tree_->TopSource() != LoserTree::invalid_) { - auto top = merge_loser_tree_->TopKey(); -#else - while (merge_heap_.size() > 0) { - KeyAddr top = merge_heap_.top(); - merge_heap_.pop(); -#endif - u32 idx = top.IDX(); - // fmt::print("idx = {}\n", idx); - out_file_writer->Write(top.data, top.LEN() + sizeof(LenType)); - assert(idx < MAX_GROUP_SIZE_); - // reach the end of a microrun - if (curr_addr_[idx] >= end_addr_[idx]) { - merge_loser_tree_->DeleteTopInsert(nullptr, true); - --count_; - continue; - } - assert(idx < MAX_GROUP_SIZE_); - -// ReadKeyAt(io_stream, idx); -// auto key = KeyAddr(key_buf_[idx].get(), -1, idx); - // ReadKeyAtNonCopy(io_stream, idx); - ReadKeyAtNonCopy(*mmap_io_streams_[idx], idx); - auto key = KeyAddr(key_buf_ptr_[idx], -1, idx); - merge_loser_tree_->DeleteTopInsert(&key, false); - } - out_file_writer->Sync(); -} - -#endif - template void SortMerger::Predict(DirectIO &io_stream) { - while (pre_heap_.size() > 0) { - KeyAddr top = pre_heap_.top(); - pre_heap_.pop(); - u64 addr = top.ADDR(); - u32 idx = top.IDX(); - free(top.data); - - std::unique_lock lock(pre_buf_mtx_); - - while (pre_buf_size_ != 0) - pre_buf_con_.wait(lock); - - assert(idx < MAX_GROUP_SIZE_); - // get loading size of a microrun - u32 s; - s = (u32)((u64)size_run_[idx] - (addr - run_addr_[idx])); - - if (s == 0) { - continue; - } - s = s > PRE_BUF_SIZE_ ? 
PRE_BUF_SIZE_ : s; - - // load microrun - io_stream.Seek(addr); - s = io_stream.Read(pre_buf_, s); - size_loaded_run_[idx] += s; - run_curr_addr_[idx] = io_stream.Tell(); - - u32 pos = 0; - u32 last_pos = -1; - pre_buf_num_ = 0; - while (1) { - if (pos + sizeof(LenType) > s) { - // the last record of this microrun - IASSERT(last_pos != (u32)-1); // buffer too small that can't hold one record - LenType len = *(LenType *)(pre_buf_ + last_pos) + sizeof(LenType); - char *tmp = (char *)malloc(len); - memcpy(tmp, pre_buf_ + last_pos, len); - pre_heap_.push(KeyAddr(tmp, addr + (u64)pos, idx)); - break; - } - LenType len = *(LenType *)(pre_buf_ + pos); - if (pos + sizeof(LenType) + len > s) { - // the last record of this microrun - IASSERT(last_pos != (u32)-1); // buffer too small that can't hold one record - len = *(LenType *)(pre_buf_ + last_pos) + sizeof(LenType); - char *tmp = (char *)malloc(len); - memcpy(tmp, pre_buf_ + last_pos, len); - pre_heap_.push(KeyAddr(tmp, addr + (u64)pos, idx)); - break; - } - - ++pre_buf_num_; - last_pos = pos; - pos += sizeof(LenType) + len; - } - pre_buf_size_ = pos; - pre_buf_con_.notify_one(); - } - { - std::unique_lock lock(pre_buf_mtx_); - pre_buf_size_ = -1; - pre_buf_con_.notify_one(); - } - - // LOG_INFO("Predicting is over..."); -} - -template -void SortMerger::PredictByQueue(DirectIO &io_stream) { UniquePtr data_buf = MakeUnique(PRE_BUF_SIZE_); while (pre_heap_.size() > 0) { KeyAddr top = pre_heap_.top(); @@ -471,12 +243,10 @@ void SortMerger::PredictByQueue(DirectIO &io_stream) { io_stream.Read(data_buf.get(), s); auto data_ptr = data_buf.get(); - size_loaded_run_[idx] += s; - run_curr_addr_[idx] = io_stream.Tell(); - u32 pos = 0; u32 last_pos = -1; - pre_buf_num_ = 0; + u32 pre_buf_num = 0; + u32 pre_buf_size = 0; while (1) { if (pos + sizeof(LenType) > s) { // the last record of this microrun @@ -489,7 +259,6 @@ void SortMerger::PredictByQueue(DirectIO &io_stream) { } LenType len = *(LenType *)(data_ptr + pos); if (pos + sizeof(LenType) + len > s) { - // the last record of this microrun IASSERT(last_pos != (u32)-1); // buffer too small that can't hold one record len = *(LenType *)(data_ptr + last_pos) + sizeof(LenType); char *tmp = (char *)malloc(len); @@ -498,56 +267,30 @@ void SortMerger::PredictByQueue(DirectIO &io_stream) { break; } - ++pre_buf_num_; + ++pre_buf_num; last_pos = pos; pos += sizeof(LenType) + len; } - pre_buf_size_ = pos; + pre_buf_size = pos; std::unique_lock lock(cycle_buf_mtx_); cycle_buf_con_.wait(lock, [this]() { return !this->cycle_buffer_->IsFull(); }); - // auto data_ptr = cycle_buffer_->PutByRead(io_stream, s); - cycle_buffer_->PutReal(data_buf, pre_buf_size_, pre_buf_num_); + cycle_buffer_->PutReal(data_buf, pre_buf_size, pre_buf_num); cycle_buf_con_.notify_one(); } - // fmt::print("1-read finish\n"); { std::unique_lock lock(cycle_buf_mtx_); - // fmt::print("read finish\n"); - // pre_buf_size_ = -1; read_finish_ = true; cycle_buf_con_.notify_one(); } - - // LOG_INFO("Predicting is over..."); } template -void SortMerger::MergeByQueue() { -#ifdef USE_LOSER_TREE +void SortMerger::Merge() { while (merge_loser_tree_->TopSource() != LoserTree::invalid_) { auto top = merge_loser_tree_->TopKey(); -#else - while (merge_heap_.size() > 0) { - KeyAddr top = merge_heap_.top(); - merge_heap_.pop(); -#endif u32 idx = top.IDX(); -// memcpy(sub_out_buf_[out_buf_in_idx_] + out_buf_size_[out_buf_in_idx_], top.data, top.LEN() + sizeof(LenType)); - // fmt::print("loser tree pop idx = {}\n", idx); - // output -// { -// std::unique_lock 
lock(out_queue_mtx_); -// auto data_len = top.LEN() + sizeof(LenType); -// auto top_data = MakeUnique(data_len + 10); -// memcpy(top_data.get(), top.data, data_len); -// out_queue_.push(std::move(top_data)); -// out_size_queue_.push(data_len); -// if (out_queue_.size() >= OUT_BATCH_SIZE_ / 10) { -// out_queue_con_.notify_one(); -// } -// } while (1) { assert(out_buf_in_idx_ < OUT_BUF_NUM_); std::unique_lock lock(in_out_mtx_[out_buf_in_idx_]); @@ -577,7 +320,6 @@ void SortMerger::MergeByQueue() { if (micro_run_idx_[idx] == num_micro_run_[idx]) { IASSERT(micro_run_pos_[idx] <= size_micro_run_[idx]); std::unique_lock lock(cycle_buf_mtx_); - // fmt::print("cycle buffer size = {}\n", cycle_buffer_->Size()); cycle_buf_con_.wait(lock, [this]() { return !this->cycle_buffer_->IsEmpty() || (this->read_finish_ && this->cycle_buffer_->IsEmpty()); @@ -589,127 +331,23 @@ void SortMerger::MergeByQueue() { } assert(idx < MAX_GROUP_SIZE_); - // fmt::print("cycle buffer size = {}, read_finish_ = {}\n", cycle_buffer_->Size(), read_finish_); auto res = cycle_buffer_->GetTuple(); - // micro_buf_[idx] = res.get<0>(); auto pre_buf_size = std::get<1>(res); auto pre_buf_num = std::get<2>(res); memcpy(micro_buf_[idx], std::get<0>(res), pre_buf_size); -// fmt::print("[Merge] loser tree add idx = {}, pre_buf_size = {}, pre_buf_num = {}\n", idx, pre_buf_size_, pre_buf_num_); -// for (u32 i = 0; i < pre_buf_size_; ++i) { -// fmt::print("{}", micro_buf_[idx][i]); -// } -// fmt::print("\n"); - size_micro_run_[idx] = pre_buf_size; num_micro_run_[idx] = pre_buf_num; micro_run_pos_[idx] = micro_run_idx_[idx] = 0; -// cycle_buf_con_.notify_one(); if (cycle_buffer_->Size() < CYCLE_BUF_THRESHOLD_) { - // fmt::print("cycle buffer size = {}\n", cycle_buffer_->Size()); cycle_buf_con_.notify_one(); } - // cycle_buf_con_.notify_one(); - // pre_buf_con_.notify_one(); - } - - assert(idx < MAX_GROUP_SIZE_); -#ifdef USE_LOSER_TREE - auto key = KeyAddr(micro_buf_[idx] + micro_run_pos_[idx], -1, idx); -// fmt::print("[Merge] add key idx = {}, key = ", idx); -// for (u32 i = 0; i < key.LEN() + sizeof(LenType); ++i) { -// fmt::print("{}", micro_buf_[idx][micro_run_pos_[idx] + i]); -// } -// fmt::print("\n"); - - merge_loser_tree_->DeleteTopInsert(&key, false); -#else - merge_heap_.push(KeyAddr(micro_buf_[idx] + micro_run_pos_[idx], -1, idx)); -#endif - ++micro_run_idx_[idx]; - micro_run_pos_[idx] += KeyAddr(micro_buf_[idx] + micro_run_pos_[idx], -1, idx).LEN() + sizeof(LenType); - } - { -// std::unique_lock lock(out_queue_mtx_); -// out_queue_con_.notify_one(); - assert(out_buf_in_idx_ < OUT_BUF_NUM_); - std::unique_lock lock(in_out_mtx_[out_buf_in_idx_]); - if (!out_buf_full_[out_buf_in_idx_] && out_buf_size_[out_buf_in_idx_] > 0) { - out_buf_full_[out_buf_in_idx_] = true; - in_out_con_[out_buf_in_idx_].notify_one(); - } - } -} - -template -void SortMerger::Merge() { -#ifdef USE_LOSER_TREE - while (merge_loser_tree_->TopSource() != LoserTree::invalid_) { - auto top = merge_loser_tree_->TopKey(); -#else - while (merge_heap_.size() > 0) { - KeyAddr top = merge_heap_.top(); - merge_heap_.pop(); -#endif - u32 idx = top.IDX(); - - // output - while (1) { - assert(out_buf_in_idx_ < OUT_BUF_NUM_); - std::unique_lock lock(in_out_mtx_[out_buf_in_idx_]); - while (out_buf_full_[out_buf_in_idx_]) - in_out_con_[out_buf_in_idx_].wait(lock); - - // if buffer is full - if (top.LEN() + sizeof(LenType) + out_buf_size_[out_buf_in_idx_] > OUT_BUF_SIZE_ / OUT_BUF_NUM_) { - IASSERT(out_buf_size_[out_buf_in_idx_] != 0); // output buffer chanel is smaller than 
size of a record - out_buf_full_[out_buf_in_idx_] = true; - u32 tmp = out_buf_in_idx_; - ++out_buf_in_idx_; - out_buf_in_idx_ %= OUT_BUF_NUM_; - in_out_con_[tmp].notify_one(); - continue; - } - - assert(out_buf_in_idx_ < OUT_BUF_NUM_); - memcpy(sub_out_buf_[out_buf_in_idx_] + out_buf_size_[out_buf_in_idx_], top.data, top.LEN() + sizeof(LenType)); - out_buf_size_[out_buf_in_idx_] += top.LEN() + sizeof(LenType); - - break; - } - - assert(idx < MAX_GROUP_SIZE_); - // reach the end of a microrun - if (micro_run_idx_[idx] == num_micro_run_[idx]) { - IASSERT(micro_run_pos_[idx] <= size_micro_run_[idx]); - std::unique_lock lock(pre_buf_mtx_); - while (pre_buf_size_ == 0) - pre_buf_con_.wait(lock); - - if (pre_buf_size_ == (u32)-1) { -#ifdef USE_LOSER_TREE - merge_loser_tree_->DeleteTopInsert(nullptr, true); -#endif - continue; - } - - assert(idx < MAX_GROUP_SIZE_); - memcpy(micro_buf_[idx], pre_buf_, pre_buf_size_); - size_micro_run_[idx] = pre_buf_size_; - num_micro_run_[idx] = pre_buf_num_; - micro_run_pos_[idx] = micro_run_idx_[idx] = pre_buf_num_ = pre_buf_size_ = 0; - pre_buf_con_.notify_one(); } assert(idx < MAX_GROUP_SIZE_); -#ifdef USE_LOSER_TREE auto key = KeyAddr(micro_buf_[idx] + micro_run_pos_[idx], -1, idx); merge_loser_tree_->DeleteTopInsert(&key, false); -#else - merge_heap_.push(KeyAddr(micro_buf_[idx] + micro_run_pos_[idx], -1, idx)); -#endif ++micro_run_idx_[idx]; micro_run_pos_[idx] += KeyAddr(micro_buf_[idx] + micro_run_pos_[idx], -1, idx).LEN() + sizeof(LenType); } @@ -741,14 +379,11 @@ void SortMerger::OutputByQueue(FILE *f) { auto write_cnt = OUT_BATCH_SIZE_; while (count_ > 0 && write_cnt > 0 && !out_queue_.empty()) { - // auto top_data = std::move(out_queue_.front()); - // auto data_len = out_size_queue_.front(); temp_out_queue.push(std::move(out_queue_.front())); temp_out_size_queue.push(out_size_queue_.front()); out_queue_.pop(); out_size_queue_.pop(); - // io_stream.Write(top_data.get(), data_len); --count_; --write_cnt; } @@ -803,45 +438,8 @@ void SortMerger::Output(FILE *f, u32 idx) { } } -#define PRINT_TIME_COST - template void SortMerger::Run() { -#ifdef PRINT_TIME_COST - BaseProfiler profiler; - profiler.Begin(); -#endif -#ifdef USE_MMAP_IO - MmapReader io_stream(filenm_); - FILE_LEN_ = io_stream.DataLen(); - io_stream.ReadU64(count_); - // fmt::print("FILE LEN: {}, count: {}, read begin tell = {}\n", FILE_LEN_, count_, io_stream.Tell()); - Init(io_stream); - -// FILE *out_f = fopen((filenm_ + ".out").c_str(), "w+"); -// IASSERT(out_f); -// IASSERT(fwrite(&count_, sizeof(u64), 1, out_f) == 1); - String out_file = filenm_ + ".out"; - LocalFileSystem fs; - SharedPtr out_file_writer = MakeShared(fs, out_file, 128000); - out_file_writer->Write((char*)&count_, sizeof(u64)); - - MergeMmap(io_stream, out_file_writer); - // out_file_writer->Sync(); -// Thread merge_thread(std::bind(&self_t::MergeMmap, this, std::ref(io_stream))); -// FILE *out_f = fopen((filenm_ + ".out").c_str(), "w+"); -// IASSERT(out_f); -// IASSERT(fwrite(&count_, sizeof(u64), 1, out_f) == 1); -// Vector out_thread(OUT_BUF_NUM_); -// for (u32 i = 0; i < OUT_BUF_NUM_; ++i) { -// out_thread[i] = new Thread(std::bind(&self_t::Output, this, out_f, i)); -// } -// merge_thread.join(); -// for (u32 i = 0; i < OUT_BUF_NUM_; ++i) { -// out_thread[i]->join(); -// delete out_thread[i]; -// } -#else FILE *f = fopen(filenm_.c_str(), "r"); DirectIO io_stream(f); @@ -851,15 +449,12 @@ void SortMerger::Run() { Init(io_stream); - // Thread predict_thread(std::bind(&self_t::Predict, this, io_stream)); - // Thread 
merge_thread(std::bind(&self_t::Merge, this)); - Thread predict_thread(std::bind(&self_t::PredictByQueue, this, io_stream)); - Thread merge_thread(std::bind(&self_t::MergeByQueue, this)); + Thread predict_thread(std::bind(&self_t::Predict, this, io_stream)); + Thread merge_thread(std::bind(&self_t::Merge, this)); FILE *out_f = fopen((filenm_ + ".out").c_str(), "w+"); IASSERT(out_f); IASSERT(fwrite(&count_, sizeof(u64), 1, out_f) == 1); -// Thread out_thread(std::bind(&self_t::OutputByQueue, this, out_f)); Vector out_thread(OUT_BUF_NUM_); for (u32 i = 0; i < OUT_BUF_NUM_; ++i) { out_thread[i] = new Thread(std::bind(&self_t::Output, this, out_f, i)); @@ -867,7 +462,6 @@ void SortMerger::Run() { predict_thread.join(); merge_thread.join(); -// out_thread.join(); for (u32 i = 0; i < OUT_BUF_NUM_; ++i) { out_thread[i]->join(); delete out_thread[i]; @@ -875,35 +469,21 @@ void SortMerger::Run() { fclose(f); fclose(out_f); -#endif - if (std::filesystem::exists(filenm_)) { std::filesystem::remove(filenm_); -// std::filesystem::rename(filenm_, filenm_ + ".backup"); } if (std::filesystem::exists(filenm_ + ".out")) { std::filesystem::rename(filenm_ + ".out", filenm_); } -#ifdef PRINT_TIME_COST - // LOG_INFO(fmt::format("SortMerger::Run() time cost: {}", profiler.ElapsedToString())); - fmt::print("SortMerger::Run() time cost: {}\n", profiler.ElapsedToString()); - profiler.End(); -#endif } template requires std::same_as -void SortMergerTerm::MergeByQueueTerm() { +void SortMergerTermTuple::MergeImpl() { UniquePtr tuple_list = nullptr; u32 last_idx = -1; -#ifdef USE_LOSER_TREE while (merge_loser_tree_->TopSource() != LoserTree::invalid_) { auto top = merge_loser_tree_->TopKey(); -#else - while (merge_heap_.size() > 0) { - KeyAddr top = merge_heap_.top(); - merge_heap_.pop(); -#endif u32 idx = top.IDX(); auto out_key = top.KEY(); if (tuple_list == nullptr) { @@ -925,7 +505,7 @@ void SortMergerTerm::MergeByQueueTerm() { } assert(idx < MAX_GROUP_SIZE_); - // reach the end of a microrun + if (micro_run_idx_[idx] == num_micro_run_[idx]) { IASSERT(micro_run_pos_[idx] <= size_micro_run_[idx]); std::unique_lock lock(cycle_buf_mtx_); @@ -945,16 +525,13 @@ void SortMergerTerm::MergeByQueueTerm() { auto pre_buf_num = std::get<2>(res); memcpy(micro_buf_[idx], std::get<0>(res), pre_buf_size); - size_micro_run_[idx] = pre_buf_size; num_micro_run_[idx] = pre_buf_num; micro_run_pos_[idx] = micro_run_idx_[idx] = 0; if (cycle_buffer_->Size() < CYCLE_BUF_THRESHOLD_) { - // fmt::print("cycle buffer size = {}\n", cycle_buffer_->Size()); cycle_buf_con_.notify_one(); } - // cycle_buf_con_.notify_one(); } assert(idx < MAX_GROUP_SIZE_); @@ -971,17 +548,14 @@ void SortMergerTerm::MergeByQueueTerm() { } out_queue_con_.notify_one(); } - fmt::print("MergeByQueueTerm finish\n"); } template requires std::same_as -void SortMergerTerm::OutputByQueueTerm(FILE *f) { +void SortMergerTermTuple::OutputImpl(FILE *f) { DirectIO io_stream(f, "w"); while (count_ > 0) { - // wait its turn to output UniquePtr temp_term_tuple; - // SizeT queue_size = 0; { std::unique_lock out_lock(out_queue_mtx_); out_queue_con_.wait(out_lock, [this]() { return !this->term_tuple_list_queue_.empty(); }); @@ -993,13 +567,12 @@ void SortMergerTerm::OutputByQueueTerm(FILE *f) { temp_term_tuple = std::move(term_tuple_list_queue_.front()); ++term_list_count_; term_tuple_list_queue_.pop(); - // queue_size = term_tuple_list_queue_.size(); } count_ -= temp_term_tuple->Size(); - // fmt::print("term = {}, count_ = {}, term queue size = {}\n", temp_term_tuple->term_, count_, 
queue_size); - /* - * data_len, term_len, doc_list_size, term, [doc_id, term_pos]... - */ + + // output format + // | u32 | u32 | u32 | char [term_len] | pair [doc_list_size] + // | data_len | term_len | doc_list_size | term | [doc_id, term_pos]... u32 term_len = temp_term_tuple->term_.size(); u32 doc_list_size = temp_term_tuple->Size(); u32 data_len = sizeof(u32) + sizeof(u32) + term_len + 2 * sizeof(u32) * doc_list_size; @@ -1010,10 +583,6 @@ void SortMergerTerm::OutputByQueueTerm(FILE *f) { memcpy(buf + SIZE_U32, &term_len, SIZE_U32); memcpy(buf + SIZE_U32 + SIZE_U32, &doc_list_size, SIZE_U32); io_stream.Write(buf, SIZE_U32 * 3); - -// io_stream.Write((char*)(&data_len), sizeof(u32)); -// io_stream.Write((char*)(&term_len), sizeof(u32)); -// io_stream.Write((char*)(&term_len), sizeof(u32)); io_stream.Write(temp_term_tuple->term_.data(), term_len); io_stream.Write((char*)temp_term_tuple->doc_pos_list_.data(), SIZE_U32 * 2 * doc_list_size); if (count_ == 0) { @@ -1023,18 +592,17 @@ void SortMergerTerm::OutputByQueueTerm(FILE *f) { break; } } - fmt::print("OutputByQueueTerm finish\n"); } template requires std::same_as -void SortMergerTerm::RunTerm() { - LOG_INFO(fmt::format("begin run")); -// fmt::print("begin run\n"); -#ifdef PRINT_TIME_COST - BaseProfiler profiler; - profiler.Begin(); -#endif +void SortMergerTermTuple::PredictImpl(DirectIO &io_stream) { + this->Predict(io_stream); +} + +template +requires std::same_as +void SortMergerTermTuple::Run() { FILE *f = fopen(filenm_.c_str(), "r"); DirectIO io_stream(f); @@ -1045,44 +613,31 @@ void SortMergerTerm::RunTerm() { Super::Init(io_stream); - Thread predict_thread(std::bind(&Super::PredictByQueue, this, io_stream)); - Thread merge_thread(std::bind(&self_t::MergeByQueueTerm, this)); + Thread predict_thread(std::bind(&self_t::PredictImpl, this, io_stream)); + Thread merge_thread(std::bind(&self_t::MergeImpl, this)); FILE *out_f = fopen((filenm_ + ".out").c_str(), "w+"); IASSERT(out_f); IASSERT(fwrite(&count_, sizeof(u64), 1, out_f) == 1); - Thread out_thread(std::bind(&self_t::OutputByQueueTerm, this, out_f)); -// Vector out_thread(OUT_BUF_NUM_); -// for (u32 i = 0; i < OUT_BUF_NUM_; ++i) { -// out_thread[i] = new Thread(std::bind(&self_t::Output, this, out_f, i)); -// } + Thread out_thread(std::bind(&self_t::OutputImpl, this, out_f)); predict_thread.join(); merge_thread.join(); out_thread.join(); -// for (u32 i = 0; i < OUT_BUF_NUM_; ++i) { -// out_thread[i]->join(); -// delete out_thread[i]; -// } + fclose(f); fclose(out_f); if (std::filesystem::exists(filenm_)) { std::filesystem::remove(filenm_); -// std::filesystem::rename(filenm_, filenm_ + ".backup"); } if (std::filesystem::exists(filenm_ + ".out")) { std::filesystem::rename(filenm_ + ".out", filenm_); } -#ifdef PRINT_TIME_COST - LOG_INFO(fmt::format("SortMerger::Run() time cost: {}", profiler.ElapsedToString())); -// fmt::print("SortMergerTerm::RunTerm() time cost: {}\n", profiler.ElapsedToString()); - profiler.End(); -#endif } template class SortMerger; template class SortMerger; -template class SortMergerTerm; +template class SortMergerTermTuple; } // namespace infinity \ No newline at end of file diff --git a/src/storage/invertedindex/common/external_sort_merger.cppm b/src/storage/invertedindex/common/external_sort_merger.cppm index cbf9be6ce0..ad2699d63c 100644 --- a/src/storage/invertedindex/common/external_sort_merger.cppm +++ b/src/storage/invertedindex/common/external_sort_merger.cppm @@ -310,28 +310,7 @@ public: } } - char* PutByRead(DirectIO &io_stream, u32& length) { - 
if (length > buffer_size_) { - throw std::runtime_error("Data length exceeds buffer capacity"); - } - if (IsFull()) { - throw std::runtime_error("Buffer is full"); - } - - // Read data into the current buffer - length = io_stream.Read(buffer_array_[head_].get(), length); - return buffer_array_[head_].get(); -// auto res_ptr = buffer_array_[head_].get(); -// head_ = (head_ + 1) % total_buffers_; -// -// if (head_ == tail_) { -// full_ = true; -// } -// return res_ptr; - } - Tuple GetTuple() { - // std::cout << "CycleBuffer::Get" << std::endl; if (IsEmpty()) { throw std::runtime_error("Buffer is empty"); } @@ -346,7 +325,6 @@ public: } const char* Get() { - // std::cout << "CycleBuffer::Get" << std::endl; if (IsEmpty()) { throw std::runtime_error("Buffer is empty"); } @@ -358,7 +336,6 @@ public: return result_data; } - void Reset() { head_ = tail_ = 0; full_ = false; @@ -395,7 +372,7 @@ private: export template class SortMerger { -public: +protected: typedef SortMerger self_t; typedef KeyAddress KeyAddr; static constexpr SizeT MAX_TUPLE_LENGTH = 1024; @@ -408,23 +385,18 @@ public: const u32 OUT_BUF_NUM_; //!< output threads number std::priority_queue pre_heap_; //!< predict heap - std::priority_queue merge_heap_; //!< merge heap SharedPtr> merge_loser_tree_; u32 *micro_run_idx_{nullptr}; //!< the access index of each microruns u32 *micro_run_pos_{nullptr}; //!< the access position within each microruns u32 *num_micro_run_{nullptr}; //!< the records number of each microruns u32 *size_micro_run_{nullptr}; //!< the size of entire microrun - u32 *num_run_{nullptr}; //!< records number of each runs u32 *size_run_{nullptr}; //!< size of each entire runs - u32 *size_loaded_run_{nullptr}; //!< size of data that have been read within each entire runs u64 *run_addr_{nullptr}; //!< start file address of each runs - u64 *run_curr_addr_{nullptr}; //!< current file address of each runs char **micro_buf_{nullptr}; //!< address of every microrun channel buffer char **sub_out_buf_{nullptr}; //!< addresses of each output buffer - char *pre_buf_{nullptr}; //!< predict buffer char *run_buf_{nullptr}; //!< entire run buffer char *out_buf_{nullptr}; //!< the entire output buffer @@ -437,19 +409,11 @@ public: std::mutex out_out_mtx_; //!< mutex and condition to ensure the seqence of file writing of all the output threads std::condition_variable out_out_con_; - u32 pre_buf_size_; //!< the current size of microrun that has been loaded onto prediect buffer - u32 pre_buf_num_; //!< the current records number of microrun that has been loaded onto prediect buffer - //u32 pre_idx_; //!< the index of microrun channel right in the predict buffer u32 out_buf_in_idx_; //!< used by merge to get the current available output buffer u32 out_buf_out_idx_; //!< used by output threads to get the index of the turn of outputting u32 *out_buf_size_{nullptr}; //!< data size of each output buffer bool *out_buf_full_{nullptr}; //!< a flag to ensure if the output buffer is full or not - Vector curr_addr_; - Vector end_addr_; - Vector> key_buf_; - Vector key_buf_ptr_; - Vector> mmap_io_streams_; UniquePtr cycle_buffer_; std::mutex cycle_buf_mtx_; std::condition_variable cycle_buf_con_; @@ -461,8 +425,6 @@ public: SizeT OUT_BATCH_SIZE_; Queue> term_tuple_list_queue_; - UniquePtr cycle_term_tuple_list_queue_; - bool read_finish_{false}; u32 CYCLE_BUF_SIZE_; u32 CYCLE_BUF_THRESHOLD_; @@ -477,64 +439,31 @@ public: void Predict(DirectIO &io_stream); - void PredictByQueue(DirectIO &io_stream); - void Merge(); - void MergeMmap(MmapReader 
&io_stream, SharedPtr out_file_writer); - - void MergeByQueue(); - void Output(FILE *f, u32 idx); void OutputByQueue(FILE* f); - // void OutputByQueueTerm(FILE *f); - - // void MergeByQueueTerm(); - void Init(MmapReader &io_stream); void ReadKeyAt(MmapReader &io_stream, u64 pos); void ReadKeyAtNonCopy(MmapReader &io_stream, u64 pos); - // void MergeByQueue(); public: SortMerger(const char *filenm, u32 group_size = 4, u32 bs = 100000000, u32 output_num = 2); virtual ~SortMerger(); - void SetParams(u32 max_record_len) { - if (max_record_len > PRE_BUF_SIZE_) { - PRE_BUF_SIZE_ = max_record_len; - RUN_BUF_SIZE_ = PRE_BUF_SIZE_ * MAX_GROUP_SIZE_; - // OUT_BUF_SIZE_ = BS_SIZE_ - RUN_BUF_SIZE_ - PRE_BUF_SIZE_; ///we do not change OUT_BUF_SIZE_ - } - } - - void SetParams(u32 max_record_len, u32 min_buff_size_required) { - if (max_record_len > PRE_BUF_SIZE_) - PRE_BUF_SIZE_ = max_record_len; - if (RUN_BUF_SIZE_ < min_buff_size_required) - RUN_BUF_SIZE_ = min_buff_size_required; - if (RUN_BUF_SIZE_ < PRE_BUF_SIZE_ * MAX_GROUP_SIZE_) - RUN_BUF_SIZE_ = PRE_BUF_SIZE_ * MAX_GROUP_SIZE_; - if (OUT_BUF_SIZE_ < min_buff_size_required) - OUT_BUF_SIZE_ = min_buff_size_required; - } - virtual void Run(); }; -//export template -//class SortMergerTerm; - export template requires std::same_as -class SortMergerTerm : public SortMerger { +class SortMergerTermTuple : public SortMerger { protected: - typedef SortMergerTerm self_t; + typedef SortMergerTermTuple self_t; using Super = SortMerger; using Super::filenm_; using Super::MAX_GROUP_SIZE_; @@ -544,20 +473,15 @@ protected: using Super::OUT_BUF_SIZE_; using Super::OUT_BUF_NUM_; using Super::pre_heap_; - using Super::merge_heap_; using Super::merge_loser_tree_; using Super::micro_run_idx_; using Super::micro_run_pos_; using Super::num_micro_run_; using Super::size_micro_run_; - using Super::num_run_; using Super::size_run_; - using Super::size_loaded_run_; using Super::run_addr_; - using Super::run_curr_addr_; using Super::micro_buf_; using Super::sub_out_buf_; - using Super::pre_buf_; using Super::run_buf_; using Super::out_buf_; using Super::pre_buf_mtx_; @@ -566,17 +490,10 @@ protected: using Super::in_out_con_; using Super::out_out_mtx_; using Super::out_out_con_; - using Super::pre_buf_size_; - using Super::pre_buf_num_; using Super::out_buf_in_idx_; using Super::out_buf_out_idx_; using Super::out_buf_size_; using Super::out_buf_full_; - using Super::curr_addr_; - using Super::end_addr_; - using Super::key_buf_; - using Super::key_buf_ptr_; - using Super::mmap_io_streams_; using Super::cycle_buffer_; using Super::cycle_buf_mtx_; using Super::cycle_buf_con_; @@ -596,14 +513,16 @@ protected: using Super::MAX_TUPLE_LENGTH; u64 term_list_count_{0}; - void OutputByQueueTerm(FILE *f); - void MergeByQueueTerm(); + void PredictImpl(DirectIO &io_stream); + + void OutputImpl(FILE *f); + void MergeImpl(); public: - SortMergerTerm(const char *filenm, u32 group_size = 4, u32 bs = 100000000, u32 output_num = 2) - : Super(filenm, group_size, bs, output_num) {} + SortMergerTermTuple(const char *filenm, u32 group_size = 4, u32 bs = 100000000, u32 output_num = 2) + : Super(filenm, group_size, bs, output_num) {} - void RunTerm(); + void Run() override; }; } // namespace infinity diff --git a/src/storage/invertedindex/common/loser_tree.cppm b/src/storage/invertedindex/common/loser_tree.cppm index 762e512629..3fd1b293dd 100644 --- a/src/storage/invertedindex/common/loser_tree.cppm +++ b/src/storage/invertedindex/common/loser_tree.cppm @@ -9,7 +9,6 @@ import stl; namespace infinity 
{ -//! LoserTreeBase class definition export template > class LoserTreeBase { public: @@ -25,20 +24,19 @@ public: } protected: struct Loser { - //! flag, true if is a virtual maximum sentinel + // flag, true if is a virtual maximum sentinel bool sup; Source source; ValueType key; }; - //! The number of nodes in the tree. + // The number of nodes in the tree. const Source ik_; - //! The next greater power of two of ik_. + // The next greater power of two of ik_. const Source k_; - //! Array containing loser tree nodes. + Vector losers_; - //! Function object for comparing ValueTypes. - // std::function cmp_; + Comparator cmp_; bool first_insert_; @@ -47,21 +45,14 @@ public: const Comparator& cmp = Comparator()) : ik_(k), k_(round_up_to_power_of_two(k)), losers_(2 * k_), cmp_(cmp), first_insert_(true) { - // : ik_(k), k_(static_cast(1) << static_cast(std::ceil(std::log2(static_cast(k))))), for (Source i = ik_ - 1; i < k_; i++) { losers_[i + k_].sup = true; losers_[i + k_].source = invalid_; } -// for (Source i = 0; i < k_; ++i) { -// losers_[i].source = invalid_; -// losers_[i].keyp = &sentinel; -// } } - //! Return the index of the player with the smallest element. Source TopSource() { - // if (losers_[0].sup) return invalid_; return losers_[0].source; } @@ -69,16 +60,13 @@ public: return losers_[0].key; } - //! Initializes the player source with the element key. void InsertStart(const ValueType* keyp, const Source& source, bool sup) { Source pos = k_ + source; - // assert(pos < losers_.size()); losers_[pos].source = source; losers_[pos].sup = sup; if (first_insert_) { for (Source i = 0; i < 2 * k_; ++i) { - // losers_[i].keyp = keyp; if (keyp) { losers_[i].key = *keyp; } else { @@ -87,17 +75,15 @@ public: } first_insert_ = false; } else { - // losers_[pos].keyp = keyp; losers_[pos].key = (keyp ? *keyp : ValueType()); } } - //! Recursively compute the winner of the competition at player root. + // Recursively compute the winner of the competition at player root. Source InitWinner(const Source& root) { if (root >= k_) { return root; } - Source left = InitWinner(2 * root); Source right = InitWinner(2 * root + 1); if (losers_[right].sup || @@ -118,7 +104,6 @@ public: } }; -//! Unguarded loser tree, keeping only pointers to the elements in the tree structure. export template > class LoserTree : public LoserTreeBase { public: @@ -126,13 +111,11 @@ public: using Source = typename Super::Source; public: - //! Constructor. explicit LoserTree(const Source& k, const Comparator& cmp = Comparator()) - // const std::function& cmp = std::greater()) : Super(k, cmp) {} - //! Delete the current minimum and insert a new element. + // Delete the current minimum and insert a new element. 
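// Editor's note (illustrative sketch, not part of this patch): DeleteTopInsert below is
// the core step of the k-way merge loop used by the sort merger. Assuming three
// pre-sorted runs and the hypothetical helpers next_key()/run_exhausted()/consume(),
// a caller might drive LoserTree<u32> roughly like this, mirroring how
// SortMerger registers every source with InsertStart(), calls Init(), and then
// alternates TopSource()/TopKey() with DeleteTopInsert():
//
//   LoserTree<u32> tree(3);
//   u32 heads[3] = {2, 5, 9};                     // current head of each sorted run
//   for (u32 s = 0; s < 3; ++s)
//       tree.InsertStart(&heads[s], s, false);    // register every source
//   tree.Init();
//   while (tree.TopSource() != LoserTree<u32>::invalid_) {
//       u32 src = tree.TopSource();               // run currently holding the minimum
//       consume(tree.TopKey());
//       if (run_exhausted(src)) {
//           tree.DeleteTopInsert(nullptr, true);  // sup=true: this run is finished
//       } else {
//           u32 next = next_key(src);
//           tree.DeleteTopInsert(&next, false);   // feed the next key from that run
//       }
//   }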
void DeleteTopInsert(const ValueType* keyp, bool sup) { assert(sup == (keyp == nullptr)); Source source = Super::losers_[0].source; @@ -146,7 +129,6 @@ public: } else if (Super::losers_[pos].sup) { // do nothing } else if (Super::cmp_(Super::losers_[pos].key, key)) { - // std::swap(Super::losers_[pos].sup, sup); std::swap(Super::losers_[pos].source, source); std::swap(Super::losers_[pos].key, key); } else { diff --git a/src/storage/invertedindex/common/mmap.cppm b/src/storage/invertedindex/common/mmap.cppm index 3826e3f366..fcec640ad2 100644 --- a/src/storage/invertedindex/common/mmap.cppm +++ b/src/storage/invertedindex/common/mmap.cppm @@ -48,13 +48,11 @@ export int MunmapFile(u8 *&data_ptr, SizeT &data_len, SizeT offset_diff = 0) { export struct MmapReader { MmapReader(const String &filename, SizeT offset = 0, SizeT len = SizeT(-1), int advice = MADV_SEQUENTIAL) { - // int rc = MmapFile(filename, data_ptr_, data_len_, advice); - // fmt::print("filename = {}, offset = {}, len = {}\n", filename, offset, len); int rc = MmapPartFile(filename, data_ptr_, len, advice, offset); idx_ = 0; data_len_ = len; if (rc < 0) { - throw UnrecoverableException("MmapFile failed"); + UnrecoverableError("MmapFile failed"); } } @@ -117,7 +115,6 @@ export struct MmapReader { offset_diff_ = offset - aligned_offset; SizeT mapped_length = data_len + offset_diff_; - // void* mapped = mmap(NULL, mapped_length, PROT_READ, MAP_SHARED, fd, aligned_offset); int f = open(fp.c_str(), O_RDONLY); void *tmpd = mmap(NULL, mapped_length, PROT_READ, MAP_SHARED, f, aligned_offset); @@ -128,7 +125,6 @@ export struct MmapReader { if (rc < 0) return -1; data_ptr = (u8 *)tmpd + offset_diff_; - // data_len = len_f; return 0; } diff --git a/src/storage/invertedindex/memory_indexer.cpp b/src/storage/invertedindex/memory_indexer.cpp index f8a7da43f0..b5fdd6df2a 100644 --- a/src/storage/invertedindex/memory_indexer.cpp +++ b/src/storage/invertedindex/memory_indexer.cpp @@ -63,9 +63,6 @@ import third_party; namespace infinity { constexpr int MAX_TUPLE_LENGTH = 1024; // we assume that analyzed term, together with docid/offset info, will never exceed such length -#define USE_MMAP -#define USE_BUF -//#define USE_MORE_BUF bool MemoryIndexer::KeyComp::operator()(const String &lhs, const String &rhs) const { int ret = strcmp(lhs.c_str(), rhs.c_str()); return ret < 0; @@ -89,10 +86,9 @@ MemoryIndexer::MemoryIndexer(const String &index_dir, prepared_posting_ = MakeShared(nullptr, nullptr, PostingFormatOption(flag_), column_lengths_); Path path = Path(index_dir) / (base_name + ".tmp.merge"); spill_full_path_ = path.string(); -#ifdef USE_BUF + spill_buffer_size_ = MAX_TUPLE_LENGTH * 2; spill_buffer_ = MakeUnique(spill_buffer_size_); -#endif } MemoryIndexer::~MemoryIndexer() { @@ -176,8 +172,6 @@ void MemoryIndexer::Commit(bool offline) { } SizeT MemoryIndexer::CommitOffline(SizeT wait_if_empty_ms) { - // BaseProfiler profiler; - // profiler.Begin(); std::unique_lock lock(mutex_commit_, std::defer_lock); if (!lock.try_lock()) { return 0; @@ -191,13 +185,7 @@ SizeT MemoryIndexer::CommitOffline(SizeT wait_if_empty_ms) { SizeT num = inverters.size(); if (num > 0) { for (auto &inverter : inverters) { - // inverter->SpillSortResults(this->spill_file_handle_, this->tuple_count_); -#ifdef USE_BUF inverter->SpillSortResults(this->spill_file_handle_, this->tuple_count_, spill_buffer_, spill_buffer_size_); - // inverter->SpillSortResults(this->spill_file_handle_, this->tuple_count_, buf_writer_); -#else - inverter->SpillSortResults(this->spill_file_handle_, 
this->tuple_count_); -#endif num_runs_++; } } @@ -208,8 +196,6 @@ SizeT MemoryIndexer::CommitOffline(SizeT wait_if_empty_ms) { cv_.notify_all(); } } - // LOG_INFO(fmt::format("MemoryIndexer::CommitOffline time cost: {}", profiler.ElapsedToString())); - // profiler.End(); return num; } @@ -258,24 +244,13 @@ SizeT MemoryIndexer::CommitSync(SizeT wait_if_empty_ms) { return num_generated; } -#define PRINT_TIME_COST void MemoryIndexer::Dump(bool offline, bool spill) { if (offline) { assert(!spill); while (GetInflightTasks() > 0) { CommitOffline(100); } -#ifdef PRINT_TIME_COST - BaseProfiler profiler; - profiler.Begin(); -#endif - OfflineDumpTermTupleList(); -// OfflineDump(); -#ifdef PRINT_TIME_COST -// LOG_INFO(fmt::format("MemoryIndexer::OfflineDump() time cost: {}", profiler.ElapsedToString())); - fmt::print("MemoryIndexer::OfflineDumpTermTupleList() time cost: {}\n", profiler.ElapsedToString()); - profiler.End(); -#endif + OfflineDump(); return; } @@ -397,141 +372,9 @@ void MemoryIndexer::OfflineDump() { } FinalSpillFile(); constexpr u32 buffer_size_of_each_run = 2 * 1024 * 1024; - SortMerger *merger = new SortMerger(spill_full_path_.c_str(), num_runs_, buffer_size_of_each_run * num_runs_, 2); -// SortMergerTerm *merger = new SortMergerTerm(spill_full_path_.c_str(), num_runs_, buffer_size_of_each_run * num_runs_, 2); -// merger->RunTerm(); + SortMergerTermTuple *merger = new SortMergerTermTuple(spill_full_path_.c_str(), num_runs_, buffer_size_of_each_run * num_runs_, 2); merger->Run(); delete merger; -#ifdef USE_MMAP - MmapReader reader(spill_full_path_); - u64 count; - reader.ReadU64(count); - // idx += sizeof(u64); -#else - FILE *f = fopen(spill_full_path_.c_str(), "r"); - u64 count; - fread((char *)&count, sizeof(u64), 1, f); -#endif - Path path = Path(index_dir_) / base_name_; - String index_prefix = path.string(); - LocalFileSystem fs; - String posting_file = index_prefix + POSTING_SUFFIX; - SharedPtr posting_file_writer = MakeShared(fs, posting_file, 128000); - String dict_file = index_prefix + DICT_SUFFIX; - SharedPtr dict_file_writer = MakeShared(fs, dict_file, 128000); - TermMetaDumper term_meta_dumpler((PostingFormatOption(flag_))); - String fst_file = index_prefix + DICT_SUFFIX + ".fst"; - std::ofstream ofs(fst_file.c_str(), std::ios::binary | std::ios::trunc); - OstreamWriter wtr(ofs); - FstBuilder fst_builder(wtr); - - u32 record_length; - char buf[MAX_TUPLE_LENGTH]; - String last_term_str; - std::string_view last_term; - u32 last_doc_id = INVALID_DOCID; - UniquePtr posting; - - for (u64 i = 0; i < count; ++i) { -#ifdef USE_MMAP - reader.ReadU32(record_length); -#else - fread(&record_length, sizeof(u32), 1, f); -#endif - if (record_length >= MAX_TUPLE_LENGTH) { -#ifdef USE_MMAP - reader.Seek(record_length); - // idx += record_length; -#else - // rubbish tuple, abandoned - char *buffer = new char[record_length]; - fread(buffer, record_length, 1, f); - // TermTuple tuple(buffer, record_length); - delete[] buffer; -#endif - continue; - } -#ifdef USE_MMAP - reader.ReadBuf(buf, record_length); - // char* tuple_data = reader.ReadBufNonCopy(record_length); -#else - fread(buf, record_length, 1, f); -#endif - TermTuple tuple(buf, record_length); - if (tuple.term_ != last_term) { - assert(last_term < tuple.term_); - if (last_doc_id != INVALID_DOCID) { - posting->EndDocument(last_doc_id, 0); - // printf(" EndDocument1-%u\n", last_doc_id); - } - if (posting.get()) { - TermMeta term_meta(posting->GetDF(), posting->GetTotalTF()); - posting->Dump(posting_file_writer, term_meta); - SizeT 
term_meta_offset = dict_file_writer->TotalWrittenBytes(); - term_meta_dumpler.Dump(dict_file_writer, term_meta); - fst_builder.Insert((u8 *)last_term.data(), last_term.length(), term_meta_offset); - } - posting = MakeUnique(nullptr, nullptr, PostingFormatOption(flag_), column_lengths_); - // printf("\nswitched-term-%d-<%s>\n", i.term_num_, term.data()); - last_term_str = String(tuple.term_); - last_term = std::string_view(last_term_str); - } else if (last_doc_id != tuple.doc_id_) { - assert(last_doc_id != INVALID_DOCID); - assert(last_doc_id < tuple.doc_id_); - assert(posting.get() != nullptr); - posting->EndDocument(last_doc_id, 0); - // printf(" EndDocument2-%u\n", last_doc_id); - } - last_doc_id = tuple.doc_id_; - posting->AddPosition(tuple.term_pos_); - // printf(" pos-%u", tuple.term_pos_); - } -#ifdef USE_MMAP - // MunmapFile(data_ptr, data_len); - // reader.MunmapFile(); -#endif - if (last_doc_id != INVALID_DOCID) { - posting->EndDocument(last_doc_id, 0); - // printf(" EndDocument3-%u\n", last_doc_id); - TermMeta term_meta(posting->GetDF(), posting->GetTotalTF()); - posting->Dump(posting_file_writer, term_meta); - SizeT term_meta_offset = dict_file_writer->TotalWrittenBytes(); - term_meta_dumpler.Dump(dict_file_writer, term_meta); - fst_builder.Insert((u8 *)last_term.data(), last_term.length(), term_meta_offset); - } - posting_file_writer->Sync(); - dict_file_writer->Sync(); - fst_builder.Finish(); - fs.AppendFile(dict_file, fst_file); - fs.DeleteFile(fst_file); - - String column_length_file = index_prefix + LENGTH_SUFFIX; - UniquePtr file_handler = fs.OpenFile(column_length_file, FileFlags::WRITE_FLAG | FileFlags::TRUNCATE_CREATE, FileLockType::kNoLock); - Vector &unsafe_column_lengths = column_lengths_.UnsafeVec(); - fs.Write(*file_handler, &unsafe_column_lengths[0], sizeof(unsafe_column_lengths[0]) * unsafe_column_lengths.size()); - fs.Close(*file_handler); - - std::filesystem::remove(spill_full_path_); - // LOG_INFO(fmt::format("MemoryIndexer::OfflineDump done, num_runs_ {}", num_runs_)); - num_runs_ = 0; -} - -void MemoryIndexer::OfflineDumpTermTupleList() { - // Steps of offline dump: - // 1. External sort merge - // 2. Generate posting - // 3. 
Dump disk segment data - // LOG_INFO(fmt::format("MemoryIndexer::OfflineDump begin, num_runs_ {}\n", num_runs_)); - if (tuple_count_ == 0) { - return; - } - FinalSpillFile(); - constexpr u32 buffer_size_of_each_run = 2 * 1024 * 1024; - // SortMerger *merger = new SortMerger(spill_full_path_.c_str(), num_runs_, buffer_size_of_each_run * num_runs_, 2); - SortMergerTerm *merger = new SortMergerTerm(spill_full_path_.c_str(), num_runs_, buffer_size_of_each_run * num_runs_, 2); - merger->RunTerm(); - // merger->Run(); - delete merger; MmapReader reader(spill_full_path_); u64 term_list_count; @@ -555,7 +398,7 @@ void MemoryIndexer::OfflineDumpTermTupleList() { u32 doc_pos_list_size = 0; const u32 MAX_TUPLE_LIST_LENGTH = MAX_TUPLE_LENGTH + 2 * 1024 * 1024; auto buf = MakeUnique(MAX_TUPLE_LIST_LENGTH); - // char buf[MAX_TUPLE_LENGTH]; + String last_term_str; std::string_view last_term; u32 last_doc_id = INVALID_DOCID; @@ -564,7 +407,6 @@ void MemoryIndexer::OfflineDumpTermTupleList() { assert(record_length < MAX_TUPLE_LIST_LENGTH); for (u64 i = 0; i < term_list_count; ++i) { - reader.ReadU32(record_length); reader.ReadU32(term_length); @@ -573,7 +415,6 @@ void MemoryIndexer::OfflineDumpTermTupleList() { continue; } - reader.ReadBuf(buf.get(), record_length - sizeof(u32)); u32 buf_idx = 0; @@ -583,12 +424,10 @@ void MemoryIndexer::OfflineDumpTermTupleList() { std::string_view term = std::string_view(buf.get() + buf_idx, term_length); buf_idx += term_length; - // TermTuple tuple(buf, record_length); if (term != last_term) { assert(last_term < term); if (last_doc_id != INVALID_DOCID) { posting->EndDocument(last_doc_id, 0); - // printf(" EndDocument1-%u\n", last_doc_id); } if (posting.get()) { TermMeta term_meta(posting->GetDF(), posting->GetTotalTF()); @@ -598,7 +437,6 @@ void MemoryIndexer::OfflineDumpTermTupleList() { fst_builder.Insert((u8 *)last_term.data(), last_term.length(), term_meta_offset); } posting = MakeUnique(nullptr, nullptr, PostingFormatOption(flag_), column_lengths_); - // printf("\nswitched-term-%d-<%s>\n", i.term_num_, term.data()); last_term_str = String(term); last_term = std::string_view(last_term_str); last_doc_id = INVALID_DOCID; @@ -610,27 +448,17 @@ void MemoryIndexer::OfflineDumpTermTupleList() { buf_idx += sizeof(u32); if (last_doc_id != INVALID_DOCID && last_doc_id != doc_id) { - // assert(last_doc_id != INVALID_DOCID); assert(last_doc_id < doc_id); assert(posting.get() != nullptr); posting->EndDocument(last_doc_id, 0); - // printf(" EndDocument2-%u\n", last_doc_id); } last_doc_id = doc_id; posting->AddPosition(term_pos); } -// last_doc_id = doc_id; -// posting->AddPosition(tuple.term_pos_); - // printf(" pos-%u", tuple.term_pos_); } -#ifdef USE_MMAP - // MunmapFile(data_ptr, data_len); - // reader.MunmapFile(); -#endif if (last_doc_id != INVALID_DOCID) { posting->EndDocument(last_doc_id, 0); - // printf(" EndDocument3-%u\n", last_doc_id); TermMeta term_meta(posting->GetDF(), posting->GetTotalTF()); posting->Dump(posting_file_writer, term_meta); SizeT term_meta_offset = dict_file_writer->TotalWrittenBytes(); @@ -650,7 +478,6 @@ void MemoryIndexer::OfflineDumpTermTupleList() { fs.Close(*file_handler); std::filesystem::remove(spill_full_path_); - // LOG_INFO(fmt::format("MemoryIndexer::OfflineDump done, num_runs_ {}", num_runs_)); num_runs_ = 0; } @@ -665,10 +492,6 @@ void MemoryIndexer::FinalSpillFile() { void MemoryIndexer::PrepareSpillFile() { spill_file_handle_ = fopen(spill_full_path_.c_str(), "w"); fwrite(&tuple_count_, sizeof(u64), 1, spill_file_handle_); -#ifdef 
USE_MORE_BUF - const SizeT spill_buf_size = 128000; - buf_writer_ = MakeUnique(spill_file_handle_, spill_buf_size); -#endif } } // namespace infinity \ No newline at end of file diff --git a/src/storage/invertedindex/memory_indexer.cppm b/src/storage/invertedindex/memory_indexer.cppm index 730826996b..f280d0ea4f 100644 --- a/src/storage/invertedindex/memory_indexer.cppm +++ b/src/storage/invertedindex/memory_indexer.cppm @@ -117,8 +117,6 @@ private: void OfflineDump(); - void OfflineDumpTermTupleList(); - void FinalSpillFile(); void PrepareSpillFile(); diff --git a/src/unit_test/storage/common/loser_tree.cpp b/src/unit_test/storage/common/loser_tree.cpp index 6b1e52293e..033dcc582b 100644 --- a/src/unit_test/storage/common/loser_tree.cpp +++ b/src/unit_test/storage/common/loser_tree.cpp @@ -46,10 +46,6 @@ void LoserTreeTest::GenerateData(infinity::SizeT num_size, infinity::SizeT loser } for (SizeT i = 0; i < loser_num; ++i) { std::sort(loser[i].begin(), loser[i].end()); -// for (SizeT j = 0; j < loser[i].size(); ++j) { -// fmt::print("{} ", loser[i][j]); -// } -// fmt::print("\n"); } } @@ -70,7 +66,6 @@ void LoserTreeTest::MultiWayMerge(infinity::SizeT num_size, infinity::SizeT lose auto min_value = loser_tree->TopKey(); auto min_source = loser_tree->TopSource(); merge_res.push_back(min_value); - // fmt::print("min val = {}, min source = {}\n", min_value, min_source); auto& min_seq = num_idx[min_source]; if (min_seq < loser[min_source].size()) { @@ -85,17 +80,6 @@ void LoserTreeTest::MultiWayMerge(infinity::SizeT num_size, infinity::SizeT lose for (SizeT i = 0; i < merge_res.size(); ++i) { EXPECT_EQ(merge_res[i], numbers[i]); } - return ; - for (SizeT i = 0; i < merge_res.size(); ++i) { - fmt::print("{} ", merge_res[i]); - // EXPECT_EQ(merge_res[i], numbers[i]); - } - fmt::print("\n"); - for (SizeT i = 0; i < numbers.size(); ++i) { - fmt::print("{} ", numbers[i]); - // EXPECT_EQ(merge_res[i], numbers[i]); - } - fmt::print("\n"); } TEST_F(LoserTreeTest, BasicMerge1) { diff --git a/src/unit_test/storage/invertedindex/common/external_sort.cpp b/src/unit_test/storage/invertedindex/common/external_sort.cpp index 5439ec4a59..15f9ec63df 100644 --- a/src/unit_test/storage/invertedindex/common/external_sort.cpp +++ b/src/unit_test/storage/invertedindex/common/external_sort.cpp @@ -69,7 +69,6 @@ class ExternalSortTest : public BaseTest { u32 run_num = rand() % 300; while (run_num < 100 || SIZE % run_num != 0) run_num = rand() % 300; - // fmt::print("begin tell = {}\n", ftell(f)); for (u32 i = 0; i < run_num; ++i) { u64 pos = ftell(f); fseek(f, 2 * sizeof(u32) + sizeof(u64), SEEK_CUR); @@ -77,14 +76,8 @@ class ExternalSortTest : public BaseTest { for (u32 j = 0; j < SIZE / run_num; ++j) { str = RandStr(i * SIZE / run_num + j); LenType len = str.size(); - // fmt::print("begin tell = {}\n", ftell(f)); fwrite(&len, sizeof(LenType), 1, f); fwrite(str.data(), len, 1, f); -// fmt::print("len: {}, str.size() = {}, size len_type = {}, tell = {}, str: ", len, str.size(), sizeof(LenType), ftell(f)); -// for (auto c : str) { -// fmt::print("{}",c); -// } -// fmt::print("\n"); s += len + sizeof(LenType); } u64 next_run_pos = ftell(f); @@ -93,7 +86,6 @@ class ExternalSortTest : public BaseTest { s = SIZE / run_num; fwrite(&s, sizeof(u32), 1, f); fwrite(&next_run_pos, sizeof(u64), 1, f); - // fmt::print("next_pos: {}\n", next_run_pos); fseek(f, 0, SEEK_END); } fclose(f); diff --git a/src/unit_test/storage/invertedindex/memory_indexer.cpp b/src/unit_test/storage/invertedindex/memory_indexer.cpp index 
3c7c2342b8..44d7f94627 100644 --- a/src/unit_test/storage/invertedindex/memory_indexer.cpp +++ b/src/unit_test/storage/invertedindex/memory_indexer.cpp @@ -71,14 +71,6 @@ class MemoryIndexerTest : public BaseTest { R"#(The two tapes of a transducer are typically viewed as an input tape and an output tape. On this view, a transducer is said to transduce (i.e., translate) the contents of its input tape to its output tape, by accepting a string on its input tape and generating another string on its output tape. It may do so nondeterministically and it may produce more than one output for each input string. A transducer may also produce no output for a given input string, in which case it is said to reject the input. In general, a transducer computes a relation between two formal languages.)#", }; -// const char *paragraphs[] = { -// R"#(a, b, c)#", -// R"#(a, b)#", -// R"#(c, d)#", -// R"#(e, d)#", -// R"#(a, c)#", -// }; - const SizeT num_paragraph = sizeof(paragraphs) / sizeof(char *); column_ = ColumnVector::Make(MakeShared(LogicalType::kVarchar)); column_->Initialize(); From ada5d1a4e85f297329b3ce678f60bf8addf1283a Mon Sep 17 00:00:00 2001 From: Ma-cat <1054638297@qq.com> Date: Thu, 23 May 2024 19:40:58 +0800 Subject: [PATCH 12/14] refactor some code --- src/storage/invertedindex/column_inverter.cpp | 120 ------------------ .../invertedindex/column_inverter.cppm | 4 - .../invertedindex/common/buf_writer.cppm | 6 +- .../common/external_sort_merger.cpp | 39 ++---- .../common/external_sort_merger.cppm | 79 ++---------- .../invertedindex/common/loser_tree.cppm | 2 + src/storage/invertedindex/memory_indexer.cpp | 7 +- 7 files changed, 32 insertions(+), 225 deletions(-) diff --git a/src/storage/invertedindex/column_inverter.cpp b/src/storage/invertedindex/column_inverter.cpp index bd0dec0eb2..8af14dc8df 100644 --- a/src/storage/invertedindex/column_inverter.cpp +++ b/src/storage/invertedindex/column_inverter.cpp @@ -256,126 +256,6 @@ void ColumnInverter::SortForOfflineDump() { // +-----------+ +----------------++--------------------++--------------------------++-------------------------------------------------------+ // ----------------------------------------------------------------------------------------------------------------------------+ // Data within each group -void ColumnInverter::SpillSortResults(FILE *spill_file, u64 &tuple_count) { - // spill sort results for external merge sort - if (positions_.empty()) { - return; - } - // size of this Run in bytes - u32 data_size = 0; - u64 data_size_pos = ftell(spill_file); - fwrite(&data_size, sizeof(u32), 1, spill_file); - // number of tuples - u32 num_of_tuples = positions_.size(); - tuple_count += num_of_tuples; - fwrite(&num_of_tuples, sizeof(u32), 1, spill_file); - // start offset for next spill - u64 next_start_offset = 0; - u64 next_start_offset_pos = ftell(spill_file); - fwrite(&next_start_offset, sizeof(u64), 1, spill_file); - u64 data_start_offset = ftell(spill_file); - // sorted data - u32 last_term_num = std::numeric_limits::max(); - StringRef term; - u32 record_length = 0; - char str_null = '\0'; - for (auto &i : positions_) { - if (last_term_num != i.term_num_) { - last_term_num = i.term_num_; - term = GetTermFromNum(last_term_num); - } - record_length = term.size() + sizeof(docid_t) + sizeof(u32) + 1; - fwrite(&record_length, sizeof(u32), 1, spill_file); - fwrite(term.data(), term.size(), 1, spill_file); - fwrite(&str_null, sizeof(char), 1, spill_file); - fwrite(&i.doc_id_, sizeof(docid_t), 1, spill_file); - fwrite(&i.term_pos_, 
sizeof(u32), 1, spill_file); - } - - // update data size - next_start_offset = ftell(spill_file); - data_size = next_start_offset - data_start_offset; - fseek(spill_file, data_size_pos, SEEK_SET); - fwrite(&data_size, sizeof(u32), 1, spill_file); // update offset for next spill - fseek(spill_file, next_start_offset_pos, SEEK_SET); - fwrite(&next_start_offset, sizeof(u64), 1, spill_file); - fseek(spill_file, next_start_offset, SEEK_SET); -} - -void ColumnInverter::SpillSortResults(FILE *spill_file, u64 &tuple_count, UniquePtr& spill_buffer, SizeT spill_buf_size) { - // spill sort results for external merge sort - if (positions_.empty()) { - return; - } - SizeT spill_buf_idx = 0; - SizeT spill_file_tell = ftell(spill_file); - // size of this Run in bytes - u32 data_size = 0; - u64 data_size_pos = spill_file_tell; - memcpy(spill_buffer.get() + spill_buf_idx, &data_size, sizeof(u32)); - spill_buf_idx += sizeof(u32); - spill_file_tell += sizeof(u32); - - // number of tuples - u32 num_of_tuples = positions_.size(); - tuple_count += num_of_tuples; - memcpy(spill_buffer.get() + spill_buf_idx, &num_of_tuples, sizeof(u32)); - spill_buf_idx += sizeof(u32); - spill_file_tell += sizeof(u32); - - // start offset for next spill - u64 next_start_offset = 0; - u64 next_start_offset_pos = spill_file_tell; - memcpy(spill_buffer.get() + spill_buf_idx, &next_start_offset, sizeof(u64)); - spill_buf_idx += sizeof(u64); - spill_file_tell += sizeof(u64); - - assert(spill_buf_idx < spill_buf_size); - fwrite(spill_buffer.get(), spill_buf_idx, 1, spill_file); - spill_buf_idx = 0; - - u64 data_start_offset = spill_file_tell; - assert((SizeT)ftell(spill_file) == spill_file_tell); - // sorted data - u32 last_term_num = std::numeric_limits::max(); - StringRef term; - u32 record_length = 0; - char str_null = '\0'; - for (auto &i : positions_) { - if (last_term_num != i.term_num_) { - last_term_num = i.term_num_; - term = GetTermFromNum(last_term_num); - } - record_length = term.size() + sizeof(docid_t) + sizeof(u32) + 1; - memcpy(spill_buffer.get() + spill_buf_idx, &record_length, sizeof(u32)); - spill_buf_idx += sizeof(u32); - - memcpy(spill_buffer.get() + spill_buf_idx, term.data(), term.size()); - spill_buf_idx += term.size(); - - memcpy(spill_buffer.get() + spill_buf_idx, &str_null, sizeof(char)); - spill_buf_idx += sizeof(char); - - memcpy(spill_buffer.get() + spill_buf_idx, &i.doc_id_, sizeof(docid_t)); - spill_buf_idx += sizeof(docid_t); - - memcpy(spill_buffer.get() + spill_buf_idx, &i.term_pos_, sizeof(u32)); - spill_buf_idx += sizeof(u32); - - assert(spill_buf_idx < spill_buf_size); - fwrite(spill_buffer.get(), spill_buf_idx, 1, spill_file); - spill_buf_idx = 0; - } - - // update data size - next_start_offset = ftell(spill_file); - data_size = next_start_offset - data_start_offset; - fseek(spill_file, data_size_pos, SEEK_SET); - fwrite(&data_size, sizeof(u32), 1, spill_file); // update offset for next spill - fseek(spill_file, next_start_offset_pos, SEEK_SET); - fwrite(&next_start_offset, sizeof(u64), 1, spill_file); - fseek(spill_file, next_start_offset, SEEK_SET); -} void ColumnInverter::SpillSortResults(FILE *spill_file, u64 &tuple_count, UniquePtr& buf_writer) { // spill sort results for external merge sort diff --git a/src/storage/invertedindex/column_inverter.cppm b/src/storage/invertedindex/column_inverter.cppm index 6533221fc6..253bdc46d2 100644 --- a/src/storage/invertedindex/column_inverter.cppm +++ b/src/storage/invertedindex/column_inverter.cppm @@ -74,10 +74,6 @@ public: } }; - void 
SpillSortResults(FILE *spill_file, u64 &tuple_count); - - void SpillSortResults(FILE *spill_file, u64 &tuple_count, UniquePtr& spill_buffer, SizeT spill_buf_size); - void SpillSortResults(FILE *spill_file, u64 &tuple_count, UniquePtr& buf_writer); private: diff --git a/src/storage/invertedindex/common/buf_writer.cppm b/src/storage/invertedindex/common/buf_writer.cppm index 6e60077db6..7e8a6f0d26 100644 --- a/src/storage/invertedindex/common/buf_writer.cppm +++ b/src/storage/invertedindex/common/buf_writer.cppm @@ -7,14 +7,16 @@ export module buf_writer; import stl; namespace infinity { - +// A simple buffer writer that writes data to a file. +// Currently only used by ColumnInverter. +// ColumnInverter uses BufWriter for sequential writes and the spill_file pointer for random-access writes. export struct BufWriter { BufWriter(FILE *spill_file, SizeT spill_buf_size) : spill_file_(spill_file), spill_buf_size_(spill_buf_size) { spill_buffer_ = MakeUnique(spill_buf_size_); } void Write(const char* data, SizeT data_size) { - if (spill_buf_idx_ + data_size >= spill_buf_size_) { + if (spill_buf_idx_ + data_size > spill_buf_size_) { Flush(); } memcpy(spill_buffer_.get() + spill_buf_idx_, data, data_size); diff --git a/src/storage/invertedindex/common/external_sort_merger.cpp b/src/storage/invertedindex/common/external_sort_merger.cpp index a72c426afe..24fb3c8d5f 100644 --- a/src/storage/invertedindex/common/external_sort_merger.cpp +++ b/src/storage/invertedindex/common/external_sort_merger.cpp @@ -26,7 +26,6 @@ module; module external_sort_merger; import stl; -import mmap; import third_party; import file_writer; import local_file_system; @@ -72,7 +71,7 @@ SortMerger::SortMerger(const char *filenm, u32 group_size, u32 assert(CYCLE_BUF_THRESHOLD_ <= CYCLE_BUF_SIZE_); cycle_buffer_ = MakeUnique(CYCLE_BUF_SIZE_, PRE_BUF_SIZE_); - merge_loser_tree_ = MakeShared>(MAX_GROUP_SIZE_); + merge_loser_tree_ = MakeShared>>(MAX_GROUP_SIZE_); } template @@ -196,8 +195,9 @@ void SortMerger::Init(DirectIO &io_stream) { u32 pos = 0; u32 last_pos = -1; assert(i < MAX_GROUP_SIZE_); - if (size_micro_run_[i] <= 0) + if (size_micro_run_[i] <= 0) { continue; + } while (pos + sizeof(LenType) <= size_micro_run_[i]) { LenType len = *(LenType *)(micro_buf_[i] + pos); if (pos + sizeof(LenType) + len <= size_micro_run_[i]) { @@ -248,7 +247,7 @@ void SortMerger::Predict(DirectIO &io_stream) { u32 pre_buf_num = 0; u32 pre_buf_size = 0; while (1) { - if (pos + sizeof(LenType) > s) { + if (pos + sizeof(LenType) > s || pos + sizeof(LenType) + *(LenType *)(data_ptr + pos) > s) { // the last record of this microrun IASSERT(last_pos != (u32)-1); // buffer too small that can't hold one record LenType len = *(LenType *)(data_ptr + last_pos) + sizeof(LenType); @@ -257,19 +257,9 @@ void SortMerger::Predict(DirectIO &io_stream) { pre_heap_.push(KeyAddr(tmp, addr + (u64)pos, idx)); break; } - LenType len = *(LenType *)(data_ptr + pos); - if (pos + sizeof(LenType) + len > s) { - IASSERT(last_pos != (u32)-1); // buffer too small that can't hold one record - len = *(LenType *)(data_ptr + last_pos) + sizeof(LenType); - char *tmp = (char *)malloc(len); - memcpy(tmp, data_ptr + last_pos, len); - pre_heap_.push(KeyAddr(tmp, addr + (u64)pos, idx)); - break; - } - ++pre_buf_num; last_pos = pos; - pos += sizeof(LenType) + len; + pos += sizeof(LenType) + *(LenType *)(data_ptr + pos); } pre_buf_size = pos; @@ -322,10 +312,11 @@ void SortMerger::Merge() { std::unique_lock lock(cycle_buf_mtx_); cycle_buf_con_.wait(lock, [this]() { - return 
!this->cycle_buffer_->IsEmpty() || (this->read_finish_ && this->cycle_buffer_->IsEmpty()); + return !this->cycle_buffer_->IsEmpty() || read_finish_; }); - if (cycle_buffer_->IsEmpty() && read_finish_) { + if (read_finish_) { + assert(cycle_buffer_->IsEmpty()); merge_loser_tree_->DeleteTopInsert(nullptr, true); continue; } @@ -365,7 +356,6 @@ template void SortMerger::OutputByQueue(FILE *f) { DirectIO io_stream(f, "w"); while (count_ > 0) { - // wait its turn to output Queue> temp_out_queue; Queue temp_out_size_queue; { @@ -406,8 +396,9 @@ void SortMerger::Output(FILE *f, u32 idx) { while (count_ > 0) { // wait its turn to output std::unique_lock out_lock(out_out_mtx_); - while (out_buf_out_idx_ != idx) + while (out_buf_out_idx_ != idx) { out_out_con_.wait(out_lock); + } if (count_ == 0) { ++out_buf_out_idx_; @@ -488,7 +479,6 @@ void SortMergerTermTuple::MergeImpl() { auto out_key = top.KEY(); if (tuple_list == nullptr) { tuple_list = MakeUnique(out_key.term_); - tuple_list->Add(out_key.doc_id_, out_key.term_pos_); } else if (idx != last_idx) { if (tuple_list->IsFull() || out_key.term_ != tuple_list->term_) { // output @@ -498,11 +488,9 @@ void SortMergerTermTuple::MergeImpl() { out_queue_con_.notify_one(); } tuple_list = MakeUnique(out_key.term_); - tuple_list->Add(out_key.doc_id_, out_key.term_pos_); - } else { - tuple_list->Add(out_key.doc_id_, out_key.term_pos_); } } + tuple_list->Add(out_key.doc_id_, out_key.term_pos_); assert(idx < MAX_GROUP_SIZE_); @@ -511,10 +499,11 @@ void SortMergerTermTuple::MergeImpl() { std::unique_lock lock(cycle_buf_mtx_); cycle_buf_con_.wait(lock, [this]() { - return !this->cycle_buffer_->IsEmpty() || (this->read_finish_ && this->cycle_buffer_->IsEmpty()); + return !this->cycle_buffer_->IsEmpty() || read_finish_; }); - if (cycle_buffer_->IsEmpty() && read_finish_) { + if (read_finish_) { + assert(cycle_buffer_->IsEmpty()); merge_loser_tree_->DeleteTopInsert(nullptr, true); continue; } diff --git a/src/storage/invertedindex/common/external_sort_merger.cppm b/src/storage/invertedindex/common/external_sort_merger.cppm index ad2699d63c..ac7d6431e9 100644 --- a/src/storage/invertedindex/common/external_sort_merger.cppm +++ b/src/storage/invertedindex/common/external_sort_merger.cppm @@ -24,7 +24,6 @@ export module external_sort_merger; import stl; import loser_tree; -import mmap; import infinity_exception; import file_writer; @@ -154,9 +153,9 @@ struct KeyAddress { bool operator==(const KeyAddress &other) const { return Compare(other) == 0; } - bool operator>(const KeyAddress &other) const { return Compare(other) < 0; } + bool operator>(const KeyAddress &other) const { return Compare(other) > 0; } - bool operator<(const KeyAddress &other) const { return Compare(other) > 0; } + bool operator<(const KeyAddress &other) const { return Compare(other) < 0; } }; template @@ -197,9 +196,9 @@ struct KeyAddress(const KeyAddress &other) const { return Compare(other) < 0; } + bool operator>(const KeyAddress &other) const { return Compare(other) > 0; } - bool operator<(const KeyAddress &other) const { return Compare(other) > 0; } + bool operator<(const KeyAddress &other) const { return Compare(other) < 0; } }; template @@ -234,9 +233,9 @@ struct KeyAddress { bool operator==(const KeyAddress &other) const { return Compare(other) == 0; } - bool operator>(const KeyAddress &other) const { return Compare(other) < 0; } + bool operator>(const KeyAddress &other) const { return Compare(other) > 0; } - bool operator<(const KeyAddress &other) const { return Compare(other) > 0; } + bool 
operator<(const KeyAddress &other) const { return Compare(other) < 0; } }; class CycleBuffer { @@ -251,36 +250,6 @@ public: } } - void Put(const TermTupleList& tuple_list) { - /* - * data_len, term_len, doc_list_size, term, [doc_id, term_pos]... - */ - u32 term_len = tuple_list.term_.size(); - u32 doc_list_size = tuple_list.Size(); - auto SIZE_U32 = sizeof(u32); - u32 data_len = SIZE_U32 + SIZE_U32 + term_len + 2 * SIZE_U32 * doc_list_size; - if (data_len > buffer_size_) { - throw std::runtime_error("Data length exceeds buffer capacity"); - } - SizeT idx = 0; - std::memcpy(buffer_array_[head_].get() + idx, &data_len, SIZE_U32); - idx += SIZE_U32; - std::memcpy(buffer_array_[head_].get() + idx, &term_len, SIZE_U32); - idx += SIZE_U32; - std::memcpy(buffer_array_[head_].get() + idx, &doc_list_size, SIZE_U32); - idx += SIZE_U32; - std::memcpy(buffer_array_[head_].get() + idx, tuple_list.term_.data(), term_len); - idx += term_len; - std::memcpy(buffer_array_[head_].get() + idx, tuple_list.doc_pos_list_.data(), SIZE_U32 * 2 * doc_list_size); - idx += SIZE_U32 * 2 * doc_list_size; - - head_ = (head_ + 1) % total_buffers_; - - if (head_ == tail_) { - full_ = true; - } - } - void Put(const char* data, SizeT length) { if (length > buffer_size_) { throw std::runtime_error("Data length exceeds buffer capacity"); } @@ -384,8 +353,9 @@ protected: u32 OUT_BUF_SIZE_; //!< max size of output buffer const u32 OUT_BUF_NUM_; //!< output threads number - std::priority_queue pre_heap_; //!< predict heap - SharedPtr> merge_loser_tree_; + // both pre_heap_ and merge_loser_tree_ are min-heaps (the smallest key is always on top) + std::priority_queue, std::greater> pre_heap_; + SharedPtr>> merge_loser_tree_; u32 *micro_run_idx_{nullptr}; //!< the access index of each microruns u32 *micro_run_pos_{nullptr}; //!< the access position within each microruns @@ -445,12 +415,6 @@ protected: void OutputByQueue(FILE* f); - void Init(MmapReader &io_stream); - - void ReadKeyAt(MmapReader &io_stream, u64 pos); - - void ReadKeyAtNonCopy(MmapReader &io_stream, u64 pos); - public: SortMerger(const char *filenm, u32 group_size = 4, u32 bs = 100000000, u32 output_num = 2); @@ -467,50 +431,25 @@ protected: using Super = SortMerger; using Super::filenm_; using Super::MAX_GROUP_SIZE_; - using Super::BS_SIZE_; - using Super::PRE_BUF_SIZE_; - using Super::RUN_BUF_SIZE_; - using Super::OUT_BUF_SIZE_; - using Super::OUT_BUF_NUM_; - using Super::pre_heap_; using Super::merge_loser_tree_; using Super::micro_run_idx_; using Super::micro_run_pos_; using Super::num_micro_run_; using Super::size_micro_run_; - using Super::size_run_; - using Super::run_addr_; using Super::micro_buf_; - using Super::sub_out_buf_; using Super::run_buf_; - using Super::out_buf_; - using Super::pre_buf_mtx_; - using Super::pre_buf_con_; - using Super::in_out_mtx_; - using Super::in_out_con_; - using Super::out_out_mtx_; - using Super::out_out_con_; - using Super::out_buf_in_idx_; - using Super::out_buf_out_idx_; - using Super::out_buf_size_; - using Super::out_buf_full_; using Super::cycle_buffer_; using Super::cycle_buf_mtx_; using Super::cycle_buf_con_; using Super::out_queue_mtx_; using Super::out_queue_con_; - using Super::out_queue_; - using Super::out_size_queue_; - using Super::OUT_BATCH_SIZE_; using Super::term_tuple_list_queue_; using Super::read_finish_; using Super::CYCLE_BUF_SIZE_; using Super::CYCLE_BUF_THRESHOLD_; using Super::count_; - using Super::group_size_; using Super::FILE_LEN_; using typename Super::KeyAddr; - using Super::MAX_TUPLE_LENGTH; u64 
term_list_count_{0}; void PredictImpl(DirectIO &io_stream); diff --git a/src/storage/invertedindex/common/loser_tree.cppm b/src/storage/invertedindex/common/loser_tree.cppm index 3fd1b293dd..be5c260df6 100644 --- a/src/storage/invertedindex/common/loser_tree.cppm +++ b/src/storage/invertedindex/common/loser_tree.cppm @@ -1,3 +1,5 @@ +// Refers to +// https://github.com/tlx/tlx/blob/master/tlx/container/loser_tree.hpp module; #include diff --git a/src/storage/invertedindex/memory_indexer.cpp b/src/storage/invertedindex/memory_indexer.cpp index fdcd1ffaf9..20db5d40f8 100644 --- a/src/storage/invertedindex/memory_indexer.cpp +++ b/src/storage/invertedindex/memory_indexer.cpp @@ -85,9 +85,6 @@ MemoryIndexer::MemoryIndexer(const String &index_dir, prepared_posting_ = MakeShared(nullptr, nullptr, PostingFormatOption(flag_), column_lengths_); Path path = Path(index_dir) / (base_name + ".tmp.merge"); spill_full_path_ = path.string(); - - spill_buffer_size_ = MAX_TUPLE_LENGTH * 2; - spill_buffer_ = MakeUnique(spill_buffer_size_); } MemoryIndexer::~MemoryIndexer() { @@ -186,7 +183,7 @@ SizeT MemoryIndexer::CommitOffline(SizeT wait_if_empty_ms) { SizeT num = inverters.size(); if (num > 0) { for (auto &inverter : inverters) { - inverter->SpillSortResults(this->spill_file_handle_, this->tuple_count_, spill_buffer_, spill_buffer_size_); + inverter->SpillSortResults(this->spill_file_handle_, this->tuple_count_, buf_writer_); num_runs_++; } } @@ -494,6 +491,8 @@ void MemoryIndexer::FinalSpillFile() { void MemoryIndexer::PrepareSpillFile() { spill_file_handle_ = fopen(spill_full_path_.c_str(), "w"); fwrite(&tuple_count_, sizeof(u64), 1, spill_file_handle_); + const SizeT write_buf_size = 128000; + buf_writer_ = MakeUnique(spill_file_handle_, write_buf_size); } } // namespace infinity \ No newline at end of file From c4f7be5cc76d0153e477bc047e69c0b35b3264d3 Mon Sep 17 00:00:00 2001 From: Ma-cat <1054638297@qq.com> Date: Fri, 24 May 2024 10:33:25 +0800 Subject: [PATCH 13/14] fix deadlock when parallel create index --- .../invertedindex/common/external_sort_merger.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/storage/invertedindex/common/external_sort_merger.cpp b/src/storage/invertedindex/common/external_sort_merger.cpp index 24fb3c8d5f..c9d7c8d10e 100644 --- a/src/storage/invertedindex/common/external_sort_merger.cpp +++ b/src/storage/invertedindex/common/external_sort_merger.cpp @@ -315,8 +315,7 @@ void SortMerger::Merge() { return !this->cycle_buffer_->IsEmpty() || read_finish_; }); - if (read_finish_) { - assert(cycle_buffer_->IsEmpty()); + if (read_finish_ && cycle_buffer_->IsEmpty()) { merge_loser_tree_->DeleteTopInsert(nullptr, true); continue; } @@ -479,6 +478,7 @@ void SortMergerTermTuple::MergeImpl() { auto out_key = top.KEY(); if (tuple_list == nullptr) { tuple_list = MakeUnique(out_key.term_); + tuple_list->Add(out_key.doc_id_, out_key.term_pos_); } else if (idx != last_idx) { if (tuple_list->IsFull() || out_key.term_ != tuple_list->term_) { // output @@ -489,8 +489,8 @@ void SortMergerTermTuple::MergeImpl() { } tuple_list = MakeUnique(out_key.term_); } + tuple_list->Add(out_key.doc_id_, out_key.term_pos_); } - tuple_list->Add(out_key.doc_id_, out_key.term_pos_); assert(idx < MAX_GROUP_SIZE_); @@ -502,8 +502,7 @@ void SortMergerTermTuple::MergeImpl() { return !this->cycle_buffer_->IsEmpty() || read_finish_; }); - if (read_finish_) { - assert(cycle_buffer_->IsEmpty()); + if (read_finish_ && cycle_buffer_->IsEmpty()) { merge_loser_tree_->DeleteTopInsert(nullptr, 
true); continue; } From 1dcd235cfa9e8b07975df842f7196d444acd9e8c Mon Sep 17 00:00:00 2001 From: Ma-cat <1054638297@qq.com> Date: Fri, 24 May 2024 14:06:12 +0800 Subject: [PATCH 14/14] fix infinity only occupies one core during the external_sort stage and delete some useless code --- .../common/external_sort_merger.cpp | 144 ++++++++++-------- .../common/external_sort_merger.cppm | 21 +-- 2 files changed, 84 insertions(+), 81 deletions(-) diff --git a/src/storage/invertedindex/common/external_sort_merger.cpp b/src/storage/invertedindex/common/external_sort_merger.cpp index c9d7c8d10e..8f1bb43a33 100644 --- a/src/storage/invertedindex/common/external_sort_merger.cpp +++ b/src/storage/invertedindex/common/external_sort_merger.cpp @@ -388,6 +388,19 @@ void SortMerger::OutputByQueue(FILE *f) { } } +template +void SortMerger::Unpin(Vector> &threads) { + int num_cores = std::thread::hardware_concurrency(); + cpu_set_t cpuset; + CPU_ZERO(&cpuset); + for (int i = 0; i < num_cores; ++i) { + CPU_SET(i, &cpuset); + } + for (auto& thread : threads) { + pthread_setaffinity_np(thread->native_handle(), sizeof(cpu_set_t), &cpuset); + } +} + template void SortMerger::Output(FILE *f, u32 idx) { DirectIO io_stream(f, "w"); @@ -439,22 +452,24 @@ void SortMerger::Run() { Init(io_stream); - Thread predict_thread(std::bind(&self_t::Predict, this, io_stream)); - Thread merge_thread(std::bind(&self_t::Merge, this)); + UniquePtr predict_thread = MakeUnique(std::bind(&self_t::Predict, this, io_stream)); + UniquePtr merge_thread = MakeUnique(std::bind(&self_t::Merge, this)); FILE *out_f = fopen((filenm_ + ".out").c_str(), "w+"); IASSERT(out_f); IASSERT(fwrite(&count_, sizeof(u64), 1, out_f) == 1); - Vector out_thread(OUT_BUF_NUM_); + Vector> threads; + threads.push_back(std::move(predict_thread)); + threads.push_back(std::move(merge_thread)); for (u32 i = 0; i < OUT_BUF_NUM_; ++i) { - out_thread[i] = new Thread(std::bind(&self_t::Output, this, out_f, i)); + UniquePtr out_thread = MakeUnique(std::bind(&self_t::Output, this, out_f, i)); + threads.push_back(std::move(out_thread)); } - predict_thread.join(); - merge_thread.join(); - for (u32 i = 0; i < OUT_BUF_NUM_; ++i) { - out_thread[i]->join(); - delete out_thread[i]; + this->Unpin(threads); + + for (auto& thread : threads) { + thread->join(); } fclose(f); fclose(out_f); @@ -472,8 +487,8 @@ requires std::same_as void SortMergerTermTuple::MergeImpl() { UniquePtr tuple_list = nullptr; u32 last_idx = -1; - while (merge_loser_tree_->TopSource() != LoserTree::invalid_) { - auto top = merge_loser_tree_->TopKey(); + while (this->merge_loser_tree_->TopSource() != LoserTree::invalid_) { + auto top = this->merge_loser_tree_->TopKey(); u32 idx = top.IDX(); auto out_key = top.KEY(); if (tuple_list == nullptr) { @@ -483,58 +498,58 @@ void SortMergerTermTuple::MergeImpl() { if (tuple_list->IsFull() || out_key.term_ != tuple_list->term_) { // output { - std::unique_lock lock(out_queue_mtx_); - term_tuple_list_queue_.push(std::move(tuple_list)); - out_queue_con_.notify_one(); + std::unique_lock lock(this->out_queue_mtx_); + this->term_tuple_list_queue_.push(std::move(tuple_list)); + this->out_queue_con_.notify_one(); } tuple_list = MakeUnique(out_key.term_); } tuple_list->Add(out_key.doc_id_, out_key.term_pos_); } - assert(idx < MAX_GROUP_SIZE_); + assert(idx < this->MAX_GROUP_SIZE_); - if (micro_run_idx_[idx] == num_micro_run_[idx]) { - IASSERT(micro_run_pos_[idx] <= size_micro_run_[idx]); - std::unique_lock lock(cycle_buf_mtx_); + if (this->micro_run_idx_[idx] == 
this->num_micro_run_[idx]) { + IASSERT(this->micro_run_pos_[idx] <= this->size_micro_run_[idx]); + std::unique_lock lock(this->cycle_buf_mtx_); - cycle_buf_con_.wait(lock, [this]() { - return !this->cycle_buffer_->IsEmpty() || read_finish_; + this->cycle_buf_con_.wait(lock, [this]() { + return !this->cycle_buffer_->IsEmpty() || this->read_finish_; }); - if (read_finish_ && cycle_buffer_->IsEmpty()) { - merge_loser_tree_->DeleteTopInsert(nullptr, true); + if (this->read_finish_ && this->cycle_buffer_->IsEmpty()) { + this->merge_loser_tree_->DeleteTopInsert(nullptr, true); continue; } - assert(idx < MAX_GROUP_SIZE_); - auto res = cycle_buffer_->GetTuple(); + assert(idx < this->MAX_GROUP_SIZE_); + auto res = this->cycle_buffer_->GetTuple(); auto pre_buf_size = std::get<1>(res); auto pre_buf_num = std::get<2>(res); - memcpy(micro_buf_[idx], std::get<0>(res), pre_buf_size); + memcpy(this->micro_buf_[idx], std::get<0>(res), pre_buf_size); - size_micro_run_[idx] = pre_buf_size; - num_micro_run_[idx] = pre_buf_num; - micro_run_pos_[idx] = micro_run_idx_[idx] = 0; + this->size_micro_run_[idx] = pre_buf_size; + this->num_micro_run_[idx] = pre_buf_num; + this->micro_run_pos_[idx] = this->micro_run_idx_[idx] = 0; - if (cycle_buffer_->Size() < CYCLE_BUF_THRESHOLD_) { - cycle_buf_con_.notify_one(); + if (this->cycle_buffer_->Size() < this->CYCLE_BUF_THRESHOLD_) { + this->cycle_buf_con_.notify_one(); } } - assert(idx < MAX_GROUP_SIZE_); - auto key = KeyAddr(micro_buf_[idx] + micro_run_pos_[idx], -1, idx); - merge_loser_tree_->DeleteTopInsert(&key, false); + assert(idx < this->MAX_GROUP_SIZE_); + auto key = KeyAddr(this->micro_buf_[idx] + this->micro_run_pos_[idx], -1, idx); + this->merge_loser_tree_->DeleteTopInsert(&key, false); - ++micro_run_idx_[idx]; - micro_run_pos_[idx] += KeyAddr(micro_buf_[idx] + micro_run_pos_[idx], -1, idx).LEN() + sizeof(LenType); + ++this->micro_run_idx_[idx]; + this->micro_run_pos_[idx] += KeyAddr(this->micro_buf_[idx] + this->micro_run_pos_[idx], -1, idx).LEN() + sizeof(LenType); } { - std::unique_lock lock(out_queue_mtx_); + std::unique_lock lock(this->out_queue_mtx_); if (tuple_list != nullptr) { - term_tuple_list_queue_.push(std::move(tuple_list)); + this->term_tuple_list_queue_.push(std::move(tuple_list)); } - out_queue_con_.notify_one(); + this->out_queue_con_.notify_one(); } } @@ -542,21 +557,21 @@ template requires std::same_as void SortMergerTermTuple::OutputImpl(FILE *f) { DirectIO io_stream(f, "w"); - while (count_ > 0) { + while (this->count_ > 0) { UniquePtr temp_term_tuple; { - std::unique_lock out_lock(out_queue_mtx_); - out_queue_con_.wait(out_lock, [this]() { return !this->term_tuple_list_queue_.empty(); }); + std::unique_lock out_lock(this->out_queue_mtx_); + this->out_queue_con_.wait(out_lock, [this]() { return !this->term_tuple_list_queue_.empty(); }); - if (count_ == 0) { + if (this->count_ == 0) { break; } - temp_term_tuple = std::move(term_tuple_list_queue_.front()); + temp_term_tuple = std::move(this->term_tuple_list_queue_.front()); ++term_list_count_; - term_tuple_list_queue_.pop(); + this->term_tuple_list_queue_.pop(); } - count_ -= temp_term_tuple->Size(); + this->count_ -= temp_term_tuple->Size(); // output format // | u32 | u32 | u32 | char [term_len] | pair [doc_list_size] @@ -573,7 +588,7 @@ void SortMergerTermTuple::OutputImpl(FILE *f) { io_stream.Write(buf, SIZE_U32 * 3); io_stream.Write(temp_term_tuple->term_.data(), term_len); io_stream.Write((char*)temp_term_tuple->doc_pos_list_.data(), SIZE_U32 * 2 * doc_list_size); - if (count_ == 0) { 
+ if (this->count_ == 0) { io_stream.Seek(0, SEEK_SET); io_stream.Write((char*)(&term_list_count_), sizeof(u64)); term_list_count_ = 0; @@ -591,36 +606,43 @@ void SortMergerTermTuple::PredictImpl(DirectIO &io_stream) { template requires std::same_as void SortMergerTermTuple::Run() { - FILE *f = fopen(filenm_.c_str(), "r"); + FILE *f = fopen(this->filenm_.c_str(), "r"); DirectIO io_stream(f); - FILE_LEN_ = io_stream.Length(); + this->FILE_LEN_ = io_stream.Length(); term_list_count_ = 0; - io_stream.Read((char *)(&count_), sizeof(u64)); + io_stream.Read((char *)(&this->count_), sizeof(u64)); Super::Init(io_stream); - Thread predict_thread(std::bind(&self_t::PredictImpl, this, io_stream)); - Thread merge_thread(std::bind(&self_t::MergeImpl, this)); - FILE *out_f = fopen((filenm_ + ".out").c_str(), "w+"); + UniquePtr predict_thread = MakeUnique(std::bind(&self_t::PredictImpl, this, io_stream)); + UniquePtr merge_thread = MakeUnique(std::bind(&self_t::MergeImpl, this)); + FILE *out_f = fopen((this->filenm_ + ".out").c_str(), "w+"); IASSERT(out_f); - IASSERT(fwrite(&count_, sizeof(u64), 1, out_f) == 1); + IASSERT(fwrite(&this->count_, sizeof(u64), 1, out_f) == 1); + + UniquePtr out_thread = MakeUnique(std::bind(&self_t::OutputImpl, this, out_f)); - Thread out_thread(std::bind(&self_t::OutputImpl, this, out_f)); + Vector> threads; + threads.push_back(std::move(predict_thread)); + threads.push_back(std::move(merge_thread)); + threads.push_back(std::move(out_thread)); - predict_thread.join(); - merge_thread.join(); - out_thread.join(); + this->Unpin(threads); + + for (auto& thread : threads) { + thread->join(); + } fclose(f); fclose(out_f); - if (std::filesystem::exists(filenm_)) { - std::filesystem::remove(filenm_); + if (std::filesystem::exists(this->filenm_)) { + std::filesystem::remove(this->filenm_); } - if (std::filesystem::exists(filenm_ + ".out")) { - std::filesystem::rename(filenm_ + ".out", filenm_); + if (std::filesystem::exists(this->filenm_ + ".out")) { + std::filesystem::rename(this->filenm_ + ".out", this->filenm_); } } diff --git a/src/storage/invertedindex/common/external_sort_merger.cppm b/src/storage/invertedindex/common/external_sort_merger.cppm index ac7d6431e9..08ba8cb7ae 100644 --- a/src/storage/invertedindex/common/external_sort_merger.cppm +++ b/src/storage/invertedindex/common/external_sort_merger.cppm @@ -415,6 +415,7 @@ protected: void OutputByQueue(FILE* f); + void Unpin(Vector> &threads); public: SortMerger(const char *filenm, u32 group_size = 4, u32 bs = 100000000, u32 output_num = 2); @@ -429,26 +430,6 @@ class SortMergerTermTuple : public SortMerger { protected: typedef SortMergerTermTuple self_t; using Super = SortMerger; - using Super::filenm_; - using Super::MAX_GROUP_SIZE_; - using Super::merge_loser_tree_; - using Super::micro_run_idx_; - using Super::micro_run_pos_; - using Super::num_micro_run_; - using Super::size_micro_run_; - using Super::micro_buf_; - using Super::run_buf_; - using Super::cycle_buffer_; - using Super::cycle_buf_mtx_; - using Super::cycle_buf_con_; - using Super::out_queue_mtx_; - using Super::out_queue_con_; - using Super::term_tuple_list_queue_; - using Super::read_finish_; - using Super::CYCLE_BUF_SIZE_; - using Super::CYCLE_BUF_THRESHOLD_; - using Super::count_; - using Super::FILE_LEN_; using typename Super::KeyAddr; u64 term_list_count_{0};