In [1]:
from pymongo import MongoClient

# Name of the MongoDB collection to open. The commented-out assignments below
# are alternative collection names that can be swapped in by hand.
col_mongo_name = "国家按章分"
# col_mongo_name = "上海按章分"
# col_mongo_name = "中心按章分"
# Connect to the MongoDB server at localhost:27017 and select the "xunfei" database.
mongo = MongoClient("mongodb://localhost:27017/")
db = mongo["xunfei"]
# Handle to the selected collection.
# NOTE(review): col_mongo is not referenced by any other cell visible here — confirm it is still needed.
col_mongo = db[col_mongo_name]

In [12]:
from pymilvus import connections, Collection
# Register the "default" connection alias against the Milvus server at localhost:19530.
connections.connect("default", host="localhost", port="19530")
# Name of the Milvus collection the search functions run against. The
# commented-out assignments are alternative collections to swap in by hand.
col_name = "AllPolicy"
# col_name = "CountryPolicy"
# col_name = "ShanghaiPolicy"
# col_name = "CentrePolicy"
# Open the collection and load it before any search call is issued.
col = Collection(col_name)
col.load()

In [13]:
from FlagEmbedding import BGEM3FlagModel

# Instantiate the BGE-M3 embedding model that get_embeddings() calls.
# fp16 is switched off, CLS pooling is requested and the model is pinned to 'cuda:0'.
# NOTE(review): presumably this cell fails on a machine without a CUDA device — confirm.
model = BGEM3FlagModel('BAAI/bge-m3',
                      use_fp16=False,
                      pooling_method='cls',
                      devices=['cuda:0'])

def get_embeddings(text):
    """Encode ``text`` with the notebook-level ``model``.

    Dense and sparse outputs are requested from ``model.encode``; ColBERT
    vectors are not. Whatever ``model.encode`` returns is handed back
    unchanged.
    """
    encode_options = {
        "return_dense": True,
        "return_sparse": True,
        "return_colbert_vecs": False,
    }
    return model.encode(text, **encode_options)

  from .autonotebook import tqdm as notebook_tqdm
Fetching 30 files: 100%|███████████████████████████████████████████████████████████████████████| 30/30 [00:00<?, ?it/s]


### 三种搜索函数

In [110]:
from pymilvus import AnnSearchRequest, WeightedRanker

def dense_search(col, query_dense_embedding, limit=5):
    """Search the ``dense_vector`` field with a single query vector (L2 metric).

    Args:
        col: Collection-like object exposing ``search``.
        query_dense_embedding: The query's dense vector.
        limit: Maximum number of hits to request.

    Returns:
        A list of ``{"para_id": ..., "text": ...}`` dicts, one per hit of the
        first (and only) query, in the order the collection returned them.
    """
    search_kwargs = {
        "anns_field": "dense_vector",
        "limit": limit,
        "output_fields": ["para_id", "text"],
        "param": {"metric_type": "L2", "params": {}},
    }
    # One query vector goes in, so only the first result set is of interest.
    hits = col.search([query_dense_embedding], **search_kwargs)[0]

    docs = []
    for hit in hits:
        docs.append({"para_id": hit.entity.get("para_id"), "text": hit.entity.get("text")})
    return docs

def sparse_search(col, query_sparse_embedding, limit=5):
    """Search the ``sparse_vector`` field with a single sparse query (IP metric).

    Args:
        col: Collection-like object exposing ``search``.
        query_sparse_embedding: The query's sparse embedding.
        limit: Maximum number of hits to request.

    Returns:
        A list of ``{"para_id": ..., "text": ...}`` dicts built from the hits
        of the first (and only) query.
    """
    result_sets = col.search(
        [query_sparse_embedding],
        anns_field="sparse_vector",
        limit=limit,
        output_fields=["para_id", "text"],
        param={"metric_type": "IP", "params": {}},
    )
    first_query_hits = result_sets[0]
    return [
        {field: hit.entity.get(field) for field in ("para_id", "text")}
        for hit in first_query_hits
    ]

def hybrid_search(
    col,
    query_dense_embedding,
    query_sparse_embedding,
    sparse_weight=1.0,
    dense_weight=1.0,
    limit=5,
):
    """Combine a dense and a sparse ANN request through a weighted ranker.

    Args:
        col: Collection-like object exposing ``hybrid_search``.
        query_dense_embedding: The query's dense vector (searched with L2).
        query_sparse_embedding: The query's sparse embedding (searched with IP).
        sparse_weight: Ranker weight for the sparse request.
        dense_weight: Ranker weight for the dense request.
        limit: Hit limit used for both sub-requests and for the merged result.

    Returns:
        A list of ``{"para_id": ..., "text": ...}`` dicts from the first
        result set.
    """
    l2_params = {"metric_type": "L2", "params": {}}
    ip_params = {"metric_type": "IP", "params": {}}
    dense_request = AnnSearchRequest(
        [query_dense_embedding], "dense_vector", l2_params, limit=limit
    )
    sparse_request = AnnSearchRequest(
        [query_sparse_embedding], "sparse_vector", ip_params, limit=limit
    )

    # Requests are passed sparse-first, matching the (sparse, dense) weight order.
    ranker = WeightedRanker(sparse_weight, dense_weight)
    merged_hits = col.hybrid_search(
        [sparse_request, dense_request],
        rerank=ranker,
        limit=limit,
        output_fields=["para_id", "text"],
    )[0]

    docs = []
    for hit in merged_hits:
        docs.append({"para_id": hit.entity.get("para_id"), "text": hit.entity.get("text")})
    return docs


### 格式化搜索结果

In [51]:
import jieba

def doc_text_formatting(query, docs):
    """Highlight the query's words inside each document's text.

    The query is segmented with ``jieba.lcut``; every occurrence of a
    non-blank segment in a document's ``'text'`` value is wrapped in
    ``<span style='color:red'>...</span>``.

    All segments are matched in a single regex pass, so the markup inserted
    for one segment is never rescanned for another (a query containing e.g.
    "red" or "span" can no longer corrupt the inserted tags), and the matched
    text is inserted literally (a backslash in a segment is not treated as a
    regex replacement escape).

    Args:
        query: Query string to segment.
        docs: Iterable of mappings exposing the text under the ``'text'`` key.
            A missing or ``None`` text is treated as an empty string.

    Returns:
        A list with one highlighted string per document, in input order.
    """
    # Imported locally so the function does not rely on a later cell having
    # imported ``re`` before it is called.
    import re

    # Longest segment first so that, when one segment is a prefix of another,
    # the longer one wins in the alternation; the alphabetical tie-break makes
    # the result independent of set iteration order.
    query_words = sorted(
        {word for word in jieba.lcut(query) if word.strip()},
        key=lambda word: (-len(word), word),
    )
    texts = [doc.get('text') or "" for doc in docs]

    # An empty alternation would match at every position, so bail out early.
    if not query_words:
        return texts

    pattern = re.compile("|".join(re.escape(word) for word in query_words))

    def _wrap(match):
        # A callable replacement keeps the matched text verbatim.
        return f"<span style='color:red'>{match.group(0)}</span>"

    return [pattern.sub(_wrap, text) for text in texts]


### 检索

In [223]:
import pandas as pd
import json

# query = input("Enter your search query: ")
# query_embeddings = get_embeddings([query])

# Read the QA evaluation file and split it into parallel lists:
# query[i] is the stripped question and gold_id[i] its 'relevant_para_ids' value.
with open("qa_single_all_500.json", "r", encoding="utf-8") as f:
    data = json.load(f)
query = [item.get('question').strip() for item in data]
gold_id = [item.get('relevant_para_ids') for item in data]
num = len(data)

In [224]:
# Encode the queries one at a time, then pull the dense and sparse parts of
# each result into two lists aligned with `query`.
query_embeddings = [get_embeddings(question) for question in query]

query_dense_embeddings = [emb.get('dense_vecs') for emb in query_embeddings]
query_sparse_embeddings = [emb.get('lexical_weights') for emb in query_embeddings]

In [225]:
# Sanity check: how many sparse query embeddings were produced.
len(query_sparse_embeddings)

575

In [226]:
%%time

# Run all three retrieval modes for every query, keeping the top 50 hits of
# each; dense_results[i], sparse_results[i] and hybrid_results[i] all belong
# to query[i].
dense_results, sparse_results, hybrid_results = [], [], []
for i in range(num):
    dense_vec = query_dense_embeddings[i]
    sparse_vec = query_sparse_embeddings[i]

    dense_results.append(dense_search(col, dense_vec, limit=50))
    sparse_results.append(sparse_search(col, sparse_vec, limit=50))

    hybrid_hits = hybrid_search(
        col, dense_vec, sparse_vec, sparse_weight=1.0, dense_weight=1.0, limit=50
    )
    hybrid_results.append(hybrid_hits)

CPU times: total: 1.17 s
Wall time: 8min 26s


In [227]:
import re
from IPython.display import Markdown, display

idx = 0
print(f"gold_id: {gold_id[idx]}")
print(f"query: {query[idx]}")

# (heading, per-query result lists) pairs, rendered in this order.
sections = [
    ("### 🔍 **Dense Search Results:**", dense_results),
    ("### 🌿 **Sparse Search Results:**", sparse_results),
    ("### 🔄 **Hybrid Search Results:**", hybrid_results),
]

for heading, results_per_query in sections:
    display(Markdown(heading))
    # Only the five best hits of the selected query are shown.
    top_docs = results_per_query[idx][:5]
    formatted_results = doc_text_formatting(query[idx], top_docs)
    for doc, result in zip(top_docs, formatted_results):
        display(Markdown(f"para_id: {doc.get('para_id')}"))
        display(Markdown(result))


gold_id: ['8063292_T17']
query: 招标文件或资格预审文件的出售时间、地点、收费要求以及终止招标的条件是什么？


### 🔍 **Dense Search Results:**

para_id: 8063292_T17

第十五条 第十五条<span style='color:red'>招标</span>人应当按<span style='color:red'>招标</span>公告<span style='color:red'>或</span>者投标邀请书规定<span style='color:red'>的</span><span style='color:red'>时间</span><span style='color:red'>、</span><span style='color:red'>地点</span><span style='color:red'>出售</span><span style='color:red'>招标</span><span style='color:red'>文件</span><span style='color:red'>或</span><span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span>。自<span style='color:red'>招标</span><span style='color:red'>文件</span><span style='color:red'>或</span>者<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span><span style='color:red'>出售</span>之日起至停止<span style='color:red'>出售</span>之日止，最短不得少于五日。<span style='color:red'>招标</span>人可以通过信息网络<span style='color:red'>或</span>者其他媒介发布<span style='color:red'>招标</span><span style='color:red'>文件</span>，通过信息网络<span style='color:red'>或</span>者其他媒介发布<span style='color:red'>的</span><span style='color:red'>招标</span><span style='color:red'>文件</span>与书面<span style='color:red'>招标</span><span style='color:red'>文件</span>具有同等法律效力，出现不一致时以书面<span style='color:red'>招标</span><span style='color:red'>文件</span>为准，国家另有规定<span style='color:red'>的</span>除外。对<span style='color:red'>招标</span><span style='color:red'>文件</span><span style='color:red'>或</span>者<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span><span style='color:red'>的</span><span style='color:red'>收费</span>应当限于补偿印刷<span style='color:red'>、</span>邮寄<span style='color:red'>的</span>成本支出，不得以营利为目<span style='color:red'>的</span>。对于所附<span style='color:red'>的</span>设计<span style='color:red'>文件</span>，<span style='color:red'>招标</span>人可以向投标人酌收押金；对于开标后投标人退还设计<span style='color:red'>文件</span><span style='color:red'>的</span>，<span style='color:red'>招标</span>人应当向投标人退还押金。<span style='color:red'>招标</span><span style='color:red'>文件</span><span style='color:red'>或</span>者<span 
style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span>售出后，不予退还。除不可抗力原因外，<span style='color:red'>招标</span>人在发布<span style='color:red'>招标</span>公告<span style='color:red'>、</span>发出投标邀请书后<span style='color:red'>或</span>者售出<span style='color:red'>招标</span><span style='color:red'>文件</span><span style='color:red'>或</span><span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span>后不得<span style='color:red'>终止</span><span style='color:red'>招标</span>。

para_id: 8063277_T16

第十四条 第十四条<span style='color:red'>招标</span>人应当按照<span style='color:red'>资格</span><span style='color:red'>预审</span>公告<span style='color:red'>、</span><span style='color:red'>招标</span>公告<span style='color:red'>或</span>者投标邀请书规定<span style='color:red'>的</span><span style='color:red'>时间</span><span style='color:red'>、</span><span style='color:red'>地点</span>发售<span style='color:red'>招标</span><span style='color:red'>文件</span><span style='color:red'>或</span>者<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span>。自<span style='color:red'>招标</span><span style='color:red'>文件</span><span style='color:red'>或</span>者<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span>发售之日起至停止发售之日止，最短不得少于五日。<span style='color:red'>招标</span>人可以通过信息网络<span style='color:red'>或</span>者其他媒介发布<span style='color:red'>招标</span><span style='color:red'>文件</span>，通过信息网络<span style='color:red'>或</span>者其他媒介发布<span style='color:red'>的</span><span style='color:red'>招标</span><span style='color:red'>文件</span>与书面<span style='color:red'>招标</span><span style='color:red'>文件</span>具有同等法律效力，出现不一致时以书面<span style='color:red'>招标</span><span style='color:red'>文件</span>为准，但国家另有规定<span style='color:red'>的</span>除外。对<span style='color:red'>招标</span><span style='color:red'>文件</span><span style='color:red'>或</span>者<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span><span style='color:red'>的</span><span style='color:red'>收费</span>应当限于补偿印刷<span style='color:red'>、</span>邮寄<span style='color:red'>的</span>成本支出，不得以营利为目<span style='color:red'>的</span>。除不可抗力原因外，<span style='color:red'>招标</span><span style='color:red'>文件</span><span style='color:red'>或</span>者<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span>发出后，不予退还；<span style='color:red'>招标</span>人在发布<span style='color:red'>招标</span>公告<span style='color:red'>、</span>发出投标邀请书后<span 
style='color:red'>或</span>者发出<span style='color:red'>招标</span><span style='color:red'>文件</span><span style='color:red'>或</span><span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span>后不得<span style='color:red'>终止</span><span style='color:red'>招标</span>。<span style='color:red'>招标</span>人<span style='color:red'>终止</span><span style='color:red'>招标</span><span style='color:red'>的</span>，应当及时发布公告，<span style='color:red'>或</span>者以书面形式通知被邀请<span style='color:red'>的</span><span style='color:red'>或</span>者已经获取<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span><span style='color:red'>、</span><span style='color:red'>招标</span><span style='color:red'>文件</span><span style='color:red'>的</span>潜在投标人。已经发售<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span><span style='color:red'>、</span><span style='color:red'>招标</span><span style='color:red'>文件</span><span style='color:red'>或</span>者已经收取投标保证金<span style='color:red'>的</span>，<span style='color:red'>招标</span>人应当及时退还所收取<span style='color:red'>的</span><span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span><span style='color:red'>、</span><span style='color:red'>招标</span><span style='color:red'>文件</span><span style='color:red'>的</span>费用，<span style='color:red'>以及</span>所收取<span style='color:red'>的</span>投标保证金及银行同期存款利息。

para_id: 8063293_T27

第二十三条 第二十三条<span style='color:red'>招标</span>人应当按<span style='color:red'>资格</span><span style='color:red'>预审</span>公告<span style='color:red'>、</span><span style='color:red'>招标</span>公告<span style='color:red'>或</span>者投标邀请书规定<span style='color:red'>的</span><span style='color:red'>时间</span><span style='color:red'>、</span><span style='color:red'>地点</span>发售<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span><span style='color:red'>或</span>者<span style='color:red'>招标</span><span style='color:red'>文件</span>。<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span><span style='color:red'>或</span>者<span style='color:red'>招标</span><span style='color:red'>文件</span><span style='color:red'>的</span>发售期不得少于5日。<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span><span style='color:red'>或</span>者<span style='color:red'>招标</span><span style='color:red'>文件</span>售出后，不予退还。

para_id: 8063256_T22

第十六条 第十六条　<span style='color:red'>招标</span>人应当按照<span style='color:red'>资格</span><span style='color:red'>预审</span>公告<span style='color:red'>、</span><span style='color:red'>招标</span>公告<span style='color:red'>或</span>者投标邀请书规定<span style='color:red'>的</span><span style='color:red'>时间</span><span style='color:red'>、</span><span style='color:red'>地点</span>发售<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span><span style='color:red'>或</span>者<span style='color:red'>招标</span><span style='color:red'>文件</span>。<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span><span style='color:red'>或</span>者<span style='color:red'>招标</span><span style='color:red'>文件</span><span style='color:red'>的</span>发售期不得少于5日。<span style='color:red'>招标</span>人发售<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span><span style='color:red'>、</span><span style='color:red'>招标</span><span style='color:red'>文件</span>收取<span style='color:red'>的</span>费用应当限于补偿印刷<span style='color:red'>、</span>邮寄<span style='color:red'>的</span>成本支出，不得以营利为目<span style='color:red'>的</span>。

para_id: 8063293_T37

第三十三条 第三十三条<span style='color:red'>招标</span>人在发布<span style='color:red'>资格</span><span style='color:red'>预审</span>公告<span style='color:red'>、</span><span style='color:red'>招标</span>公告<span style='color:red'>、</span>发出投标邀请书<span style='color:red'>或</span>者售出<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span><span style='color:red'>、</span><span style='color:red'>招标</span><span style='color:red'>文件</span>后，无正当理由不得随意<span style='color:red'>终止</span><span style='color:red'>招标</span>。<span style='color:red'>招标</span>人因特殊原因需要<span style='color:red'>终止</span><span style='color:red'>招标</span><span style='color:red'>的</span>，应当及时发布公告，<span style='color:red'>或</span>者以书面形式通知被邀请<span style='color:red'>的</span><span style='color:red'>或</span>者已经获取<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span><span style='color:red'>、</span><span style='color:red'>招标</span><span style='color:red'>文件</span><span style='color:red'>的</span>潜在投标人。已经发售<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span><span style='color:red'>、</span><span style='color:red'>招标</span><span style='color:red'>文件</span><span style='color:red'>或</span>者已经收取投标保证金<span style='color:red'>的</span>，<span style='color:red'>招标</span>人应当及时退还所收取<span style='color:red'>的</span>购买<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span><span style='color:red'>、</span><span style='color:red'>招标</span><span style='color:red'>文件</span><span style='color:red'>的</span>费用，<span style='color:red'>以及</span>所收取<span style='color:red'>的</span>投标保证金及银行同期存款利息。利息<span style='color:red'>的</span>计算方法应当在<span style='color:red'>招标</span><span style='color:red'>文件</span>中载明。

### 🌿 **Sparse Search Results:**

para_id: 8063292_T17

第十五条 第十五条<span style='color:red'>招标</span>人应当按<span style='color:red'>招标</span>公告<span style='color:red'>或</span>者投标邀请书规定<span style='color:red'>的</span><span style='color:red'>时间</span><span style='color:red'>、</span><span style='color:red'>地点</span><span style='color:red'>出售</span><span style='color:red'>招标</span><span style='color:red'>文件</span><span style='color:red'>或</span><span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span>。自<span style='color:red'>招标</span><span style='color:red'>文件</span><span style='color:red'>或</span>者<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span><span style='color:red'>出售</span>之日起至停止<span style='color:red'>出售</span>之日止，最短不得少于五日。<span style='color:red'>招标</span>人可以通过信息网络<span style='color:red'>或</span>者其他媒介发布<span style='color:red'>招标</span><span style='color:red'>文件</span>，通过信息网络<span style='color:red'>或</span>者其他媒介发布<span style='color:red'>的</span><span style='color:red'>招标</span><span style='color:red'>文件</span>与书面<span style='color:red'>招标</span><span style='color:red'>文件</span>具有同等法律效力，出现不一致时以书面<span style='color:red'>招标</span><span style='color:red'>文件</span>为准，国家另有规定<span style='color:red'>的</span>除外。对<span style='color:red'>招标</span><span style='color:red'>文件</span><span style='color:red'>或</span>者<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span><span style='color:red'>的</span><span style='color:red'>收费</span>应当限于补偿印刷<span style='color:red'>、</span>邮寄<span style='color:red'>的</span>成本支出，不得以营利为目<span style='color:red'>的</span>。对于所附<span style='color:red'>的</span>设计<span style='color:red'>文件</span>，<span style='color:red'>招标</span>人可以向投标人酌收押金；对于开标后投标人退还设计<span style='color:red'>文件</span><span style='color:red'>的</span>，<span style='color:red'>招标</span>人应当向投标人退还押金。<span style='color:red'>招标</span><span style='color:red'>文件</span><span style='color:red'>或</span>者<span 
style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span>售出后，不予退还。除不可抗力原因外，<span style='color:red'>招标</span>人在发布<span style='color:red'>招标</span>公告<span style='color:red'>、</span>发出投标邀请书后<span style='color:red'>或</span>者售出<span style='color:red'>招标</span><span style='color:red'>文件</span><span style='color:red'>或</span><span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span>后不得<span style='color:red'>终止</span><span style='color:red'>招标</span>。

para_id: 8063277_T16

第十四条 第十四条<span style='color:red'>招标</span>人应当按照<span style='color:red'>资格</span><span style='color:red'>预审</span>公告<span style='color:red'>、</span><span style='color:red'>招标</span>公告<span style='color:red'>或</span>者投标邀请书规定<span style='color:red'>的</span><span style='color:red'>时间</span><span style='color:red'>、</span><span style='color:red'>地点</span>发售<span style='color:red'>招标</span><span style='color:red'>文件</span><span style='color:red'>或</span>者<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span>。自<span style='color:red'>招标</span><span style='color:red'>文件</span><span style='color:red'>或</span>者<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span>发售之日起至停止发售之日止，最短不得少于五日。<span style='color:red'>招标</span>人可以通过信息网络<span style='color:red'>或</span>者其他媒介发布<span style='color:red'>招标</span><span style='color:red'>文件</span>，通过信息网络<span style='color:red'>或</span>者其他媒介发布<span style='color:red'>的</span><span style='color:red'>招标</span><span style='color:red'>文件</span>与书面<span style='color:red'>招标</span><span style='color:red'>文件</span>具有同等法律效力，出现不一致时以书面<span style='color:red'>招标</span><span style='color:red'>文件</span>为准，但国家另有规定<span style='color:red'>的</span>除外。对<span style='color:red'>招标</span><span style='color:red'>文件</span><span style='color:red'>或</span>者<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span><span style='color:red'>的</span><span style='color:red'>收费</span>应当限于补偿印刷<span style='color:red'>、</span>邮寄<span style='color:red'>的</span>成本支出，不得以营利为目<span style='color:red'>的</span>。除不可抗力原因外，<span style='color:red'>招标</span><span style='color:red'>文件</span><span style='color:red'>或</span>者<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span>发出后，不予退还；<span style='color:red'>招标</span>人在发布<span style='color:red'>招标</span>公告<span style='color:red'>、</span>发出投标邀请书后<span 
style='color:red'>或</span>者发出<span style='color:red'>招标</span><span style='color:red'>文件</span><span style='color:red'>或</span><span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span>后不得<span style='color:red'>终止</span><span style='color:red'>招标</span>。<span style='color:red'>招标</span>人<span style='color:red'>终止</span><span style='color:red'>招标</span><span style='color:red'>的</span>，应当及时发布公告，<span style='color:red'>或</span>者以书面形式通知被邀请<span style='color:red'>的</span><span style='color:red'>或</span>者已经获取<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span><span style='color:red'>、</span><span style='color:red'>招标</span><span style='color:red'>文件</span><span style='color:red'>的</span>潜在投标人。已经发售<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span><span style='color:red'>、</span><span style='color:red'>招标</span><span style='color:red'>文件</span><span style='color:red'>或</span>者已经收取投标保证金<span style='color:red'>的</span>，<span style='color:red'>招标</span>人应当及时退还所收取<span style='color:red'>的</span><span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span><span style='color:red'>、</span><span style='color:red'>招标</span><span style='color:red'>文件</span><span style='color:red'>的</span>费用，<span style='color:red'>以及</span>所收取<span style='color:red'>的</span>投标保证金及银行同期存款利息。

para_id: 8063285_T18

第十五条 第十五条<span style='color:red'>招标</span>人可以根据<span style='color:red'>招标</span>工程<span style='color:red'>的</span>需要，对投标申请人进行<span style='color:red'>资格</span><span style='color:red'>预审</span>，也可以委托工程<span style='color:red'>招标</span>代理机构对投标申请人进行<span style='color:red'>资格</span><span style='color:red'>预审</span>。实行<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>的</span><span style='color:red'>招标</span>工程，<span style='color:red'>招标</span>人应当在<span style='color:red'>招标</span>公告<span style='color:red'>或</span>者投标邀请书中载明<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>的</span><span style='color:red'>条件</span>和获取<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span><span style='color:red'>的</span>办法。<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span>一般应当包括<span style='color:red'>资格</span><span style='color:red'>预审</span>申请书格式<span style='color:red'>、</span>申请人须知，<span style='color:red'>以及</span>需要投标申请人提供<span style='color:red'>的</span>企业资质<span style='color:red'>、</span>业绩<span style='color:red'>、</span>技术装备<span style='color:red'>、</span>财务状况和拟派出<span style='color:red'>的</span>项目经理与主要技术人员<span style='color:red'>的</span>简历<span style='color:red'>、</span>业绩等证明材料。

para_id: 8063293_T28

第二十四条 第二十四条自<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span>停止发售之日起至提交<span style='color:red'>资格</span><span style='color:red'>预审</span>申请<span style='color:red'>文件</span>截止之日止，不得少于5日。对<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span><span style='color:red'>的</span>澄清<span style='color:red'>或</span>修改可能影响<span style='color:red'>资格</span><span style='color:red'>预审</span>申请<span style='color:red'>文件</span>编制<span style='color:red'>的</span>，应当在提交<span style='color:red'>资格</span><span style='color:red'>预审</span>申请<span style='color:red'>文件</span>截止<span style='color:red'>时间</span>至少3日前以书面形式通知所有获取<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span><span style='color:red'>的</span>潜在投标人。不足3日<span style='color:red'>的</span>，<span style='color:red'>招标</span>人应当顺延提交<span style='color:red'>资格</span><span style='color:red'>预审</span>申请<span style='color:red'>文件</span><span style='color:red'>的</span>截止<span style='color:red'>时间</span>。依法必须<span style='color:red'>招标</span><span style='color:red'>的</span>项目在<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span>停止发售之日止，获取<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span><span style='color:red'>的</span>潜在投标人少于3个<span style='color:red'>的</span>，应当重新<span style='color:red'>招标</span>。

para_id: 8063256_T37

第三十一条 第三十一条　<span style='color:red'>招标</span>人<span style='color:red'>终止</span><span style='color:red'>招标</span><span style='color:red'>的</span>，应当及时发布公告，<span style='color:red'>或</span>者以书面形式通知被邀请<span style='color:red'>的</span><span style='color:red'>或</span>者已经获取<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span><span style='color:red'>、</span><span style='color:red'>招标</span><span style='color:red'>文件</span><span style='color:red'>的</span>潜在投标人。已经发售<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span><span style='color:red'>、</span><span style='color:red'>招标</span><span style='color:red'>文件</span><span style='color:red'>或</span>者已经收取投标保证金<span style='color:red'>的</span>，<span style='color:red'>招标</span>人应当及时退还所收取<span style='color:red'>的</span><span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span><span style='color:red'>、</span><span style='color:red'>招标</span><span style='color:red'>文件</span><span style='color:red'>的</span>费用，<span style='color:red'>以及</span>所收取<span style='color:red'>的</span>投标保证金及银行同期存款利息。

### 🔄 **Hybrid Search Results:**

para_id: 8063292_T17

第十五条 第十五条<span style='color:red'>招标</span>人应当按<span style='color:red'>招标</span>公告<span style='color:red'>或</span>者投标邀请书规定<span style='color:red'>的</span><span style='color:red'>时间</span><span style='color:red'>、</span><span style='color:red'>地点</span><span style='color:red'>出售</span><span style='color:red'>招标</span><span style='color:red'>文件</span><span style='color:red'>或</span><span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span>。自<span style='color:red'>招标</span><span style='color:red'>文件</span><span style='color:red'>或</span>者<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span><span style='color:red'>出售</span>之日起至停止<span style='color:red'>出售</span>之日止，最短不得少于五日。<span style='color:red'>招标</span>人可以通过信息网络<span style='color:red'>或</span>者其他媒介发布<span style='color:red'>招标</span><span style='color:red'>文件</span>，通过信息网络<span style='color:red'>或</span>者其他媒介发布<span style='color:red'>的</span><span style='color:red'>招标</span><span style='color:red'>文件</span>与书面<span style='color:red'>招标</span><span style='color:red'>文件</span>具有同等法律效力，出现不一致时以书面<span style='color:red'>招标</span><span style='color:red'>文件</span>为准，国家另有规定<span style='color:red'>的</span>除外。对<span style='color:red'>招标</span><span style='color:red'>文件</span><span style='color:red'>或</span>者<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span><span style='color:red'>的</span><span style='color:red'>收费</span>应当限于补偿印刷<span style='color:red'>、</span>邮寄<span style='color:red'>的</span>成本支出，不得以营利为目<span style='color:red'>的</span>。对于所附<span style='color:red'>的</span>设计<span style='color:red'>文件</span>，<span style='color:red'>招标</span>人可以向投标人酌收押金；对于开标后投标人退还设计<span style='color:red'>文件</span><span style='color:red'>的</span>，<span style='color:red'>招标</span>人应当向投标人退还押金。<span style='color:red'>招标</span><span style='color:red'>文件</span><span style='color:red'>或</span>者<span 
style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span>售出后，不予退还。除不可抗力原因外，<span style='color:red'>招标</span>人在发布<span style='color:red'>招标</span>公告<span style='color:red'>、</span>发出投标邀请书后<span style='color:red'>或</span>者售出<span style='color:red'>招标</span><span style='color:red'>文件</span><span style='color:red'>或</span><span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span>后不得<span style='color:red'>终止</span><span style='color:red'>招标</span>。

para_id: 8063277_T16

第十四条 第十四条<span style='color:red'>招标</span>人应当按照<span style='color:red'>资格</span><span style='color:red'>预审</span>公告<span style='color:red'>、</span><span style='color:red'>招标</span>公告<span style='color:red'>或</span>者投标邀请书规定<span style='color:red'>的</span><span style='color:red'>时间</span><span style='color:red'>、</span><span style='color:red'>地点</span>发售<span style='color:red'>招标</span><span style='color:red'>文件</span><span style='color:red'>或</span>者<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span>。自<span style='color:red'>招标</span><span style='color:red'>文件</span><span style='color:red'>或</span>者<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span>发售之日起至停止发售之日止，最短不得少于五日。<span style='color:red'>招标</span>人可以通过信息网络<span style='color:red'>或</span>者其他媒介发布<span style='color:red'>招标</span><span style='color:red'>文件</span>，通过信息网络<span style='color:red'>或</span>者其他媒介发布<span style='color:red'>的</span><span style='color:red'>招标</span><span style='color:red'>文件</span>与书面<span style='color:red'>招标</span><span style='color:red'>文件</span>具有同等法律效力，出现不一致时以书面<span style='color:red'>招标</span><span style='color:red'>文件</span>为准，但国家另有规定<span style='color:red'>的</span>除外。对<span style='color:red'>招标</span><span style='color:red'>文件</span><span style='color:red'>或</span>者<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span><span style='color:red'>的</span><span style='color:red'>收费</span>应当限于补偿印刷<span style='color:red'>、</span>邮寄<span style='color:red'>的</span>成本支出，不得以营利为目<span style='color:red'>的</span>。除不可抗力原因外，<span style='color:red'>招标</span><span style='color:red'>文件</span><span style='color:red'>或</span>者<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span>发出后，不予退还；<span style='color:red'>招标</span>人在发布<span style='color:red'>招标</span>公告<span style='color:red'>、</span>发出投标邀请书后<span 
style='color:red'>或</span>者发出<span style='color:red'>招标</span><span style='color:red'>文件</span><span style='color:red'>或</span><span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span>后不得<span style='color:red'>终止</span><span style='color:red'>招标</span>。<span style='color:red'>招标</span>人<span style='color:red'>终止</span><span style='color:red'>招标</span><span style='color:red'>的</span>，应当及时发布公告，<span style='color:red'>或</span>者以书面形式通知被邀请<span style='color:red'>的</span><span style='color:red'>或</span>者已经获取<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span><span style='color:red'>、</span><span style='color:red'>招标</span><span style='color:red'>文件</span><span style='color:red'>的</span>潜在投标人。已经发售<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span><span style='color:red'>、</span><span style='color:red'>招标</span><span style='color:red'>文件</span><span style='color:red'>或</span>者已经收取投标保证金<span style='color:red'>的</span>，<span style='color:red'>招标</span>人应当及时退还所收取<span style='color:red'>的</span><span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span><span style='color:red'>、</span><span style='color:red'>招标</span><span style='color:red'>文件</span><span style='color:red'>的</span>费用，<span style='color:red'>以及</span>所收取<span style='color:red'>的</span>投标保证金及银行同期存款利息。

para_id: 8063293_T27

第二十三条 第二十三条<span style='color:red'>招标</span>人应当按<span style='color:red'>资格</span><span style='color:red'>预审</span>公告<span style='color:red'>、</span><span style='color:red'>招标</span>公告<span style='color:red'>或</span>者投标邀请书规定<span style='color:red'>的</span><span style='color:red'>时间</span><span style='color:red'>、</span><span style='color:red'>地点</span>发售<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span><span style='color:red'>或</span>者<span style='color:red'>招标</span><span style='color:red'>文件</span>。<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span><span style='color:red'>或</span>者<span style='color:red'>招标</span><span style='color:red'>文件</span><span style='color:red'>的</span>发售期不得少于5日。<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span><span style='color:red'>或</span>者<span style='color:red'>招标</span><span style='color:red'>文件</span>售出后，不予退还。

para_id: 8063256_T22

第十六条 第十六条　<span style='color:red'>招标</span>人应当按照<span style='color:red'>资格</span><span style='color:red'>预审</span>公告<span style='color:red'>、</span><span style='color:red'>招标</span>公告<span style='color:red'>或</span>者投标邀请书规定<span style='color:red'>的</span><span style='color:red'>时间</span><span style='color:red'>、</span><span style='color:red'>地点</span>发售<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span><span style='color:red'>或</span>者<span style='color:red'>招标</span><span style='color:red'>文件</span>。<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span><span style='color:red'>或</span>者<span style='color:red'>招标</span><span style='color:red'>文件</span><span style='color:red'>的</span>发售期不得少于5日。<span style='color:red'>招标</span>人发售<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span><span style='color:red'>、</span><span style='color:red'>招标</span><span style='color:red'>文件</span>收取<span style='color:red'>的</span>费用应当限于补偿印刷<span style='color:red'>、</span>邮寄<span style='color:red'>的</span>成本支出，不得以营利为目<span style='color:red'>的</span>。

para_id: 8063293_T37

第三十三条 第三十三条<span style='color:red'>招标</span>人在发布<span style='color:red'>资格</span><span style='color:red'>预审</span>公告<span style='color:red'>、</span><span style='color:red'>招标</span>公告<span style='color:red'>、</span>发出投标邀请书<span style='color:red'>或</span>者售出<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span><span style='color:red'>、</span><span style='color:red'>招标</span><span style='color:red'>文件</span>后，无正当理由不得随意<span style='color:red'>终止</span><span style='color:red'>招标</span>。<span style='color:red'>招标</span>人因特殊原因需要<span style='color:red'>终止</span><span style='color:red'>招标</span><span style='color:red'>的</span>，应当及时发布公告，<span style='color:red'>或</span>者以书面形式通知被邀请<span style='color:red'>的</span><span style='color:red'>或</span>者已经获取<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span><span style='color:red'>、</span><span style='color:red'>招标</span><span style='color:red'>文件</span><span style='color:red'>的</span>潜在投标人。已经发售<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span><span style='color:red'>、</span><span style='color:red'>招标</span><span style='color:red'>文件</span><span style='color:red'>或</span>者已经收取投标保证金<span style='color:red'>的</span>，<span style='color:red'>招标</span>人应当及时退还所收取<span style='color:red'>的</span>购买<span style='color:red'>资格</span><span style='color:red'>预审</span><span style='color:red'>文件</span><span style='color:red'>、</span><span style='color:red'>招标</span><span style='color:red'>文件</span><span style='color:red'>的</span>费用，<span style='color:red'>以及</span>所收取<span style='color:red'>的</span>投标保证金及银行同期存款利息。利息<span style='color:red'>的</span>计算方法应当在<span style='color:red'>招标</span><span style='color:red'>文件</span>中载明。

## Calculate Recall

In [59]:
def compute_recall(gold_set, retrieved_set):
    """Return the fraction of gold items that were retrieved.

    Args:
        gold_set: Ground-truth ids. Any iterable of hashable items is
            accepted (set, list, tuple, ...); duplicates are ignored.
        retrieved_set: Retrieved ids. Any iterable of hashable items.

    Returns:
        A float in [0, 1]. When there are no gold items the result is 1.0:
        with no correct answer to find, the query is not penalised.
    """
    # Coerce to sets so callers may pass lists directly; `or ()` keeps the
    # original behaviour of treating a falsy/None gold collection as empty.
    gold = set(gold_set or ())
    if not gold:
        return 1.0  # no correct answer exists, so do not penalise
    return len(gold & set(retrieved_set)) / len(gold)

In [228]:
idx = 2  # position of the single query inspected in this cell

# Ids returned by each retrieval mode for that query (all hits, no cut-off).
retrieved_dense = {hit.get('para_id') for hit in dense_results[idx]}
retrieved_sparse = {hit.get('para_id') for hit in sparse_results[idx]}
retrieved_hybrid = {hit.get('para_id') for hit in hybrid_results[idx]}

# Recall of each mode against the gold ids of the same query.
dense_recall = compute_recall(set(gold_id[idx]), retrieved_dense)
sparse_recall = compute_recall(set(gold_id[idx]), retrieved_sparse)
hybrid_recall = compute_recall(set(gold_id[idx]), retrieved_hybrid)

# Report in the fixed order dense -> sparse -> hybrid.
print(f"Dense Search Recall for '{query[idx]}': {dense_recall:.2%}")
print(f"Sparse Search Recall for '{query[idx]}': {sparse_recall:.2%}")
print(f"Hybrid Search Recall for '{query[idx]}': {hybrid_recall:.2%}")

Dense Search Recall for '谈判小组在评审响应文件时，应如何处理未实质性响应谈判文件的响应文件？谈判小组与哪些供应商进行谈判？': 100.00%
Sparse Search Recall for '谈判小组在评审响应文件时，应如何处理未实质性响应谈判文件的响应文件？谈判小组与哪些供应商进行谈判？': 100.00%
Hybrid Search Recall for '谈判小组在评审响应文件时，应如何处理未实质性响应谈判文件的响应文件？谈判小组与哪些供应商进行谈判？': 100.00%


In [229]:
# Running totals of per-query recall for each retrieval mode.
dense_sum = sparse_sum = hybrid_sum = 0
for idx in range(num):
    # Dense: keep only the first five hits before scoring.
    retrieved_dense = {hit.get('para_id') for hit in dense_results[idx][:5]}
    dense_recall = compute_recall(set(gold_id[idx]), retrieved_dense)
    dense_sum += dense_recall

    # Sparse: same cut-off.
    retrieved_sparse = {hit.get('para_id') for hit in sparse_results[idx][:5]}
    sparse_recall = compute_recall(set(gold_id[idx]), retrieved_sparse)
    sparse_sum += sparse_recall

    # Hybrid: same cut-off.
    retrieved_hybrid = {hit.get('para_id') for hit in hybrid_results[idx][:5]}
    hybrid_recall = compute_recall(set(gold_id[idx]), retrieved_hybrid)
    hybrid_sum += hybrid_recall

# Mean recall over the `num` evaluated queries.
dense_avg = dense_sum / num
sparse_avg = sparse_sum / num
hybrid_avg = hybrid_sum / num
print(f' Dense Recall: {dense_avg} \n Sparse Recall: {sparse_avg} \n Hybrid Recall: {hybrid_avg}')

 Dense Recall: 0.9878260869565217 
 Sparse Recall: 0.9826086956521739 
 Hybrid Recall: 0.9843478260869565


## Rerank

Rerank model

In [158]:
from pymilvus.model.reranker import BGERerankFunction

# Reranker instance shared by the cells below; it is invoked as
# bge_rf(query=..., documents=..., top_k=...) and each returned item exposes
# an `.index` into the `documents` list that was passed in.
bge_rf = BGERerankFunction(
    model_name="BAAI/bge-reranker-v2-m3",  # Specify the model name. Defaults to `BAAI/bge-reranker-v2-m3`.
    device="cuda:0" # Specify the device to use, e.g., 'cpu' or 'cuda:0'
)


In [234]:
def rerank(query, results, top_k=5, gold_ids=None, num_queries=None):
    """Rerank retrieved candidates per query and return the mean recall.

    For each evaluated query the candidate texts are re-scored with the
    notebook-level reranker `bge_rf`, the `top_k` best are kept, and their
    `para_id`s are compared with the gold ids via `compute_recall`.

    Args:
        query: Sequence of query strings, indexed by query position.
        results: Sequence where `results[i]` is the list of candidate dicts
            for `query[i]`; each dict must have 'text' and 'para_id' keys.
        top_k: Number of reranked candidates kept per query.
        gold_ids: Sequence where `gold_ids[i]` holds the gold para_ids of
            `query[i]`. Defaults to the notebook global `gold_id`.
        num_queries: How many leading queries to evaluate. Defaults to the
            notebook global `num`.

    Returns:
        Mean recall (float) over the evaluated queries.

    Raises:
        ZeroDivisionError: if the number of evaluated queries is 0.
    """
    # Fall back to the notebook globals so existing calls behave as before,
    # while letting callers pass the data explicitly and avoid hidden state.
    if gold_ids is None:
        gold_ids = gold_id
    if num_queries is None:
        num_queries = num

    recall_total = 0
    for idx in range(num_queries):
        candidates = results[idx]
        documents = [candidate['text'] for candidate in candidates]
        if documents:
            reranked = bge_rf(query=query[idx], documents=documents, top_k=top_k)
            # `item.index` points back into `documents`, hence into `candidates`.
            kept_ids = {candidates[item.index]['para_id'] for item in reranked}
        else:
            # Nothing was retrieved for this query: skip the model call.
            kept_ids = set()
        recall_total += compute_recall(set(gold_ids[idx]), kept_ids)

    return recall_total / num_queries

In [231]:
%%time

# Rerank the hybrid-search candidates of every evaluated query, keep the
# five best, and average the resulting recall.
rerank_hybrid_sum = 0
for idx in range(num):
    q = query[idx]
    documents = [hit['text'] for hit in hybrid_results[idx]]
    rerank_results = bge_rf(query=q, documents=documents, top_k=5)
    # Map each reranked item back to the para_id of its source candidate.
    rerank_id = [hybrid_results[idx][item.index]['para_id'] for item in rerank_results]
    rerank_recall = compute_recall(set(gold_id[idx]), set(rerank_id))
    rerank_hybrid_sum += rerank_recall

rerank_hybrid_avg = rerank_hybrid_sum / num
print(f'Reranked Hybrid Recall: {rerank_hybrid_avg}')

Reranked Hybrid Recall: 0.9895652173913043
CPU times: total: 6min 45s
Wall time: 12min 20s


In [237]:
# Mean recall after reranking the sparse-search candidates (rerank's default top_k=5).
rerank_sparse_avg = rerank(query, sparse_results)
print(rerank_sparse_avg)

0.9878260869565217


In [236]:
# Mean recall after reranking the dense-search candidates (rerank's default top_k=5).
rerank_dense_avg = rerank(query, dense_results)
print(rerank_dense_avg)

0.9895652173913043


In [239]:
# Mean recall when only the single best reranked hybrid candidate is kept per query.
rerank_hybrid_avg_1 = rerank(query, hybrid_results, top_k=1)

In [240]:
# Mean recall when the three best reranked hybrid candidates are kept per query.
rerank_hybrid_avg_3 = rerank(query, hybrid_results, top_k=3)

In [242]:
# Reranked hybrid recall at top_k=1, then at top_k=3.
print(rerank_hybrid_avg_1)
print(rerank_hybrid_avg_3)

0.9234782608695652
0.9808695652173913
