In [None]:
# Build the embedding model used for every query in this notebook
from langchain_openai import OpenAIEmbeddings
from dotenv import load_dotenv
import os

# load_dotenv() runs first so that the environment lookups below can see
# whatever values it provides.
load_dotenv()

embed_model = OpenAIEmbeddings(
    model="text-embedding-v3",
    dimensions=1024,
    base_url=os.environ.get("BL_BASE_URL"),
    api_key=os.environ.get("BL_API_KEY"),
    # NOTE(review): presumably disabled because the endpoint behind BL_BASE_URL
    # is an OpenAI-compatible service rather than OpenAI itself — confirm.
    check_embedding_ctx_length=False,
)

In [None]:
# Open the two Milvus collections that are compared in this notebook
from langchain_milvus import Milvus, BM25BuiltInFunction

# Dense-only collection.
dense_vs = Milvus(
    embedding_function=embed_model,
    collection_name='dense_hotpotqa_500',
)

# Hybrid collection: a 'dense' vector field plus a BM25-produced 'sparse' field.
hybrid_vs = Milvus(
    embedding_function=embed_model,
    collection_name='hybrid_hotpotqa_500',
    vector_field=['dense', 'sparse'],
    builtin_function=BM25BuiltInFunction(output_field_names='sparse'),
)

In [None]:
# 定义判断检索结果是否一致的函数
from typing import Tuple, List
from langchain_core.documents import Document

def is_same_result(
        dense_res: List[Tuple[Document, float]],
        hybrid_res: List[Tuple[Document, float]],
        score_tol: float = 0.0
) -> bool:
    """
    Check whether two ranked retrieval results are identical.

    The two lists are compared position by position. A position matches when
    the scores agree (exactly, or within ``score_tol``) and the documents'
    ``page_content`` strings are equal.

    Args:
        dense_res (List[Tuple[Document, float]]): ``(document, score)`` pairs
            returned for the dense collection.
        hybrid_res (List[Tuple[Document, float]]): ``(document, score)`` pairs
            returned for the hybrid collection.
        score_tol (float): Largest absolute score difference that is still
            treated as "equal". The default ``0.0`` keeps the strict,
            exact comparison; pass a small positive value to ignore
            floating-point noise between the two collections. Zero or
            negative values behave like exact comparison.

    Returns:
        bool: True when both lists have the same length and every position
        matches; False otherwise.
    """
    # Different hit counts can never be the same result.
    if len(dense_res) != len(hybrid_res):
        return False

    for d_res, h_res in zip(dense_res, hybrid_res):
        d_score, h_score = d_res[1], h_res[1]

        # The exact check comes first so that equal scores (including equal
        # infinities) always match; NaN never equals anything and therefore
        # always counts as a difference, regardless of the tolerance.
        scores_differ = d_score != h_score and not abs(d_score - h_score) <= score_tol
        if scores_differ:
            return False

        if d_res[0].page_content != h_res[0].page_content:
            return False

    return True

In [None]:
# Load the 500 evaluation questions
import json
from loguru import logger as log
import pprint

all_questions = []
question_path = r'C:\Users\Lenovo\Desktop\RAG rebuild v2\evaluation\hotpotqa\hybrid_test_data_500.json'
with open(question_path, mode='r', encoding='utf-8') as question_file:
    all_questions = json.load(question_file)

log.info(f'加载了 {len(all_questions)} 个问题')
# Show the first record so the reader can see the shape of one question entry.
pprint.pprint(all_questions[0])

# Search settings shared by every query issued below
top_k = 10
search_params = dict(metric_type='L2', params={})

[32m2025-07-23 17:17:43.267[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m11[0m - [1m加载了 500 个问题[0m


{'_id': '5a8ba761554299240d9c2066',
 'answer': 'nearly 80 years',
 'context': [['Bus Stop (TV series)',
              ['Bus Stop is a 26-episode American drama which aired on ABC '
               'from October 1, 1961, until March 25, 1962, starring Marilyn '
               'Maxwell as Grace Sherwood, the owner of a bus station and '
               'diner in the fictitious town of Sunrise in the Colorado '
               'Rockies.',
               ' The program was adapted from William Inge\'s play, "Bus '
               'Stop", and Inge was a script consultant for the series, which '
               'followed the lives of travelers passing through the bus '
               'station and the diner.',
               " Maxwell's co-stars were Richard Anderson as District Attorney "
               'Glenn Wagner, Rhodes Reason as Sheriff Will Mayberry, Joan '
               'Freeman as waitress Elma Gahrigner, Bernard Kates as Ralph the '
               'coroner, and Buddy Ebsen as Virge Bles

In [None]:
# Query both collections through client.search and compare the hits
from tqdm import tqdm

diff_res = []       # one record per question whose two result lists differ
same_res_num = 0    # number of questions whose two result lists are identical

for question in tqdm(all_questions):
    # Embed the question once and reuse the vector for both searches, so both
    # collections are queried with exactly the same vector and the embedding
    # endpoint is called once per question instead of twice.
    query_vector = embed_model.embed_query(question['question'])

    raw_dense_res = dense_vs.client.search(
        collection_name='dense_hotpotqa_500',
        data=[query_vector],
        anns_field='vector',  # in dense_hotpotqa_500 the dense embedding field is named 'vector'
        limit=top_k,
        search_params=search_params,
        output_fields=["*"]
    )
    # NOTE(review): _parse_documents_from_search_results is a private helper of
    # the vector store wrapper and may change between library versions.
    dense_res = dense_vs._parse_documents_from_search_results(raw_dense_res)

    raw_hybrid_res = hybrid_vs.client.search(
        collection_name='hybrid_hotpotqa_500',
        data=[query_vector],
        anns_field='dense',
        limit=top_k,
        search_params=search_params,
        output_fields=["*"]
    )
    hybrid_res = hybrid_vs._parse_documents_from_search_results(raw_hybrid_res)

    # Compare the two result lists
    if is_same_result(dense_res, hybrid_res):
        same_res_num += 1
    else:
        diff_res.append({
            'question': question['question'],
            'dense_res': dense_res,
            'hybrid_res': hybrid_res
        })

        # tqdm.write keeps the message from breaking the progress bar.
        tqdm.write(f'{question["question"]} 检索结果不一致')

# Report the totals
log.info(f'检索结果一致的问题数量：{same_res_num} / {len(all_questions)}')
log.info(f'检索结果不一致的问题数量：{len(diff_res)}')

  0%|          | 1/500 [00:00<03:01,  2.75it/s]

How long did the career span of the actor who starred with Mickey Rooney and Marilyn Maxwell in Off Limits? 检索结果不一致


  2%|▏         | 10/500 [00:03<02:28,  3.30it/s]

Who was the running mate of the man about whose candidacy the 2016 book Insane Clown President: Dispatches from the 2016 Circus documents? 检索结果不一致


  3%|▎         | 15/500 [00:04<02:21,  3.42it/s]

What relationship does Fred Gehrke have to the 23rd overall pick in the 2010 Major League Baseball Draft? 检索结果不一致


  3%|▎         | 17/500 [00:05<02:35,  3.11it/s]

Spider9 was founded in 2011 by the head of which subsidiary of Wanxiang Group? 检索结果不一致


  4%|▎         | 18/500 [00:05<02:30,  3.19it/s]

John Stoltenberg is the managing editor of the magazine that focuses on what? 检索结果不一致


  4%|▍         | 19/500 [00:05<02:17,  3.50it/s]

What kind of song did Alexander Grant produce for Eminem? 检索结果不一致


  4%|▍         | 22/500 [00:06<02:15,  3.52it/s]

Is Rick Husband Amarillo International Airport or Grand Forks International Airport closer to the town it is near? 检索结果不一致


  5%|▍         | 23/500 [00:07<02:14,  3.54it/s]

The Greatest Event in Television History aired on the channel that was founded by whom? 检索结果不一致


  5%|▍         | 24/500 [00:07<02:19,  3.41it/s]

The Lowry Hotel is named after an artist, many of whose drawings and paintings depict what town? 检索结果不一致


  5%|▌         | 25/500 [00:07<02:29,  3.18it/s]

ICI House is now named after the company that provides what type of item? 检索结果不一致


  5%|▌         | 26/500 [00:08<02:52,  2.75it/s]

What professionn does Stanley Kwan and Ken Hughes have in common? 检索结果不一致


  6%|▌         | 28/500 [00:08<02:26,  3.23it/s]

The musician who did "Khalaara" was born on what date? 检索结果不一致


  7%|▋         | 35/500 [00:10<02:15,  3.44it/s]

Who was born first, Nellee Hooper or Baz Luhrmann? 检索结果不一致


  7%|▋         | 36/500 [00:11<02:18,  3.36it/s]

Who is younger, Keisuke Kuwata or Moya Brennan? 检索结果不一致


  7%|▋         | 37/500 [00:11<02:15,  3.41it/s]

Which of the starting pitchers for Team USA in the Baseball at the 2000 Summer Olympics was not born August 29, 1977? 检索结果不一致


  8%|▊         | 38/500 [00:11<02:06,  3.66it/s]

When was the rock band to which Pre-Creedence name was changed active?  检索结果不一致


  8%|▊         | 39/500 [00:12<02:22,  3.23it/s]

What other teams played in the same division that Chad Clements played in? 检索结果不一致


  8%|▊         | 41/500 [00:12<02:30,  3.06it/s]

What Cantonese slang term can mean both "ghost man" and to refer to Westerners? 检索结果不一致


  8%|▊         | 42/500 [00:12<02:26,  3.14it/s]

What location is shared by both Great Neck School District and Saddle Rock Elementary School? 检索结果不一致


  9%|▊         | 43/500 [00:13<02:26,  3.11it/s]

Jim Sharman and Ava DuVernay both share what title concerning their actions leading actors in film or stage? 检索结果不一致


  9%|▉         | 45/500 [00:13<02:11,  3.46it/s]

When was the younger brother of Ervin Randle born? 检索结果不一致


  9%|▉         | 47/500 [00:14<02:01,  3.73it/s]

The Waterloo Vase has been used as a garden ornament at whose residence and administrative headquarters? 检索结果不一致


 10%|█         | 51/500 [00:15<01:57,  3.82it/s]

Tony Leung Ka-fai is a four time winner of the award that has been on offer since what month? 检索结果不一致


 10%|█         | 52/500 [00:15<01:59,  3.75it/s]

Swingin' Down Yonder is the first full-length, 12-inch album recorded by a singer known by what nickname? 检索结果不一致


 11%|█▏        | 57/500 [00:17<01:58,  3.74it/s]

When is the football club which Stan Spinks played for founded 检索结果不一致


 12%|█▏        | 58/500 [00:17<01:51,  3.98it/s]

Theme Park Inc is a video game that created what sequel in North America in 1999? 检索结果不一致


 12%|█▏        | 61/500 [00:18<02:02,  3.59it/s]

Which artist is a lyricist, Eric Bloom or Otep Shamaya? 检索结果不一致


 12%|█▏        | 62/500 [00:18<02:08,  3.42it/s]

Which German project recorded a song that featured vocals by a duo from Silverdale, England? 检索结果不一致


 13%|█▎        | 64/500 [00:19<02:04,  3.51it/s]

Which actress, who debuted in Friday Night Lights also starred in The River Why? 检索结果不一致


 14%|█▍        | 71/500 [00:21<02:09,  3.33it/s]

According to the 2011 census what is the population of the city where Royal Wahingdoh F.C. are based ? 检索结果不一致


 17%|█▋        | 84/500 [00:24<01:56,  3.58it/s]

How many total staff (including part-time, flexible, and fixed contract staff) does the broadcaster, who released HyperNormalisation, employ? 检索结果不一致


 17%|█▋        | 85/500 [00:25<02:08,  3.24it/s]

Are Harry Everett Smith and Vladimir Danilevich both from Russia? 检索结果不一致


 17%|█▋        | 87/500 [00:25<02:02,  3.38it/s]

In what county is the community, to which the character Everett Turner is travelling to in the book "Away West", located? 检索结果不一致


 18%|█▊        | 88/500 [00:25<01:51,  3.69it/s]

Which of the people featured on Wall of Fame is the daughter of Bernie Ecclestone? 检索结果不一致


 18%|█▊        | 89/500 [00:26<02:03,  3.34it/s]

The 2004 romantic drama film based on Shake'spears play of the same name was produced by whom? 检索结果不一致


 18%|█▊        | 91/500 [00:26<02:04,  3.28it/s]

Who is older Craig Nicholls or Norman Blake ? 检索结果不一致


 18%|█▊        | 92/500 [00:27<02:00,  3.40it/s]

Eduard Schweizer teaches at a German university with over how many students?  检索结果不一致


 19%|█▉        | 95/500 [00:27<01:58,  3.42it/s]

Who wrote the music for Billy Elliot the Musical and had at least one song on the "Billboard" Hot 100 for 31 consecutive years? 检索结果不一致


 19%|█▉        | 96/500 [00:28<01:55,  3.51it/s]

What is the man who Hofstadter's law is named after's profession? 检索结果不一致


 20%|██        | 100/500 [00:29<02:07,  3.15it/s]

Nicholas Medforth-Mills is a grandson to a king who was born on what day and year? 检索结果不一致


 20%|██        | 101/500 [00:29<02:05,  3.19it/s]

Whataburger in Richardson Square is loacted in what city and state? 检索结果不一致


 21%|██        | 103/500 [00:30<02:09,  3.07it/s]

What other jobs did the actress Olivia Munn from Mortdecai have? 检索结果不一致


 21%|██        | 106/500 [00:31<01:59,  3.28it/s]

What type of forum did a former Soviet statesman initiate? 检索结果不一致


 22%|██▏       | 110/500 [00:32<01:55,  3.37it/s]

What aviator participated to the Transatlantic flight organized by the "heir apparent" to Italian dictator Benito Mussolini? 检索结果不一致


 22%|██▏       | 112/500 [00:33<02:02,  3.17it/s]

What song was on an American radio network that is owned by Disney Channels Worldwide, Inc. is a song by Senegalese-American R&B and rapper Akon?  检索结果不一致


 23%|██▎       | 115/500 [00:34<02:08,  3.01it/s]

Jasmin Geljo is known for his role in a 2005 post-apocalyptic horror film written and directed by who? 检索结果不一致


 24%|██▎       | 118/500 [00:35<02:01,  3.14it/s]

The Joggers are a four-piece band whose lead singer is the son of an American chemist who received the highest what? 检索结果不一致


 24%|██▍       | 119/500 [00:35<01:59,  3.19it/s]

In what Chennai location are trade laws different from rest of the country? 检索结果不一致


 24%|██▍       | 121/500 [00:36<02:06,  2.99it/s]

What dance competition television series features host and NFL reporter Erin Andrews? 检索结果不一致


 25%|██▍       | 124/500 [00:37<01:55,  3.24it/s]

John MacGregor, Baron MacGregor of Pulham Market was educated at the University of St Andrews and another university established by what monach? 检索结果不一致


 26%|██▌       | 130/500 [00:39<02:08,  2.88it/s]

Who is the former pentecostal preacher who released the comedy album Leader of the Banned in 1990? 检索结果不一致


 27%|██▋       | 136/500 [00:40<01:37,  3.73it/s]

The 45th president of the United States did what to the Grand Hyatt New York? 检索结果不一致


 28%|██▊       | 139/500 [00:41<01:48,  3.32it/s]

What was the name changed to after Allison J71 was modified to a thrust of 7400 lbf? 检索结果不一致


 29%|██▉       | 144/500 [00:43<01:52,  3.16it/s]

Which American writer wrote both The Ganymede Takeover (1967) and The Man in the High Castle (1962)? 检索结果不一致


 29%|██▉       | 145/500 [00:43<01:44,  3.40it/s]

Who was born first, Erika Jayne or Marco Da Silva 检索结果不一致


 29%|██▉       | 146/500 [00:43<01:52,  3.14it/s]

 "Collateral Damage" is an episode of the television series "Millennium" and features the host of a late night talk show that airs how often? 检索结果不一致


 30%|███       | 151/500 [00:45<01:33,  3.72it/s]

What is the name of the popular shopping destination located close to the Taj Mahal and Agra Fort in India? 检索结果不一致


 30%|███       | 152/500 [00:45<01:46,  3.25it/s]

What year was the alphabetically first writer of Fairytale of New York born? 检索结果不一致


 31%|███       | 154/500 [00:46<01:49,  3.15it/s]

Panna a netvor is a re-telling of a classic tale by what novelist? 检索结果不一致


 32%|███▏      | 158/500 [00:47<01:43,  3.31it/s]

This British television series was adapted from one of the better-known novels of a 19th-century writer and was first published in what magazine?  检索结果不一致


 33%|███▎      | 164/500 [00:48<01:27,  3.85it/s]

Which President did the American model who posed nude for Penthouse and whose story is recounted in the book 'Their Lives' gain notoriety after revealing a sexual encounter with him? 检索结果不一致


 33%|███▎      | 165/500 [00:49<01:28,  3.80it/s]

Brindisi, the city where Louis-Jean-Nicolas Lejoille died, is located in what country? 检索结果不一致


 33%|███▎      | 166/500 [00:49<01:25,  3.89it/s]

Mummulgum is a stop on the Bruxner Highway before reaching the town on the banks of which river? 检索结果不一致


 34%|███▍      | 169/500 [00:50<01:15,  4.41it/s]

 What species has an extinct subspecies that are classified as Trogloxene and are known from fossils and prehistoric art? 检索结果不一致


 34%|███▍      | 171/500 [00:50<01:37,  3.38it/s]

Are Random House Tower and 888 7th Avenue both used for real estate? 检索结果不一致


 35%|███▌      | 176/500 [00:52<01:22,  3.93it/s]

Which was published first Polarity or The Farming Game? 检索结果不一致


 35%|███▌      | 177/500 [00:52<01:29,  3.60it/s]

No Smoking is a film loosely based upon the short story Quitters, Inc., which of three stories that are connected by the presence of what animal?  检索结果不一致


 36%|███▌      | 179/500 [00:52<01:18,  4.11it/s]

Jacques Mathou starred in Delicatessen, the 1991 French movie in the style of which celebrated American animator? 检索结果不一致


 36%|███▌      | 180/500 [00:53<01:25,  3.75it/s]

What company did Roscoe Channing form with the owner of a leading stable of thoroughbred racehorses? 检索结果不一致


 36%|███▌      | 181/500 [00:53<01:37,  3.27it/s]

Were The Living Desert and What Would Jesus Buy? directed by the same person? 检索结果不一致


 36%|███▋      | 182/500 [00:53<01:29,  3.55it/s]

What was the name of the country ran by the president who maintained the mercenary unit, White Legion?  检索结果不一致


 38%|███▊      | 189/500 [00:55<01:37,  3.19it/s]

What National Hockey League (NHL) season saw the Dallas Stars finish the season in a lower position than the Nashville Predators? 检索结果不一致


 39%|███▉      | 194/500 [00:57<01:39,  3.06it/s]

What  profession did Mike Robbie's father practice? 检索结果不一致


 40%|███▉      | 198/500 [00:58<01:27,  3.44it/s]

What was Randy Shughart's rank when he died? 检索结果不一致


 40%|███▉      | 199/500 [00:58<01:28,  3.42it/s]

Are Steve Perry and Dennis Lyxzén both members of the same band ? 检索结果不一致


 40%|████      | 200/500 [00:59<01:33,  3.20it/s]

The man, who the award in which "Summer of the Monkeys" won, was a spokesman for white sector of America? 检索结果不一致


 41%|████      | 203/500 [01:00<01:32,  3.21it/s]

What was the first comic book written by the writer who had a series developed into a 2010 film with Bruce Willis and Morgan Freeman? 检索结果不一致


 41%|████      | 204/500 [01:00<01:26,  3.40it/s]

What film adaptation do both Jerome Bernard and Ira Lewis have in common? 检索结果不一致


 42%|████▏     | 209/500 [01:01<01:23,  3.47it/s]

What Guatemalan Latin pop singer and songwriter  and writer of "El amor es un fantasma" shared a stage with Cristian Sáez Valdés Castro? 检索结果不一致


 42%|████▏     | 212/500 [01:02<01:20,  3.59it/s]

What city is the army division that conducted Operation Blue Hearts in the Korean War based out of? 检索结果不一致


 43%|████▎     | 213/500 [01:03<01:25,  3.37it/s]

What type of music were vocalists Billie Joe Armstrong and Frank Iero involved with? 检索结果不一致


 43%|████▎     | 216/500 [01:03<01:16,  3.71it/s]

Are both Coldplay and Pierre Bouvier from the same country? 检索结果不一致


 43%|████▎     | 217/500 [01:04<01:21,  3.49it/s]

The 2006 NCAA Wrestling Championships crowned Jake Rosholt and another athlete known for powerful punches from what hand? 检索结果不一致


 44%|████▎     | 218/500 [01:04<01:14,  3.77it/s]

What do Susan Stafford and Vanna White have in common? 检索结果不一致


 44%|████▍     | 221/500 [01:05<01:29,  3.12it/s]

Are both American Foxhound and Löwchen types of Foxhounds? 检索结果不一致


 44%|████▍     | 222/500 [01:05<01:27,  3.18it/s]

Who was older, Andrew Preston or James Taylor? 检索结果不一致


 45%|████▍     | 223/500 [01:05<01:24,  3.26it/s]

Kenneth L. Gile is the Chief Operating Officer of an airline with its head office in what airport? 检索结果不一致


 45%|████▌     | 225/500 [01:06<01:24,  3.27it/s]

Eleventh Dream Day and The Shins are both rock bands from where? 检索结果不一致


 45%|████▌     | 226/500 [01:06<01:21,  3.38it/s]

SWX Right Now airs on the station that broadcasts on what channel in Billings, Montana? 检索结果不一致


 45%|████▌     | 227/500 [01:07<01:20,  3.41it/s]

What was the 2016 population of the city on the Bay Fundy which had an office of the Bank of British North America? 检索结果不一致


 46%|████▌     | 228/500 [01:07<01:17,  3.51it/s]

Riom Trial was headed by the French general who reached what distinction? 检索结果不一致


 46%|████▋     | 232/500 [01:08<01:20,  3.34it/s]

Who was from farther west, Max Neufeld or Eduard Zahariev? 检索结果不一致


 47%|████▋     | 235/500 [01:09<01:26,  3.05it/s]

Who designed the theater where the London Philharmonic Orchestra plays?  检索结果不一致


 47%|████▋     | 237/500 [01:10<01:19,  3.31it/s]

Who was one of the first religious sceptics’ best disciple? 检索结果不一致


 48%|████▊     | 238/500 [01:10<01:17,  3.40it/s]

Where does Tiko's Spanish football club hold home games at? 检索结果不一致


 48%|████▊     | 242/500 [01:11<01:13,  3.50it/s]

What production company is owned by the director of "She Hate Me"? 检索结果不一致


 49%|████▊     | 243/500 [01:11<01:17,  3.33it/s]

What profession does Leonty Magnitsky and Leonid Khachiyan have in common? 检索结果不一致


 49%|████▉     | 245/500 [01:12<01:11,  3.59it/s]

Who starred in Umm-e-Kulsoom whose mother was Afshan Qureshi? 检索结果不一致


 49%|████▉     | 247/500 [01:13<01:13,  3.44it/s]

What award won by only twelve people has a man who Ted Kooshian has performed with won? 检索结果不一致


 51%|█████     | 254/500 [01:15<01:07,  3.64it/s]

The physicist who is responsible for identifying the Rabi cycle won what award? 检索结果不一致


 51%|█████     | 256/500 [01:15<01:14,  3.27it/s]

What is one of the most successful retro-clones that is no longer supported by Wizards of the Coast? 检索结果不一致


 51%|█████▏    | 257/500 [01:16<01:15,  3.23it/s]

What was the name of the hill that was the seat of an estate uphill Aldbury? 检索结果不一致


 52%|█████▏    | 260/500 [01:16<01:10,  3.43it/s]

Are Villa Paletti and Betrayal at House on the Hill both board games? 检索结果不一致


 53%|█████▎    | 263/500 [01:17<01:06,  3.59it/s]

What profession was both John Updike and Bret Easton Ellis ? 检索结果不一致


 53%|█████▎    | 265/500 [01:18<01:03,  3.70it/s]

Who has held more positions in the film industry, Donald Cammell or Justin David Swibel? 检索结果不一致


 54%|█████▍    | 270/500 [01:19<01:01,  3.77it/s]

What occupations do Toshi and Emily Haines share? 检索结果不一致


 54%|█████▍    | 271/500 [01:19<00:57,  3.98it/s]

In what administrative category are the cities of Beitun, Xinjiang and Wafangdian? 检索结果不一致


 55%|█████▍    | 273/500 [01:20<01:00,  3.76it/s]

The city where Alex Shevelev died is the capital of what region? 检索结果不一致


 55%|█████▍    | 274/500 [01:20<01:05,  3.48it/s]

What type of profession does Chris Jericho and Gary Barlow have in common? 检索结果不一致


 55%|█████▌    | 275/500 [01:21<01:06,  3.40it/s]

What movie is the the Spinning turtle attraction in the worlds 21st largest theme park based off of? 检索结果不一致


 56%|█████▌    | 279/500 [01:22<01:00,  3.63it/s]

Who was born first, Marino Girolami or Daniel Myrick? 检索结果不一致


 56%|█████▌    | 281/500 [01:22<01:06,  3.28it/s]

The rapper whose debut album was titled "Thug Misses" has sold over how many records worldwide? 检索结果不一致


 58%|█████▊    | 288/500 [01:24<00:55,  3.79it/s]

When did the third international spin off of Pawn Stars debut? 检索结果不一致


 58%|█████▊    | 289/500 [01:25<00:57,  3.66it/s]

Are both Deerhunter and Nine Lashes American Christian rock bands? 检索结果不一致


 58%|█████▊    | 291/500 [01:25<01:02,  3.33it/s]

What was the nationality of the person Mountbatten Institute was named after? 检索结果不一致


 59%|█████▉    | 297/500 [01:27<00:59,  3.42it/s]

About how many people are employed by the company founded by Denise Coates? 检索结果不一致


 60%|██████    | 300/500 [01:28<00:57,  3.50it/s]

Inductivism was a scientific method attributed to a man that served in what two capacities? 检索结果不一致


 60%|██████    | 301/500 [01:28<00:59,  3.34it/s]

What American actress stars in Tainted? 检索结果不一致


 61%|██████    | 306/500 [01:30<00:52,  3.68it/s]

What was the formal name of the building that housed the scene that formed the band Hjertestop? 检索结果不一致


 61%|██████▏   | 307/500 [01:30<00:52,  3.68it/s]

What county was Matt Herr raised in? 检索结果不一致


 62%|██████▏   | 310/500 [01:31<00:51,  3.69it/s]

The 2017–18 Wigan Athletic F.C. season will be a year in which the team competes in the league cup known as what for sponsorship reasons? 检索结果不一致


 62%|██████▏   | 312/500 [01:31<00:45,  4.10it/s]

Ithihasa and the Indian music composer of  "Chronic Bachelor", "Udayananu Tharam", "Naran", "Puthiya Mukham," "Urumi", "Grandmaster" and "Bhaskar the Rascal" are known in what type of films/cinema? 检索结果不一致


 63%|██████▎   | 313/500 [01:31<00:47,  3.95it/s]

Which dog is native to France, the Basset Bleu de Gascogne or the Bulldog? 检索结果不一致


 63%|██████▎   | 314/500 [01:32<00:53,  3.49it/s]

"Big for Your Boots" is a song by an English grime and hip hop artist whose most  successful song to date is what? 检索结果不一致


 63%|██████▎   | 315/500 [01:32<00:51,  3.59it/s]

On what date will the comedy film directed by Kevin Tent and starring the Bulgarian-Canadian actress known for portraying the role of Mia Jones be released on video on demand services? 检索结果不一致


 64%|██████▎   | 318/500 [01:33<00:55,  3.26it/s]

 Willie Geist frequently serves as fill-in anchor on "Today" for a tv journalist that was the host of what show from 1980-86? 检索结果不一致


 64%|██████▍   | 319/500 [01:33<00:52,  3.48it/s]

Are both Stephen R. Donaldson and Michael Moorcock science fiction writers? 检索结果不一致


 64%|██████▍   | 320/500 [01:34<00:54,  3.32it/s]

3:10 to Yuma starred what American actor also known for his role as Hoban Washburne? 检索结果不一致


 65%|██████▌   | 326/500 [01:35<00:47,  3.66it/s]

what city will host the event in which marie gisele eleme asse won twoo medals in 2017 检索结果不一致


 66%|██████▌   | 330/500 [01:36<00:52,  3.23it/s]

Did Big Pig or Blur have more members? 检索结果不一致


 66%|██████▌   | 331/500 [01:37<00:50,  3.36it/s]

What type of media does Jeff Rona and Veeram have in common? 检索结果不一致


 66%|██████▋   | 332/500 [01:37<00:47,  3.57it/s]

What was the nickname of the English monarch that Petruccio Ubaldini presented one of his books? 检索结果不一致


 67%|██████▋   | 335/500 [01:38<00:47,  3.50it/s]

Britt Walford played with what musician who is best known for his guitar work in the band Maurice 检索结果不一致


 67%|██████▋   | 336/500 [01:38<00:45,  3.61it/s]

Scout Tufankjian and Daron Malakian are both what? 检索结果不一致


 69%|██████▊   | 343/500 [01:40<00:51,  3.04it/s]

Name the movie that was cowritten by Paul Rudd and the director of the 2011 British science fiction horror comedy film written and directed by Joe Cornish and starring John Boyega, Nick Frost, Jodie Whittaker and Luke Treadaway. 检索结果不一致


 69%|██████▉   | 344/500 [01:40<00:49,  3.17it/s]

In what year did the director of The Lion King win the Tony Awards? 检索结果不一致


 69%|██████▉   | 345/500 [01:41<00:47,  3.27it/s]

What railroad in which Challengers were most common was completed in 1915? 检索结果不一致


 69%|██████▉   | 347/500 [01:41<00:43,  3.52it/s]

Which genus has more species, Monstera or Cercis? 检索结果不一致


 70%|██████▉   | 348/500 [01:42<00:51,  2.93it/s]

What was the father of Kasper Schmeichel voted to be by the IFFHS in 1992? 检索结果不一致


 71%|███████   | 355/500 [01:44<00:45,  3.19it/s]

When was the producer of the film Betrayal born? 检索结果不一致


 72%|███████▏  | 358/500 [01:45<00:45,  3.11it/s]

God Is Not Great is by a journalist which due to his actions made him what? 检索结果不一致


 73%|███████▎  | 366/500 [01:47<00:38,  3.52it/s]

Which writer of French descent actually lived in France, Maurice Level or John Dufresne? 检索结果不一致


 74%|███████▍  | 370/500 [01:49<00:43,  3.01it/s]

Katie sagona is known as what kind of actor because she was a child acting on stage or in motion pictures or television? 检索结果不一致


 76%|███████▌  | 379/500 [01:51<00:36,  3.32it/s]

Which author has won more Pulitzer Prizes, Thomas Friedman or Henri Bergson? 检索结果不一致


 76%|███████▌  | 381/500 [01:52<00:33,  3.52it/s]

What was the island, on which Marinelli Glacier is located, formerly known as? 检索结果不一致


 76%|███████▋  | 382/500 [01:52<00:33,  3.57it/s]

Zacarías Ferreira is the uncle of a professional basketball player who played college basketball for who? 检索结果不一致


 77%|███████▋  | 385/500 [01:53<00:33,  3.46it/s]

What is the name of the number-one single featuring the winner of Celebrity Apprentice 3? 检索结果不一致


 77%|███████▋  | 386/500 [01:53<00:34,  3.30it/s]

What film did Tom Wu appear in that also starred Jason Statham and Ray Liotta?  检索结果不一致


 77%|███████▋  | 387/500 [01:54<00:34,  3.28it/s]

Who was born more recent.y,Gunnar Nelson or Florence Welch? 检索结果不一致


 78%|███████▊  | 390/500 [01:55<00:32,  3.36it/s]

The person "Planet Earth Rock and Roll Orchestra" was the final solo album for died on what date? 检索结果不一致


 78%|███████▊  | 391/500 [01:55<00:31,  3.43it/s]

Where is Anticimex's parent company headquartered? 检索结果不一致


 78%|███████▊  | 392/500 [01:55<00:28,  3.76it/s]

Which two Swiss attractions were the cause of the "grand hotels" in which Jakob Ragaz worked on?  检索结果不一致


 79%|███████▉  | 395/500 [01:56<00:31,  3.38it/s]

Were Sound Team and Dead by Sunrise both formed before 2010?  检索结果不一致


 79%|███████▉  | 396/500 [01:56<00:31,  3.27it/s]

Father Jean-Pierre Aulneau de le Touche was killed before he could go on an expedition to Mandan, which is located in which state?  检索结果不一致


 80%|███████▉  | 398/500 [01:57<00:27,  3.76it/s]

To which competition did the University at Albany, SUNY send a Puerto Rican hurdler and sprinter? 检索结果不一致


 81%|████████  | 403/500 [01:58<00:26,  3.73it/s]

Which species is the most numerous, Podocarpus or Osmunda?  检索结果不一致


 81%|████████  | 406/500 [01:59<00:28,  3.27it/s]

Which Canadian rock band released a song called "Counterparts" and had a drummer who was inducted into the Modern Drummer Hall of Fame? 检索结果不一致


 81%|████████▏ | 407/500 [01:59<00:29,  3.13it/s]

What is the rank of the incumbent that RJ Harris challenged in 2010? 检索结果不一致


 82%|████████▏ | 408/500 [02:00<00:28,  3.21it/s]

Which is headquarter farther south, Jet's Pizza or Chuck E. Cheese's? 检索结果不一致


 82%|████████▏ | 409/500 [02:00<00:29,  3.12it/s]

Signed with Maybach Music Group in 2011, which artist was featured as a guest in Fire of Zamani? 检索结果不一致


 83%|████████▎ | 417/500 [02:03<00:23,  3.49it/s]

In what year did Australian-born comedian Peter Helliar write I Love you Too? 检索结果不一致


 84%|████████▍ | 420/500 [02:04<00:23,  3.38it/s]

Do The Importance of Being Icelandic and The Five Obstructions belong to different film genres ? 检索结果不一致


 84%|████████▍ | 422/500 [02:04<00:24,  3.24it/s]

What big-budget epic film was released the same year as the film that focuses on a series of sudden, unexplained violent bird attacks? 检索结果不一致


 86%|████████▌ | 430/500 [02:06<00:19,  3.54it/s]

 La Yesca Dam was inaugurated by which Mexican politiician and President of the country from 2006-2012? 检索结果不一致


 86%|████████▌ | 431/500 [02:07<00:19,  3.53it/s]

Who has more scope of profession, Edward Dmytryk or Otto Preminger? 检索结果不一致


 87%|████████▋ | 437/500 [02:09<00:18,  3.41it/s]

From which team did the linebacker for four NFL teams between 1985-1996 get fired by on November 29? 检索结果不一致


 88%|████████▊ | 439/500 [02:09<00:16,  3.78it/s]

Codex Escalada shows an apparition to the saint that is a native of which country? 检索结果不一致


 88%|████████▊ | 440/500 [02:09<00:15,  3.76it/s]

What album succeeded Kendrick Lamar's album that had the song Money Trees in it? 检索结果不一致


 89%|████████▉ | 447/500 [02:11<00:15,  3.41it/s]

Why did the CEO of the football team based in Denver, Colorado step down in 2014? 检索结果不一致


 90%|████████▉ | 449/500 [02:12<00:14,  3.43it/s]

When did the Disney sitcom in which Genevieve Knight "G" Hannelius starred as Avery Jennings end? 检索结果不一致


 92%|█████████▏| 461/500 [02:16<00:10,  3.68it/s]

Are the writing from Maurice Level and Alice Hoffman focused on different audieces? 检索结果不一致


 93%|█████████▎| 464/500 [02:17<00:10,  3.32it/s]

Brad Elterman is a professional photographer who addressed the rock 'n' roll lifestyle of a band that became noted for what? 检索结果不一致


 93%|█████████▎| 465/500 [02:17<00:10,  3.32it/s]

Who voices the character that stars in Baseball Bugs? 检索结果不一致


 93%|█████████▎| 467/500 [02:18<00:09,  3.42it/s]

Carlo Buccirosso had a role in a movie that premiered at what film festival? 检索结果不一致


 94%|█████████▍| 472/500 [02:19<00:07,  3.65it/s]

Which airport is operated by the Coos Country Airport District, Southwest Oregon Regional Airport or Lawton–Fort Sill Regional Airport? 检索结果不一致


 95%|█████████▍| 474/500 [02:19<00:06,  3.77it/s]

What man is linked to both Johnny Tremain and Magic Kingdom? 检索结果不一致


 95%|█████████▌| 475/500 [02:20<00:07,  3.46it/s]

Why is Bangor Daily News talkin about Sawin Millett? 检索结果不一致


 96%|█████████▌| 480/500 [02:21<00:05,  3.85it/s]

Heinrich Marschner was a composer who performed in the time frame after one of the first significant composers in what school of work? 检索结果不一致


 96%|█████████▌| 481/500 [02:21<00:05,  3.77it/s]

What city does Paul Clyne and David Soares have in common? 检索结果不一致


 97%|█████████▋| 486/500 [02:23<00:04,  3.07it/s]

Are both Grapico and Izze artificially flavored soft drinks? 检索结果不一致


 98%|█████████▊| 488/500 [02:24<00:03,  3.14it/s]

Who is from farther west, Halestorm or Audioslave? 检索结果不一致


 98%|█████████▊| 490/500 [02:24<00:03,  3.23it/s]

The revue Putting It Together was devised by Stephen Sondheim and what actress best known as Miss Marple? 检索结果不一致


 98%|█████████▊| 491/500 [02:25<00:02,  3.25it/s]

Who was born in Ireland and has coached the captain of MLS club Colorado Rapids? 检索结果不一致


 98%|█████████▊| 492/500 [02:25<00:02,  3.21it/s]

Co-Dependents' Day is an episode of a tv show season which began when? 检索结果不一致


 99%|█████████▊| 493/500 [02:25<00:02,  3.37it/s]

What is the estimated population of the city in which the suburb of Beaumont is located ? 检索结果不一致


100%|█████████▉| 499/500 [02:27<00:00,  3.09it/s]

From what county is Steven Avery, wrongfully convicted man on which the documentary Making a Murderer is based?  检索结果不一致


100%|██████████| 500/500 [02:27<00:00,  3.38it/s]
[32m2025-07-23 17:24:09.526[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m43[0m - [1m检索结果一致的问题数量：314 / 500[0m
[32m2025-07-23 17:24:09.526[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m44[0m - [1m检索结果不一致的问题数量：186[0m


In [None]:
# Persist the mismatching results to a JSON file
res = []

for diff in diff_res:
    # zip stops at the shorter list, so a rank that exists in only one of the
    # two results is not written out.
    paired_hits = zip(diff['dense_res'], diff['hybrid_res'])

    # 'question' comes first, followed by one entry per rank keyed by the
    # 0-based rank as a string.
    tmp = {'question': diff['question']}
    tmp.update({
        str(rank): {
            'dense_score': dense_hit[1],
            'hybrid_score': hybrid_hit[1],
            'dense_content': dense_hit[0].page_content,
            'hybrid_content': hybrid_hit[0].page_content
        }
        for rank, (dense_hit, hybrid_hit) in enumerate(paired_hits)
    })

    res.append(tmp)


res_path = r'C:\Users\Lenovo\Desktop\RAG rebuild v2\client_search_diff.json'
os.makedirs(os.path.dirname(res_path), exist_ok=True)

# ensure_ascii=False writes non-ASCII text as-is instead of \u escapes.
with open(res_path, 'w', encoding='utf-8') as out_file:
    json.dump(res, out_file, ensure_ascii=False)

In [None]:
# For every mismatching question, find the first rank (0-based) at which the
# two scores differ, and count how many questions first diverge at each rank.
# Only scores are compared here, and only the first differing rank is counted.
from collections import defaultdict

res = defaultdict(int)  # key: rank as a string, value: number of questions
less_3 = 0              # questions whose first divergence is within the top 3

for diff in diff_res:
    d_res = diff['dense_res']
    h_res = diff['hybrid_res']

    for i, (d_r, h_r) in enumerate(zip(d_res, h_res)):
        if d_r[1] != h_r[1]:
            res[str(i)] += 1
            if i < 3:
                # Double quotes inside the single-quoted f-string keep this
                # line valid on Python versions older than 3.12.
                log.info(f'{i}: {diff["question"]}')
                less_3 += 1
            break

[32m2025-07-23 17:25:42.532[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m16[0m - [1m2: Spider9 was founded in 2011 by the head of which subsidiary of Wanxiang Group?[0m
[32m2025-07-23 17:25:42.533[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m16[0m - [1m0: ICI House is now named after the company that provides what type of item?[0m
[32m2025-07-23 17:25:42.533[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m16[0m - [1m2: The musician who did "Khalaara" was born on what date?[0m
[32m2025-07-23 17:25:42.534[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m16[0m - [1m2: When was the younger brother of Ervin Randle born?[0m
[32m2025-07-23 17:25:42.534[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m16[0m - [1m1: Swingin' Down Yonder is the first full-length, 12-inch album recorded by a singer known by what nickname?[0m
[32m2025-07-23 17:25:42.535[0m | [1mINFO    [0m | [36m_

In [None]:
# Display the tally: key = 0-based rank (as a string) at which a question's
# scores first differ, value = number of such questions.
res

defaultdict(int,
            {'4': 22,
             '9': 33,
             '8': 27,
             '2': 11,
             '7': 19,
             '5': 16,
             '0': 12,
             '3': 13,
             '6': 23,
             '1': 10})

### 结果总结

1. 在相同的问题和搜索参数下， `dense_vs.client.search` 和 `hybrid_vs.client.search` 的结果并不完全一致：500 个问题中有 314 个检索结果一致，186 个不一致

2. `dense_vs.similarity_search_with_score` 和 `hybrid_vs.client.search` 得到的不一致检索结果存储在 `diff.json` 中

    `dense_vs.client.search` 和 `hybrid_vs.client.search` 得到的不一致检索结果存储在 `client_search_diff.json` 中

    两个文件的 SHA-256 摘要是一致的（由下方单元格校验，输出为 `True`），说明两次检索所捕获的不一致结果完全相同

In [None]:
import hashlib
from pathlib import Path

BUF_SIZE = 1024 * 1024  # read size per chunk (1 MiB); adjust as needed

def sha256_of_file(path: Path) -> str:
    """Return the hexadecimal SHA-256 digest of the file at ``path``.

    The file is opened in binary mode and fed to the hash ``BUF_SIZE`` bytes
    at a time, so the whole file is never held in memory at once.
    """
    digest = hashlib.sha256()
    with path.open("rb") as stream:
        # iter() with a sentinel keeps calling read() until it returns b"" (EOF).
        for block in iter(lambda: stream.read(BUF_SIZE), b""):
            digest.update(block)
    return digest.hexdigest()

def files_equal_sha256(path1: Path, path2: Path) -> bool:
    """Return True when both files produce the same SHA-256 digest."""
    first_digest = sha256_of_file(path1)
    second_digest = sha256_of_file(path2)
    return first_digest == second_digest

# Compare the two persisted diff files by SHA-256 digest; the value of the
# last expression is what the cell displays.
path1 = r'C:\Users\Lenovo\Desktop\RAG rebuild v2\diff.json'
path2 = r'C:\Users\Lenovo\Desktop\RAG rebuild v2\client_search_diff.json'
files_equal_sha256(Path(path1), Path(path2))

True