在使用 llamaindex 构建 rag 引擎时，可以有以下方式：
- 所有文档 1 个引擎
- 每个文档1 个引擎：对每个文档构建引擎
- 每个文档 2 个引擎：对每个文档，构建2种查询引擎，比如关键字、向量等，然后通过llm选择查询引擎

|方法|answer_relevancy|context_relevancy|correctness|faithfulness|
|---|---|---|---|---|
|所有文档1个引擎|0.75|0.7375|2.925|0.15|
|每个文档1个引擎|0.7|0.866875|2.825|0.15|
|每个文档2个引擎|0.775|0.76375|2.9|0.15|
|每个文档2个引擎2|0.775|0.745|2.925|0.2|

指标仅具有相对参考意义，原因如下：1) 没有使用私域数据，文档内的知识可能 llm 本身就已具备；2) 没有定制 prompt，不同方式的倾向不同，有的方法擅长给出步骤，有的方法擅长总结；3) 测试数据有限，数据太少会导致指标出现偏差。



In [1]:
from llama_index.core import Settings
from llama_index.llms.ollama import Ollama
from llama_index.embeddings.ollama import OllamaEmbedding

# Endpoint shared by the chat model and the embedding model below.
base_url = "http://localhost:11434"

# Chat model; also registered as the global default LLM.
llm = Ollama(
    base_url=base_url,
    model="qwen2.5:latest",
    request_timeout=360.0,
)
Settings.llm = llm

# Embedding model registered as the global default.
Settings.embed_model = OllamaEmbedding(
    base_url=base_url,
    model_name="quentinz/bge-large-zh-v1.5:latest",
)

## 生成测试数据

In [2]:
# Enable async for the notebook.
# NOTE(review): presumably needed because the notebook kernel already runs an
# event loop and later cells both `await` and call synchronous wrappers around
# async code — confirm.
import nest_asyncio
nest_asyncio.apply()

In [3]:
def displayify_df(df):
    """Show a DataFrame in the notebook with fixed-width, word-wrapped cells.

    Args:
        df: Object exposing a ``.style`` accessor with ``set_properties``
            (presumably a pandas DataFrame — confirm against callers).
    """
    cell_css = {
        "inline-size": "500px",
        "overflow-wrap": "break-word",
    }
    styled = df.style.set_properties(**cell_css)
    display(styled)

In [4]:
import os
from llama_index.core.llama_dataset.generator import RagDatasetGenerator
from llama_index.core.prompts.base import PromptTemplate
from llama_index.core.prompts.prompt_type import PromptType
from llama_index.core.llama_dataset import LabeledRagDataset
from llama_index.core.llama_dataset import RagPredictionDataset

async def Build_test_dataset(nodes,query_engine,test_size=10,data_dir='./data',prefix=''):
    """Load (or generate) the shared question set and the predictions for one engine.

    Both artifacts are cached as JSON files under ``data_dir``:

    * ``ragdataset.json`` – the labelled questions, shared by every engine.
    * ``{prefix}-ragdataset_predictions.json`` – the answers of ``query_engine``.

    If a cache file exists it is loaded and the corresponding generation step
    is skipped entirely. Callers must therefore pass a distinct ``prefix`` for
    every engine they want to compare, otherwise the predictions of an earlier
    engine are silently reused.

    Args:
        nodes: Nodes to sample questions from. Either a flat sequence of nodes
            or a sequence of per-document node lists (flattened here).
        query_engine: Predictor handed to ``amake_predictions_with``.
        test_size: Maximum number of nodes to sample for question generation.
        data_dir: Directory holding the cache files; created if missing.
        prefix: Per-engine prefix of the predictions cache file.

    Returns:
        Tuple ``(rag_dataset, rag_predictions_dataset)``.
    """
    # Create the cache directory up front; the save_json calls below write to
    # plain file paths inside it.
    if data_dir:
        os.makedirs(data_dir,exist_ok=True)

    ragdataset_path=os.path.join(data_dir,'ragdataset.json')
    ragdataset_predictions_path=os.path.join(data_dir,f'{prefix}-ragdataset_predictions.json')

    if os.path.exists(ragdataset_path):
        rag_dataset=LabeledRagDataset.from_json(ragdataset_path)
    else:
        DEFAULT_QUESTION_GENERATION_PROMPT = """\
        Context information is below.
        ---------------------
        {context_str}
        ---------------------
        Given the context information and not prior knowledge.
        generate only questions based on the below query.
        使用中文生成答案
        {query_str}
        """

        DEFAULT_TEXT_QA_PROMPT_TMPL=(
            "Context information is below.\n"
            "---------------------\n"
            "{context_str}\n"
            "---------------------\n"
            "Given the context information and not prior knowledge,answer the query.\n"
            "使用中文生成答案\n"
            "Query: {query_str}\n"
            "Answer: "
        )

        text_qa_template = PromptTemplate(
            DEFAULT_TEXT_QA_PROMPT_TMPL, prompt_type=PromptType.QUESTION_ANSWER
        )

        text_question_template=PromptTemplate(DEFAULT_QUESTION_GENERATION_PROMPT)

        num_questions_per_chunk=1
        # role="Teacher/Professor"
        role="小说作家"
        question_gen_query=f"""
            You are a {role}. 
            Your task is to setup {num_questions_per_chunk} questions for an upcoming quiz/examination. 
            The questions should be diverse in nature across the document. 
            Restrict the questions to the context information provided. 
        """

        # Accept either a flat list of nodes or one list of nodes per document.
        flat_nodes=[]
        for item in nodes:
            if isinstance(item,(list,tuple)):
                flat_nodes.extend(item)
            else:
                flat_nodes.append(item)

        # A private, fixed-seed generator keeps the sample reproducible
        # without reseeding the process-wide `random` module.
        import random
        test_size=min(len(flat_nodes),test_size)
        sample_nodes=random.Random(0).sample(flat_nodes,test_size)

        # step 1: initialise the dataset generator
        print('step1:初始化数据生成器')
        rag_dataset_generator=RagDatasetGenerator(nodes=sample_nodes,
                                                text_question_template=text_question_template,
                                                text_qa_template=text_qa_template,
                                                question_gen_query=question_gen_query,
                                                num_questions_per_chunk=num_questions_per_chunk)

        # step 2: generate questions (with reference answers) for every sampled node
        # NOTE(review): synchronous call inside a coroutine; presumably relies
        # on nest_asyncio having been applied in the notebook — confirm.
        print('step2:为每个node生成问题（包含标准答案）')
        rag_dataset = rag_dataset_generator.generate_dataset_from_nodes()
        rag_dataset.save_json(ragdataset_path)

    if os.path.exists(ragdataset_predictions_path):
        rag_predictions_dataset=RagPredictionDataset.from_json(ragdataset_predictions_path)
    else:
        # step 3: let the engine under test answer every question
        print('step3:使用query_engine回答问题')
        rag_predictions_dataset=await rag_dataset.amake_predictions_with(
            predictor=query_engine,
            batch_size=10,
            sleep_time_in_seconds=2
            )
        rag_predictions_dataset.save_json(ragdataset_predictions_path)

    return rag_dataset,rag_predictions_dataset

## 定义评估函数

In [5]:
from typing import List

from llama_index.core.evaluation import BatchEvalRunner
from llama_index.core.evaluation import (
    AnswerRelevancyEvaluator,
    ContextRelevancyEvaluator,
    CorrectnessEvaluator,
    FaithfulnessEvaluator,
)

# Shared batch runner: one evaluator per metric, keyed by the metric name
# that is later printed next to its average score.
runner = BatchEvalRunner(
    evaluators=dict(
        answer_relevancy=AnswerRelevancyEvaluator(),
        context_relevancy=ContextRelevancyEvaluator(),
        correctness=CorrectnessEvaluator(),
        faithfulness=FaithfulnessEvaluator(),
    ),
    workers=12,
    show_progress=True,
)

async def eval_query_engine(queries:List[str],contexts_list:List[List[str]],response_strs:List[str]):
    """Run every evaluator of the module-level ``runner`` and print mean scores.

    Args:
        queries: Questions that were asked.
        contexts_list: For each question, the context strings to judge against.
        response_strs: For each question, the answer text to be judged.

    Prints one ``"<metric> Score: <mean>"`` line per evaluator. A result whose
    ``score`` is missing or ``None`` counts as 0 in the mean.
    """
    eval_results=await runner.aevaluate_response_strs(
        queries=queries,
        contexts_list=contexts_list,
        response_strs=response_strs
    )

    for key,results in eval_results.items():
        # Guard the division: an evaluator may come back with no results
        # (e.g. when the input lists are empty).
        if not results:
            print(f"{key} Score: n/a (no results)")
            continue
        total = sum(getattr(result,'score',None) or 0 for result in results)
        print(f"{key} Score: {total / len(results)}")

## 所有文档构建1个查询引擎

In [6]:
from llama_index.core import SimpleDirectoryReader
from llama_index.core.node_parser import SentenceSplitter

# 1.读取数据
documents=SimpleDirectoryReader(input_dir='../../data/sidaminzhu',recursive=True).load_data(show_progress=True)
splitter = SentenceSplitter(chunk_size=1024)
nodes = splitter.get_nodes_from_documents(documents,show_progress=True)

# 2.所有文档生成1个查询引擎
from llama_index.core import VectorStoreIndex
index=VectorStoreIndex(nodes=nodes,show_progress=True)
query_engine=index.as_query_engine()

# 3.基于node及查询引擎生成测试数据
rag_dataset,rag_predictions_dataset=await Build_test_dataset(
    nodes,query_engine,test_size=20,prefix='OneEngine')

# 4.测试
queries=[example.query for example in rag_dataset.examples]
contexts_list=[example.reference_contexts for example in rag_dataset.examples]
response_strs=[example.response for example in rag_predictions_dataset.predictions]
await eval_query_engine(queries,contexts_list,response_strs)

Loading files: 100%|██████████| 140/140 [00:00<00:00, 474.34file/s]
  from .autonotebook import tqdm as notebook_tqdm
Parsing nodes: 100%|██████████| 140/140 [00:00<00:00, 144.45it/s]
Generating embeddings: 100%|██████████| 963/963 [01:12<00:00, 13.33it/s]
100%|██████████| 80/80 [03:21<00:00,  2.52s/it]

answer_relevancy Score: 0.75
context_relevancy Score: 0.7375
correctness Score: 2.925
faithfulness Score: 0.15





## 每个文档构建1个查询引擎

In [7]:
import glob 
from llama_index.core.tools import FunctionTool

# 1. Load each entry under the corpus root separately
#    (presumably one directory per book — confirm).
dir_list = glob.glob('../../data/sidaminzhu/*')
documents = [
    SimpleDirectoryReader(book_dir).load_data(show_progress=True)
    for book_dir in dir_list
]

# 2. Split every book into nodes, keeping one node list per book.
splitter = SentenceSplitter(chunk_size=1024)
documents_nodes = [
    splitter.get_nodes_from_documents(book_docs)
    for book_docs in documents
]

# 3. Build one vector index per book.
documents_indexs = [
    VectorStoreIndex(nodes=book_nodes, show_progress=True)
    for book_nodes in documents_nodes
]

# 4.构建四大名著的query_engine，并构建回复函数
def get_doc_tools(
    vector_index,name: str,
) -> "FunctionTool":
    """Wrap one document's vector index as a tool an agent can call.

    Args:
        vector_index: Index exposing ``as_query_engine``.
        name: Document name; embedded in the tool name and description.

    Returns:
        A ``FunctionTool`` named ``vector_tool_{name}`` whose function
        answers a question with a top-2 similarity query on ``vector_index``.
    """
    # Build the query engine once instead of on every tool invocation.
    query_engine = vector_index.as_query_engine(similarity_top_k=2)

    def vector_query(query: str):
        """Answer a question about this document.

        Args:
            query: The question text.

        Returns:
            The query engine's response object, unchanged.
        """
        return query_engine.query(query)

    vector_query_tool = FunctionTool.from_defaults(
        name=f"vector_tool_{name}", fn=vector_query,description=f"关于{name}问题的回答助手"
    )

    return vector_query_tool

# 5.封装四大名著的query_engine为tools
dir_info=[os.path.split(dir)[1].replace('白话文','') for dir in dir_list]
documents_tools=[
    get_doc_tools(vector_index,dir_info[i])
    for i,vector_index in enumerate(documents_indexs)
]

# 6.将四大名著的tools封装为1个index，并生成检索器
from llama_index.core.objects import ObjectIndex
tool_index=ObjectIndex.from_objects(
    documents_tools,
    index_cls=VectorStoreIndex
)
tool_retriever=tool_index.as_retriever(similarity_top_k=1)

# 7.基于FunctionCallingAgent生成测试数据集
from llama_index.core.agent import FunctionCallingAgent
agent = FunctionCallingAgent.from_tools(
    tool_retriever=tool_retriever,
    system_prompt="""You are an agent designed to answer queries over a set of given documents.
    Please use the tools provided to answer a question as possible. Do not rely on prior knowledge\
    """,
    verbose=False,
)
rag_dataset,rag_predictions_dataset=await Build_test_dataset(
    documents_nodes,agent,test_size=20,prefix='OneEngine')

# 8.评估
queries=[example.query for example in rag_dataset.examples]
contexts_list=[example.reference_contexts for example in rag_dataset.examples]
response_strs=[example.response for example in rag_predictions_dataset.predictions]
await eval_query_engine(queries,contexts_list,response_strs)

Loading files: 100%|██████████| 41/41 [00:00<00:00, 2641.98file/s]
Loading files: 100%|██████████| 34/34 [00:00<00:00, 2833.31file/s]
Loading files: 100%|██████████| 26/26 [00:00<00:00, 2888.79file/s]
Loading files: 100%|██████████| 39/39 [00:00<00:00, 1559.21file/s]
Generating embeddings: 100%|██████████| 127/127 [00:09<00:00, 12.74it/s]
Generating embeddings: 100%|██████████| 312/312 [00:21<00:00, 14.41it/s]
Generating embeddings: 100%|██████████| 340/340 [00:24<00:00, 13.91it/s]
Generating embeddings: 100%|██████████| 184/184 [00:13<00:00, 13.36it/s]
100%|██████████| 80/80 [03:43<00:00,  2.79s/it]

answer_relevancy Score: 0.7
context_relevancy Score: 0.866875
correctness Score: 2.825
faithfulness Score: 0.15





## 每个文档构建2个查询引擎

In [8]:
from llama_index.core import indices

# Names exported by `indices` that contain "Index" anywhere after position 0
# (`str.find` must return a value greater than 0).
indexs = [attr for attr in dir(indices) if attr.find('Index') > 0]
print(indexs)

['DocumentSummaryIndex', 'EmptyIndex', 'GPTDocumentSummaryIndex', 'GPTEmptyIndex', 'GPTKeywordTableIndex', 'GPTListIndex', 'GPTPandasIndex', 'GPTRAKEKeywordTableIndex', 'GPTSQLStructStoreIndex', 'GPTSimpleKeywordTableIndex', 'GPTTreeIndex', 'GPTVectorStoreIndex', 'KeywordTableIndex', 'KnowledgeGraphIndex', 'ListIndex', 'MultiModalVectorStoreIndex', 'PandasIndex', 'PropertyGraphIndex', 'RAKEKeywordTableIndex', 'SQLStructStoreIndex', 'SimpleKeywordTableIndex', 'SummaryIndex', 'TreeIndex', 'VectorStoreIndex']


由以上输出可知，llamaindex 索引内容的方式有多种。以下选择 KeywordTableIndex 和 VectorStoreIndex 分别对每个文档建立索引；检索时，由 llm 根据问题选择不同的方式检索。

In [9]:
import glob 
from llama_index.core import KeywordTableIndex,VectorStoreIndex
from llama_index.core.tools import FunctionTool

from llama_index.core import SimpleDirectoryReader
from llama_index.core.node_parser import SentenceSplitter

# 1. Load each entry under the corpus root separately, split it into nodes,
#    and build two indexes per entry.
dir_list = glob.glob('../../data/sidaminzhu/*')
documents = [
    SimpleDirectoryReader(book_dir).load_data(show_progress=True)
    for book_dir in dir_list
]

splitter = SentenceSplitter(chunk_size=1024)
documents_nodes = [
    splitter.get_nodes_from_documents(book_docs)
    for book_docs in documents
]

# Vector index per document.
vector_indexs = [
    VectorStoreIndex(nodes=book_nodes, show_progress=True)
    for book_nodes in documents_nodes
]

# Keyword-table index per document.
keyword_indexs = [
    KeywordTableIndex(nodes=book_nodes, show_progress=True)
    for book_nodes in documents_nodes
]

# 2.将不同索引方式封装在一起
def get_doc_tools(
    vector_index,summary_indexs,name: str,
) -> "tuple[FunctionTool, FunctionTool]":
    """Wrap one document's two indexes as two tools an agent can call.

    Args:
        vector_index: This document's vector index (exposes ``as_query_engine``).
        summary_indexs: This document's keyword index (a single index despite
            the plural name; exposes ``as_query_engine``).
        name: Document name; embedded in the tool names and descriptions.

    Returns:
        ``(vector_query_tool, keyword_query_tool)`` named
        ``vector_tool_{name}`` and ``keyword_tool_{name}``.
    """
    # Build both engines once instead of on every tool invocation.
    vector_engine = vector_index.as_query_engine(similarity_top_k=2)
    # Use the per-document index passed in, not the module-level list of all
    # keyword indexes (a list has no `as_query_engine`).
    keyword_engine = summary_indexs.as_query_engine(
        response_mode="tree_summarize",
        use_async=True,
    )

    def vector_query(query: str):
        """Answer a question about this document via the vector index.

        Args:
            query: The question text.

        Returns:
            The query engine's response object, unchanged.
        """
        return vector_engine.query(query)

    def keyword_query(query: str):
        """Answer a question about this document via the keyword index.

        Args:
            query: The question text.

        Returns:
            The query engine's response object, unchanged.
        """
        return keyword_engine.query(query)

    # Give the two tools different descriptions so whatever selects a tool can
    # tell them apart; the texts are the ones originally written above each
    # function.
    vector_query_tool = FunctionTool.from_defaults(
        name=f"vector_tool_{name}",
        fn=vector_query,
        description=f"通过语义相关查询回答关于{name}的问题，擅长精确查询答案",
    )

    keyword_query_tool = FunctionTool.from_defaults(
        name=f"keyword_tool_{name}",
        fn=keyword_query,
        description=f"回答关于{name}的问题，擅长输出归纳性总结",
    )

    return vector_query_tool,keyword_query_tool

dir_info=[os.path.split(dir)[1].replace('白话文','') for dir in dir_list]
documents_tools=[
    get_doc_tools(vector_index,keyword_index,dir_info[i])
    for i,(vector_index,keyword_index) in enumerate(zip(vector_indexs,keyword_indexs))
]
all_tools = [t for documents_tools in documents_tools for t in documents_tools] # 注意：这里把所有文档的2个索引工具都放在一起

# 3.将所有检索工具封装到一起
from llama_index.core.objects import ObjectIndex
tool_index=ObjectIndex.from_objects(
    all_tools,
    index_cls=VectorStoreIndex
)
tool_retriever=tool_index.as_retriever(similarity_top_k=2)

# 4.生成测试数据
from llama_index.core.agent import FunctionCallingAgent
agent = FunctionCallingAgent.from_tools(
    tool_retriever=tool_retriever,
    system_prompt="""You are an agent designed to answer queries over a set of given documents.
    Please use the tools provided to answer a question as possible. Do not rely on prior knowledge\
    """
)
rag_dataset,rag_predictions_dataset=await Build_test_dataset(
    documents_nodes,agent,test_size=20,prefix='OneEngine')

# 5. 评估
queries=[example.query for example in rag_dataset.examples]
contexts_list=[example.reference_contexts for example in rag_dataset.examples]
response_strs=[example.response for example in rag_predictions_dataset.predictions]

await eval_query_engine(queries,contexts_list,response_strs)

Loading files: 100%|██████████| 41/41 [00:00<00:00, 2733.10file/s]
Loading files: 100%|██████████| 34/34 [00:00<00:00, 2425.03file/s]
Loading files: 100%|██████████| 26/26 [00:00<00:00, 2129.46file/s]
Loading files: 100%|██████████| 39/39 [00:00<00:00, 2166.39file/s]
Generating embeddings: 100%|██████████| 127/127 [00:10<00:00, 12.55it/s]
Generating embeddings: 100%|██████████| 312/312 [00:23<00:00, 13.55it/s]
Generating embeddings: 100%|██████████| 340/340 [00:23<00:00, 14.34it/s]
Generating embeddings: 100%|██████████| 184/184 [00:12<00:00, 14.22it/s]
Extracting keywords from nodes: 100%|██████████| 127/127 [03:51<00:00,  1.83s/it]
Extracting keywords from nodes: 100%|██████████| 312/312 [12:08<00:00,  2.33s/it]
Extracting keywords from nodes: 100%|██████████| 340/340 [10:55<00:00,  1.93s/it]
Extracting keywords from nodes: 100%|██████████| 184/184 [05:16<00:00,  1.72s/it]
100%|██████████| 80/80 [02:33<00:00,  1.92s/it]

answer_relevancy Score: 0.775
context_relevancy Score: 0.76375
correctness Score: 2.9
faithfulness Score: 0.15





## 每个文档构建2个查询引擎2

前面生成agent时，所有的engine都一起放入agent，可能存在问题，以下先通过RouterQueryEngine汇总一个文档的所有engine，再放到agent中

In [10]:
import glob 
from llama_index.core import KeywordTableIndex,VectorStoreIndex
from llama_index.core.tools import FunctionTool

from llama_index.core import SimpleDirectoryReader
from llama_index.core.node_parser import SentenceSplitter

# 1.分别读取生成四大名著的nodes、indexs
# dir_list=glob.glob('../../data/sidaminzhu/*')
# documents=[
#     SimpleDirectoryReader(input_dir).load_data(show_progress=True)
#     for input_dir in dir_list
#  ]

# splitter = SentenceSplitter(chunk_size=1024)
# documents_nodes=[
#     splitter.get_nodes_from_documents(document)
#     for document in documents
# ]

# vector_indexs=[
#     VectorStoreIndex(nodes=nodes,show_progress=True)
#     for nodes in documents_nodes
# ]

# keyword_indexs=[
#     KeywordTableIndex(nodes=nodes,show_progress=True)
#     for nodes in documents_nodes
# ]

# 2. 通过QueryEngineTool将每个文档的KeywordTableIndex与VectorStoreIndex索引封装为一个查询引擎
from llama_index.core.query_engine.router_query_engine import RouterQueryEngine
from llama_index.core.selectors import LLMSingleSelector
from llama_index.core.tools import QueryEngineTool

def _router_engine_for(vector_index, keyword_index):
    """Combine one document's vector and keyword engines behind an LLM router."""
    retrieval_tool = QueryEngineTool.from_defaults(
        query_engine=vector_index.as_query_engine(similarity_top_k=2),
        description=(
            "Useful for retrieving specific context from the documents"
        ),
    )
    summarization_tool = QueryEngineTool.from_defaults(
        query_engine=keyword_index.as_query_engine(
            response_mode="tree_summarize",
            use_async=True,
        ),
        description=("Useful for summarization questions related to documents"),
    )
    return RouterQueryEngine(
        selector=LLMSingleSelector.from_defaults(),
        query_engine_tools=[retrieval_tool, summarization_tool],
        verbose=True,
    )


# One router engine per document, in the same order as the index lists.
documents_engines = [
    _router_engine_for(vector_index, keyword_index)
    for vector_index, keyword_index in zip(vector_indexs, keyword_indexs)
]

# 3. 通过QueryEngineTool将所有文档的查询引擎融合到一起
from llama_index.core.tools import QueryEngineTool,ToolMetadata
# Book name = directory name with '白话文' removed.
dir_info = [os.path.basename(book_dir).replace('白话文', '') for book_dir in dir_list]

# Expose every per-document router engine as a tool named after its book.
query_engine_tools = [
    QueryEngineTool(
        query_engine=router_engine,
        metadata=ToolMetadata(
            name=f"query_engine_{dir_info[idx]}",
            description=f"回答关于{dir_info[idx]}的问题",
        ),
    )
    for idx, router_engine in enumerate(documents_engines)
]

# 4. 生成测试数据
from llama_index.core.agent import FunctionCallingAgent
agent = FunctionCallingAgent.from_tools(
    tools=query_engine_tools,
    system_prompt="""You are an agent designed to answer queries over a set of given documents.
    Please use the tools provided to answer a question as possible. Do not rely on prior knowledge\
    """,
    verbose=True,
)
rag_dataset,rag_predictions_dataset=await Build_test_dataset(
    documents_nodes,agent,test_size=20,prefix='OneEngine')

# 5.评估
queries=[example.query for example in rag_dataset.examples]
contexts_list=[example.reference_contexts for example in rag_dataset.examples]
response_strs=[example.response for example in rag_predictions_dataset.predictions]
await eval_query_engine(queries,contexts_list,response_strs)

100%|██████████| 80/80 [02:44<00:00,  2.06s/it]

answer_relevancy Score: 0.775
context_relevancy Score: 0.745
correctness Score: 2.925
faithfulness Score: 0.2



