### Multi document agent 
In this notebook, we will build a multi-document agent. In this architecture each document has its own individual agent, and an overarching (top-level) agent decides which of those agents a question should be routed to.

In [4]:
%load_ext autoreload
%autoreload 2

In [5]:
# needed only once to download the llama index documentation website
# domain = "docs.llamaindex.ai"
# docs_url = "https://docs.llamaindex.ai/en/latest/"
# !wget -e robots=off --recursive --no-clobber --page-requisites --html-extension --convert-links --restrict-file-names=windows --domains {domain} --no-parent {docs_url}


In [6]:
from llama_hub.file.unstructured.base import UnstructuredReader
from llama_index.llms.openai import OpenAI
from llama_index.service_context import ServiceContext
from pathlib import Path

In [7]:
reader = UnstructuredReader()

[nltk_data] Downloading package punkt to
[nltk_data]     /Users/kosisochukwuasuzu/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/kosisochukwuasuzu/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


In [8]:
# Recursively walk the downloaded documentation tree and resolve every entry
# (files and directories alike) to an absolute path.
all_files_gen = Path("./docs.llamaindex.ai/").rglob("*")
all_files = list(map(Path.resolve, all_files_gen))

In [9]:
len(all_files)

322

In [11]:
# Distinct file extensions present in the download ('' for entries without one).
suffixes = {path.suffix for path in all_files}

In [12]:
suffixes

{'', '.css', '.html', '.js'}

In [13]:
# Keep only the HTML pages; the extension is compared case-insensitively.
all_html_files = list(
    filter(lambda path: path.suffix.lower() == ".html", all_files)
)

In [14]:
len(all_html_files)

227

In [15]:
from llama_index.schema import Document

doc_limit = 100

In [16]:
# Load at most `doc_limit` HTML pages and collapse each page into one Document.
# The first `start_idx` elements returned by the reader are dropped from every
# page (presumably shared site chrome such as navigation — TODO confirm that 72
# is still the right offset for the downloaded docs).
start_idx = 72
docs = []
# Slicing instead of indexing over range(doc_limit) avoids an IndexError when
# fewer than `doc_limit` HTML files were found.
for idx, f in enumerate(all_html_files[:doc_limit]):
    print(f"Idx {idx}/{len(all_html_files)}")
    loaded_parts = reader.load_data(file=f, split_documents=True)
    loaded_doc = Document(
        text="\n\n".join([d.get_content() for d in loaded_parts[start_idx:]]),
        metadata={"path": str(f)},
    )
    print(loaded_doc.metadata["path"])
    docs.append(loaded_doc)

Idx 0/227
/Users/kosisochukwuasuzu/Developer/ai-startups/test-demos/pdfchat/src/llamaindex/qa/multidocagentqa/docs.llamaindex.ai/en/latest/index.html
Idx 1/227
/Users/kosisochukwuasuzu/Developer/ai-startups/test-demos/pdfchat/src/llamaindex/qa/multidocagentqa/docs.llamaindex.ai/en/latest/genindex.html
Idx 2/227
/Users/kosisochukwuasuzu/Developer/ai-startups/test-demos/pdfchat/src/llamaindex/qa/multidocagentqa/docs.llamaindex.ai/en/latest/search.html
Idx 3/227
/Users/kosisochukwuasuzu/Developer/ai-startups/test-demos/pdfchat/src/llamaindex/qa/multidocagentqa/docs.llamaindex.ai/en/latest/understanding/understanding.html
Idx 4/227
/Users/kosisochukwuasuzu/Developer/ai-startups/test-demos/pdfchat/src/llamaindex/qa/multidocagentqa/docs.llamaindex.ai/en/latest/understanding/using_llms/using_llms.html
Idx 5/227
/Users/kosisochukwuasuzu/Developer/ai-startups/test-demos/pdfchat/src/llamaindex/qa/multidocagentqa/docs.llamaindex.ai/en/latest/understanding/using_llms/privacy.html
Idx 6/227
/Users/

In [17]:
# LLM and service context shared by the indexes built below; temperature is
# set to 0.0 explicitly.
# NOTE(review): the OpenAI client class is given a Mixtral model id — presumably
# an OpenAI-compatible endpoint is configured outside this notebook; confirm.
llm = OpenAI(model="mistralai/Mixtral-8x7B-Instruct-v0.1", temperature=0.0)
service_context = ServiceContext.from_defaults(llm=llm)

In [18]:
from llama_index.indices.vector_store import VectorStoreIndex
from llama_index.indices.list.base import SummaryIndex


In [19]:
import nest_asyncio

nest_asyncio.apply()

#### Building document agent for each html document
This means we will have ~227 agents. We define two query engines on each document: a semantic search engine and a summarization engine. These are then passed to the agent as tools; we use OpenAI's function-calling capability to achieve this.

In [20]:
from llama_index.agent.openai.base import OpenAIAgent
from llama_index.indices.loading import load_index_from_storage
from llama_index.storage import StorageContext
from llama_index.tools.query_engine import QueryEngineTool
from llama_index.tools.types import ToolMetadata
from llama_index.node_parser import SentenceSplitter
import os
from tqdm import tqdm
import pickle


In [21]:
from textwrap import dedent

In [22]:
async def build_agent_per_doc(nodes, file_base):
    """Build the agent for a single document and return it with a short summary.

    A vector index and a summary index are created over ``nodes`` and each is
    exposed to the agent as a query-engine tool. The vector index and the
    generated summary text are cached under ``./data/llamaindex_docs/`` and are
    reused when they already exist on disk.

    Args:
        nodes: Parsed nodes of one document.
        file_base: Identifier of the document; used in cache paths and tool names.

    Returns:
        A ``(agent, summary)`` tuple, where ``summary`` is the summary text.
    """
    print(file_base)
    vi_out_path = f"./data/llamaindex_docs/{file_base}"  # output path for vector index
    summary_out_path = f"./data/llamaindex_docs/{file_base}_summary.pkl"  # output file for the summary text

    if not os.path.exists(vi_out_path):
        Path("./data/llamaindex_docs/").mkdir(parents=True, exist_ok=True)
        vector_index = VectorStoreIndex(nodes, service_context=service_context)
        vector_index.storage_context.persist(persist_dir=vi_out_path)
    else:
        vector_index = load_index_from_storage(
            StorageContext.from_defaults(persist_dir=vi_out_path),
            service_context=service_context,
        )

    # The summary index itself is not persisted; only the summary text is cached.
    summary_index = SummaryIndex(nodes, service_context=service_context)

    vector_query_engine = vector_index.as_query_engine()
    summary_query_engine = summary_index.as_query_engine(response_mode="tree_summarize")

    if not os.path.exists(summary_out_path):
        Path(summary_out_path).parent.mkdir(parents=True, exist_ok=True)
        summary = str(
            await summary_query_engine.aquery(
                "Extract a concise 1-2 line summary of this document"
            )
        )
        # Context managers make sure the cache file is flushed and closed.
        with open(summary_out_path, "wb") as summary_file:
            pickle.dump(summary, summary_file)
    else:
        # CAUTION: pickle.load can execute arbitrary code; only load cache files
        # that were written by this notebook.
        with open(summary_out_path, "rb") as summary_file:
            summary = pickle.load(summary_file)

    query_tools = [
        QueryEngineTool(
            query_engine=vector_query_engine,
            metadata=ToolMetadata(
                name=f"vector_tool_{file_base}",
                description="Useful for questions related to specific facts",
            ),
        ),
        QueryEngineTool(
            query_engine=summary_query_engine,
            metadata=ToolMetadata(
                name=f"summary_tool_{file_base}",
                description="Useful for summarization questions",
            ),
        ),
    ]

    function_llm = OpenAI(model="mistralai/Mixtral-8x7B-Instruct-v0.1")

    agent = OpenAIAgent.from_llm(
        llm=function_llm,
        tools=query_tools,
        verbose=True,
        # system_prompt=dedent(f"""\
        #     You are a specialized agent designed to answer queries about the `{file_base}.html` part of the LlamaIndex docs.
        #     You must ALWAYS use at least one of the tools provided when answering a question; do NOT rely on prior knowledge.\
        #     """),
    )

    return agent, summary

In [23]:
async def build_agents(docs):
    """Create one agent per document.

    Args:
        docs: Documents whose ``metadata["path"]`` holds the source file path.

    Returns:
        Two dicts keyed by document id: the first maps to the document's agent,
        the second to a dict with that document's ``"summary"`` and ``"nodes"``.
    """
    splitter = SentenceSplitter()

    agents_by_doc = {}
    info_by_doc = {}

    for document in tqdm(docs):
        doc_nodes = splitter.get_nodes_from_documents([document])
        # The document id is "<parent directory stem>_<file stem>".
        source_path = Path(document.metadata["path"])
        doc_id = f"{source_path.parent.stem}_{source_path.stem}"
        doc_agent, doc_summary = await build_agent_per_doc(doc_nodes, doc_id)

        agents_by_doc[doc_id] = doc_agent
        info_by_doc[doc_id] = {"summary": doc_summary, "nodes": doc_nodes}

    return agents_by_doc, info_by_doc
        

In [25]:
# Build agents for the first 7 loaded documents only.
agents_dict, extra_info_dict = await build_agents(docs[:7])

  0%|          | 0/7 [00:00<?, ?it/s]

latest_index
latest_genindex


 14%|█▍        | 1/7 [12:00<1:12:03, 720.65s/it]


APITimeoutError: Request timed out.

#### Next we will build the retriever-enabled OpenAI agent
We are going to build the top level agent that orchestrates the use of the other agents to answer user queries

In [None]:
# Wrap every per-document agent as a tool; the document's summary serves as the
# tool description.
all_tools = []

for file_base, agent in agents_dict.items():
    summary = extra_info_dict[file_base]["summary"]
    tool_metadata = ToolMetadata(name=f"tool_{file_base}", description=summary)
    doc_tool = QueryEngineTool(query_engine=agent, metadata=tool_metadata)
    all_tools.append(doc_tool)

NameError: name 'agents_dict' is not defined

In [None]:
from llama_index.objects import ObjectIndex, SimpleToolNodeMapping, ObjectRetriever
from llama_index.retrievers import BaseRetriever
from llama_index.postprocessor import CohereRerank
from llama_index.tools import QueryPlanTool
from llama_index.query_engine import SubQuestionQueryEngine


In [None]:
llm = OpenAI(model="mistralai/Mixtral-8x7B-Instruct-v0.1")

In [None]:
# Index the document tools so that the relevant ones can be looked up per query.
tool_mapping = SimpleToolNodeMapping.from_objects(all_tools)
obj_index = ObjectIndex.from_objects(all_tools, tool_mapping, VectorStoreIndex)

# Node-level retriever over the tool index, configured with similarity_top_k=10.
vector_node_retreiver = obj_index.as_node_retriever(similarity_top_k=10)

In [None]:
# define a custom retriever with reranking. Why don't we add this as a postprocessor to our current retriever?
class CustomRetrevier(BaseRetriever):
    """Retriever that post-processes the results of a wrapped retriever.

    Args:
        vector_retriever: Retriever whose results are post-processed.
        postprocessor: Node postprocessor applied to the retrieved nodes. When
            omitted (or falsy), ``CohereRerank(top_n=5)`` is used.
    """

    def __init__(self, vector_retriever, postprocessor=None):
        self._vector_retriever = vector_retriever
        self._postprocessor = postprocessor or CohereRerank(top_n=5)
        super().__init__()

    # The hook must be spelled `_retrieve`: that is the method the base class
    # expects subclasses to implement.
    def _retrieve(self, query_bundle):
        """Retrieve nodes for ``query_bundle`` and return them post-processed."""
        retrieved_nodes = self._vector_retriever.retrieve(query_bundle)
        return self._postprocessor.postprocess_nodes(
            retrieved_nodes, query_bundle=query_bundle
        )
    

In [None]:
# define a custom object retriever that adds in a query planning tool
from typing import List


class CustomObjectRetriever(ObjectRetriever):
    """Object retriever that returns the retrieved tools plus a comparison tool.

    Args:
        retriever: Node retriever used to find the relevant tool nodes.
        object_node_mapping: Mapping used to turn retrieved nodes back into tools.
        all_tools: Complete list of tools; stored but not used by ``retrieve``.
        llm: LLM for the sub-question engine. Defaults to an ``OpenAI`` client
            configured with the Mixtral model id.
    """

    def __init__(self, retriever, object_node_mapping, all_tools, llm=None):
        self._retriever = retriever
        self._object_node_mapping = object_node_mapping
        self._all_tools = all_tools
        self._llm = llm or OpenAI(model="mistralai/Mixtral-8x7B-Instruct-v0.1")

    def retrieve(self, query_bundle):
        """Return the tools matching ``query_bundle`` followed by a ``compare_tool``.

        The extra tool wraps a sub-question query engine built over the
        retrieved tools.
        """
        nodes = self._retriever.retrieve(query_bundle)
        tools = [self._object_node_mapping.from_node(n.node) for n in nodes]

        sub_question_sc = ServiceContext.from_defaults(llm=self._llm)
        sub_question_engine = SubQuestionQueryEngine.from_defaults(
            query_engine_tools=tools,
            service_context=sub_question_sc,
        )

        # Built from adjacent literals: the former backslash continuation inside
        # a dedented triple-quoted string left a run of indentation spaces in
        # the middle of the description.
        sub_question_description = (
            "Useful for any queries that involve comparing multiple documents. "
            "ALWAYS use this tool for comparison queries - make sure to call this "
            "tool with the original query. Do NOT use the other tools for any "
            "queries involving multiple documents.\n"
        )

        sub_question_tool = QueryEngineTool(
            query_engine=sub_question_engine,
            metadata=ToolMetadata(
                name="compare_tool",
                description=sub_question_description,
            ),
        )

        return tools + [sub_question_tool]
    
    

In [None]:
# Passing postprocessor=None selects the retriever's built-in default
# (CohereRerank(top_n=5)); the argument is given explicitly so the call matches
# the constructor's two-parameter signature.
custom_node_retreiver = CustomRetrevier(vector_node_retreiver, postprocessor=None)
custom_object_retreiver = CustomObjectRetriever(
    retriever=custom_node_retreiver,
    object_node_mapping=tool_mapping,
    all_tools=all_tools,
    llm=llm,
)


In [None]:
# Smoke test: retrieve tools for a dummy query and print how many came back.
tmps = custom_object_retreiver.retrieve("hello")
print(len(tmps))

In [None]:
from llama_index.agent import FnRetrieverOpenAIAgent

In [None]:
# Top-level agent. It must be given the *object* retriever, whose retrieve()
# returns tools (including the comparison tool); the node retriever returns
# index nodes, which the agent cannot call as tools.
agent = FnRetrieverOpenAIAgent.from_retriever(
    retriever=custom_object_retreiver,
    system_prompt=dedent(""" \
    You are an agent designed to answer queries about the documentation.
    Please always use the tools provided to answer a question. Do not rely on prior knowledge.\

    """),
    llm=llm,
    verbose=True,
)

##### Creating a naive RAG implementation
To compare against our agent implementation, we create a naive RAG implementation as a baseline, so that we can evaluate the validity of the entire experiment.

In [None]:
# Flatten the nodes of every processed document into one list.
all_nodes = [
    node
    for doc_info in extra_info_dict.values()
    for node in doc_info["nodes"]
]

In [None]:
# Baseline ("naive RAG"): a single vector index over the nodes of all documents.
# The shared service_context is passed so the baseline uses the same LLM
# configuration as the per-document indexes instead of the library defaults.
base_index = VectorStoreIndex(all_nodes, service_context=service_context)
base_query_engine = base_index.as_query_engine(similarity_top_k=4)

In [None]:
# Ask the top-level agent the evaluation question.
response = agent.query("Tell me about the different types of evaluation in LlamaIndex")

In [26]:
# baseline
response = base_query_engine.query(
    "Tell me about the different types of evaluation in LlamaIndex"
)
print(str(response))

NameError: name 'base_query_engine' is not defined