In [1]:
from llama_index.core import SimpleDirectoryReader,VectorStoreIndex,SummaryIndex
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import StorageContext
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.tools import FunctionTool,QueryEngineTool
from llama_index.core.vector_stores import MetadataFilters,FilterCondition
from typing import List,Optional
from llama_index.core.objects import ObjectIndex
from llama_index.core.agent import FunctionCallingAgentWorker
from llama_index.core.agent import AgentRunner
from llama_index.llms.mistralai import MistralAI
import os

In [2]:
import  nest_asyncio
nest_asyncio.apply()

In [3]:
userdata = {
    "MISTRAL_API_KEY": "BWdlihu9sUh5P2g3bHnzjAaHiT4anTVH"
}

In [4]:
documents = SimpleDirectoryReader(input_files = ['./data/bert_pre_train.pdf']).load_data()
print(len(documents))
print(f"Document Metadata: {documents[0].metadata}")

16
Document Metadata: {'page_label': '1', 'file_name': 'bert_pre_train.pdf', 'file_path': 'data\\bert_pre_train.pdf', 'file_type': 'application/pdf', 'file_size': 775166, 'creation_date': '2024-06-17', 'last_modified_date': '2024-06-17'}


In [5]:
splitter = SentenceSplitter(chunk_size=1024,chunk_overlap=100)
nodes = splitter.get_nodes_from_documents(documents)
print(f"Length of nodes : {len(nodes)}")
print(f"get the content for node 0 :{nodes[0].get_content(metadata_mode='all')}")

Length of nodes : 28
get the content for node 0 :page_label: 1
file_name: bert_pre_train.pdf
file_path: data\bert_pre_train.pdf
file_type: application/pdf
file_size: 775166
creation_date: 2024-06-17
last_modified_date: 2024-06-17

BERT: Pre-training of Deep Bidirectional Transformers for
Language Understanding
Jacob Devlin Ming-Wei Chang Kenton Lee Kristina Toutanova
Google AI Language
{jacobdevlin,mingweichang,kentonl,kristout }@google.com
Abstract
We introduce a new language representa-
tion model called BERT , which stands for
Bidirectional Encoder Representations from
Transformers. Unlike recent language repre-
sentation models (Peters et al., 2018a; Rad-
ford et al., 2018), BERT is designed to pre-
train deep bidirectional representations from
unlabeled text by jointly conditioning on both
left and right context in all layers. As a re-
sult, the pre-trained BERT model can be ﬁne-
tuned with just one additional output layer
to create state-of-the-art models for a wide
range of task

In [6]:
# vector store
import chromadb
db = chromadb.PersistentClient(path="./chroma_db_mistral")
chroma_collection = db.get_or_create_collection("multidocument-agent")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

In [7]:
# embedding model
from llama_index.embeddings.fastembed import FastEmbedEmbedding
from llama_index.core import Settings
embed_model = FastEmbedEmbedding(model_name="BAAI/bge-small-en-v1.5")
Settings.embed_model = embed_model
Settings.chunk_size = 1024

  from .autonotebook import tqdm as notebook_tqdm
Fetching 5 files: 100%|██████████| 5/5 [00:00<?, ?it/s]


In [8]:
# instantiate the llama index
os.environ["MISTRAL_API_KEY"] = userdata.get("MISTRAL_API_KEY")
llm = MistralAI(model="mistral-large-latest")

In [12]:
#instantiate Vectorstore
name = "BERT"
vector_index = VectorStoreIndex(nodes,storage_context=storage_context)
vector_index.storage_context.vector_store.persist(persist_path="/content/chroma_db")

# Define Vectorstore Autoretrieval tool
def vector_query(query:str,page_numbers:Optional[List[str]]=None)->str:
  '''
  perform vector search over index on
  query(str): query string needs to be embedded
  page_numbers(List[str]): list of page numbers to be retrieved,
  leave blank if we want to perform a vector search over all pages
  '''
  page_numbers = page_numbers or []
  metadata_dict = [{"key":'page_label',"value":p} for p in page_numbers]
  
  query_engine = vector_index.as_query_engine(similarity_top_k =2, filters = MetadataFilters.from_dicts(metadata_dict, condition=FilterCondition.OR), llm=llm)
  
  response = query_engine.query(query)
  return response

#llamiondex FunctionTool wraps any python function we feed it
vector_query_tool = FunctionTool.from_defaults(name=f"vector_tool_{name}", fn=vector_query)
# Prepare Summary Tool
summary_index = SummaryIndex(nodes)
summary_query_engine = summary_index.as_query_engine(response_mode="tree_summarize", se_async=True, llm=llm)
summary_query_tool = QueryEngineTool.from_defaults(name=f"summary_tool_{name}", query_engine=summary_query_engine, description=("Use ONLY IF you want to get a holistic summary of the documents." "DO NOT USE if you have specified questions over the documents."))

Insert of existing embedding ID: e6c4a4e7-c5fe-4d99-a678-19405604679c
Insert of existing embedding ID: 9df84515-3c77-469e-a0c5-690dc8337528
Insert of existing embedding ID: a2a1e1e1-8971-45d4-8176-31823e5b28e7
Insert of existing embedding ID: 6da5e357-baad-48f1-a8a0-8d296780f93a
Insert of existing embedding ID: 91338de9-b89c-48e8-8a82-97fea20eaa74
Insert of existing embedding ID: 4c71ef69-cf5f-4585-b0ac-59d352c59169
Insert of existing embedding ID: 54b3be6a-c298-47de-9a0b-ec7e61636cc7
Insert of existing embedding ID: 41d64240-b3a6-4287-884e-d4776b583c78
Insert of existing embedding ID: 6dfe39c5-323e-4ce9-902a-d4319dd45e1e
Insert of existing embedding ID: 00f6489a-db31-4852-9b3a-fcc6d8140696
Insert of existing embedding ID: 69059688-2592-4001-b7c2-54177fd17c36
Insert of existing embedding ID: 9d6b83dd-710b-4236-b157-dcf1d74d1766
Insert of existing embedding ID: e703498a-a48d-4811-9fcf-097880d9dafa
Insert of existing embedding ID: 476d1cbb-c5b8-43e2-864e-6cbdc73aca30
Insert of existing e

In [13]:
response = llm.predict_and_call([vector_query_tool], "Summarize the content in page number 2", verbose=True)

=== Calling Function ===
Calling function: vector_tool_BERT with args: {"query": "summarize the content", "page_numbers": ["2"]}
=== Function Output ===
The content discusses experiments conducted for knowledge-intensive generation using MS-MARCO and Jeopardy question generation. The results show that the models used in these experiments generate responses that are more factual, specific, and diverse compared to a BART baseline. For FEVER fact verification, the outcomes are within 4.3% of state-of-the-art pipeline models that use strong retrieval supervision. The text also mentions the ability to update the models' knowledge as the world changes by replacing the non-parametric memory.

The methodology explores RAG models that utilize an input sequence to retrieve text documents and use them as additional context when generating the target sequence. These models consist of two main components: a retriever that returns distributions over text passages given a query, and a generator that 

In [16]:
# function for setting vector and summary tool
def get_doc_tools(file_path:str,name:str)->str:
  '''
  get vector query and sumnmary query tools from a document
  '''
  #load documents
  documents = SimpleDirectoryReader(input_files = [file_path]).load_data()
  print(f"length of nodes")
  splitter = SentenceSplitter(chunk_size=1024,chunk_overlap=100)
  nodes = splitter.get_nodes_from_documents(documents)
  print(f"Length of nodes : {len(nodes)}")
  
  #instantiate Vectorstore
  vector_index = VectorStoreIndex(nodes,storage_context=storage_context)
  vector_index.storage_context.vector_store.persist(persist_path="/content/chroma_db")
  
  # Define Vectorstore Autoretrieval tool
  def vector_query(query:str,page_numbers:Optional[List[str]]=None)->str:
    '''
    perform vector search over index on
    query(str): query string needs to be embedded
    page_numbers(List[str]): list of page numbers to be retrieved,
    leave blank if we want to perform a vector search over all pages
    '''
    page_numbers = page_numbers or []
    metadata_dict = [{"key":'page_label',"value":p} for p in page_numbers]
    
    query_engine = vector_index.as_query_engine(similarity_top_k =2, filters = MetadataFilters.from_dicts(metadata_dict, condition=FilterCondition.OR), llm=llm)
    
    response = query_engine.query(query)
    return response
  
  #llamiondex FunctionTool wraps any python function we feed it
  vector_query_tool = FunctionTool.from_defaults(name=f"vector_tool_{name}", fn=vector_query)
  
  # Prepare Summary Tool
  summary_index = SummaryIndex(nodes)
  summary_query_engine = summary_index.as_query_engine(response_mode="tree_summarize", se_async=True, llm=llm)
  summary_query_tool = QueryEngineTool.from_defaults(name=f"summary_tool_{name}",query_engine=summary_query_engine, description=("Use ONLY IF you want to get a holistic summary of the documents." "DO NOT USE if you have specified questions over the documents."))
  return vector_query_tool,summary_query_tool


In [21]:
root_path = "./data"
file_name = []
file_path = []
for file in os.listdir(root_path):
  if file.endswith(".pdf"):
    file_name.append(file.split(".")[0])
    file_path.append(os.path.join(root_path,file))
#
print(file_name)
print(file_path)

['bert_pre_train', 'corrective_rag', 'rag_nlp', 'self_rag']
['./data\\bert_pre_train.pdf', './data\\corrective_rag.pdf', './data\\rag_nlp.pdf', './data\\self_rag.pdf']


In [22]:
papers_to_tools_dict = {}
for name,filename in zip(file_name,file_path):
  vector_query_tool,summary_query_tool = get_doc_tools(filename,name)
  papers_to_tools_dict[name] = [vector_query_tool,summary_query_tool]

length of nodes
Length of nodes : 28
length of nodes
Length of nodes : 22
length of nodes
Length of nodes : 30
length of nodes
Length of nodes : 43


In [23]:
initial_tools = [t for f in file_name for t in papers_to_tools_dict[f]]
initial_tools

[<llama_index.core.tools.function_tool.FunctionTool at 0x25316002d40>,
 <llama_index.core.tools.query_engine.QueryEngineTool at 0x25316003400>,
 <llama_index.core.tools.function_tool.FunctionTool at 0x25315b3fe80>,
 <llama_index.core.tools.query_engine.QueryEngineTool at 0x25315b3eaa0>,
 <llama_index.core.tools.function_tool.FunctionTool at 0x2531bd0a1a0>,
 <llama_index.core.tools.query_engine.QueryEngineTool at 0x2531be98550>,
 <llama_index.core.tools.function_tool.FunctionTool at 0x2531bd0a890>,
 <llama_index.core.tools.query_engine.QueryEngineTool at 0x253178de2c0>]

In [24]:
obj_index = ObjectIndex.from_objects(initial_tools,index_cls=VectorStoreIndex)

In [25]:
obj_retriever = obj_index.as_retriever(similarity_top_k=2)
tools = obj_retriever.retrieve("compare and contrast the papers self rag and corrective rag")

print(tools[0].metadata)
print(tools[1].metadata)

ToolMetadata(description='vector_tool_corrective_rag(query: str, page_numbers: Optional[List[str]] = None) -> str\n\n    perform vector search over index on\n    query(str): query string needs to be embedded\n    page_numbers(List[str]): list of page numbers to be retrieved,\n    leave blank if we want to perform a vector search over all pages\n    ', name='vector_tool_corrective_rag', fn_schema=<class 'pydantic.v1.main.vector_tool_corrective_rag'>, return_direct=False)
ToolMetadata(description='Use ONLY IF you want to get a holistic summary of the documents.DO NOT USE if you have specified questions over the documents.', name='summary_tool_corrective_rag', fn_schema=<class 'llama_index.core.tools.types.DefaultToolFnSchema'>, return_direct=False)


In [26]:
# setup the agent
agent_worker = FunctionCallingAgentWorker.from_tools(tool_retriever=obj_retriever,
                                                     llm=llm,
                                                     system_prompt="""You are an agent designed to answer queries over a set of given papers.
                                                     Please always use the tools provided to answer a question.Do not rely on prior knowledge.""",
                                                     verbose=True)
agent = AgentRunner(agent_worker)

# Query

In [27]:
response = agent.query("summarize rag for nlp")
print(str(response))

Added user message to memory: summarize rag for nlp
=== Calling Function ===
Calling function: summary_tool_rag_nlp with args: {"input": "rag for nlp"}
=== Function Output ===
Retrieval-Augmented Generation (RAG) for Knowledge-Intensive NLP Tasks is a method that combines pre-trained parametric and non-parametric memory for language generation. The parametric memory is a pre-trained seq2seq model and the non-parametric memory is a dense vector index of Wikipedia, accessed with a pre-trained neural retriever. RAG models are fine-tuned and evaluated on a wide range of knowledge-intensive NLP tasks and set the state of the art on three open domain QA tasks, outperforming parametric seq2seq models and task-specific retrieve-and-extract architectures. For language generation tasks, RAG models generate more specific, diverse and factual language than a state-of-the-art parametric-only seq2seq baseline.
=== LLM Response ===
Retrieval-Augmented Generation (RAG) is a method used for knowledge-i

In [28]:
response = agent.query("what is a bidirectional transformer?"
                       "how are they trained for language understanding?")
print(str(response))

Added user message to memory: what is a bidirectional transformer?how are they trained for language understanding?
=== Calling Function ===
Calling function: vector_tool_bert_pre_train with args: {"query": "bidirectional transformer", "page_numbers": []}
=== Function Output ===
The bidirectional transformer is a model used in the BERT (Bidirectional Encoder Representations from Transformers) system. Unlike other models such as OpenAI GPT, which use a left-to-right transformer or a constrained self-attention where every token can only attend to context to its left, the bidirectional transformer allows every token to attend to context from both left and right. This makes the BERT model unique as its representations are jointly conditioned on both left and right context in all layers.
=== LLM Response ===
A bidirectional transformer is a model used in the BERT (Bidirectional Encoder Representations from Transformers) system. Unlike other models such as OpenAI GPT, which use a left-to-righ