In [2]:
from llama_index.core import SimpleDirectoryReader,VectorStoreIndex,SummaryIndex
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import StorageContext
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.tools import FunctionTool,QueryEngineTool
from llama_index.core.vector_stores import MetadataFilters,FilterCondition
from typing import List,Optional
from llama_index.core.objects import ObjectIndex
from llama_index.core.agent import FunctionCallingAgentWorker
from llama_index.core.agent import AgentRunner
from llama_index.llms.mistralai import MistralAI
import os

In [3]:
import  nest_asyncio
nest_asyncio.apply()

In [4]:
userdata = {
    "MISTRAL_API_KEY": "BWdlihu9sUh5P2g3bHnzjAaHiT4anTVH"
}

In [7]:
documents = SimpleDirectoryReader(input_files = ['./data/bert_pre_train.pdf']).load_data()
print(len(documents))
print(f"Document Metadata: {documents[0].metadata}")

16
Document Metadata: {'page_label': '1', 'file_name': 'bert_pre_train.pdf', 'file_path': 'data\\bert_pre_train.pdf', 'file_type': 'application/pdf', 'file_size': 775166, 'creation_date': '2024-06-17', 'last_modified_date': '2024-06-17'}


In [8]:
splitter = SentenceSplitter(chunk_size=1024,chunk_overlap=100)
nodes = splitter.get_nodes_from_documents(documents)
print(f"Length of nodes : {len(nodes)}")
print(f"get the content for node 0 :{nodes[0].get_content(metadata_mode='all')}")

Length of nodes : 28
get the content for node 0 :page_label: 1
file_name: bert_pre_train.pdf
file_path: data\bert_pre_train.pdf
file_type: application/pdf
file_size: 775166
creation_date: 2024-06-17
last_modified_date: 2024-06-17

BERT: Pre-training of Deep Bidirectional Transformers for
Language Understanding
Jacob Devlin Ming-Wei Chang Kenton Lee Kristina Toutanova
Google AI Language
{jacobdevlin,mingweichang,kentonl,kristout }@google.com
Abstract
We introduce a new language representa-
tion model called BERT , which stands for
Bidirectional Encoder Representations from
Transformers. Unlike recent language repre-
sentation models (Peters et al., 2018a; Rad-
ford et al., 2018), BERT is designed to pre-
train deep bidirectional representations from
unlabeled text by jointly conditioning on both
left and right context in all layers. As a re-
sult, the pre-trained BERT model can be ﬁne-
tuned with just one additional output layer
to create state-of-the-art models for a wide
range of task

In [9]:
# vector store
import chromadb
db = chromadb.PersistentClient(path="./chroma_db_mistral")
chroma_collection = db.get_or_create_collection("multidocument-agent")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

In [10]:
# embedding model
from llama_index.embeddings.fastembed import FastEmbedEmbedding
from llama_index.core import Settings
embed_model = FastEmbedEmbedding(model_name="BAAI/bge-small-en-v1.5")
Settings.embed_model = embed_model
Settings.chunk_size = 1024

  from .autonotebook import tqdm as notebook_tqdm
Fetching 5 files: 100%|██████████| 5/5 [00:00<?, ?it/s]


In [11]:
# instantiate the llama index
os.environ["MISTRAL_API_KEY"] = userdata.get("MISTRAL_API_KEY")
llm = MistralAI(model="mistral-large-latest")

In [12]:
#instantiate Vectorstore
name = "BERT_arxiv"
vector_index = VectorStoreIndex(nodes,storage_context=storage_context)
vector_index.storage_context.vector_store.persist(persist_path="/content/chroma_db")

# Define Vectorstore Autoretrieval tool
def vector_query(query:str,page_numbers:Optional[List[str]]=None)->str:
  page_numbers = page_numbers or []
  metadata_dict = [{"key":'page_label',"value":p} for p in page_numbers]
  
  query_engine = vector_index.as_query_engine(similarity_top_k =2, filters = MetadataFilters.from_dicts(metadata_dict, condition=FilterCondition.OR), llm=llm)
  
  response = query_engine.query(query)
  return response

#llamiondex FunctionTool wraps any python function we feed it
vector_query_tool = FunctionTool.from_defaults(name=f"vector_tool_{name}", fn=vector_query)
# Prepare Summary Tool
summary_index = SummaryIndex(nodes)
summary_query_engine = summary_index.as_query_engine(response_mode="tree_summarize", se_async=True, llm=llm)
summary_query_tool = QueryEngineTool.from_defaults(name=f"summary_tool_{name}", query_engine=summary_query_engine, description=("Use ONLY IF you want to get a holistic summary of the documents." "DO NOT USE if you have specified questions over the documents."))

Add of existing embedding ID: ba9060d7-db5c-4210-89d0-6c9d3d240528
Add of existing embedding ID: 6cab1fd3-cc91-479c-8178-2073efc87d80
Add of existing embedding ID: 43f8d46c-ee3a-4ef0-b49f-d5bfcfd0fb64
Add of existing embedding ID: 9ec80a9f-c646-4fa9-a578-c83e574bde26
Add of existing embedding ID: 11bb8a99-daa8-4431-a50f-c0dc711192a2
Add of existing embedding ID: b4a6f619-2e9c-460e-a44d-6e502adf7b6c
Add of existing embedding ID: 7bf5eb3a-81e0-490d-a95d-2b29f9cf1faf
Add of existing embedding ID: df7b81d8-12d5-4b44-a754-4adbabd5544d
Add of existing embedding ID: 1b80e0d2-5a87-4866-b847-77139222f729
Add of existing embedding ID: 79c6678f-ad13-4954-9ca8-82d50acfe864
Add of existing embedding ID: 9a61f1d8-39fb-4cb0-93bd-40dceb868c94
Add of existing embedding ID: c0d152d8-21f8-432c-817a-5322766dbe52
Add of existing embedding ID: 54ae2325-885e-45f7-838f-654dbc426c28
Add of existing embedding ID: d8e6f904-f30a-429e-9fa4-e987be5d4968
Add of existing embedding ID: c28e20b5-be86-4187-9c4c-48fb799f

In [13]:
response = llm.predict_and_call([vector_query_tool], "Summarize the content in page number 2", verbose=True)

=== Calling Function ===
Calling function: vector_tool_BERT_arxiv with args: {"query": "Summarize the content in page number 2", "page_numbers": ["2"]}
=== Function Output ===
The page discusses the experimentation and results of a study on knowledge-intensive generation using MS-MARCO and Jeopardy question generation. The models used in the study generate responses that are more factual, specific, and diverse than a BART baseline. For FEVER fact verification, the results are within 4.3% of state-of-the-art pipeline models that use strong retrieval supervision. The study also demonstrates the ability to update the models' knowledge as the world changes by replacing the non-parametric memory. The methods explored in the study are called RAG models, which use an input sequence to retrieve text documents and use them as additional context when generating the target sequence. The models consist of two components: a retriever that returns distributions over text passages given a query, and 

In [14]:
def get_doc_tools(file_path:str,name:str)->str:
  #load documents
  documents = SimpleDirectoryReader(input_files = [file_path]).load_data()
  print(f"length of nodes")
  splitter = SentenceSplitter(chunk_size=1024,chunk_overlap=100)
  nodes = splitter.get_nodes_from_documents(documents)
  print(f"Length of nodes : {len(nodes)}")
  
  #instantiate Vectorstore
  vector_index = VectorStoreIndex(nodes,storage_context=storage_context)
  vector_index.storage_context.vector_store.persist(persist_path="/content/chroma_db")
  
  # Define Vectorstore Autoretrieval tool
  def vector_query(query:str,page_numbers:Optional[List[str]]=None)->str:
    page_numbers = page_numbers or []
    metadata_dict = [{"key":'page_label',"value":p} for p in page_numbers]
    
    query_engine = vector_index.as_query_engine(similarity_top_k =2, filters = MetadataFilters.from_dicts(metadata_dict, condition=FilterCondition.OR), llm=llm)
    
    response = query_engine.query(query)
    return response
  
  #llamiondex FunctionTool wraps any python function we feed it
  vector_query_tool = FunctionTool.from_defaults(name=f"vector_tool_{name}", fn=vector_query)
  
  # Prepare Summary Tool
  summary_index = SummaryIndex(nodes)
  summary_query_engine = summary_index.as_query_engine(response_mode="tree_summarize", se_async=True, llm=llm)
  summary_query_tool = QueryEngineTool.from_defaults(name=f"summary_tool_{name}",query_engine=summary_query_engine, description=("Use ONLY IF you want to get a holistic summary of the documents." "DO NOT USE if you have specified questions over the documents."))
  return vector_query_tool,summary_query_tool


In [15]:
root_path = "./data"
file_name = []
file_path = []
for file in os.listdir(root_path):
  if file.endswith(".pdf"):
    file_name.append(file.split(".")[0])
    file_path.append(os.path.join(root_path,file))
#
print(file_name)
print(file_path)

['bert_pre_train', 'rag_nlp']
['./data\\bert_pre_train.pdf', './data\\rag_nlp.pdf']


In [16]:
papers_to_tools_dict = {}
for name,filename in zip(file_name,file_path):
  vector_query_tool,summary_query_tool = get_doc_tools(filename,name)
  papers_to_tools_dict[name] = [vector_query_tool,summary_query_tool]

length of nodes
Length of nodes : 28
length of nodes
Length of nodes : 30


In [17]:
initial_tools = [t for f in file_name for t in papers_to_tools_dict[f]]
initial_tools

[<llama_index.core.tools.function_tool.FunctionTool at 0x17535525060>,
 <llama_index.core.tools.query_engine.QueryEngineTool at 0x175355370d0>,
 <llama_index.core.tools.function_tool.FunctionTool at 0x1753a5b95a0>,
 <llama_index.core.tools.query_engine.QueryEngineTool at 0x1753a514a30>]

In [18]:
obj_index = ObjectIndex.from_objects(initial_tools,index_cls=VectorStoreIndex)

In [19]:
obj_retriever = obj_index.as_retriever(similarity_top_k=2)
tools = obj_retriever.retrieve("compare and contrast the papers self rag and corrective rag")

print(tools[0].metadata)
print(tools[1].metadata)

ToolMetadata(description='vector_tool_rag_nlp(query: str, page_numbers: Optional[List[str]] = None) -> str\nNone', name='vector_tool_rag_nlp', fn_schema=<class 'pydantic.v1.main.vector_tool_rag_nlp'>, return_direct=False)
ToolMetadata(description='Use ONLY IF you want to get a holistic summary of the documents.DO NOT USE if you have specified questions over the documents.', name='summary_tool_rag_nlp', fn_schema=<class 'llama_index.core.tools.types.DefaultToolFnSchema'>, return_direct=False)


In [20]:
# setup the agent
agent_worker = FunctionCallingAgentWorker.from_tools(tool_retriever=obj_retriever,
                                                     llm=llm,
                                                     system_prompt="""You are an agent designed to answer queries over a set of given papers.
                                                     Please always use the tools provided to answer a question.Do not rely on prior knowledge.""",
                                                     verbose=True)
agent = AgentRunner(agent_worker)

# Query

In [22]:
response = agent.query("summarize rag for nlp")
print(str(response))

Added user message to memory: summarize rag for nlp
=== Calling Function ===
Calling function: summary_tool_rag_nlp with args: {"input": "rag for nlp"}
=== Function Output ===
Retrieval-Augmented Generation (RAG) for NLP tasks is a method that combines pre-trained parametric and non-parametric memory for language generation. The parametric memory is a pre-trained seq2seq model and the non-parametric memory is a dense vector index of Wikipedia, accessed with a pre-trained neural retriever. RAG models condition on the same retrieved passages across the whole generated sequence or use different passages per token. They are fine-tuned and evaluated on a wide range of knowledge-intensive NLP tasks, setting the state of the art on three open domain QA tasks and outperforming parametric seq2seq models and task-specific retrieve-and-extract architectures. For language generation tasks, RAG models generate more specific, diverse, and factual language than a state-of-the-art parametric-only seq2

In [24]:
response = agent.query("what is a bidirectional transformer?")
print(str(response))

Added user message to memory: what is a bidirectional transformer?
=== Calling Function ===
Calling function: vector_tool_bert_pre_train with args: {"query": "what is a bidirectional transformer?"}
=== Function Output ===
A bidirectional transformer is a type of model that uses self-attention mechanisms to process input data in both directions. This is in contrast to other transformer models, like the one used in GPT, where each token can only attend to the context to its left. The bidirectional approach allows the model to consider the full context of a word, both before and after it, to better understand its meaning.
=== LLM Response ===
A bidirectional transformer is a type of model that uses self-attention mechanisms to process input data in both directions. This is in contrast to other transformer models, like the one used in GPT, where each token can only attend to the context to its left. The bidirectional approach allows the model to consider the full context of a word, both be