In [2]:

'''
We are now going to build the Agent
first,we load the index using the storage context and the load_index_from_storage function
'''

from llama_index.core import StorageContext,load_index_from_storage
from constant import embed_model

#load the index from the storage
storage_context = StorageContext.from_defaults(persist_dir='index/')
index = load_index_from_storage(storage_context,embed_model=embed_model) #we parse the storage context through the load_index_from_storage function and the embed_model 

In [3]:
'''
Create the RAG tool
This is the tool the agent will use to fetch information from the existing database
'''
from llama_index.core.tools import QueryEngineTool
from constant import llm_model


#load the llm model engine
query_engine= index.as_query_engine(llm_model=llm_model, similarity_top_k=4) #we find 5 max similarity vectors when the query is submitted

#the tool the engine will use to fetch information
rag_tool=QueryEngineTool.from_defaults(
    query_engine,
    name = "research_paper_query_engine_tool",
    description = "A RAG engine with recent Research paper",
    )
    


In [4]:
#use this function to display the prompt
from IPython.display import Markdown,display

def display_prompt_dict(prompts_dict):
    for key, prompt in prompts_dict.items():
        display(Markdown(f"**Prompt key**: {key}"))
        print(prompt.get_template())

In [5]:
prompts_dict = query_engine.get_prompts()
display_prompt_dict(prompts_dict)

**Prompt key**: response_synthesizer:text_qa_template

Context information is below.
---------------------
{context_str}
---------------------
Given the context information and not prior knowledge, answer the query.
Query: {query_str}
Answer: 


**Prompt key**: response_synthesizer:refine_template

The original query is as follows: {query_str}
We have provided an existing answer: {existing_answer}
We have the opportunity to refine the existing answer (only if needed) with some more context below.
------------
{context_msg}
------------
Given the new context, refine the original answer to better answer the query. If the context isn't useful, return the original answer.
Refined Answer: 


In [6]:
#define the other tools the agent will use

from llama_index.core.tools import FunctionTool
from tools import download_pdf, fetch_arxiv_papers

download_pdf_tool = FunctionTool.from_defaults(
    download_pdf, 
    name="download_pdf_file_tool", 
    description="pyhton function that downloads a pdfs file by link",
    )

fetch_arxiv_tool = FunctionTool.from_defaults(
    fetch_arxiv_papers, 
    name="fetch_from_arxiv", 
    description="download the {max_result} recent papers regarding the topic {title} from arvix",
    )

In [7]:
#create the Agent(We use the ReAct beacuse the agent will be able to react to the user input)
from llama_index.core.agent import ReActAgent

agent = ReActAgent.from_tools(
    [download_pdf_tool,rag_tool,fetch_arxiv_tool],llm=llm_model,verbose=True
)   #parse the list of tools the agent will use and the llm model

In [8]:
'''
to be able to chat with the agent,we need a query template
'''
query_template = ''' I am interested in {topic}.
Find papers in your knowledge base that are related to topic.
Use the following template to query research_paper_query_engine_tool tool:Provide title, summary, authors and links to download to papers related to {topic}.
if there are not,could you search the recent one from arxiv
IMPORTANT :do not download papers unless the user asks for it explicitly.'''


In [88]:
answer = agent.chat(query_template.format(topic= "Multi-modal models"))

> Running step eb620a0a-fb8b-4e9c-87af-3a9e735ddd45. Step input:  I am interested in Multi-modal models.
Find papers in your knowledge base that are related to topic.
Use the following template to query research_paper_query_engine_tool tool:Provide title, summary, authors and links to download to pappers related to Multi-modal models.
if there are not,could you search the recent one from arxiv
IMPORTANT :do not download papers unless the user asks for it explicitly.
[1;3;38;5;200mThought: The current language of the user is: English. I need to use a tool to help me find research papers related to Multi-modal models.
Action: research_paper_query_engine_tool
Action Input: {'input': 'Multi-modal models'}
[0m[1;3;34mObservation: Context information is below.
---------------------
Title: Distilling Multi-modal Large Language Models for Autonomous Driving
Authors: Deepti Hegde, Rajeev Yasarla, Hong Cai, Shizhong Han, Apratim Bhattacharyya, Shweta Mahajan, Litian Liu, Risheek Garrepalli, V

In [89]:
#to better visualize the answer,we use the display function
Markdown(answer.response)

Here are some recent papers related to Multi-modal models:

1. **Title**: Distilling Multi-modal Large Language Models for Autonomous Driving  
   **Authors**: Deepti Hegde, Rajeev Yasarla, Hong Cai, Shizhong Han, Apratim Bhattacharyya, Shweta Mahajan, Litian Liu, Risheek Garrepalli, Vishal M. Patel, Fatih Porikli  
   **Summary**: This paper proposes DiMA, an end-to-end autonomous driving system that distills information from a multi-modal large language model (LLM) to a vision-based planner, improving efficiency and reducing trajectory errors.  
   **Link**: [Download PDF](http://arxiv.org/pdf/2501.09757v1) | [arXiv Link](http://arxiv.org/abs/2501.09757v1)

2. **Title**: A Simple Aerial Detection Baseline of Multimodal Language Models  
   **Authors**: Qingyun Li, Yushi Chen, Xinya Shu, Dong Chen, Xin He, Yi Yu, Xue Yang  
   **Summary**: This paper presents a baseline for applying multimodal language models (MLMs) to aerial detection in remote sensing, achieving impressive detection performance comparable to conventional models.  
   **Link**: [Download PDF](http://arxiv.org/pdf/2501.09720v1) | [arXiv Link](http://arxiv.org/abs/2501.09720v1)

3. **Title**: Inference-Time Scaling for Diffusion Models beyond Scaling Denoising Steps  
   **Authors**: Nanye Ma, Shangyuan Tong, Haolin Jia, Hexiang Hu, Yu-Chuan Su, Mingda Zhang, Xuan Yang, Yandong Li, Tommi Jaakkola, Xuhui Jia, Saining Xie  
   **Summary**: This work explores the inference-time scaling behavior of diffusion models, revealing how performance can improve with increased computation during inference.  
   **Link**: [Download PDF](http://arxiv.org/pdf/2501.09732v1) | [arXiv Link](http://arxiv.org/abs/2501.09732v1)

4. **Title**: Mitigating Hallucinations in Large Vision-Language Models via DPO: On-Policy Data Hold the Key  
   **Authors**: Zhihe Yang, Xufang Luo, Dongqi Han, Yunjian Xu, Dongsheng Li  
   **Summary**: This paper addresses hallucination issues in large vision-language models using Direct Preference Optimization (DPO) and proposes an On-Policy Alignment framework to improve response accuracy.  
   **Link**: [Download PDF](http://arxiv.org/pdf/2501.09695v1) | [arXiv Link](http://arxiv.org/abs/2501.09695v1)

These papers provide insights into various aspects of multi-modal models and their applications.

In [90]:
#since the agent retains memory,we can use the same query template to ask for the agnet to download the papers.
answer = agent.chat(''' download the following papers:
for each paper:
1. Process one paper at a time
2. state which paper number you are processing of the total
3.complete a full download cycle before moving to the next one
4.explicitly state when moving to the next one
5.provide a final summary only after all papers are downloaded''')

> Running step 0ae0bece-75ef-48f7-bc51-a3076a19eaf0. Step input:  download the following papers:
for each paper:
1. Process one paper at a time
2. state which paper number you are processing of the total
3.complete a full download cycle before moving to the next one
4.explicitly state when moving to the next one
5.provide a final summary only after all papers are downloaded
[1;3;38;5;200mThought: I will start downloading the papers one at a time, as requested. I will begin with the first paper.
Action: download_pdf_file_tool
Action Input: {'pdf_url': 'http://arxiv.org/pdf/2501.09757v1', 'output_file_name': 'Distilling_Multi-modal_Large_Language_Models_for_Autonomous_Driving.pdf'}
[0m[1;3;34mObservation: PDF file downloaded successfully to papers\Distilling_Multi-modal_Large_Language_Models_for_Autonomous_Driving.pdf
[0m> Running step 6f37eb42-65a5-4237-a4df-0211ceed86ec. Step input: None
[1;3;38;5;200mThought: I have completed the download of the first paper. Now I will proceed to

In [91]:
Markdown(answer.response)

1. **Distilling Multi-modal Large Language Models for Autonomous Driving**  
   - Authors: Deepti Hegde et al.  
   - Summary: Proposes DiMA, an end-to-end autonomous driving system that distills information from a multi-modal large language model (LLM) to a vision-based planner, improving efficiency and reducing trajectory errors.  

2. **A Simple Aerial Detection Baseline of Multimodal Language Models**  
   - Authors: Qingyun Li et al.  
   - Summary: Presents a baseline for applying multimodal language models (MLMs) to aerial detection in remote sensing, achieving impressive detection performance comparable to conventional models.  

3. **Inference-Time Scaling for Diffusion Models beyond Scaling Denoising Steps**  
   - Authors: Nanye Ma et al.  
   - Summary: Explores the inference-time scaling behavior of diffusion models, revealing how performance can improve with increased computation during inference.  

4. **Mitigating Hallucinations in Large Vision-Language Models via DPO: On-Policy Data Hold the Key**  
   - Authors: Zhihe Yang et al.  
   - Summary: Addresses hallucination issues in large vision-language models using Direct Preference Optimization (DPO) and proposes an On-Policy Alignment framework to improve response accuracy.  

All papers have been successfully downloaded and are available for review.

In [9]:
#we want toask abot a topic which is not in the database
answer = agent.chat(query_template.format(topic= "Quantum Computing"))

> Running step 80fb732e-df12-479a-8fd5-97aa593c1a35. Step input:  I am interested in Quantum Computing.
Find papers in your knowledge base that are related to topic.
Use the following template to query research_paper_query_engine_tool tool:Provide title, summary, authors and links to download to pappers related to Quantum Computing.
if there are not,could you search the recent one from arxiv
IMPORTANT :do not download papers unless the user asks for it explicitly.
[1;3;38;5;200mThought: The current language of the user is: English. I need to use a tool to find research papers related to Quantum Computing.
Action: research_paper_query_engine_tool
Action Input: {'input': 'Quantum Computing'}
[0m[1;3;34mObservation: Context information is below.
---------------------
Title: Inference-Time Scaling for Diffusion Models beyond Scaling Denoising Steps
Authors: Nanye Ma, Shangyuan Tong, Haolin Jia, Hexiang Hu, Yu-Chuan Su, Mingda Zhang, Xuan Yang, Yandong Li, Tommi Jaakkola, Xuhui Jia, Sain

In [10]:
Markdown(answer.response)

Here are some recent papers related to Quantum Computing. If you would like to download any of them, please let me know!

In [None]:
#answer = agent.chat(query_template.format(topic= "Algebraic Topology"))

In [None]:
#Markdown(answer.response)