In [1]:
import os
# Expose only the GPU with index 1 to CUDA. This is set in the first cell, ahead of
# the imports below, so it is in place before any CUDA-backed library is loaded.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

In [2]:
import logging
import sys

# Send INFO-level log records to stdout so they show up in the cell output.
# basicConfig already installs a stdout handler on the root logger; attaching a
# second StreamHandler on top of it made every log record print twice.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

In [3]:
import nest_asyncio
# Allow nested event loops: the notebook kernel already runs one, and later cells
# request asynchronous execution (use_async=True).
nest_asyncio.apply()

In [4]:
from llama_index.core import SimpleDirectoryReader, get_response_synthesizer
from llama_index.core import DocumentSummaryIndex, StorageContext, Settings
from llama_index.core import load_index_from_storage
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.core.node_parser import SentenceSplitter
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core.prompts import PromptTemplate
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.response.notebook_utils import display_response, display_metadata
import chromadb
import model_utils
import prompt_utils




INFO:datasets:PyTorch version 2.3.0+cu118 available.
PyTorch version 2.3.0+cu118 available.


In [5]:
# Embedding model, loaded from a local checkpoint directory onto the GPU.
embed_model = HuggingFaceEmbedding(
    model_name="models/bge-small-en-v1.5",
    device="cuda",
)

INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: models/bge-small-en-v1.5
Load pretrained SentenceTransformer: models/bge-small-en-v1.5
INFO:sentence_transformers.SentenceTransformer:2 prompts are loaded, with the keys: ['query', 'text']
2 prompts are loaded, with the keys: ['query', 'text']


In [6]:
# Load the LLM weights and the matching tokenizer through the project's
# quantized-loading helper.
model_name = "models/Meta-Llama-3-8B-Instruct"
model, tokenizer = model_utils.load_quantized_model(model_name_or_path=model_name, device="cuda")

Loading tokenizer and model with quantization config from: models/Meta-Llama-3-8B-Instruct


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [7]:
# Wrap the already-loaded model and tokenizer in a LlamaIndex LLM object.
llama3_prompt = PromptTemplate(prompt_utils.get_llama3_prompt_template())
sampling_kwargs = {"temperature": 0.7, "do_sample": True}

llm_hf = HuggingFaceLLM(
    model=model,
    tokenizer=tokenizer,
    model_name=model_name,
    device_map="cuda",
    context_window=4096,
    max_new_tokens=512,
    query_wrapper_prompt=llama3_prompt,
    generate_kwargs=sampling_kwargs,
)

# Register it as the default LLM in the global LlamaIndex settings.
Settings.llm = llm_hf

In [None]:
# Settings.embed_model = embed_model
# Settings.llm = llm_hf

## Load documents

In [7]:
# Load the source documents from ./data, using file names as document ids.
reader = SimpleDirectoryReader(input_dir="./data", filename_as_id=True)
documents = reader.load_data()
len(documents)

159

In [8]:
# On-disk Chroma client; everything is persisted under ./chroma_db.
chroma_client = chromadb.PersistentClient(path="./chroma_db")

# Reuse the "blogs_summary" collection if it exists, otherwise create it.
vector_collection = chroma_client.get_or_create_collection("blogs_summary")

# The collection is handed over ready-made, so persistence is governed by the
# PersistentClient above. The former persist_dir="./chroma_db/blogs_summary"
# argument named a different path than the client and is no longer passed
# (the reload cell further down also builds the store without it).
vector_store = ChromaVectorStore(chroma_collection=vector_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

INFO:chromadb.telemetry.product.posthog:Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.
Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.


In [9]:
# SentenceSplitter measures text length as len(tokenizer(text)), so it needs a
# callable that returns a list of tokens. Calling the Hugging Face tokenizer object
# directly returns a dict-like encoding whose len() is its number of fields rather
# than the token count, which would leave chunk_size ineffective. Pass the bound
# `encode` method instead.
# NOTE(review): assumes `tokenizer` is a Hugging Face tokenizer, as it is also
# handed to HuggingFaceLLM above.
splitter = SentenceSplitter(
    tokenizer=tokenizer.encode,
    chunk_size=1024,
)

In [11]:
# Synthesizer used to build summaries: tree_summarize mode, async calls, verbose output.
synthesizer_options = dict(response_mode="tree_summarize", use_async=True, verbose=True)
response_synthesizer = get_response_synthesizer(llm=llm_hf, **synthesizer_options)

In [None]:
# # test only 3 documents
# doc_summary_index = DocumentSummaryIndex.from_documents(
#     documents=documents[:3],
#     llm=llm_hf,
#     embed_model=embed_model,
#     transformations=[splitter],
#     response_synthesizer=response_synthesizer,
#     show_progress=True,
#     storage_context=storage_context
# )

In [None]:
# doc_summary_index.storage_context.persist("summary")

In [None]:
# visualize
# doc_summary_index.get_document_summary("07a29a81-1d9f-445e-b1a3-cec315ffcd79")

## Load doc summary index

### Load from ChromaDB

In [8]:
# Re-open the persisted Chroma database and report how many entries the collection holds.
chroma_client = chromadb.PersistentClient(path="./chroma_db")
test_vector_collection = chroma_client.get_or_create_collection("blogs_summary")
(test_vector_collection, test_vector_collection.count())

INFO:chromadb.telemetry.product.posthog:Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.
Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.


(Collection(id=e2bc79ee-49dd-45e4-85f3-6acb87185f7a, name=blogs_summary), 159)

In [9]:
# Vectors come from the Chroma collection; the storage context is additionally
# pointed at the persisted index directory (no persist_dir is given to the vector store).
chroma_vector_store = ChromaVectorStore(chroma_collection=test_vector_collection)
chroma_storage_context = StorageContext.from_defaults(
    persist_dir="./database/blogs_summary_index/",
    vector_store=chroma_vector_store,
)

In [11]:
# Fetch the persisted index structure from the storage context's index store.
index_struct = chroma_storage_context.index_store.get_index_struct()

In [78]:
# Print each document id followed by the id of its summary node.
for doc_id, summary_id in index_struct.doc_id_to_summary_id.items():
    print(doc_id, summary_id, sep="\n")

/workspace/projects/LlamindexHelper/data/a-cheat-sheet-and-some-recipes-for-building-advanced-rag-803a9d94c41b.html
83001b14-fb2d-400e-b31b-3a48501408ce
/workspace/projects/LlamindexHelper/data/a-new-document-summary-index-for-llm-powered-qa-systems-9a32ece2f9ec.html
90d0dad3-83e4-48ff-969a-41a450639f14
/workspace/projects/LlamindexHelper/data/agentic-rag-with-llamaindex-2721b8a49ff6.html
8459bd37-a59b-4eb1-b346-49c451ffd64b
/workspace/projects/LlamindexHelper/data/ai-voice-assistant-enhancing-accessibility-in-ai-with-llamaindex-and-gpt3-5-f5509d296f4a.html
78e2927e-d1d5-46d0-9f7d-a83732e7369b
/workspace/projects/LlamindexHelper/data/announcing-llamaindex-0-9-719f03282945.html
77ba3a80-549e-47e9-bbf3-f70fbaa39686
/workspace/projects/LlamindexHelper/data/arize-ai-and-llamaindex-roll-out-joint-platform-for-evaluating-llm-applications.html
c07eb866-c51d-4e7a-b9af-1df89655b2d6
/workspace/projects/LlamindexHelper/data/automate-online-tasks-with-multion-and-llamaindex.html
6c94c9c1-8eaa-45d4

In [12]:
# Rebuild the document-summary index from storage, wiring in the local LLM and embedder.
doc_sum_index = load_index_from_storage(
    storage_context=chroma_storage_context,
    llm=llm_hf,
    embed_model=embed_model,
)

INFO:llama_index.core.indices.loading:Loading all indices.
Loading all indices.


In [13]:
# Show the stored summary for one sample document, looked up by its document id.
sample_doc_id = "/workspace/projects/LlamindexHelper/data/a-cheat-sheet-and-some-recipes-for-building-advanced-rag-803a9d94c41b.html"
print(doc_sum_index.get_document_summary(sample_doc_id))

The provided text is about using the llama-index library to evaluate the performance of RAG systems based on seven measurement aspects outlined in a survey paper by Gao et al. The text highlights the evaluation notebook guides provided by the llama-index library and explains the concept of faithfulness, which is further explained in a Notion document. This text can answer questions related to the evaluation capabilities of the llama-index library, the measurement aspects outlined by Gao et al., and how to use the library to assess the performance of RAG systems in relation to these aspects.

Some potential questions that this text can answer include:
- What is the llama-index library and how can it be used to evaluate RAG systems?
- What measurement aspects are outlined in the survey paper by Gao et al.?
- How can the evaluation notebook guides provided by the llama-index library be used to assess the performance of RAG systems in relation to these measurement aspects?
- What is faithf

## Querying

In [10]:
# Sample questions used throughout the querying and reranking sections.
question1 = "What are key features of llama-agents?"
question2 = (
    "What are the two critical areas of RAG system performance that are assessed "
    'in the "Evaluating RAG with LlamaIndex" section of the OpenAI Cookbook?'
)
question3 = (
    "What are the two main metrics used to evaluate the performance "
    "of the different rerankers in the RAG system?"
)

In [11]:
# Plain query engine over the summary index (no reranking). Requires doc_sum_index,
# so the index-loading cell must have been run first.
query_engine = doc_sum_index.as_query_engine(llm=llm_hf, response_mode="compact", use_async=True)

NameError: name 'doc_sum_index' is not defined

In [18]:
# Run the first question through the query engine.
resp1 = query_engine.query(question1)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


In [19]:
# Render the answer in the notebook.
display_response(resp1)

**`Final Response:`** Based on the new context, I refine the original answer to provide a more detailed and accurate response.

The key features of Llama Agents are:

1. **Text-to-Image Prompt Generation**: Llama Agents use LlamaIndex, a vector database created from DiffusionDB, to generate better prompts for text-to-image tasks. This allows for more accurate and varied image generation.
2. **Customizability**: Llama Agents can be customized to suit specific use cases by adjusting parameters such as temperature, which controls the variation of generated prompts.
3. **Integration with Transformers Agents**: Llama Agents can be integrated with Transformers Agents, allowing for the creation of text-to-image prompt assistants that can generate images based on user input.
4. **Easy Distribution and Sharing**: Llama Agents can be easily distributed and shared using Hugging Face Spaces, enabling collaboration and innovation in the community.
5. **Vector Database**: LlamaIndex is built on a vector database created from DiffusionDB, which allows for efficient and accurate prompt generation.

These features enable Llama Agents to generate high-quality images based on user input, making them a powerful tool for creative applications.

In [20]:
# List the metadata of every source node attached to the answer.
for source_node in resp1.source_nodes:
    print(source_node.metadata)

{'file_path': '/workspace/projects/LlamindexHelper/data/llamaindex-and-transformers-agents-67042ee1d8d6.html', 'file_name': 'llamaindex-and-transformers-agents-67042ee1d8d6.html', 'file_type': 'text/html', 'file_size': 14762, 'creation_date': '2024-07-21', 'last_modified_date': '2024-07-21'}


### Reranking

In [14]:
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.postprocessor import LLMRerank
from llama_index.core.postprocessor import SentenceTransformerRerank
from llama_index.core import QueryBundle
import pandas as pd
from IPython.display import display, HTML

In [15]:
# Set pandas' display.max_colwidth option to 100.
pd.set_option("display.max_colwidth", 100)

def get_retrieved_nodes(
    query_str, index, vector_top_k=10, reranker=None
):
    """Retrieve the top matches for ``query_str`` from ``index``, optionally reranked.

    Args:
        query_str: The query text.
        index: Index handed to ``VectorIndexRetriever``.
        vector_top_k: Passed to the retriever as ``similarity_top_k``.
        reranker: Optional object exposing ``postprocess_nodes(nodes, query_bundle)``;
            when given, its output replaces the raw retrieval result.

    Returns:
        The retrieved nodes, after reranking if a reranker was supplied.
    """
    bundle = QueryBundle(query_str)
    hits = VectorIndexRetriever(index=index, similarity_top_k=vector_top_k).retrieve(bundle)

    # Without a reranker the raw retrieval result is returned unchanged.
    if reranker is None:
        return hits
    return reranker.postprocess_nodes(hits, bundle)

def pretty_print(df):
    """Display ``df`` as an HTML table, replacing literal ``\\n`` sequences with ``<br>``."""
    table_html = df.to_html()
    table_html = table_html.replace("\\n", "<br>")
    return display(HTML(table_html))


def visualize_retrieved_nodes(nodes) -> None:
    """Show the score, metadata and text of each node in ``nodes`` as a table."""
    rows = [
        {"Score": item.score, "Metadata": item.metadata, "Text": item.node.get_text()}
        for item in nodes
    ]
    pretty_print(pd.DataFrame(rows))

### Retrieval without reranker

In [16]:
# Baseline: top-3 retrieval for the first question, with no reranker.
raw_nodes = get_retrieved_nodes(question1, index=doc_sum_index, vector_top_k=3)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

In [17]:
# Tabulate the retrieval results obtained without a reranker.
visualize_retrieved_nodes(raw_nodes)

Unnamed: 0,Score,Metadata,Text
0,0.575996,"{'file_path': '/workspace/projects/LlamindexHelper/data/llamaindex-and-transformers-agents-67042ee1d8d6.html', 'file_name': 'llamaindex-and-transformers-agents-67042ee1d8d6.html', 'file_type': 'text/html', 'file_size': 14762, 'creation_date': '2024-07-21', 'last_modified_date': '2024-07-21'}","The provided text discusses the use of Large Language Models (LLMs) and agents, which are programs that can perform tasks and make decisions. It mentions the release of Transformers Agents, a popular use-case for LLMs, as well as the LlamaIndex tool, which can augment agents' image-generation capabilities by suggesting better prompts. The text also introduces a Text2Image Prompt Assistant tool, which can re-write prompts to generate more beautiful images. The text suggests that LlamaIndex can be used by agents and provides some context on the capabilities and limitations of LLMs and agents, as well as the challenges and opportunities associated with their use. Some potential questions that this text can answer include: - What are LLMs and agents, and how are they being used in practice? - How can LLMs and agents be integrated into specific applications, such as image generation? - What are some of the key benefits and drawbacks of using LLMs and agents, and how are these factors influencing their adoption and use? - How are LLMs and agents being developed and improved over time, and what are some of the key trends and innovations in this area? - How can LLMs and agents be made more robust and reliable, particularly in the face of uncertainty and ambiguity in input data? - How can LLMs and agents be made more efficient and scalable, particularly in terms of resource usage and computational complexity? - How can LLMs and agents be made more transparent and interpretable, particularly in terms of their decision-making processes and outcomes? 
- How can LLMs and agents be made more secure and trustworthy, particularly in terms of their privacy and confidentiality concerns? - How can LLMs and agents be made more adaptable and flexible, particularly in terms of their ability to handle new and diverse types of input data? - How can LlamaIndex be used with LLM agents, and how can custom tools in Transformers Agents be distributed and shared using Hugging Face Spaces? The text also provides examples of how the Text2Image Prompt Assistant tool can be used to generate different images based on varying prompts and temperatures."
1,0.575554,"{'file_path': '/workspace/projects/LlamindexHelper/data/building-better-tools-for-llm-agents-f8c5a6714f11.html', 'file_name': 'building-better-tools-for-llm-agents-f8c5a6714f11.html', 'file_type': 'text/html', 'file_size': 26798, 'creation_date': '2024-07-21', 'last_modified_date': '2024-07-21'}","The provided text discusses various aspects related to the creation and usage of tools for large language model (LLM) agents. It touches upon the LlamaHub Tools library, which enables LLMs such as ChatGPT to connect to APIs and perform actions on behalf of users. The author highlights the importance of writing informative and useful tool prompts and making them tolerant of partial inputs to minimize errors. The text also discusses the development of tools for Google Workspace applications, specifically focusing on Google Calendar and Gmail APIs. It emphasizes the need to provide simple deterministic functions for agents to use and return prompts from functions that perform mutations. The text also mentions the consideration of the size of the context window when building tools and making them compatible with the LoadAndSearchTool. The author suggests asking the agent about its own tools to debug them during development. The text also provides an example conversation between an Agent and a user to demonstrate how to debug tools. Overall, the text offers insights into creating and utilizing tools for LLamaIndex, as well as tips and techniques for optimizing their functionality and usability. Some of the questions that this text can answer include how to create tolerant tools, how to provide simple deterministic functions for agents, how to return prompts from functions that perform mutations, how to make tools compatible with the LoadAndSearchTool, and how to consider the size of the context window when building tools."
2,0.571714,"{'file_path': '/workspace/projects/LlamindexHelper/data/introducing-llama-agents-a-powerful-framework-for-building-production-multi-agent-ai-systems.html', 'file_name': 'introducing-llama-agents-a-powerful-framework-for-building-production-multi-agent-ai-systems.html', 'file_type': 'text/html', 'file_size': 18790, 'creation_date': '2024-07-21', 'last_modified_date': '2024-07-21'}","The provided text is introducing a Python library called llama-agents, which allows for the creation of complex multi-agent AI systems. The text explains how to use this library to construct a Query Rewriting RAG system, which combines query rewriting with RAG to enhance question-answering capabilities. This is an alpha release, and the developers are seeking feedback from the public to improve the library's features for use in production. Some potential questions that this text can answer include inquiries about the uniqueness of llama-agents in comparison to other AI libraries, examples of multi-agent AI systems that can be built using this library, how the Query Rewriting RAG system functions, and how to set up and utilize the multi-agent system with llama-agents. The text also mentions the availability of examples and resources in the llama-agents repository and explains how users can contribute to the public roadmap by providing feedback on the features. Overall, the text serves as a resource for acquiring knowledge about the library's capabilities and potential applications."


### Retrieval with reranker

In [18]:
# Sentence-transformers reranker loaded from a local checkpoint. It keeps the best
# 2 nodes after re-ranking and is asked to retain the original retrieval score.
rerank_postprocessor = SentenceTransformerRerank(
    top_n=2,
    model="models/mxbai-rerank-xsmall-v1",
    keep_retrieval_score=True,
)

In [32]:
# Retrieve 5 candidates for the first question, then let the reranker narrow them down.
reranked_nodes = get_retrieved_nodes(
    question1, index=doc_sum_index, vector_top_k=5, reranker=rerank_postprocessor
)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

In [20]:
# Tabulate the nodes kept by the sentence-transformers reranker.
visualize_retrieved_nodes(reranked_nodes)

Unnamed: 0,Score,Metadata,Text
0,0.864365,"{'file_path': '/workspace/projects/LlamindexHelper/data/introducing-llama-agents-a-powerful-framework-for-building-production-multi-agent-ai-systems.html', 'file_name': 'introducing-llama-agents-a-powerful-framework-for-building-production-multi-agent-ai-systems.html', 'file_type': 'text/html', 'file_size': 18790, 'creation_date': '2024-07-21', 'last_modified_date': '2024-07-21', 'retrieval_score': 0.5717135510066469}","The provided text is introducing a Python library called llama-agents, which allows for the creation of complex multi-agent AI systems. The text explains how to use this library to construct a Query Rewriting RAG system, which combines query rewriting with RAG to enhance question-answering capabilities. This is an alpha release, and the developers are seeking feedback from the public to improve the library's features for use in production. Some potential questions that this text can answer include inquiries about the uniqueness of llama-agents in comparison to other AI libraries, examples of multi-agent AI systems that can be built using this library, how the Query Rewriting RAG system functions, and how to set up and utilize the multi-agent system with llama-agents. The text also mentions the availability of examples and resources in the llama-agents repository and explains how users can contribute to the public roadmap by providing feedback on the features. Overall, the text serves as a resource for acquiring knowledge about the library's capabilities and potential applications."
1,0.411383,"{'file_path': '/workspace/projects/LlamindexHelper/data/llamaindex-and-transformers-agents-67042ee1d8d6.html', 'file_name': 'llamaindex-and-transformers-agents-67042ee1d8d6.html', 'file_type': 'text/html', 'file_size': 14762, 'creation_date': '2024-07-21', 'last_modified_date': '2024-07-21', 'retrieval_score': 0.5759962319721066}","The provided text discusses the use of Large Language Models (LLMs) and agents, which are programs that can perform tasks and make decisions. It mentions the release of Transformers Agents, a popular use-case for LLMs, as well as the LlamaIndex tool, which can augment agents' image-generation capabilities by suggesting better prompts. The text also introduces a Text2Image Prompt Assistant tool, which can re-write prompts to generate more beautiful images. The text suggests that LlamaIndex can be used by agents and provides some context on the capabilities and limitations of LLMs and agents, as well as the challenges and opportunities associated with their use. Some potential questions that this text can answer include: - What are LLMs and agents, and how are they being used in practice? - How can LLMs and agents be integrated into specific applications, such as image generation? - What are some of the key benefits and drawbacks of using LLMs and agents, and how are these factors influencing their adoption and use? - How are LLMs and agents being developed and improved over time, and what are some of the key trends and innovations in this area? - How can LLMs and agents be made more robust and reliable, particularly in the face of uncertainty and ambiguity in input data? - How can LLMs and agents be made more efficient and scalable, particularly in terms of resource usage and computational complexity? - How can LLMs and agents be made more transparent and interpretable, particularly in terms of their decision-making processes and outcomes? 
- How can LLMs and agents be made more secure and trustworthy, particularly in terms of their privacy and confidentiality concerns? - How can LLMs and agents be made more adaptable and flexible, particularly in terms of their ability to handle new and diverse types of input data? - How can LlamaIndex be used with LLM agents, and how can custom tools in Transformers Agents be distributed and shared using Hugging Face Spaces? The text also provides examples of how the Text2Image Prompt Assistant tool can be used to generate different images based on varying prompts and temperatures."


### LLM as reranker

In [33]:
from llama_index.postprocessor.rankgpt_rerank import RankGPTRerank

In [34]:
# LLM-based reranker driven by the local model; keeps the top 2 nodes and logs the new order.
llm_reranker = RankGPTRerank(llm=llm_hf, top_n=2, verbose=True)

In [37]:
# Same 5-candidate retrieval as before, this time reranked by the LLM.
llm_reranked_nodes = get_retrieved_nodes(
    question1, index=doc_sum_index, vector_top_k=5, reranker=llm_reranker
)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


After Reranking, new rank list for nodes: [2, 0, 3, 1, 4]

In [36]:
# Tabulate the nodes returned by the LLM-based reranker.
visualize_retrieved_nodes(llm_reranked_nodes)

Unnamed: 0,Score,Metadata,Text
0,0.571714,"{'file_path': '/workspace/projects/LlamindexHelper/data/introducing-llama-agents-a-powerful-framework-for-building-production-multi-agent-ai-systems.html', 'file_name': 'introducing-llama-agents-a-powerful-framework-for-building-production-multi-agent-ai-systems.html', 'file_type': 'text/html', 'file_size': 18790, 'creation_date': '2024-07-21', 'last_modified_date': '2024-07-21'}","The provided text is introducing a Python library called llama-agents, which allows for the creation of complex multi-agent AI systems. The text explains how to use this library to construct a Query Rewriting RAG system, which combines query rewriting with RAG to enhance question-answering capabilities. This is an alpha release, and the developers are seeking feedback from the public to improve the library's features for use in production. Some potential questions that this text can answer include inquiries about the uniqueness of llama-agents in comparison to other AI libraries, examples of multi-agent AI systems that can be built using this library, how the Query Rewriting RAG system functions, and how to set up and utilize the multi-agent system with llama-agents. The text also mentions the availability of examples and resources in the llama-agents repository and explains how users can contribute to the public roadmap by providing feedback on the features. Overall, the text serves as a resource for acquiring knowledge about the library's capabilities and potential applications."
1,0.575996,"{'file_path': '/workspace/projects/LlamindexHelper/data/llamaindex-and-transformers-agents-67042ee1d8d6.html', 'file_name': 'llamaindex-and-transformers-agents-67042ee1d8d6.html', 'file_type': 'text/html', 'file_size': 14762, 'creation_date': '2024-07-21', 'last_modified_date': '2024-07-21'}","The provided text discusses the use of Large Language Models (LLMs) and agents, which are programs that can perform tasks and make decisions. It mentions the release of Transformers Agents, a popular use-case for LLMs, as well as the LlamaIndex tool, which can augment agents' image-generation capabilities by suggesting better prompts. The text also introduces a Text2Image Prompt Assistant tool, which can re-write prompts to generate more beautiful images. The text suggests that LlamaIndex can be used by agents and provides some context on the capabilities and limitations of LLMs and agents, as well as the challenges and opportunities associated with their use. Some potential questions that this text can answer include: - What are LLMs and agents, and how are they being used in practice? - How can LLMs and agents be integrated into specific applications, such as image generation? - What are some of the key benefits and drawbacks of using LLMs and agents, and how are these factors influencing their adoption and use? - How are LLMs and agents being developed and improved over time, and what are some of the key trends and innovations in this area? - How can LLMs and agents be made more robust and reliable, particularly in the face of uncertainty and ambiguity in input data? - How can LLMs and agents be made more efficient and scalable, particularly in terms of resource usage and computational complexity? - How can LLMs and agents be made more transparent and interpretable, particularly in terms of their decision-making processes and outcomes? 
- How can LLMs and agents be made more secure and trustworthy, particularly in terms of their privacy and confidentiality concerns? - How can LLMs and agents be made more adaptable and flexible, particularly in terms of their ability to handle new and diverse types of input data? - How can LlamaIndex be used with LLM agents, and how can custom tools in Transformers Agents be distributed and shared using Hugging Face Spaces? The text also provides examples of how the Text2Image Prompt Assistant tool can be used to generate different images based on varying prompts and temperatures."


## Query with reranker

In [41]:
# Query engine with reranking: similarity_top_k=10, the sentence-transformers
# reranker as post-processor, and tree_summarize for the final answer.
# Note: this rebinds `query_engine` from the earlier querying section.
query_engine = doc_sum_index.as_query_engine(
    response_mode="tree_summarize",
    similarity_top_k=10,
    node_postprocessors=[rerank_postprocessor],
)

In [42]:
# Answer the first question with the reranking query engine.
response = query_engine.query(question1)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


In [43]:
# Render the reranked answer in the notebook.
display_response(response)

**`Final Response:`** Based on the provided context information, the key features of llama-agents are:

1. **Distributed Service Oriented Architecture**: Every agent in LlamaIndex can be its own independently running microservice, orchestrated by a fully customizable LLM-powered control plane that routes and distributes tasks.
2. **Communication via standardized API interfaces**: Interface between agents using a central control plane orchestrator. Pass messages between agents using a message queue.
3. **Define agentic and explicit orchestration flows**: Developers have the flexibility to directly define the sequence of interactions between agents, or leave it up to an “agentic orchestrator” that decides which agents are relevant to the task.
4. **Ease of deployment**: Launch, scale, and monitor each agent and the control plane independently.
5. **Scalability and resource management**: Use built-in observability tools to monitor the quality and performance of the system and each individual agent service.
6. **Multi-agent system**: Llama-agents provides a framework for building complex multi-agent systems, allowing you to create multiple agents that can interact with each other.
7. **Query rewriting**: The system can rewrite user queries to improve retrieval, making it easier to find relevant information.
8. **Retriever Query Engine**: The system uses a Retriever Query Engine to execute queries and retrieve relevant information.
9. **OpenAI integration**: Llama-agents integrates with OpenAI, allowing you to leverage their large language models (LLMs) to perform tasks such as question-answering and natural language processing.
10. **Customizable**: The system is highly customizable, allowing you to define your own agents, tools, and workflows.
11. **Pipeline-based architecture**: Llama-agents uses a pipeline-based architecture, which allows you to chain multiple agents and tools together to create complex workflows.
12. **Agent orchestration**: The system provides an orchestration layer that allows you to manage and coordinate the execution of multiple agents and tools.
13. **Multi-source data integration**: Llama-agents can integrate with multiple data sources, allowing you to retrieve information from various sources and combine it to answer complex questions.

Note that some of these features may be mentioned in multiple sources, but they are all related to the key features of llama-agents.

In [44]:
# Inspect the per-node metadata attached to the response.
response.metadata

{'c18d9bd8-763e-4a8c-b32f-2e8c87b5c3be': {'file_path': '/workspace/projects/LlamindexHelper/data/introducing-llama-agents-a-powerful-framework-for-building-production-multi-agent-ai-systems.html',
  'file_name': 'introducing-llama-agents-a-powerful-framework-for-building-production-multi-agent-ai-systems.html',
  'file_type': 'text/html',
  'file_size': 18790,
  'creation_date': '2024-07-21',
  'last_modified_date': '2024-07-21',
  'retrieval_score': None},
 '7e016859-6da8-439e-859d-93470687a784': {'file_path': '/workspace/projects/LlamindexHelper/data/how-to-build-llm-agents-in-typescript-with-llamaindex-ts-a88ed364a7aa.html',
  'file_name': 'how-to-build-llm-agents-in-typescript-with-llamaindex-ts-a88ed364a7aa.html',
  'file_type': 'text/html',
  'file_size': 16248,
  'creation_date': '2024-07-21',
  'last_modified_date': '2024-07-21',
  'retrieval_score': None}}