In [None]:
from helper import get_azure_openai_keys

# Pull the Azure OpenAI connection settings from the local helper module.
# The helper returns them in this order: key, endpoint, API version.
(
    AZURE_API_KEY,
    AZURE_ENDPOINT,
    AZURE_API_VERSION,
) = get_azure_openai_keys()

In [None]:
import nest_asyncio

# Patch asyncio so event loops can be nested. Presumably required because the
# notebook kernel already runs a loop and a later cell asks for async execution
# (use_async=True on the summary query engine) -- confirm before removing.
nest_asyncio.apply()

# simple tools

In [None]:
from llama_index.core.tools import FunctionTool


def add(x: int, y: int) -> int:
    """Adds two integers together."""
    # NOTE: the docstring is deliberately left as-is; FunctionTool.from_defaults
    # uses it as the tool description that the LLM sees.
    total = x + y
    return total


def mystery(x: int, y: int) -> int:
    """Mystery function that operates on top of two numbers."""
    # NOTE: the vague docstring is intentional and left untouched; it is what the
    # LLM is shown, so the model has to pick this tool by name alone.
    # Computes the square of the sum of the two arguments.
    pair_sum = x + y
    return pair_sum * pair_sum


# Wrap both plain Python functions as tools the LLM can call.
add_tool, mystery_tool = (
    FunctionTool.from_defaults(fn=tool_fn) for tool_fn in (add, mystery)
)

In [None]:
from helper import get_azure_llm

# Let the model choose between the two tools and run whichever one it picks.
llm = get_azure_llm()
mystery_question = "Tell me the output of the mystery function on 2 and 9"
response = llm.predict_and_call(
    [add_tool, mystery_tool], mystery_question, verbose=True
)
# print() already applies str() to its argument.
print(response)

=== Calling Function ===
Calling function: mystery with args: {"x": 2, "y": 9}
=== Function Output ===
121
121


# auto retrieval tool

## load data

In [None]:
from helper import get_azure_llm, get_azure_embed_model

# Obtain the chat model and the embedding model from the local helper module
# (LLM first, then the embedding model, as before).
llm, embed_model = get_azure_llm(), get_azure_embed_model()

In [None]:
from llama_index.core import SimpleDirectoryReader
from helper import get_data_path

# Read the MetaGPT paper; the helper resolves the PDF's location on disk.
metagpt_pdf = get_data_path("metagpt.pdf")
pdf_reader = SimpleDirectoryReader(input_files=[metagpt_pdf])
documents = pdf_reader.load_data()

In [None]:
from llama_index.core.node_parser import SentenceSplitter

# Split the loaded documents into nodes (chunks) for indexing.
CHUNK_SIZE = 1024  # chunk_size handed to SentenceSplitter
splitter = SentenceSplitter(chunk_size=CHUNK_SIZE)
nodes = splitter.get_nodes_from_documents(documents)

In [None]:
# Inspect one chunk together with all of its metadata.
sample_node = nodes[30]
sample_text = sample_node.get_content(metadata_mode="all")
print(sample_text)

page_label: 25
file_name: metagpt.pdf
file_path: /mnt/c/Users/f279814/git/deeplearning_ai_courses/data/building-agentic-rag-with-llamaindex/metagpt.pdf
file_type: application/pdf
file_size: 16911937
creation_date: 2025-01-03
last_modified_date: 2025-01-03

Preprint
be given a function signature and its docstring by the user. Write your full implementation (restate
the function signature).” As shown in Table 7, GPT-4 is more sensitive to prompt, code parser, and
post-processing results on the HumanEval data set. It is difficult for GPT-3.5-Turbo to return the
correct completion code without prompt words.
Table 7: Performance of GPT models on HumanEval. Experiments were conducted five times
using gpt-4-0613 and gpt-3.5-turbo-0613 with different settings.
Settings Model 1 2 3 4 5 Avg. Std.
A gpt-4-0613 0.732 0.707 0.732 0.713 0.738 0.724 0.013
A gpt-3.5-turbo-0613 0.360 0.366 0.360 0.348 0.354 0.357 0.007
B gpt-4-0613 0.787 0.811 0.817 0.829 0.817 0.812 0.016
B gpt-3.5-turbo-0613 0.348 0.

In [None]:
# List every chunk index next to the first line of its embed-mode metadata
# (the page label), to see how chunks map onto PDF pages.
for i, node in enumerate(nodes):
    # Take the whole first metadata line instead of a fixed 14-character slice:
    # the slice kept the trailing newline for single-digit pages (printing a
    # blank line after each) and would cut off three-digit page labels.
    first_metadata_line = node.get_metadata_str(mode="embed").partition("\n")[0]
    print(f"{i} {first_metadata_line}")

0 page_label: 1

1 page_label: 2

2 page_label: 3

3 page_label: 3

4 page_label: 4

5 page_label: 5

6 page_label: 6

7 page_label: 7

8 page_label: 7

9 page_label: 8

10 page_label: 9

11 page_label: 10
12 page_label: 10
13 page_label: 11
14 page_label: 11
15 page_label: 12
16 page_label: 12
17 page_label: 13
18 page_label: 13
19 page_label: 14
20 page_label: 15
21 page_label: 16
22 page_label: 17
23 page_label: 18
24 page_label: 19
25 page_label: 20
26 page_label: 21
27 page_label: 22
28 page_label: 23
29 page_label: 24
30 page_label: 25
31 page_label: 26
32 page_label: 27
33 page_label: 28
34 page_label: 29


In [None]:
from llama_index.core import VectorStoreIndex

# Build a vector index over the chunks and a query engine that retrieves the
# two most similar chunks per question.
vector_index = VectorStoreIndex(nodes=nodes)
query_engine = vector_index.as_query_engine(
    similarity_top_k=2,
)

In [None]:
from llama_index.core.vector_stores import MetadataFilters

# Restrict retrieval to chunks whose page_label metadata is "2" (stored as a string).
page_two_filter = MetadataFilters.from_dicts(
    [{"key": "page_label", "value": "2"}]
)
query_engine = vector_index.as_query_engine(
    similarity_top_k=2, filters=page_two_filter
)

response = query_engine.query("What are some high-level results of MetaGPT?")

In [None]:
# Show the synthesized answer; print() already applies str() to its argument.
print(response)

MetaGPT achieves a new state-of-the-art with 85.9% and 87.7% in Pass@1 on code generation benchmarks. It also demonstrates a 100% task completion rate in experimental evaluations, showcasing its robustness and efficiency in handling complex software projects.


In [None]:
# Show the metadata of every chunk the answer was built from, to confirm the
# page filter was applied.
for source_node in response.source_nodes:
    print(source_node.metadata)

{'page_label': '2', 'file_name': 'metagpt.pdf', 'file_path': '/mnt/c/Users/f279814/git/deeplearning_ai_courses/data/building-agentic-rag-with-llamaindex/metagpt.pdf', 'file_type': 'application/pdf', 'file_size': 16911937, 'creation_date': '2025-01-03', 'last_modified_date': '2025-01-03'}


## define auto retrieval tool

In [None]:
from typing import List, Optional
from llama_index.core.vector_stores import FilterCondition


def vector_query(query: str, page_numbers: Optional[List[str]] = None) -> str:
    """Perform a vector search over an index.

    query (str): the string query to be embedded.
    page_numbers (Optional[List[str]]): Filter by set of pages. Leave as None (or omit)
        if we want to perform a vector search over all pages. Otherwise, filter by the
        set of specified pages.

    """
    # The docstring above is also the tool description shown to the LLM, so it
    # must agree with the signature: page_numbers is now genuinely optional.
    #
    # Page labels are stored as strings in the node metadata (e.g. '2'), so
    # coerce whatever the model sends to str, and ignore blank entries so that
    # None, [] and [""] all mean "search every page".
    pages = [str(page) for page in (page_numbers or []) if str(page).strip()]

    # Only build a filter when at least one page was requested; OR means a chunk
    # matches if it comes from any of the requested pages.
    filters = None
    if pages:
        filters = MetadataFilters.from_dicts(
            [{"key": "page_label", "value": page} for page in pages],
            condition=FilterCondition.OR,
        )

    query_engine = vector_index.as_query_engine(
        similarity_top_k=2,
        filters=filters,
    )
    # The query engine's response object is returned unchanged (not a plain
    # string, despite the annotation); later cells read `source_nodes` from the
    # result of `predict_and_call`.
    response = query_engine.query(query)
    return response


# Expose vector_query to the LLM under the tool name "vector_tool".
vector_query_tool = FunctionTool.from_defaults(
    fn=vector_query,
    name="vector_tool",
)

In [None]:
# Ask a page-specific question; the model is expected to fill in both the
# query text and the page filter for the tool call.
llm = get_azure_llm()

page_two_question = "What are the high-level results of MetaGPT as described on page 2?"
response = llm.predict_and_call([vector_query_tool], page_two_question, verbose=True)

=== Calling Function ===
Calling function: vector_tool with args: {"query": "high-level results of MetaGPT", "page_numbers": ["2"]}
=== Function Output ===
MetaGPT achieves a new state-of-the-art with 85.9% and 87.7% in Pass@1 for code generation benchmarks. It also demonstrates a 100% task completion rate, showcasing its robustness and efficiency in handling complex software projects.


In [None]:
# Check which chunks backed the tool's answer.
for retrieved in response.source_nodes:
    print(retrieved.metadata)

{'page_label': '2', 'file_name': 'metagpt.pdf', 'file_path': '/mnt/c/Users/f279814/git/deeplearning_ai_courses/data/building-agentic-rag-with-llamaindex/metagpt.pdf', 'file_type': 'application/pdf', 'file_size': 16911937, 'creation_date': '2025-01-03', 'last_modified_date': '2025-01-03'}


# combining the vector tool with other tools

In [None]:
from llama_index.core import SummaryIndex
from llama_index.core.tools import QueryEngineTool

# Build a summary index over the same chunks and expose it as a second tool.
summary_index = SummaryIndex(nodes=nodes)
summary_query_engine = summary_index.as_query_engine(
    response_mode="tree_summarize", use_async=True
)
summary_tool = QueryEngineTool.from_defaults(
    query_engine=summary_query_engine,
    name="summary_tool",
    description="Useful if you want to get a summary of MetaGPT",
)

In [None]:
# With both tools on offer, a page-specific question should go to the vector tool.
chatdev_question = "What are the MetaGPT comparisons with ChatDev described on page 8?"
response = llm.predict_and_call(
    [vector_query_tool, summary_tool], chatdev_question, verbose=True
)

=== Calling Function ===
Calling function: vector_tool with args: {"query": "MetaGPT comparisons with ChatDev", "page_numbers": ["8"]}
=== Function Output ===
MetaGPT outperforms ChatDev in several key metrics on the SoftwareDev dataset. MetaGPT achieves a higher executability score of 3.75 compared to ChatDev's 2.25. It also takes less time to run (503 seconds versus 762 seconds) and requires fewer tokens to generate one line of code (126.5/124.3 tokens compared to ChatDev's 248.9 tokens). Additionally, MetaGPT produces more code files, more lines of code per file, and a higher total number of code lines. The cost of human revision is also significantly lower for MetaGPT (0.83) compared to ChatDev (2.5).


In [None]:
# Confirm which page(s) the retrieved chunks came from.
for chunk in response.source_nodes:
    print(chunk.metadata)

{'page_label': '8', 'file_name': 'metagpt.pdf', 'file_path': '/mnt/c/Users/f279814/git/deeplearning_ai_courses/data/building-agentic-rag-with-llamaindex/metagpt.pdf', 'file_type': 'application/pdf', 'file_size': 16911937, 'creation_date': '2025-01-03', 'last_modified_date': '2025-01-03'}


In [None]:
# A whole-paper question should be routed to the summary tool instead.
summary_question = "What is a summary of the paper?"
response = llm.predict_and_call(
    [vector_query_tool, summary_tool],
    summary_question,
    verbose=True,
)

=== Calling Function ===
Calling function: summary_tool with args: {"input": "MetaGPT"}
=== Function Output ===
MetaGPT is a meta-programming framework designed to enhance software development through multi-agent collaboration based on large language models (LLMs). It assigns specific roles to agents, such as Product Manager, Architect, Engineer, and QA Engineer, to streamline workflows and reduce errors. The framework emphasizes structured communication through documents and diagrams, incorporates Standardized Operating Procedures (SOPs), and uses an executable feedback mechanism for iterative code improvement. MetaGPT has demonstrated superior performance in benchmarks like HumanEval and MBPP, excelling in metrics such as executability, cost efficiency, and productivity. It also supports continuous learning and improvement by allowing agents to modify their constraint prompts based on previous feedback. Additionally, MetaGPT operates locally to ensure data privacy and security and ca