In [1]:
from settings import (
    COMPLETIONS_MODEL,
    API_EXCHANGE_VERSION,
    API_BASE_URL,
    
    EMBEDDINGS_MODEL,
    EMBEDDINGS_BASE_URL,
    TOKEN_ID
)

In [2]:
from llama_index.core.tools import FunctionTool


def add(x: int, y: int):
    """Adds two integers together."""
    # The docstring above is kept verbatim: this function is wrapped with
    # FunctionTool.from_defaults below, which by default takes the tool
    # description from the docstring.
    total = x + y
    return total

def mystery(x: int, y: int):
    """Mystery function that operates on top of two numbers."""
    # The docstring doubles as the tool description once this function is
    # wrapped with FunctionTool.from_defaults, so its spelling matters.
    # Result: the sum of the inputs multiplied by their difference.
    return (x + y) * (x - y)

# Wrap each plain Python function as a FunctionTool so it can be offered to the LLM.
add_tool, mystery_tool = (
    FunctionTool.from_defaults(fn=func) for func in (add, mystery)
)

In [3]:
from llama_index.llms.azure_openai import AzureOpenAI
from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding

# Both Azure OpenAI clients share the same key and API version; only the
# engine name and the endpoint base URL differ between them.
_azure_shared_kwargs = {
    "api_key": TOKEN_ID,
    "api_version": API_EXCHANGE_VERSION,
}

# Completions client.
llm = AzureOpenAI(
    engine=COMPLETIONS_MODEL,
    azure_endpoint=f"{API_BASE_URL}/api",
    **_azure_shared_kwargs,
)

# Embeddings client.
embed_model = AzureOpenAIEmbedding(
    engine=EMBEDDINGS_MODEL,
    azure_endpoint=f"{EMBEDDINGS_BASE_URL}/api",
    **_azure_shared_kwargs,
)

In [4]:
# Offer both tools to the LLM and let it decide which one the prompt calls for.
# verbose=True echoes the selected call and its output.
response = llm.predict_and_call(
    [add_tool, mystery_tool],
    "Tell me the output of the add function on 2 and 9",
    verbose=True,
)
# print() applies str() to its argument itself.
print(response)

=== Calling Function ===
Calling function: add with args: {"x": 2, "y": 9}
=== Function Output ===
11
11


### Defining an Auto-Retrieval Tool

In [5]:
from llama_index.core import SimpleDirectoryReader

# Read the MetaGPT paper from the local data directory.
pdf_reader = SimpleDirectoryReader(input_files=['data/metagpt.pdf'])
documents = pdf_reader.load_data()

In [6]:
from llama_index.core.node_parser import SentenceSplitter

# Split the loaded documents into chunks ("nodes").
# NOTE(review): chunk_size=1024 is presumably measured in tokens — confirm
# against the SentenceSplitter documentation.
splitter = SentenceSplitter(chunk_size=1024)
nodes = splitter.get_nodes_from_documents(documents)

In [7]:
# Show one chunk (index 3) with all of its metadata, to see which metadata
# keys (e.g. page_label) exist on the nodes.
print(nodes[3].get_content(metadata_mode='all'))

page_label: 3
file_name: metagpt.pdf
file_path: data\metagpt.pdf
file_type: application/pdf
file_size: 16911937
creation_date: 2024-05-31
last_modified_date: 2024-05-31

Other works focus on
sociological phenomena. For example, Generative Agents (Park et al., 2023) creates a “town” of 25
agents to study language interaction, social understanding, and collective memory. In the Natural
Language-Based Society of Mind (NLSOM) (Zhuge et al., 2023), agents with different functions
interact to solve complex tasks through multiple rounds of “mindstorms.” Cai et al. (2023) propose
a model for cost reduction by combining large models as tool makers and small models as tool users.
Some works emphasize cooperation and competition related to planning and strategy (Bakhtin et al.,
2022); others propose LLM-based economies (Zhuge et al., 2023). These works focus on open-
world human behavior simulation, while MetaGPT aims to introduce human practice into multi-
agents frameworks. Besides, LLM-based a

In [8]:
from llama_index.core import VectorStoreIndex, Settings

# Register the Azure clients on the global Settings object before any index
# or query engine is built.
# NOTE(review): presumably the index and query engines below pick these up as
# their defaults — confirm against the llama_index Settings documentation.
Settings.llm = llm
Settings.embed_model = embed_model


# Build a vector index over the chunked nodes.
vector_index = VectorStoreIndex(nodes)
# Unfiltered query engine limited to the 2 most similar chunks. The
# "Metadata Filtering" cell rebinds `query_engine` to a filtered variant.
query_engine = vector_index.as_query_engine(similarity_top_k=2)

### Metadata Filtering

In [9]:
from llama_index.core.vector_stores import MetadataFilters

# Restrict retrieval to chunks whose `page_label` metadata equals '2'.
page_two_only = MetadataFilters.from_dicts(
    [{'key': 'page_label', 'value': '2'}]
)

query_engine = vector_index.as_query_engine(
    filters=page_two_only,
    similarity_top_k=2,
)

response = query_engine.query(
    "What are some high-level results of MetaGPT"
)

In [10]:
# Display the answer text of the query result.
response.response

'MetaGPT achieves a new state-of-the-art (SoTA) in code generation benchmarks with 85.9% and 87.7% in Pass@1. It also stands out in handling higher levels of software complexity and offering extensive functionality. In experimental evaluations, MetaGPT achieves a 100% task completion rate, demonstrating the robustness and efficiency (time and token costs) of its design.'

In [11]:
# List the metadata of every chunk the answer was drawn from, to check that
# the page filter was honoured.
for source_node in response.source_nodes:
    print(source_node.metadata)

{'page_label': '2', 'file_name': 'metagpt.pdf', 'file_path': 'data\\metagpt.pdf', 'file_type': 'application/pdf', 'file_size': 16911937, 'creation_date': '2024-05-31', 'last_modified_date': '2024-05-31'}


### Enhancing Data Retrieval

- Integrating Metadata Filters into a retrieval tool function

- This function enables more precise retrieval by accepting a query string and optional metadata filters, such as page numbers

- The LLM can intelligently infer relevant metadata filters (e.g. page numbers) based on the user's query
  
- We can define different types of metadata filters like section IDs, headers or footers

In [18]:
from typing import List, Optional
from llama_index.core.vector_stores import FilterCondition

def vector_query(query: str, page_numbers: Optional[List[int]] = None):
    """
    Performs a vector search over an index.

    query (str): string query to be embedded
    page_numbers (Optional[List[int]]): Filter by set of pages. Leave as None
        (or an empty list) if we want to query over all pages.
        Otherwise filter by set of specific pages.

    Returns the response produced by the query engine for `query`.
    """
    # The docstring invites callers to omit the page list, so it must be
    # optional; a missing or empty list means "search every page".
    requested_pages = page_numbers or []

    # `page_label` metadata is stored as a string, hence the str() conversion.
    metadata_dicts = [
        {'key': 'page_label', 'value': str(p)} for p in requested_pages
    ]

    # Only build a filter when there is something to filter on. The conditions
    # are OR-ed so a chunk from any of the requested pages qualifies.
    filters = None
    if metadata_dicts:
        filters = MetadataFilters.from_dicts(
            metadata_dicts,
            condition=FilterCondition.OR
        )

    query_engine = vector_index.as_query_engine(
        similarity_top_k=2,
        filters=filters
    )

    response = query_engine.query(query)
    return response

In [19]:
# Expose vector_query to the LLM under the tool name "vector_tool".
vector_query_tool = FunctionTool.from_defaults(fn=vector_query, name="vector_tool")

In [23]:
# Only the vector tool is offered. The prompt mentions a page, so the LLM is
# expected to fill in `page_numbers` itself; verbose=True logs the call it made.
response = llm.predict_and_call(
    [vector_query_tool],
    "What are the benchmark results of MetaGPT as described on page 2?",
    verbose=True
)

=== Calling Function ===
Calling function: vector_tool with args: {"query": "benchmark results MetaGPT", "page_numbers": [2]}
=== Function Output ===
MetaGPT achieves a new state-of-the-art (SoTA) with 85.9% and 87.7% in Pass@1 in code generation benchmarks, outperforming other popular frameworks for creating complex software projects such as AutoGPT, LangChain, AgentVerse, and ChatDev. Additionally, MetaGPT stands out in handling higher levels of software complexity and offering extensive functionality, as demonstrated by its 100% task completion rate in experimental evaluations.


In [24]:
# Confirm which pages the tool call actually retrieved from.
for source_node in response.source_nodes:
    print(source_node.metadata)

{'page_label': '2', 'file_name': 'metagpt.pdf', 'file_path': 'data\\metagpt.pdf', 'file_type': 'application/pdf', 'file_size': 16911937, 'creation_date': '2024-05-31', 'last_modified_date': '2024-05-31'}


### Setting up all the tools

In [26]:
from llama_index.core import SummaryIndex
from llama_index.core.tools import QueryEngineTool

# Second index over the same nodes, used for whole-document summaries.
summary_index = SummaryIndex(nodes)

summary_query_engine = summary_index.as_query_engine(
    use_async=True,
    response_mode="tree_summarize",
)

# The description is the text the LLM sees when deciding whether to use this tool.
summary_tool = QueryEngineTool.from_defaults(
    query_engine=summary_query_engine,
    name="summary_tool",
    description="Useful if you want to get a summary of MetaGPT",
)


In [30]:
# Both tools are offered here; the LLM chooses one based on the prompt, and
# verbose=True logs which tool it called and with what arguments.
response = llm.predict_and_call(
    [vector_query_tool, summary_tool],
    "what are the MetaGPT comparisons with ChatDev described on page 8?",
    verbose=True
)

=== Calling Function ===
Calling function: vector_tool with args: {"query": "MetaGPT comparisons with ChatDev", "page_numbers": [8]}
=== Function Output ===
MetaGPT outperforms ChatDev on the challenging SoftwareDev dataset in nearly all metrics, according to Table 1 in the provided context. For example, considering the executability, MetaGPT achieves a score of 3.75, which is very close to 4 (flawless). Additionally, MetaGPT takes less time (503 seconds) compared to ChatDev. Considering the code statistic and the cost of human revision, MetaGPT also significantly outperforms ChatDev.
