In [1]:
from llama_index.core import (
    VectorStoreIndex,
    SimpleKeywordTableIndex,
    SimpleDirectoryReader,
)
from llama_index.core import SummaryIndex
from llama_index.core.schema import IndexNode
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.llms.openai import OpenAI
from llama_index.core.callbacks import CallbackManager

In [2]:
# Cities whose Wikipedia articles form the corpus; later cells iterate this
# list in the order given here.
wiki_titles = [
    "Toronto", "Seattle", "Chicago", "Boston", "Houston", "Tokyo",
    "Berlin", "Lisbon", "Paris", "London", "Atlanta", "Munich",
    "Shanghai", "Beijing", "Copenhagen", "Moscow", "Cairo", "Karachi",
]
from pathlib import Path

import requests

# Download the plain-text extract of each city's Wikipedia article and store
# it as data/<title>.txt.
data_path = Path("data")
# Create the output directory once, up front; exist_ok makes re-runs a no-op.
data_path.mkdir(exist_ok=True)

for title in wiki_titles:
    http_response = requests.get(
        "https://en.wikipedia.org/w/api.php",
        params={
            "action": "query",
            "format": "json",
            "titles": title,
            "prop": "extracts",
            # 'exintro': True,
            "explaintext": True,
        },
        # Without a timeout a stalled connection would hang the cell forever.
        timeout=30,
    )
    # Fail loudly on an HTTP error instead of trying to parse an error body.
    http_response.raise_for_status()
    response = http_response.json()

    # Take the first entry of the "pages" mapping without needing its key.
    page = next(iter(response["query"]["pages"].values()))
    wiki_text = page["extract"]

    # Write UTF-8 explicitly rather than relying on the platform default
    # encoding, which can fail on non-ASCII article text.
    with open(data_path / f"{title}.txt", "w", encoding="utf-8") as fp:
        fp.write(wiki_text)
# Read every saved article back in, keyed by its city title.
city_docs = {}
for wiki_title in wiki_titles:
    reader = SimpleDirectoryReader(input_files=[f"data/{wiki_title}.txt"])
    city_docs[wiki_title] = reader.load_data()

In [3]:
import os

# os.environ["OPENAI_API_KEY"] = "sk-..."
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import Settings

# Build the default chat model, then register it on the global Settings.
llm = OpenAI(temperature=0, model="gpt-3.5-turbo")
Settings.llm = llm

# Same two steps for the default embedding model.
embed_model = OpenAIEmbedding(model="text-embedding-ada-002")
Settings.embed_model = embed_model

In [4]:
from llama_index.agent.openai import OpenAIAgent
from llama_index.core import load_index_from_storage, StorageContext
from llama_index.core.node_parser import SentenceSplitter
import os

node_parser = SentenceSplitter()

# One tool-using agent per city, keyed by article title.
agents = {}
# One top-2 vector query engine per city, keyed by article title.
query_engines = {}

# Nodes from every city combined; this is for the baseline index.
all_nodes = []

# The agent LLM configuration is the same for every city, so construct it
# once here instead of once per loop iteration.
function_llm = OpenAI(model="gpt-4")

for wiki_title in wiki_titles:
    nodes = node_parser.get_nodes_from_documents(city_docs[wiki_title])
    all_nodes.extend(nodes)

    # The vector index is persisted per city so re-runs can load it from disk
    # instead of rebuilding it.
    persist_dir = f"./data/{wiki_title}"
    if not os.path.exists(persist_dir):
        # build vector index
        vector_index = VectorStoreIndex(nodes)
        vector_index.storage_context.persist(persist_dir=persist_dir)
    else:
        vector_index = load_index_from_storage(
            StorageContext.from_defaults(persist_dir=persist_dir),
        )

    # build summary index (not persisted; rebuilt from the parsed nodes)
    summary_index = SummaryIndex(nodes)
    # define query engines
    vector_query_engine = vector_index.as_query_engine(llm=llm)
    summary_query_engine = summary_index.as_query_engine(llm=llm)

    # define tools: the descriptions tell the agent when to pick each one
    query_engine_tools = [
        QueryEngineTool(
            query_engine=vector_query_engine,
            metadata=ToolMetadata(
                name="vector_tool",
                description=(
                    "Useful for questions related to specific aspects of"
                    f" {wiki_title} (e.g. the history, arts and culture,"
                    " sports, demographics, or more)."
                ),
            ),
        ),
        QueryEngineTool(
            query_engine=summary_query_engine,
            metadata=ToolMetadata(
                name="summary_tool",
                description=(
                    "Useful for any requests that require a holistic summary"
                    f" of EVERYTHING about {wiki_title}. For questions about"
                    " more specific sections, please use the vector_tool."
                ),
            ),
        ),
    ]

    # build agent
    agent = OpenAIAgent.from_tools(
        query_engine_tools,
        llm=function_llm,
        verbose=True,
        system_prompt=f"""\
You are a specialized agent designed to answer queries about {wiki_title}.
You must ALWAYS use at least one of the tools provided when answering a question; do NOT rely on prior knowledge.\
""",
    )

    agents[wiki_title] = agent
    query_engines[wiki_title] = vector_index.as_query_engine(
        similarity_top_k=2
    )

In [5]:
from llama_index.core import VectorStoreIndex
from llama_index.core.objects import ObjectIndex, SimpleToolNodeMapping


def _city_agent_tool(city):
    """Wrap the agent stored for `city` as a tool named ``tool_<city>``."""
    blurb = (
        f"This content contains Wikipedia articles about {city}. Use"
        f" this tool if you want to answer any questions about {city}.\n"
    )
    return QueryEngineTool(
        query_engine=agents[city],
        metadata=ToolMetadata(name=f"tool_{city}", description=blurb),
    )


# one tool per document agent, in wiki_titles order
all_tools = [_city_agent_tool(city) for city in wiki_titles]

# "object" index over the tools; a retriever is built from it later
tool_mapping = SimpleToolNodeMapping.from_objects(all_tools)
obj_index = ObjectIndex.from_objects(all_tools, tool_mapping, VectorStoreIndex)

In [11]:
from llama_index.agent.openai_legacy import FnRetrieverOpenAIAgent

# Retriever that returns the three most similar city tools.
tool_retriever = obj_index.as_retriever(similarity_top_k=3)

# System prompt for the top-level agent (it starts with a single space and
# ends with a newline).
top_agent_prompt = (
    " You are an agent designed to answer queries about a set of given cities.\n"
    "Please always use the tools provided to answer a question."
    " Do not rely on prior knowledge.\n"
)

top_agent = FnRetrieverOpenAIAgent.from_retriever(
    tool_retriever,
    system_prompt=top_agent_prompt,
    verbose=True,
)

In [12]:
# Baseline: a single flat vector index over the nodes of every city,
# queried with the four most similar chunks.
base_index = VectorStoreIndex(nodes=all_nodes)
base_query_engine = base_index.as_query_engine(similarity_top_k=4)

In [18]:
# should use Moscow agent -> summary tool
response = top_agent.query("Tell me about what it's like to live in Moscow")

STARTING TURN 1
---------------

=== Calling Function ===
Calling function: tool_Moscow with args: {"input":"Living in Moscow"}
Added user message to memory: Living in Moscow
=== Calling Function ===
Calling function: summary_tool with args: {
  "input": "Living in Moscow"
}
Got output: Moscow provides a dynamic living environment with an extensive transit system and plenty of parks. The city boasts prestigious educational institutions and a diverse population, including students. Moscow is a key education hub, with well-known universities such as the Moscow Institute of Physics and Technology and the Moscow Engineering Physics Institute. In recent times, there has been an increase in commercial and private educational establishments offering courses in business and management. Moreover, Moscow is a significant scientific hub, hosting the Russian Academy of Sciences headquarters and numerous research facilities.

Got output: Living in Moscow offers a vibrant and dynamic environment. Th

In [13]:
# expected routing: Boston agent, then its vector tool
boston_question = "Tell me about the arts and culture in Boston"
response = top_agent.query(boston_question)

STARTING TURN 1
---------------

=== Calling Function ===
Calling function: tool_Boston with args: {"input":"arts and culture"}
Added user message to memory: arts and culture
=== Calling Function ===
Calling function: vector_tool with args: {
  "input": "arts and culture"
}
Got output: Boston has a rich arts and culture scene, with a strong emphasis on music and performing arts. The city is home to renowned institutions such as the Boston Symphony Orchestra, the Boston Ballet, and various theaters in the Theater District. Additionally, Boston hosts several major annual events like the Boston Early Music Festival, the Boston Arts Festival, and the Boston gay pride parade. The city also boasts a number of art museums and galleries, including the Museum of Fine Arts and the Isabella Stewart Gardner Museum, making it a vibrant hub for artistic expression and appreciation.

Got output: Boston is renowned for its vibrant arts and culture scene. The city is home to prestigious institutions su

In [14]:
# Show the text of the most recent `response`.
print(str(response))

Boston is renowned for its vibrant arts and culture scene. The city is home to prestigious institutions such as the Boston Symphony Orchestra and the Boston Ballet, both of which contribute significantly to the city's cultural landscape. Boston's Theater District is a hub for performing arts, hosting a variety of shows and performances.

In addition to these, Boston hosts several major annual events that celebrate the city's artistic and cultural diversity. These include the Boston Early Music Festival, the Boston Arts Festival, and the Boston gay pride parade.

The city is also a haven for art lovers, with numerous museums and galleries. The Museum of Fine Arts and the Isabella Stewart Gardner Museum are among the most notable, offering a wide range of art collections that cater to different tastes and interests.

In summary, Boston's arts and culture scene is a vibrant mix of music, performing arts, festivals, and visual arts, making it a dynamic hub for artistic expression and appre

In [15]:
# Baseline: put the Boston question to the flat index's query engine.
baseline_question = "Tell me about the arts and culture in Boston"
response = base_query_engine.query(baseline_question)
print(response)


Boston has a rich arts and culture scene with a strong emphasis on literature, music, performing arts, and museums. The city has a deep literary history, being home to renowned writers like Ralph Waldo Emerson, Henry David Thoreau, and Nathaniel Hawthorne. Boston's music culture is vibrant, with institutions like the Boston Symphony Orchestra and the Boston Pops Orchestra being highly acclaimed. The city also boasts a variety of performing arts organizations, theaters, and annual events such as the Boston Early Music Festival and the Boston Arts Festival. Additionally, Boston is home to several art museums and galleries, including the Museum of Fine Arts and the Isabella Stewart Gardner Museum, showcasing a diverse range of artistic expressions.
