In [1]:
import nest_asyncio

# NOTE(review): nest_asyncio patches asyncio so event loops can be nested;
# presumably needed here because the notebook kernel already runs a loop and
# the pack used below issues async calls -- confirm.
nest_asyncio.apply()

In [2]:
from pathlib import Path
import requests
from llama_index import SimpleDirectoryReader

# City article titles to download. Each title is sent as the `titles`
# parameter of the API request and is also used as the output file name.
wiki_titles = [
    "Toronto",
    "Seattle",
    "Chicago",
    "Boston",
    "Houston",
    "Tokyo",
    "Berlin",
    "Lisbon",
    "Paris",
    "London",
    "Atlanta",
    "Munich",
    "Shanghai",
    "Beijing",
    "Copenhagen",
    "Moscow",
    "Cairo",
    "Karachi",
]

# Create the output directory once, before the loop. `exist_ok=True` keeps the
# cell idempotent when it is re-run.
data_path = Path("data")
data_path.mkdir(exist_ok=True)

for title in wiki_titles:
    # Request the plain-text extract of the article.
    http_response = requests.get(
        "https://en.wikipedia.org/w/api.php",
        params={
            "action": "query",
            "format": "json",
            "titles": title,
            "prop": "extracts",
            # 'exintro': True,
            "explaintext": True,
        },
        # Without a timeout a stalled connection would hang the cell forever.
        timeout=30,
    )
    # Fail here on an HTTP error instead of on a confusing parse/KeyError below.
    http_response.raise_for_status()
    response = http_response.json()

    # The "pages" mapping is keyed by page id; only one title was requested,
    # so take the first (only) entry.
    page = next(iter(response["query"]["pages"].values()))
    if "extract" not in page:
        raise ValueError(f"Wikipedia returned no extract for title {title!r}")
    wiki_text = page["extract"]

    # Write as UTF-8 explicitly: the platform default encoding (e.g. cp1252 on
    # Windows) cannot represent every character that appears in article text.
    with open(data_path / f"{title}.txt", "w", encoding="utf-8") as fp:
        fp.write(wiki_text)

# Read each saved article back from disk and index it by city title.
# Every entry is a (title, description, document) tuple.
city_docs = {}
for wiki_title in wiki_titles:
    reader = SimpleDirectoryReader(input_files=[f"data/{wiki_title}.txt"])
    docs = reader.load_data()
    desc = f"This is a Wikipedia article about the city of {wiki_title}"
    # Only the first loaded document is kept for each file.
    first_doc = docs[0]
    city_docs[wiki_title] = (wiki_title, desc, first_doc)

### Download and Initialize Pack

In [3]:
from llama_index.llama_pack import download_llama_pack

# Fetch the pack by name. The result is bound to a class-style name because it
# is called like a constructor in the following cell.
# NOTE(review): the second argument looks like the local directory the pack is
# downloaded into -- confirm against the download_llama_pack documentation.
MultiDocumentAgentsPack = download_llama_pack(
    "MultiDocumentAgentsPack",
    "./multi_doc_agents_pack",
    # leave the below commented out (was for testing purposes)
    # llama_hub_url="https://raw.githubusercontent.com/run-llama/llama-hub/jerry/add_llama_packs/llama_hub",
)

In [6]:
# Split the (title, description, document) tuples into three parallel lists,
# then hand them to the pack in the order: documents, titles, descriptions.
city_tups = list(city_docs.values())
city_documents = [doc for _title, _desc, doc in city_tups]
city_titles = [title for title, _desc, _doc in city_tups]
city_descriptions = [desc for _title, desc, _doc in city_tups]
multi_doc_agents_pack = MultiDocumentAgentsPack(
    city_documents, city_titles, city_descriptions
)

### Run Pack

In [7]:
# Run the whole pack on a single two-city comparison question.
query = "Tell me the demographics of Houston, and then compare with the demographics of Chicago"
response = multi_doc_agents_pack.run(query)


STARTING TURN 1
---------------

=== Calling Function ===
Calling function: tool_Houston with args: {
  "input": "demographics"
}
Added user message to memory: demographics
=== Calling Function ===
Calling function: vector_tool with args: {
  "input": "demographics"
}
Got output: Houston has a population of 2,304,580 according to the 2020 U.S. census. In 2017, the estimated population was 2,312,717, and in 2018 it was 2,325,502. The city has a diverse demographic makeup, with a significant number of undocumented immigrants residing in the Houston area, comprising nearly 9% of the city's metropolitan population in 2017. The age distribution in Houston includes a significant number of individuals under 15 and between the ages of 20 to 34. The median age of the city is 33.4. The city has a mix of homeowners and renters, with an estimated 42.3% of Houstonians owning housing units. The median household income in 2019 was $52,338, and 20.1% of Houstonians lived at or below the poverty line.


In [8]:
# print() applies str() to its argument, so the explicit conversion is not needed.
print(response)


Houston has a larger population compared to Chicago. As of 2020, Houston has a population of 2,304,580, while Chicago's population is around 2.7 million. 

Both cities have diverse demographics, but the specific racial and ethnic makeup differs. In Houston, there is a significant number of undocumented immigrants, making up nearly 9% of the metropolitan population. The city has a diverse population with a mix of different racial and ethnic groups.

In Chicago, the largest racial or ethnic groups are non-Hispanic White (32.8% of the population), Blacks (30.1%), and Hispanics (29.0%). The city has a rich history of immigration, with various ancestral groups represented.

It's important to note that demographics can change over time, and these figures are based on the available data.


In [9]:
# Show the source nodes attached to the response (bare expression, so the
# notebook displays its repr as the cell output).
response.source_nodes

[NodeWithScore(node=TextNode(id_='f245a0d1-d6ef-4106-9d84-60f320829f7c', embedding=None, metadata={'file_path': 'data/Houston.txt', 'file_name': 'Houston.txt', 'file_type': 'text/plain', 'file_size': 85409, 'creation_date': '2024-01-11', 'last_modified_date': '2024-01-11', 'last_accessed_date': '2024-01-11'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='577cbc95-673d-4ecb-be7c-44b40cbb02e1', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'file_path': 'data/Houston.txt', 'file_name': 'Houston.txt', 'file_type': 'text/plain', 'file_size': 85409, 'creation_date': '2024-01-11', 'last_modified_date': '2024-01-11', 'last_accessed_date': '2024-01-11'}, hash='7c6ad1de26343a723434da15a2e91b621e943f02d97de77114cc

In [10]:
# Retrieve the pack's internal components for inspection. Per the recorded
# output this is a dict with keys such as 'top_agent', 'obj_index' and
# 'doc_agents' (the latter keyed by city title).
modules = multi_doc_agents_pack.get_modules()
display(modules)

{'top_agent': <llama_index.agent.legacy.retriever_openai_agent.FnRetrieverOpenAIAgent at 0x153ad7d50>,
 'obj_index': <llama_index.objects.base.ObjectIndex at 0x153b14310>,
 'doc_agents': {'Toronto': <llama_index.agent.openai.base.OpenAIAgent at 0x152182390>,
  'Seattle': <llama_index.agent.openai.base.OpenAIAgent at 0x1525c7dd0>,
  'Chicago': <llama_index.agent.openai.base.OpenAIAgent at 0x152726c50>,
  'Boston': <llama_index.agent.openai.base.OpenAIAgent at 0x15286bdd0>,
  'Houston': <llama_index.agent.openai.base.OpenAIAgent at 0x1529d3f90>,
  'Tokyo': <llama_index.agent.openai.base.OpenAIAgent at 0x152a9da90>,
  'Berlin': <llama_index.agent.openai.base.OpenAIAgent at 0x152bdbc90>,
  'Lisbon': <llama_index.agent.openai.base.OpenAIAgent at 0x152cf8450>,
  'Paris': <llama_index.agent.openai.base.OpenAIAgent at 0x152e9ba90>,
  'London': <llama_index.agent.openai.base.OpenAIAgent at 0x153031650>,
  'Atlanta': <llama_index.agent.openai.base.OpenAIAgent at 0x152aca8d0>,
  'Munich': <llama_

In [1]:
from llama_index import Document
from llama_index.schema import MetadataMode

# Metadata attached to the demo document. "file_name" is listed in
# excluded_llm_metadata_keys below, so it is left out of the LLM view.
doc_metadata = {
    "file_name": "super_secret_document.txt",
    "category": "finance",
    "author": "LlamaIndex",
}

# Customise how metadata entries are formatted, how they are joined, and how
# the metadata block is combined with the document text.
document = Document(
    text="This is a super-customized document",
    metadata=doc_metadata,
    excluded_llm_metadata_keys=["file_name"],
    metadata_seperator="::",
    metadata_template="{key}=>{value}",
    text_template="Metadata: {metadata_str}\n-----\nContent: {content}",
)

# Render the same document under each metadata mode to compare the two views.
llm_view = document.get_content(metadata_mode=MetadataMode.LLM)
print("The LLM sees this: \n", llm_view)

embed_view = document.get_content(metadata_mode=MetadataMode.EMBED)
print("The Embedding model sees this: \n", embed_view)

The LLM sees this: 
 Metadata: category=>finance::author=>LlamaIndex
-----
Content: This is a super-customized document
The Embedding model sees this: 
 Metadata: file_name=>super_secret_document.txt::category=>finance::author=>LlamaIndex
-----
Content: This is a super-customized document
