In [None]:
# Create the directory (this command works as is)
!mkdir -p 'data/10k/'

# Use curl to download the files
# The syntax changes from 'wget [url] -O [file]' to 'curl [url] -o [file]'
!curl 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/10k/uber_2021.pdf' -o 'data/10k/uber_2021.pdf'
!curl 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/10k/lyft_2021.pdf' -o 'data/10k/lyft_2021.pdf'

In [None]:

import os
from getpass import getpass

import nest_asyncio

# Patch asyncio so nested event loops are allowed; the notebook kernel already
# runs its own loop.
nest_asyncio.apply()

from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex
from llama_index.core.query_engine import SubQuestionQueryEngine
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.llms.openai import OpenAI

# Never hardcode the API key in the notebook: reuse the environment variable
# when it is already set, otherwise prompt for it without echoing the input.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

# Default LLM used by every llama_index component created below.
Settings.llm = OpenAI(temperature=0.2, model="gpt-4.1-mini")
# Lyft: read the 10-K PDF, build a vector index over its documents, and expose
# a query engine configured with similarity_top_k=3.
lyft_reader = SimpleDirectoryReader(input_files=["./data/10k/lyft_2021.pdf"])
lyft_docs = lyft_reader.load_data()
lyft_index = VectorStoreIndex.from_documents(lyft_docs)
lyft_engine = lyft_index.as_query_engine(similarity_top_k=3)

# Uber: same pipeline on the Uber filing.
uber_reader = SimpleDirectoryReader(input_files=["./data/10k/uber_2021.pdf"])
uber_docs = uber_reader.load_data()
uber_index = VectorStoreIndex.from_documents(uber_docs)
uber_engine = uber_index.as_query_engine(similarity_top_k=3)
# One tool per company. The tool name and description are passed as
# ToolMetadata alongside the matching query engine.
query_engine_tools = [
    QueryEngineTool(
        query_engine=company_engine,
        metadata=ToolMetadata(
            name=tool_name,
            description=f"Provides information about {company} financials for year 2021",
        ),
    )
    for tool_name, company, company_engine in (
        ("lyft_10k", "Lyft", lyft_engine),
        ("uber_10k", "Uber", uber_engine),
    )
]

# Build the sub-question engine on top of the per-company tools.
s_engine = SubQuestionQueryEngine.from_defaults(query_engine_tools=query_engine_tools)

# Ask a question that spans both filings.
response = s_engine.query(
    "Compare and contrast the customer segments and geographies that grew the fastest"
)


In [19]:
# Run the cross-company comparison through the sub-question engine; the
# generated sub-questions and their answers are printed as cell output.
comparison_question = (
    "Compare and contrast the customer segments and geographies that grew the fastest"
)
response = s_engine.query(comparison_question)


Generated 4 sub questions.
[1;3;38;2;237;90;200m[lyft_10k] Q: What were the fastest growing customer segments for Lyft in 2021?
[0m[1;3;38;2;90;149;237m[lyft_10k] Q: What were the fastest growing geographies for Lyft in 2021?
[0m[1;3;38;2;11;159;203m[uber_10k] Q: What were the fastest growing customer segments for Uber in 2021?
[0m[1;3;38;2;155;135;227m[uber_10k] Q: What were the fastest growing geographies for Uber in 2021?
[0m[1;3;38;2;90;149;237m[lyft_10k] A: The provided information does not specify which geographies were the fastest growing for Lyft in 2021.
[0m[1;3;38;2;155;135;227m[uber_10k] A: The context does not provide specific information about the fastest growing geographies for Uber in 2021.
[0m[1;3;38;2;11;159;203m[uber_10k] A: The fastest growing customer segments for Uber in 2021 included members of their membership programs such as Uber One, Uber Pass, Eats Pass, and Rides Pass, which collectively exceeded 6 million members by the end of the year. Additio

In [20]:
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.llms.openai import OpenAI

# Credentials: OPENAI_API_KEY must be set in the environment, or pass the key
# explicitly as OpenAI(api_key="…").
# Model choice (and optionally temperature) is configured here.
llm = OpenAI(model="gpt-4o-mini")

# Step 1: read the PDF(s) to query.
reader = SimpleDirectoryReader(input_files=["./data/10k/uber_2021.pdf"])
docs = reader.load_data()

# Step 2: build a vector index over the loaded documents.
index = VectorStoreIndex.from_documents(docs)

# Step 3: create a streaming query engine (similarity_top_k=4) that uses the
# LLM configured above, then ask the question.
engine = index.as_query_engine(
    streaming=True,
    similarity_top_k=4,
    llm=llm,
)
response = engine.query("Give me a 3-sentence summary with page citations.")

# Print the streamed answer; it is expected to carry "(page X)"-style references.
response.print_response_stream()

The Borrower and its Restricted Subsidiaries are duly organized and in good standing under applicable laws, with the necessary powers to conduct their business (p. 249). The Transactions have been authorized and executed, constituting legal obligations enforceable in accordance with their terms, subject to certain legal principles (p. 249). Additionally, the Borrower has provided its consolidated financial statements, which fairly present its financial position as of specified dates, with no material adverse changes noted since December 31, 2014 (p. 249).