In [7]:
from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex, load_index_from_storage
from llama_index.agent.lats import LATSAgentWorker
from llama_index.core.agent import AgentRunner
from llama_index.llms.ollama import Ollama
from llama_index.llms.openai_like import OpenAILike
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.core.storage import StorageContext
import os

In [8]:
# Patch asyncio so an already-running event loop can be re-entered.
# NOTE(review): presumably required because the notebook kernel already runs a
# loop and the agent / query-engine calls below drive asyncio internally — confirm.
import nest_asyncio 
nest_asyncio.apply()

In [9]:
# --- Model configuration -------------------------------------------------
# Alternative LLM backends tried during development are kept commented out
# for reference; exactly one `llm = ...` line should be active.
# llm = OpenAILike(
#     api_base="http://localhost:11434/v1",
#     model="mistral-small",
#     api_key="fake"
#     )
# llm = Ollama(model="llama3.1") # llama3.1 is not working
llm = Ollama(model="qwen2.5") # qwen2.5 is good for this task
# llm = Ollama(model="mistral-small", request_timeout=300)

# Embedding model used for the vector indexes; the HuggingFace line is an
# alternative that was tried and left disabled.
embed_model = OllamaEmbedding("nomic-embed-text:latest")
# embed_model = HuggingFaceEmbedding("BAAI/bge-base-en-v1.5")

# Install both on the shared Settings object. The index-building and
# query-engine cells pass no explicit embed model, so they presumably rely on
# these defaults — run this cell before them.
Settings.llm = llm
Settings.embed_model = embed_model

In [10]:
# Single place to change when the data checkout lives somewhere else.
DATA_ROOT = "/home/forrest/Github/learn_llamaindex/data"


def load_or_build_index(company):
    """Return the vector index for one company's 2021 10-K, using the on-disk cache.

    The persisted index is looked up under ``<DATA_ROOT>/storage/<company>``.
    If that directory exists it is loaded; otherwise the index is built from
    ``<DATA_ROOT>/10k/<company>_2021.pdf`` and persisted there for next time.

    Each company is checked independently. The earlier version of this cell
    only tested the Lyft directory, so a build interrupted after Lyft was
    persisted but before Uber was left every later run trying (and failing)
    to load a non-existent Uber store.

    Args:
        company: Lower-case company key used in the file and directory names
            (``"lyft"`` or ``"uber"`` in this notebook).

    Returns:
        The loaded or freshly built index for that company.
    """
    persist_dir = os.path.join(DATA_ROOT, "storage", company)

    # Cache hit: reuse the persisted index instead of re-embedding the PDF.
    if os.path.exists(persist_dir):
        storage_context = StorageContext.from_defaults(persist_dir=persist_dir)
        return load_index_from_storage(storage_context)

    # Cache miss: read the filing, build the index, then persist it.
    pdf_path = os.path.join(DATA_ROOT, "10k", f"{company}_2021.pdf")
    docs = SimpleDirectoryReader(input_files=[pdf_path]).load_data()
    index = VectorStoreIndex.from_documents(docs)
    index.storage_context.persist(persist_dir=persist_dir)
    return index


lyft_index = load_or_build_index("lyft")
uber_index = load_or_build_index("uber")

KeyboardInterrupt: 

In [5]:
# uber_docs=SimpleDirectoryReader(input_files=['/home/forrest/Github/learn_llamaindex/data/UBER/uber_2021.pdf']).load_data()

# uber_index = VectorStoreIndex.from_documents(uber_docs)

# uber_index.storage_context.persist(persist_dir="/home/forrest/Github/learn_llamaindex/data/storage/uber")
# One query engine per filing; each retrieves the 3 most similar chunks per query.
lyft_engine, uber_engine = (
    idx.as_query_engine(similarity_top_k=3) for idx in (lyft_index, uber_index)
)

In [6]:
# Standalone check of the Uber query engine; the returned response is the
# cell's displayed value.
uber_engine.query("What is the mission of Uber?")


KeyboardInterrupt: 

In [7]:
# Wrap each query engine as a named tool the agent can call. The two tools
# differ only in engine, tool name and the first sentence of the description;
# the usage hint that follows is shared.
query_engine_tools = [
    QueryEngineTool(
        query_engine=engine,
        metadata=ToolMetadata(
            name=tool_name,
            description=(
                headline
                + "Use a detailed plain text question as input to the tool. "
                "The input is used to power a semantic search engine."
            ),
        ),
    )
    for engine, tool_name, headline in (
        (
            lyft_engine,
            "lyft_10k",
            "Provides information about Lyft financials for year 2021. ",
        ),
        (
            uber_engine,
            "uber_10k",
            "Provides information about Uber financials for year 2021. ",
        ),
    )
]

In [8]:
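# Optional: uncomment to drop the first tool (lyft_10k) so the agent can only
# call the Uber tool.
# query_engine_tools.pop(0)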

In [18]:
# Build the LATS agent worker over the 10-K tools and wrap it in a runner that
# drives it one step at a time.
agent_worker = LATSAgentWorker.from_tools(
    query_engine_tools,
    llm=llm,
    num_expansions=2,  # presumably candidates generated per expanded node (the verbose log lists two)
    max_rollouts=5,  # search budget capped at 5 here; -1 would mean unlimited rollouts
    verbose=True,
)
agent = AgentRunner(agent_worker)

In [19]:
# Register the question as a task without executing it; the cells below run it
# step by step using task.task_id.
task = agent.create_task(
    "Given the risk factors Uber described in their 10K files, "
    "what are the most important factors to consider for investing in Uber?"
)

In [20]:
# Execute a single step of the task and keep its output so the search tree
# can be inspected in the next cells.
step_output = agent.run_step(task.task_id)
# Alternatives left disabled: dump the step output as a dict, or run the whole
# question in one call via agent.chat instead of stepping manually.
# step_output_dict = step_output.dict() if hasattr(step_output, 'dict') else step_output
# agent.chat( "Given the risk factors Uber described in their 10K files, what are the most important factors to consider for investing in Uber?")


[1;3;32m> Selecting node to expand: Observation: Given the risk factors Uber described in their 10K files, what are the most important factors to consider for investing in Uber?
[0m[1;3;33m> Got candidates: ['Regulatory risks and compliance issues', 'Rider and driver safety concerns']
[0m=== Calling Function ===
Calling function: uber_10k with args: {"input": "What are the most important risk factors for investing in Uber based on their 2021 10-K filing?"}
=== Calling Function ===
Calling function: uber_10k with args: {"input": "Discuss regulatory risks and compliance issues in detail as mentioned in Uber's 2021 10-K."}
=== Function Output ===
Based on the 2021 10-K filing, some of the most important risk factors for investing in Uber include:

- The potential impact of Driver misclassification challenges. If Drivers were classified as employees or quasi-employees instead of independent contractors, it could significantly affect Uber's financial statements and operations.
- Increas

In [21]:
# Dump the reasoning trace stored on the root node's first child, one entry
# per block, each followed by a dashed separator line.
for step in step_output.task_step.step_state["root_node"].children[0].current_reasoning:
    print(step, "---------", sep="\n")

observation='Given the risk factors Uber described in their 10K files, what are the most important factors to consider for investing in Uber?' return_direct=False
---------
observation='Regulatory risks and compliance issues' return_direct=False
---------


In [22]:
# Same dump one level deeper: the first child of the root's first child.
for step in (
    step_output.task_step.step_state["root_node"].children[0].children[0].current_reasoning
):
    print(step, "---------", sep="\n")

observation='Given the risk factors Uber described in their 10K files, what are the most important factors to consider for investing in Uber?' return_direct=False
---------
observation='Regulatory risks and compliance issues' return_direct=False
---------
thought="I need more detailed information about regulatory risks and compliance issues mentioned in Uber's 10K report to properly identify the most important factors for investors. I will use the `uber_10k` tool to get this information." action='uber_10k' action_input={'input': "Discuss regulatory risks and compliance issues in detail as mentioned in Uber's 2021 10-K."}
---------
observation="Regulatory risks and compliance issues are significant for Uber, as detailed in their 2021 10-K. Substantially all states in the United States and numerous municipalities have adopted regulations concerning Transportation Network Companies (TNCs). These regulations typically encompass a wide range of requirements such as background checks, vehicl

In [23]:
# Keep stepping the same task until a step reports itself as the last one.
# Relies on `step_output` from the earlier single-step cell as the starting point.
while not step_output.is_last:
    step_output = agent.run_step(task.task_id)

# Ask the runner to assemble the final response for the finished task.
response = agent.finalize_response(task.task_id)

[1;3;32m> Selecting node to expand: Observation: Based on the 2021 10-K filing, some of the most important risk factors for investing in Uber include:

- The potential impact of Driver misclassification challenges. If Drivers were classified as employees or quasi-employees instead of independent contractors, it could significantly affect Uber's financial statements and operations.
- Increased operating expenses, which may not be offset by corresponding increases in revenue.
- Challenges in maintaining a critical mass of users across various categories such as Drivers, consumers, merchants, shippers, and carriers. Losing any one group could diminish the overall appeal of the platform.
- Risks associated with criminal activities or negative safety incidents involving platform users, which can harm Uber's brand and reputation.
- The inherent risks involved in new investments and technological advancements, where expected benefits might not materialize.
- Economic, social, weather, and re

In [24]:
# print() applies str() to its argument, so the explicit conversion is not needed.
print(response)

Based on the 2021 10-K filing, some of the most important risk factors for investing in Uber include:

- The potential impact of Driver misclassification challenges: If Drivers were classified as employees or quasi-employees instead of independent contractors, it could significantly affect Uber's financial statements and operations.
- Increased operating expenses that may not be offset by corresponding increases in revenue.
- Challenges in maintaining a critical mass of users across various categories such as Drivers, consumers, merchants, shippers, and carriers. Losing any one group could diminish the overall appeal of the platform.
- Risks associated with criminal activities or negative safety incidents involving platform users, which can harm Uber's brand and reputation.
- The inherent risks involved in new investments and technological advancements where expected benefits might not materialize.
- Economic, social, weather, and regulatory conditions that may negatively impact operat

In [25]:
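# Follow-up experiment (disabled): reset the agent and ask a cross-company question.
# NOTE: the captured output below shows this query ending in
# "ValueError: Expected at least one tool call, but got 0 tool calls."
# agent.reset()

# response = agent.chat("Given the revenue growth and risk factors of Uber and Lyft, which company is performing better? Please use concrete numbers to inform your decision.")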

[1;3;32m> Selecting node to expand: Observation: Given the revenue growth and risk factors of Uber and Lyft, which company is performing better? Please use concrete numbers to inform your decision.
[0m[1;3;33m> Got candidates: ['Analyze the latest quarterly reports for both companies to extract their respective revenue growth rates.', 'Compare the current market valuation and stock performance of Uber and Lyft based on recent financial data.']
[0m=== Calling Function ===
Calling function: lyft_10k with args: {"input": "Provide the latest quarterly report for Lyft's revenue growth rate."}
=== Calling Function ===
Calling function: lyft_10k with args: {"input": "Provide detailed revenue growth and risk factors for 2021."}
=== Function Output ===
Based on the provided context, there isn't a direct mention of Lyft's specific revenue growth rate for any quarter. However, it does discuss that Revenue per Active Rider reached an all-time high in the three months ended December 31, 2021, c

ValueError: Expected at least one tool call, but got 0 tool calls.