# OpenBB Agents

In [5]:
# Import dependencies, in particular LangChain
import os
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.agents.output_parsers import JSONAgentOutputParser
from langchain.agents.format_scratchpad import format_log_to_str
from langchain.agents import AgentExecutor
from langchain.embeddings import OpenAIEmbeddings
from langchain.schema import Document
from langchain.vectorstores import FAISS
from langchain import hub
from langchain.tools.render import render_text_description_and_args
from langchain.output_parsers import PydanticOutputParser
from langchain.pydantic_v1 import BaseModel, Field
from langchain.output_parsers import RetryWithErrorOutputParser
from langchain.llms import OpenAI

os.environ["TOKENIZERS_PARALLELISM"] = "False"  # Avoid some warnings from HuggingFace

# Set up OpenAI API key
# The empty strings below are placeholders: fill in your own key before running.
# NOTE(review): this assignment is unconditional, so it overwrites any
# OPENAI_API_KEY that is already present in the environment.
import openai
os.environ["OPENAI_API_KEY"] = ""
openai.api_key = ""

# Set up OpenBB Personal Access Token from https://my.openbb.co/app/platform/pat
# The empty `pat` below is likewise a placeholder for your own token.
from openbb import obb
from utils import map_openbb_collection_to_langchain_tools  # provides access to OpenBB Tools
obb.account.login(pat="")

ModuleNotFoundError: No module named 'langchain'

## Set up OpenBB tools for retrieval

The following will return all OpenBB tools that we want our agent to have access to. This matches the layout architecture defined by OpenBB.

In [None]:
# A single string can also be given, but since we want our agent to have more context
# we provide access to more functions.
# TODO: In future, we might implement a `universal_openbb_tools` collection that allows quick LLM access to all OpenBB functions.
# Context size is not an issue when doing this, since we rely on embeddings to select our tools.
# However, with many tools in a vector store, retrieval may become a challenge.
openbb_tools = map_openbb_collection_to_langchain_tools(
    openbb_commands_root = [
        "/equity/fundamental",
        "/equity/compare",
        "/equity/estimates"
    ]
)
print(f"A total of {len(openbb_tools)} OpenBB tools has been prepared for function calling\n")


print("Processing each OpenBB tool description into a list of docs...\n")
# Parse the description (i.e. docstring + output fields) for each of these tools.
# Each document stores the tool's position in `openbb_tools` under metadata["index"],
# which is how a retrieved document is mapped back to its tool later on.
docs = [
    Document(page_content=t.description, metadata={"index": i})
    for i, t in enumerate(openbb_tools)
]

print("Create embeddings for each of these OpenBB tool descriptions...")
# Create embeddings from each of these function descriptions.
# This will be important for when we want the agent to know which
# function to use for a particular query.
vector_store = FAISS.from_documents(docs, OpenAIEmbeddings())

A total of 27 OpenBB tools has been prepared for function calling

Processing each OpenBB tool description into a list of docs...

Create embeddings for each of these OpenBB tool descriptions...


In [None]:
def get_tools(query: str, score_threshold: float = 0.65, min_tools: int = 2) -> list:
    """Retrieve the OpenBB tools whose descriptions best match a query.

    A threshold-based similarity search over ``vector_store`` is tried first.
    If it yields fewer than ``min_tools`` documents, a plain top-k search with
    ``k=min_tools`` is used instead.

    Args:
        query: Free-text keywords describing the data that is needed.
        score_threshold: Minimum relevance score for the threshold-based
            search. The default of 0.65 is the value found empirically to work
            best for this application; it may vary depending on the use case.
        min_tools: Minimum number of documents the threshold search must
            return before its result is accepted; also the ``k`` used by the
            fallback search.

    Returns:
        The entries of ``openbb_tools`` referenced by the retrieved documents
        (via ``metadata["index"]``), in retrieval order.
    """
    # Filtering on a score threshold means the number of endpoints returned is
    # driven by how confident the retrieval is rather than by a count chosen here.
    # NOTE(review): no `k` is passed, so the retriever's own default result
    # limit presumably still applies to this search - confirm.
    retriever = vector_store.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={"score_threshold": score_threshold},
    )
    docs = retriever.get_relevant_documents(query)

    # Fallback in case the threshold is too high, causing too few tools to be
    # returned: take the `min_tools` most similar results regardless of score.
    if len(docs) < min_tools:
        retriever = vector_store.as_retriever(search_kwargs={"k": min_tools})
        docs = retriever.get_relevant_documents(query)

    # Map each retrieved document back to the tool it was built from.
    return [openbb_tools[d.metadata["index"]] for d in docs]

Let's try out our new vector store that contains our tool embeddings to build some intuition around how they work.

In [None]:
# Run two sample keyword queries through the retriever and list, for each one,
# the name and first description line of every tool that comes back.
for position, query in enumerate(("market cap", "peers")):
    heading = f"Tools retrieved for '{query}':"
    # Every heading after the first is separated from the previous list by a blank line.
    print(heading if position == 0 else "\n" + heading)
    fetched_tools = get_tools(query)
    for tool in fetched_tools:
        first_line = tool.description.split('\n')[0]
        print("  " + tool.name + " - " + first_line)

Tools retrieved for 'market cap':
  /equity/fundamental/metrics - Key Metrics. Key metrics for a given company.
  /equity/fundamental/multiples - Equity Valuation Multiples. Valuation multiples for a stock ticker.
  /equity/fundamental/overview - Company Overview. General information about a company.
  /equity/fundamental/income - Income Statement. Report on a company's financial performance.

Tools retrieved for 'peers':
  /equity/compare/peers - Equity Peers. Company peers.
  /equity/fundamental/metrics - Key Metrics. Key metrics for a given company.


## AI-powered Financial Analyst

There are two example prompts to choose from below (but feel free to try your own), one requires linear reasoning (where future answers may depend on previous answers), while the other requires independent reasoning (fetching and combining different pieces of independent information).

These prompts cover two very different scenarios, and in this notebook we demonstrate that our OpenBB agent is capable of handling both efficiently, using the same architecture.

- **Prompt 1** - This prompt is very deterministic, which allows us to assess right or wrong immediately because we can check the facts.
It also involves a few complex operations such as extracting a list of tickers from an endpoint and iterating through that list using a different endpoint. Then based on those outputs, a reasoning is made.

- **Prompt 2** - This prompt is not deterministic and allows us to leverage LLMs to provide alpha by uncovering insights that would be hard for a human to discover. Instead of telling the agent what to do, we expect the agent to provide a reasoning of what it would do to perform a typical analyst task, without guardrails.


In [None]:
# High-level user question for the rest of the notebook. `PROMPT` is what gets
# decomposed into subquestions, quoted to each agent, and answered in the verdict.

# Prompt 1 (active): sequential reasoning, where later steps depend on earlier answers.
PROMPT =  """\
Check what are TSLA peers. From those, check which one has the highest market cap.
Then, on the ticker that has the highest market cap get the most recent price target estimate from an analyst,
and tell me who it was and on what date the estimate was made.
"""

# Prompt 2 (alternative): open-ended analysis. Uncomment to use it instead of Prompt 1.
# PROMPT = "Perform a fundamentals financial analysis of AMZN using the most recently available data. What do you find that's interesting?"

### Task decomposition

The goal is to simplify the user prompt into simpler tasks and execute those for more accuracy.

1. Break a larger query down into subqueries.
2. Then for each subquery create a set of keywords that allow you to fetch the right tool to execute that same subquery.

In [None]:
# We experiment with Pydantic here to enforce structured output
# "Pydantic is all you need" - https://www.youtube.com/watch?v=yj-wSRJwrrc

class SubQuestion(BaseModel):
    "Pydantic data model we want each subquestion to have, including each field and what they represent"
    # NOTE(review): pydantic presumably exposes the class docstring and the Field
    # descriptions in the generated schema, which would make them part of the text
    # sent to the LLM via the parser's format instructions - confirm before rewording.

    # Identifier that other subquestions use in their `depends_on` list.
    id: int = Field(
        description="The unique ID of the subquestion."
    )
    # Natural-language text of the subquestion.
    question: str = Field(
        description="The subquestion itself."
    )
    # Keyword string used to look up the tools for this subquestion.
    query: str = Field(
        description="The query to pass to the `fetch_tools` function to retrieve the appropriate tool to answer the question."
    )
    # IDs of the subquestions whose answers are needed first; empty when independent.
    depends_on: list[int] = Field(
        description="The list of subquestion ids whose answer is required to answer this subquestion.",
        default=[]
    )

class SubQuestionList(BaseModel):
    "Pydantic data model output we want to enforce, which is a list of the previous SubQuestion Pydantic model"
    # NOTE(review): as with SubQuestion, the docstring and description presumably
    # end up in the schema shown to the LLM - confirm before rewording.

    # Top-level container: the full decomposition as a list of SubQuestion items.
    subquestions: list[SubQuestion] = Field(
        description="The list of SubQuestion objects."
    )

In [None]:
def task_decomposition(task: str) -> "SubQuestionList":
    """Break a complex question into subquestions with tool-retrieval queries.

    The LLM is asked to produce a list of subquestions, a keyword query for
    each one (used to fetch the right tools), and the dependencies between
    subquestions. The reply is parsed into a ``SubQuestionList``.

    Args:
        task: The high-level user question to decompose.

    Returns:
        The parsed ``SubQuestionList`` produced by the chain.
    """
    subquestion_parser = PydanticOutputParser(pydantic_object=SubQuestionList)

    # Doubled braces ({{ }}) in the example are literal braces; single-brace names
    # are template variables filled in by ChatPromptTemplate.
    system_message = """\
    You are a world-class state-of-the-art agent.
    
    You can access multiple tools, via a "fetch_tools" function that will retrieve the necessary tools.
    The `fetch_tools` function accepts a string of keywords as input specifying the type of tool to retrieve.
    Each retrieved tool represents a different data source or API that can retrieve the required data.
    
    Your purpose is to help answer a complex user question by generating a list of subquestions,
    as well as the corresponding keyword query to the "fetch_tools" function
    to retrieve the relevant tools to answer each corresponding subquestion.
    You must also specify the dependencies between subquestions, since sometimes one
    subquestion will require the outcome of another in order to fully answer.
    
    These are the guidelines you consider when completing your task:
    * Be as specific as possible
    * Avoid using acronyms
    * The subquestions should be relevant to the user's question
    * The subquestions should be answerable by the tools retrieved by the query to `fetch_tools`
    * You can generate multiple subquestions
    * You don't need to query for a tool if you don't think it's relevant
    * A subquestion may not depend on a subquestion that follows it (i.e. comes after it.)
    
    ## Output format
    {format_instructions}
    
    ### Example responses
    ```json
    {{"subquestions": [
        {{
            "id": 1,
            "question": "What are the latest financial statements of AMZN?", 
            "query": "financial statements",
            "depends_on": []
        }}, 
        {{
            "id": 2,
            "question": "What is the most recent revenue and profit margin of AMZN?", 
            "query": "revenue profit margin ratios",
            "depends_on": []
        }}, 
        {{
            "id": 3,
            "question": "What is the current price to earnings (P/E) ratio of AMZN?", 
            "query": "ratio price to earnings",
            "depends_on": []
        }}, 
        {{
            "id": 4,
            "question": "Who are the peers of AMZN?", 
            "query": "peers",
            "depends_on": []
        }},
        {{
            "id": 5,
            "question": "Which of AMZN's peers have the largest market cap?", 
            "query": "market cap",
            "depends_on": [4]
        }}
    ]}}
    ```
    """

    human_message = """\
        ## User Question
        {input}
        """

    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_message),
            ("human", human_message),
        ]
    )
    # Pre-fill the schema instructions so only `input` remains to be supplied.
    prompt = prompt.partial(
        format_instructions=subquestion_parser.get_format_instructions()
    )

    llm = ChatOpenAI(
        model="gpt-4"
    )  # gpt-3.5-turbo works well, but gpt-4-1106-preview isn't good at returning JSON.

    # prompt -> model -> parser; the parser turns the reply into a SubQuestionList.
    subquestion_chain = {"input": lambda x: x["input"]} | prompt | llm | subquestion_parser

    subquestion_list = subquestion_chain.invoke({"input": task})

    return subquestion_list

In [None]:
subquestion_list = task_decomposition(PROMPT)

# Display the decomposition: one entry per subquestion with its retrieval query
# and, only when it has any, the ids of the subquestions it depends on.
for subquestion in subquestion_list.subquestions:
    summary = [
        f"{subquestion.id} - {subquestion.question}",
        f"  Query: {subquestion.query}",
    ]
    if subquestion.depends_on:
        summary.append(f"  Depends on: {subquestion.depends_on}")
    print("\n".join(summary))

1 - Who are the peers of Tesla (TSLA)?
  Query: Tesla peers
2 - Which of TSLA's peers has the largest market cap?
  Query: market cap
  Depends on: [1]
3 - What is the most recent price target estimate for the company with the largest market cap?
  Query: price target estimate
  Depends on: [2]
4 - Who made the most recent price target estimate for the company with the largest market cap?
  Query: analyst name price target estimate
  Depends on: [2]
5 - On what date was the most recent price target estimate for the company with the largest market cap made?
  Query: date price target estimate
  Depends on: [2]


### Tool retrieval

Use the previously generated queries to fetch the tools necessary to answer each subquestion.

In [None]:
# Pair every subquestion with the tools retrieved for its keyword query.
# Each entry is a plain dict so that an "observation" key can be attached to it
# once the subquestion has been answered.
subquestions_and_tools = []
for subquestion in subquestion_list.subquestions:
    tools = get_tools(subquestion.query)
    subquestions_and_tools.append(
        {
            "id": subquestion.id,
            "subquestion": subquestion.question,
            "query": subquestion.query,
            "tools": tools,
            "depends_on": subquestion.depends_on,
        }
    )

# Shows the result from the fetched tools for each subquestion's query
for subq in subquestions_and_tools:
    print(f"{subq['id']} - {subq['subquestion']}")
    print(f"  Query: {subq['query']}")
    # Check the entry being printed, not the leftover `subquestion` loop variable
    # from the loop above (which always refers to the last subquestion).
    if subq["depends_on"]:
        print(f"  Depends on: {subq['depends_on']}")
    print("  Fetched tools:")
    for tool in subq["tools"]:
        print("    " + tool.name + ": " + tool.description.split('\n')[0])
    print("")

1 - Who are the peers of Tesla (TSLA)?
  Query: Tesla peers
  Depends on: []
  Fetched tools:
    /equity/compare/peers: Equity Peers. Company peers.
    /equity/fundamental/metrics: Key Metrics. Key metrics for a given company.

2 - Which of TSLA's peers has the largest market cap?
  Query: market cap
  Depends on: [1]
  Fetched tools:
    /equity/fundamental/metrics: Key Metrics. Key metrics for a given company.
    /equity/fundamental/multiples: Equity Valuation Multiples. Valuation multiples for a stock ticker.
    /equity/fundamental/overview: Company Overview. General information about a company.
    /equity/fundamental/income: Income Statement. Report on a company's financial performance.

3 - What is the most recent price target estimate for the company with the largest market cap?
  Query: price target estimate
  Depends on: [2]
  Fetched tools:
    /equity/estimates/consensus: Price Target Consensus. Price target consensus data.
    /equity/estimates/price_target: Price Targe

### Agent to execute on each subquestion

The ReAct agent answers each of the subquestions. This is done by providing it with the subquestion and its corresponding fetched tools.

ReAct paper: https://react-lm.github.io/

In [None]:
def langchain_react_agent(tools):
    """Build an ``AgentExecutor`` whose ReAct-style chain may only use ``tools``.

    Args:
        tools: The LangChain tools the agent is allowed to call. Their rendered
            descriptions and names are injected into the prompt.

    Returns:
        An ``AgentExecutor`` wrapping the prompt -> LLM -> JSON parser chain.
    """
    # ReAct agent chat prompt template from LangChain Hub. The `tools` and
    # `tool_names` placeholders are filled from the tools handed to this agent.
    # NOTE(review): the link originally recorded here pointed at hwchase17/react-json
    # (https://smith.langchain.com/hub/hwchase17/react-json?organizationId=10beea65-e722-5aa1-9f93-034c22e3cd6e),
    # while the code pulls hwchase17/react-multi-input-json.
    react_prompt = hub.pull("hwchase17/react-multi-input-json").partial(
        tools=render_text_description_and_args(tools),
        tool_names=", ".join(t.name for t in tools),
    )

    # Generation is cut off at "\nObservation"; presumably so that the executor,
    # not the model, supplies the real tool output.
    model = ChatOpenAI(model="gpt-4-1106-preview").bind(stop=["\nObservation"])

    # The scratchpad is the textual log of the steps taken so far.
    prompt_inputs = {
        "input": lambda x: x["input"],
        "agent_scratchpad": lambda x: format_log_to_str(x["intermediate_steps"]),
    }
    agent_chain = prompt_inputs | react_prompt | model | JSONAgentOutputParser()

    # Executor that runs the chain in a loop with the tools at its disposal.
    return AgentExecutor(
        agent=agent_chain,
        tools=tools,
        verbose=False,  # True is handy for debugging but produces a lot of output.
        return_intermediate_steps=False,
        handle_parsing_errors=True,
    )

In [None]:
# Go through each subquestion and create an agent with the necessary tools and context to execute on it
for subquestion in subquestions_and_tools:

    # We handle each dependency manually since we don't want agents to share memory as this can go over context length
    deps = [dep for dep in subquestions_and_tools if dep["id"] in subquestion["depends_on"]]

    # Render the dependencies (question + recorded answer) as plain text for the prompt.
    dependencies = ""
    for dep in deps:
        dependencies += "subquestion: " + dep["subquestion"] + "\n"
        # If a dependency has not been answered yet (no "observation" recorded),
        # only its question is included so the code doesn't break here.
        if "observation" in dep:
            dependencies += "observations:\n" + str(dep["observation"]) + "\n\n"

    # Named `agent_input` rather than `input` so the builtin is not shadowed
    # for the rest of the session.
    agent_input = f"""\
Given the following high-level question: {PROMPT}
Answer the following subquestion: {subquestion['subquestion']}

Give your answer in a bullet-point list.
Explain your reasoning, and make reference to and provide the relevant retrieved data as part of your answer.

Remember to use the tools provided to you to answer the question, and STICK TO THE INPUT SCHEMA.

Example output format:
```
- <the first observation, insight, and/or conclusion> 
- <the second observation, insight, and/or conclusion> 
- <the third observation, insight, and/or conclusion> 
... REPEAT AS MANY TIMES AS NECESSARY TO ANSWER THE SUBQUESTION.
```

If necessary, make use of the following subquestions and their answers to answer your subquestion:
{dependencies}

Return only your answer as a bulleted list as a single string. Don't respond with JSON or any other kind of data structure.
"""

    try:
        result = langchain_react_agent(tools=subquestion["tools"]).invoke({"input": agent_input})
        output = result["output"]
    except Exception as err:  # Deliberately broad: one failed subquestion must not abort the rest.
        print(err)
        # We'll include the error message in the future
        output = "I was unable to answer the subquestion using the available tool."

    # Store the answer on the subquestion's own entry in subquestions_and_tools.
    # The dependency rendering above reads it back when a later subquestion
    # depends on this one.
    # Note: this works because subquestions are executed in order. Otherwise we would
    # be looking for an "observation" that doesn't exist yet.
    subquestion["observation"] = output

    print(subquestion['subquestion'])
    if isinstance(output, dict):
        for val in output.values():
            print(val)
    else:
        print(output)
    print("\n")

Who are the peers of Tesla (TSLA)?
- The peers of Tesla (TSLA) are XPeng Inc. (XPEV), Li Auto Inc. (LI), Rivian Automotive, Inc. (RIVN), Lucid Group, Inc. (LCID), General Motors Company (GM), NIO Inc. (NIO), Ford Motor Company (F), Fisker Inc. (FSR), and Mullen Automotive, Inc. (MULN).
- These companies are considered peers because they operate in the same sector and industry, focusing on electric vehicles and automotive manufacturing, which is the primary business of Tesla.


Which of TSLA's peers has the largest market cap?
- General Motors Company (GM) has the largest market cap among Tesla's peers, with a market cap of $43,275,565,978.
- The market caps of other peers are as follows: Li Auto Inc. (LI) at $37,031,658,909, Rivian Automotive, Inc. (RIVN) at $15,920,206,546, NIO Inc. (NIO) at $12,930,495,049, Ford Motor Company (F) at $40,343,346,985, XPeng Inc. (XPEV) at $11,916,511,602, Lucid Group, Inc. (LCID) at $9,661,394,668, Fisker Inc. (FSR) at $553,878,493, and Mullen Automoti

### Verdict

Combine all of the subquestion answers to generate a final answer.

In [None]:
def render_subquestions_and_answers(subquestions):
    """Render every subquestion and its recorded answer as one text block.

    Args:
        subquestions: Iterable of dicts, each with a "subquestion" string and
            an "observation" value (converted with ``str``).

    Returns:
        A single string with one "Subquestion / Observations" section per
        entry, each followed by a blank line; empty when there are no entries.
    """
    return "".join(
        "Subquestion: " + item["subquestion"] + "\n"
        + "Observations: \n" + str(item["observation"]) + "\n\n"
        for item in subquestions
    )

In [None]:
def verdict(question: str, subquestions: list[dict]):
    """Combine the answered subquestions into one final answer to the question.

    Args:
        question: The high-level user question.
        subquestions: The subquestion entries, each a dict carrying at least
            the "subquestion" and "observation" keys that
            ``render_subquestions_and_answers`` reads.

    Returns:
        The LLM's reply message as returned by the chain; the caller reads the
        answer text from its ``content`` attribute.
    """
    system_message = """\
        Given the following high-level question: 
    
        {input}
    
        And the following subquestions and subsequent observations:
    
        {subquestions}
    
        Answer the high-level question. Give your answer in a bulleted list.
        """

    # The whole request is sent as a single system message.
    prompt = ChatPromptTemplate.from_messages([("system", system_message)])

    llm = ChatOpenAI(model="gpt-4")  # Let's use the big model for the final answer.

    final_chain = (
        {
            "input": lambda x: x["input"],
            # Flatten the subquestion entries into text before filling the template.
            "subquestions": lambda x: render_subquestions_and_answers(x["subquestions"]),
        }
        | prompt
        | llm
    )

    result = final_chain.invoke({"input": question, "subquestions": subquestions})

    return result

In [None]:
# Ask for the final answer using the original prompt and every answered subquestion.
result = verdict(question=PROMPT, subquestions=subquestions_and_tools)
print(result.content)  # Et voila

- The peers of Tesla (TSLA) are XPeng Inc. (XPEV), Li Auto Inc. (LI), Rivian Automotive, Inc. (RIVN), Lucid Group, Inc. (LCID), General Motors Company (GM), NIO Inc. (NIO), Ford Motor Company (F), Fisker Inc. (FSR), and Mullen Automotive, Inc. (MULN).
- Among these peers, General Motors Company (GM) has the largest market capitalization at $43,275,565,978.
- The most recent price target estimate for General Motors Company (GM) is $37.0.
- This estimate was provided by analyst Dan Levy from Barclays.
- The price target estimate was made on November 1, 2023.


In [None]:
# Cross-check against the raw data: first rows of GM's company overview,
# to compare with the market cap quoted in the final answer.
obb.equity.fundamental.overview("GM").to_df().head()

Unnamed: 0,0,1
0,symbol,GM
1,price,31.6
2,beta,1.49
3,vol_avg,16439042
4,mkt_cap,43275565978


In [None]:
# Cross-check against the raw data: first row of GM's price target estimates,
# to compare with the analyst, date and target quoted in the final answer.
obb.equity.estimates.price_target("GM").to_df().head(1)

Unnamed: 0,symbol,published_date,news_url,news_title,analyst_name,analyst_company,price_target,adj_price_target,price_when_posted,news_publisher,news_base_url
0,GM,2023-11-01 02:32:00,https://www.streetinsider.com/Upgrades/Barclay...,Barclays Upgrades General Motors (GM) to Overw...,Dan Levy,Barclays,37.0,37.0,28.2,StreetInsider,streetinsider.com
