**Disclaimer**: This agent is not intended as financial advice.  It is for informational and entertainment purposes only.  Do your own due diligence.

In [None]:
!pip install -U --quiet langgraph langchain_community langchain_openai

In [None]:
import getpass
import os

# Set your OpenAI API key.
# The prompt names the key being requested, and the key is only asked for when it is
# not already present in the environment, so re-running the cell does not overwrite it.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

In [None]:
# You can get an API key here https://financialdatasets.ai/
# The prompt names the key being requested, and the key is only asked for when it is
# not already present in the environment, so re-running the cell does not overwrite it.
if not os.environ.get("FINANCIAL_DATASETS_API_KEY"):
    os.environ["FINANCIAL_DATASETS_API_KEY"] = getpass.getpass("Financial Datasets API key: ")

# Define the tools our agent can use

In [None]:
from langchain_core.tools import tool


@tool
def roe(
    net_income: float,
    equity: float,
) -> float:
    """
    Computes the return on equity (ROE) for a given company.
    Use this function to evaluate the profitability of a company.

    Args:
        net_income: Net income of the company.
        equity: Shareholders' equity of the company; must be non-zero.

    Returns:
        net_income divided by equity.

    Raises:
        ZeroDivisionError: If equity is zero.
    """
    # Fail with a message that names the offending input instead of a bare
    # "float division by zero".
    if equity == 0:
        raise ZeroDivisionError("equity must be non-zero to compute ROE")
    return net_income / equity


@tool
def roic(
    operating_income: float,
    total_debt: float,
    equity: float,
    cash_and_equivalents: float,
    tax_rate: float = 0.35,
) -> float:
    """
    Computes the return on invested capital (ROIC) for a given company.
    Use this function to evaluate the efficiency of a company in generating returns from its capital.

    Args:
        operating_income: Operating income of the company.
        total_debt: Total debt of the company.
        equity: Shareholders' equity of the company.
        cash_and_equivalents: Cash and cash equivalents of the company.
        tax_rate: Tax rate as a decimal fraction (for example, 0.35 for 35%).

    Returns:
        After-tax operating income divided by invested capital
        (total_debt + equity - cash_and_equivalents).

    Raises:
        ZeroDivisionError: If invested capital is zero.
    """
    net_operating_profit_after_tax = operating_income * (1 - tax_rate)
    invested_capital = total_debt + equity - cash_and_equivalents
    # Fail with a message that names the offending quantity instead of a bare
    # "float division by zero".
    if invested_capital == 0:
        raise ZeroDivisionError(
            "invested capital (total_debt + equity - cash_and_equivalents) "
            "must be non-zero to compute ROIC"
        )
    return net_operating_profit_after_tax / invested_capital


@tool
def owner_earnings(
    net_income: float,
    depreciation_amortization: float = 0.0,
    capital_expenditures: float = 0.0,
) -> float:
    """
    Calculates the owner earnings for a company based on the net income, depreciation/amortization, and capital expenditures.

    Args:
        net_income: Net income of the company.
        depreciation_amortization: Depreciation and amortization, added back to net income.
        capital_expenditures: Capital expenditures, subtracted from the total.

    Returns:
        net_income + depreciation_amortization - capital_expenditures.
    """
    return net_income + depreciation_amortization - capital_expenditures


@tool
def intrinsic_value(
    free_cash_flow: float,
    growth_rate: float = 0.05,
    discount_rate: float = 0.10,
    terminal_growth_rate: float = 0.02,
    num_years: int = 5,
) -> float:
    """
    Computes the discounted cash flow (DCF) for a given company based on the current free cash flow.
    Use this function to calculate the intrinsic value of a stock.

    All rates are decimal fractions (for example, 0.05 for 5%).

    Args:
        free_cash_flow: Current free cash flow used as the base of the projection.
        growth_rate: Annual growth rate applied over the projection period.
        discount_rate: Annual rate used to discount future cash flows; must be
            greater than terminal_growth_rate.
        terminal_growth_rate: Perpetual growth rate used for the terminal value.
        num_years: Number of projected years; must be at least 1.

    Returns:
        The sum of the discounted projected cash flows and the discounted terminal value.

    Raises:
        ValueError: If num_years is less than 1, or if discount_rate is not
            greater than terminal_growth_rate.
    """
    # Without at least one projected year there is no final cash flow to build
    # the terminal value from.
    if num_years < 1:
        raise ValueError("num_years must be at least 1")
    # The terminal value divides by (discount_rate - terminal_growth_rate); a zero
    # or negative denominator gives a division error or a meaningless negative value.
    if discount_rate <= terminal_growth_rate:
        raise ValueError("discount_rate must be greater than terminal_growth_rate")

    # Estimate the future cash flows based on the growth rate.
    # NOTE(review): the first projected year equals the current free cash flow
    # (no growth applied yet); confirm this convention is intended.
    cash_flows = [free_cash_flow * (1 + growth_rate) ** year for year in range(num_years)]

    # Discount each projected cash flow; the flow at index `year` is received
    # at the end of year `year + 1`.
    present_values = [
        cash_flow / (1 + discount_rate) ** (year + 1)
        for year, cash_flow in enumerate(cash_flows)
    ]

    # Terminal value grows the last projected cash flow at the terminal rate forever,
    # then is discounted back over the projection period.
    terminal_value = cash_flows[-1] * (1 + terminal_growth_rate) / (discount_rate - terminal_growth_rate)
    terminal_present_value = terminal_value / (1 + discount_rate) ** num_years

    # Sum up the present values and terminal value
    return sum(present_values) + terminal_present_value

In [None]:
from langgraph.prebuilt import ToolNode

from langchain_community.tools import IncomeStatements, BalanceSheets, CashFlowStatements
from langchain_community.utilities.financial_datasets import FinancialDatasetsAPIWrapper

# One shared API wrapper backs every financial-statement tool
api_wrapper = FinancialDatasetsAPIWrapper()
integration_tools = [
    statement_tool(api_wrapper=api_wrapper)
    for statement_tool in (IncomeStatements, BalanceSheets, CashFlowStatements)
]

# Calculation tools defined in this notebook
local_tools = [intrinsic_value, roe, roic, owner_earnings]

# Full toolbox: API-backed tools first, then the local ones
tools = [*integration_tools, *local_tools]

# Graph node that executes whichever tool the model asks for
tool_node = ToolNode(tools)

# Set up the LLM

In [None]:
from langchain.tools.render import format_tool_to_openai_function
from langchain_openai.chat_models import ChatOpenAI

# LLM that drives the agent, with the tools bound so it can request tool calls
model = ChatOpenAI(
    model="gpt-4o",
    temperature=0,
).bind_tools(tools)

# Define the agent state

In [None]:
from typing import TypedDict, Annotated, Sequence
import operator
from langchain_core.messages import BaseMessage

class AgentState(TypedDict):
    """State schema holding the sequence of conversation messages.

    NOTE(review): the graph in this notebook is constructed with `MessagesState`;
    this class does not appear to be referenced in the visible cells.
    """
    # `operator.add` is attached as annotation metadata on the field; presumably
    # LangGraph uses it to append new messages to the existing ones — TODO confirm.
    messages: Annotated[Sequence[BaseMessage], operator.add]

# Define the nodes

In [None]:
from typing import Literal
from langgraph.graph import END, StateGraph, MessagesState


# Routing function: decides where the graph goes after the agent has spoken
def should_continue(state: MessagesState) -> Literal["tools", END]:
    """Return "tools" when the latest message carries tool calls, otherwise END."""
    latest_message = state["messages"][-1]
    # Tool calls present -> run the "tools" node; none -> stop and reply to the user
    return "tools" if latest_message.tool_calls else END

# Node that asks the LLM for the next message
def call_model(state: MessagesState):
    """Invoke the model on the conversation so far and return its reply as a state update."""
    reply = model.invoke(state["messages"])
    # Wrapped in a list because the update gets added to the existing message list
    return {"messages": [reply]}

# Define the graph

In [None]:
from langgraph.checkpoint.memory import MemorySaver

# Start a new graph whose state is the prebuilt message state
workflow = StateGraph(MessagesState)

# Register the two nodes the graph alternates between
workflow.add_node("agent", call_model)
workflow.add_node("tools", tool_node)

# Execution begins at the `agent` node
workflow.set_entry_point("agent")

# After `agent` runs, `should_continue` picks which node is called next
workflow.add_conditional_edges("agent", should_continue)

# After `tools` runs, control always returns to `agent`
workflow.add_edge("tools", "agent")

# In-memory checkpointer used to persist state between graph runs
checkpointer = MemorySaver()

# Compile the graph into a runnable, (optionally) passing the checkpointer as memory
app = workflow.compile(checkpointer=checkpointer)

# Run the financial agent

In [None]:
from langchain_core.messages import HumanMessage

# Send one question through the compiled graph and show the final reply
user_message = HumanMessage(
    content="What is NVDA's intrinsic value given a discount rate of 5%, growth rate of 10%, terminal growth rate of 3%?"
)
final_state = app.invoke(
    {"messages": [user_message]},
    config={"configurable": {"thread_id": 42}},
)
final_state["messages"][-1].content

'The intrinsic value of NVIDIA Corporation (NVDA) given a discount rate of 5%, growth rate of 10%, and terminal growth rate of 3% is approximately $1,806,645,106,719.94.'

# Create dataset for evaluation

In [None]:
!pip install -U --quiet financial-datasets

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
langchain 0.2.14 requires langchain-core<0.3.0,>=0.2.32, but you have langchain-core 0.1.52 which is incompatible.
langchain 0.2.14 requires langchain-text-splitters<0.3.0,>=0.2.0, but you have langchain-text-splitters 0.0.1 which is incompatible.
langchain-community 0.2.12 requires langchain-core<0.3.0,>=0.2.30, but you have langchain-core 0.1.52 which is incompatible.
langchain-openai 0.1.22 requires langchain-core<0.3.0,>=0.2.33, but you have langchain-core 0.1.52 which is incompatible.
langchain-openai 0.1.22 requires tiktoken<1,>=0.7, but you have tiktoken 0.6.0 which is incompatible.
langgraph 0.2.4 requires langchain-core<0.3,>=0.2.27, but you have langchain-core 0.1.52 which is incompatible.
langgraph-checkpoint 1.0.3 requires langchain-core<0.3,>=0.2.22, but you have langchain-core 0.1.52 which is in

# Define helper functions for getting financials

In [None]:
def get_income_statements(ticker: str, period: str = "ttm", limit: int = 10) -> dict:
    """Fetch income statements for `ticker` through the shared `api_wrapper`."""
    statements = api_wrapper.get_income_statements(ticker, period, limit)
    return statements

def get_balance_sheets(ticker: str, period: str = "ttm", limit: int = 10) -> dict:
    """Fetch balance sheets for `ticker` through the shared `api_wrapper`."""
    sheets = api_wrapper.get_balance_sheets(ticker, period, limit)
    return sheets

def get_cash_flow_statements(ticker: str, period: str = "ttm", limit: int = 10) -> dict:
    """Fetch cash flow statements for `ticker` through the shared `api_wrapper`."""
    statements = api_wrapper.get_cash_flow_statements(ticker, period, limit)
    return statements

In [None]:
# Pull the three kinds of financial statements for one company
ticker = "NVDA"
income_statements = get_income_statements(ticker, period="ttm", limit=10)
balance_sheets = get_balance_sheets(ticker, period="ttm", limit=10)
cash_flow_statements = get_cash_flow_statements(ticker, period="ttm", limit=10)

# Create a system prompt

In [None]:
# Instructions for the dataset generator: describes the question-answer-context sets
# to produce, the guidelines to follow, and example questions of varying complexity.
system_prompt = """
You are an expert at creating datasets for evaluating Large Language Models (LLMs) in the domain of financial analysis. Your task is to generate a list of question-answer-context sets based on the provided financial data.

These sets will be used to test LLMs' ability to accurately interpret and analyze financial information.

Your goal is to create question-answer-context sets. Each set should consist of:
1. A question about the financial data
2. The correct answer to that question (ground truth)
3. The relevant context from which the question and answer were derived

Follow these guidelines when creating the sets:
1. Questions should vary in complexity, ranging from simple data retrieval to more complex calculations and comparisons.
2. Ensure that questions cover different aspects of the financial data, such as revenue, profitability, growth, and financial ratios.
3. The context should provide enough information for a human evaluator to understand where the question and answer came from, without giving away the answer directly.

Examples of questions you might create:
- Simple: "What was NVDA's revenue for the period ending March 31, 2024?"
- Moderate: "Calculate the year-over-year growth in gross profit between the two periods."
- Complex: "What is the change in operating margin between the two periods? (Operating margin = Operating income / Revenue)"

"""

# Generate question + answer dataset

In [None]:
from financial_datasets.generator import DatasetGenerator

# Create the dataset generator, configured with the "gpt-4-turbo" model and the
# OpenAI API key read from the environment (raises KeyError if the key is not set)
generator = DatasetGenerator(model="gpt-4-turbo", api_key=os.environ["OPENAI_API_KEY"])

# Generate a dataset from the income statements fetched earlier, passing
# max_questions=10 and the custom system prompt
income_statements_dataset = generator.generate_from_texts(
    texts={"income_statements": income_statements},
    max_questions=10,
    system_prompt=system_prompt,
)

In [None]:
# Generate a dataset from the balance sheets fetched earlier, passing
# max_questions=10 and the custom system prompt
balance_sheets_dataset = generator.generate_from_texts(
    texts={"balance_sheets": balance_sheets},
    max_questions=10,
    system_prompt=system_prompt,
)

In [None]:
# Generate a dataset from the cash flow statements fetched earlier, passing
# max_questions=10 and the custom system prompt
cash_flow_statements_dataset = generator.generate_from_texts(
    texts={"cash_flow_statements": cash_flow_statements},
    max_questions=10,
    system_prompt=system_prompt,
)

In [None]:
# Concatenate the generated items of the three datasets with `+`:
# income statements first, then balance sheets, then cash flow statements
questions = income_statements_dataset.items + balance_sheets_dataset.items + cash_flow_statements_dataset.items

# Visualize dataset

In [None]:
import json

# Serialize every generated item to indented JSON and print it under a heading
json_data = json.dumps(
    [item.model_dump() for item in questions],
    indent=2,
)
print("JSON data:", json_data, sep="\n")

In [None]:
import pandas as pd

# Build a DataFrame with one row per generated item and render it
df = pd.DataFrame(
    [item.model_dump() for item in questions]
)
print("\nDataFrame:")
display(df)


DataFrame:


Unnamed: 0,question,answer,context
0,What was the total revenue in 2024?,$15 billion,"In 2024, the total revenue reported was $15 bi..."
1,What was the net income for 2023?,$1 billion,The net income in 2023 was reported to be $1 b...
2,Calculate the year-over-year growth in revenue...,25%,The revenue in 2023 was $12 billion and in 202...
3,Compare the gross profit margins for 2023 and ...,"2023: 40%, 2024: 45%","In 2023, the gross profit margin was 40%, and ..."
4,What was the operating income in 2024?,$3 billion,The operating income for the year 2024 was $3 ...
5,What percentage of revenue did administrative ...,15%,"In 2024, administrative expenses were $2.25 bi..."
6,Calculate the earnings before interest and tax...,$2 billion,"In 2023, the earnings before interest and taxe..."
7,What was the change in operating margin from 2...,5% increase,The operating margin in 2023 was 30% and in 20...
8,What was the earnings per share in 2024?,$2.50,The earnings per share (EPS) in 2024 was repor...
9,Determine the debt to equity ratio for 2024.,0.6,"In 2024, the total debt was reported as $9 bil..."
