In [1]:
!pip install -U --quiet langgraph langchain_community langchain_openai langsmith

In [2]:
import os

In [3]:
# Read the required API keys up front: a missing variable raises KeyError here,
# at the top of the notebook, instead of failing later inside an API call.
langchain_api_key = os.environ["LANGCHAIN_API_KEY"]
financial_dataset_api_key = os.environ["FINANCIAL_DATASETS_API_KEY"]
openai_api_key = os.environ["OPENAI_API_KEY"]

# Turn on LangSmith tracing. The documented value is lowercase "true".
# NOTE(review): some langsmith releases appear to compare this value
# case-sensitively, so "True" may silently leave tracing off - confirm.
os.environ["LANGCHAIN_TRACING_V2"] = "true"

### Define Tools

In [4]:
from langchain_core.tools import tool


@tool
def roe(
    net_income: float,
    equity: float,
) -> float:
    """
    Computes the return on equity (ROE) for a given company.
    Use this function to evaluate the profitability of a company.

    Args:
        net_income: Net income for the period.
        equity: Shareholders' equity for the same period; must be non-zero.

    Returns:
        net_income divided by equity, as a fraction (0.25 means 25%).

    Raises:
        ValueError: If equity is zero.
    """
    # Fail with an explicit message instead of a bare ZeroDivisionError so the
    # calling agent can tell which input was invalid.
    if equity == 0:
        raise ValueError("equity must be non-zero to compute ROE")
    return net_income / equity


@tool
def roic(
    operating_income: float,
    total_debt: float,
    equity: float,
    cash_and_equivalents: float,
    tax_rate: float = 0.35,
) -> float:
    """
    Computes the return on invested capital (ROIC) for a given company.
    Use this function to evaluate the efficiency of a company in generating returns from its capital.

    Args:
        operating_income: Operating income for the period.
        total_debt: Total debt.
        equity: Shareholders' equity.
        cash_and_equivalents: Cash and cash equivalents.
        tax_rate: Tax rate as a fraction (default 0.35).

    Returns:
        operating_income * (1 - tax_rate) divided by
        (total_debt + equity - cash_and_equivalents), as a fraction.

    Raises:
        ValueError: If the invested capital works out to zero.
    """
    # NOTE(review): the 0.35 default is kept for backward compatibility -
    # confirm it is the tax rate intended for the companies being analysed.
    net_operating_profit_after_tax = operating_income * (1 - tax_rate)
    invested_capital = total_debt + equity - cash_and_equivalents

    # Explicit error instead of a bare ZeroDivisionError on degenerate inputs.
    if invested_capital == 0:
        raise ValueError(
            "invested capital (total_debt + equity - cash_and_equivalents) "
            "must be non-zero to compute ROIC"
        )
    return net_operating_profit_after_tax / invested_capital


@tool
def owner_earnings(
    net_income: float,
    depreciation_amortization: float = 0.0,
    capital_expenditures: float = 0.0,
) -> float:
    """
    Calculates the owner earnings for a company based on the net income, depreciation/amortization, and capital expenditures.

    Args:
        net_income: Net income for the period.
        depreciation_amortization: Depreciation and amortization to add back (default 0.0).
        capital_expenditures: Capital expenditures to subtract (default 0.0).

    Returns:
        net_income + depreciation_amortization - capital_expenditures.
    """
    return net_income + depreciation_amortization - capital_expenditures


@tool
def intrinsic_value(
    free_cash_flow: float,
    growth_rate: float = 0.05,
    discount_rate: float = 0.10,
    terminal_growth_rate: float = 0.02,
    num_years: int = 5,
) -> float:
    """
    Computes the discounted cash flow (DCF) for a given company based on the current free cash flow.
    Use this function to calculate the intrinsic value of a stock.

    Args:
        free_cash_flow: Current free cash flow.
        growth_rate: Growth rate applied per projected year, as a fraction (default 0.05).
        discount_rate: Discount rate, as a fraction (default 0.10).
        terminal_growth_rate: Growth rate used for the terminal value, as a fraction
            (default 0.02); must be lower than discount_rate.
        num_years: Number of projected years (default 5); must be at least 1.

    Returns:
        Sum of the discounted projected cash flows plus the discounted terminal value.

    Raises:
        ValueError: If num_years is less than 1, or if discount_rate is not
            strictly greater than terminal_growth_rate.
    """
    # Without at least one projected year there is no final cash flow to
    # derive the terminal value from.
    if num_years < 1:
        raise ValueError("num_years must be at least 1")
    # The terminal value divides by (discount_rate - terminal_growth_rate):
    # equal rates divide by zero, and a lower discount rate gives a negative value.
    if discount_rate <= terminal_growth_rate:
        raise ValueError("discount_rate must be greater than terminal_growth_rate")

    # Estimate the future cash flows based on the growth rate.
    # NOTE(review): the exponent starts at 0, so the first projected flow equals
    # free_cash_flow itself (no growth applied) while being discounted one year -
    # confirm this timing convention is intended.
    cash_flows = [free_cash_flow * (1 + growth_rate) ** year for year in range(num_years)]

    # Present value of each projected flow; the flow at index i is discounted i + 1 years
    present_values = [
        cash_flow / (1 + discount_rate) ** year
        for year, cash_flow in enumerate(cash_flows, start=1)
    ]

    # Terminal value from the last projected flow, discounted back num_years
    terminal_value = cash_flows[-1] * (1 + terminal_growth_rate) / (discount_rate - terminal_growth_rate)
    terminal_present_value = terminal_value / (1 + discount_rate) ** num_years

    # Sum up the present values and terminal value
    return sum(present_values) + terminal_present_value

In [5]:
from langgraph.prebuilt import ToolNode

from langchain_community.tools import IncomeStatements, BalanceSheets, CashFlowStatements
from langchain_community.utilities.financial_datasets import FinancialDatasetsAPIWrapper

# One API wrapper instance shared by all three statement tools
api_wrapper = FinancialDatasetsAPIWrapper()

# Statement tools, each constructed around the shared wrapper
integration_tools = [
    statement_tool(api_wrapper=api_wrapper)
    for statement_tool in (IncomeStatements, BalanceSheets, CashFlowStatements)
]

# Tools defined locally in this notebook
local_tools = [intrinsic_value, roe, roic, owner_earnings]

# Full tool list: integration tools first, then the local ones
tools = [*integration_tools, *local_tools]

# Graph node that executes the tool calls
tool_node = ToolNode(tools)

### Setup the LLM

In [44]:
from langchain.tools.render import format_tool_to_openai_function
from langchain_openai.chat_models import ChatOpenAI

# LLM that drives the agent: gpt-4o-mini at temperature 0,
# with the tool list bound so it can request tool calls.
_chat_llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
model = _chat_llm.bind_tools(tools)

### Define the Agent State

In [7]:
from typing import TypedDict, Annotated, Sequence
import operator
from langchain_core.messages import BaseMessage

class AgentState(TypedDict):
    """Typed state with a single `messages` field.

    The field is a sequence of messages annotated with `operator.add`.
    NOTE(review): the graph below is built on `MessagesState`, so this class
    looks unused in the visible cells - confirm before relying on it.
    """

    messages: Annotated[
        Sequence[BaseMessage],
        operator.add,
    ]

### Define the Nodes

In [8]:
from typing import Literal
from langgraph.graph import END, StateGraph, MessagesState


# Router: decides which node runs after the agent has responded
def should_continue(state: MessagesState) -> Literal["tools", END]:
    """Return "tools" when the latest message carries tool calls, otherwise END."""
    latest = state["messages"][-1]
    # Tool calls present -> run the tools node; none -> stop and reply to the user
    return "tools" if latest.tool_calls else END

# Agent node: runs the tool-bound model over the conversation so far
def call_model(state: MessagesState):
    """Invoke `model` on the state's messages and return the reply as a one-item update."""
    reply = model.invoke(state["messages"])
    # Wrapped in a list because the update is added to the existing message list
    return {"messages": [reply]}

### Define the graph

In [9]:
from langgraph.checkpoint.memory import MemorySaver

# Start a new graph whose state is the message list
workflow = StateGraph(MessagesState)

# The two nodes the graph cycles between:
# "agent" calls the model, "tools" runs the tool node
workflow.add_node("agent", call_model)
workflow.add_node("tools", tool_node)

# `agent` is the entry point, i.e. the first node called
workflow.set_entry_point("agent")

# Conditional edge out of `agent`: after the agent runs,
# `should_continue` decides which node is called next
workflow.add_conditional_edges("agent", should_continue)

# Plain edge: once `tools` has run, control returns to `agent`
workflow.add_edge("tools", "agent")

# Memory used to persist state between graph runs
checkpointer = MemorySaver()

# Compile into a runnable that can be used like any other runnable;
# passing the checkpointer is optional
app = workflow.compile(checkpointer=checkpointer)

### Run the financial Agent

In [10]:
from langchain_core.messages import HumanMessage

# Ask the compiled graph a valuation question on thread 42
nvda_question = HumanMessage(
    content="What is NVDA's intrinsic value given a discount rate of 5%, growth rate of 10%, terminal growth rate of 3%?"
)
thread_config = {"configurable": {"thread_id": 42}}
final_state = app.invoke({"messages": [nvda_question]}, config=thread_config)

# The last message in the returned state is displayed as the cell output
final_state["messages"][-1].content

'The intrinsic value of NVIDIA Corporation (NVDA) is approximately $1,806,645,107,719.94 given a discount rate of 5%, a growth rate of 10%, and a terminal growth rate of 3%.'

### Create dataset for evaluation

In [11]:
!pip install -U --quiet financial-datasets

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
langgraph-checkpoint 1.0.5 requires langchain-core<0.3,>=0.2.22, but you have langchain-core 0.1.52 which is incompatible.
langchain-openai 0.1.22 requires langchain-core<0.3.0,>=0.2.33, but you have langchain-core 0.1.52 which is incompatible.
langchain-openai 0.1.22 requires tiktoken<1,>=0.7, but you have tiktoken 0.6.0 which is incompatible.
langchain-community 0.2.12 requires langchain-core<0.3.0,>=0.2.30, but you have langchain-core 0.1.52 which is incompatible.
langchain 0.2.14 requires langchain-core<0.3.0,>=0.2.32, but you have langchain-core 0.1.52 which is incompatible.
langchain 0.2.14 requires langchain-text-splitters<0.3.0,>=0.2.0, but you have langchain-text-splitters 0.0.1 which is incompatible.
langgraph 0.2.12 requires langchain-core<0.3,>=0.2.27, but you have langchain-core 0.1.52 which is i

### Helper Functions

In [12]:
def get_income_statements(ticker: str, period: str, limit: int) -> dict:
    """Fetch income statements by forwarding the arguments to `api_wrapper.get_income_statements`."""
    statements = api_wrapper.get_income_statements(ticker, period, limit)
    return statements

def get_balance_sheets(ticker: str, period: str, limit: int) -> dict:
    """Fetch balance sheets by forwarding the arguments to `api_wrapper.get_balance_sheets`."""
    sheets = api_wrapper.get_balance_sheets(ticker, period, limit)
    return sheets

def get_cash_flow_statements(ticker: str, period: str, limit: int) -> dict:
    """Fetch cash flow statements by forwarding the arguments to `api_wrapper.get_cash_flow_statements`."""
    statements = api_wrapper.get_cash_flow_statements(ticker, period, limit)
    return statements

In [13]:
# Company under analysis
ticker = "NVDA"

# The same query (period 'ttm', limit 5) is used for all three statement types
statement_query = (ticker, 'ttm', 5)
income_statements = get_income_statements(*statement_query)
balance_sheets = get_balance_sheets(*statement_query)
cash_flow_statements = get_cash_flow_statements(*statement_query)

### Create a system prompt

In [14]:
# Prompt for the dataset generator. The example questions follow the prompt's own
# guideline 4 (ticker and period in every question): each generated question is
# later sent to the agent on its own, without the surrounding context.
system_prompt = f"""
You are an expert at creating datasets for evaluating Large Language Models (LLMs) in the domain of financial analysis. Your task is to generate a list of question-answer-context sets based on the provided financial data.

These sets will be used to test LLMs' ability to accurately interpret and analyze financial information.

Your goal is to create question-answer-context sets for {ticker}. Each set should consist of:
1. A question about the financial data
2. The correct answer to that question (ground truth)
3. The relevant context from which the question and answer were derived

Important: You will be given some financial data as context, which you must use to generate the question-answer-context sets.

Follow these guidelines when creating the sets:
1. Questions should vary in complexity, ranging from simple data retrieval to more complex calculations and comparisons.
2. Ensure that questions cover different aspects of the financial data, such as revenue, profitability, growth, and financial ratios.
3. The context should provide enough information for a human evaluator to understand where the question and answer came from, without giving away the answer directly.
4. Include the ticker symbol of the company and the year or quarterly context in every question.

Examples of questions you might create:
- Simple: "What was {ticker}'s revenue for the period ending March 31, 2024?"
- Moderate: "Calculate the year-over-year growth in {ticker}'s gross profit between fiscal years 2022 and 2023."
- Complex: "What is the change in {ticker}'s operating margin between fiscal years 2022 and 2023? (Operating margin = Operating income / Revenue)"

"""

### Generate Question + Answer dataset

In [15]:
# Upper bound on Q&A items per statement type; passed as `max_questions`
# to each `generate_from_texts` call.
max_questions = 5

In [31]:
from financial_datasets.generator import DatasetGenerator

# Dataset generator configured with gpt-4-turbo and the OpenAI key
generator = DatasetGenerator(model="gpt-4-turbo", api_key=openai_api_key)

# Build the Q&A dataset for the income statements
income_statements_dataset = generator.generate_from_texts(
    system_prompt=system_prompt,
    max_questions=max_questions,
    texts={"income_statements": income_statements},
)

Generating questions: 100%|[32m██████████[0m| 5/5 [00:14<00:00,  2.88s/it]


In [32]:
# Build the Q&A dataset for the balance sheets
balance_sheets_dataset = generator.generate_from_texts(
    system_prompt=system_prompt,
    max_questions=max_questions,
    texts={"balance_sheets": balance_sheets},
)

Generating questions: 100%|[32m██████████[0m| 5/5 [00:11<00:00,  2.38s/it]


In [33]:
# Build the Q&A dataset for the cash flow statements
cash_flow_statements_dataset = generator.generate_from_texts(
    system_prompt=system_prompt,
    max_questions=max_questions,
    texts={"cash_flow_statements": cash_flow_statements},
)

Generating questions: 100%|[32m██████████[0m| 5/5 [00:10<00:00,  2.17s/it]


In [34]:
# Combine the three datasets' items: income statements, balance sheets, cash flows
questions = [
    *income_statements_dataset.items,
    *balance_sheets_dataset.items,
    *cash_flow_statements_dataset.items,
]
questions

[DatasetItem(question="What was NVDA's gross profit for the year 2022?", answer='$17.65 billion', context="NVDA's total gross profit for 2022 stood at $17.65 billion, a significant increase from the $10.92 billion recorded in 2021."),
 DatasetItem(question="How much did NVDA's operating expenses increase in 2022 compared to 2021?", answer='$3.01 billion', context="NVDA's operating expenses were $9.76 billion in 2022, up from $6.75 billion in 2021, marking an increase of $3.01 billion."),
 DatasetItem(question='Calculate the percentage increase in net income for NVDA from 2021 to 2022.', answer='78.94%', context="NVDA's net income rose from $9.75 billion in 2021 to $17.45 billion in 2022, which is an increase of 78.94%."),
 DatasetItem(question='What was the earnings per share (EPS) for NVDA in 2022 and how does it compare to 2021?', answer='2022 EPS was $2.79, up from $2.02 in 2021', context="NVDA's earnings per share (EPS) increased to $2.79 in 2022, from $2.02 in 2021."),
 DatasetIte

### Visualize Dataset

In [35]:
import json

# Dump every dataset item to a dict, then pretty-print the list as JSON
question_records = [item.model_dump() for item in questions]
json_data = json.dumps(question_records, indent=2)
print("JSON data:", json_data, sep="\n")

JSON data:
[
  {
    "question": "What was NVDA's gross profit for the year 2022?",
    "answer": "$17.65 billion",
    "context": "NVDA's total gross profit for 2022 stood at $17.65 billion, a significant increase from the $10.92 billion recorded in 2021."
  },
  {
    "question": "How much did NVDA's operating expenses increase in 2022 compared to 2021?",
    "answer": "$3.01 billion",
    "context": "NVDA's operating expenses were $9.76 billion in 2022, up from $6.75 billion in 2021, marking an increase of $3.01 billion."
  },
  {
    "question": "Calculate the percentage increase in net income for NVDA from 2021 to 2022.",
    "answer": "78.94%",
    "context": "NVDA's net income rose from $9.75 billion in 2021 to $17.45 billion in 2022, which is an increase of 78.94%."
  },
  {
    "question": "What was the earnings per share (EPS) for NVDA in 2022 and how does it compare to 2021?",
    "answer": "2022 EPS was $2.79, up from $2.02 in 2021",
    "context": "NVDA's earnings per shar

In [36]:
import pandas as pd

# One row per dataset item, with columns taken from the dumped item fields
question_rows = [item.model_dump() for item in questions]
df = pd.DataFrame(question_rows)
print("\nDataFrame:")
display(df)


DataFrame:


Unnamed: 0,question,answer,context
0,What was NVDA's gross profit for the year 2022?,$17.65 billion,NVDA's total gross profit for 2022 stood at $1...
1,How much did NVDA's operating expenses increas...,$3.01 billion,NVDA's operating expenses were $9.76 billion i...
2,Calculate the percentage increase in net incom...,78.94%,NVDA's net income rose from $9.75 billion in 2...
3,What was the earnings per share (EPS) for NVDA...,"2022 EPS was $2.79, up from $2.02 in 2021",NVDA's earnings per share (EPS) increased to $...
4,"What was the growth in NVDA's revenue in 2022,...",35.05% growth,NVDA reported a revenue of $37.14 billion in 2...
5,What were NVDA's total assets in 2023?,494.27 billion,"For the fiscal year ended in 2023, NVDA report..."
6,How much did NVDA's long-term debt amount to i...,20.30 billion,"In 2023, NVDA held long-term debt totaling 20...."
7,What was the total shareholders' equity for NV...,34.56 billion,NVDA's total shareholders' equity stood at 34....
8,Calculate the ratio of total liabilities to to...,0.28,"In 2023, NVDA recorded total liabilities of 13..."
9,What percentage of NVDA's total assets was cas...,7.46%,NVDA's cash and cash equivalents were 36.89 bi...


### Create Dataset in LangSmith

In [37]:
# Parallel lists: the i-th input question pairs with the i-th reference answer
inputs = [item.question for item in questions]
outputs = [item.answer for item in questions]

In [39]:
from langsmith import Client

# Create the dataset, or reuse it when this cell is re-run.
# `create_dataset` fails once the name is taken, so check for it first.
client = Client()
dataset_name = "warren-buffett-agent-test-0.0.2"

if client.has_dataset(dataset_name=dataset_name):
    # Already created by an earlier run: reuse it and do not upload the
    # examples a second time, which would duplicate them.
    dataset = client.read_dataset(dataset_name=dataset_name)
    print(f"Dataset {dataset_name!r} already exists; reusing it without adding examples.")
else:
    dataset = client.create_dataset(
        dataset_name=dataset_name,
        description="QA pairs about NVDA's financials",
    )
    # Upload one example per generated question/answer pair
    client.create_examples(
        inputs=[{"question": q} for q in inputs],
        outputs=[{"answer": a} for a in outputs],
        dataset_id=dataset.id,
    )

### Evaluate

In [40]:
def predict_answer(example: dict):
    """Use this for answer evaluation.

    Runs the agent graph on one dataset example and returns its final reply.

    Args:
        example: Dataset example inputs; the question is read from the
            "question" key.

    Returns:
        A dict with a single "answer" key holding the content of the last
        message in the graph's final state.
    """
    import uuid

    question = example.get("question")

    # The graph is compiled with a checkpointer, so state is kept per thread_id.
    # A fresh thread per example stops earlier questions and answers from
    # leaking into the context of later ones.
    thread_id = str(uuid.uuid4())

    final_state = app.invoke(
        {"messages": [HumanMessage(content=question)]},
        config={"configurable": {"thread_id": thread_id}},
    )
    answer = final_state["messages"][-1].content
    return {"answer": answer}

In [45]:
from langsmith.evaluation import LangChainStringEvaluator, evaluate

# LLM handed to the "qa" evaluator through its config
eval_llm = ChatOpenAI(temperature=0.0, model="gpt-4o-mini")


def _prepare_qa_data(run, example):
    """Build the prediction / reference / input dict passed to the "qa" evaluator."""
    return {
        "prediction": run.outputs.get("answer", "No answer key found"),
        "reference": example.outputs.get("answer", "No answer key found"),
        "input": example.inputs["question"],
    }


# Evaluator
qa_evalulator = [
    LangChainStringEvaluator(
        "qa",
        prepare_data=_prepare_qa_data,
        config={"llm": eval_llm},
    ),
]

# Run `predict_answer` over the dataset and score each answer
experiment_results = evaluate(
    predict_answer,
    data=dataset_name,
    evaluators=qa_evalulator,
    experiment_prefix="financial-rag-qa",
    metadata={"version": "1.0.0", "revision_id": "beta"},
)

View the evaluation results for experiment: 'financial-rag-qa-5cdbf883' at:
https://smith.langchain.com/o/b106fc6f-dc52-5412-bec0-144113c8e49f/datasets/757d97a7-6cb3-4001-bbfe-f273f88e03c0/compare?selectedSessions=ae4ca8fb-ca43-4713-af65-7a7ab77da8b8




15it [00:16,  1.07s/it]


In [48]:
# Show the raw results; the recorded output is a list of dicts, each with at least a 'run' entry.
# NOTE(review): `_results` is an underscore-prefixed (private) attribute and the
# dump is very large - consider a public accessor or a trimmed view.
experiment_results._results

[{'run': RunTree(id=UUID('b34104de-ef5f-4e9f-8d5c-ffad56fcdfc6'), name='Target', start_time=datetime.datetime(2024, 8, 23, 10, 30, 33, 483893, tzinfo=datetime.timezone.utc), run_type='chain', end_time=datetime.datetime(2024, 8, 23, 10, 30, 41, 310431, tzinfo=datetime.timezone.utc), extra={'metadata': {'version': '1.0.0', 'revision_id': 'beta', 'num_repetitions': 1, 'example_version': '2024-08-23T10:28:22.777250+00:00', 'ls_method': 'traceable'}, 'runtime': {'sdk': 'langsmith-py', 'sdk_version': '0.1.104', 'library': 'langsmith', 'platform': 'Linux-5.15.153.1-microsoft-standard-WSL2-x86_64-with-glibc2.35', 'runtime': 'python', 'py_implementation': 'CPython', 'runtime_version': '3.11.9', 'langchain_version': '0.2.14', 'langchain_core_version': '0.2.34', 'thread_count': 69.0, 'mem': {'rss': 360435712.0}, 'cpu': {'time': {'sys': 3.18, 'user': 25.65}, 'ctx_switches': {'voluntary': 7906.0, 'involuntary': 19.0}, 'percent': 0.0}}}, error=None, serialized={'name': 'Target', 'signature': '(examp