In [1]:
import json
# Load JSON file
with open('./data/appl.json', 'r') as file:
    data = json.load(file)

In [21]:
type(data)

dict

In [2]:
from chroma import build_chroma_from_json
from langchain.docstore.document import Document
from langchain_openai import OpenAIEmbeddings
from langchain_mistralai import MistralAIEmbeddings
from langchain_chroma import Chroma

from dotenv import load_dotenv
import os

# Load environment variables
load_dotenv()

True

In [7]:
documents = []
for key, value in data.items():
    # Convert the value to a string if it's not already
    content = str(value)
    # Create a Document object
    doc = Document(page_content=content, id=key, metadata={
        "source": "sec_filings",
        "ticker": "appl",
        "tag": key
        })
    documents.append(doc)

In [27]:
documents[0]

Document(id='AccountsPayable', metadata={'source': 'sec_filings', 'ticker': 'appl', 'tag': 'AccountsPayable'}, page_content='{\'cik\': 320193, \'taxonomy\': \'us-gaap\', \'tag\': \'AccountsPayable\', \'label\': \'Accounts Payable (Deprecated 2009-01-31)\', \'description\': "Carrying value as of the balance sheet date of liabilities incurred (and for which invoices have typically been received) and payable to vendors for goods and services received that are used in an entity\'s business. For classified balance sheets, used to reflect the current portion of the liabilities (due within one year or within the normal operating cycle if longer); for unclassified balance sheets, used to reflect the total liabilities (regardless of due date).", \'entityName\': \'Apple Inc.\', \'units\': {\'USD\': [{\'end\': \'2008-09-27\', \'val\': 5520000000, \'accn\': \'0001193125-09-153165\', \'fy\': 2009, \'fp\': \'Q3\', \'form\': \'10-Q\', \'filed\': \'2009-07-22\', \'frame\': \'CY2008Q3I\'}, {\'end\': \'

In [8]:
embedding_func = OpenAIEmbeddings(model="text-embedding-3-large")

In [9]:
vector_store = Chroma(
    collection_name="appl_sec_filings",
    embedding_function=embedding_func,
    persist_directory="./chromas/appl",  
)

In [12]:
from uuid import uuid4
uuids = [str(uuid4()) for _ in range(len(documents))]

In [13]:
vector_store.add_documents(documents=documents[:200], ids=uuids[:200])

['8f59132d-d925-48ab-97cb-6be54417cd6a',
 '7b2fc09e-fbc8-47f1-822b-0af4dac46e19',
 'e89aa81a-cab8-4ed9-b502-94483a515749',
 '2218fa84-b220-4a7e-99db-86f440364480',
 '651db143-e7c2-4051-beab-44141d870c86',
 '0d12bc6d-9c3a-40c4-a786-c78c104dbf94',
 '0f18d7db-b805-4bec-8b1a-d53515c9143e',
 'ece21b6a-81c9-48b8-a876-08d790e19ae0',
 '6b6cab6f-963e-4cec-9b0b-91059b2ea32b',
 'c99d8929-a9cf-4a15-be19-b197ed0812eb',
 'd2fea191-efba-49b7-a25e-e48f05b1dbbb',
 'a8d32cca-3a3a-47d9-a4c8-5a0d3a801e55',
 '91136b18-37f4-4601-9841-e34e7e1343a7',
 '4e7b0a46-f9f8-47df-b747-4478be65ce36',
 '79c85885-f6e3-4423-aab9-f927097faa98',
 'f592989e-0f92-43a5-9ad8-01054878ba37',
 'b9773d98-0bbd-4ac8-9bce-7a1246a0be2f',
 'a52e8c3c-a9be-4b6e-a8ff-f3f46cf83eb4',
 'cc012afc-9529-45ee-b3bf-2d4439ecbfcb',
 'ea2b8e0b-4084-445d-8c29-8c7706db149f',
 '4467da1f-3d4d-429f-8d4e-d49832a4d689',
 '3eb5adb2-9b9f-4fd5-87af-396676882542',
 '06a582d7-751c-4852-b0c9-b194fe193c45',
 'c180f413-cb97-4e8c-8faf-5369858597a1',
 'f7d2a03b-5527-

In [14]:
vector_store.add_documents(documents=documents[200:400], ids=uuids[200:400])

['3c280586-528f-47f4-8dfe-86a9e720ac89',
 '5ff819a9-1e67-4f11-80a8-3bb95afa770c',
 '260051b2-f4ed-42f5-80c0-5733384f31ad',
 '7092c802-c55f-4ab0-a276-fbfa638cc378',
 '0c061553-28b2-4ce1-90c0-b6e9d0978369',
 'f668382c-82e1-481e-8744-4e72b8125c3b',
 '6a982481-54f3-4c91-b6db-37f80b17fa7c',
 '9fd3356a-6933-49a0-bc7d-a7d8236ff30c',
 '7bb62e26-b62c-49f3-ad95-5e4b7569dc3a',
 'bf2fbdc9-7048-4a92-ab9b-be6fd563cfb1',
 '5c60559a-42c7-4e93-9c4d-4f39017fc7ec',
 'b73ee501-5062-4676-82dc-40c7a8c2e4bc',
 '575a0af1-f6e2-4c18-b58d-c6ea2d1e09ff',
 'ea50b666-817a-4361-bfae-8568ab664853',
 '4d232788-7c0d-4a89-8ead-bcef2d986be2',
 'a4b3dcd9-d689-420e-afff-4ef6326779f7',
 '033a578a-9ab4-40e9-a669-a286acbd5e48',
 'c8b146e4-62cd-41a6-8116-94a8652dc431',
 '76ceb083-8d79-46cc-a813-f17284c30947',
 '85132a2c-a26f-4d12-9e37-565ae6359f7b',
 '48daa81d-680c-4672-b804-da97c6f555d9',
 '35371d71-ce10-4ee2-a71b-0397226e3876',
 'd9269dfb-ed67-4d67-ad93-77c4df632dbe',
 'b4d6bc31-3cbd-4a29-b5ec-0fd5e1233276',
 'fbfa2be0-5a2f-

In [15]:
vector_store.add_documents(documents=documents[400:], ids=uuids[400:])

['5e96a4ac-7f34-417b-980c-6088f83dba48',
 'fdef6965-fe01-4cb0-8f11-0da73c0b1b85',
 '77c3ce16-d032-46e0-b549-7a6c6814cfbc',
 '37b2d6d4-1c24-4ab2-a275-44777ed9f1a7',
 'a90089ed-cf46-4c3c-98fa-3162d188c0fc',
 '30caeb4e-40d1-4702-bff0-4c3c77091c4b',
 '5b37bbee-eee6-4c9b-8d96-e7c21bd0c923',
 'af9cf9a2-d869-4511-8d29-66af7999e96f',
 '78d4c239-40e9-4bb8-b357-2d8282fa9e15',
 'c44367b5-8bb0-4320-ace4-9e53b68de1c6',
 'd47c45a9-a433-443c-8ecd-f0153808aff2',
 '89418e90-3108-41ed-960d-f18d946c8f36',
 '4e83fc1c-c396-4628-8c59-84e7539bf613',
 '900a3cf3-495c-4e0b-9022-796fff576297',
 '702a2d52-e5e3-4730-9493-60053b34eb73',
 '737699ee-25cf-4f04-9b5a-a6e884904c45',
 '752e6aa1-3495-465b-91c2-ccf3fd189e18',
 '85ad5145-fe27-4996-9d60-8b205ab35362',
 '34c26f19-fcb5-4329-bf6a-8dfde500f557',
 '9523ed06-eb5f-4d3c-b440-ccece4537355',
 '0120ab30-0c73-45ce-b3d9-84cb885b4e5a',
 '77180d0b-f7b4-41df-ad5f-666b9b5a37ee',
 'aa21ca54-8ea3-493a-a883-c96f2d0e5053',
 'ec6d23f7-8866-4a2e-9c45-91b1bb92ad94',
 '20033405-5008-

In [16]:
from langchain.tools.retriever import create_retriever_tool

retriever = vector_store.as_retriever()

appl_tool = create_retriever_tool(
    retriever,
    "appl_reporting_searcher",
    """Search information about Apple's reporting to the SEC"""
)

In [17]:
from langchain_mistralai import ChatMistralAI

llm = ChatMistralAI(model="open-mistral-nemo")

In [18]:
tools = [appl_tool]

In [19]:
import os
import json

from typing import Annotated
from typing_extensions import TypedDict
from dotenv import load_dotenv

from langchain_mistralai import ChatMistralAI
from langchain_mistralai import MistralAIEmbeddings

from langgraph.graph import StateGraph, START, END
from langgraph.graph.message import add_messages
from langgraph.prebuilt import ToolNode, tools_condition

from langchain_core.messages import SystemMessage, AIMessage, HumanMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate



load_dotenv()
mistral_api_key = os.getenv("MISTRAL_API_KEY")


llm = ChatMistralAI(
    model="open-mistral-nemo",
    api_key=mistral_api_key,
    temperature=0.5
    )

# llm_with_tools = llm.bind_tools(tools)

tools = [appl_tool]


# Define the graph

from langgraph.prebuilt import create_react_agent

graph = create_react_agent(llm, tools=tools)

In [30]:
graph.invoke({"messages": [("human", "What was apples acccounts payable in 2009?")]})

{'messages': [HumanMessage(content='What was apples acccounts payable in 2009?', id='056b460e-0e15-47a0-866b-c04bef418e3d'),
  AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'ixEDomlQM', 'function': {'name': 'appl_reporting_searcher', 'arguments': '{"query": "Apple\'s accounts payable in 2009"}'}}]}, response_metadata={'token_usage': {'prompt_tokens': 98, 'total_tokens': 131, 'completion_tokens': 33}, 'model': 'open-mistral-nemo', 'finish_reason': 'tool_calls'}, id='run-eb24d79a-adf5-4052-824d-ca8395f289f0-0', tool_calls=[{'name': 'appl_reporting_searcher', 'args': {'query': "Apple's accounts payable in 2009"}, 'id': 'ixEDomlQM', 'type': 'tool_call'}], usage_metadata={'input_tokens': 98, 'output_tokens': 33, 'total_tokens': 131}),
  ToolMessage(content='{\'cik\': 320193, \'taxonomy\': \'us-gaap\', \'tag\': \'AccountsPayable\', \'label\': \'Accounts Payable (Deprecated 2009-01-31)\', \'description\': "Carrying value as of the balance sheet date of liabilities incurred (a

In [None]:
The accounts payable (A/P) is a business accounting term that refers to the balance of cash that a company owes to its creditors for goods or services that have already been received on credit.

In [None]:
Here are the accounts payable for Apple Inc. in 2009:\n\n- **Fiscal Quarter Ending 2009-06-27**:\n  - **Form**: 10-Q\n  - **Filed**: 2009-07-22\n  - **Value**: 4,854,000,000 USD

In [11]:
    conversation = []
    while True:
        user_input = input("You: ")
        if user_input.lower() in ['exit', 'quit', 'bye']:
            print("Goodbye!")
            break
        
        conversation.append(("human", user_input))
        response = graph.invoke({"messages": conversation})
        assistant_message = response["messages"][-1]
        conversation.append(("assistant", assistant_message.content))
        print(f"Assistant: {assistant_message.content}")

Assistant: Hello! How can I assist you today?
Assistant: I can help you with information about Apple's reporting to the SEC. What would you like to know?
Assistant: Apple's profit in 2019 was $55.23 billion.
Assistant: Here is the information I used to get that number:

* Form 10-K for the fiscal year ended September 28, 2019, filed with the SEC on October 31, 2019.
* Net income for the fiscal year was $57.41 billion.
* After accounting for a one-time tax benefit of $2.08 billion, the adjusted net income was $55.23 billion.
Assistant: [{"name": "appl_reporting_searcher", "arguments": {"query": "Apple's profit in 2019"}}]
Goodbye!
