In [1]:
import os
# API credentials are read from the environment so no secret lives in the notebook;
# a variable that is not set yields None.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
LANGCHAIN_API_KEY = os.getenv("LANGCHAIN_API_KEY")
SEC_API_KEY = os.getenv("SEC_API_KEY")

# Set the LangChain tracing flag for everything that runs later in this kernel.
os.environ["LANGCHAIN_TRACING_V2"] = "true"

In [4]:
import bs4
import html2text
import json 
from sec_api import QueryApi, RenderApi
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_chroma import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.documents.base import Document
from langchain_openai import ChatOpenAI


In [5]:

# Ask the SEC API for the single most recently filed Apple 10-K.
queryApi = QueryApi(api_key=SEC_API_KEY)
query = {
  "query": "ticker:AAPL AND formType:\"10-K\"",
  "from": "0",
  "size": "1",
  "sort": [{ "filedAt": { "order": "desc" } }]
}

response = queryApi.get_filings(query)

# Fail with a clear message instead of an opaque IndexError/KeyError when the
# query matches nothing.
filings = response.get("filings") or []
if not filings:
    raise ValueError(f"SEC query returned no filings for: {query['query']}")
link_to_10k = filings[0]["linkToFilingDetails"]

renderApi = RenderApi(api_key=SEC_API_KEY)

# Render the filing that the query located, so the notebook follows the latest
# 10-K rather than being pinned to one specific document URL.
target_10k = renderApi.get_filing(link_to_10k)
# Convert the rendered HTML into text and wrap it as a single LangChain Document.
target_10k_full_text = html2text.html2text(target_10k)
docs = [Document(page_content=target_10k_full_text)]

# Split the filing into overlapping chunks; add_start_index stores each chunk's
# offset within the source text in its metadata.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200, add_start_index=True
)
all_splits = text_splitter.split_documents(docs)

# Embed the chunks with OpenAI embeddings and index them in a Chroma vector store.
vectorstore = Chroma.from_documents(documents=all_splits, embedding=OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY))

# Retrieve the 6 most similar chunks for each question.
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 6})
llm = ChatOpenAI(model="gpt-3.5-turbo-0125")
# Prompt template pulled from the LangChain Hub.
prompt = hub.pull("rlm/rag-prompt")
def format_docs(docs):
    """Combine the text of several documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values in input order, separated by a blank line.
    """
    separator = "\n\n"
    page_texts = [document.page_content for document in docs]
    return separator.join(page_texts)


# Inputs for the prompt: the retriever's matches are flattened by format_docs into
# "context", while the incoming question is passed through unchanged as "question".
rag_inputs = {"context": retriever | format_docs, "question": RunnablePassthrough()}

# Full pipeline: build prompt inputs -> fill the prompt -> call the LLM -> parse to str.
rag_chain = rag_inputs | prompt | llm | StrOutputParser()

# Stream the answer and print each chunk as soon as it arrives.
question = "What are the greatest risks facing apple?"
for chunk in rag_chain.stream(question):
    print(chunk, end="", flush=True)