In [3]:
from vector_store import VectorStoreWrapper
from models.retrieval import RetrievalModel
from evaluate.open_eval_chain import QAOpenEvalChain

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain import OpenAI

In [55]:
from pathlib import Path

# Show which PDF filings are available under examples/.
list(Path("examples/").glob("*.pdf"))

[PosixPath('examples/apple_10q_q3_22.pdf'),
 PosixPath('examples/apple_10q_q1_23.pdf'),
 PosixPath('examples/msft_10q_q1_23.pdf'),
 PosixPath('examples/msft_10q_q3_22.pdf')]

In [58]:
from langchain.document_loaders import DirectoryLoader, PyPDFLoader

# Load every PDF in examples/ through PyPDFLoader.
loader = DirectoryLoader(
    path="examples/",
    glob="*.pdf",
    loader_cls=PyPDFLoader,
)
docs = loader.load()

### Load the Data/Create an Index

In [59]:
# `PDFLoader` is not imported or defined anywhere in this notebook, so the cell
# fails with NameError on a fresh kernel. Use the DirectoryLoader/PyPDFLoader
# combination instead; the import is repeated here so the cell runs on its own.
# NOTE(review): if `PDFLoader` was a project-local loader with different
# behavior, import it explicitly instead — confirm.
from langchain.document_loaders import DirectoryLoader, PyPDFLoader

dirpath = "examples/"
loader = DirectoryLoader(path=dirpath, glob="*.pdf", loader_cls=PyPDFLoader)
tdocs = loader.load()

In [5]:
# Chunk the loaded pages into pieces of at most 3000 characters, using the
# separator list below.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=3000,
    separators=["\n \n", "\n\n", "\n", " ", ""],
)
split_docs = text_splitter.split_documents(docs)

In [6]:
# Number of chunks produced by the splitter.
len(split_docs)

413

In [10]:
# NOTE(review): presumably loads the index stored at `load_path` when it exists
# and otherwise builds it from `split_docs` — confirm in vector_store.
store_wrapper = VectorStoreWrapper(load_path="multi_company_index")
db = store_wrapper.get_index(split_docs)

In [11]:
#db.save_local("multi_company_index")

### Testing output parsers

In [12]:
# Keep only the chunks whose metadata marks them as coming from page 0.
first_pages = [chunk for chunk in split_docs if chunk.metadata["page"] == 0]

In [13]:
# Number of page-0 chunks selected above.
len(first_pages)

4

In [35]:
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field

# Target schema for the structured extraction: the fields the LLM is asked to
# fill in from a filing's first page.
# Documented with comments rather than a class docstring on purpose: pydantic
# copies a model docstring into the generated schema as its description, which
# would change the format instructions derived from this model.
class CompanyInfo(BaseModel):
    # Each Field description is part of the schema, so treat these strings as
    # prompt text rather than as plain documentation.
    company: str = Field(description="the name of the company")
    ticker: str = Field(description="the ticker for the company")
    address: str = Field(description="the address of the company")

#from models.extractor import ExtractionModel
#ext_model = ExtractionModel(find_matches=False)
#result = ext_model.run("What is the name and ticker of the company?", first_pages)

In [50]:
# `.dict()` is an instance method, so calling it on the class itself raises
# TypeError. To inspect the model without constructing an instance, use the
# class-level `.schema()`, which returns the JSON schema of the declared fields.
CompanyInfo.schema()

In [36]:
from prompt_templates.qa_templates import PER_DOC_PROMPT

# `SEARCH_PROMPT` is not imported or defined anywhere in this notebook, so
# splitting it fails with NameError on a fresh kernel. Split the prompt this
# cell actually imports instead.
# NOTE(review): if the search prompt was the intended base template, import it
# explicitly and use it here — confirm.
# The result is a list of the template's blank-line-separated paragraphs.
template = PER_DOC_PROMPT.split("\n\n")

In [39]:
from langchain.prompts.chat import ChatPromptTemplate, HumanMessagePromptTemplate

parser = PydanticOutputParser(pydantic_object=CompanyInfo)

# Insert the parser's format instructions between the first two paragraphs of
# the base template.
human_message = HumanMessagePromptTemplate.from_template(
    "\n".join([template[0], "{format_instructions}", template[1]])
)
prompt = ChatPromptTemplate(
    messages=[human_message],
    input_variables=["context", "question"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

In [40]:
from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI

# Chat model with temperature=0, wired to the extraction prompt.
chat_model = ChatOpenAI(temperature=0)
chain = LLMChain(llm=chat_model, prompt=prompt)

In [46]:
# Run the extraction on the first page-0 chunk, then parse the raw reply into
# a CompanyInfo instance.
question = "What is the name of the company, ticker of the company, and address of the company?"
result = chain.run(context=first_pages[0].page_content, question=question)
parser.parse(result)

CompanyInfo(company='Apple Inc.', ticker='AAPL', address='One Apple Park Way\nCupertino, California 95014')

In [51]:
# Raw string returned by the chain, before it is parsed.
result

'{\n    "company": "Apple Inc.",\n    "ticker": "AAPL",\n    "address": "One Apple Park Way\\nCupertino, California 95014"\n}'

### Retrieval Model

In [12]:
# Retriever over the vector index with k=7.
# NOTE(review): with_sources=True presumably makes the result carry its source
# documents — confirm in models.retrieval.
retriever = db.as_retriever(search_kwargs={"k": 7})
model = RetrievalModel(retriever=retriever, with_sources=True)

### Getting some results

In [13]:
# Ask the retrieval model a cross-filing question. This rebinds `result`, which
# earlier held the raw string from the extraction chain.
result = model.run("how much did AAPL's iPhone revenue increase from Q2 2022 to Q3 2022?")

In [14]:
# Print the answer, then each source document (file, page, text), separated by
# a rule of 20 "=" characters.
separator = "=" * 20

print(f"Answer: {result.answer}")
print(separator)
for source in result.source_docs:
    meta = source.metadata
    print(f"doc: {meta['source']}")
    print(f"page: {meta['page']}")
    print(source.page_content)
    print(separator)

Answer: AAPL's iPhone revenue increased from $40,665 million in Q2 2022 to $65,775 million in Q3 2022.
doc: examples/apple_10q_q3_22.pdf
page: 9
Note 2 – Revenue
Net sales disaggregated  by significant products and services for the three- and nine-month periods ended June 25, 2022 and June 26, 2021 were as follows (in
millions):
Three Months Ended Nine Months Ended
June 25,
2022June 26,
2021June 25,
2022June 26,
2021
iPhone $ 40,665 $ 39,570 $ 162,863 $ 153,105 
Mac  7,382 8,235 28,669 26,012 
iPad 7,224 7,368 22,118 23,610 
Wearables, Home and Accessories 8,084 8,775 31,591 29,582 
Services 19,604 17,486 58,941 50,148 
Total net sales $ 82,959 $ 81,434 $ 304,182 $ 282,457 
(1)Products net sales include amortization of the deferred value of unspecified software upgrade rights, which are bundled in the sales price of the respective
product.
(2)Wearables, Home and Accessories net sales include sales of AirPods, Apple TV, Apple Watch, Beats products, HomePod mini and accessories.
(3)Servi

In [44]:
# Inspect the first source document attached to the result.
result.source_docs[0]

Document(page_content='Note 2 – Revenue\nNet sales disaggregated by significant products and services for the three months ended December 31, 2022 and December 25, 2021 were as follows (in\nmillions):\nThree Months Ended\nDecember 31,\n2022December 25,\n2021\niPhone $ 65,775 $ 71,628 \nMac  7,735 10,852 \niPad 9,396 7,248 \nWearables, Home and Accessories 13,482 14,701 \nServices 20,766 19,516 \nTotal net sales $ 117,154 $ 123,945 \n(1)Products net sales include amortization of the deferred value of unspecified software upgrade rights, which are bundled in the sales price of the respective\nproduct.\n(2)Wearables, Home and Accessories net sales include sales of AirPods, Apple TV, Apple Watch, Beats products, HomePod mini and accessories.\n(3)Services net sales include sales from the Company’s advertising, AppleCare, cloud, digital content, payment and other services. Services net sales also include\namortization of the deferred value of services bundled in the sales price of certain pr

In [73]:
# Whitespace-delimited word count of the result's context string.
len(result.context.split())

1630

### Testing out Raw Search

In [2]:
from langchain.agents import load_tools
from langchain.agents import initialize_agent
from langchain.chat_models import ChatOpenAI

In [3]:
import os

In [5]:
# Chat model passed to the tool loader.
llm = ChatOpenAI(temperature=0)
# NOTE(review): the "serpapi" tool presumably needs a SerpAPI key in the
# environment — confirm it is set before running this cell.
tools = load_tools(["serpapi"], llm=llm)

In [6]:
# Build an agent over the loaded tools; verbose=True is set so each run logs
# its intermediate steps.
agent = initialize_agent(
    tools=tools,
    llm=llm,
    agent="zero-shot-react-description",
    verbose=True,
)

In [7]:
# Baseline: answer a filing-style question from raw web search alone.
# NOTE(review): in the recorded run the search observation is about profits,
# yet the final answer reports that figure as total revenue.
agent.run("What was apple's total revenue for 2022?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to search for the latest financial reports from Apple.
Action: Search
Action Input: "Apple financial report 2022"[0m
Observation: [36;1m[1;3mFY 23 First Quarter Results. Apple announced results for the quarter ended December 31, 2022. ... Quarterly Earnings Reports. 2023 2022 2021[0m
Thought:[32;1m[1;3mI need to find the specific report that includes Apple's total revenue for 2022.
Action: Search
Action Input: "Apple financial report 2022 total revenue"[0m
Observation: [36;1m[1;3mApple generated almost a hundred billion dollars in profits in 2022, compared to $94.6 billion in 2021 and over $57 billion in 2020.[0m
Thought:[32;1m[1;3mI have found the answer to the original question.
Final Answer: Apple's total revenue for 2022 was almost a hundred billion dollars.[0m

[1m> Finished chain.[0m


"Apple's total revenue for 2022 was almost a hundred billion dollars."

In [9]:
# Second raw-search probe, this time with an open-ended question.
agent.run("which US company made the most money in 2021?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to find out the current financial status of US companies
Action: Search
Action Input: "top earning US companies 2021"[0m
Observation: [36;1m[1;3mThe Top 10[0m
Thought:[32;1m[1;3mI need to narrow down the search to find the specific company
Action: Search
Action Input: "highest earning US company 2021"[0m
Observation: [36;1m[1;3mThe Top 10 · 1. Walmart · 2. Amazon · 3. Apple · 4. CVS Health · 5. UnitedHealth Group · 6. ExxonMobil · 7. Berkshire Hathaway · 8. Alphabet.[0m
Thought:[32;1m[1;3mI need to find out which of these companies made the most money
Action: Search
Action Input: "revenue of top earning US companies 2021"[0m
Observation: [36;1m[1;3mNo good search result found[0m
Thought:[32;1m[1;3mI should try searching for "financial reports of top earning US companies 2021"
Action: Search
Action Input: "financial reports of top earning US companies 2021"[0m
Observation: [36;1m[1;3mAll of our icon

'The top earning US company in 2021 is Walmart, according to the Fortune 500 list.'