### Load and Split PDF

In [None]:
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

from pathlib import Path

# Build the path from components so the separator matches the host OS.
# A raw string with backslashes only resolves on Windows; on Windows this
# still yields the same "..\sample_data\Tesla_10Q_Q1_2024.pdf" string.
pdf_path = Path("..") / "sample_data" / "Tesla_10Q_Q1_2024.pdf"

# Load PDF pages (the path is relative to the notebook's working directory)
loader = PyPDFLoader(str(pdf_path))
pages = loader.load()

# Split into smaller, slightly overlapping chunks for retrieval
splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=50)
docs = splitter.split_documents(pages)

# Display the number of chunks and a preview of the first chunk's content
print(f"Total chunks: {len(docs)}")
print(docs[0].page_content[:500])




Total chunks: 322
UNITED STATES
SECURITIES AND EXCHANGE COMMISSION
Washington, D.C. 20549
FORM 10-Q
(Mark One)
x QUARTERLY REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934
For the quarterly period ended March 31, 2024
OR
o TRANSITION REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934
For the transition period from _________ to _________


### Embed and Store

In [13]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

# A small, fast embedding model was chosen for this notebook.
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
embedding_model = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

# Embed every chunk in `docs` and build the FAISS index from the results.
vectorstore = FAISS.from_documents(documents=docs, embedding=embedding_model)

# Persist the index to the "faiss_index" folder so it can be reloaded later.
vectorstore.save_local(folder_path="faiss_index")





In [None]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
from transformers import pipeline


# QA model: a local Hugging Face text2text pipeline wrapped for LangChain.
qa_pipeline = pipeline(
    task="text2text-generation",
    model="google/flan-t5-base",
    max_length=512,
)
llm = HuggingFacePipeline(pipeline=qa_pipeline)

# RetrievalQA chain: fetch the top 3 chunks from the vector store and pass
# them to the model using the "stuff" chain type.
retriever = vectorstore.as_retriever(search_kwargs=dict(k=3))
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
)

# Questions to put to the report, asked one at a time below.
queries = [
    "Which company is this report about?",
    "What is this report about?",
    "Summarize this report.",
    "What are the key financial highlights?",
    "What is the revenue for the last quarter?",
]

# Horizontal rule printed after each answer to separate the results.
divider = "-" * 50

for question in queries:
    print(f"\n🔹 Question: {question}")
    answer = qa_chain.run(question)
    print(f"Answer:\n{answer}\n{divider}")


Device set to use cpu



🔹 Question: Which company is this report about?
Answer:
Tesla, Inc.
--------------------------------------------------

🔹 Question: What is this report about?
Answer:
Science/Tech
--------------------------------------------------

🔹 Question: Summarize this report.
Answer:
FINANCIAL INFORMATION ITEM 1. FINANCIAL STATEMENTS Tesla, Inc. Consolidated Balance Sheets (in millions, except per share data) (unaudited) 7/11/25, 7:00 PM tsla-20240331 https://www.sec.gov/Archives/edgar/data/1318605/000162828024017503/tsla-20240331.htm 5/43 17 7/11/25, 7:00 PM tsla-20240331 https://www.sec.gov/Archives/edgar/data/1318605/000162828024017503/tsla-20240331.htm 20/43
--------------------------------------------------

🔹 Question: What are the key financial highlights?
Answer:
MANAGEMENT’S DISCUSSION AND ANALYSIS OF FINANCIAL CONDITION AND RESULTS OF OPERATIONS The following discussion and analysis should be read in conjunction with the consolidated financial statements and the related notes included