In [4]:
!pip install pypdf

Collecting pypdf
  Downloading pypdf-5.1.0-py3-none-any.whl.metadata (7.2 kB)
Downloading pypdf-5.1.0-py3-none-any.whl (297 kB)
Installing collected packages: pypdf
Successfully installed pypdf-5.1.0


In [18]:
!pip install chromadb

Collecting chromadb
  Downloading chromadb-0.5.18-py3-none-any.whl.metadata (6.8 kB)
Collecting build>=1.0.3 (from chromadb)
  Downloading build-1.2.2.post1-py3-none-any.whl.metadata (6.5 kB)
Collecting chroma-hnswlib==0.7.6 (from chromadb)
  Downloading chroma_hnswlib-0.7.6-cp312-cp312-macosx_11_0_arm64.whl.metadata (252 bytes)
Collecting fastapi>=0.95.2 (from chromadb)
  Downloading fastapi-0.115.4-py3-none-any.whl.metadata (27 kB)
Collecting uvicorn>=0.18.3 (from uvicorn[standard]>=0.18.3->chromadb)
  Downloading uvicorn-0.32.0-py3-none-any.whl.metadata (6.6 kB)
Collecting posthog>=2.4.0 (from chromadb)
  Downloading posthog-3.7.0-py2.py3-none-any.whl.metadata (2.0 kB)
Collecting onnxruntime>=1.14.1 (from chromadb)
  Downloading onnxruntime-1.20.0-cp312-cp312-macosx_13_0_universal2.whl.metadata (4.4 kB)
Collecting opentelemetry-api>=1.2.0 (from chromadb)
  Downloading opentelemetry_api-1.28.1-py3-none-any.whl.metadata (1.4 kB)
Collecting opentelemetry-exporter-otlp-proto-grpc>=1.2.0

In [21]:
!pip freeze >  requirements.txt

In [2]:
# Load environment variables from a .env file
import os
from dotenv import load_dotenv

load_dotenv()

os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

In [20]:
from langchain import hub
from langchain_community.vectorstores import Chroma 
from langchain_openai import (
    ChatOpenAI,
    OpenAIEmbeddings,
)
from langchain.document_loaders import PyPDFLoader
from langchain.chains import RetrievalQA

# Load and chunk the PDF
pdf_path = "./the-iliad-summary.pdf"
loader = PyPDFLoader(pdf_path)
pdf_docs = loader.load_and_split()
print(f"{type(pdf_docs)=}")

doc_object = pdf_docs[0]
print(f"{type(doc_object)=}")
print(f"{type(doc_object.page_content)=}")
print(f"{len(pdf_docs)=}")

# Create embeddings
embeddings = OpenAIEmbeddings()

# Create vector store & retriever
vectorDB = Chroma.from_documents(documents=pdf_docs, embedding=embeddings)
retriever = vectorDB.as_retriever()

# Create LLM & retriever chain
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
pdf_qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)

# Ask a question
question = "What is the mythology background ? Give this as a bullet point list."
output = pdf_qa.invoke({
    "query": question, 
    "chat_history":[None]
})
print(output)

from IPython.display import display, Markdown
display(Markdown(output["result"]))

Ignoring wrong pointing object 22 0 (offset 0)


type(pdf_docs)=<class 'list'>
type(doc_object)=<class 'langchain_core.documents.base.Document'>
type(doc_object.page_content)=<class 'str'>
len(pdf_docs)=3
{'query': 'What is the mythology background ? Give this as a bullet point list.', 'chat_history': [None], 'result': "- Feminine beauty reaches its peak in Helen, wife of King Menelaus of Greece\n- Aphrodite bribes Paris to choose her as the most beautiful goddess in a contest, promising him Helen\n- Paris elopes with Helen to Troy, sparking a war between the Greeks and Trojans\n- The Greeks assemble a mighty army to win back their pride and Helen\n- The war drags on for ten years until Homer picks up the story in the tenth year focusing on Achilles\n- Achilles, the greatest warrior, withdraws from battle until his friend Patroclus is killed by Hector\n- Achilles rejoins the fight, kills Hector, and drags his body around Troy\n- Priam, Hector's father, crosses the battlefield to claim his son's body, showing Trojan valor\n- Achilles,

- Feminine beauty reaches its peak in Helen, wife of King Menelaus of Greece
- Aphrodite bribes Paris to choose her as the most beautiful goddess in a contest, promising him Helen
- Paris elopes with Helen to Troy, sparking a war between the Greeks and Trojans
- The Greeks assemble a mighty army to win back their pride and Helen
- The war drags on for ten years until Homer picks up the story in the tenth year focusing on Achilles
- Achilles, the greatest warrior, withdraws from battle until his friend Patroclus is killed by Hector
- Achilles rejoins the fight, kills Hector, and drags his body around Troy
- Priam, Hector's father, crosses the battlefield to claim his son's body, showing Trojan valor
- Achilles, moved by Priam's actions, returns Hector's body for a proper burial