In [None]:
%pip install  openai PyPDF2 langchain langchain_openai sentence_transformers chromadb unstructured -q

In [None]:
import os

from dotenv import load_dotenv

# Pull variables from a .env file into the process environment, then look up
# the OpenAI key. The value is None when the variable is not set.
load_dotenv()
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

In [None]:
from PyPDF2 import PdfReader
# NOTE(review): `Concatenate` is not referenced in any visible cell — confirm it
# is unused elsewhere before removing this import.
from typing_extensions import Concatenate

pdfreader = PdfReader('./Pets.pdf')

# Extract the text of every page, drop pages for which extract_text() returns
# nothing, and join once at the end rather than growing a string with `+=`.
page_texts = (page.extract_text() for page in pdfreader.pages)
raw_text = ''.join(content for content in page_texts if content)

# Print a short preview instead of dumping the whole document into the cell
# output; `raw_text` itself still holds the full text for the next cells.
PREVIEW_CHARS = 1000
print(f"Extracted {len(raw_text):,} characters from {len(pdfreader.pages)} page(s)")
print(raw_text[:PREVIEW_CHARS])

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Chunking parameters, named so they are easy to find and tune.
CHUNK_SIZE = 500     # maximum size of one chunk
CHUNK_OVERLAP = 50   # amount shared between neighbouring chunks

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
texts = text_splitter.split_text(raw_text)

# Fail with a clear message instead of a bare IndexError on `texts[0]` when the
# PDF yielded no extractable text (for example an image-only scan).
if not texts:
    raise ValueError(
        "No text chunks were produced; check that text was extracted from the PDF."
    )

print(f"{len(texts)} chunk(s)")
print(texts[0])

In [None]:
# Embedding model handed to the Chroma vector stores that are built from `texts`.
# No API key is passed explicitly here.
# NOTE(review): presumably the client picks up OPENAI_API_KEY from the
# environment populated by load_dotenv() — confirm.
from langchain_openai import OpenAIEmbeddings
embeddings = OpenAIEmbeddings()

In [None]:
from langchain_community.vectorstores import chroma

# Give every chunk a stable, position-based id. Without explicit ids the wrapper
# generates fresh random ids on each call, so re-running this cell in the same
# kernel adds every chunk to the "pets" collection again and similarity search
# starts returning duplicate hits. With stable ids a re-run overwrites the
# existing entries instead.
# Caveat: if `texts` shrinks between runs, entries with higher indices from the
# earlier run remain in the collection until the kernel is restarted.
chunk_ids = [f"pets-{i}" for i in range(len(texts))]

# In-memory vector store (no persist_directory) holding the embedded chunks.
store = chroma.Chroma.from_texts(
    texts=texts,
    embedding=embeddings,
    ids=chunk_ids,
    collection_name="pets",
)


In [None]:
# Ask the vector store for the chunks most similar to the question and print
# the first one that comes back.
query = "What are the different kinds of pets people commonly own?"
matching_text = store.similarity_search(query=query)

print(matching_text[0])


In [None]:
persist_directory = "chroma_db"

# Stable, position-based ids make this cell idempotent: the collection lives on
# disk, so with auto-generated random ids every "Run All" appended a full
# duplicate set of chunks to it. With fixed ids a re-run overwrites the existing
# entries instead.
# Caveat: a `chroma_db` directory written by earlier runs still contains the old
# randomly-identified entries; delete the directory once to start clean.
persisted_ids = [f"chunk-{i}" for i in range(len(texts))]

# Same chunks and embedding model as the in-memory store, but written to
# `persist_directory` (default collection name, as before).
vectordb = chroma.Chroma.from_texts(
    texts=texts,
    embedding=embeddings,
    ids=persisted_ids,
    persist_directory=persist_directory,
)

# Kept for older Chroma releases that need an explicit flush.
# NOTE(review): with Chroma >= 0.4 data is persisted automatically and this call
# is deprecated (it only emits a warning) — drop it once the version is pinned.
vectordb.persist()


In [None]:
from langchain_openai import ChatOpenAI
from langchain.chains.question_answering import load_qa_chain

# Chat model plus a question-answering chain; verbose=True is passed to the
# chain so its run is logged.
llm = ChatOpenAI(model_name="gpt-4o")
chain = load_qa_chain(
    llm=llm,
    chain_type="stuff",
    verbose=True,
)

# Retrieve the chunks most similar to the question, then hand the chunks and
# the question to the chain.
query = "What are the emotional benefits of owning a pet?"
matching_text = store.similarity_search(query=query)
answer = chain.invoke(
    dict(input_documents=matching_text, question=query)
)
answer


In [None]:
from langchain.chains import RetrievalQA

# RetrievalQA wires the retriever and the LLM together, so the separate
# similarity_search step from the previous cell is not needed here.
# `chain_type` picks how the retrieved documents are combined; the options the
# original note lists are STUFF, REFINE, MAP_REDUCE, etc. Background on "stuff"
# (the link points at the LangChain JS docs):
# https://js.langchain.com/docs/modules/chains/document/stuff#:~:text=The%20stuff%20documents%20chain%20(%22stuff,that%20prompt%20to%20an%20LLM.
retrieval_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=store.as_retriever(),
)

# Reuses `llm` and `query` defined in the previous cell.
retrieval_chain.invoke(query)
