# PDF Agent

### Loading the PDF

In [1]:
# Import the required data loaders
from langchain_community.document_loaders import PyPDFLoader

# Path to the source PDF (relative, so it resolves against the current working directory)
name = '../Building Machine Learning Systems with Python - Second Edition.pdf'

# Initialize the loader for that file
loader = PyPDFLoader(name)

# Load the PDF; `docs` is the input to the text-splitting cell below
docs = loader.load()

# Sanity check: show how many documents the loader produced
print(len(docs))

326


## Text Splitter and Vector Store

In [2]:
# Imports for embedding, splitting, and storing the loaded PDF documents
from langchain_ollama import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma

# Split the loaded documents into overlapping chunks and index them in Chroma.
# Chunks are at most 1000 characters with a 200-character overlap between
# neighbours, so text that straddles a chunk boundary appears in both chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200)
doc_splits = text_splitter.split_documents(docs)

# Give every chunk a stable, position-based id. When no ids are supplied the
# store assigns fresh random ones on each call, so re-running this cell in the
# same kernel appends a second copy of every chunk to the in-memory collection
# and the retriever starts returning duplicate passages. With fixed ids a
# re-run overwrites the existing entries instead.
chunk_ids = [f"chunk-{index}" for index in range(len(doc_splits))]

vectorstore = Chroma.from_documents(
    documents=doc_splits,
    embedding=OllamaEmbeddings(model="llama3.1"),
    ids=chunk_ids,
)

# Expose the vector store as a retriever for the RAG chain built below
retriever = vectorstore.as_retriever()

### Create the RAG model

In [6]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama import ChatOllama

# System instructions for the answering model. The {context} placeholder is
# where the documents chain inserts the retrieved passages.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

# Chat prompt: the system instructions first, then the user's question,
# which is supplied under the "input" key at invoke time.
rag_prompt_messages = [("system", system_prompt), ("human", "{input}")]
prompt = ChatPromptTemplate.from_messages(rag_prompt_messages)

# Chat model served through Ollama
llm = ChatOllama(model="llama3.1", temperature=0.5)

# Documents chain (LLM + prompt), wrapped with the retriever so each
# question is answered from the chunks fetched out of the vector store.
qa_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, qa_chain)

# Ask a sample question and print only the generated answer
book_question = "What is the book about?"
results = rag_chain.invoke({"input": book_question})
print(results["answer"])

The book appears to be about machine learning using Python, covering topics such as designing features, writing custom features, and implementing algorithms for classification tasks. It uses examples from image classification and text analysis to illustrate concepts.
