# Ollama PDF RAG

## Import Libraries

In [11]:
import os

from langchain_community.document_loaders import UnstructuredPDFLoader
from langchain_ollama import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import PGVector
from langchain_community.vectorstores import Chroma
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_ollama.chat_models import ChatOllama
from langchain_core.runnables import RunnablePassthrough
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain.memory import ConversationBufferMemory

from dotenv import load_dotenv
load_dotenv(dotenv_path='.env')

#REPO_OWNER = os.environ.get('REPO_OWNER')

#suppress warnings
import warnings
warnings.filterwarnings('ignore')

from IPython.display import display, Markdown



## Load PDFs

In [12]:
# Location of the PDF to index. It can be overridden with the PDF_PATH environment
# variable (for example from the .env file loaded above); the default is the
# original hard-coded location.
load_path = os.environ.get(
    "PDF_PATH",
    "/Users/donaldparker/Projects/lab/rag/ollama-pgvector/pdfs/koi_nih.pdf",
)

# A non-empty string is always truthy, so testing the string itself can never detect
# a missing file. Check the filesystem and stop here with a clear error; otherwise
# later cells would fail on an undefined `pdf_data`.
if not os.path.isfile(load_path):
    raise FileNotFoundError(f"Unable to load PDF: {load_path}")

loader = UnstructuredPDFLoader(file_path=load_path)
pdf_data = loader.load()
print(f"PDF Loaded successfully: {load_path}")

PDF Loaded successfully: /Users/donaldparker/Projects/lab/rag/ollama-pgvector/pdfs/koi_nih.pdf


## Split text into chunks

In [10]:
# Splitter settings, passed to RecursiveCharacterTextSplitter as
# chunk_size / chunk_overlap.
splitter_config = {"chunk_size": 1000, "chunk_overlap": 200}

text_splitter = RecursiveCharacterTextSplitter(**splitter_config)
# Break the loaded PDF documents into the chunks that get embedded later.
chunks = text_splitter.split_documents(pdf_data)

chunk_count = len(chunks)
print(f"Text split into {chunk_count} chunks")

Text split into 195 chunks


## Vector Database

In [6]:
collection_name = "rag-tacoma"

# One embedding client shared by both stores so the model name lives in one place.
embeddings = OllamaEmbeddings(model="nomic-embed-text")

# Chroma store; this is the one the retriever is built on.
chroma_db = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
    collection_name=collection_name,
)

# NOTE(review): no connection string is passed here; presumably PGVector reads it
# from the PGVECTOR_CONNECTION_STRING environment variable loaded from .env — confirm.
pgvector_db = PGVector.from_documents(
    collection_name=collection_name,
    documents=chunks,
    embedding=embeddings,
    use_jsonb=True,
    # Drop any previous copy of this collection first, so re-running the cell does
    # not insert the same chunks into Postgres a second time.
    pre_delete_collection=True,
)

print("Vector database created successfully")

Vector database created successfully


## Set LLM and Retrieval

In [7]:
local_model = "llama3.2:latest"
# Alternative model: local_model = "granite3-dense:8b"
llm = ChatOllama(model=local_model)

# Prompt that asks the LLM to rephrase the user's question; the rephrasings are
# requested one per line ("separated by newlines").
query_prompt = PromptTemplate(
    input_variables=["question"],
    template="""You are an AI language model assistant. Your task is to generate 2
    different versions of the given user question to retrieve relevant documents from
    a vector database. By generating multiple perspectives on the user question, your
    goal is to help users overcome some of the limitations of distance-based
    similarity search. Provide these alternative questions separated by newlines.
    Original question: {question}""",
)

# Multi-query retrieval over the Chroma store, using the same LLM to generate the
# alternative questions.
retriever = MultiQueryRetriever.from_llm(
    retriever=chroma_db.as_retriever(),
    llm=llm,
    prompt=query_prompt,
)

## Create Chain

In [8]:
# Answer prompt: {context} receives the retrieved material, {question} the user's
# original question.
template = """Answer the question based on the following context:
{context}
Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

In [9]:
def _format_docs(docs):
    """Join the text of the retrieved documents into a single context string."""
    return "\n\n".join(doc.page_content for doc in docs)


# The retriever yields Document objects. Formatting them first keeps the prompt's
# {context} slot plain text instead of a Python repr that includes metadata.
chain = (
    {"context": retriever | _format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [10]:
def chat_with_pdf(question):
    """
    Ask the RAG chain a question about the PDF and render the answer as Markdown.

    Args:
        question: The question passed to ``chain.invoke``; must be a non-blank string.

    Returns:
        None. The answer is shown in the cell output rather than returned.

    Raises:
        ValueError: If ``question`` is not a string or contains only whitespace.
    """
    # Reject blank input up front instead of spending a retrieval + LLM call on it.
    if not isinstance(question, str) or not question.strip():
        raise ValueError("question must be a non-empty string")

    answer = chain.invoke(question)
    display(Markdown(answer))


In [11]:
# Smoke test: ask the chain for a summary of the indexed PDF.
# NOTE(review): the saved output below describes a Toyota manual, while the load cell
# points at koi_nih.pdf and the execution counts are out of order. The output may be
# stale; re-run with Restart & Run All to confirm.
chat_with_pdf("Summarize this pdf")

This PDF appears to be a manual for a Toyota vehicle, specifically providing instructions and guidelines for the safe use of the vehicle's safety features, including the seat belt system.

The manual covers various topics related to driver and passenger safety, such as:

1. Installing and using child restraints
2. Removing and installing seat belts
3. Adjusting the seat and steering wheel for proper fit
4. Using the seat belt correctly
5. Troubleshooting common issues with the seat belt system

The manual also provides general safety tips and reminders, such as:

* Always wearing a seat belt
* Keeping the vehicle's interior clean and free of distractions
* Regularly checking the condition of the seat belts for signs of wear or damage

Additionally, the manual includes information on maintaining the vehicle's electrical systems, including the battery and charging system.

Overall, this PDF appears to be a comprehensive guide for drivers and passengers to ensure safe and secure transportation in their Toyota vehicle.

In [47]:
# Probe for conversational memory. The chain is assembled from the retriever, prompt,
# LLM and output parser only, so no earlier turn is sent along with this question.
# NOTE(review): "granite" presumably marks a run with the granite3-dense:8b model — confirm.
chat_with_pdf("What were you asked to do in the last step?")

You didn't ask me to do anything in this conversation yet. This is the first message you've sent. Would you like to ask me something or provide a task for me to complete? I'm here to help!

## Cleanup

In [13]:
# Remove the collection from both stores. Previously only the Chroma collection was
# deleted, leaving the PGVector collection behind despite the success message.
chroma_db.delete_collection()
pgvector_db.delete_collection()
print("Vector database deleted successfully")

Vector database deleted successfully
