# Ollama PDF RAG

## Import Libraries

In [2]:
from langchain_community.document_loaders import UnstructuredPDFLoader
from langchain_ollama import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import PGVector
from langchain_community.vectorstores import Chroma
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_ollama.chat_models import ChatOllama
from langchain_core.runnables import RunnablePassthrough
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain.memory import ConversationBufferMemory

# Read key/value settings from the local .env file into the process
# environment before any client below is constructed.
# NOTE(review): presumably this supplies the Postgres connection settings
# used by PGVector later on — confirm against the .env file.
from dotenv import load_dotenv

load_dotenv(dotenv_path=".env")

# Silence every Python warning for the rest of the notebook so that cell
# output stays readable.
import warnings

warnings.filterwarnings(action="ignore")

from IPython.display import display, Markdown



## Load PDFs

In [3]:
# Directory holding the source PDFs. The location is absolute and
# machine-specific; change PDF_DIR when running this notebook elsewhere.
PDF_DIR = "/Users/donaldparker/Projects/lab/rag/ollama-pgvector/pdfs"

# Files to index, resolved against PDF_DIR.
paths = [
    f"{PDF_DIR}/koi_nih.pdf",
    f"{PDF_DIR}/koi_khv_001.pdf",
    f"{PDF_DIR}/koi_khv_002.pdf",
]

# One loader per file. Each .load() call yields a list of documents, so
# `docs` is a list of lists (one inner list per PDF).
docs = [UnstructuredPDFLoader(file_path=path).load() for path in paths]

# Flatten into a single list of documents for the splitting step.
docs_list = [doc for per_file_docs in docs for doc in per_file_docs]

# Plain string: the message has no placeholders, so no f-string is needed.
print("PDF Loaded successfully")


PDF Loaded successfully


## Split text into chunks

In [5]:
# Break the loaded documents into overlapping chunks: each chunk is capped at
# 1000 units with 200 units shared between neighbours.
# NOTE(review): units are presumably characters (the splitter's default
# length function) — confirm.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
chunks = text_splitter.split_documents(docs_list)

chunk_count = len(chunks)
print(f"Text split into {chunk_count} chunks")

Text split into 205 chunks


## Vector Database

In [6]:
# Both stores index the same chunks under the same collection name.
collection_name = "rag-koi"

# Build the embedding client once and share it, so the two stores cannot end
# up configured with different embedding models.
embeddings = OllamaEmbeddings(model="nomic-embed-text")

# Chroma store; the retriever later in the notebook is built on this one.
chroma_db = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
    collection_name=collection_name,
)

# The same chunks indexed into Postgres via pgvector, with metadata stored as
# JSONB. No connection string is passed here.
# NOTE(review): presumably the connection string is picked up from the
# environment populated by load_dotenv — confirm.
pgvector_db = PGVector.from_documents(
    documents=chunks,
    embedding=embeddings,
    collection_name=collection_name,
    use_jsonb=True,
)

# Plain string: the message has no placeholders, so no f-string is needed.
print("Vector database created successfully")

Vector database created successfully


## Set LLM and Retrieval

In [7]:
# Chat model served by the local Ollama instance.
local_model = "llama3.2:latest"
# Alternative model that can be swapped in:
# local_model = "granite3-dense:8b"
llm = ChatOllama(model=local_model)

# Instruction used to have the LLM rephrase the user's question into two
# alternative wordings; each wording is then used for retrieval.
multi_query_template = """You are an AI language model assistant.  Your task is to generate 2 
    different versions of the give user question to retrieve relevant documents from
    a vector database. By generating multiple perspectives on user question, your
    goal is to help users overcome some of the limitations of distance-based
    similarity search. Provide these alternative questions separated by newlines.
    Original question: {question}"""
query_prompt = PromptTemplate(
    template=multi_query_template,
    input_variables=["question"],
)

# Multi-query retriever layered over the Chroma store (pgvector_db is not
# used for retrieval here).
base_retriever = chroma_db.as_retriever()
retriever = MultiQueryRetriever.from_llm(
    retriever=base_retriever,
    llm=llm,
    prompt=query_prompt,
)

## Create Chain

In [9]:
# Prompt for the final answer. {context} receives the retrieved material and
# {question} the user's original question.
# The instruction previously read "Answer the question on on the following
# context", which left the model without a clear grounding instruction.
template = """Answer the question based on the following context:
{context}
Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

In [10]:
def _format_docs(docs):
    """Join the text of the retrieved documents into one context string.

    Args:
        docs: Iterable of retrieved documents, each exposing ``page_content``.

    Returns:
        The documents' text separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# RAG pipeline: the incoming question is (a) sent to the retriever, whose
# documents are flattened to plain text, and (b) passed through unchanged;
# both fill the prompt, which goes to the LLM, whose reply is reduced to a str.
# Without the formatting step the prompt would receive the Python repr of a
# list of Document objects (metadata and escaped newlines included) rather
# than the document text itself.
chain = (
    {"context": retriever | _format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [11]:
def chat_with_pdf(question):
    """
    Ask the chain a question and render its answer as Markdown.

    The question is handed to ``chain.invoke`` as-is; the resulting text is
    wrapped in ``Markdown`` and shown with ``display``. The value returned is
    whatever ``display`` returns.
    """
    answer = chain.invoke(question)
    rendered = Markdown(answer)
    return display(rendered)


In [12]:
# Example query against the indexed PDFs: a four-paragraph summary of KHV
# prevention measures.
khv_question = "In 4 paragraphs summarize this some of the preventative measures to prevent KHV?"
chat_with_pdf(khv_question)

Here is a summary of some preventative measures to prevent KHV (Koi Herpes Virus) in 4 paragraphs:

To prevent KHV, breeders and aquarists can take several steps. One crucial measure is to ensure proper water quality and hygiene practices. This includes regularly testing water parameters such as pH, ammonia, nitrite, and nitrate levels, and making adjustments as necessary to maintain optimal conditions for the fish. Additionally, frequent cleaning and disinfection of equipment, filters, and tanks can help prevent the spread of the virus.

Another preventative measure is to implement good biosecurity practices. This includes isolating new fish or stock before introducing them to a main tank, and quarantining any sick fish to prevent the spread of disease. It's also essential to monitor fish health closely and remove any infected or suspect fish from the water immediately. Furthermore, using physical barriers such as netting or screens can help prevent the introduction of infected fish into the tank.

In addition to these measures, incorporating certain supplements and treatments into the water can help boost the immune system of Koi and prevent disease. For example, using essential oils derived from plants like Lippia spp. has been shown to have anti-parasitic properties that can help prevent the spread of KHV. Other natural remedies such as spore proteins from Myxobolus Koi or red ginger have also been explored as antihelminthic agents for controlling infections in Koi fish.

Finally, some breeders and aquarists are exploring the use of prophylactic treatments to prevent KHV outbreaks. These can include antibiotics, antiviral medications, or other supplements that target the virus specifically. It's essential to note that these treatments should only be used under the guidance of a veterinarian or experienced aquarist, as overuse or misuse can lead to antibiotic resistance and other problems. By taking a multi-faceted approach to prevention, Koi farmers and enthusiasts can reduce the risk of KHV outbreaks and ensure the health and well-being of their fish.

In [47]:
# granite
# NOTE(review): presumably this cell was run with the granite model selected
# — confirm. The chain is given only this question string on each call, so
# the follow-up is sent with no record of the previous exchange.
follow_up_question = "What were you asked to do in the last step?"
chat_with_pdf(follow_up_question)

You didn't ask me to do anything in this conversation yet. This is the first message you've sent. Would you like to ask me something or provide a task for me to complete? I'm here to help!

## Cleanup

In [13]:
# Drop the Chroma collection built earlier in the notebook.
# NOTE(review): only chroma_db is removed here; the collection written through
# pgvector_db is left in place — confirm that this is intended.
chroma_db.delete_collection()

cleanup_message = "Vector database deleted successfully"
print(cleanup_message)

Vector database deleted successfully
