# Ollama PDF RAG

## Import Libraries

In [38]:
import warnings
from pathlib import Path

from dotenv import load_dotenv
from IPython.display import display, Markdown
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain_community.document_loaders import UnstructuredPDFLoader
from langchain_community.vectorstores import PGVector
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_ollama import OllamaEmbeddings
from langchain_ollama.chat_models import ChatOllama
from langchain_text_splitters import RecursiveCharacterTextSplitter

load_dotenv(dotenv_path='.env')

#suppress warnings
warnings.filterwarnings('ignore')



## Load PDFs

In [39]:
# Location of the PDF to index.
# NOTE(review): this is a machine-specific absolute path; point it at your own
# copy of the PDF before running.
load_path = Path("/Users/donaldparker/Projects/lab/rag/ollama-pgvector/pdfs/tacoma-2019.pdf")

# A non-empty path string is always truthy, so test for the file itself and
# stop here with a clear error instead of failing later in the notebook.
if not load_path.is_file():
    raise FileNotFoundError(f"Unable to load PDF: {load_path}")

# The loader is given a plain string path.
loader = UnstructuredPDFLoader(file_path=str(load_path))
pdf_data = loader.load()
print(f"PDF Loaded successfully: {load_path}")

PDF Loaded successfully: /Users/donaldparker/Projects/lab/rag/ollama-pgvector/pdfs/psyc/Altering_Consciousness_Multidisciplinary.pdf


## Split text into chunks

In [40]:
# Cut the loaded document(s) into windows of up to 1000 characters, each
# sharing 200 characters with its neighbour.
splitter_options = {"chunk_size": 1000, "chunk_overlap": 200}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

chunks = text_splitter.split_documents(pdf_data)

print(f"Text split into {len(chunks)} chunks")

Text split into 210 chunks


## Vector Database

In [41]:
# One collection name and one embedding model are shared by both stores, so the
# same chunks are indexed the same way in each backend.
collection_name = "rag-tacoma"
embeddings = OllamaEmbeddings(model="nomic-embed-text")

# Chroma store (no persist directory is given). This is the store the
# retriever further down is built on.
chroma_db = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
    collection_name=collection_name,
)

# Postgres-backed store.
# NOTE(review): no connection string is passed here; presumably it comes from
# the PGVECTOR_CONNECTION_STRING variable loaded from .env. Confirm.
# NOTE(review): this cell previously referenced OpenAIEmbeddings, os and
# collection_name without importing/defining them, so it could not run on a
# fresh kernel. It now uses the local Ollama embeddings; if a collection with
# this name already holds vectors from another embedding model, recreate it.
pgvector_db = PGVector.from_documents(
    collection_name=collection_name,
    documents=chunks,
    embedding=embeddings,
    use_jsonb=True,
)

print("Vector database created successfully")

Vector database created successfully


## Set LLM and Retrieval

In [42]:
# Local Ollama chat model used both to rewrite queries and to answer questions.
# Alternative model that can be swapped in: "granite3-dense:8b"
local_model = "llama3.2:latest"
llm = ChatOllama(model=local_model)

# Prompt that asks the LLM to rephrase the user's question, one variant per
# line; every variant is then sent to the vector store.
query_prompt = PromptTemplate(
    input_variables=["question"],
    template="""You are an AI language model assistant.  Your task is to generate 2
    different versions of the given user question to retrieve relevant documents from
    a vector database. By generating multiple perspectives on the user question, your
    goal is to help users overcome some of the limitations of distance-based
    similarity search. Provide these alternative questions separated by newlines.
    Original question: {question}"""
)

# Multi-query retriever on top of the Chroma store.
retriever = MultiQueryRetriever.from_llm(
    chroma_db.as_retriever(),
    llm,
    prompt=query_prompt
)

## Create Chain

In [43]:
# Answer prompt: {context} receives the retrieved documents and {question} the
# user's original question. The instruction restricts the model to the
# supplied context.
template = """Answer the question based only on the following context:
{context}
Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

In [44]:
# Fan the incoming question out into the two prompt variables: the retriever
# turns it into context, the passthrough forwards it unchanged.
chain_inputs = {"context": retriever, "question": RunnablePassthrough()}

# inputs -> filled prompt -> model -> plain string
chain = chain_inputs | prompt | llm | StrOutputParser()

In [45]:
def chat_with_pdf(question):
    """
    Run `question` through the RAG chain and render the answer as Markdown.

    Returns whatever `display` returns for the rendered answer.
    """
    answer_text = chain.invoke(question)
    rendered_answer = Markdown(answer_text)
    return display(rendered_answer)

In [46]:
# Try the chain with a broad question; chat_with_pdf renders the answer as Markdown.
chat_with_pdf("Summarize this pdf")

The PDF appears to be a collection of articles and essays related to the study of Altered States of Consciousness (ASCs), edited by an author who is not explicitly mentioned in the provided excerpts.

The introduction suggests that the field of ASC research has been neglected, and that a new approach is needed. The editor argues for "state-specific sciences," which implies that each ASC should be studied independently and that the existing scientific methods may not be sufficient to understand these states.

The PDF includes several articles on various topics related to ASCs, such as:

1. Meditation: A discussion of the research literature on meditation and its effects on consciousness.
2. Hypnagogic state: An article on the hypnagogic state, which is a transitional state between wakefulness and sleep.
3. Dream consciousness: An overview of the study of dream consciousness and its relationship to ASCs.
4. Psychedelics: A discussion of the effects of minor and major psychedelics on consciousness.

The editor also mentions that even with extensive literature searching, there was limited research available on many topics related to ASCs. This suggests that the field is still in its early stages and that more research is needed to fully understand these states.

Finally, the PDF includes references to other researchers who have written about ASCs, such as Robert Tart, who has proposed a conceptual framework for understanding discrete states of consciousness.

Overall, the PDF appears to be an anthology of essays on the study of Altered States of Consciousness, with a focus on highlighting the need for more research and a new approach to understanding these complex phenomena.

In [47]:
# NOTE(review): the original note here said only "granite"; presumably this cell
# was meant to be run with the granite3-dense model. Confirm.
# The chain sends only the retrieved context and the current question to the
# model, so it has no record of earlier chat_with_pdf calls.
chat_with_pdf("What were you asked to do in the last step?")

You didn't ask me to do anything in this conversation yet. This is the first message you've sent. Would you like to ask me something or provide a task for me to complete? I'm here to help!

## Cleanup

In [36]:
# Drop the Chroma collection built for this session.
# NOTE(review): only chroma_db is cleaned up; the collection written through
# pgvector_db is left in place. Confirm whether that is intended.
chroma_db.delete_collection()
print("Vector database deleted successfully")

Vector database deleted successfully
