References:<br>
https://docs.langchain.com/oss/python/langchain/rag<br>
https://docs.langchain.com/oss/python/langchain/retrieval<br>
https://docs.langchain.com/oss/python/integrations/chat/ollama

Import required packages

In [None]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_ollama import ChatOllama
from langchain.tools import tool
from langchain.agents import create_agent
from typing import Any
from langchain_core.documents import Document
from langchain.agents.middleware import AgentMiddleware, AgentState

In [None]:
import langchain_community
import langchain_ollama
import langchain

# Report the installed version of each of the three packages imported above.
print('Version information')

for package in (langchain_community, langchain_ollama, langchain):
    print(f'{package.__name__}: {package.__version__}')

## Create retriever system

Initialize retriever variables

In [None]:
# Path handed directly to PyPDFLoader when loading the source PDF (string).
# NOTE(review): './data/' looks like a directory rather than a PDF file;
# confirm the loader accepts it, or point this at the actual PDF.
file_path = './data/'
# Number of characters in a chunk (int).
chunk_size = 300
# Number of characters overlapping between adjacent chunks (int).
chunk_overlap = 20
# Number of chunks to retrieve per query (int).
num_chunks_to_return = 8

Create a list of LangChain documents

In [None]:
# Load the PDF located at file_path into a list of LangChain documents.
pdf_loader = PyPDFLoader(file_path)
documents = pdf_loader.load()

In [None]:
# Preview the first 100 characters of each of the first five documents.
for page in documents[:5]:
    print(f"{page.page_content[:100]}\n")

Create a list of LangChain document chunks

In [None]:
# Split the loaded documents into overlapping character chunks.
chunker = RecursiveCharacterTextSplitter(
    chunk_size=chunk_size,
    chunk_overlap=chunk_overlap,
)
document_chunks = chunker.split_documents(documents)

In [None]:
# Show the full text of the first five chunks, with a blank line after each.
for piece in document_chunks[:5]:
    print(f"{piece.page_content}\n")

Create vector store

In [None]:
# Embedding model for the chunks; the encoder is asked to normalize its output.
encode_options = {"normalize_embeddings": True}
embeddings = HuggingFaceEmbeddings(
    model_name="BAAI/bge-small-en-v1.5",
    encode_kwargs=encode_options,
)

# Embed every chunk and index the resulting vectors in a FAISS store.
vectorstore = FAISS.from_documents(document_chunks, embeddings)

## Initialize Ollama LLM

In [None]:
# Ollama-backed chat model; temperature is 0 so repeated runs are repeatable.
llm = ChatOllama(model="llama3.2", temperature=0)

### Option 1: A RAG agent that executes searches with a simple tool

Create the tool

In [None]:
@tool(response_format="content_and_artifact")
def retrieve_context(query: str):
    """Retrieve information to help answer a query."""
    # NOTE(review): the docstring above is presumably what @tool exposes to the
    # model as the tool description, so it is kept short and unchanged; confirm
    # before editing it.
    #
    # Returns a (content, artifact) pair: the text shown to the model and the
    # raw documents it was built from.
    hits = vectorstore.similarity_search(query, k=num_chunks_to_return)

    # One "Source / Content" entry per retrieved chunk.
    rendered = [
        f"Source: {hit.metadata}\nContent: {hit.page_content}"
        for hit in hits
    ]
    return "\n\n".join(rendered), hits

Create the agent

In [None]:
# Tools the agent is allowed to call; retrieval is the only one.
tools = [retrieve_context]

# System prompt assembled from adjacent string literals. Every sentence except
# the last must end with a space; otherwise two sentences run together
# (previously "...user query.Respond as though...").
prompt = (
    "Pretend you keep a daily journal. "
    "You have access to a tool that retrieves context from your journal. "
    "Use the tool to answer the user query. "
    "Respond as though you wrote the journal. "
    "Use three sentences maximum. Don't provide any dates."
)

# Agent that can call the retrieval tool, steered by the system prompt above.
agent = create_agent(llm, tools, system_prompt=prompt)

Question 1: What do you want to do this weekend?

In [None]:
# Question 1; read by the invoke and stream cells that follow.
query = "What do you want to do this weekend?"

In [None]:
# Invoke the agent with the query as a single user message, then pretty-print
# the last message of the returned state.
final_state = agent.invoke({"messages": [{"role": "user", "content": query}]})
final_state["messages"][-1].pretty_print()

In [None]:
# Stream the same query and pretty-print the newest message of every streamed
# state, which shows the intermediate steps as well as the final answer.
request = {"messages": [{"role": "user", "content": query}]}
for snapshot in agent.stream(request, stream_mode="values"):
    snapshot["messages"][-1].pretty_print()

Question 2: What do you do when you're feeling down?

In [None]:
# Question 2; read by the invoke and stream cells that follow.
query = "What do you do when you're feeling down?"

In [None]:
# Invoke the agent with the query as a single user message, then pretty-print
# the last message of the returned state.
final_state = agent.invoke({"messages": [{"role": "user", "content": query}]})
final_state["messages"][-1].pretty_print()

In [None]:
# Stream the same query and pretty-print the newest message of every streamed
# state, which shows the intermediate steps as well as the final answer.
request = {"messages": [{"role": "user", "content": query}]}
for snapshot in agent.stream(request, stream_mode="values"):
    snapshot["messages"][-1].pretty_print()

Question 3: What kinds of work do you like to do?

In [None]:
# Question 3; read by the invoke and stream cells that follow.
query = "What kinds of work do you like to do?"

In [None]:
# Invoke the agent with the query as a single user message, then pretty-print
# the last message of the returned state.
final_state = agent.invoke({"messages": [{"role": "user", "content": query}]})
final_state["messages"][-1].pretty_print()

In [None]:
# Stream the same query and pretty-print the newest message of every streamed
# state, which shows the intermediate steps as well as the final answer.
request = {"messages": [{"role": "user", "content": query}]}
for snapshot in agent.stream(request, stream_mode="values"):
    snapshot["messages"][-1].pretty_print()

### Option 2: A two-step RAG chain that uses just a single LLM call per query

Create the middleware that retrieves context and builds the dynamic prompt

In [None]:
class State(AgentState):
    """Agent state extended with the documents retrieved for the query."""

    # Documents returned by the vector store for the latest message.
    context: list[Document]


class RetrieveDocumentsMiddleware(AgentMiddleware[State]):
    """Middleware that retrieves journal chunks and injects them into the prompt.

    The latest message is used as the search query against the module-level
    ``vectorstore``; the retrieved text and the instructions are folded into
    that same message, so no separate tool call is needed.
    """

    state_schema = State

    def before_model(self, state: AgentState) -> dict[str, Any] | None:
        """Augment the latest message with retrieved context.

        Args:
            state: Current agent state; only ``state["messages"]`` is read.

        Returns:
            A state update holding the rewritten last message under
            ``"messages"`` and the retrieved documents under ``"context"``.
        """
        last_message = state["messages"][-1]
        user_query = last_message.text

        # Honor the notebook-wide retrieval setting so this option fetches the
        # same number of chunks as the tool-based agent, instead of silently
        # falling back to the vector store's default.
        retrieved_docs = vectorstore.similarity_search(
            user_query, k=num_chunks_to_return
        )

        docs_content = "\n\n".join(doc.page_content for doc in retrieved_docs)

        augmented_message_content = (
            "Pretend you keep a daily journal. "
            "The following pieces of context are from your journal. "
            "Use them to answer the user query at the end. "
            "Respond as though you wrote the journal. "
            "Use three sentences maximum. Don't provide any dates."
            f"\n\n{docs_content}"
            f"\n\nUser Query: {user_query}"
        )
        return {
            # Copy of the last message with only its content replaced.
            # NOTE(review): presumably this replaces the original message in
            # state because the copy keeps the same id; confirm.
            "messages": [last_message.model_copy(update={"content": augmented_message_content})],
            "context": retrieved_docs,
        }

Create the agent

In [None]:
# Tool-less agent: retrieval happens in the middleware, not through a tool call.
# This rebinds `agent`, so later cells use this two-step RAG chain.
rag_middleware = RetrieveDocumentsMiddleware()
agent = create_agent(llm, tools=[], middleware=[rag_middleware])

Question 1: What do you want to do this weekend?

In [None]:
# Question 1; read by the invoke and stream cells that follow.
query = "What do you want to do this weekend?"

In [None]:
# Invoke the agent with the query as a single user message, then pretty-print
# the last message of the returned state.
final_state = agent.invoke({"messages": [{"role": "user", "content": query}]})
final_state["messages"][-1].pretty_print()

In [None]:
# Stream the same query and pretty-print the newest message of every streamed
# state.
request = {"messages": [{"role": "user", "content": query}]}
for snapshot in agent.stream(request, stream_mode="values"):
    snapshot["messages"][-1].pretty_print()

Question 2: What do you do when you're feeling down?

In [None]:
# Question 2; read by the invoke and stream cells that follow.
query = "What do you do when you're feeling down?"

In [None]:
# Invoke the agent with the query as a single user message, then pretty-print
# the last message of the returned state.
final_state = agent.invoke({"messages": [{"role": "user", "content": query}]})
final_state["messages"][-1].pretty_print()

In [None]:
# Stream the same query and pretty-print the newest message of every streamed
# state.
request = {"messages": [{"role": "user", "content": query}]}
for snapshot in agent.stream(request, stream_mode="values"):
    snapshot["messages"][-1].pretty_print()

Question 3: What kinds of work do you like to do?

In [None]:
# Question 3; read by the invoke and stream cells that follow.
query = "What kinds of work do you like to do?"

In [None]:
# Invoke the agent with the query as a single user message, then pretty-print
# the last message of the returned state.
final_state = agent.invoke({"messages": [{"role": "user", "content": query}]})
final_state["messages"][-1].pretty_print()

In [None]:
# Stream the same query and pretty-print the newest message of every streamed
# state.
request = {"messages": [{"role": "user", "content": query}]}
for snapshot in agent.stream(request, stream_mode="values"):
    snapshot["messages"][-1].pretty_print()