# Import Modules

In [2]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain_ollama import OllamaEmbeddings,OllamaLLM
from langchain import hub
from langchain_core.documents import Document
from typing_extensions import List, TypedDict
from langgraph.graph import START, StateGraph
from langchain_core.prompts import PromptTemplate
import os
from dotenv import load_dotenv

In [6]:
# Read settings via python-dotenv before touching the environment.
load_dotenv()

LANGCHAIN_API_KEY = os.getenv("LANGCHAIN_API_KEY")

# Set the tracing flag and project name in the process environment.
os.environ.update(
    {
        "LANGCHAIN_TRACING_V2": "true",
        "LANGCHAIN_PROJECT": "RAG",
    }
)

# Load PDF

In [57]:
# Path of the PDF to index.
file = "file.pdf"

# Build the loader for that path and read the document(s) from it.
loader = PyPDFLoader(file)
docs = loader.load()

In [58]:
# Display how many items the loader returned.
len(docs)

3

# Chunk

In [59]:
# Splitter configuration: chunk size 1000 with an overlap of 200;
# add_start_index=True is passed through to the splitter.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    add_start_index=True,
)

# Split every loaded document into chunks.
all_splits = text_splitter.split_documents(docs)

In [60]:
# Display how many chunks the splitter produced.
len(all_splits)

9

# Embedding and Vector Store

In [61]:
# Embedding function backed by the "llama3.2" Ollama model.
embeddings = OllamaEmbeddings(model="llama3.2")

# Build the Chroma store from the chunks using that embedding function.
vectorstore = Chroma.from_documents(
    documents=all_splits,
    embedding=embeddings,
)

# Retrieval and Generation

In [64]:
# Language model used by generate() to produce the answer.
model = OllamaLLM(model="llama3.2")

In [65]:
# Prompt text for the RAG answer step. The {context} and {question}
# placeholders are filled in when custom_rag_prompt is invoked in generate().
template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum and keep the answer as concise as possible.
Always say "thanks for asking!" at the end of the answer.

{context}

Question: {question}

Helpful Answer:"""

In [66]:
# Wrap the raw template string in a PromptTemplate.
custom_rag_prompt = PromptTemplate.from_template(template=template)

In [67]:
class State(TypedDict):
    """State dictionary shared by the nodes of the RAG graph.

    Every key must be declared as an annotation (``name: type``). A plain
    assignment (``name = type``) only creates a class attribute and does not
    register the key, so a node returning that key cannot update the state.
    """

    # Question text supplied when the graph is invoked; read by both nodes.
    question: str
    # Documents written by ``retrieve`` and read by ``generate``.
    context: List[Document]
    # Answer text written by ``generate``.
    answer: str

In [68]:
def retrieve(state: State):
    """Look up documents in ``vectorstore`` that match the question.

    Args:
        state: Graph state; only ``state["question"]`` is read.

    Returns:
        A partial state update ``{"context": <search results>}``.
    """
    query = state["question"]
    matches = vectorstore.similarity_search(query)
    return {"context": matches}

def generate(state: State):
    """Answer the question in ``state`` from the retrieved context.

    Joins the ``page_content`` of every document in ``state["context"]``
    with blank lines, fills ``custom_rag_prompt`` with that text and the
    question, and sends the resulting prompt to ``model``.

    Args:
        state: Graph state; ``state["question"]`` and ``state["context"]``
            are read.

    Returns:
        A partial state update ``{"answer": <answer text>}``.
    """
    docs_content = "\n\n".join(doc.page_content for doc in state["context"])
    message = custom_rag_prompt.invoke(
        {"question": state["question"], "context": docs_content}
    )
    response = model.invoke(message)

    # OllamaLLM is a text-completion model: invoke() returns a plain string,
    # so reading ``response.content`` raised AttributeError. Chat models
    # return a message object instead, so accept either form.
    answer = response if isinstance(response, str) else response.content
    return {"answer": answer}

In [69]:
# Create the builder for a graph whose state schema is State.
graph_builder = StateGraph(State)

# Register the two nodes so that they run in order: retrieve, then generate.
graph_builder.add_sequence([retrieve, generate])

# Enter the graph at the "retrieve" node.
graph_builder.add_edge(START, "retrieve")

graph = graph_builder.compile()

In [None]:
# Run the compiled graph on a single question.
result = graph.invoke(
    {"question": "What is the name of the company?"},
)