# Loading the PDF document

In [3]:
from langchain_community.document_loaders import PyPDFLoader

In [None]:
# Location of the source PDF, relative to the notebook's working directory.
PDF_PATH = "./Data/Mahabharata.pdf"

# Build the loader, then read the PDF into a list of documents.
pdf_loader = PyPDFLoader(file_path=PDF_PATH)
docs = pdf_loader.load()

# Show the text of the first loaded document as the cell output
# (a quick check that loading worked).
first_doc = docs[0]
first_doc.page_content

# Creating the knowledge base

In [2]:
from langchain_chroma import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_ollama.embeddings import OllamaEmbeddings

In [None]:
# Break the loaded documents into chunks before embedding them.
# The splitter is configured with chunk_size=1000 and chunk_overlap=200.
text_spliter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
chunks = text_spliter.split_documents(docs)

# Display the resulting chunk list as the cell output.
chunks

In [None]:
import os

# Create embeddings
# NOTE(review): "gemma:2b" is the same model name this notebook uses for the
# answering LLM; a dedicated embedding model may retrieve better - confirm.
embeddings = OllamaEmbeddings(model="gemma:2b")

# Directory where Chroma persists the collection between runs.
PERSIST_DIR = "./chroma_db"

# Build the vector store only once. Calling Chroma.from_documents on every run
# of this cell would re-embed the whole PDF and append another copy of every
# chunk to the persisted collection, so retrieval would return duplicates.
# If the directory already holds data, reopen it instead.
# To force a rebuild (e.g. after changing the PDF, the chunking settings, or
# the embedding model), delete PERSIST_DIR and run this cell again.
if os.path.isdir(PERSIST_DIR) and os.listdir(PERSIST_DIR):
    # Reopen the existing store; the embedding function must match the one
    # used when the store was built so queries are embedded the same way.
    vectorstore = Chroma(
        embedding_function=embeddings,
        persist_directory=PERSIST_DIR,
    )
else:
    # First run: embed the chunks and store them in ChromaDB.
    vectorstore = Chroma.from_documents(
        documents=chunks,
        embedding=embeddings,
        persist_directory=PERSIST_DIR,
    )

# Retrieval


In [15]:
# Wrap the vector store as a retriever (no extra search options are passed
# here); it is handed to create_retrieval_chain further down.
retriver = vectorstore.as_retriever()

In [16]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

In [None]:
from langchain_ollama.llms import OllamaLLM

# Name of the Ollama model used as the answering LLM in the chain below.
LLM_MODEL_NAME = "gemma:2b"
model = OllamaLLM(model=LLM_MODEL_NAME)

In [None]:
# System instructions for the LLM. The "{context}" placeholder at the end is
# a template variable of the prompt, alongside "{input}" in the human message.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, say that you don't know. "
    "Use three sentences maximum and keep the answer concise."
    "\n\n{context}"
)

# Chat prompt: the system instructions followed by the user's question.
prompt_messages = [
    ("system", system_prompt),
    ("human", "{input}"),
]
prompt = ChatPromptTemplate.from_messages(prompt_messages)

# Combine the LLM and prompt into a document-answering chain, then pair it
# with the retriever to form the full retrieval chain.
question_answer_chain = create_stuff_documents_chain(model, prompt)
rag_chain = create_retrieval_chain(retriver, question_answer_chain)

# Ask a sample question and display the chain's result as the cell output.
question = "who was arjuna"
results = rag_chain.invoke({"input": question})

results