In [1]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain.llms import Ollama
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain



In [13]:
def load_documents(path):
    """Read the PDF at ``path`` through ``PyPDFLoader``.

    Args:
        path: Location of the PDF file; passed unchanged to ``PyPDFLoader``.

    Returns:
        Whatever ``PyPDFLoader(path).load()`` returns.
    """
    return PyPDFLoader(path).load()

In [14]:
def split_documents(docs, chunk_size=1000, chunk_overlap=200):
    """Chunk ``docs`` with a ``RecursiveCharacterTextSplitter``.

    Args:
        docs: Documents to split; handed unchanged to the splitter.
        chunk_size: Forwarded as the splitter's ``chunk_size`` (default 1000).
        chunk_overlap: Forwarded as the splitter's ``chunk_overlap`` (default 200).

    Returns:
        The result of the splitter's ``split_documents`` call.
    """
    splitter_options = {"chunk_size": chunk_size, "chunk_overlap": chunk_overlap}
    return RecursiveCharacterTextSplitter(**splitter_options).split_documents(docs)

In [15]:
def create_vectorstore(splits):
    """Index ``splits`` in a Chroma store using MiniLM sentence embeddings.

    Args:
        splits: Document chunks passed to ``Chroma.from_documents``.

    Returns:
        The object returned by ``Chroma.from_documents``.
    """
    embedding_model = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    vectorstore = Chroma.from_documents(splits, embedding=embedding_model)
    return vectorstore

In [16]:
def build_conversational_chain(retriever, model="llama3", temperature=0):
    """Assemble a ``ConversationalRetrievalChain`` backed by an Ollama LLM.

    Args:
        retriever: Retriever passed to ``ConversationalRetrievalChain.from_llm``.
        model: Name of the Ollama model to use; defaults to ``"llama3"``.
        temperature: Temperature passed to ``Ollama``; defaults to ``0``.

    Returns:
        The chain built by ``ConversationalRetrievalChain.from_llm``, configured
        with a ``ConversationBufferMemory`` and ``return_source_documents=True``.
    """
    llm = Ollama(model=model, temperature=temperature)

    # The chain is asked to return source documents alongside the answer, so the
    # memory is pinned to the "answer" output key explicitly.
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True,
        output_key="answer",
    )

    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=retriever,
        memory=memory,
        return_source_documents=True,
        output_key="answer",
    )


In [18]:
def main(pdf_path="/Users/kryptonempyrean/Desktop/TS3043166.pdf"):
    """Run an interactive question-answering session over a PDF.

    Loads and chunks the PDF, indexes the chunks, builds the conversational
    chain over a retriever configured with ``k=3``, then reads questions from
    the prompt until the user types "exit"/"quit" or interrupts the prompt.

    Args:
        pdf_path: PDF to chat with. Defaults to the path that was previously
            hard-coded here, so a bare ``main()`` call behaves as before.
    """
    docs = load_documents(pdf_path)
    splits = split_documents(docs)
    vectorstore = create_vectorstore(splits)
    retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

    conversational_chain = build_conversational_chain(retriever)

    print("💬 Start chatting with your document!\n")

    while True:
        try:
            query = input("🧠 You: ").strip()
        except (KeyboardInterrupt, EOFError):
            # Interrupting the prompt (Ctrl-C / kernel interrupt) or a closed
            # stdin ends the chat cleanly instead of surfacing a traceback.
            print("\n👋 Ending session.")
            break
        if not query:
            # Blank line: nothing to ask, so don't spend an LLM call on it.
            continue
        if query.lower() in ("exit", "quit"):
            print("👋 Ending session.")
            break
        result = conversational_chain.invoke({"question": query})
        print("\n🤖 Bot:", result["answer"], "\n")


# Entry point: start the chat session only when this code is executed as the
# main module, not when it is imported.
if __name__ == "__main__":
    main()

Ignoring wrong pointing object 169 0 (offset 0)
Ignoring wrong pointing object 178 0 (offset 0)


KeyboardInterrupt: 