In [3]:
# Local RAG proof of concept: answering questions over unstructured data (PDFs) used as a local knowledge base

In [4]:
import ollama

In [5]:
#from langchain_community.embeddings import OllamaEmbeddings
from langchain_ollama import OllamaEmbeddings
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma

In [6]:
import re

In [7]:
import gradio as gr

In [41]:
# Smoke test: send a single user message to the locally served DeepSeek model
# through Ollama and print the text of the reply.
smoke_messages = [
    {"role": "user", "content": "Explain Newton's second law of motion"},
]
response = ollama.chat(model="deepseek-r1", messages=smoke_messages)

print(response["message"]["content"])


Okay, let's break down Newton's Second Law of Motion.

**In simple terms:**

Force is what causes an object to accelerate (change its velocity). The amount of acceleration produced depends directly on the force applied and inversely on the object's mass.

**The Core Idea:**

Imagine an object sitting still or moving at a constant speed. According to Newton's First Law, it will want to keep doing whatever it's doing (inertia). To make it change its motion – to make it speed up, slow down, or change direction – you need to apply a **force**.

Now, Newton's Second Law tells us *exactly* how much force is needed and how the object will respond.

**The Mathematical Expression:**

The law is famously expressed as:

**F = m × a**

Where:
*   **F** is the net force applied to the object (measured in Newtons, N).
*   **m** is the mass of the object (measured in kilograms, kg).
*   **a** is the acceleration of the object (measured in meters per second squared, m/s²).

**Breaking Down F = m × a:*

In [42]:
# Step 1: Processing the uploaded PDF
def process_pdf(pdf_bytes):
    """Load a PDF, split it into chunks, embed them and index them in Chroma.

    Steps:
      1. Return early when no PDF was supplied.
      2. Extract the text with PyMuPDFLoader.
      3. Split the text into overlapping chunks with
         RecursiveCharacterTextSplitter.
      4. Embed the chunks with OllamaEmbeddings and store them in a Chroma
         collection persisted under ``./chroma_db``.

    Args:
        pdf_bytes: Despite the name, this is handed to PyMuPDFLoader as a file
            location, so it should be a path string or an upload object that
            exposes the path as ``.name``. ``None`` means nothing was uploaded.

    Returns:
        A ``(text_splitter, vectorstore, retriever)`` tuple, or
        ``(None, None, None)`` when ``pdf_bytes`` is ``None``.
    """
    # Function-scope import so this cell does not depend on the import cells.
    import hashlib

    if pdf_bytes is None:
        return None, None, None

    # Plain strings have no ``.name`` and are used as-is; file-like upload
    # wrappers contribute their ``.name`` (presumably the temp-file path --
    # confirm against the installed Gradio version).
    pdf_path = getattr(pdf_bytes, "name", pdf_bytes)

    loader = PyMuPDFLoader(pdf_path)
    data = loader.load()

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=2000, chunk_overlap=200
    )
    chunks = text_splitter.split_documents(data)

    # Fingerprint the extracted text. The store is persistent and this function
    # runs once per question, so without stable names/ids each call would
    # append another copy of the chunks to one shared collection, and chunks of
    # previously uploaded PDFs would be retrieved for unrelated questions.
    joined_text = "\x00".join(chunk.page_content for chunk in chunks)
    fingerprint = hashlib.sha256(
        joined_text.encode("utf-8", errors="replace")
    ).hexdigest()[:32]
    chunk_ids = [f"{fingerprint}-{index}" for index in range(len(chunks))]

    embeddings = OllamaEmbeddings(model="nomic-embed-text")
    # Alternative embedding model that was tried: "bge-m3".
    vectorstore = Chroma.from_documents(
        documents=chunks,
        embedding=embeddings,
        ids=chunk_ids,  # deterministic ids: re-ingesting the same PDF overwrites
        collection_name=f"pdf-{fingerprint}",  # one collection per document
        persist_directory="./chroma_db",
    )
    retriever = vectorstore.as_retriever()

    return text_splitter, vectorstore, retriever

In [43]:
# Step 2: Combining retrieved document chunks
def combine_docs(docs):
    """Stitch retrieved document chunks into a single context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values in their original order, separated by a
        blank line; an empty string when ``docs`` is empty.
    """
    separator = "\n\n"
    page_texts = [doc.page_content for doc in docs]
    return separator.join(page_texts)

In [44]:
# Step 3: Querying DeepSeek-R1 using Ollama
def ollama_llm(question, context, model="deepseek-r1:14b"):
    """Ask a local Ollama chat model a question, grounded in retrieved context.

    Args:
        question: The user's question.
        context: Text retrieved from the document, appended after the question.
        model: Ollama model tag to query. Defaults to the tag this pipeline was
            written against; override it to use another locally pulled model.

    Returns:
        The model's reply with any ``<think>...</think>`` section removed and
        surrounding whitespace stripped.
    """
    formatted_prompt = f"Question: {question}\n\nContext: {context}"

    response = ollama.chat(
        model=model,
        messages=[{"role": "user", "content": formatted_prompt}],
    )

    response_content = response["message"]["content"]

    # Drop the text between <think> and </think> so only the final answer
    # remains. DOTALL lets the match span multiple lines.
    final_answer = re.sub(
        r"<think>.*?</think>", "", response_content, flags=re.DOTALL
    ).strip()

    return final_answer

In [45]:
# Step 4: The RAG pipeline
def rag_chain(question, text_splitter, vectorstore, retriever):
    """Answer a question using chunks fetched by the retriever.

    Args:
        question: The user's question; also used as the retrieval query.
        text_splitter: Accepted for interface compatibility; not used here.
        vectorstore: Accepted for interface compatibility; not used here.
        retriever: Object whose ``invoke(question)`` returns the documents to
            use as context.

    Returns:
        Whatever ``ollama_llm`` returns for the question and merged context.
    """
    context = combine_docs(retriever.invoke(question))
    answer = ollama_llm(question, context)
    return answer

In [46]:
# Step 5: Creating the Gradio Interface
def ask_question(pdf_bytes, question):
    """Gradio callback: answer a question about the uploaded PDF.

    Args:
        pdf_bytes: Value of the file-upload component, or ``None`` when nothing
            was uploaded. Passed unchanged to ``process_pdf``.
        question: The text typed into the question box.

    Returns:
        The answer as a plain string, or ``None`` when no PDF was uploaded.
    """
    text_splitter, vectorstore, retriever = process_pdf(pdf_bytes)

    if text_splitter is None:
        return None  # No PDF uploaded

    # Return the string itself. Wrapping it in braces, as before, built a
    # one-element set, so the text output showed its repr instead of the answer.
    return rag_chain(question, text_splitter, vectorstore, retriever)


# Build the web UI: a file upload and a question box feed `ask_question`,
# and its return value is rendered as plain text.
interface = gr.Interface(
    ask_question,
    [
        gr.File(label="Upload PDF (optional)"),
        gr.Textbox(label="Ask a question"),
    ],
    "text",
    title="Ask questions about your PDF",
    description="Use DeepSeek-R1 to answer your questions about the uploaded PDF document.",
)

# Start the local Gradio server for the interface defined above.
interface.launch()

* Running on local URL:  http://127.0.0.1:7868
* To create a public link, set `share=True` in `launch()`.




ERROR:    Exception in ASGI application
Traceback (most recent call last):
  File "C:\Users\an_19\AppData\Local\Programs\Python\Python313\Lib\site-packages\uvicorn\protocols\http\httptools_impl.py", line 409, in run_asgi
    result = await app(  # type: ignore[func-returns-value]
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        self.scope, self.receive, self.send
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    )
    ^
  File "C:\Users\an_19\AppData\Local\Programs\Python\Python313\Lib\site-packages\uvicorn\middleware\proxy_headers.py", line 60, in __call__
    return await self.app(scope, receive, send)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\an_19\AppData\Local\Programs\Python\Python313\Lib\site-packages\fastapi\applications.py", line 1134, in __call__
    await super().__call__(scope, receive, send)
  File "C:\Users\an_19\AppData\Local\Programs\Python\Python313\Lib\site-packages\starlette\applications.py", line 113, in __call__
    await sel