In [5]:
import os
import sys
from pathlib import Path

from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_ollama import OllamaEmbeddings, ChatOllama

from langchain_chroma import Chroma

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough, RunnableParallel, RunnableLambda

from langchain_core.output_parsers import StrOutputParser

In [3]:
!ollama list

NAME                        ID              SIZE      MODIFIED   
gemma3:270m                 e7d36fb2c3b3    291 MB    5 days ago    
mxbai-embed-large:latest    468836162de7    669 MB    5 days ago    
tinyllama:latest            2644915ede35    637 MB    5 days ago    
nomic-embed-text:latest     0a109f422b47    274 MB    5 days ago    
gpt-oss:120b-cloud          569662207105    -         5 days ago    


In [4]:
llm = ChatOllama(
    model = "gemma3:270m",
    temperature= 0.3
)

LOADING

In [16]:
loader = DirectoryLoader(
    path = "book",
    glob = "*.pdf",
    loader_cls= PyPDFLoader
)
documents = loader.lazy_load()


CHUNKING

In [21]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 1024,
    chunk_overlap = 128,
    length_function = len,
    separators=["\n\n, \n, " ", "", "]
)
chunk = text_splitter.split_documents(documents)

EMBEDDING AND VECTOR DBS

In [24]:
def clean_text(text: str) -> str:
    return (
        text
        .encode("utf-8", errors="ignore")
        .decode("utf-8")
    )

for doc in chunk:
    doc.page_content = clean_text(doc.page_content)


In [25]:
embedding = OllamaEmbeddings(
  model = "nomic-embed-text:latest",
)
persist_directory = "./chroma_db"
vector_store = Chroma.from_documents(
    documents=chunk,
    embedding=embedding,
    persist_directory=persist_directory,
    collection_name="rev.ipynb"
)


In [26]:
#RETRIVER
retriever = vector_store.as_retriever(
    search_type = "similarity",
    kwargs = {"k":4}
)

#OLLAMA
llm = ChatOllama(
    model = "gemma3:270m",
    temperature= 0
)

In [27]:
system_prompt = (
    "You are a helpful assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer based on the context, say that you don't know. "
    "Keep the answer concise and accurate.\n\n"
    "Context: {context}\n\n"
    "Question: {question}"
)

prompt = ChatPromptTemplate.from_template(system_prompt)

def format_docs(docs):
    """Format retrieved documents into a single string."""
    return "\n\n".join(doc.page_content for doc in docs)

rag_chain = (
    {
      "context" : retriever | format_docs,
      "question" : RunnablePassthrough()
    }  
    | prompt
    | llm
    | StrOutputParser()
)
print("âœ“ RAG chain created successfully using LCEL!")
print("\nRAG Pipeline Flow:")
print("  1. User provides a query")
print("  2. Retriever finds top 4 relevant chunks (local ChromaDB)")
print("  3. Chunks are formatted as context")
print("  4. Context + question formatted with prompt template")
print("  5. Local LLM (gemma3:1b) generates answer")
print("  6. Answer parsed and returned")
print("\nðŸ”’ Everything runs locally on your machine!")

âœ“ RAG chain created successfully using LCEL!

RAG Pipeline Flow:
  1. User provides a query
  2. Retriever finds top 4 relevant chunks (local ChromaDB)
  3. Chunks are formatted as context
  4. Context + question formatted with prompt template
  5. Local LLM (gemma3:1b) generates answer
  6. Answer parsed and returned

ðŸ”’ Everything runs locally on your machine!


In [28]:
query1 = "What is the main topic or contribution of this document?"
answer =rag_chain.invoke(query1)
print("=" * 80)
print("ANSWER:")
print("=" * 80)
print(answer)
print("\n" + "=" * 80)

# Show source documents
print("\nSOURCE DOCUMENTS USED:")
print("=" * 80)
retrieved_docs = retriever.invoke(query1)
for i, doc in enumerate(retrieved_docs):
    print(f"\nDocument {i+1}:")
    print(f"  Page: {doc.metadata.get('page', 'N/A')}")
    print(f"  Content: {doc.page_content[:200]}...")
    print("-" * 80)

ANSWER:



SOURCE DOCUMENTS USED:

Document 1:
  Page: 1
  Content: Machine Learning For Absolute
Beginners
 
 
 
 
Oliver Theobald...
--------------------------------------------------------------------------------

Document 2:
  Page: 12
  Content: Attention Visualizations
Input-Input Layer5
It
is
in
this
spirit
that
a
majority
of
American
governments
have
passed
new
laws
since
2009
making
the
registration
or
voting
process
more
difficult
.
<EOS...
--------------------------------------------------------------------------------

Document 3:
  Page: 13
  Content: Input-Input Layer5
The
Law
will
never
be
perfect
,
but
its
application
should
be
just
-
this
is
what
we
are
missing
,
in
my
opinion
.
<EOS>
<pad>
The
Law
will
never
be
perfect
,
but
its
application
sh...
--------------------------------------------------------------------------------

Document 4:
  Page: 25
  Content: know. C and C++ are the default programming languages for advanced
machine learning because they can run dire