### Install Libraries

In [None]:
# Install the notebook's dependencies with the %pip magic.
# The first line upgrades pydantic-core and pydantic; the second installs the
# LangChain packages plus the PDF (pypdf) and Word (docx2txt) readers.
# NOTE(review): versions are unpinned — consider pinning them so a fresh
# "Restart & Run All" reproduces the same environment.
%pip install --upgrade pydantic-core pydantic
%pip install langchain langchain-core langchain-community langchain-ollama langchain-chroma pypdf docx2txt

### Import Libraries

In [1]:
import hashlib
import os

from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFLoader, Docx2txtLoader
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama import ChatOllama
from langchain_ollama import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [2]:
# Notebook configuration. Every value keeps its original default but can be
# overridden through an environment variable, so the notebook can run on
# another machine without editing code. An unset or empty variable falls back
# to the default.
DOC_PATH = os.environ.get("RAG_DOC_PATH") or "/Users/path"  # folder scanned by load_documents
DB_PATH = os.environ.get("RAG_DB_PATH") or "./vector_db"  # persist_directory handed to Chroma
MODEL_NAME = os.environ.get("RAG_MODEL_NAME") or "llama3.2"  # model name handed to ChatOllama
EMBEDDING_MODEL = os.environ.get("RAG_EMBEDDING_MODEL") or "nomic-embed-text"  # model name handed to OllamaEmbeddings

In [15]:
def load_documents(folder_path):
    """Load every PDF and Word (.docx) file found directly inside a folder.

    If the folder does not exist it is created, a hint is printed, and an
    empty list is returned so the user can add files and run again.

    Args:
        folder_path: Directory to scan. Only its immediate entries are
            examined; sub-directories are skipped, not descended into.

    Returns:
        A list with everything the loaders' ``load()`` calls returned, in
        sorted file-name order. Empty when no matching file was found.
    """
    documents = []
    if not os.path.exists(folder_path):
        os.makedirs(folder_path)
        print(f"Created folder '{folder_path}'. Please put your documents there and run again.")
        return []

    # os.listdir() returns entries in arbitrary order; sorting makes the
    # document order (and everything derived from it) reproducible.
    for file in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, file)
        # A directory that merely looks like a document (e.g. "old.pdf/")
        # must not be handed to a loader.
        if not os.path.isfile(file_path):
            continue

        # Compare case-insensitively so "REPORT.PDF" / "Notes.DOCX" are not
        # silently ignored.
        lowered = file.lower()
        if lowered.endswith(".pdf"):
            print(f"Loading PDF: {file}")
            loader = PyPDFLoader(file_path)
            documents.extend(loader.load())
        elif lowered.endswith(".docx"):
            print(f"Loading Word: {file}")
            loader = Docx2txtLoader(file_path)
            documents.extend(loader.load())
    return documents

def _stable_chunk_ids(chunks):
    """Return one deterministic id per chunk, unique within the batch."""
    occurrences = {}
    ids = []
    for chunk in chunks:
        metadata = chunk.metadata or {}
        # presumably the loaders set "source" (and "page" for PDFs); missing
        # keys simply contribute an empty string — TODO confirm.
        key = "\x1f".join([
            str(metadata.get("source", "")),
            str(metadata.get("page", "")),
            chunk.page_content,
        ])
        digest = hashlib.sha256(key.encode("utf-8", errors="surrogatepass")).hexdigest()
        # Identical chunks would otherwise collide; number the repeats so
        # every id in the batch stays unique and none of them is dropped.
        seen = occurrences.get(digest, 0)
        occurrences[digest] = seen + 1
        ids.append(f"{digest}-{seen}")
    return ids


def create_vector_db(documents):
    """Chunk documents and store them in ChromaDB using Ollama embeddings.

    Chunks are written with ids derived from their source, page and text.
    Re-running against the same persisted ``DB_PATH`` therefore reuses the
    same ids instead of appending a second copy of every chunk under fresh
    random ids (this relies on langchain-chroma upserting by id).

    Args:
        documents: Loaded documents to index (objects exposing
            ``page_content`` and ``metadata``).

    Returns:
        The Chroma vector store, or ``None`` when there is nothing to index
        (no documents, or the documents produced no text chunks).
    """
    if not documents:
        print("No documents to process.")
        return None

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = text_splitter.split_documents(documents)
    print(f"Split documents into {len(chunks)} chunks.")

    # Documents without extractable text (e.g. image-only PDFs) yield zero
    # chunks; report that instead of handing an empty batch to Chroma.
    if not chunks:
        print("No text chunks were produced; nothing to index.")
        return None

    embeddings = OllamaEmbeddings(model=EMBEDDING_MODEL)

    vector_store = Chroma.from_documents(
        documents=chunks,
        embedding=embeddings,
        ids=_stable_chunk_ids(chunks),
        persist_directory=DB_PATH,
    )
    print("Vector database created successfully.")
    return vector_store

def setup_rag_chain(vector_store):
    """Build the retrieval-augmented generation chain on top of a vector store.

    The chain retrieves the 3 most similar chunks for a question, stuffs them
    into a strict "answer only from context" prompt, and sends that prompt to
    the Ollama chat model named by ``MODEL_NAME``.

    Args:
        vector_store: The store returned by ``create_vector_db``. May be
            ``None`` when no documents were indexed.

    Returns:
        The retrieval chain (invoke it with ``{"input": question}``), or
        ``None`` when ``vector_store`` is ``None`` so callers can test the
        result for truthiness instead of hitting an ``AttributeError``.
    """
    # create_vector_db returns None when there was nothing to index.
    if vector_store is None:
        print("No vector store available; cannot build the RAG chain.")
        return None

    llm = ChatOllama(model=MODEL_NAME)

    retriever = vector_store.as_retriever(search_kwargs={"k": 3})

    # The prompt text is part of the runtime behaviour; it is kept verbatim.
    prompt = ChatPromptTemplate.from_template("""
    You MUST answer the user's question based STRICTLY and EXCLUSIVELY on the following context.
    
    CRITICAL INSTRUCTIONS:
    1. Use ONLY the information provided in the context below
    2. Do NOT use any external knowledge or assumptions
    3. If the context does not contain sufficient information to answer the question, you MUST respond with EXACTLY this message:
    
    "Thank you for contacting the AI Cybersecurity team. We would be glad to follow up on your inquiry via email. Please provide us with your preferred email address so we can respond accordingly."
    
    4. Do NOT attempt to partially answer or guess
    5. Do NOT say "I don't know" or variations - use the exact message above
    6. Do NOT add any additional commentary beyond the required response
    
    Context:
    {context}
    
    Question: 
    {input}
    
    Answer:""")

    # Create the chain: Retriever -> Combine Docs -> LLM
    document_chain = create_stuff_documents_chain(llm, prompt)
    retrieval_chain = create_retrieval_chain(retriever, document_chain)

    return retrieval_chain

In [16]:
# Build the pipeline: load files -> chunk and embed into Chroma -> retrieval chain.
raw_docs = load_documents(DOC_PATH)
vector_db = create_vector_db(raw_docs)
# create_vector_db returns None when there was nothing to index. Leave
# rag_chain as None in that case so the query cell can report the failure
# instead of this cell crashing on a missing vector store.
rag_chain = setup_rag_chain(vector_db) if vector_db is not None else None

Ignoring wrong pointing object 6 0 (offset 0)
Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 6 0 (offset 0)
Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 12 0 (offset 0)
Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 6 0 (offset 0)
Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 12 0 (offset 0)
Ignoring wrong pointing object 14 0 (offset 0)
Ignoring wrong pointing object 17 0 (offset 0)
Ignoring wrong pointing object 19 0 (offset 0)
Ignoring wrong pointing object 26 0 (offset 0)
Ignoring wrong pointing object 31 0 (offset 0)
Ignoring wrong pointing object 14 0 (offset 0)
Ignoring wrong pointing object 17 0 (offset 0)
Ignoring wrong pointing object 19 0 (offset 0)
Ignoring wrong point

Ignoring wrong pointing object 6 0 (offset 0)
Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 6 0 (offset 0)
Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 12 0 (offset 0)
Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 6 0 (offset 0)
Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 12 0 (offset 0)
Ignoring wrong pointing object 14 0 (offset 0)
Ignoring wrong pointing object 17 0 (offset 0)
Ignoring wrong pointing object 19 0 (offset 0)
Ignoring wrong pointing object 26 0 (offset 0)
Ignoring wrong pointing object 31 0 (offset 0)
Ignoring wrong pointing object 14 0 (offset 0)
Ignoring wrong pointing object 17 0 (offset 0)
Ignoring wrong pointing object 19 0 (offset 0)
Ignoring wrong point

Loading PDF: FaQ2.pdf
Loading PDF: Faq.pdf
Split documents into 9 chunks.
Vector database created successfully.
Vector database created successfully.


In [20]:
# Ask one question through the chain; the generated text is stored under the
# "answer" key of the result.
query = "How are you implementing RAG with LangChain and Ollama?"
if not rag_chain:
    # No chain was built by the earlier cells.
    print("RAG chain setup failed. Please check the previous steps.")
else:
    response = rag_chain.invoke({"input": query})
    print("Response:", response.get("answer"))

Response: Thank you for contacting the AI Cybersecurity team. We would be glad to follow up on your inquiry via email. Please provide us with your preferred email address so we can respond accordingly.
