### RAG Pipelines -  Data Ingestion to Vector DB Pipeline

In [5]:
import os
from langchain_community.document_loaders import PyPDFLoader, PyMuPDFLoader, DirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from pathlib import Path

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
### Read all the pdfs in a directory
def process_all_pdfs(pdf_directory: str):
    pdf_loaders = {
        '.pdf': PyPDFLoader,
        '.pdfx': PyMuPDFLoader
    }
    
    all_documents = []
    pdf_dir = Path(pdf_directory)

    # find all PDF files recursively in the directory
    pdf_files = list(pdf_dir.glob('**/*.pdf')) + list(pdf_dir.glob('**/*.pdfx'))

    print(f"Found {len(pdf_files)} PDF files in directory {pdf_directory}")

    for pdf_file in pdf_files:
        print(f"\nProcessing file: {pdf_file.name}")

        try:
            loader = PyPDFLoader(str(pdf_file))
            documents = loader.load()

            # add source information to metadata
            for doc in documents:
                doc.metadata['source_file'] = str(pdf_file.name)  
                doc.metadata['source_path'] = str(pdf_file.resolve())
                doc.metadata['file_size'] = os.path.getsize(pdf_file)
                doc.metadata['num_pages'] = len(documents)
                doc.metadata['file_type'] = pdf_file.suffix.lower()
            
            all_documents.extend(documents)
            print(f" Loaded {len(documents)} documents from {pdf_file.name}")

        except Exception as e:
            print(f"Error processing file {pdf_file.name}: {e}")

    print(f"\nTotal documents loaded from all PDFs: {len(all_documents)}")
    return all_documents

pdf_documents = process_all_pdfs("../data")

Found 4 PDF files in directory ../data

Processing file: ticket.pdf
 Loaded 1 documents from ticket.pdf

Processing file: Damilola Adekoya CV 3.pdf
 Loaded 6 documents from Damilola Adekoya CV 3.pdf

Processing file: DAMILOLA ADEKOYA CV.pdf
 Loaded 3 documents from DAMILOLA ADEKOYA CV.pdf

Processing file: Cover letter.pdf
 Loaded 3 documents from Cover letter.pdf

Total documents loaded from all PDFs: 13


In [7]:
pdf_documents

[Document(metadata={'producer': 'dompdf 2.0.3 + CPDF', 'creator': 'PyPDF', 'creationdate': '2025-10-10T16:31:36+01:00', 'moddate': '2025-10-10T16:31:36+01:00', 'title': 'Whatadeal | Ticket Details', 'source': '../data/pdf/ticket.pdf', 'total_pages': 1, 'page': 0, 'page_label': '1', 'source_file': 'ticket.pdf', 'source_path': '/Users/damilola.adekoya@payjoy.com/Documents/AI/test_RAG/data/pdf/ticket.pdf', 'file_size': 756034, 'num_pages': 1, 'file_type': '.pdf'}, page_content='Ticket ID:\nWOR25590\nWorld Usability Day Africa 2025\nHackathon: The Prompt is You!\nGeneral Admission\nWORBFZC780\nSlack Channel\nVirtual\nFriday, 17 October 2025 09:00 AM\nOrdered by Damilola Adekoya\non Oct 10, 2025 4:31 PM'),
 Document(metadata={'producer': 'Skia/PDF m143 Google Docs Renderer', 'creator': 'PyPDF', 'creationdate': '', 'title': 'Damilola Adekoya CV 3', 'source': '../data/pdf/Damilola Adekoya CV 3.pdf', 'total_pages': 6, 'page': 0, 'page_label': '1', 'source_file': 'Damilola Adekoya CV 3.pdf', 's

In [8]:
### Text splitting get into chunks

def split_documents(documents, chunk_size=1000, chunk_overlap=200):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
        separators=["\n\n", "\n", " ", ""]
    )
    split_docs = text_splitter.split_documents(documents)
    print(f"Split {len(documents)} documents into {len(split_docs)} chunks.")

    if split_docs:
        print("\nSample chunk")
        print("Sample chunk metadata:", split_docs[0].metadata)
        print("Sample chunk content:", split_docs[0].page_content[:200])  # print first 500 characters
    
    return split_docs
    
     

In [9]:
chunks = split_documents(pdf_documents)
chunks

Split 13 documents into 42 chunks.

Sample chunk
Sample chunk metadata: {'producer': 'dompdf 2.0.3 + CPDF', 'creator': 'PyPDF', 'creationdate': '2025-10-10T16:31:36+01:00', 'moddate': '2025-10-10T16:31:36+01:00', 'title': 'Whatadeal | Ticket Details', 'source': '../data/pdf/ticket.pdf', 'total_pages': 1, 'page': 0, 'page_label': '1', 'source_file': 'ticket.pdf', 'source_path': '/Users/damilola.adekoya@payjoy.com/Documents/AI/test_RAG/data/pdf/ticket.pdf', 'file_size': 756034, 'num_pages': 1, 'file_type': '.pdf'}
Sample chunk content: Ticket ID:
WOR25590
World Usability Day Africa 2025
Hackathon: The Prompt is You!
General Admission
WORBFZC780
Slack Channel
Virtual
Friday, 17 October 2025 09:00 AM
Ordered by Damilola Adekoya
on Oct


[Document(metadata={'producer': 'dompdf 2.0.3 + CPDF', 'creator': 'PyPDF', 'creationdate': '2025-10-10T16:31:36+01:00', 'moddate': '2025-10-10T16:31:36+01:00', 'title': 'Whatadeal | Ticket Details', 'source': '../data/pdf/ticket.pdf', 'total_pages': 1, 'page': 0, 'page_label': '1', 'source_file': 'ticket.pdf', 'source_path': '/Users/damilola.adekoya@payjoy.com/Documents/AI/test_RAG/data/pdf/ticket.pdf', 'file_size': 756034, 'num_pages': 1, 'file_type': '.pdf'}, page_content='Ticket ID:\nWOR25590\nWorld Usability Day Africa 2025\nHackathon: The Prompt is You!\nGeneral Admission\nWORBFZC780\nSlack Channel\nVirtual\nFriday, 17 October 2025 09:00 AM\nOrdered by Damilola Adekoya\non Oct 10, 2025 4:31 PM'),
 Document(metadata={'producer': 'Skia/PDF m143 Google Docs Renderer', 'creator': 'PyPDF', 'creationdate': '', 'title': 'Damilola Adekoya CV 3', 'source': '../data/pdf/Damilola Adekoya CV 3.pdf', 'total_pages': 6, 'page': 0, 'page_label': '1', 'source_file': 'Damilola Adekoya CV 3.pdf', 's

### Embedding And VectorStoreDB


In [10]:
import numpy as np
from sentence_transformers import SentenceTransformer
import chromadb
from chromadb.config import Settings
import uuid
from typing import List, Dict, Any, Tuple
from sklearn.metrics.pairwise import cosine_similarity
import os

In [11]:
class EmbeddingManager:
    """Handles document embedding geenration using SentenceTransformer."""

    def __init__(self, model_name: str = 'all-MiniLM-L6-v2'):
        """
            Ininitalize the EmbeddingManager with a specified SentenceTransformer model.

            Args:
                model_name (str): The name of the SentenceTransformer model to use.
        """
        self.model_name = model_name
        self.model = None
        self._load_model()

    def _load_model(self):
        """Load the SentenceTransformer model."""
        try:
            print(f"Loading SentenceTransformer model: {self.model_name}")
            self.model = SentenceTransformer(self.model_name)
            print(f"Model loaded successfully. Embedding dimension: {self.model.get_sentence_embedding_dimension()} ")
        except Exception as e:
            print("Please install the 'sentence-transformers' package to use EmbeddingManager. {self.model_name} embedding model cannot be loaded: {e}")
            raise

    def generate_embeddings(self, texts: List[str]) -> np.ndarray:
        """
            Generate embeddings for a list of texts.

            Args:
                texts (List[str]): List of texts to embed.

            Returns:
                numpy array of embeddings with shape(len(texts), embedding_dim)).
        """
        if not self.model:
            raise ValueError("Embedding model is not loaded.")  
        
        print(f"Generating embeddings for {len(texts)} texts....")
        embeddings = self.model.encode(texts, show_progress_bar=True)
        print(f"Generated embeddings with shape: {embeddings.shape}")
        return embeddings
    
    def get_embedding_dimension(self) -> int:
        """Get the dimension of the embeddings produced by the model."""
        if not self.model:
            raise ValueError("Embedding model is not loaded.")
        return self.model.get_sentence_embedding_dimension()


## initialize Embedding Manager
embedding_manager = EmbeddingManager(model_name='all-MiniLM-L6-v2')
embedding_manager

Loading SentenceTransformer model: all-MiniLM-L6-v2
Model loaded successfully. Embedding dimension: 384 


<__main__.EmbeddingManager at 0x2bd2c1d30>

### VectorStore

In [12]:
class VectorStore:
    """Manages a ChromaDB vector store for document embeddings."""

    def __init__(self, collection_name: str = "pdf_documents", persist_directory: str = "../data/vector_store"):
        """
            Initialize the VectorStore with a specified persistence directory.

            Args:
                collection_name (str): Name of the ChromaDB collection.
                persist_directory (str): Directory to persist the ChromaDB database.
        """
        self.collection_name = collection_name
        self.persist_directory = persist_directory
        self.client = None
        self.collection = None
        self._initialize_store()

    def _initialize_store(self):
        """Initialize the ChromaDB client and collection."""
        try:
            # Create the persistence directory if it doesn't exist
            os.makedirs(self.persist_directory, exist_ok=True)
            print(f"Initializing ChromaDB at {self.persist_directory}...")
            self.client = chromadb.PersistentClient(path=self.persist_directory)

            # Get or create the collection
            self.collection = self.client.get_or_create_collection(name=self.collection_name, metadata={"description": "PDF Document Embeddings for RAG"})
            
            print(f"ChromaDB initialized successfully. Collection: {self.collection_name}")
            print(f"Number of existing documents in collection: {self.collection.count()}")
        
        except Exception as e:
            print(f"Error initializing Vector store: {e}")
            raise

    def add_documents(self, documents: List[Dict[str, Any]], embeddings: np.ndarray):
        """
            Add documents and their embeddings to the vector store.

            Args:
                documents (List[Dict[str, Any]]): List of document metadata dictionaries.
                embeddings (np.ndarray): Corresponding embeddings for the documents.
        """
        if not self.collection:
            raise ValueError("Vector store collection is not initialized.")
        
        if len(documents) != len(embeddings):
            raise ValueError("Number of documents and embeddings must match.")


        print(f"Adding {len(documents)} documents to the vector store...")

        # prepare data for ChromaDB
        ids = []
        metadatas = []
        documents_text = []
        embeddings_list = []

        for i, (doc, embedding) in enumerate(zip(documents, embeddings)):
            # Generate a unique ID for each document
            doc_id = f"doc_{uuid.uuid4().hex[:8]}_{i}"
            ids.append(doc_id)

            # prepare metadata
            metadata = dict(doc.metadata)  # copy existing metadata
            metadata['doc_index'] = i
            metadata['content_length'] = len(doc.page_content)
            metadatas.append(metadata)
            
            # Document Content
            documents_text.append(doc.page_content)

            #Embedding
            embeddings_list.append(embedding.tolist())

        # Add to ChromaDB collection
        try:
            self.collection.add(
                ids=ids,
                embeddings=embeddings_list,
                metadatas=metadatas,
                documents=documents_text
            )
            print(f"Successfully added {len(documents)} documents to the vector store.")
            print(f"Total documents in collection after addition: {self.collection.count()}")

        except Exception as e:
            print(f"Error adding documents to vector store: {e}")
            raise

vector_store = VectorStore()
vector_store

Initializing ChromaDB at ../data/vector_store...
ChromaDB initialized successfully. Collection: pdf_documents
Number of existing documents in collection: 42


<__main__.VectorStore at 0x2be0312b0>

In [13]:
chunks

[Document(metadata={'producer': 'dompdf 2.0.3 + CPDF', 'creator': 'PyPDF', 'creationdate': '2025-10-10T16:31:36+01:00', 'moddate': '2025-10-10T16:31:36+01:00', 'title': 'Whatadeal | Ticket Details', 'source': '../data/pdf/ticket.pdf', 'total_pages': 1, 'page': 0, 'page_label': '1', 'source_file': 'ticket.pdf', 'source_path': '/Users/damilola.adekoya@payjoy.com/Documents/AI/test_RAG/data/pdf/ticket.pdf', 'file_size': 756034, 'num_pages': 1, 'file_type': '.pdf'}, page_content='Ticket ID:\nWOR25590\nWorld Usability Day Africa 2025\nHackathon: The Prompt is You!\nGeneral Admission\nWORBFZC780\nSlack Channel\nVirtual\nFriday, 17 October 2025 09:00 AM\nOrdered by Damilola Adekoya\non Oct 10, 2025 4:31 PM'),
 Document(metadata={'producer': 'Skia/PDF m143 Google Docs Renderer', 'creator': 'PyPDF', 'creationdate': '', 'title': 'Damilola Adekoya CV 3', 'source': '../data/pdf/Damilola Adekoya CV 3.pdf', 'total_pages': 6, 'page': 0, 'page_label': '1', 'source_file': 'Damilola Adekoya CV 3.pdf', 's

In [14]:
### Convert the text chunks to embeddings and store in vector DB
texts = [doc.page_content for doc in chunks]
texts

['Ticket ID:\nWOR25590\nWorld Usability Day Africa 2025\nHackathon: The Prompt is You!\nGeneral Admission\nWORBFZC780\nSlack Channel\nVirtual\nFriday, 17 October 2025 09:00 AM\nOrdered by Damilola Adekoya\non Oct 10, 2025 4:31 PM',
 'Damilola  Adekoya  (+1)  437-868-1394  |  dharmykoya38@gmail.com |  Welland,  Canada    https://www.linkedin.com/in/damilola-adekoya-solomon/   SUMMARY  Experienced  Software  Engineer  with  over  7  years  of  experience  designing  and  deploying  scalable  web  applications  and  \nbackend\n \nsystems\n \nacross\n \nFinTech,\n \nlogistics,\n \nand\n \ne-commerce\n \nsectors.\n \nProven\n \ntrack\n \nrecord\n \nof\n \nleading\n \nremote\n \ndevelopment\n \nteams,\n \noptimizing\n \nsystem\n \nperformance,\n \nand\n \nbuilding\n \ncomplex\n \nfeatures\n \nthat\n \ndrive\n \nmeasurable\n \nbusiness\n \nresults.\n \nKnown\n \nfor\n \ndelivering\n \nclean,\n \nmaintainable\n \ncode\n \naligned\n \nwith\n \nbest\n \npractices\n \nin\n \nOOP,\n \nSOLID\n \npr

In [15]:
## Generate embeddings for the chunks
embeddings = embedding_manager.generate_embeddings(texts)

## store in vector DB
vector_store.add_documents(chunks, embeddings)

Generating embeddings for 42 texts....


Batches: 100%|██████████| 2/2 [00:00<00:00,  2.48it/s]

Generated embeddings with shape: (42, 384)
Adding 42 documents to the vector store...
Successfully added 42 documents to the vector store.
Total documents in collection after addition: 84





### Retriever Pipeline From VectorStore

In [17]:
class RAGRetriever:
    """Retriever for fetching relevant documents from the vector store based on query embeddings."""

    def __init__(self, vector_store: VectorStore, embedding_manager: EmbeddingManager):
        """
            Initialize the RAGRetriever with a vector store and embedding manager.

            Args:
                vector_store (VectorStore): The vector store to retrieve documents from.
                embedding_manager (EmbeddingManager): The embedding manager to generate query embeddings.
        """
        self.vector_store = vector_store
        self.embedding_manager = embedding_manager

    def retrieve(self, query: str, top_k: int = 5, score_threshold: float = 0.0) -> List[Dict[str, Any]]:
        """
            Retrieve the most relevant documents for a given query.

            Args:
                query (str): The input query string.
                top_k (int): Number of top documents to retrieve.
                score_threshold (float): Minimum similarity score threshold for retrieved documents.
            
            Returns: 
                List[Dict[str, Any]]: List of retrieved document metadata and content.
        """
        if not self.vector_store.collection:
            raise ValueError("Vector store collection is not initialized.")

        print(f"Generating embedding for the query: {query}")
        print(f"Top k: {top_k}, Score threshold: {score_threshold}")

        #Generate embedding for the query
        query_embedding = self.embedding_manager.generate_embeddings([query])[0]

        print(f"Query embedding generated. Retrieving top {top_k} documents...")

        #search in vector store
        try:
            results = self.vector_store.collection.query(
                query_embeddings=[query_embedding.tolist()],
                n_results=top_k
            )

            # process results
            retrieved_docs = []

            if results['documents'] and results['documents'][0]:
                documents = results['documents'][0]
                metadatas = results['metadatas'][0]
                distances = results['distances'][0]
                ids = results['ids'][0]

                for i, (doc_id, document, metadata, distance) in enumerate(zip(ids, documents, metadatas, distances)):
                    # Convert distance to similarity score
                    similarity_score = 1 - distance

                    if similarity_score >= score_threshold:
                        retrieved_docs.append({
                            'id': doc_id,
                            'content': document,
                            'metadata': metadata,
                            'similarity_score': similarity_score,
                            'distance': distance,
                            'rank': i + 1
                        })
                
                print(f"Retrieved {len(retrieved_docs)} documents above the score threshold of {score_threshold}.")
            else:
                print("No documents retrieved from the vector store.")
            
            return retrieved_docs
        
        except Exception as e:
            print(f"Error retrieving documents: {e}")
            raise   

rag_retriever = RAGRetriever(vector_store=vector_store, embedding_manager=embedding_manager) 

In [20]:
rag_retriever.retrieve("Who is Damilola Adekoya?")### RAG Pipelines -  Data Ingestion to Vector DB Pipeline



Generating embedding for the query: Who is Damilola Adekoya?
Top k: 5, Score threshold: 0.0
Generating embeddings for 1 texts....


Batches: 100%|██████████| 1/1 [00:00<00:00, 26.52it/s]

Generated embeddings with shape: (1, 384)
Query embedding generated. Retrieving top 5 documents...
Retrieved 2 documents above the score threshold of 0.0.





[{'id': 'doc_05102a57_1',
  'content': 'Damilola  Adekoya  (+1)  437-868-1394  |  dharmykoya38@gmail.com |  Welland,  Canada    https://www.linkedin.com/in/damilola-adekoya-solomon/   SUMMARY  Experienced  Software  Engineer  with  over  7  years  of  experience  designing  and  deploying  scalable  web  applications  and  \nbackend\n \nsystems\n \nacross\n \nFinTech,\n \nlogistics,\n \nand\n \ne-commerce\n \nsectors.\n \nProven\n \ntrack\n \nrecord\n \nof\n \nleading\n \nremote\n \ndevelopment\n \nteams,\n \noptimizing\n \nsystem\n \nperformance,\n \nand\n \nbuilding\n \ncomplex\n \nfeatures\n \nthat\n \ndrive\n \nmeasurable\n \nbusiness\n \nresults.\n \nKnown\n \nfor\n \ndelivering\n \nclean,\n \nmaintainable\n \ncode\n \naligned\n \nwith\n \nbest\n \npractices\n \nin\n \nOOP,\n \nSOLID\n \nprinciples,\n \nand\n \nagile\n \nmethodologies.\n \nPassionate\n \nabout\n \ncontinuous\n \nlearning,\n \nmentoring,\n \nand\n \ncontributing\n \nto\n \nhigh-impact\n \nsoftware\n \nproducts.',
 

In [23]:
rag_retriever.retrieve("Software Engineer")

Generating embedding for the query: Software Engineer
Top k: 5, Score threshold: 0.0
Generating embeddings for 1 texts....


Batches: 100%|██████████| 1/1 [00:00<00:00, 39.17it/s]

Generated embeddings with shape: (1, 384)
Query embedding generated. Retrieving top 5 documents...
Retrieved 2 documents above the score threshold of 0.0.





[{'id': 'doc_05102a57_1',
  'content': 'Damilola  Adekoya  (+1)  437-868-1394  |  dharmykoya38@gmail.com |  Welland,  Canada    https://www.linkedin.com/in/damilola-adekoya-solomon/   SUMMARY  Experienced  Software  Engineer  with  over  7  years  of  experience  designing  and  deploying  scalable  web  applications  and  \nbackend\n \nsystems\n \nacross\n \nFinTech,\n \nlogistics,\n \nand\n \ne-commerce\n \nsectors.\n \nProven\n \ntrack\n \nrecord\n \nof\n \nleading\n \nremote\n \ndevelopment\n \nteams,\n \noptimizing\n \nsystem\n \nperformance,\n \nand\n \nbuilding\n \ncomplex\n \nfeatures\n \nthat\n \ndrive\n \nmeasurable\n \nbusiness\n \nresults.\n \nKnown\n \nfor\n \ndelivering\n \nclean,\n \nmaintainable\n \ncode\n \naligned\n \nwith\n \nbest\n \npractices\n \nin\n \nOOP,\n \nSOLID\n \nprinciples,\n \nand\n \nagile\n \nmethodologies.\n \nPassionate\n \nabout\n \ncontinuous\n \nlearning,\n \nmentoring,\n \nand\n \ncontributing\n \nto\n \nhigh-impact\n \nsoftware\n \nproducts.',
 

In [24]:
rag_retriever.retrieve("SQL and NoSQl")

Generating embedding for the query: SQL and NoSQl
Top k: 5, Score threshold: 0.0
Generating embeddings for 1 texts....


Batches: 100%|██████████| 1/1 [00:00<00:00, 25.56it/s]

Generated embeddings with shape: (1, 384)
Query embedding generated. Retrieving top 5 documents...
Retrieved 4 documents above the score threshold of 0.0.





[{'id': 'doc_ccd0e112_13',
  'content': 'caching  to  speed  things  up,  and  queues  to  handle  background  jobs  for  async  tasks.  Also  \nexperienced\n \nin\n \nwriting\n \noptimised\n \ndatabase\n \nqueries\n \nto\n \nimprove\n \nperformance\n \nand\n \nreduce\n \nload.\n \nSQL  &  NoSQL  Database  Mastery :  Designed  normalised  schemas  and  optimised  queries  in  SQL  \nServer,\n \nMySQL,\n \nand\n \nPostgreSQL,\n \nreducing\n \nquery\n \ntimes\n \nby\n \nup\n \nto\n \n40%.\n \nAlso\n \nhave\n \nexperience\n \nworking\n \nwith\n \nNoSQL\n \ndatabases\n \nlike\n \nMongoDB\n \nand\n \nAzure\n \nCosmos\n \nDB\n \nfor\n \nhigh-throughput,\n \nflexible\n \ndata\n \nstorage\n \nfor\n \nreal-time\n \nand\n \nanalytics-based\n \nfeatures.\n \nFrontend  with  React  &  NextJs:  Built  responsive,  scalable  and  component-driven  UIs  using  \nReact,\n \nTypeScript,\n \nand\n \nmodern\n \nhooks\n \narchitecture,\n \nleveraging\n \nuseState,\n \nuseEffect,\n \nuseMemo,\n \nand\n \nc

### Integration Vectordb Context pipeline with LLM output


In [27]:
### Simple Rag Pipeline
from langchain_groq import ChatGroq
import os
from dotenv import load_dotenv
load_dotenv('../.env')


### Initialize Groq LLM
groq_api_key = os.getenv("GROQ_API_KEY")
llm = ChatGroq(api_key=groq_api_key, model="openai/gpt-oss-20b", temperature=0.1, max_tokens=500)

## Simple RAG function: retrieve context and generate answer
def rag_simple(query: str, retriever: RAGRetriever, llm: ChatGroq, top_k: int = 5) -> str:
    """
        Perform RAG QA by retrieving context and generating an answer using LLM.

        Args:
            query (str): The input question/query.
            retriever (RAGRetriever): The retriever to fetch relevant documents.
            llm (ChatGroq): The language model to generate answers.
            top_k (int): Number of top documents to retrieve for context.

        Returns:
            str: Generated answer from the LLM.
    """
    print(f"Retrieving context for query: {query}")
    results = retriever.retrieve(query, top_k=top_k)

    if not results:
        return "No relevant documents found to answer the query."

    # Combine retrieved document contents as context
    context = "\n\n".join([doc['content'] for doc in results]) if results else ""
    print(f"Retrieved {len(results)} documents for context.")

    if not context:
        return "No relevant context found to answer the query."

    # Create prompt for LLM
    prompt = f"Use the following context to answer the question:\n\nContext:\n{context}\n\nQuestion: {query}\nAnswer:"

    print("Generating answer using LLM...")
    response = llm.invoke([prompt.format(context=context, query=query)])
    
    return response.content

In [28]:
answer = rag_simple("Who is Damilola Adekoya?", rag_retriever, llm)
print("Generated Answer:")
print(answer)

Retrieving context for query: Who is Damilola Adekoya?
Generating embedding for the query: Who is Damilola Adekoya?
Top k: 5, Score threshold: 0.0
Generating embeddings for 1 texts....


Batches: 100%|██████████| 1/1 [00:00<00:00, 46.73it/s]

Generated embeddings with shape: (1, 384)
Query embedding generated. Retrieving top 5 documents...
Retrieved 2 documents above the score threshold of 0.0.
Retrieved 2 documents for context.
Generating answer using LLM...





Generated Answer:
Damilola Adekoya is a seasoned software engineer from Welland, Canada, with over seven years of experience designing and deploying scalable web applications and backend systems across FinTech, logistics, and e‑commerce sectors. He has a proven track record of leading remote development teams, optimizing system performance, and building complex features that deliver measurable business results. Damilola is known for writing clean, maintainable code that follows OOP, SOLID principles, and agile best practices, and he is passionate about continuous learning, mentoring, and contributing to high‑impact software products.


In [30]:
answer = rag_simple("What kind of engineering roles can Damilola Adekoya get with his experience", rag_retriever, llm)
print("Generated Answer:")
print(answer)

Retrieving context for query: What kind of engineering roles can Damilola Adekoya get with his experience
Generating embedding for the query: What kind of engineering roles can Damilola Adekoya get with his experience
Top k: 5, Score threshold: 0.0
Generating embeddings for 1 texts....


Batches: 100%|██████████| 1/1 [00:00<00:00, 15.19it/s]

Generated embeddings with shape: (1, 384)
Query embedding generated. Retrieving top 5 documents...
Retrieved 4 documents above the score threshold of 0.0.
Retrieved 4 documents for context.
Generating answer using LLM...





Generated Answer:
With over seven years of hands‑on experience building scalable, enterprise‑grade applications across FinTech, logistics, and e‑commerce, Damilola Adekoya is well‑positioned for a range of senior‑level engineering roles.  Some of the most natural fits include:

| Role | Why it’s a good match | Key responsibilities |
|------|----------------------|----------------------|
| **Senior Full‑Stack Developer** | Proven track record with C#, .NET, PHP, React, TypeScript, and cloud platforms (AWS, Azure). | Design, develop, and maintain end‑to‑end solutions; mentor junior developers; drive best‑practice coding standards. |
| **Senior Backend Engineer / API Architect** | Deep experience with ASP.NET Core, Entity Framework, LINQ, microservices, and asynchronous programming. | Build high‑performance, secure APIs; design data models; optimize database interactions; lead backend architecture decisions. |
| **Software Architect / Technical Lead** | Demonstrated ability to lead remote

### Enhanced RAG Pipeline Features

In [38]:
### Enhanced RAG Pipeline Features
def rag_advanced(query: str, retriever: RAGRetriever, llm: ChatGroq, top_k: int = 5, min_score: float = 0.2, return_context=False) -> object:
    """
        Perform advanced RAG QA with filtering and enhanced prompt engineering.

        Args:
            query (str): The input question/query.
            retriever (RAGRetriever): The retriever to fetch relevant documents.
            llm (ChatGroq): The language model to generate answers.
            top_k (int): Number of top documents to retrieve for context.
            min_score (float): Minimum similarity score threshold for retrieved documents.

        Returns:
            str: Generated answer from the LLM.
    """
    print(f"Retrieving context for query: {query} with score threshold: {min_score}")
    results = retriever.retrieve(query, top_k=top_k, score_threshold=min_score)

    if not results:
        return {'answer': "No relevant documents found to answer the query.", 'context': "", 'sources': [], 'confidence': 0.0}

    # Combine retrieved document contents as context
    context = "\n\n".join([doc['content'] for doc in results])
    sources = [
        {
            'source': doc['metadata'].get('source_file', doc['metadata'].get('source', 'unknown')),
            'page': doc['metadata'].get('page', 'unknown'),
            'score': doc['similarity_score'],
            'preview': doc['content'][:200]  # first 200 characters as preview
        } for doc in results
    ]    
    print(f"Retrieved {len(results)} documents for context after applying score threshold.")

    confidence = max([doc['similarity_score'] for doc in results]) if results else 0.0

    # if not context:
        # return "No relevant context found to answer the query."

    # Create enhanced prompt for LLM
    prompt = f"""You are an expert assistant. Use the following context to answer the question accurately and concisely.
    Context:
    {context}
    Question: {query}
    Answer:"""    

    response = llm.invoke([prompt.format(context=context, query=query)])

    output = {
        'answer': response.content,
        'sources': sources,
        'confidence': confidence
    }

    if return_context:
        output['context'] = context
    
    return output

In [44]:
result = rag_advanced("What did Damilola Adekoya do for Shiprocket?", rag_retriever, llm, top_k=5, min_score=0.2, return_context=False)

print("Generated Answer:", result['answer'])
print("Context:", result['context'][:500])  # print first 500 characters of context
print("Sources:", result['sources'])
print("Confidence Score:", result['confidence'])    

Retrieving context for query: What did Damilola Adekoya do for Shiprocket? with score threshold: 0.2
Generating embedding for the query: What did Damilola Adekoya do for Shiprocket?
Top k: 5, Score threshold: 0.2
Generating embeddings for 1 texts....


Batches: 100%|██████████| 1/1 [00:00<00:00, 74.96it/s]

Generated embeddings with shape: (1, 384)
Query embedding generated. Retrieving top 5 documents...
Retrieved 0 documents above the score threshold of 0.2.
Generated Answer: No relevant documents found to answer the query.
Context: 
Sources: []
Confidence Score: 0.0



