### RAG - Agent Framework - AI Foundry

In [1]:
# Installation
! pip install chromadb sentence-transformers agent-framework openai pypdf python-docx -q
print("‚úÖ All packages installed successfully!")

‚úÖ All packages installed successfully!


In [None]:
# Import all required libraries
import os
import chromadb
from chromadb.utils import embedding_functions
from typing import Annotated, List, Dict
from pydantic import Field
from agent_framework import ChatAgent
from agent_framework.openai import OpenAIChatClient
import hashlib
from pathlib import Path

print("‚úÖ All libraries imported successfully!")


‚úÖ All libraries imported successfully!


In [None]:
# Configure API Key
# Replace with your actual OpenAI API key
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

print("‚úÖ API Key configured!")

‚úÖ API Key configured!


In [4]:
# Define Document Processor with File Loading
class DocumentProcessor:
    """Smart document processing with semantic chunking and file loading"""
    
    def __init__(self, chunk_size: int = 1000, chunk_overlap: int = 200):
        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap
    
    def load_from_folder(self, folder_path: str) -> List[Dict]:
        """
        Load all documents from a folder.
        Supports: .txt, .md, .pdf, .docx files
        """
        folder = Path(folder_path)
        
        if not folder.exists():
            raise FileNotFoundError(f"Folder not found: {folder_path}")
        
        documents = []
        supported_extensions = ['.txt', '.md', '.pdf', '.docx']
        
        # Get all files with supported extensions
        for file_path in folder.iterdir():
            if file_path.is_file() and file_path.suffix.lower() in supported_extensions:
                try:
                    content = self._read_file(file_path)
                    if content.strip():  # Only add non-empty documents
                        documents.append({
                            'content': content,
                            'source': str(file_path),
                            'filename': file_path.name,
                            'file_type': file_path.suffix[1:]  # Remove the dot
                        })
                        print(f"  ‚úì Loaded: {file_path.name}")
                except Exception as e:
                    print(f"  ‚úó Error loading {file_path.name}: {e}")
        
        return documents
    
    def _read_file(self, file_path: Path) -> str:
        """Read file content based on file type"""
        suffix = file_path.suffix.lower()
        
        if suffix in ['.txt', '.md']:
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                return f.read()
        
        elif suffix == '.pdf':
            try:
                import pypdf
                with open(file_path, 'rb') as f:
                    pdf = pypdf.PdfReader(f)
                    content = []
                    for page in pdf.pages:
                        text = page.extract_text()
                        if text:
                            content.append(text)
                    return "\n\n".join(content)
            except ImportError:
                print("  ‚ö†Ô∏è  pypdf not installed. Install with: pip install pypdf")
                return ""
            except Exception as e:
                print(f"  ‚ö†Ô∏è  Error reading PDF: {e}")
                return ""
        
        elif suffix == '.docx':
            try:
                import docx
                doc = docx.Document(file_path)
                content = []
                for para in doc.paragraphs:
                    if para.text.strip():
                        content.append(para.text)
                return "\n\n".join(content)
            except ImportError:
                print("  ‚ö†Ô∏è  python-docx not installed. Install with: pip install python-docx")
                return ""
            except Exception as e:
                print(f"  ‚ö†Ô∏è  Error reading DOCX: {e}")
                return ""
        
        return ""
    
    def semantic_chunk(self, text: str, metadata: Dict) -> List[Dict]:
        """Semantic chunking that preserves context"""
        chunks = []
        paragraphs = text.split('\n\n')
        
        current_chunk = ""
        chunk_id = 0
        
        for para in paragraphs:
            para = para.strip()
            if not para:
                continue
            
            if len(current_chunk) + len(para) > self.chunk_size:
                if current_chunk:
                    chunks.append({
                        'content': current_chunk.strip(),
                        'metadata': {
                            **metadata,
                            'chunk_id': chunk_id,
                            'char_count': len(current_chunk)
                        },
                        'id': self._generate_id(current_chunk, metadata['source'], chunk_id)
                    })
                    chunk_id += 1
                    
                    overlap_text = current_chunk[-self.chunk_overlap:] if len(current_chunk) > self.chunk_overlap else current_chunk
                    current_chunk = overlap_text + "\n\n" + para
                else:
                    current_chunk = para
            else:
                current_chunk += "\n\n" + para if current_chunk else para
        
        if current_chunk:
            chunks.append({
                'content': current_chunk.strip(),
                'metadata': {
                    **metadata,
                    'chunk_id': chunk_id,
                    'char_count': len(current_chunk)
                },
                'id': self._generate_id(current_chunk, metadata['source'], chunk_id)
            })
        
        return chunks
    
    def _generate_id(self, content: str, source: str, chunk_id: int) -> str:
        """Generate unique ID for chunk"""
        unique_str = f"{source}_{chunk_id}_{content[:50]}"
        return hashlib.md5(unique_str.encode()).hexdigest()

print("‚úÖ DocumentProcessor class defined with file loading support!")


‚úÖ DocumentProcessor class defined with file loading support!


In [5]:
# Define Vector Database Manager with Open-Source Embeddings
class VectorDBManager:
    """Manage ChromaDB vector database with open-source embeddings"""
    
    def __init__(self, 
                 collection_name: str = "rag_knowledge_base",
                 persist_directory: str = "./chroma_db",
                 embedding_model: str = "all-MiniLM-L6-v2",
                 use_local_embeddings: bool = True):
        """
        Initialize ChromaDB with open-source embedding models
        
        Embedding Model Options (all open-source):
        - 'all-MiniLM-L6-v2': Fast, 384 dim, best for general use (DEFAULT)
        - 'all-mpnet-base-v2': Better accuracy, 768 dim, slower
        - 'paraphrase-multilingual-MiniLM-L12-v2': Multilingual support, 384 dim
        - 'BAAI/bge-small-en-v1.5': High quality, 384 dim
        - 'BAAI/bge-base-en-v1.5': Better quality, 768 dim
        - 'intfloat/e5-small-v2': Efficient, 384 dim
        - 'intfloat/e5-base-v2': Better quality, 768 dim
        """
        # Initialize ChromaDB with persistence
        self.client = chromadb.PersistentClient(path=persist_directory)
        self.embedding_model_name = embedding_model
        
        if use_local_embeddings:
            # Use sentence-transformers (fully open-source, runs locally)
            print(f"üì• Loading open-source embedding model: {embedding_model}")
            self.embedding_function = embedding_functions.SentenceTransformerEmbeddingFunction(
                model_name=embedding_model,
                device='cpu'  # Change to 'cuda' if you have GPU
            )
            print(f"‚úÖ Model loaded successfully (running locally)")
        else:
            # Fallback to default ChromaDB embeddings
            self.embedding_function = embedding_functions.DefaultEmbeddingFunction()
            print("‚úÖ Using default embeddings")
        
        # Create or get collection
        self.collection = self.client.get_or_create_collection(
            name=collection_name,
            embedding_function=self.embedding_function,
            metadata={"hnsw:space": "cosine"}
        )
        
        print(f"‚úÖ Vector DB initialized: {collection_name}")
        print(f"üìä Current documents: {self.collection.count()}")
    
    def add_documents(self, chunks: List[Dict]):
        """Add document chunks to vector database"""
        documents = [chunk['content'] for chunk in chunks]
        metadatas = [chunk['metadata'] for chunk in chunks]
        ids = [chunk['id'] for chunk in chunks]
        
        print(f"üîÑ Generating embeddings for {len(documents)} chunks...")
        
        # Batch insert
        batch_size = 100
        for i in range(0, len(documents), batch_size):
            batch_docs = documents[i:i + batch_size]
            batch_meta = metadatas[i:i + batch_size]
            batch_ids = ids[i:i + batch_size]
            
            self.collection.add(
                documents=batch_docs,
                metadatas=batch_meta,
                ids=batch_ids
            )
            
            print(f"  ‚úì Processed batch {i//batch_size + 1}/{(len(documents)-1)//batch_size + 1}")
        
        print(f"‚úÖ Added {len(documents)} chunks to vector database")
    
    def search(self, query: str, n_results: int = 5) -> List[Dict]:
        """Semantic search in vector database"""
        results = self.collection.query(
            query_texts=[query],
            n_results=n_results
        )
        
        formatted_results = []
        for i in range(len(results['documents'][0])):
            formatted_results.append({
                'content': results['documents'][0][i],
                'metadata': results['metadatas'][0][i],
                'distance': results['distances'][0][i] if 'distances' in results else None
            })
        
        return formatted_results
    
    def clear_collection(self):
        """Clear all documents from collection"""
        self.client.delete_collection(self.collection.name)
        print(f"üóëÔ∏è  Cleared collection: {self.collection.name}")
    
    def get_stats(self):
        """Get collection statistics"""
        count = self.collection.count()
        return {
            'total_documents': count,
            'collection_name': self.collection.name,
            'embedding_model': self.embedding_model_name
        }

print("‚úÖ VectorDBManager class defined with open-source embeddings!")


‚úÖ VectorDBManager class defined with open-source embeddings!


In [6]:
# Define RAG Agent
class RAGAgent:
    """RAG Agent with Microsoft Agent Framework + ChromaDB"""
    
    def __init__(self, vector_db: VectorDBManager, model: str = "gpt-4o-mini"):
        self.vector_db = vector_db
        self.model = model
        self.agent = None
        
    def create_search_tool(self):
        """Create vector search tool for the agent"""
        
        def search_knowledge_base(
            query: Annotated[str, Field(description="The user's question or search query")]
        ) -> str:
            """
            Search the technical documentation knowledge base.
            Returns relevant information to answer the user's question.
            """
            # Perform vector search
            results = self.vector_db.search(query, n_results=3)
            
            if not results:
                return "No relevant information found in the knowledge base."
            
            # Format results with sources
            formatted_response = []
            for idx, result in enumerate(results, 1):
                metadata = result['metadata']
                content = result['content']
                
                formatted_response.append(
                    f"**Source {idx}: {metadata.get('filename', 'Unknown')}**\n"
                    f"{content}\n"
                    f"_[Chunk ID: {metadata.get('chunk_id', 'N/A')}, "
                    f"Relevance Score: {1 - result['distance']:.3f}]_"
                )
            
            return "\n\n---\n\n".join(formatted_response)
        
        return search_knowledge_base
    
    def initialize_agent(self):
        """Initialize the RAG agent"""
        search_tool = self.create_search_tool()
        
        self.agent = ChatAgent(
            chat_client=OpenAIChatClient(),
            instructions="""You are an expert technical documentation assistant.
            
When answering questions:
1. Always use the search_knowledge_base tool to find relevant information
2. Base your answers primarily on the retrieved context
3. Cite the specific sources (by Source number) for your information
4. If the context doesn't contain enough information, acknowledge this
5. Be concise but thorough
6. Use technical terminology appropriately
7. Provide code examples when available in the context

Format your responses in a clear, structured way with proper markdown.""",
            tools=[search_tool],
            model=self.model
        )
        
        print("‚úÖ RAG Agent initialized")
    
    async def query(self, question: str) -> str:
        """Query the RAG agent"""
        if not self.agent:
            self.initialize_agent()
        
        result = await self.agent.run(question)
        return result.text

print("‚úÖ RAGAgent class defined!")


‚úÖ RAGAgent class defined!


In [7]:
# Load documents from docs folder
DOCS_FOLDER = "./docs"  # Change this to your folder path

# Check if folder exists
if not os.path.exists(DOCS_FOLDER):
    print(f"‚ö†Ô∏è  Folder '{DOCS_FOLDER}' not found. Creating it...")
    os.makedirs(DOCS_FOLDER)
    print(f"‚úÖ Created '{DOCS_FOLDER}' folder")
    print(f"\nüìå Please add your documents (.txt, .md, .pdf, .docx) to the '{DOCS_FOLDER}' folder")
    print("   Then re-run this cell.")
    documents = []
else:
    print(f"üìÇ Loading documents from: {DOCS_FOLDER}\n")
    
    # Initialize document processor
    doc_processor_temp = DocumentProcessor()
    
    # Load all documents
    documents = doc_processor_temp.load_from_folder(DOCS_FOLDER)
    
    if documents:
        print(f"\n‚úÖ Successfully loaded {len(documents)} documents:")
        print("\nDocument Summary:")
        print("-" * 60)
        for doc in documents:
            word_count = len(doc['content'].split())
            print(f"  üìÑ {doc['filename']}")
            print(f"     Type: {doc['file_type'].upper()}")
            print(f"     Size: {len(doc['content'])} chars, ~{word_count} words")
            print()
    else:
        print(f"\n‚ö†Ô∏è  No supported documents found in '{DOCS_FOLDER}'")
        print("   Supported formats: .txt, .md, .pdf, .docx")
        print(f"\n   Please add documents to '{DOCS_FOLDER}' and re-run this cell.")


üìÇ Loading documents from: ./docs

  ‚úì Loaded: RAG_Q&A.pdf

‚úÖ Successfully loaded 1 documents:

Document Summary:
------------------------------------------------------------
  üìÑ RAG_Q&A.pdf
     Type: PDF
     Size: 57877 chars, ~6894 words



In [8]:
# Initialize Document Processor and Vector DB with Open-Source Embeddings
if not documents:
    print("‚ö†Ô∏è  No documents loaded. Please run Cell 7 first.")
else:
    print("üöÄ Initializing RAG System with Open-Source Embeddings...\n")
    
    # Initialize document processor
    doc_processor = DocumentProcessor(chunk_size=1000, chunk_overlap=200)
    
    # ========== CHOOSE YOUR EMBEDDING MODEL ==========
    # Options (all 100% open-source and run locally):
    
    # OPTION 1: Fast and lightweight (RECOMMENDED for most cases)
    embedding_model = "all-MiniLM-L6-v2"  # 384 dim, 22M params
    
    # OPTION 2: Better accuracy
    # embedding_model = "all-mpnet-base-v2"  # 768 dim, 110M params
    
    # OPTION 3: State-of-the-art (BGE models from BAAI)
    # embedding_model = "BAAI/bge-small-en-v1.5"  # 384 dim
    # embedding_model = "BAAI/bge-base-en-v1.5"   # 768 dim
    # embedding_model = "BAAI/bge-large-en-v1.5"  # 1024 dim (best quality)
    
    # OPTION 4: E5 models (efficient)
    # embedding_model = "intfloat/e5-small-v2"  # 384 dim
    # embedding_model = "intfloat/e5-base-v2"   # 768 dim
    
    # OPTION 5: Multilingual support
    # embedding_model = "paraphrase-multilingual-MiniLM-L12-v2"  # 384 dim
    
    print(f"üìå Selected embedding model: {embedding_model}\n")
    
    # Initialize vector database with chosen model
    vector_db = VectorDBManager(
        collection_name="tech_docs_kb",
        persist_directory="./chroma_db",
        embedding_model=embedding_model,
        use_local_embeddings=True  # 100% local, no API calls
    )
    
    print("\n‚úÖ System components initialized!")
    print("üîí All processing runs locally - no data sent to external APIs")


üöÄ Initializing RAG System with Open-Source Embeddings...

üìå Selected embedding model: all-MiniLM-L6-v2

üì• Loading open-source embedding model: all-MiniLM-L6-v2


  from .autonotebook import tqdm as notebook_tqdm


‚úÖ Model loaded successfully (running locally)
‚úÖ Vector DB initialized: tech_docs_kb
üìä Current documents: 0

‚úÖ System components initialized!
üîí All processing runs locally - no data sent to external APIs


In [9]:
# Process documents and create chunks
if not documents:
    print("‚ö†Ô∏è  No documents to process. Please run Cell 7 first.")
else:
    print("üìÑ Processing Documents...\n")
    
    all_chunks = []
    
    for doc in documents:
        chunks = doc_processor.semantic_chunk(
            doc['content'],
            metadata={
                'source': doc['source'],
                'filename': doc['filename'],
                'file_type': doc['file_type'],
                'doc_type': 'technical_documentation'
            }
        )
        all_chunks.extend(chunks)
        print(f"  ‚úì {doc['filename']}: {len(chunks)} chunks")
    
    print(f"\nüì¶ Total chunks created: {len(all_chunks)}")
    
    if all_chunks:
        avg_size = sum(c['metadata']['char_count'] for c in all_chunks) // len(all_chunks)
        print(f"üíæ Average chunk size: {avg_size} characters")


üìÑ Processing Documents...

  ‚úì RAG_Q&A.pdf: 17 chunks

üì¶ Total chunks created: 17
üíæ Average chunk size: 3581 characters


In [10]:
# Add chunks to vector database
if not all_chunks:
    print("‚ö†Ô∏è  No chunks to index. Please run Cell 9 first.")
else:
    print("üîç Indexing documents in vector database...\n")
    
    vector_db.add_documents(all_chunks)
    
    print("\n‚úÖ All documents indexed successfully!")
    print(f"üìä Total chunks in database: {vector_db.collection.count()}")


üîç Indexing documents in vector database...

üîÑ Generating embeddings for 17 chunks...
  ‚úì Processed batch 1/1
‚úÖ Added 17 chunks to vector database

‚úÖ All documents indexed successfully!
üìä Total chunks in database: 17


In [11]:
# Create and initialize RAG Agent
print("ü§ñ Initializing RAG Agent...\n")

rag_agent = RAGAgent(vector_db, model="gpt-4o-mini")
rag_agent.initialize_agent()

print("\n‚úÖ RAG Agent ready to answer questions!")


ü§ñ Initializing RAG Agent...

‚úÖ RAG Agent initialized

‚úÖ RAG Agent ready to answer questions!


In [None]:
# Test with a single query
query = "How does chunk size impact retrieval accuracy?"  # Change this question

print(f"‚ùì Question: {query}\n")
print("="*70)

response = await rag_agent.query(query)
print(f"\nü§ñ Answer:\n{response}")


‚ùì Question: How does chunk size impact retrieval accuracy?


ü§ñ Answer:
## Impact of Chunk Size on Retrieval Accuracy

### Key Points on Chunk Size

1. **Types of Chunking**:
    - **Fixed-size chunking**: Prioritizes speed but may split sentences mid-thought, leading to loss of context which can negatively impact retrieval accuracy.
    - **Semantic chunking**: Splits at topic boundaries, preserving meaning. Best for structured content where topics vary significantly.
    - **Recursive chunking**: Initially attempts larger chunks and splits if necessary, maintaining natural boundaries such as paragraphs or sentences.
    - **Hierarchical chunking**: Creates parent-child relationships for summarization, useful for long documents where multiple levels of detail are required.

2. **Overlap Considerations**:
    - **Overlapping chunks**: Useful when critical information might be split across boundaries. A recommended overlap (50-100 tokens) can help ensure that concepts are not lost.


In [13]:
# Test with multiple queries
# Customize these questions based on your documents
test_queries = [
    "What are the main topics covered in the documents?",
    "Can you summarize the key points?",
    "What technical information is available?",
]

for i, query in enumerate(test_queries, 1):
    print(f"\n{'='*70}")
    print(f"Question {i}/{len(test_queries)}: {query}")
    print(f"{'='*70}")
    
    response = await rag_agent.query(query)
    print(f"\nü§ñ Answer:\n{response}\n")



Question 1/3: What are the main topics covered in the documents?

ü§ñ Answer:
The main topics covered in the documents, particularly in the context of Retrieval-Augmented Generation (RAG), include:

1. **Chunking Techniques**:
   - **Fixed-size Chunking**: Simple and fast, but may break sentences mid-thought.
   - **Semantic Chunking**: Identifies topic boundaries for coherent topic changes.
   - **Recursive Chunking**: Attempts larger chunks first before splitting.
   - **Hierarchical Chunking**: Structures wide documents into parent-child relationships.

2. **Retrieval Techniques**:
   - **Multi-hop and Chain-of-Thought Retrieval**: Breaks complex queries into reasoning steps, allowing for better structured retrieval.
   - **Chunk Routing**: Directs queries to specific document subsets based on query type or metadata to improve relevance and speed.

3. **RAG Workflows**:
   - **Use of Orchestrators**: Handles multi-step workflows, state management, and integrates various tools to e

In [14]:
# Interactive query loop
print("üí¨ Interactive RAG System")
print("Type 'exit' or 'quit' to stop\n")

while True:
    user_query = input("‚ùì Your question: ")
    
    if user_query.lower() in ['exit', 'quit', '']:
        print("üëã Goodbye!")
        break
    
    print("\nü§ñ Answer:")
    response = await rag_agent.query(user_query)
    print(f"{response}\n")
    print("-"*70)


üí¨ Interactive RAG System
Type 'exit' or 'quit' to stop

üëã Goodbye!


In [15]:
# Check vector database statistics
print("üìä Vector Database Statistics\n")
print("="*70)
print(f"Collection Name: {vector_db.collection.name}")
print(f"Total Chunks: {vector_db.collection.count()}")
print(f"Embedding Model: {vector_db.embedding_model_name} (Open-Source)")
print(f"Distance Metric: Cosine Similarity")
print(f"Persist Directory: ./chroma_db")
print(f"üîí Running 100% locally (no external API calls)")

# Model information
from sentence_transformers import SentenceTransformer
model_info = SentenceTransformer(vector_db.embedding_model_name)
print(f"\nüìê Embedding Details:")
print(f"   Dimension: {model_info.get_sentence_embedding_dimension()}")
print(f"   Max Sequence Length: {model_info.max_seq_length}")

# Show sample chunk
sample = vector_db.collection.peek(limit=1)
if sample['documents']:
    print(f"\nüìÑ Sample Chunk Preview:")
    print(f"Content: {sample['documents'][0][:300]}...")
    print(f"\nMetadata: {sample['metadatas'][0]}")


üìä Vector Database Statistics

Collection Name: tech_docs_kb
Total Chunks: 17
Embedding Model: all-MiniLM-L6-v2 (Open-Source)
Distance Metric: Cosine Similarity
Persist Directory: ./chroma_db
üîí Running 100% locally (no external API calls)

üìê Embedding Details:
   Dimension: 384
   Max Sequence Length: 256

üìÑ Sample Chunk Preview:
Content: R A G
I n t e r v i e w
Q u e s t i o n s
N a r e s h  E d a g o t t i
F o l l o w  F o r  M o r e...

Metadata: {'filename': 'RAG_Q&A.pdf', 'chunk_id': 0, 'doc_type': 'technical_documentation', 'char_count': 98, 'file_type': 'pdf', 'source': 'docs\\RAG_Q&A.pdf'}


In [16]:
# View loaded documents statistics
if documents:
    print("üìö Loaded Documents Statistics\n")
    print("="*70)
    
    total_chars = 0
    total_words = 0
    
    for doc in documents:
        chars = len(doc['content'])
        words = len(doc['content'].split())
        lines = doc['content'].count('\n') + 1
        
        total_chars += chars
        total_words += words
        
        print(f"\nüìÑ {doc['filename']}")
        print(f"   Type: {doc['file_type'].upper()}")
        print(f"   Size: {chars:,} characters")
        print(f"   Words: ~{words:,}")
        print(f"   Lines: {lines:,}")
        
        # Preview first 200 characters
        preview = doc['content'][:200].replace('\n', ' ')
        print(f"   Preview: {preview}...")
    
    print("\n" + "="*70)
    print(f"üìä Total Statistics:")
    print(f"   Documents: {len(documents)}")
    print(f"   Total Characters: {total_chars:,}")
    print(f"   Total Words: ~{total_words:,}")
    print(f"   Chunks Created: {len(all_chunks)}")
else:
    print("‚ö†Ô∏è  No documents loaded yet. Please run Cell 7 first.")


üìö Loaded Documents Statistics


üìÑ RAG_Q&A.pdf
   Type: PDF
   Size: 57,877 characters
   Words: ~6,894
   Lines: 7,663
   Preview: R A G I n t e r v i e w Q u e s t i o n s N a r e s h  E d a g o t t i F o l l o w  F o r  M o r e    1.What  issues  occur  when  parsing  PDFs  with  multiple  layouts,  and  how  do  you  handle  t...

üìä Total Statistics:
   Documents: 1
   Total Characters: 57,877
   Total Words: ~6,894
   Chunks Created: 17


In [17]:
# Test vector search directly (without agent)
test_query = "your search term here"  # Change this

print(f"üîç Direct Vector Search Test")
print(f"Query: {test_query}\n")
print("="*70)

results = vector_db.search(test_query, n_results=3)

for i, result in enumerate(results, 1):
    print(f"\nüìÑ Result {i}:")
    print(f"File: {result['metadata']['filename']}")
    print(f"Chunk ID: {result['metadata']['chunk_id']}")
    print(f"Relevance Score: {1 - result['distance']:.4f}")
    print(f"\nContent:\n{result['content'][:300]}...")
    print("-"*70)


üîç Direct Vector Search Test
Query: your search term here


üìÑ Result 1:
File: RAG_Q&A.pdf
Chunk ID: 10
Relevance Score: 0.1589

Content:
system
 
retrieves
 
initial
 
chunks,
 
identifies
 
what
 
additional
 
information
 
is
 
needed,
 
and
 
retrieves
 
again.
 
This
 
continues
 
until
 
sufficient
 
information
 
is
 
gathered.

Retrieval  chain-of-thought  breaks  complex  queries  into  reasoning  steps,  retrieving  informat...
----------------------------------------------------------------------

üìÑ Result 2:
File: RAG_Q&A.pdf
Chunk ID: 5
Relevance Score: 0.1503

Content:
er:  Flat  index  (brute  force)  compares  query  against  every  vector,  giving  100%  recall  but  slow  for  large  
datasets.
 
Use
 
only
 
for
 
small
 
collections
 
under
 
10,000
 
vectors.

HNSW  builds  a  graph  structure  for  fast  approximate  search.  It  offers  great  speed  and ...
----------------------------------------------------------------------

üìÑ Result 3:
File: RAG_Q&

In [18]:
# Clear the database - USE WITH CAUTION!
# Uncomment the lines below to clear all documents

# vector_db.clear_collection()
# print("üóëÔ∏è  Database cleared!")
# print("‚ö†Ô∏è  Run Cells 8-11 to reinitialize the system with documents.")

print("‚ö†Ô∏è  This cell is for clearing the database.")
print("Uncomment the code to actually clear it.")


‚ö†Ô∏è  This cell is for clearing the database.
Uncomment the code to actually clear it.


In [19]:
# Compare different embedding models
# This helps you choose the best model for your use case

from sentence_transformers import SentenceTransformer
import time

print("üî¨ Comparing Open-Source Embedding Models\n")
print("="*70)

models_to_test = [
    "all-MiniLM-L6-v2",           # Fast, general purpose
    "all-mpnet-base-v2",          # Better accuracy
    "BAAI/bge-small-en-v1.5",     # State-of-the-art, small
    "intfloat/e5-small-v2",       # Efficient
]

test_text = "What are the best practices for Python development?"

print(f"Test query: '{test_text}'\n")

for model_name in models_to_test:
    try:
        print(f"üì¶ Testing: {model_name}")
        
        # Load model
        start = time.time()
        model = SentenceTransformer(model_name)
        load_time = time.time() - start
        
        # Generate embedding
        start = time.time()
        embedding = model.encode(test_text)
        encode_time = time.time() - start
        
        print(f"   ‚úì Load time: {load_time:.2f}s")
        print(f"   ‚úì Encode time: {encode_time:.3f}s")
        print(f"   ‚úì Dimension: {len(embedding)}")
        print(f"   ‚úì Model size: ~{model.get_sentence_embedding_dimension() * 4 / 1024:.1f} KB per embedding")
        print()
        
    except Exception as e:
        print(f"   ‚úó Error: {e}\n")

print("="*70)
print("\nüí° Recommendation:")
print("   - Fast & Good: all-MiniLM-L6-v2")
print("   - Best Accuracy: BAAI/bge-base-en-v1.5")
print("   - Multilingual: paraphrase-multilingual-MiniLM-L12-v2")


üî¨ Comparing Open-Source Embedding Models

Test query: 'What are the best practices for Python development?'

üì¶ Testing: all-MiniLM-L6-v2
   ‚úì Load time: 4.65s
   ‚úì Encode time: 0.021s
   ‚úì Dimension: 384
   ‚úì Model size: ~1.5 KB per embedding

üì¶ Testing: all-mpnet-base-v2


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


   ‚úì Load time: 63.88s
   ‚úì Encode time: 0.121s
   ‚úì Dimension: 768
   ‚úì Model size: ~3.0 KB per embedding

üì¶ Testing: BAAI/bge-small-en-v1.5


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


   ‚úì Load time: 23.61s
   ‚úì Encode time: 0.033s
   ‚úì Dimension: 384
   ‚úì Model size: ~1.5 KB per embedding

üì¶ Testing: intfloat/e5-small-v2


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


   ‚úì Load time: 22.93s
   ‚úì Encode time: 0.035s
   ‚úì Dimension: 384
   ‚úì Model size: ~1.5 KB per embedding


üí° Recommendation:
   - Fast & Good: all-MiniLM-L6-v2
   - Best Accuracy: BAAI/bge-base-en-v1.5
   - Multilingual: paraphrase-multilingual-MiniLM-L12-v2
