# Chapter 36: Advanced RAG Techniques

Run this notebook directly in Google Colab - no local Python needed!

**Full code**: [GitHub](https://github.com/eduardd76/AI_for_networking_and_security_engineers/tree/main/CODE/Volume-3-Production-Systems/Chapter-36-Advanced-RAG)

## Setup

Install dependencies and configure API keys.

In [None]:
# Install dependencies
!pip install -q chromadb sentence-transformers anthropic rank-bm25 python-dotenv

# Import and configure API key
import os
from getpass import getpass

# Prefer Colab secrets when the notebook runs inside Google Colab.
try:
    from google.colab import userdata
    os.environ['ANTHROPIC_API_KEY'] = userdata.get('ANTHROPIC_API_KEY')
    print('✓ Using API keys from Colab secrets')
except Exception:
    # Reached when the Colab import fails or the secret lookup/assignment
    # raises. `except Exception` (instead of a bare `except`) lets
    # KeyboardInterrupt and SystemExit propagate, so the cell can still be
    # interrupted while it waits.
    # Fall back to an already-exported variable, else prompt for the key.
    if 'ANTHROPIC_API_KEY' not in os.environ:
        os.environ['ANTHROPIC_API_KEY'] = getpass('Enter ANTHROPIC_API_KEY: ')
    print('✓ API keys configured')

print('\n✅ Setup complete! Ready to run examples.')

## Example 1: Hybrid Search (Vector + Keyword + Graph)

Combine vector search, BM25 keyword search, and a device-to-document relationship graph for comprehensive retrieval, fusing the three scores with configurable weights.

In [None]:
import chromadb
from chromadb.utils import embedding_functions
from rank_bm25 import BM25Okapi
import numpy as np
from typing import List, Dict
import re

class HybridNetworkSearch:
    """Hybrid search for network documentation.

    Three signals are fused into a single ranking:

    * vector similarity from a ChromaDB collection embedded with
      ``all-MiniLM-L6-v2``,
    * BM25 keyword relevance over the same document texts, and
    * a device -> document-id map (the "graph") that boosts documents
      belonging to a requested device.
    """

    def __init__(self):
        # Vector search with ChromaDB
        self.client = chromadb.Client()
        self.embedding_fn = embedding_functions.SentenceTransformerEmbeddingFunction(
            model_name="all-MiniLM-L6-v2"
        )
        self.collection = self.client.get_or_create_collection(
            name="network_docs_hybrid",
            embedding_function=self.embedding_fn
        )

        # BM25 for keyword search; documents[i] and doc_ids[i] describe the
        # same document, in the order the BM25 index was built.
        self.documents = []
        self.doc_ids = []
        self.bm25 = None

        # Graph for relationships: device name -> list of document ids
        self.graph = {}

    def add_documents(self, documents: List[Dict[str, str]]):
        """Index a batch of documents in all three stores.

        Args:
            documents: Dicts with at least ``'id'`` and ``'text'`` keys. Every
                other key is stored as metadata; ``'device'`` (default
                ``'unknown'``) additionally keys the relationship graph.

        Calling this more than once accumulates documents; the BM25 index is
        rebuilt over everything added so far. An empty batch is a no-op.
        """
        if not documents:
            return

        ids = [doc['id'] for doc in documents]
        texts = [doc['text'] for doc in documents]
        metadatas = [
            {k: v for k, v in doc.items() if k not in ('id', 'text')}
            for doc in documents
        ]

        # Add to vector store
        self.collection.add(ids=ids, documents=texts, metadatas=metadatas)

        # Extend (not replace) the keyword corpus so it stays aligned with the
        # vector store and the graph, both of which accumulate across calls.
        self.doc_ids.extend(ids)
        self.documents.extend(texts)
        self.bm25 = BM25Okapi([self._tokenize(text) for text in self.documents])

        # Build relationship graph
        for doc in documents:
            self.graph.setdefault(doc.get('device', 'unknown'), []).append(doc['id'])

    def _tokenize(self, text: str) -> List[str]:
        """Simple tokenization for BM25.

        Lower-cases ``text``, keeps dotted-quad addresses such as ``10.1.1.2``
        as single tokens and returns every other run of word characters as
        its own token.
        """
        # The dotted-quad branch must come first: alternation takes the first
        # branch that matches, so a leading \w+ branch would consume "10" and
        # the address branch would never fire.
        return re.findall(r'\d+\.\d+\.\d+\.\d+|\w+', text.lower())

    def hybrid_search(self, query: str, device: str = None, n_results: int = 5,
                     vector_weight: float = 0.5, keyword_weight: float = 0.3,
                     graph_weight: float = 0.2):
        """Combine all search methods with weighted fusion.

        Args:
            query: Free-text query.
            device: Optional device name; documents recorded for it in the
                graph receive a flat ``graph_weight`` bonus.
            n_results: Maximum number of results to return.
            vector_weight: Multiplier for the vector similarity score.
            keyword_weight: Multiplier for the max-normalised BM25 score.
            graph_weight: Bonus added to documents of ``device``.

        Returns:
            Up to ``n_results`` dicts with ``'id'``, ``'text'``, ``'metadata'``
            and ``'score'``, sorted by descending fused score.
        """
        if n_results <= 0:
            return []

        all_scores = {}
        n_candidates = n_results * 2  # over-fetch so fusion has room to re-rank

        # Vector search. Never request more neighbours than the collection
        # holds, and skip the query entirely when it is empty.
        available = self.collection.count()
        if available > 0:
            vector_results = self.collection.query(
                query_texts=[query], n_results=min(n_candidates, available)
            )
            hits = zip(vector_results['ids'][0], vector_results['distances'][0])
            for doc_id, distance in hits:
                # NOTE(review): treats distance as if bounded by 1; with an
                # unbounded metric this contribution can go negative. Confirm
                # the collection's distance function.
                score = 1.0 - distance
                all_scores[doc_id] = all_scores.get(doc_id, 0) + (score * vector_weight)

        # Keyword search
        if self.bm25 is not None:
            scores = self.bm25.get_scores(self._tokenize(query))
            best = max(scores)
            max_score = best if best > 0 else 1.0  # avoid dividing by <= 0
            for doc_id, score in zip(self.doc_ids, scores):
                if score > 0:
                    normalized = score / max_score
                    all_scores[doc_id] = all_scores.get(doc_id, 0) + (normalized * keyword_weight)

        # Graph search (if device specified)
        if device and device in self.graph:
            for doc_id in self.graph[device][:n_candidates]:
                all_scores[doc_id] = all_scores.get(doc_id, 0) + graph_weight

        # Sort by combined score
        sorted_results = sorted(all_scores.items(), key=lambda x: x[1], reverse=True)
        top_results = sorted_results[:n_results]

        # Retrieve full documents
        final_results = []
        for doc_id, score in top_results:
            doc_result = self.collection.get(ids=[doc_id])
            if doc_result['documents']:
                final_results.append({
                    'id': doc_id,
                    'text': doc_result['documents'][0],
                    'metadata': doc_result['metadatas'][0],
                    'score': score
                })

        return final_results

# Demo: index three sample documents, then run two hybrid queries
search = HybridNetworkSearch()

# Sample corpus: two BGP configs and one incident ticket
docs = [
    dict(
        id='config_rtr01_bgp',
        text='router bgp 65000\n neighbor 10.1.1.2 remote-as 65001',
        type='config',
        device='rtr01',
        timestamp='2024-01-15',
    ),
    dict(
        id='ticket_1234',
        text='BGP session down on rtr01. Root cause: MTU mismatch. Fixed by setting ip mtu 1500.',
        type='ticket',
        device='rtr01',
        timestamp='2024-01-16',
    ),
    dict(
        id='config_rtr02_bgp',
        text='router bgp 65001\n neighbor 10.1.1.1 remote-as 65000',
        type='config',
        device='rtr02',
        timestamp='2024-01-15',
    ),
]

search.add_documents(docs)

# Query 1: plain hybrid search, no device hint
print("=== Query: 'BGP AS 65000' ===")
results = search.hybrid_search("BGP AS 65000", n_results=3)
for hit in results:
    meta = hit['metadata']
    print(f"\n{hit['id']} (score: {hit['score']:.3f})")
    print(f"Type: {meta['type']}, Device: {meta['device']}")
    print(f"Text: {hit['text'][:100]}...")

# Query 2: same engine, with the graph bonus for one device
print("\n\n=== Query: 'BGP config' for device 'rtr01' ===")
results = search.hybrid_search("BGP config", device="rtr01", n_results=3)
for hit in results:
    print(f"\n{hit['id']} (score: {hit['score']:.3f})")
    print(f"Type: {hit['metadata']['type']}")

## Example 2: Query Routing and Classification

Route queries to appropriate search strategies based on query type and intent.

In [None]:
from anthropic import Anthropic
import json

class QueryRouter:
    """Route queries to appropriate search strategies.

    A Claude call classifies the query; the classification then selects the
    weight profile passed to the wrapped hybrid search object.
    """

    # The classification prompt does not pin entity key names, so accept the
    # spellings a model is likely to choose for "device name".
    _DEVICE_KEYS = ('device', 'devices', 'device_name', 'device_names', 'hostname')

    def __init__(self, api_key: str, hybrid_search):
        """Store the Anthropic client and the search backend.

        Args:
            api_key: Anthropic API key.
            hybrid_search: Object exposing ``hybrid_search(query, device=...,
                n_results=..., vector_weight=..., keyword_weight=...,
                graph_weight=...)``.
        """
        self.client = Anthropic(api_key=api_key)
        self.search = hybrid_search

    @staticmethod
    def _parse_json_response(content: str):
        """Parse the JSON payload out of a model reply.

        Handles a fenced code block (with or without a ``json`` tag) and, if
        the remaining text still is not valid JSON, retries on the outermost
        ``{...}`` span so replies with surrounding prose are accepted.

        Raises:
            json.JSONDecodeError: If no parseable JSON is found.
        """
        if "```json" in content:
            content = content.split("```json")[1].split("```")[0]
        elif "```" in content:
            content = content.split("```")[1].split("```")[0]
        content = content.strip()

        try:
            return json.loads(content)
        except json.JSONDecodeError:
            start, end = content.find("{"), content.rfind("}")
            if start == -1 or end <= start:
                raise
            return json.loads(content[start:end + 1])

    @classmethod
    def _extract_device(cls, entities):
        """Return a single device name from the classifier's entities, or None.

        Accepts a string or a list/tuple of strings (first non-empty entry
        wins) under any key in ``_DEVICE_KEYS``. Anything else, including a
        non-dict ``entities`` value, yields ``None``.
        """
        if not isinstance(entities, dict):
            return None
        for key in cls._DEVICE_KEYS:
            value = entities.get(key)
            if isinstance(value, (list, tuple)):
                value = next((item for item in value if item), None)
            if isinstance(value, str) and value.strip():
                return value.strip()
        return None

    def classify_query(self, query: str):
        """Use Claude to classify query intent and extract entities.

        Returns:
            The parsed JSON object produced by the model.

        Raises:
            json.JSONDecodeError: If the reply contains no parseable JSON.
        """
        prompt = f"""Analyze this network operations query and extract:

1. Query type: device_specific, exact_match, incident_analysis, conceptual, or multi_hop
2. Entities: device names, IP addresses, AS numbers, interface names, etc.
3. Requires multi-hop reasoning: yes/no

Query: {query}

Return JSON:
{{
  "query_type": "...",
  "entities": {{}},
  "multi_hop": true/false,
  "search_strategy": "..."
}}"""

        response = self.client.messages.create(
            model="claude-sonnet-4-5-20250929",
            max_tokens=500,
            messages=[{"role": "user", "content": prompt}]
        )

        return self._parse_json_response(response.content[0].text)

    def route_query(self, query: str, n_results: int = 5):
        """Route query to appropriate search strategy.

        Args:
            query: Free-text query.
            n_results: Maximum number of results to request from the search.

        Returns:
            Dict with ``'query'``, ``'classification'``, ``'strategy'`` (one of
            ``'graph_focused'``, ``'keyword_focused'``, ``'vector_focused'``)
            and ``'results'``.
        """
        classification = self.classify_query(query)
        query_type = classification.get('query_type', 'conceptual')
        # Normalise to one hashable string: a list-valued entity would
        # otherwise reach the search object's device lookup and fail there.
        device = self._extract_device(classification.get('entities', {}))

        # Execute search based on classification
        if query_type == 'device_specific' and device:
            results = self.search.hybrid_search(
                query, device=device, n_results=n_results,
                vector_weight=0.2, keyword_weight=0.3, graph_weight=0.5
            )
            strategy = 'graph_focused'
        elif query_type == 'exact_match':
            results = self.search.hybrid_search(
                query, n_results=n_results,
                vector_weight=0.2, keyword_weight=0.7, graph_weight=0.1
            )
            strategy = 'keyword_focused'
        else:
            results = self.search.hybrid_search(
                query, n_results=n_results,
                vector_weight=0.7, keyword_weight=0.2, graph_weight=0.1
            )
            strategy = 'vector_focused'

        return {
            'query': query,
            'classification': classification,
            'strategy': strategy,
            'results': results
        }

# Demo: classify and route three sample queries through the hybrid search
router = QueryRouter(
    api_key=os.environ.get('ANTHROPIC_API_KEY'),
    hybrid_search=search,
)

queries = [
    "What is BGP?",
    "Show BGP config for rtr01",
    "Find AS 65000",
]

for query in queries:
    print(f"\n=== Query: {query} ===")
    result = router.route_query(query, n_results=2)
    classification = result['classification']
    print(f"Classification: {classification['query_type']}")
    print(f"Strategy: {result['strategy']}")
    print("Results:")
    for r in result['results']:
        print(f"  - {r['id']} (score: {r['score']:.3f})")

## Example 3: Context Compression

Compress retrieved context to only relevant information using LLM extraction.

In [None]:
from anthropic import Anthropic

class ContextCompressor:
    """Compress retrieved context to only relevant information."""

    def __init__(self, api_key: str):
        """Create the Anthropic client used for extraction."""
        self.client = Anthropic(api_key=api_key)

    def compress_document(self, query: str, document: str, doc_id: str) -> str:
        """Extract only query-relevant sentences from document.

        Args:
            query: The question the excerpts should help answer.
            document: Full text of the retrieved document.
            doc_id: Identifier included in the prompt for reference.

        Returns:
            The model's reply with surrounding whitespace stripped, or ``""``
            when ``document`` is empty or whitespace-only.
        """
        # Nothing to extract from a blank document: skip the API call.
        if not document or not document.strip():
            return ""

        prompt = f"""Extract only the sentences from this document that are relevant to answering the query.
Preserve exact wording. Remove irrelevant content.

Query: {query}

Document ID: {doc_id}
Document:
{document}

Return only the relevant excerpts, maintaining original formatting where possible."""

        response = self.client.messages.create(
            model="claude-sonnet-4-5-20250929",
            max_tokens=2000,
            messages=[{"role": "user", "content": prompt}]
        )

        return response.content[0].text.strip()

# Demo: compress a router config down to the BGP-relevant lines
compressor = ContextCompressor(api_key=os.environ.get('ANTHROPIC_API_KEY'))

# Stand-in for a document returned by retrieval
config_doc = """hostname rtr01
!
interface Loopback0
 ip address 192.168.1.1 255.255.255.255
!
interface GigabitEthernet0/0
 description Link to DC2
 ip address 10.1.1.1 255.255.255.252
 ip mtu 1500
!
router bgp 65000
 neighbor 10.1.1.2 remote-as 65001
 neighbor 10.1.1.2 description PEER_TO_DC2
 network 192.168.1.0 mask 255.255.255.0
!
line vty 0 4
 login local
 transport input ssh
"""

query = "What is the BGP configuration?"

# Show the input before compression
print(f"Query: {query}\n")
print(f"Original length: {len(config_doc)} characters\n")
print("Original document:")
print(config_doc)
print("\n" + "=" * 80 + "\n")

compressed = compressor.compress_document(query, config_doc, "config_rtr01")

# Report how much shorter the extracted context is
print(f"Compressed length: {len(compressed)} characters")
print(f"Compression ratio: {len(compressed)/len(config_doc):.1%}\n")
print("Compressed document:")
print(compressed)

## Example 4: RAG Evaluation Metrics

Measure RAG system performance with retrieval precision, recall, and F1, plus LLM-judged answer faithfulness.

In [None]:
from anthropic import Anthropic
import json

class RAGEvaluator:
    """Evaluate RAG system performance."""

    def __init__(self, api_key: str):
        """Create the Anthropic client used for faithfulness judging."""
        self.client = Anthropic(api_key=api_key)

    @staticmethod
    def _parse_json_response(content: str):
        """Parse the JSON payload out of a model reply.

        Handles a fenced code block (with or without a ``json`` tag) and, if
        the remaining text still is not valid JSON, retries on the outermost
        ``{...}`` span so replies with surrounding prose are accepted.

        Raises:
            json.JSONDecodeError: If no parseable JSON is found.
        """
        if "```json" in content:
            content = content.split("```json")[1].split("```")[0]
        elif "```" in content:
            content = content.split("```")[1].split("```")[0]
        content = content.strip()

        try:
            return json.loads(content)
        except json.JSONDecodeError:
            start, end = content.find("{"), content.rfind("}")
            if start == -1 or end <= start:
                raise
            return json.loads(content[start:end + 1])

    def evaluate_retrieval(self, retrieved_doc_ids: list, relevant_doc_ids: list):
        """Evaluate retrieval quality.

        Both inputs are treated as sets, so duplicates and ordering are
        ignored.

        Args:
            retrieved_doc_ids: Ids returned by the retriever.
            relevant_doc_ids: Ground-truth relevant ids.

        Returns:
            Dict with ``'precision'``, ``'recall'``, ``'f1'`` (each 0.0 when
            its denominator is zero) and the raw ``'true_positives'``,
            ``'false_positives'``, ``'false_negatives'`` counts.
        """
        retrieved_set = set(retrieved_doc_ids)
        relevant_set = set(relevant_doc_ids)

        tp = len(retrieved_set & relevant_set)  # True positives
        fp = len(retrieved_set - relevant_set)  # False positives
        fn = len(relevant_set - retrieved_set)  # False negatives

        precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
        recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
        f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0.0

        return {
            'precision': precision,
            'recall': recall,
            'f1': f1,
            'true_positives': tp,
            'false_positives': fp,
            'false_negatives': fn
        }

    def evaluate_answer_faithfulness(self, answer: str, retrieved_docs: list):
        """Check if answer is grounded in retrieved documents.

        Args:
            answer: The generated answer to judge.
            retrieved_docs: Document texts the answer should be supported by.

        Returns:
            The parsed JSON object produced by the model.

        Raises:
            json.JSONDecodeError: If the reply contains no parseable JSON.
        """
        context = "\n\n".join(
            f"Document {i}:\n{doc}" for i, doc in enumerate(retrieved_docs, start=1)
        )

        prompt = f"""Evaluate if this answer is faithful to the provided documents.

Answer to evaluate:
{answer}

Retrieved documents:
{context}

Return JSON:
{{
  "faithful": true/false,
  "faithfulness_score": 0.0-1.0,
  "unsupported_claims": ["claim1", "claim2"],
  "explanation": "brief explanation"
}}"""

        response = self.client.messages.create(
            model="claude-sonnet-4-5-20250929",
            max_tokens=1000,
            messages=[{"role": "user", "content": prompt}]
        )

        return self._parse_json_response(response.content[0].text)

# Demo: score one retrieval run and judge one answer
evaluator = RAGEvaluator(api_key=os.environ.get('ANTHROPIC_API_KEY'))

# Part 1: set-based retrieval metrics (no API call involved)
print("=== Retrieval Evaluation ===\n")
retrieved_ids = ['config_rtr01_bgp', 'ticket_1234', 'doc_bgp_overview']
relevant_ids = ['config_rtr01_bgp', 'ticket_1234']
retrieval_result = evaluator.evaluate_retrieval(
    retrieved_doc_ids=retrieved_ids,
    relevant_doc_ids=relevant_ids,
)

print(f"Precision: {retrieval_result['precision']:.2%}")
print(f"Recall: {retrieval_result['recall']:.2%}")
print(f"F1 Score: {retrieval_result['f1']:.2%}")
print(f"True Positives: {retrieval_result['true_positives']}")
print(f"False Positives: {retrieval_result['false_positives']}")
print(f"False Negatives: {retrieval_result['false_negatives']}")

# Part 2: LLM-judged faithfulness of an answer against its source text
print("\n=== Faithfulness Evaluation ===\n")
answer = "The BGP configuration on rtr01 uses AS 65000 and peers with 10.1.1.2 in AS 65001."
docs = ["router bgp 65000\n neighbor 10.1.1.2 remote-as 65001"]

faithfulness = evaluator.evaluate_answer_faithfulness(answer, docs)
print(f"Faithful: {faithfulness['faithful']}")
print(f"Score: {faithfulness['faithfulness_score']:.2%}")
print(f"Explanation: {faithfulness['explanation']}")

## Interactive Section

Try your own RAG experiments here!

In [None]:
# Your code here
# Experiment with different RAG techniques


## Next Steps

- Full code: [Chapter 36 on GitHub](https://github.com/eduardd76/AI_for_networking_and_security_engineers/tree/main/CODE/Volume-3-Production-Systems/Chapter-36-Advanced-RAG)
- Learn more: [vExpertAI.com](https://vexpertai.com)
- Author: Eduard Dulharu ([@eduardd76](https://github.com/eduardd76))