# Semantic Search for Network Documentation
Explore how semantic search works and apply it to network documentation.

In [None]:
!pip install langchain chromadb -q

## Step 1: Set Up the Embeddings and Vector Store
Use pre-trained models for embeddings.

In [None]:
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_community.vectorstores import Chroma

# Embedding model that turns text into vectors for similarity search.
embeddings = SentenceTransformerEmbeddings(
    model_name='all-MiniLM-L6-v2',
)

# Chroma collection stored under ./chroma_db, using the embeddings above.
vectorstore = Chroma(
    embedding_function=embeddings,
    persist_directory='./chroma_db',
)

## Step 2: Define Multi-Query Retrieval
Generate multiple query variations for improved document retrieval.

In [None]:
from langchain_anthropic import ChatAnthropic
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

class MultiQueryRetriever:
    """Retrieve documents using several LLM-generated rephrasings of a question.

    The original question plus up to three alternative phrasings are each run
    against the vector store, and the hits are merged with duplicate content
    removed.
    """

    def __init__(self, vectorstore, llm):
        """Store the collaborators and build the query-generation prompt.

        Args:
            vectorstore: Object exposing ``similarity_search(query, k=...)``
                whose results carry a ``page_content`` attribute.
            llm: Model piped after the prompt template to produce the
                alternative questions.
        """
        self.vectorstore = vectorstore
        self.llm = llm

        self.query_generation_prompt = ChatPromptTemplate.from_template("""
        You are an AI assistant helping generate search queries.
        Given a user question about network engineering, generate 3 alternative
        versions of the question to retrieve relevant documents.
        
        Original question: {question}
        Generate 3 alternative questions (one per line):
        """)

    @staticmethod
    def _parse_alternatives(response, original_query, limit=3):
        """Turn raw LLM output into at most ``limit`` clean, unique questions.

        Blank lines are skipped, a leading list marker ("1. ", "2) ", "- ",
        "* ") is removed, and lines that repeat the original question or an
        earlier alternative (ignoring case) are dropped so no search is wasted.
        """
        import re  # local import: the enclosing cell does not import re

        # A marker must be followed by whitespace, so text such as "802.1Q"
        # at the start of a question is left untouched.
        marker = re.compile(r'^(?:[-*\u2022]|\d+[.)])\s+')

        seen = {original_query.strip().casefold()}
        alternatives = []
        for line in response.split('\n'):
            cleaned = marker.sub('', line.strip()).strip()
            if not cleaned:
                continue
            key = cleaned.casefold()
            if key in seen:
                continue
            seen.add(key)
            alternatives.append(cleaned)
            if len(alternatives) == limit:
                break
        return alternatives

    def generate_queries(self, original_query):
        """Return the original question followed by up to three alternatives.

        Args:
            original_query: The user's question.

        Returns:
            A list whose first element is ``original_query``.
        """
        chain = self.query_generation_prompt | self.llm | StrOutputParser()
        response = chain.invoke({'question': original_query})
        return [original_query] + self._parse_alternatives(response, original_query)

    def retrieve(self, question, k_per_query=3):
        """Search with every generated query and merge the unique documents.

        Args:
            question: The user's question.
            k_per_query: Number of results requested from the vector store
                for each query.

        Returns:
            Documents in first-seen order, one per distinct ``page_content``.
        """
        queries = self.generate_queries(question)
        print(f'Generated {len(queries)} queries:')
        for q in queries:
            print(f'  - {q}')
        all_docs = []
        # Track the content itself rather than hash(content): two different
        # texts can share a hash, which would silently drop a document.
        seen_content = set()
        for query in queries:
            docs = self.vectorstore.similarity_search(query, k=k_per_query)
            for doc in docs:
                if doc.page_content in seen_content:
                    continue
                seen_content.add(doc.page_content)
                all_docs.append(doc)
        print(f'Retrieved {len(all_docs)} unique documents')
        return all_docs

# No api_key is passed: ChatAnthropic falls back to the ANTHROPIC_API_KEY
# environment variable, so the secret never has to be written into the notebook.
llm = ChatAnthropic(model='claude-sonnet-4-20250514')

# Run one multi-query retrieval against the vector store built in Step 1.
retriever = MultiQueryRetriever(vectorstore, llm)
docs = retriever.retrieve('How do I peer with AWS?', k_per_query=2)


## Step 3: Implement Hybrid Search
Combine keyword and semantic search for comprehensive results.

In [None]:
from langchain_community.retrievers import BM25Retriever
from langchain.retrievers import EnsembleRetriever

class HybridNetworkRetriever:
    """Combine semantic (vector) and keyword (BM25) search.

    When ``documents`` is empty there is nothing to build a keyword index
    from, so the retriever falls back to semantic search alone.
    """

    def __init__(self, vectorstore, documents):
        """Build the semantic, keyword and combined retrievers.

        Args:
            vectorstore: Vector store providing ``as_retriever``.
            documents: Documents to index for keyword search; may be empty.
        """
        self.vectorstore = vectorstore
        self.documents = documents

        self.semantic_retriever = vectorstore.as_retriever(search_kwargs={'k': 5})

        if not documents:
            # BM25 indexing fails on an empty corpus (the rank_bm25 backend
            # divides by the corpus size), so skip it and search semantically.
            self.keyword_retriever = None
            self.ensemble_retriever = self.semantic_retriever
            return

        self.keyword_retriever = BM25Retriever.from_documents(documents)
        self.keyword_retriever.k = 5

        # Semantic hits are weighted 0.6 and keyword hits 0.4 when merged.
        self.ensemble_retriever = EnsembleRetriever(
            retrievers=[self.semantic_retriever, self.keyword_retriever],
            weights=[0.6, 0.4]
        )

    def search(self, query, k=5):
        """Return at most ``k`` results for ``query`` from the combined retriever.

        Args:
            query: Search text.
            k: Maximum number of results to return.
        """
        # invoke() is the Runnable entry point for retrievers and replaces
        # the deprecated get_relevant_documents().
        results = self.ensemble_retriever.invoke(query)
        return results[:k]

# Placeholder corpus for the keyword index; load your documents into this list.
document_list = []

# Build the hybrid retriever and run a sample query (default result limit).
hybrid_retriever = HybridNetworkRetriever(
    vectorstore=vectorstore,
    documents=document_list,
)
hybrid_results = hybrid_retriever.search(query='BGP peering techniques')
