In [1]:
from semantic_kernel import Kernel
from semantic_kernel.connectors.ai.open_ai import AzureChatCompletion
from semantic_kernel.connectors.ai.open_ai import AzureChatPromptExecutionSettings
import os
from langsmith import traceable
import json
import asyncio
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
AZURE_OPENAI_API_KEY=os.environ["AZURE_OPENAI_API_KEY"]
AZURE_OPENAI_ENDPOINT=os.environ["AZURE_OPENAI_ENDPOINT"]
AZURE_OPENAI_API_VERSION=os.environ["AZURE_OPENAI_API_VERSION"]

In [3]:
kernel = Kernel()

service = AzureChatCompletion(
    deployment_name="gpt-4o-mini",
    endpoint=AZURE_OPENAI_ENDPOINT,
    api_key=AZURE_OPENAI_API_KEY,
    api_version=AZURE_OPENAI_API_VERSION
)

kernel.add_service(service)

In [4]:
from pydantic import BaseModel, Field

class RouteDecision(BaseModel):
    agent: str = Field(description="Which agent to route to: tech, math, general")
    reason: str = Field(description="Why this agent is appropriate")


In [None]:
router_prompt = """
You are a routing agent.

Decide which expert should answer.

Options:
- tech → programming, AI, systems
- math → calculations, formulas
- general → anything else

User Question: {{$input}}

Return JSON with:
- agent
- reason
"""
request_settings_router = AzureChatPromptExecutionSettings(temperature=0,
                                                           max_tokens=100,
                                                           response_format=RouteDecision)
router_function = kernel.add_function(
    plugin_name="router",
    function_name="route_query",
    prompt=router_prompt,
    prompt_execution_settings=request_settings_router
)

@traceable(name="Router Step")
async def run_router_step(user_question: str) -> RouteDecision:
    result = await kernel.invoke(router_function, input=user_question)
    return RouteDecision(**json.loads(str(result)))

In [None]:
tech_prompt = """
You are a senior software and AI engineer.
Answer clearly and technically.

Question: {{$input}}
"""
request_settings_tech = AzureChatPromptExecutionSettings(temperature=0,max_tokens=200)
tech_function = kernel.add_function(plugin_name="experts", function_name="tech_agent", prompt=tech_prompt, prompt_execution_settings=request_settings_tech)

@traceable(name="Tech Expert Step")
async def run_tech_expert(user_question: str):
    return await kernel.invoke(tech_function, input=user_question)

In [7]:
math_prompt = """
You are a mathematician.
Show formulas and steps.

Question: {{$input}}
"""
request_settings_math = AzureChatPromptExecutionSettings(temperature=0,max_tokens=400)
math_function = kernel.add_function(plugin_name="experts", function_name="math_agent",
                                    prompt=math_prompt,prompt_execution_settings=request_settings_math)

@traceable(name="Math Expert Step")
async def run_math_expert(user_question: str):
    return await kernel.invoke(math_function, input=user_question)

In [None]:
general_prompt = """
You are a helpful assistant.

Question: {{$input}}
"""
request_settings_general = AzureChatPromptExecutionSettings(temperature=0,max_tokens=200)
general_function = kernel.add_function(plugin_name="experts", function_name="general_agent", 
                                       prompt=general_prompt,prompt_execution_settings=request_settings_general)

@traceable(name="General Expert Step")
async def run_general_expert(user_question: str):
    return await kernel.invoke(general_function, input=user_question)


In [9]:
@traceable(name="Routing Agent")
async def routing_agent(user_question: str):

    # Step 1: Router
    decision = await run_router_step(user_question)
    print(f"[Router] → {decision.agent} | {decision.reason}")

    # Step 2: Delegate
    if decision.agent == "tech":
        answer = await run_tech_expert(user_question)

    elif decision.agent == "math":
        answer = await run_math_expert(user_question)

    else:
        answer = await run_general_expert(user_question)

    return answer


In [10]:
result = await routing_agent(
    "How does hybrid retrieval improve RAG systems?"
)

[Router] → tech | The question pertains to hybrid retrieval in RAG (Retrieval-Augmented Generation) systems, which involves technical concepts related to programming and AI.


In [11]:
print(result)

Hybrid retrieval enhances Retrieval-Augmented Generation (RAG) systems by combining different retrieval methods to improve the quality and relevance of the information retrieved for generating responses. Here’s a breakdown of how hybrid retrieval contributes to RAG systems:

### 1. **Combining Strengths of Different Retrieval Approaches:**
   - **Dense Retrieval:** Utilizes neural embeddings to capture semantic similarity. It excels in retrieving relevant documents based on meaning rather than keyword matching. This is particularly useful for understanding context and nuances in queries.
   - **Sparse Retrieval:** Relies on traditional keyword-based methods (like TF-IDF or BM25). It is effective for retrieving documents that contain specific terms or phrases, ensuring that exact matches are not overlooked.
   - **Hybrid Approach:** By integrating both dense and sparse retrieval, RAG systems can leverage the strengths of each method. For example, sparse retrieval can quickly filter a la