In [25]:
from semantic_kernel import Kernel
from semantic_kernel.connectors.ai.open_ai import AzureChatCompletion
from semantic_kernel.connectors.ai.open_ai import AzureChatPromptExecutionSettings
import os
from langsmith import traceable
import json
import asyncio
from dotenv import load_dotenv
load_dotenv()

True

In [26]:
# Azure OpenAI connection settings, read from the process environment.
# Indexing os.environ directly (rather than .get) raises KeyError right here
# if a variable is missing, instead of failing later with a less obvious error.
AZURE_OPENAI_API_KEY = os.environ["AZURE_OPENAI_API_KEY"]
AZURE_OPENAI_ENDPOINT = os.environ["AZURE_OPENAI_ENDPOINT"]
AZURE_OPENAI_API_VERSION = os.environ["AZURE_OPENAI_API_VERSION"]

In [27]:
from pydantic import BaseModel, Field
from typing import List

class GenerateAnswer(BaseModel):
    # Structured-output schema with a single text field. It is passed as
    # response_format for the generator and refiner prompt settings.
    # Documented with '#' comments on purpose: a class docstring would be
    # emitted as the schema's "description" and so change what is sent to the model.
    generated_answer: str = Field(
        description="The answer generated",
    )

class Critique(BaseModel):
    # Structured-output schema for the critic step: a list of problems plus an
    # integer score. The 1-10 range is only stated in the field description;
    # nothing in this model enforces it.
    # '#' comments are used instead of a docstring so the generated JSON schema
    # stays exactly as before.
    issues: List[str] = Field(
        description="Problems in the answer",
    )
    score: int = Field(
        description="Quality score 1-10",
    )

In [45]:
# Chat-completion connector for the "gpt-4o-mini" Azure OpenAI deployment,
# configured from the environment-derived settings defined earlier.
service = AzureChatCompletion(
    api_key=AZURE_OPENAI_API_KEY,
    endpoint=AZURE_OPENAI_ENDPOINT,
    api_version=AZURE_OPENAI_API_VERSION,
    deployment_name="gpt-4o-mini",
)

# Fresh kernel with that connector as its only registered AI service.
kernel = Kernel()
kernel.add_service(service)

In [46]:
# Prompt template for the first draft; {{$input}} receives the user's question
# when the function is invoked.
generator_prompt = """
Answer the user's question clearly and in detail within 150 words.

Question: {{$input}}
"""

# temperature=0, replies capped at 500 tokens, and output constrained to the
# GenerateAnswer schema.
request_settings_generator = AzureChatPromptExecutionSettings(
    response_format=GenerateAnswer,
    temperature=0,
    max_tokens=500,
)

# Register the prompt on the kernel as function "generator" in plugin "reflect".
generator_function = kernel.add_function(
    plugin_name="reflect",
    function_name="generator",
    prompt=generator_prompt,
    prompt_execution_settings=request_settings_generator,
)

@traceable(name="Generate Step")
async def run_generator(q):
    """Draft an answer to ``q`` using the generator prompt function.

    Args:
        q: The user's question; bound to the prompt's ``input`` variable.

    Returns:
        GenerateAnswer: The model's reply parsed into the structured schema.

    Raises:
        pydantic.ValidationError: If the reply text is not valid JSON or does
            not match the ``GenerateAnswer`` schema.
    """
    generator_response = await kernel.invoke(generator_function, input=q)
    # The text of the first returned message is expected to be the JSON payload,
    # since the execution settings request GenerateAnswer as response_format.
    raw_json = generator_response.value[0].content
    # Parse and validate in a single step instead of json.loads + model_validate.
    return GenerateAnswer.model_validate_json(raw_json)

In [53]:
# generator_response = await kernel.invoke(generator_function, input="Explain how hybrid retrieval improves RAG systems")

In [54]:
# generator_response.value[0].content

In [55]:
# rspns = await run_generator("Explain how hybrid retrieval improves RAG systems")

In [47]:
# Prompt template for the review step; {{$answer}} receives the draft text.
critic_prompt = """
You are a strict reviewer.

Evaluate the answer for:
- correctness
- missing details
- clarity

Answer:
{{$answer}}

Return JSON:
- issues (list)
- score (1-10)
"""

# temperature=0, replies capped at 500 tokens, and output constrained to the
# Critique schema.
request_settings_critic = AzureChatPromptExecutionSettings(
    response_format=Critique,
    temperature=0,
    max_tokens=500,
)

# Register the prompt on the kernel as function "critic" in plugin "reflect".
critic_function = kernel.add_function(
    prompt=critic_prompt,
    prompt_execution_settings=request_settings_critic,
    plugin_name="reflect",
    function_name="critic",
)

@traceable(name="Critic Step")
async def run_critic(answer):
    """Review a drafted answer and return the critic's structured verdict.

    Args:
        answer: Object exposing ``generated_answer`` (the draft text), e.g. the
            ``GenerateAnswer`` returned by ``run_generator``.

    Returns:
        Critique: The list of issues and the integer score reported by the model.

    Raises:
        pydantic.ValidationError: If the reply text is not valid JSON or does
            not match the ``Critique`` schema.
    """
    result = await kernel.invoke(critic_function, answer=str(answer.generated_answer))
    # The text of the first returned message is expected to be the JSON payload,
    # since the execution settings request Critique as response_format.
    raw_json = result.value[0].content
    # Parse and validate in a single step instead of json.loads + model_validate.
    return Critique.model_validate_json(raw_json)

In [48]:
# Prompt template for the rewrite step; {{$answer}} receives the draft text and
# {{$issues}} the critic's findings.
# NOTE(review): unlike the generator prompt, this template restates neither the
# original question nor the 150-word limit - confirm that is intended.
refiner_prompt = """
Improve the original answer using the critique.

Original Answer:
{{$answer}}

Issues Found:
{{$issues}}

Provide an improved version.
"""

# temperature=0, replies capped at 500 tokens, and output constrained to the
# GenerateAnswer schema (same shape as the first draft).
request_settings_refiner = AzureChatPromptExecutionSettings(
    response_format=GenerateAnswer,
    temperature=0,
    max_tokens=500,
)

# Register the prompt on the kernel as function "refiner" in plugin "reflect".
refiner_function = kernel.add_function(
    plugin_name="reflect",
    function_name="refiner",
    prompt=refiner_prompt,
    prompt_execution_settings=request_settings_refiner,
)

@traceable(name="Refiner Step")
async def run_refiner(answer, critique):
    """Rewrite a drafted answer so that it addresses the critic's issues.

    Args:
        answer: Object exposing ``generated_answer`` (the draft text).
        critique: Object exposing ``issues``, an iterable of strings; they are
            joined one per line before being placed into the prompt.

    Returns:
        GenerateAnswer: The improved answer parsed into the structured schema.

    Raises:
        pydantic.ValidationError: If the reply text is not valid JSON or does
            not match the ``GenerateAnswer`` schema.
    """
    final_result = await kernel.invoke(
        refiner_function,
        answer=str(answer.generated_answer),
        issues="\n".join(critique.issues),
    )
    # The text of the first returned message is expected to be the JSON payload,
    # since the execution settings request GenerateAnswer as response_format.
    raw_json = final_result.value[0].content
    # Parse and validate in a single step instead of json.loads + model_validate.
    return GenerateAnswer.model_validate_json(raw_json)

In [49]:
@traceable(name="Reflection Agent")
async def reflection_agent(user_question, min_score=8):
    """Answer a question with one generate -> critique -> refine pass.

    A draft is generated and scored by the critic. If the score reaches
    ``min_score`` the draft is returned unchanged; otherwise the refiner is run
    once on the draft and the critic's issues, and its output is returned
    without being re-scored.

    Args:
        user_question: The question to answer.
        min_score: Lowest critic score at which the draft is accepted as-is.
            Defaults to 8, i.e. drafts scoring below 8 are refined.

    Returns:
        The accepted draft or the refined answer, as returned by
        ``run_generator`` / ``run_refiner``.
    """
    # Step 1: Generate
    draft = await run_generator(user_question)

    # Step 2: Critique
    critique = await run_critic(draft)
    print(f"[Critic Score] {critique.score}/10")

    # Step 3: Accept a good-enough draft, otherwise improve it once.
    if critique.score >= min_score:
        return draft
    return await run_refiner(draft, critique)


In [50]:
answer = await reflection_agent(
    "Explain how hybrid retrieval improves RAG systems"
)

[Critic Score] 7/10


In [52]:
# Show the final answer text (the draft, or the refined version if the critic
# score was below the acceptance threshold).
print(answer.generated_answer)

Hybrid retrieval significantly enhances Retrieval-Augmented Generation (RAG) systems by effectively merging the strengths of traditional information retrieval methods with advanced generative models. In a typical RAG system, the retrieval component is responsible for fetching relevant documents from a vast corpus based on a user query, while the generative model synthesizes coherent responses using the information retrieved.

Hybrid retrieval employs a combination of various retrieval techniques, such as keyword-based search, which focuses on exact term matches, and semantic search, which understands the context and meaning behind the words. For instance, a hybrid system might use traditional Boolean search to quickly locate documents containing specific keywords, while simultaneously applying vector-based semantic search to identify documents that are contextually relevant, even if they do not contain the exact query terms. This dual approach not only improves the quality and relevanc