In [20]:
# Client configuration for Azure OpenAI and Foundry
import asyncio

from agent_framework.azure import AzureAIAgentClient, AzureOpenAIChatClient
from agent_framework.observability import setup_observability 
from azure.identity import DefaultAzureCredential
from azure.identity import get_bearer_token_provider
from azure.identity.aio import DefaultAzureCredential as AsyncDefaultAzureCredential
from dotenv import load_dotenv
from openai import AzureOpenAI

import config

# Load environment variables from a local .env file, then initialise
# agent_framework's observability support before any client is created.
load_dotenv()
setup_observability()

# Scope requested for the bearer tokens handed to the Azure OpenAI client.
COGNITIVE_SERVICES_SCOPE = "https://cognitiveservices.azure.com/.default"
# Deployment name shared by the chat client and the Foundry agent client.
CHAT_DEPLOYMENT_NAME = "gpt-5-chat"

# One synchronous and one asynchronous credential: the OpenAI and chat clients
# take the sync credential, the agent client takes the async one.
credential = DefaultAzureCredential()
async_credential = AsyncDefaultAzureCredential()

# The token provider is invoked by the OpenAI client whenever it needs a token.
# get_bearer_token_provider reuses the current token until it is close to
# expiry, instead of going back to the credential on every single request as
# a bare `credential.get_token(...).token` lambda would.
openai_client = AzureOpenAI(
    azure_endpoint=config.azure_openai_endpoint,
    azure_ad_token_provider=get_bearer_token_provider(credential, COGNITIVE_SERVICES_SCOPE),
    api_version="2024-02-01",
)

# Chat client used for direct model calls against the Azure OpenAI endpoint.
chat_client = AzureOpenAIChatClient(
    credential=credential,
    deployment_name=CHAT_DEPLOYMENT_NAME,
    endpoint=config.azure_openai_endpoint,
)

# Agent client bound to the Foundry project endpoint; used to create agents.
agent_client = AzureAIAgentClient(
    project_endpoint=config.foundry_project_endpoint,
    model_deployment_name=CHAT_DEPLOYMENT_NAME,
    async_credential=async_credential,
)


In [23]:
# Define a tool to search our knowledge base
from pydantic import Field
from typing import List, Annotated
from search_knowledge_base import KnowledgeBaseSearcher
from agent_framework import ContextProvider

def search_knowledge_base(
        query: Annotated[str, Field(description="The search query string.")]
    ) -> List[str]:
    """Search the knowledge base for relevant information."""
    # NOTE(review): the docstring and the Field description are kept verbatim;
    # presumably the agent framework surfaces them as the tool's description
    # and parameter schema — confirm before rewording either.

    # A new searcher is built for every call, then queried semantically.
    hits = KnowledgeBaseSearcher().semantic_search(query)

    # Keep only the value stored under each hit's "chunk" key, in result order.
    chunks = []
    for hit in hits:
        chunks.append(hit["chunk"])
    return chunks

In [24]:
# Instructions given to the agent: they name the knowledge base topic, point
# the model at the `search_knowledge_base` function, and set the expected tone.
agent_instructions = """
You are a helpful AI assistant. You have access to a knowledge base about Meridian Strategic Consulting. 
Use the `search_knowledge_base` function to find relevant information from the knowledge base to answer user queries.
Your tone should be friendly, professional and focussed on informing the user with accurate information.
"""

# Create the agent through `agent_client`, with the search function as its
# only tool.
agent = agent_client.create_agent(
    tools=[search_knowledge_base],
    instructions=agent_instructions,
    name="KnowledgeBaseAgent",
)

In [25]:
# Functions for semantic similarity evaluation 
from typing import Any

# Prompt template for the grading model. It is filled in with str.format():
# `{reference_answer}` and `{agent_response}` are the only two placeholders,
# and the doubled braces around the JSON example are escapes that come out as
# literal `{` / `}` in the final prompt.
# NOTE(review): metric 4 refers to "the user's question", but the template has
# no placeholder for the question, so the grader only ever sees the reference
# answer and the agent response.
evaluation_instructions = """
You are an expert evaluator of AI generated answers.
Given a reference answer, and an agent-generated response, evaluate the quality of the agent's response based on the following metrics. 
Provide a score from 1 to 5 for each metric, where 1 is poor and 5 is excellent. Also, provide an overall score with justification.

REFERENCE ANSWER:
{reference_answer}

AGENT RESPONSE:
{agent_response}

EVALUATION METRICS:
1. Accuracy: Are the facts and figures correct?
2. Completeness: Does it cover all key points mentioned in the reference answer?
3. Clarity: Is the response clear and well-structured?
4. Relevance: Does it directly answer the user's question?

OUTPUT FORMAT:
You MUST return your evaluation as a JSON object with the following structure:
{{
    "accuracy": X,
    "completeness": X,
    "clarity": X,
    "relevance": X,
    "overall": X,
    "justification": "Detailed explanation of your evaluation"
}}

Where X is a score from 1-5 (1=Poor, 2=Below Average, 3=Average, 4=Good, 5=Excellent).
"""

async def evaluate_answer(response: str, reference: str) -> Any:
    """Ask the chat client to grade an agent response against a reference answer.

    The two texts are substituted into ``evaluation_instructions`` and the
    resulting prompt is sent as a single message through
    ``chat_client.get_response``.

    Args:
        response: The agent-generated answer to be graded.
        reference: The reference answer it is compared with.

    Returns:
        The ``text`` attribute of the chat client's reply. The prompt asks for
        a JSON object, but this function neither parses nor validates it.
    """
    filled_prompt = evaluation_instructions.format(
        agent_response=response,
        reference_answer=reference,
    )
    messages = [filled_prompt]

    # Per-call options are forwarded unchanged to the chat client.
    grading_reply = await chat_client.get_response(
        messages,
        tool_choice="auto",
        temperature=0.0,
        model="o3-mini",
    )
    return grading_reply.text


In [28]:
# Test case 1: a question about Meridian, the company the agent instructions
# say the knowledge base covers, paired with the expected reference answer.
query = "When was Meridian founded?"
reference_answer = "Meridian Strategic Consulting was founded in 2018"

# Run the agent on the question, then grade the text of its reply against the
# reference answer.
response = await agent.run(query, model="gpt-5-mini")
evaluation = await evaluate_answer(response.text, reference_answer)

# Show the agent's response followed by the evaluator's output.
print(response)
print(evaluation)

Meridian Strategic Consulting was founded in 2018.

Would you like more details about the founders, headquarters, or company history?
{
    "accuracy": 5,
    "completeness": 5,
    "clarity": 5,
    "relevance": 5,
    "overall": 5,
    "justification": "The agent's response exactly matches the factual content of the reference answer, correctly stating that Meridian Strategic Consulting was founded in 2018. It fully covers the key point from the reference answer without omitting any information. The statement is clear, concise, and well-structured. The additional offer to provide more details is relevant and does not detract from the direct answer to the question. Overall, the response is accurate, complete, clear, and directly addresses the query."
}


In [29]:
# Test case 2: a question about Xebia rather than Meridian, paired with a
# reference answer describing Xebia.
query = "Who is Xebia?"
reference_answer = "At Xebia, AI is in our DNA. We lead with responsible AI to shape a future grounded in a deep commitment to the human experience, driven by our core values. We collaborate with our clients and across our partner ecosystem to enable bold industry disruptions, accelerate innovation, deliver operational excellence, and secure sustainable competitive advantage. Through our Consulting, Software Engineering, and Training expertise, we help businesses be AI-first and future-ready."

# Run the agent on the question, then grade the text of its reply against the
# reference answer.
response = await agent.run(query, model="gpt-5-mini")
evaluation = await evaluate_answer(response.text, reference_answer)

# Only the evaluator's output is shown for this case.
print(evaluation)

{
    "accuracy": 4,
    "completeness": 2,
    "clarity": 5,
    "relevance": 3,
    "overall": 3,
    "justification": "The agent's response is largely accurate in terms of general facts about Xebia, such as its founding location, services, and global presence. However, it omits key elements from the reference answer, such as Xebia's emphasis on responsible AI, core values, human experience, and its role in enabling industry disruptions and sustainable competitive advantage. The structure and clarity of the response are excellent, with well-organized bullet points. In terms of relevance, the response provides a general profile rather than directly mirroring the thematic and value-driven focus of the reference answer, which reduces alignment with the intended message. Overall, while factually sound and clear, the response lacks completeness and full relevance to the original content."
}
