In [1]:
# Client configuration for Azure OpenAI and Foundry
import asyncio

from agent_framework.azure import AzureAIAgentClient, AzureOpenAIChatClient
from agent_framework.observability import setup_observability 
from azure.identity import DefaultAzureCredential
from azure.identity.aio import DefaultAzureCredential as AsyncDefaultAzureCredential
from dotenv import load_dotenv
import config

# Load variables from a local .env file into the process environment before
# any client below is constructed.
load_dotenv()
# Turn on agent-framework observability; it is called with no arguments here.
# NOTE(review): presumably configured through the environment loaded above — confirm.
setup_observability()

# Synchronous credential: passed as `credential=` to the evaluators in later cells.
credential = DefaultAzureCredential()
# Async counterpart: handed to the agent client just below.
async_credential = AsyncDefaultAzureCredential()    
# Client bound to the Foundry project endpoint read from the local `config` module;
# the agent in the next cell is created from it.
agent_client = AzureAIAgentClient(
    project_endpoint=config.foundry_project_endpoint,
    model_deployment_name="gpt-4.1",
    async_credential=async_credential,
)


In [2]:
# Initialize the agent
from pydantic import Field
from typing import List, Annotated
from search_knowledge_base import KnowledgeBaseSearcher
from agent_framework import ContextProvider

def search_knowledge_base(
        query: Annotated[str, Field(description="The search query string.")]
    ) -> List[str]:
    """Search the knowledge base for relevant information."""
    # The docstring and the Field description are left verbatim on purpose: this
    # function is passed to the agent as a tool, so that text may be shown to the
    # model (NOTE(review): confirm how the framework derives the tool description).

    # A new searcher is created on every call, then asked for semantic matches.
    hits = KnowledgeBaseSearcher().semantic_search(query)

    # Keep only the value stored under each hit's "chunk" key, in result order.
    chunks: List[str] = []
    for hit in hits:
        chunks.append(hit["chunk"])
    return chunks


# System instructions for the agent. The text names the `search_knowledge_base`
# tool explicitly and asks the model to include the search results in its answer.
# The string is runtime text and is passed to the agent unchanged.
agent_instructions = """
You are a helpful AI assistant. You have access to a knowledge base about Meridian Strategic Consulting. 
Use the `search_knowledge_base` function to find relevant information from the knowledge base to answer user queries.
Include the result of the search as context in your response.
"""

# Create the agent from the client built in the first cell, registering the
# Python function above as its only tool.
agent = agent_client.create_agent(
    name="MeridianConsultingAgent",
    instructions=agent_instructions,
    tools=[search_knowledge_base]) 


In [3]:
thread = agent.get_new_thread()

query = "I need a consultant with AI/ML expertise and healthcare industry experience for a 6-month project. Who would be the best match?" 
response = await agent.run(query, thread=thread)

print(response)

Based on the information retrieved from Meridian Strategic Consulting’s knowledge base, the best consultant match for a 6-month project requiring both AI/ML expertise and healthcare industry experience would be Dr. Amanda Foster.

Here’s why:
- Dr. Amanda Foster is a Senior Partner and the Healthcare Practice Lead at Meridian.
- She has 14 years of total experience (4 years at Meridian).
- Her expertise areas include artificial intelligence, machine learning, healthcare analytics, cloud architecture, and data strategy.
- She has led healthcare projects such as predictive analytics for patient outcomes and is highly regarded in both the AI/ML and healthcare domains.
- Dr. Foster’s recent achievements include AI implementations for healthcare and other industries that have generated significant value.
- She is recognized as an expert (10+ years) in both AI/ML and the healthcare sector.
- Contact: amanda.foster@meridianstrategic.com

Note: Dr. Foster will be available for new engagements 

In [4]:
# Configure Evaluator model
from azure.ai.evaluation import  AzureOpenAIModelConfiguration
# Model configuration shared by every evaluator below.
# NOTE(review): `api_key` is filled with the `.token` of a `get_token(...)` call on
# `config.credential` (not the notebook-level `credential`). The value is computed
# once, when this cell runs — presumably it is short-lived, so re-run this cell if
# evaluator calls start failing with authentication errors. Confirm.
evaluator_model = AzureOpenAIModelConfiguration(
    type="azure_openai",
    azure_deployment="gpt-4.1",
    azure_endpoint=config.azure_openai_endpoint,
    api_key=config.credential.get_token("https://cognitiveservices.azure.com/.default").token,
)

In [5]:
# Showcase Intent Resolution Evaluator
from azure.ai.evaluation import IntentResolutionEvaluator

# Build the intent-resolution evaluator on the shared model configuration.
intres_evaluator = IntentResolutionEvaluator(model_config=evaluator_model, credential=credential)

# Evaluate the agent's answer against the original question. As the last expression
# in the cell, the returned result is displayed as the cell output.
intres_evaluator(
    query=query,
    response=response.text,
)


Here’s why:
- Dr. Amanda Foster is a Senior Partner and the Healthcare Practice Lead at Meridian.
- She has 14 years of total experience (4 years at Meridian).
- Her expertise areas include artificial intelligence, machine learning, healthcare analytics, cloud architecture, and data strategy.
- She has led healthcare projects such as predictive analytics for patient outcomes and is highly regarded in both the AI/ML and healthcare domains.
- Dr. Foster’s recent achievements include AI implementations for healthcare and other industries that have generated significant value.
- She is recognized as an expert (10+ years) in both AI/ML and the healthcare sector.
- Contact: amanda.foster@meridianstrategic.com

Note: Dr. Foster will be available for new engagements starting February 2025. If your 6-month project can align with that timeframe, she would be the most qualified match. If you need someone to start immediately or require alternative options, Kevin Liu (Principal, Cloud & Infrastru

{'intent_resolution': 5.0,
 'intent_resolution_result': 'pass',
 'intent_resolution_threshold': 3,
 'intent_resolution_reason': 'The user requested the best consultant with AI/ML and healthcare expertise for a 6-month project. The agent identified Dr. Amanda Foster as the top match, provided detailed qualifications, and offered alternatives if timing is an issue, fully resolving the intent with thoroughness and clarity.'}

In [6]:
# Showcase Groundedness Evaluator
from azure.ai.evaluation import GroundednessEvaluator

# Build the groundedness evaluator on the shared model configuration.
groundedness_evaluator = GroundednessEvaluator(
    model_config=evaluator_model,
    credential=credential,
)

# NOTE(review): `context` is a hard-coded sentence, not the chunks returned by
# `search_knowledge_base`, so the score reflects this sentence only.
groundedness_evaluator(
    context="Dr. Amanda foster is a data scientist with 10 years of experience in the healthcare industry. She has worked on multiple AI/ML projects and has expertise in machine learning, data analysis, and statistical modeling.",
    query=query,
    response=response.text,
)

{'groundedness': 5.0,
 'gpt_groundedness': 5.0,
 'groundedness_reason': 'The RESPONSE is fully correct, complete, and directly addresses the QUERY using all relevant details from the CONTEXT, with no unrelated or incorrect information.',
 'groundedness_result': 'pass',
 'groundedness_threshold': 3}

In [7]:
# Testing Document Retrieval 
from azure.ai.evaluation import DocumentRetrievalEvaluator, AIAgentConverter

# Ideal retrieval result for the query: one entry per document, each labelled
# with a relevance grade from 0 (not relevant) to 5 (highly relevant).
retrieval_ground_truth = [
    {"document_id": doc_id, "query_relevance_label": label}
    for doc_id, label in (
        ("people-expertise/expert-profiles.md", 5),
        ("people-expertise/skills-matrix.md", 5),
        ("core-business/industry-expertise.md", 4),
        ("core-business/service-offerings.md", 3),
        ("market-intelligence/industry-trends-q4-2024.md", 1),
        ("sales-proposals/proposal-templates.md", 0),
    )
]

# What the agent's search actually returned from the index: document ids paired
# with the relevance score reported for each, listed in descending score order.
retrieved_documents = [
    {"document_id": doc_id, "relevance_score": score}
    for doc_id, score in (
        ("people-expertise/skills-matrix.md", 2.395587682723999),
        ("people-expertise/expert-profiles.md", 2.332935094833374),
        ("core-business/industry-expertise.md", 2.2740046977996826),
        ("core-business/service-offerings.md", 2.2369625568389893),
        ("market-intelligence/industry-trends-q4-2024.md", 2.2054591178894043),
        ("market-intelligence/competitive-analysis.md", 2.0840091705322266),
    )
]


# The label bounds match the 0-5 scale used in `retrieval_ground_truth`.
document_retrieval_evaluator = DocumentRetrievalEvaluator(ground_truth_label_min=0, ground_truth_label_max=5)

# Compare what was retrieved against the labelled ground truth. As the last
# expression in the cell, the returned metrics are displayed as the cell output.
document_retrieval_evaluator(
    retrieved_documents=retrieved_documents,
    retrieval_ground_truth=retrieval_ground_truth,
)

{'ndcg@3': 1.0,
 'xdcg@3': 120.40816326530613,
 'fidelity': 1.0,
 'top1_relevance': 5,
 'top3_max_relevance': 5,
 'holes': 1,
 'holes_ratio': 0.16666666666666666,
 'total_retrieved_documents': 6,
 'total_ground_truth_documents': 6,
 'ndcg@3_result': 'pass',
 'ndcg@3_threshold': 0.5,
 'ndcg@3_higher_is_better': True,
 'xdcg@3_result': 'pass',
 'xdcg@3_threshold': 50.0,
 'xdcg@3_higher_is_better': True,
 'fidelity_result': 'pass',
 'fidelity_threshold': 0.5,
 'fidelity_higher_is_better': True,
 'top1_relevance_result': 'fail',
 'top1_relevance_threshold': 50.0,
 'top1_relevance_higher_is_better': True,
 'top3_max_relevance_result': 'fail',
 'top3_max_relevance_threshold': 50.0,
 'top3_max_relevance_higher_is_better': True,
 'holes_result': 'fail',
 'holes_threshold': 0,
 'holes_higher_is_better': False,
 'holes_ratio_result': 'fail',
 'holes_ratio_threshold': 0,
 'holes_ratio_higher_is_better': False,
 'total_retrieved_documents_result': 'fail',
 'total_retrieved_documents_threshold': 50

In [10]:
# Run evaluation on a batch of data
from azure.ai.evaluation import evaluate, QAEvaluator

# Evaluator applied to every row of the data file, registered under the key "qa".
qa_evaluator = QAEvaluator(model_config=evaluator_model, credential=credential)

# Maps each evaluator input to the column of the same name in the data file.
qa_column_mapping = {
    "query": "${data.query}",
    "context": "${data.context}",
    "response": "${data.response}",
}

# NOTE(review): "ouputs" in the output path looks like a typo for "outputs"; it is
# left as-is because the file name is runtime behavior that other tooling may rely on.
result = evaluate(
    data="eval.jsonl",
    evaluators={"qa": qa_evaluator},
    evaluator_config={"qa": {"column_mapping": qa_column_mapping}},
    output_path="./eval.ouputs.json",
)

2025-10-05 20:03:32 +0200   20004 execution.bulk     INFO     Finished 20 / 20 lines.
2025-10-05 20:03:32 +0200   20004 execution.bulk     INFO     Average execution time for completed lines: 2.3 seconds. Estimated time for incomplete lines: 0.0 seconds.





Run name: "qa_20251005_180246_389677"
Run status: "Completed"
Start time: "2025-10-05 18:02:46.389677+00:00"
Duration: "0:00:45.992969"


{
    "qa": {
        "status": "Completed",
        "duration": "0:00:45.992969",
        "completed_lines": 20,
        "failed_lines": 0,
        "log_path": null
    }
}


Evaluation results saved to "F:\repo\evaluation-intro\eval.ouputs.json".



In [None]:
# Cleanup

# delete all threads
threads = agent_client.project_client.agents.threads.list()
async for t in threads:
    await agent_client.project_client.agents.threads.delete(thread_id=t.id)

# delete all agents
agents = agent_client.project_client.agents.list_agents()
async for a in agents:
    await agent_client.project_client.agents.delete_agent(agent_id=a.id)