In [None]:
import os
from azure.ai.evaluation import AzureOpenAIModelConfiguration
from azure.identity import DefaultAzureCredential
from azure.ai.evaluation import IntentResolutionEvaluator, ResponseCompletenessEvaluator
import os
from dotenv import load_dotenv

# Bring settings from a .env file into the environment before the lookups below.
load_dotenv()

# Azure OpenAI deployment settings handed to both evaluators created below.
# os.environ[...] raises KeyError right away if a required variable is missing.
model_config = AzureOpenAIModelConfiguration(
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    api_key=os.environ["AZURE_OPENAI_API_KEY"],
    api_version="2025-03-01-preview",
    azure_deployment="gpt4o",
)

# Both evaluators share the same model configuration (passed by keyword for consistency).
intent_resolution_evaluator = IntentResolutionEvaluator(model_config=model_config)
response_completeness_evaluator = ResponseCompletenessEvaluator(model_config=model_config)

# --- Intent resolution: query and response supplied as plain strings ---
# Positive example: the response directly answers what the user asked for.
result = intent_resolution_evaluator(
    query="What are the opening hours of the Eiffel Tower?",
    response="Opening hours of the Eiffel Tower are 9:00 AM to 11:00 PM.",
)
print(result)

# --- Response completeness: response compared against a ground truth ---
# Negative example: only the "Day 2" statement agrees with the ground truth.
result = response_completeness_evaluator(
    response="Itinery: Day 1 take a train to visit Disneyland outside of the city; Day 2 rests in hotel.",
    ground_truth="Itinery: Day 1 take a train to visit the downtown area for city sightseeing; Day 2 rests in hotel.",
)
print(result)

In [None]:
import os, json
import pandas as pd
from azure.ai.projects import AIProjectClient
from azure.identity import DefaultAzureCredential
from azure.ai.projects.models import FunctionTool, ToolSet
from typing import Set, Callable, Any

from dotenv import load_dotenv

# Load settings from a .env file so the os.environ[...] lookups later in this cell can resolve.
load_dotenv()

# Define a custom Python function that the agent can call as a tool
def fetch_weather(location: str) -> str:
    """
    Fetches the weather information for the specified location.

    :param location (str): The location to fetch weather for.
    :return: Weather information as a JSON string.
    :rtype: str
    """
    # NOTE(review): this function is registered with FunctionTool, which presumably
    # derives the tool description from the signature and docstring above — confirm
    # before rewording either of them.

    # Canned forecasts stand in for a call to a real weather service.
    forecasts = {"Seattle": "Sunny, 25°C", "London": "Cloudy, 18°C", "Tokyo": "Rainy, 22°C"}

    # Lookup is an exact, case-sensitive match on the location name.
    if location in forecasts:
        report = forecasts[location]
    else:
        report = "Weather data not available for this location."

    return json.dumps({"weather": report})

# Wrap fetch_weather in a FunctionTool and collect it in a ToolSet for the agent.
functions = FunctionTool({fetch_weather})
toolset = ToolSet()
toolset.add(functions)

# Create the agent
AGENT_NAME = "Seattle Tourist Assistant"

# Connect to the Azure AI project; the connection string is read from the environment
# (KeyError if AIPROJECT_CONNECTION_STRING is not set).
project_client = AIProjectClient.from_connection_string(
    credential=DefaultAzureCredential(),
    conn_str=os.environ["AIPROJECT_CONNECTION_STRING"],
)

# NOTE(review): presumably lets the SDK execute the toolset's functions on the
# agent's behalf while a run is being processed — confirm against the SDK docs.
project_client.agents.enable_auto_function_calls(toolset=toolset)

agent = project_client.agents.create_agent(
    model=os.environ["CHAT_MODEL"],
    name=AGENT_NAME,
    instructions="You are a helpful assistant",
    toolset=toolset,
)
print(f"Created agent, ID: {agent.id}")

thread = project_client.agents.create_thread()
print(f"Created thread, ID: {thread.id}")

# Create message to thread
MESSAGE = "Can you fetch me the weather in Seattle?"

message = project_client.agents.create_message(
    thread_id=thread.id,
    role="user",
    content=MESSAGE,
)
print(f"Created message, ID: {message.id}")

# Run the agent on the thread; `thread` and `run` are reused by later cells.
run = project_client.agents.create_and_process_run(thread_id=thread.id, agent_id=agent.id)

print(f"Run finished with status: {run.status}")

if run.status == "failed":
    print(f"Run failed: {run.last_error}")

print(f"Run ID: {run.id}")

# Display messages in chronological order.
# A dedicated loop variable is used so the `message` created above is not overwritten.
for thread_message in project_client.agents.list_messages(thread.id, order="asc").data:
    print(f"Role: {thread_message.role}")
    # Guard against messages with no content or a non-text first part, which
    # would otherwise raise IndexError / AttributeError and abort the listing.
    first_part = thread_message.content[0] if thread_message.content else None
    text_part = getattr(first_part, "text", None)
    content_text = text_part.value if text_part is not None else "<no text content>"
    print(f"Content: {content_text}")
    print("-" * 40)

In [None]:
import json
from azure.ai.evaluation import AIAgentConverter

# Build a converter on top of the existing project client.
converter = AIAgentConverter(project_client)

# Pick the conversation (thread) and the specific execution (run) to convert.
thread_id, run_id = thread.id, run.id

# Convert that single run, then pretty-print the result as indented JSON.
converted_data = converter.convert(thread_id, run_id)
print(json.dumps(converted_data, indent=4))

In [None]:
import json
from azure.ai.evaluation import AIAgentConverter

# Converter bound to the project client created earlier in the notebook.
converter = AIAgentConverter(project_client)

# Destination file in the current working directory: the agent output written
# here doubles as the input data for the evaluation step.
filename = os.path.join(os.getcwd(), "evaluation_input_data.jsonl")

evaluation_data = converter.prepare_evaluation_data(filename=filename, thread_ids=thread_id)

print(f"Evaluation data saved to {filename}")

In [None]:
from azure.ai.evaluation import IntentResolutionEvaluator, TaskAdherenceEvaluator, ToolCallAccuracyEvaluator
from azure.ai.projects.models import ConnectionType
import os

from dotenv import load_dotenv
# Batch run API (imported at the top of the cell rather than mid-way through it).
from azure.ai.evaluation import evaluate

# Bring settings from a .env file into the environment before the lookups below.
load_dotenv()

# Connect to the Azure AI project (KeyError if AIPROJECT_CONNECTION_STRING is not set).
project_client = AIProjectClient.from_connection_string(
    credential=DefaultAzureCredential(),
    conn_str=os.environ["AIPROJECT_CONNECTION_STRING"],
)

# Build the evaluators' model configuration from the project's default Azure OpenAI
# connection. Parentheses replace the backslash continuation, which breaks on any
# trailing whitespace after the backslash.
model_config = (
    project_client.connections.get_default(
        connection_type=ConnectionType.AZURE_OPEN_AI,
        include_credentials=True,
    )
    .to_evaluator_model_config(
        deployment_name="gpt4o",
        api_version="2025-03-01-preview",
        include_credentials=True,
    )
)

# Select evaluators; all three share the same model configuration.
intent_resolution = IntentResolutionEvaluator(model_config=model_config)
task_adherence = TaskAdherenceEvaluator(model_config=model_config)
tool_call_accuracy = ToolCallAccuracyEvaluator(model_config=model_config)

# Run every evaluator over the JSONL file of evaluation input data
# (path is relative to the current working directory).
response = evaluate(
    data="evaluation_input_data.jsonl",
    evaluation_name="agent demo - batch run",
    evaluators={
        "intent_resolution": intent_resolution,
        "task_adherence": task_adherence,
        "tool_call_accuracy": tool_call_accuracy,
    },
    # Log results to the Azure AI Foundry project for rich visualization.
    # All three environment variables must be set, otherwise KeyError is raised.
    azure_ai_project={
        "subscription_id": os.environ["AZURE_SUBSCRIPTION_ID"],
        "project_name": os.environ["AZURE_PROJECT_NAME"],
        "resource_group_name": os.environ["AZURE_RESOURCE_GROUP_NAME"],
    },
)

# Look at the average scores.
print(response["metrics"])
# Use the URL to inspect the results in the UI (label typo "Foundary" corrected).
print(f'AI Foundry URL: {response.get("studio_url")}')