# Setup Dependencies and Imports
Import required libraries including Azure AI Project SDK, dotenv for environment variables, and custom KYC functions.

In [None]:
# Setup Dependencies and Imports

# Import required libraries
import os
import json
from datetime import datetime
from dotenv import load_dotenv
from azure.identity import DefaultAzureCredential
from azure.ai.projects import AIProjectClient
from azure.ai.projects.models import FunctionTool, ToolSet

# Import custom KYC functions
from kyc_functions import get_kyc_data, update_kyc_data

# Load environment variables
# override=True is passed so that values read from the .env file replace
# variables that are already set in the process environment (see the
# python-dotenv documentation for `load_dotenv`).
load_dotenv(override=True)

# Configure Environment
Read the project connection string from the environment and set up authentication using DefaultAzureCredential for Azure services.

In [None]:
# Configure Environment

# Authenticate with DefaultAzureCredential, then build the project client
# from the connection string held in PROJECT_CONNECTION_STRING.
# A missing variable raises KeyError here rather than later.
credential = DefaultAzureCredential()
project_client = AIProjectClient.from_connection_string(
    conn_str=os.environ["PROJECT_CONNECTION_STRING"],
    credential=credential,
)

# Define Helper Functions
Define functions for loading evaluation questions from a JSONL file, setting up the KYC agent, and running the evaluation and saving its results.

In [None]:
# Define Helper Functions

def load_questions(path=None):
    """
    Load evaluation questions from a JSONL file.

    Each non-blank line of the file is parsed as one JSON document. Blank
    lines (including a trailing empty line) are skipped instead of being
    handed to the JSON parser.

    Args:
        path: Optional path to the JSONL file. When omitted, ``evals.jsonl``
            in the current working directory is used.

    Returns:
        list: The parsed JSON documents, in file order.

    Raises:
        FileNotFoundError: If the file does not exist.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    if path is None:
        # dirname('') is '', so abspath resolves to the current working directory.
        notebook_dir = os.path.abspath(os.path.dirname(''))
        path = os.path.join(notebook_dir, 'evals.jsonl')

    questions = []
    # Explicit UTF-8 so the result does not depend on the platform's locale.
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            questions.append(json.loads(line))
    return questions

def setup_agent(project_client):
    """
    Create the KYC evaluation agent on the given project client.

    The agent is named "kyc-agent-eval" and is given a toolset that exposes
    ``get_kyc_data`` and ``update_kyc_data`` as function tools. The model
    deployment is taken from the MODEL_DEPLOYMENT_NAME environment variable,
    falling back to "gpt-4" when it is not set.

    Args:
        project_client: Client whose ``agents.create_agent`` is called.

    Returns:
        The object returned by ``project_client.agents.create_agent``.
    """
    # Toolset holding the two KYC functions
    kyc_toolset = ToolSet()
    kyc_functions = FunctionTool({get_kyc_data, update_kyc_data})
    kyc_toolset.add(kyc_functions)

    deployment_name = os.environ.get("MODEL_DEPLOYMENT_NAME", "gpt-4")
    agent_instructions = """You are a helpful KYC agent. For every question, you should:
1. Retrieve KYC data from Cosmos DB using get_kyc_data
2. Provide a clear and concise answer based on the available data"""

    # Register the agent and hand it straight back to the caller
    return project_client.agents.create_agent(
        model=deployment_name,
        name="kyc-agent-eval",
        instructions=agent_instructions,
        toolset=kyc_toolset,
    )

def run_evaluation(project_client, agent, questions):
    """
    Run the evaluation for each question and save the results.

    Every question is posted as a user message on one shared thread and
    processed with ``create_and_process_run``. For each run that does not
    fail, the output of the ``get_kyc_data`` tool call (if any) and the
    assistant's reply are captured, appended as one JSON line to
    ``eval_results/eval_results_<UTC timestamp>.jsonl`` under the current
    working directory, and collected in the returned list.

    NOTE(review): all questions share a single thread, so earlier questions
    and answers are visible to the agent on later ones -- confirm this is
    intended for the evaluation.

    Args:
        project_client: Client exposing the ``agents`` operations used here.
        agent: Agent object; only ``agent.id`` is read.
        questions: Iterable of dicts with the keys ``id``, ``question`` and
            ``ground_truth``.

    Returns:
        list[dict]: One result per question whose run did not fail, in input
        order. Questions whose run has status "failed" are reported on
        stdout and skipped.

    Raises:
        KeyError: If a question lacks one of the required keys.
    """
    # Local import: the file only imports `datetime` from the datetime module.
    from datetime import timezone

    results = []
    thread = project_client.agents.create_thread()

    # Create output file path once at the start
    notebook_dir = os.path.abspath(os.path.dirname(''))
    output_dir = os.path.join(notebook_dir, 'eval_results')
    os.makedirs(output_dir, exist_ok=True)
    # Timezone-aware replacement for the deprecated datetime.utcnow();
    # the formatted string is the same.
    timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
    output_file = os.path.join(output_dir, f'eval_results_{timestamp}.jsonl')

    for question in questions:
        print(f"\nProcessing question {question['id']}: {question['question']}")

        # Send question
        project_client.agents.create_message(
            thread_id=thread.id,
            role="user",
            content=question['question']
        )

        # Process the run and capture tool outputs
        run = project_client.agents.create_and_process_run(thread_id=thread.id, assistant_id=agent.id)
        if run.status == "failed":
            print(f"Run failed: {run.last_error}")
            continue

        # Get run steps to find KYC context from tool calls
        kyc_context = None
        run_steps = project_client.agents.list_run_steps(run_id=run.id, thread_id=thread.id)
        for step in run_steps.data:
            if step.type == "tool_calls" and step.step_details and step.step_details.tool_calls:
                for tool_call in step.step_details.tool_calls:
                    # Tool calls that are not function calls carry no `.function`.
                    function = getattr(tool_call, "function", None)
                    if function is not None and function.name == "get_kyc_data":
                        kyc_context = function.output

        # Fetch the final response from messages. The thread is shared, so an
        # assistant message that belongs to a different run is an answer to an
        # earlier question and must not be reported for this one.
        response = ""
        messages = project_client.agents.list_messages(thread_id=thread.id)
        latest_message = next(
            (
                msg for msg in messages.data
                if msg.role == "assistant"
                and getattr(msg, "run_id", None) in (None, run.id)
            ),
            None,
        )
        if latest_message and latest_message.content:
            response = latest_message.content[0].text.value

        # Store result
        result = {
            "question_id": question["id"],
            "question": question["question"],
            "ground_truth": question["ground_truth"],
            "kyc_context": kyc_context,
            "response": response.strip()
        }
        results.append(result)

        # Save result to the single file. ensure_ascii=False writes raw
        # non-ASCII characters, so the encoding is pinned to UTF-8.
        with open(output_file, 'a', encoding='utf-8') as f:
            f.write(json.dumps(result, ensure_ascii=False) + '\n')

        print(f"Completed question {question['id']}")

    return results

# Create Azure AI Project Client
Initialize the AI Project client with connection string and credentials.

In [None]:
# Create Azure AI Project Client

# Same initialisation as the "Configure Environment" cell: a fresh
# DefaultAzureCredential and a project client built from the
# PROJECT_CONNECTION_STRING environment variable.
credential = DefaultAzureCredential()
project_client = AIProjectClient.from_connection_string(
    conn_str=os.environ["PROJECT_CONNECTION_STRING"],
    credential=credential,
)

# Setup Agent and Tools
Create a KYC agent with the required toolset including get_kyc_data and update_kyc_data functions.

In [None]:
# Setup Agent and Tools

# Define the function to setup the KYC agent with the required tools and instructions
def setup_agent(project_client):
    """
    Create the KYC evaluation agent on the given project client.

    The agent is named "kyc-agent-eval" and is given a toolset that exposes
    ``get_kyc_data`` and ``update_kyc_data`` as function tools. The model
    deployment is taken from the MODEL_DEPLOYMENT_NAME environment variable,
    falling back to "gpt-4" when it is not set.

    Args:
        project_client: Client whose ``agents.create_agent`` is called.

    Returns:
        The object returned by ``project_client.agents.create_agent``.
    """
    # Toolset holding the two KYC functions
    kyc_toolset = ToolSet()
    kyc_functions = FunctionTool({get_kyc_data, update_kyc_data})
    kyc_toolset.add(kyc_functions)

    deployment_name = os.environ.get("MODEL_DEPLOYMENT_NAME", "gpt-4")
    agent_instructions = """You are a helpful KYC agent. For every question, you should:
1. Retrieve KYC data from Cosmos DB using get_kyc_data
2. Provide a clear and concise answer based on the available data"""

    # Register the agent and hand it straight back to the caller
    return project_client.agents.create_agent(
        model=deployment_name,
        name="kyc-agent-eval",
        instructions=agent_instructions,
        toolset=kyc_toolset,
    )

# Create the evaluation agent on the project client
agent = setup_agent(project_client=project_client)

# Run Evaluation Process
Execute the evaluation loop to process questions, collect responses, and save results to output files.

In [None]:
# Run Evaluation Process

# Read the evaluation set from disk
questions = load_questions()

# Send every question through the agent and write the results file
run_evaluation(project_client=project_client, agent=agent, questions=questions)