# Environment Setup and Imports
Import required libraries and set up environment variables using dotenv. Configure notebook directory paths.

In [None]:
# Environment Setup and Imports

# Import required libraries
import os
import glob
import json
from datetime import datetime
from typing import Dict, List
from azure.identity import DefaultAzureCredential
from azure.ai.evaluation import (
    evaluate, 
    RelevanceEvaluator,
    CoherenceEvaluator, 
    GroundednessEvaluator,
    F1ScoreEvaluator
)
from dotenv import load_dotenv

# Load environment variables from .env file
# (load_dotenv is imported from the dotenv package above; the values are read
# back later in this file through os.environ).
load_dotenv()

# Configure notebook directory paths
# os.path.dirname('') is the empty string, so abspath() resolves it to the
# current working directory at the moment this cell runs.
# NOTE(review): presumably used because __file__ is not defined in a
# notebook — confirm the kernel's working directory is the notebook folder.
notebook_dir = os.path.abspath(os.path.dirname(''))
# Path of 'evals.jsonl' next to the notebook; it is not referenced by the
# other code visible in this file.
eval_path = os.path.join(notebook_dir, 'evals.jsonl')

# Helper Functions for File Operations
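Define helper functions for file handling, including `get_latest_eval_file()`, which resolves paths relative to the notebook's working directory via `os.path.abspath(os.path.dirname(''))`. This cell also defines the record-validation helpers `validate_record()` and `load_and_validate_results()`.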

In [None]:
# Helper Functions for File Operations

def get_latest_eval_file() -> str:
    """Return the path of the newest evaluation results file.

    Searches the ``eval_results`` folder under ``notebook_dir`` for files
    named ``eval_results_*.jsonl`` and picks the one with the most recent
    modification time.

    Returns:
        Path of the most recently modified matching file.

    Raises:
        FileNotFoundError: If no file matches the pattern.
    """
    results_dir = os.path.join(notebook_dir, 'eval_results')
    print (f"Looking for evaluation results in {results_dir}")

    pattern = os.path.join(results_dir, 'eval_results_*.jsonl')
    candidates = glob.glob(pattern)

    # "Latest" is decided by modification time, not by the name suffix
    if candidates:
        return max(candidates, key=os.path.getmtime)

    raise FileNotFoundError("No evaluation result files found")

def validate_record(record: Dict) -> bool:
    """Check if a record has all required fields and valid data.

    A record is accepted only when it is a dict that contains every required
    key, its ``kyc_context`` (when truthy) parses as JSON, and its
    ``response`` is a string with non-whitespace content.

    Args:
        record: One decoded line of an evaluation-results file.

    Returns:
        True if the record passes every check, False otherwise. Malformed
        input is rejected with False rather than raising.
    """
    required_fields = ('question_id', 'question', 'ground_truth',
                       'kyc_context', 'response', 'timestamp')

    # A decoded JSON line may be any JSON value (number, string, null, ...),
    # not only an object
    if not isinstance(record, dict):
        return False

    # Check if all required fields exist
    if not all(field in record for field in required_fields):
        return False

    # Check if kyc_context is valid JSON (an empty/None context is allowed)
    kyc_context = record['kyc_context']
    if kyc_context:
        try:
            json.loads(kyc_context)
        except (TypeError, ValueError, RecursionError):
            # TypeError: not str/bytes; ValueError: malformed JSON;
            # RecursionError: pathologically nested document
            return False

    # Check if response is a non-empty string (a null or numeric response
    # would otherwise fail on .strip())
    response = record['response']
    if not isinstance(response, str) or not response.strip():
        return False

    return True

def load_and_validate_results(filepath: str) -> List[Dict]:
    """Load and validate evaluation results from file.

    Reads the file line by line, decodes each non-blank line as JSON and
    keeps the records accepted by ``validate_record``. A summary of the
    processed, valid and filtered counts is printed.

    Args:
        filepath: Path of a JSON Lines file with one record per line.

    Returns:
        The list of records that passed validation, in file order.
    """
    valid_records = []
    total_records = 0

    # Read as UTF-8 explicitly instead of relying on the platform default
    with open(filepath, 'r', encoding='utf-8') as f:
        for line in f:
            # Blank lines are separators, not records; don't count them
            if not line.strip():
                continue

            total_records += 1
            try:
                record = json.loads(line)
            except json.JSONDecodeError:
                # Undecodable line: counted as processed, reported as filtered
                continue

            if validate_record(record):
                valid_records.append(record)

    print(f"\nProcessed {total_records} total records")
    print(f"Found {len(valid_records)} valid records")
    print(f"Filtered out {total_records - len(valid_records)} invalid records")

    return valid_records

# Data Validation Functions
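Implement the `validate_record()` and `load_and_validate_results()` functions for data validation. Note: functions with these names are also defined in the previous cell; the definitions in this cell are executed later and therefore take effect.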

In [None]:
# Data Validation Functions

def validate_record(record: Dict) -> bool:
    """Check if a record has all required fields and valid data.

    A record is accepted only when it is a dict that contains every required
    key, its ``kyc_context`` (when truthy) parses as JSON, and its
    ``response`` is a string with non-whitespace content.

    Args:
        record: One decoded line of an evaluation-results file.

    Returns:
        True if the record passes every check, False otherwise. Malformed
        input is rejected with False rather than raising.
    """
    required_fields = ('question_id', 'question', 'ground_truth',
                       'kyc_context', 'response', 'timestamp')

    # A decoded JSON line may be any JSON value (number, string, null, ...),
    # not only an object
    if not isinstance(record, dict):
        return False

    # Check if all required fields exist
    if not all(field in record for field in required_fields):
        return False

    # Check if kyc_context is valid JSON (an empty/None context is allowed)
    kyc_context = record['kyc_context']
    if kyc_context:
        try:
            json.loads(kyc_context)
        except (TypeError, ValueError, RecursionError):
            # TypeError: not str/bytes; ValueError: malformed JSON;
            # RecursionError: pathologically nested document
            return False

    # Check if response is a non-empty string (a null or numeric response
    # would otherwise fail on .strip())
    response = record['response']
    if not isinstance(response, str) or not response.strip():
        return False

    return True

def load_and_validate_results(filepath: str) -> List[Dict]:
    """Load and validate evaluation results from file.

    Reads the file line by line, decodes each non-blank line as JSON and
    keeps the records accepted by ``validate_record``. A summary of the
    processed, valid and filtered counts is printed.

    Args:
        filepath: Path of a JSON Lines file with one record per line.

    Returns:
        The list of records that passed validation, in file order.
    """
    valid_records = []
    total_records = 0

    # Read as UTF-8 explicitly instead of relying on the platform default
    with open(filepath, 'r', encoding='utf-8') as f:
        for line in f:
            # Blank lines are separators, not records; don't count them
            if not line.strip():
                continue

            total_records += 1
            try:
                record = json.loads(line)
            except json.JSONDecodeError:
                # Undecodable line: counted as processed, reported as filtered
                continue

            if validate_record(record):
                valid_records.append(record)

    print(f"\nProcessed {total_records} total records")
    print(f"Found {len(valid_records)} valid records")
    print(f"Filtered out {total_records - len(valid_records)} invalid records")

    return valid_records

# Environment Configuration
Set up validate_environment() function and configure required Azure environment variables

In [None]:
# Environment Configuration

def validate_environment():
    """Ensure every required Azure setting is present in the environment.

    A variable counts as missing when it is unset or set to an empty string.

    Raises:
        ValueError: If one or more variables are missing; the message lists
            each one together with a short description.
    """
    descriptions = {
        "AZURE_OPENAI_ENDPOINT": "Azure OpenAI endpoint URL",
        "AZURE_OPENAI_API_KEY": "Azure OpenAI API key",
        "AZURE_OPENAI_DEPLOYMENT": "Azure OpenAI deployment name",
        "AZURE_OPENAI_API_VERSION": "Azure OpenAI API version",
        "AZURE_SUBSCRIPTION_ID": "Azure subscription ID",
        "AZURE_RESOURCE_GROUP": "Azure resource group name",
        "AZURE_PROJECT_NAME": "Azure AI project name"
    }

    # Collect all problems first so the user sees them in one message
    unset = [
        f"{name} ({purpose})"
        for name, purpose in descriptions.items()
        if not os.environ.get(name)
    ]
    if not unset:
        return

    bullet_list = "\n".join(f"- {entry}" for entry in unset)
    raise ValueError("Missing required environment variables:\n" + bullet_list)

# Validate environment variables
# Runs when the cell executes and raises ValueError listing any missing
# settings, before the os.environ[...] lookups further down are reached.
validate_environment()

# Model Evaluation Setup
Initialize evaluators with model configuration and set up Azure AI project details

In [None]:
# Model Evaluation Setup

# Azure OpenAI connection settings handed to the model-based evaluators.
# Each option is read from its environment variable; a missing variable
# raises KeyError.
model_config = {
    option: os.environ[env_name]
    for option, env_name in (
        ("azure_endpoint", "AZURE_OPENAI_ENDPOINT"),
        ("api_key", "AZURE_OPENAI_API_KEY"),
        ("azure_deployment", "AZURE_OPENAI_DEPLOYMENT"),
        ("api_version", "AZURE_OPENAI_API_VERSION"),
    )
}

# Azure AI project coordinates, also taken from the environment
azure_ai_project = {
    field: os.environ[env_name]
    for field, env_name in (
        ("subscription_id", "AZURE_SUBSCRIPTION_ID"),
        ("resource_group_name", "AZURE_RESOURCE_GROUP"),
        ("project_name", "AZURE_PROJECT_NAME"),
    )
}

# Evaluators keyed by metric name. The first three are constructed with the
# model configuration; F1ScoreEvaluator is constructed without arguments.
evaluators = {
    metric: evaluator_cls(model_config=model_config)
    for metric, evaluator_cls in (
        ("relevance", RelevanceEvaluator),
        ("coherence", CoherenceEvaluator),
        ("groundedness", GroundednessEvaluator),
    )
}
evaluators["f1_score"] = F1ScoreEvaluator()

# Run Evaluation
Execute the evaluation process and display results using the evaluate() function

In [None]:
# Run Evaluation

# Locate the newest results file and report which one will be scored
latest_file = get_latest_eval_file()
print(f"Processing file: {os.path.basename(latest_file)}")

# Score the file with every configured evaluator.
# NOTE(review): the file path is passed to evaluate() as-is;
# load_and_validate_results() is not applied to it in the visible code —
# confirm that unfiltered records are intended here.
result = evaluate(
    data=latest_file,
    evaluators=evaluators,
    # One shared mapping from evaluator inputs to columns of the data file
    evaluator_config={
        "default": {
            "column_mapping": {
                "query": "${data.question}",
                "response": "${data.response}",
                "context": "${data.kyc_context}",
                "ground_truth": "${data.ground_truth}",
            },
        },
    },
    azure_ai_project=azure_ai_project,
    output_path="./eval_metrics.json",
)

# Print a header (preceded by a blank line) and then the result on its own line
print("\nEvaluation Results:", result, sep="\n")