In [None]:
# Azure Credential imports
from azure.identity import AzureCliCredential

# IPython shell escape: runs the Azure CLI sign-in command from the notebook.
# Needs the `az` executable to be available to the kernel's shell.
!az login

# Initialize Azure credentials
# Instance of the AzureCliCredential class imported above; a later cell passes
# it to AIProjectClient as its `credential` argument.
credential = AzureCliCredential()

Connect to Microsoft AI Foundry

In [None]:
import os 

# Load environment variables from .env file
from dotenv import load_dotenv
load_dotenv()

# Endpoint of the Foundry project, read from the AZURE_PROJECT_ENDPOINT
# environment variable (load_dotenv() above runs first, so a .env file can supply it).
azure_ai_project = os.environ.get("AZURE_PROJECT_ENDPOINT")

# Fail here with an actionable message instead of handing None or "" to
# AIProjectClient in a later cell.
if not azure_ai_project:
    raise RuntimeError(
        "AZURE_PROJECT_ENDPOINT is not set. Add it to your .env file or export it "
        "before running this notebook."
    )


In [None]:
from azure.ai.projects import AIProjectClient 

# Project client: bound to the endpoint read from the environment and
# authenticated with the credential object created earlier in the notebook.
project_client = AIProjectClient(endpoint=azure_ai_project, credential=credential)

In [None]:
import json

from openai.types.evals.create_eval_jsonl_run_data_source_param import (
    CreateEvalJSONLRunDataSourceParam,
    SourceFileID,
)
from azure.ai.projects.models import (
    DatasetVersion,
)
import pathlib

# Local evaluation data: data.jsonl in the kernel's current working directory.
# Joining with pathlib uses the platform's separator instead of a hard-coded "/".
path = str(pathlib.Path.cwd() / "data.jsonl")

print("Creating an OpenAI client from the AI Project client")
client = project_client.get_openai_client()

# Upload the file as dataset "eval_data". The returned DatasetVersion's id is
# what the evaluation run later references as its data source.
# `version` is passed as a string, which is the type upload_file declares for it.
dataset: DatasetVersion = project_client.datasets.upload_file(
    name="eval_data",
    version="1",
    file_path=path,
)

# Alternative kept for re-runs: fetch the existing dataset version instead of
# uploading (presumably needed once this name/version already exists in the
# project — TODO confirm against the service behaviour).
#print(dataset)
#dataset: DatasetVersion = project_client.datasets.get(
#    name="eval_data",
#    version="1",
#)

In [None]:
from openai.types.eval_create_params import DataSourceConfigCustom

# Testing criteria handed to the evaluation: four built-in evaluators.
# Every entry has the same shape and the same data mapping (the item's
# `response` and `ground_truth` fields); only the display name, the built-in
# evaluator id and the initialization parameters differ, so those three are
# listed as rows and expanded into the full dicts below.
evaluation_criteria = [
    {
        "type": "azure_ai_evaluator",
        "name": display_name,
        "evaluator_name": builtin_evaluator,
        # A fresh mapping dict per entry, so the entries never alias each other.
        "data_mapping": {"response": "{{item.response}}", "ground_truth": "{{item.ground_truth}}"},
        "initialization_parameters": init_params,
    }
    for display_name, builtin_evaluator, init_params in (
        # The similarity evaluator is the only one given a model deployment name.
        ("Similarity", "builtin.similarity", {"deployment_name": "gpt-5", "threshold": 3}),
        (
            "ROUGEScore",
            "builtin.rouge_score",
            {
                "rouge_type": "rouge1",
                "f1_score_threshold": 0.5,
                "precision_threshold": 0.5,
                "recall_threshold": 0.5,
            },
        ),
        ("METEORScore", "builtin.meteor_score", {"threshold": 0.5}),
        ("F1Score", "builtin.f1_score", {"threshold": 0.5}),
    )
]
# Custom data-source configuration for the evaluation.
# The item schema declares four optional string fields per data row
# (nothing is listed under "required").
data_source_config = DataSourceConfigCustom(
    type="custom",
    item_schema={
        "type": "object",
        "properties": {
            "query": {"type": "string"},
            "response": {"type": "string"},
            "context": {"type": "string"},
            "ground_truth": {"type": "string"},
        },
        "required": [],
    },
    include_sample_schema=True,
)


Dataset evaluation

In [None]:
import datetime


print("Creating evaluation")
# The evaluation name carries a local-time timestamp (YYYYmmddHHMMSS) suffix.
eval_object = client.evals.create(
    name=f"dataset_eval_{datetime.datetime.now():%Y%m%d%H%M%S}",
    data_source_config=data_source_config,
    testing_criteria=evaluation_criteria,  # type: ignore
)
print(f"Evaluation created (id: {eval_object.id}, name: {eval_object.name})")


print("Creating evaluation run with Dataset ID")

# The run reads its rows from the uploaded dataset, referenced by id.
# An empty id cannot identify that data, so stop with a clear message here
# rather than sending "" to the service.
if not dataset.id:
    raise ValueError("The uploaded dataset has no id; cannot create an evaluation run from it.")

eval_run_object = client.evals.runs.create(
    eval_id=eval_object.id,
    name="dataset_id_run",
    metadata={"team": "eval-exp", "scenario": "dataset-id-v1"},
    data_source=CreateEvalJSONLRunDataSourceParam(
        type="jsonl",
        source=SourceFileID(type="file_id", id=dataset.id),
    ),
)
# Report the created run, mirroring the confirmation printed for the evaluation itself.
print(f"Evaluation run created (id: {eval_run_object.id})")