In [1]:
from azure.ai.evaluation.simulator import AdversarialSimulator

[INFO] Could not import AIAgentConverter. Please install the dependency with `pip install azure-ai-projects`.


In [2]:
import time
import uuid
import json
import pandas as pd
from pathlib import Path

# Number of adversarial samples requested from the simulator; the value is
# also embedded in the names of the files written by later cells.
count = 10

# Every artifact produced by this notebook is written below this folder.
output_folder = "output"
# Create it up front (missing parents included); a no-op when it already exists.
Path(output_folder).mkdir(exist_ok=True, parents=True)

In [None]:
from azure.identity import DefaultAzureCredential

# Identifies the Azure AI project that the simulator and the safety evaluator
# are constructed with. Fill in all three values before running later cells.
azure_ai_project = dict(
    subscription_id="",
    resource_group_name="",
    project_name="",
)

# Base URL of the endpoint under test. The callback appends the query text
# directly to this string, so include any path / query-string prefix here.
api_url = ""

In [None]:
from pathlib import Path
import requests

def call_streaming_url(url, timeout=None, retry_delay=10):
    """Fetch ``url`` with a streaming GET and return the whole body as text.

    Args:
        url: Fully formed URL to request.
        timeout: Optional timeout forwarded to ``requests.get``. ``None``
            (the default) applies no timeout.
        retry_delay: Seconds to wait before the single retry that follows a
            failed first attempt.

    Returns:
        The complete response body decoded as UTF-8.

    Raises:
        requests.RequestException: If the retry fails as well.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    try:
        response = requests.get(url, stream=True, timeout=timeout)
    except requests.RequestException:
        # Only request-level failures are retried (exactly once); anything
        # else, including KeyboardInterrupt, propagates immediately.
        time.sleep(retry_delay)
        response = requests.get(url, stream=True, timeout=timeout)

    try:
        # Collect raw bytes and decode once at the end: decoding each chunk
        # separately fails when a multi-byte UTF-8 character straddles a
        # chunk boundary.
        body = b"".join(
            chunk for chunk in response.iter_content(chunk_size=8192) if chunk
        )
    finally:
        # Streaming responses hold their connection until explicitly closed.
        response.close()

    return body.decode("utf-8")

In [None]:
from typing import List, Dict, Any, Optional
async def callback(
    messages: Dict[str, Any],
    stream: bool = False,
    session_state: Any = None,
) -> dict:
    """Simulator target: send the simulated query to ``api_url`` and reply.

    Args:
        messages: Payload holding the conversation under ``"messages"`` and,
            optionally, extra inputs under ``"template_parameters"``.
        stream: Passed through unchanged in the returned dict.
        session_state: Passed through unchanged in the returned dict.

    Returns:
        A dict with ``"messages"`` (the same conversation list with the
        assistant reply appended), ``"stream"`` and ``"session_state"``.
    """
    # Function-scope import keeps this cell runnable on its own.
    from urllib.parse import quote

    conversation = messages["messages"]
    # NOTE(review): always reads the first message. That is equivalent to the
    # latest one for single-turn runs; confirm before enabling multi-turn.
    query = conversation[0]["content"]

    # Add file contents for summarization or re-write scenarios. The key may
    # be absent or None, in which case there is nothing to append.
    template_parameters = messages.get("template_parameters") or {}
    if "file_content" in template_parameters:
        query += template_parameters["file_content"]

    # Percent-encode the query so characters such as '&', '#', '?' or '+'
    # cannot truncate or alter the request URL.
    km_api_url = api_url + quote(query, safe="")
    response = call_streaming_url(km_api_url)

    # Prefer the "answer" field of a JSON body; fall back to the raw text when
    # the body is not JSON or has no such field.
    try:
        answer = json.loads(response)["answer"]
    except (ValueError, KeyError, TypeError):
        answer = response

    # Format the reply in the OpenAI message protocol.
    conversation.append(
        {
            "content": answer,
            "role": "assistant",
            "context": {},
        }
    )

    return {
        "messages": conversation,
        "stream": stream,
        "session_state": session_state,
    }

In [None]:
from azure.ai.evaluation.simulator import AdversarialScenario
from azure.identity import DefaultAzureCredential
credential = DefaultAzureCredential()

scenario = AdversarialScenario.ADVERSARIAL_QA
adversarial_simulator = AdversarialSimulator(azure_ai_project=azure_ai_project, credential=credential)

outputs = await adversarial_simulator(
        scenario=scenario, # required adversarial scenario to simulate
        target=callback, # callback function to simulate against
        max_conversation_turns=1, #optional, applicable only to conversation scenario
        max_simulation_results=count, #optional
    )

output_file_adversarial = Path(output_folder) / f"adversarial_output_{count}.jsonl"
with output_file_adversarial.open("w") as f:
    f.write(outputs.to_eval_qr_json_lines())

In [None]:
from pathlib import Path
from azure.ai.evaluation import ContentSafetyEvaluator
import base64
import pandas as pd

credential = DefaultAzureCredential()
# Evaluator whose result is read below for the violence, sexual,
# hate_unfairness and self_harm categories.
safety_evaluator = ContentSafetyEvaluator(credential=credential, azure_ai_project=azure_ai_project)

# Categories copied from each evaluation result, and the per-category fields
# ("" is the bare category key itself).
HARM_CATEGORIES = ("violence", "sexual", "hate_unfairness", "self_harm")
FIELD_SUFFIXES = ("", "_score", "_reason", "_threshold", "_result")

df = pd.read_json(output_file_adversarial, lines=True)
json_string = []
for _, row in df.iterrows():
    safety_score = safety_evaluator(query=row["query"], response=row["response"])
    json_object = {"query": row["query"], "response": row["response"]}
    # Copy every category field under its own name. This replaces the
    # hand-written mapping, which misspelled the "violence" key as "violance".
    for category in HARM_CATEGORIES:
        for suffix in FIELD_SUFFIXES:
            key = f"{category}{suffix}"
            json_object[key] = safety_score[key]
    json_string.append(json_object)

# The file carries a .jsonl extension, so write one JSON object per line
# rather than a single indented JSON array.
file_eval = Path(output_folder) / f"adversarial_output_eval_scores_{count}.jsonl"
with file_eval.open("w", encoding="utf-8") as f:
    for json_object in json_string:
        f.write(json.dumps(json_object) + "\n")

In [None]:
# import asyncio
# from azure.ai.evaluation.simulator import Simulator

# import importlib.resources as pkg_resources
# model_config = {
#         "azure_endpoint": api_url, #"<your_azure_endpoint>",
#         "azure_deployment": "gpt-4o-mini" #"<deployment_name>"
# }
# grounding_simulator = Simulator(model_config=model_config)

# package = "azure.ai.evaluation.simulator._data_sources"
# resource_name = "grounding.json"
# conversation_turns = []

# with pkg_resources.path(package, resource_name) as grounding_file:
#     with open(grounding_file, "r") as file:
#         data = json.load(file)

# for item in data:
#     conversation_turns.append([item])

# outputs = await grounding_simulator(
#     target=callback,
#     conversation_turns=conversation_turns, #generates 287 rows of data
#     max_conversation_turns=1,
# )

# output_file = "grounding_simulation_output.jsonl"
# with open(output_file, "w") as file:
#     for output in outputs:
#         file.write(output.to_eval_qr_json_lines())


In [None]:
# # Then you can pass it into our Groundedness evaluator to evaluate it for groundedness
# groundedness_evaluator = GroundednessEvaluator(model_config=model_config)
# eval_output = evaluate(
#     data=output_file,
#     evaluators={
#         "groundedness": groundedness_evaluator
#     },
#     output_path="groundedness_eval_output.json",
#     # azure_ai_project=project_scope # Optional for uploading to your Azure AI Project
# )

In [None]:
# import importlib.resources as pkg_resources

# grounding_simulator = Simulator(model_config=model_config)

# package = "azure.ai.evaluation.simulator._data_sources"
# resource_name = "grounding.json"
# conversation_turns = []

# with pkg_resources.path(package, resource_name) as grounding_file:
#     with open(grounding_file, "r") as file:
#         data = json.load(file)

# for item in data:
#     conversation_turns.append([item])

# outputs = asyncio.run(grounding_simulator(
#     target=callback,
#     conversation_turns=conversation_turns, #generates 287 rows of data
#     max_conversation_turns=1,
# ))

# output_file = "grounding_simulation_output.jsonl"
# with open(output_file, "w") as file:
#     for output in outputs:
#         file.write(output.to_eval_qr_json_lines())

# # Then you can pass it into our Groundedness evaluator to evaluate it for groundedness
# groundedness_evaluator = GroundednessEvaluator(model_config=model_config)
# eval_output = evaluate(
#     data=output_file,
#     evaluators={
#         "groundedness": groundedness_evaluator
#     },
#     output_path="groundedness_eval_output.json",
#     azure_ai_project=project_scope # Optional for uploading to your Azure AI Project
# )