In [13]:
!pip install azure-ai-evaluation
!pip install promptflow

In [14]:
import os
from dotenv import load_dotenv

# Read settings from the .env file one directory above the notebook.
load_dotenv('../.env')

# Per-deployment connection settings, keyed by model name. Each deployment has its
# own endpoint variable; both read the same AOAI_API_KEY variable.
# os.getenv returns None for any variable that is not set.
env_var = {
    deployment: {
        "endpoint": os.getenv(endpoint_variable),
        "key": os.getenv("AOAI_API_KEY"),
    }
    for deployment, endpoint_variable in (
        ("gpt-4o", "AOAI_GPT4O_ENDPOINT"),
        ("gpt-4o-mini", "AOAI_GPT4O_MINI_ENDPOINT"),
    )
}

# Identifiers of the Azure AI project, later passed to evaluate() as azure_ai_project.
azure_ai_project = {
    field: os.getenv(variable)
    for field, variable in (
        ("subscription_id", "SUBSCRIPTION_ID"),
        ("resource_group_name", "RG_NAME"),
        ("project_name", "PROJECT_NAME"),
    )
}

In [15]:
import pandas as pd

# Load the JSON Lines evaluation dataset (one JSON record per line) for a quick look.
df = pd.read_json("evaluation_dataset.jsonl", lines=True)

# Leave the preview as the cell's last expression so the notebook renders it as a
# table instead of the plain-text dump produced by print().
df.head()

In [16]:
from azure.ai.evaluation import AzureOpenAIModelConfiguration

# Configuration handed to every evaluator constructed below. It reads the
# AZURE_OPENAI_* variables, which are separate from the AOAI_* variables used for
# the per-model endpoints in env_var. os.getenv yields None for unset variables.
model_config = AzureOpenAIModelConfiguration(
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    azure_deployment=os.getenv("AZURE_OPENAI_DEPLOYMENT"),
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
)

In [17]:
from azure.ai.evaluation import evaluate
from azure.ai.evaluation import (
    RelevanceEvaluator, 
    CoherenceEvaluator,
    GroundednessEvaluator,
)
from model_endpoints import ModelEndpoints
import random
from pprint import pprint

# One evaluator per quality metric, all built from the same model_config.
relevance_evaluator = RelevanceEvaluator(model_config)
coherence_evaluator = CoherenceEvaluator(model_config)
groundedness_evaluator = GroundednessEvaluator(model_config)

# Keys of env_var: the deployments to evaluate, in order.
models = [
    "gpt-4o",
    "gpt-4o-mini",
]

path = "./evaluation_dataset.jsonl"
# Drawn once, so every model's run in this execution carries the same numeric tag.
randomNum = random.randint(1111, 9999)

# Evaluators keyed by the metric name used in the output columns.
metric_evaluators = {
    "relevance": relevance_evaluator,
    "coherence": coherence_evaluator,
    "groundedness": groundedness_evaluator,
}

# Every evaluator receives the same inputs: the response comes from the target's
# output, the context and query come from the dataset row.
shared_column_mapping = {
    "response": "${target.response}",
    "context": "${data.context}",
    "query": "${data.query}",
}

# Results of every run keyed by model name, so earlier runs are not lost when
# `results` is rebound on the next pass of the loop.
results_by_model = {}

for model in models:
    # Build the target once and reuse it for both the debug print and the run.
    target_endpoint = ModelEndpoints(env_var, model)
    pprint(target_endpoint)
    results = evaluate(
        evaluation_name="Eval-Run" + str(randomNum) + "-" + model.title(),
        data=path,
        target=target_endpoint,
        # Fresh copies per call so one run cannot affect the next through shared dicts.
        evaluators=dict(metric_evaluators),
        azure_ai_project=azure_ai_project,
        evaluator_config={
            metric: {"column_mapping": dict(shared_column_mapping)}
            for metric in metric_evaluators
        },
    )
    results_by_model[model] = results

# `results` still refers to the last model's run, as before; use
# results_by_model[<model name>] to inspect any individual run.

In [18]:
# Tabulate the per-row evaluation output. `results` is rebound on every pass of the
# model loop, so this table shows only the last model that was evaluated.
evaluation_rows = results["rows"]
pd.DataFrame(evaluation_rows)

Unnamed: 0,outputs.query,outputs.response,inputs.query,inputs.context,inputs.ground_truth,outputs.relevance.relevance,outputs.relevance.gpt_relevance,outputs.relevance.relevance_reason,outputs.coherence.coherence,outputs.coherence.gpt_coherence,outputs.coherence.coherence_reason,outputs.groundedness.groundedness,outputs.groundedness.gpt_groundedness,outputs.groundedness.groundedness_reason,line_number
0,"What event started on July 28, 1914?","The event that started on July 28, 1914, was t...","What event started on July 28, 1914?",It involved multiple countries and lasted unti...,World War I,5,5,The response accurately and completely address...,4,4,The RESPONSE is coherent and effectively addre...,3,3,The RESPONSE accurately identifies the event a...,0
1,Who was the first person to walk on the moon?,The first person to walk on the moon was astro...,Who was the first person to walk on the moon?,The event occurred during the Apollo 11 missio...,Neil Armstrong,5,5,The response accurately and completely answers...,4,4,The RESPONSE is coherent and effectively addre...,5,5,The RESPONSE accurately and thoroughly conveys...,1
2,What was the significance of the year 1776 in ...,The year 1776 is highly significant in America...,What was the significance of the year 1776 in ...,A key document was signed declaring independen...,The Declaration of Independence,5,5,The response fully addresses the query with ac...,4,4,The RESPONSE is coherent and effectively addre...,3,3,The RESPONSE accurately reflects the CONTEXT b...,2
3,"Which wall fell in 1989, symbolizing the end o...","The Berlin Wall fell in 1989, symbolizing the ...","Which wall fell in 1989, symbolizing the end o...",It divided a German city into East and West.,The Berlin Wall,5,5,The response not only answers the query accura...,5,5,The RESPONSE is coherent and directly addresse...,3,3,The RESPONSE accurately reflects the CONTEXT b...,3
4,What ancient city was buried by the eruption o...,The ancient city buried by the eruption of Mou...,What ancient city was buried by the eruption o...,The city's ruins were rediscovered in the 18th...,Pompeii,4,4,The response fully addresses the query with ac...,4,4,The response is coherent and effectively addre...,1,1,The RESPONSE introduces information about Pomp...,4
5,Who was the British Prime Minister during Worl...,The British Prime Minister during most of Worl...,Who was the British Prime Minister during Worl...,"He is famous for his leadership and speeches, ...",Winston Churchill,5,5,The RESPONSE accurately identifies the British...,4,4,"The RESPONSE is coherent, directly addresses t...",3,3,The RESPONSE is accurate in relation to the CO...,5
6,What was the name of the ship that sank on its...,The ship that sank on its maiden voyage in 191...,What was the name of the ship that sank on its...,It was deemed 'unsinkable' before it hit an ic...,RMS Titanic,4,4,The response fully addresses the query with ac...,4,4,The response is coherent as it directly answer...,5,5,"The RESPONSE is fully grounded and complete, a...",6
7,Which empire was ruled by Genghis Khan?,Genghis Khan founded and ruled the Mongol Empi...,Which empire was ruled by Genghis Khan?,This empire became the largest contiguous land...,The Mongol Empire,5,5,The response not only accurately identifies th...,4,4,The RESPONSE is coherent and directly addresse...,3,3,The RESPONSE accurately reflects the CONTEXT b...,7
8,What was the primary cause of the American Civ...,The primary cause of the American Civil War wa...,What was the primary cause of the American Civ...,The conflict between the Northern and Southern...,Slavery,5,5,The RESPONSE fully addresses the QUERY with ac...,4,4,The RESPONSE is coherent and effectively addre...,3,3,The RESPONSE accurately reflects the CONTEXT b...,8
9,Which ancient wonder was located in Egypt and ...,The ancient wonder located in Egypt that serve...,Which ancient wonder was located in Egypt and ...,It is the only one of the Seven Wonders of the...,The Great Pyramid of Giza,5,5,The response is not only accurate and complete...,4,4,The RESPONSE is coherent and effectively addre...,3,3,The RESPONSE accurately identifies the Great P...,9
