# Evaluate RAG answer quality

## Set up API clients

In [1]:
import os

import azure.identity
import dotenv
from azure.search.documents import SearchClient
from azure.search.documents.models import VectorizedQuery
from openai import OpenAI

# Load variables from a .env file (if one is found) into the environment.
dotenv.load_dotenv()

# Credential based on the Azure Developer CLI login; the tenant comes from
# AZURE_TENANT_ID (os.getenv yields None when the variable is unset).
azure_credential = azure.identity.AzureDeveloperCliCredential(
    tenant_id=os.getenv("AZURE_TENANT_ID"),
)

# Azure OpenAI settings and client
AZURE_OPENAI_SERVICE = os.getenv("AZURE_OPENAI_SERVICE")
AZURE_OPENAI_EMBEDDING_DEPLOYMENT = os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT")

# Token callable for the Cognitive Services scope; it is passed to the OpenAI
# client in the api_key slot instead of a static key.
token_provider = azure.identity.get_bearer_token_provider(
    azure_credential,
    "https://cognitiveservices.azure.com/.default",
)
openai_client = OpenAI(
    api_key=token_provider,
    base_url=f"https://{AZURE_OPENAI_SERVICE}.openai.azure.com/openai/v1",
)

def get_embedding(text):
    """Return the embedding vector for ``text``.

    Sends ``text`` to ``openai_client.embeddings.create`` with the
    ``AZURE_OPENAI_EMBEDDING_DEPLOYMENT`` model and returns the ``embedding``
    attribute of the first entry in the response's ``data``.
    """
    response = openai_client.embeddings.create(
        input=text,
        model=AZURE_OPENAI_EMBEDDING_DEPLOYMENT,
    )
    first_item = response.data[0]
    return first_item.embedding

# Azure AI Search settings and client
AZURE_SEARCH_SERVICE = os.getenv("AZURE_SEARCH_SERVICE")
AZURE_SEARCH_ENDPOINT = f"https://{AZURE_SEARCH_SERVICE}.search.windows.net"
# Index name and vector field fall back to these defaults when the
# corresponding environment variables are unset.
AZURE_SEARCH_FULL_INDEX = os.getenv("AZURE_SEARCH_INDEX", "gptkbindex")
AZURE_SEARCH_EMBEDDING_FIELD = os.getenv("AZURE_SEARCH_EMBEDDING_FIELD", "embedding")

# Reuses the same credential object that backs the OpenAI token provider.
search_client = SearchClient(
    endpoint=AZURE_SEARCH_ENDPOINT,
    index_name=AZURE_SEARCH_FULL_INDEX,
    credential=azure_credential,
)

## Get answer for a question

In [4]:
user_question = "What does a product manager do?"
user_question_vector = get_embedding(user_question)

# Hybrid retrieval: the question text is sent as the keyword query alongside a
# vector query over the embedding field, with query_type="semantic".
r = search_client.search(
    user_question,
    top=5,
    # Only these two fields are read when building `sources`, so do not
    # retrieve the remaining fields of each document.
    select=["sourcepage", "content"],
    vector_queries=[
        VectorizedQuery(
            vector=user_question_vector,
            k_nearest_neighbors=50,
            fields=AZURE_SEARCH_EMBEDDING_FIELD,
        )
    ],
    query_type="semantic",
    semantic_configuration_name="default",
)

# One "[sourcepage]: content" entry per retrieved document; the bracketed name
# is what the system prompt asks the model to cite.
sources = "\n\n".join(f"[{doc['sourcepage']}]: {doc['content']}\n" for doc in r)

SYSTEM_MESSAGE = """
Assistant helps company employees with questions about the employee handbook. Be brief in your answers.
Answer ONLY with the facts listed in the list of sources below.
If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below.
Each source has a name followed by colon and the actual information, include the source name for each fact you use.
Use square brackets to reference the source, for example [info1.txt].
"""
USER_MESSAGE = user_question + "\nSources: " + sources

# Ask the chat deployment to answer using only the retrieved sources.
response = openai_client.chat.completions.create(
    model=os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME"),
    temperature=0.7,
    messages=[
        {"role": "system", "content": SYSTEM_MESSAGE},
        {"role": "user", "content": USER_MESSAGE},
    ],
)

# `answer` and `sources` are reused by the evaluation cell further down.
answer = response.choices[0].message.content
print(answer)

A product manager at Contoso Electronics is responsible for developing and implementing product strategies and plans that support business objectives and growth. They lead a team in executing these strategies, research market trends and customer needs, oversee product roadmaps, manage product life cycles from concept to launch, and monitor product performance and customer feedback. They also identify strategic partnership opportunities, manage product budgets, develop pricing and promotional strategies, and collaborate with cross-functional teams to ensure products meet customer needs. The role requires analyzing competitors and market trends to keep products competitive and successful in the marketplace [role_library.pdf#page=17, #page=23, #page=29].


## Evaluate the answer quality

We can use the `azure-ai-evaluation` package to run GPT-based evaluators on the RAG responses.

In [None]:
import os

from azure.ai.evaluation import AzureOpenAIModelConfiguration, GroundednessEvaluator, RelevanceEvaluator

# Endpoint and deployment name handed to both evaluators below.
model_config: AzureOpenAIModelConfiguration = {
    "azure_endpoint": f"https://{AZURE_OPENAI_SERVICE}.openai.azure.com",
    "azure_deployment": os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME"),
}

relevance_eval = RelevanceEvaluator(model_config)
groundedness_eval = GroundednessEvaluator(model_config)

# Relevance: scored from the question, the generated answer and the sources.
# NOTE(review): confirm the installed azure-ai-evaluation version still uses
# `context` for relevance; some releases may only take query/response.
relevance_score = relevance_eval(query=user_question, response=answer, context=sources)
print(relevance_score)

# Groundedness: scored from the generated answer and the retrieved sources.
# If you see NaN, make sure you're using a GPT-4 level model
groundedness_score = groundedness_eval(response=answer, context=sources)
print(groundedness_score)

{'relevance': 5.0, 'gpt_relevance': 5.0, 'relevance_reason': 'The response fully addresses the query with accurate and complete information about the role of a product manager, and it includes additional insights that enhance understanding.'}
{'groundedness': 5.0, 'gpt_groundedness': 5.0, 'groundedness_reason': 'The RESPONSE is fully grounded in the CONTEXT, accurately reflecting the responsibilities and qualifications of a product manager as described in the provided material.'}
