# Evaluate your RAG

In [1]:
from dotenv import load_dotenv
import os

# Load settings from a .env file. Create that file yourself and define every
# variable read below, replacing the placeholders with your own values.
load_dotenv()

# Azure OpenAI connection settings (each is None when the variable is not set).
AISTUDIO_AZURE_OPENAI_KEY = os.environ.get("AISTUDIO_AZURE_OPENAI_KEY")
AISTUDIO_AZURE_OPENAI_ENDPOINT = os.environ.get("AISTUDIO_AZURE_OPENAI_ENDPOINT")
AISTUDIO_OPENAI_GPT4_DEPLOYMENT_NAME = os.environ.get("AISTUDIO_OPENAI_GPT4_DEPLOYMENT_NAME")

# Azure subscription and AI Studio project identifiers.
AZURE_SUBSCRIPTION_ID = os.environ.get("AZURE_SUBSCRIPTION_ID")
AZURE_AISTUDIO_PROJECT_RESOURCE_GROUP = os.environ.get("AZURE_AISTUDIO_PROJECT_RESOURCE_GROUP")
AZURE_AISTUDIO_PROJECT_NAME = os.environ.get("AZURE_AISTUDIO_PROJECT_NAME")

# API version string kept alongside the other settings.
api_version = "2024-02-15-preview"

You can override the model configuration with either `AzureOpenAIModelConfiguration` or `OpenAIModelConfiguration`.

In [2]:
from promptflow.core import AzureOpenAIModelConfiguration, OpenAIModelConfiguration

# Override the model configuration with AzureOpenAIModelConfiguration.
# The same model is used here; the point is only to show how an override is built.
configuration = AzureOpenAIModelConfiguration(
    azure_endpoint="${env:AISTUDIO_AZURE_OPENAI_ENDPOINT}",  # Use ${env:<ENV_NAME>} to surround the environment variable name.
    api_key="${env:AISTUDIO_AZURE_OPENAI_KEY}",
    # Take the deployment name from the value loaded from .env in the first cell
    # rather than a hard-coded literal; fall back to "gpt-4" when it is unset or empty.
    azure_deployment=AISTUDIO_OPENAI_GPT4_DEPLOYMENT_NAME or "gpt-4",
)

# Alternative: override configuration with OpenAIModelConfiguration
# configuration = OpenAIModelConfiguration(
#     base_url="${env:OPENAI_BASE_URL}",
#     api_key="${env:OPENAI_API_KEY}",
#     model="gpt-3.5-turbo"
# )

# Bundle the configuration with request parameters (responses capped at 512 tokens).
# NOTE(review): override_model is not passed to any call in the cells shown in this
# notebook - confirm whether it should be wired into the runs below.
override_model = {"configuration": configuration, "parameters": {"max_tokens": 512}}


In [3]:
from promptflow.client import PFClient

# Client used by the cells below to submit runs (pf.run), fetch run details
# (pf.get_details) and open the visualization (pf.visualize).
pf = PFClient()

In [4]:
# Inputs of the batch run: the prompty under test and the JSONL dataset.
flow = "./prompts/basic.prompty"
data = "./test-data/evaluation_dataset.jsonl"

# Submit the run; the `question` field of each data row feeds the prompty's
# `question` input, and logs are streamed into the cell output.
base_run = pf.run(
    flow=flow,
    data=data,
    column_mapping={"question": "${data.question}"},
    stream=True,
)

Starting prompt flow service...


[2024-09-21 18:52:41 +0300][promptflow._sdk._orchestrator.run_submitter][INFO] - Submitting run prompts_20240921_185230_058822, log path: C:\Users\dschlesinger\.promptflow\.runs\prompts_20240921_185230_058822\logs.txt


You can stop the prompt flow service with the following command:'[1mpf service stop[0m'.

You can view the traces in local from http://127.0.0.1:23333/v1.0/ui/traces/?#run=prompts_20240921_185230_058822
2024-09-21 18:52:41 +0300   55672 execution.bulk     INFO     Current thread is not main thread, skip signal handler registration in BatchEngine.
2024-09-21 18:52:46 +0300   55672 execution.bulk     INFO     Current system's available memory is 17159.93359375MB, memory consumption of current process is 186.31640625MB, estimated available worker count is 17159.93359375/186.31640625 = 92
2024-09-21 18:52:46 +0300   55672 execution.bulk     INFO     Set process count to 4 by taking the minimum value among the factors of {'default_worker_count': 4, 'row_count': 13, 'estimated_worker_count_based_on_memory_usage': 92}.
2024-09-21 18:52:50 +0300   55672 execution.bulk     INFO     Process name(SpawnProcess-5)-Process id(45860)-Line number(0) start execution.
2024-09-21 18:52:50 +0300   55672

In [5]:
# Fetch the details of the base run as a table (inputs and the prompty output per row).
details = pf.get_details(base_run)
# Display only the first 10 rows as the cell output.
details.head(10)

Unnamed: 0,inputs.question,inputs.line_number,outputs.output
0,Which tent is the most waterproof?,0,The most waterproof tent is subjective and can...
1,Which camping table holds the most weight?,1,The weight capacity of camping tables varies b...
2,How much does TrailWalker Hiking Shoes cost?,2,The price of TrailWalker Hiking Shoes can vary...
3,What is the proper care for trailwalker hiking...,3,To care for trailwalker hiking shoes:\n\n1. Cl...
4,What brand is for TrailMaster tent?,4,TrailMaster tents are a product of the brand O...
5,How do I carry the TrailMaster tent around?,5,"To carry the TrailMaster tent, first ensure it..."
6,What is the floor area for Floor Area?,6,"The term ""floor area"" can refer to several dif..."
7,What is the material for TrailBlaze Hiking Pants,7,The material for TrailBlaze Hiking Pants can v...
8,What color does TrailBlaze Hiking Pants come in,8,The color options for TrailBlaze Hiking Pants ...
9,Cant he warrenty for TrailBlaze pants be trans...,9,The transferability of a warranty for TrailBla...


In [13]:
eval_prompty = "./prompts/eval.prompty"  # path to the evaluation prompty file

# Evaluate the answers produced by base_run against the ground truth in the dataset.
eval_run = pf.run(
    flow=eval_prompty,
    # Reuse the dataset path that base_run was created from, so the evaluation
    # always reads the same file even if that path is changed later.
    data=data,
    run=base_run,  # specify base_run as the run you want to evaluate
    column_mapping={
        "question": "${data.question}",  # original question from the dataset
        "answer": "${run.outputs.output}",  # answer generated by base_run
        "truth": "${data.ground_truth}",  # expected answer from the dataset
    },
    stream=True,
)

[2024-09-21 20:36:48 +0300][promptflow._sdk._orchestrator.run_submitter][INFO] - Submitting run prompts_20240921_203648_582918, log path: C:\Users\dschlesinger\.promptflow\.runs\prompts_20240921_203648_582918\logs.txt


Prompt flow service has started...
You can view the traces in local from http://127.0.0.1:23333/v1.0/ui/traces/?#run=prompts_20240921_203648_582918
2024-09-21 20:36:48 +0300   55672 execution.bulk     INFO     Current thread is not main thread, skip signal handler registration in BatchEngine.
2024-09-21 20:36:48 +0300   55672 execution.bulk     INFO     Current system's available memory is 18931.99609375MB, memory consumption of current process is 125.21875MB, estimated available worker count is 18931.99609375/125.21875 = 151
2024-09-21 20:36:48 +0300   55672 execution.bulk     INFO     Set process count to 4 by taking the minimum value among the factors of {'default_worker_count': 4, 'row_count': 13, 'estimated_worker_count_based_on_memory_usage': 151}.
2024-09-21 20:37:12 +0300   55672 execution.bulk     INFO     Process name(SpawnProcess-13)-Process id(54640)-Line number(0) start execution.
2024-09-21 20:37:12 +0300   55672 execution.bulk     INFO     Process name(SpawnProcess-12)-P

In [14]:
# Fetch the details of the evaluation run (inputs plus the score and explanation outputs).
# NOTE(review): this rebinds `details`, replacing the base-run details from the earlier cell.
details = pf.get_details(eval_run)
# Display only the first 10 rows as the cell output.
details.head(10)

Unnamed: 0,inputs.question,inputs.answer,inputs.ground_truth,inputs.truth,inputs.line_number,outputs.score,outputs.explanation
0,Which tent is the most waterproof?,The most waterproof tent is subjective and can...,The Alpine Explorer Tent has the highest rainf...,The Alpine Explorer Tent has the highest rainf...,0,4,The answer provides a detailed explanation of ...
1,Which camping table holds the most weight?,The weight capacity of camping tables varies b...,The Adventure Dining Table has a higher weight...,The Adventure Dining Table has a higher weight...,1,4,The answer provides a useful range and suggest...
2,How much does TrailWalker Hiking Shoes cost?,The price of TrailWalker Hiking Shoes can vary...,$110,$110,2,4,The answer provides a comprehensive explanatio...
3,What is the proper care for trailwalker hiking...,To care for trailwalker hiking shoes:\n\n1. Cl...,"After each use, remove any dirt or debris by b...","After each use, remove any dirt or debris by b...",3,5,The answer provides comprehensive care instruc...
4,What brand is for TrailMaster tent?,TrailMaster tents are a product of the brand O...,OutdoorLiving,OutdoorLiving,4,2,TrailMaster is not a known product line of Oza...
5,How do I carry the TrailMaster tent around?,"To carry the TrailMaster tent, first ensure it...",Carry bag included for convenient storage and...,Carry bag included for convenient storage and...,5,4,The answer provides a general method for carry...
6,What is the floor area for Floor Area?,"The term ""floor area"" can refer to several dif...",80 square feet,80 square feet,6,4,The answer correctly explains that 'floor area...
7,What is the material for TrailBlaze Hiking Pants,The material for TrailBlaze Hiking Pants can v...,Made of high-quality nylon fabric,Made of high-quality nylon fabric,7,3,The answer provides a general description of m...
8,What color does TrailBlaze Hiking Pants come in,The color options for TrailBlaze Hiking Pants ...,Khaki,Khaki,8,4,The answer provides a helpful direction to fin...
9,Cant he warrenty for TrailBlaze pants be trans...,The transferability of a warranty for TrailBla...,he warranty is non-transferable and applies on...,he warranty is non-transferable and applies on...,9,4,The answer provides a comprehensive response t...


In [15]:
# Visualize both runs (the base run and its evaluation run) in the prompt flow UI.
pf.visualize([base_run, eval_run])

Prompt flow service has started...
The HTML file is generated at 'C:\\Users\\dschlesinger\\AppData\\Local\\Temp\\pf-visualize-detail-0yr5vf3p.html'.
Trying to view the result in a web browser...
Successfully visualized from the web browser.
