In [1]:
import os

from dotenv import load_dotenv

from clients import OpenAILLMClient, TemplateStore

# Load variables from a local .env file into the process environment so the
# AZURE_OPENAI_* settings read with os.getenv() in the next cell can be found.
# The call is left as the cell's last expression, so its boolean return value
# is what the cell displays.
load_dotenv()

True

In [2]:
# Azure OpenAI connection settings, taken from the process environment.
# A variable that is not set yields None here rather than raising, so a
# missing setting only surfaces later, when a client is built or called.
_env = os.environ
AZURE_OPENAI_API_VERSION = _env.get("AZURE_OPENAI_API_VERSION")
AZURE_OPENAI_API_KEY = _env.get("AZURE_OPENAI_API_KEY")
AZURE_OPENAI_ENDPOINT = _env.get("AZURE_OPENAI_ENDPOINT")
AZURE_OPENAI_DEPLOYMENT_NAME = _env.get("AZURE_OPENAI_DEPLOYMENT_NAME")

In [3]:
# Build the LLM client shared by every evaluation flow in this notebook.
# The Azure deployment name is what gets passed as the client's `model`.
llm_client_kwargs = {
    "key": AZURE_OPENAI_API_KEY,
    "endpoint": AZURE_OPENAI_ENDPOINT,
    "api_version": AZURE_OPENAI_API_VERSION,
    "model": AZURE_OPENAI_DEPLOYMENT_NAME,
}
llm_client = OpenAILLMClient(**llm_client_kwargs)

In [4]:
# NOTE(review): TemplateStore is also imported from `clients` in the first
# cell; presumably both paths resolve to the same class — confirm and keep
# only one import.
from clients.data_clients import TemplateStore

# Template store backed by the local "templates" directory, with validation
# switched on.
template_store = TemplateStore(
    dir_path="templates",
    do_validation=True,
)

In [5]:
from flows.retrieval_eval_flow import retrieval_eval_flow

# Sample 1: the predicted answer agrees with the ground truth.
# The sample is grouped in one mapping so its inputs read as a unit.
retrieval_case_france = {
    "question": "What is the capital of France?",
    "answer_gt": "The capital of France is Paris.",
    "answer_pred": "France's capital city is Paris, known for the Eiffel Tower.",
    "context": [
        "Paris is the capital and most populous city of France.",
        "The Eiffel Tower, located in Paris, is a famous landmark.",
        "France is a country in Western Europe known for its art, culture, and cuisine.",
        "Lyon and Marseille are also major French cities but not the capital.",
    ],
}

# Run the retrieval evaluation flow on the sample and print the metrics it returns.
dict_eval_ret = retrieval_eval_flow(
    **retrieval_case_france,
    llm_client=llm_client,
    template_store=template_store,
)

print(dict_eval_ret)

{'relevancy': 1.0}


In [6]:
import os

from promptflow.tracing import start_trace

from flows.generation_eval_flow import generation_eval_flow

# Presumably keeps Prompt flow tracing enabled for this process (TODO confirm
# how PF_DISABLE_TRACING is interpreted), then starts tracing into the
# "trace_rag_metrics" collection.
os.environ["PF_DISABLE_TRACING"] = "FALSE"
start_trace(collection="trace_rag_metrics")

# Sample 1: the predicted answer agrees with the ground truth.
generation_case_france = {
    "question": "What is the capital of France?",
    "answer_gt": "The capital of France is Paris.",
    "answer_pred": "France's capital city is Paris, known for the Eiffel Tower.",
    "context": [
        "Paris is the capital and most populous city of France.",
        "The Eiffel Tower, located in Paris, is a famous landmark.",
        "France is a country in Western Europe known for its art, culture, and cuisine.",
        "Lyon and Marseille are also major French cities but not the capital.",
    ],
}

# Run the generation evaluation flow on the sample and print the metrics it returns.
dict_eval_gen = generation_eval_flow(
    **generation_case_france,
    llm_client=llm_client,
    template_store=template_store,
)

print(dict_eval_gen)

Prompt flow service has started...
{'faithfulness': 1.0, 'precision': 1.0, 'recall': 1.0}


You can view the trace detail from the following URL:
http://127.0.0.1:23333/v1.0/ui/traces/?#collection=trace_rag_metrics&uiTraceId=0x82e2135f7032966620ab8695886dd7e3
You can view the trace detail from the following URL:
http://127.0.0.1:23333/v1.0/ui/traces/?#collection=trace_rag_metrics&uiTraceId=0x9c4c987cee14242da05d709bbdee89e4
You can view the trace detail from the following URL:
http://127.0.0.1:23333/v1.0/ui/traces/?#collection=trace_rag_metrics&uiTraceId=0xafeef42e8e842737d8d462762f8155c2
You can view the trace detail from the following URL:
http://127.0.0.1:23333/v1.0/ui/traces/?#collection=trace_rag_metrics&uiTraceId=0x113cb936ff7b33eebae6dc978862b052
You can view the trace detail from the following URL:
http://127.0.0.1:23333/v1.0/ui/traces/?#collection=trace_rag_metrics&uiTraceId=0xe99eef2da1e6e3a95ba6beef34f5ee5e
You can view the trace detail from the following URL:
http://127.0.0.1:23333/v1.0/ui/traces/?#collection=trace_rag_metrics&uiTraceId=0xf487dba590526497bffcbe1c7

In [7]:
from flows.retrieval_eval_flow import retrieval_eval_flow

# Sample 2: the predicted answer contradicts the ground truth
# (spinach/almonds/cheddar instead of basil/pine nuts/Parmesan).
retrieval_case_pesto = {
    "question": "What are the main ingredients in traditional Italian pesto?",
    "answer_gt": "Traditional Italian pesto is made with basil, garlic, pine nuts, Parmesan cheese, and olive oil.",
    "answer_pred": "Italian pesto is usually made with spinach, almonds, cheddar cheese, and olive oil.",
    "context": [
        "Pesto alla Genovese, the traditional Italian pesto, is made with fresh basil leaves, garlic, pine nuts, Parmesan cheese, and extra virgin olive oil.",
        "Spinach and other ingredients may be used in variations of pesto, but they are not part of the traditional recipe.",
        "The use of almonds or cheddar cheese is not typical in authentic Italian pesto recipes.",
        "Sunflower oil is not used in traditional Italian pesto.",
    ],
}

# Run the retrieval evaluation flow on the sample and print the metrics it returns.
dict_eval_ret = retrieval_eval_flow(
    **retrieval_case_pesto,
    llm_client=llm_client,
    template_store=template_store,
)
print(dict_eval_ret)

{'relevancy': 0.25}


In [8]:
import os

from promptflow.tracing import start_trace

# Imported here as well so this cell does not depend on the earlier
# generation-eval cell having been run first in the same kernel.
from flows.generation_eval_flow import generation_eval_flow

# Presumably keeps Prompt flow tracing enabled for this process (TODO confirm
# how PF_DISABLE_TRACING is interpreted).
os.environ["PF_DISABLE_TRACING"] = "FALSE"

# Trace into the same named collection as the other evaluation cells so all
# runs of this notebook are grouped together in the trace UI.
start_trace(collection="trace_rag_metrics")

# Sample 2: the predicted answer contradicts the ground truth
# (spinach/almonds/cheddar instead of basil/pine nuts/Parmesan).
dict_eval_gen = generation_eval_flow(
    question="What are the main ingredients in traditional Italian pesto?",
    answer_gt="Traditional Italian pesto is made with basil, garlic, pine nuts, Parmesan cheese, and olive oil.",
    answer_pred="Italian pesto is usually made with spinach, almonds, cheddar cheese, and olive oil.",
    context=[
        "Pesto alla Genovese, the traditional Italian pesto, is made with fresh basil leaves, garlic, pine nuts, Parmesan cheese, and extra virgin olive oil.",
        "Spinach and other ingredients may be used in variations of pesto, but they are not part of the traditional recipe.",
        "The use of almonds or cheddar cheese is not typical in authentic Italian pesto recipes.",
        "Sunflower oil is not used in traditional Italian pesto.",
    ],
    llm_client=llm_client,
    template_store=template_store,
)
print(dict_eval_gen)

Prompt flow service has started...
{'faithfulness': 0.25, 'precision': 0.25, 'recall': 0.0}


RAG evaluation flow

In [5]:
from clients.embedding_client import OpenAIEmbeddingClient
from clients.vector_store_client import FAISSClient
from flows.rag_flow import rag_flow

# Embedding model used to vectorise documents for the FAISS store.
EMBEDDING_MODEL = "text-embedding-ada-002"

# Same Azure endpoint / API version as the LLM client; note that this client
# takes the API key as `credential`, whereas OpenAILLMClient takes it as `key`.
embedding_client_kwargs = {
    "credential": AZURE_OPENAI_API_KEY,
    "endpoint": AZURE_OPENAI_ENDPOINT,
    "api_version": AZURE_OPENAI_API_VERSION,
    "model": EMBEDDING_MODEL,
}
embedding_client = OpenAIEmbeddingClient(**embedding_client_kwargs)

# Vector store that embeds documents through the client above.
vector_store = FAISSClient(embedding_client=embedding_client)

In [6]:
# Knowledge base for the RAG example: seven short passages on muscle building.
# NOTE(review): an earlier draft pointed `context` at
# data/equality-and-diversity-policy.pdf with a batch size of 400; confirm that
# ingest_documents accepts a file path before restoring that variant.
context = [
    "To build muscle effectively, focus on compound exercises such as squats, deadlifts, bench press, and pull-ups.",
    "Progressive overload is essential — gradually increasing the weight or reps over time will stimulate muscle growth.",
    "Eating a calorie surplus with adequate protein intake (1.6-2.2g per kg of body weight) supports muscle hypertrophy.",
    "Rest and recovery are crucial; muscles grow during rest periods, so aim for at least 7-8 hours of sleep per night.",
    "Train each major muscle group at least twice a week with a combination of volume and intensity.",
    "A typical beginner mass-building routine includes 3–4 full-body workouts per week focusing on strength and hypertrophy.",
    "Supplements like creatine monohydrate and whey protein can support your muscle-building goals when used correctly.",
]

# Batch size handed to the vector store when ingesting the passages.
INGEST_BATCH_SIZE = 64

# NOTE(review): re-running this cell calls ingest_documents again on the same
# store — check whether that adds the passages a second time.
vector_store.ingest_documents(context, batch_size=INGEST_BATCH_SIZE)

2025-05-15 14:45:35.205 | INFO     | clients.vector_store_client:ingest_documents:96 - Total documents ingested: 7


In [7]:
# Query put to the RAG flow, and the reference answer it is scored against.
question = (
    "What are the best exercises for building muscle?"
)
answer_gt = (
    "The best exercises for building muscle include compound movements like squats, deadlifts, bench press, and pull-ups."
)

In [8]:
from promptflow.tracing import start_trace

# Presumably keeps Prompt flow tracing enabled for this process (TODO confirm
# how PF_DISABLE_TRACING is interpreted), then starts tracing into the
# "trace_rag_metrics" collection.
os.environ["PF_DISABLE_TRACING"] = "FALSE"
start_trace(collection="trace_rag_metrics")

# Settings for this RAG run, named so they are easy to find and tune.
NUM_RESULTS = 3
SYSTEM_PROMPT = "Answer the question based on the provided context."

# Run the RAG flow against the ingested vector store and print the metrics
# dict it returns.
result = rag_flow(
    question=question,
    answer_gt=answer_gt,
    llm_client=llm_client,
    template_store=template_store,
    vector_store=vector_store,
    num_results=NUM_RESULTS,
    system_prompt=SYSTEM_PROMPT,
)
print(result)

Prompt flow service has started...
You can view the trace detail from the following URL:
http://127.0.0.1:23333/v1.0/ui/traces/?#collection=trace_rag_metrics&uiTraceId=0xfa05a8df5a1e566c96191725e89bf267




{'relevancy': 1.0, 'faithfulness': 0.2727272727272727, 'precision': 0.0, 'recall': 1.0}
