In [1]:
import dotenv
# Load environment variables from the .env file two directories above this notebook.
dotenv.load_dotenv(dotenv_path='../../.env')

True

In [None]:
# Evaluation data: the "ragas_eval" configuration of explodinggradients/fiqa.
from datasets import load_dataset

fiqa_eval = load_dataset(path="explodinggradients/fiqa", name="ragas_eval")
fiqa_eval  # last expression, so the notebook displays what was loaded

In [3]:
from ragas.metrics import (
    context_precision,
    answer_relevancy,
    faithfulness,
    context_recall,
)
from ragas.metrics.critique import harmfulness

# Metrics passed to the evaluation run; the list order is preserved deliberately.
metrics = [
    # imported from ragas.metrics
    faithfulness, answer_relevancy, context_recall, context_precision,
    # imported from ragas.metrics.critique
    harmfulness,
]

In [4]:
from langchain.chat_models import AzureChatOpenAI
from langchain.embeddings import AzureOpenAIEmbeddings
from ragas.llms import LangchainLLM
import os

# Chat model backed by the Azure OpenAI deployment named "gpt-4".
azure_model = AzureChatOpenAI(
    model="gpt-4",
    azure_deployment="gpt-4",
    api_version="2023-08-01-preview",
)
# Wrap the LangChain chat model in ragas' LangchainLLM adapter ...
ragas_azure_model = LangchainLLM(azure_model)
# ... and make answer_relevancy use the wrapped model.
answer_relevancy.llm = ragas_azure_model

# Azure embeddings client; its deployment, key and endpoint are read from
# environment variables (os.getenv yields None for any variable that is unset).
azure_embeddings = AzureOpenAIEmbeddings(
    azure_endpoint=os.getenv("AZURE_EMBED_ENDPOINT"),
    openai_api_key=os.getenv("AZURE_EMBED_API_KEY"),
    deployment=os.getenv("AZURE_EMBED_RESOURCE_NAME"),
)
# Embeddings are only replaced on answer_relevancy; the LangChain embeddings
# object is assigned directly, without a wrapper.
answer_relevancy.embeddings = azure_embeddings

In [5]:
# Point every metric in `metrics` at the Azure-backed ragas LLM.
# Plain attribute assignment replaces the explicit `__setattr__("llm", ...)`
# call: it goes through the same attribute-setting hook but is the idiomatic form.
for m in metrics:
    m.llm = ragas_azure_model

In [6]:
from ragas import evaluate

# Run every metric in `metrics` over the "baseline" entry of fiqa_eval.
result = evaluate(fiqa_eval["baseline"], metrics=metrics)

result  # last expression, so the notebook displays the scores

evaluating with [faithfulness]


  0%|          | 0/2 [00:00<?, ?it/s]

100%|██████████| 2/2 [15:32<00:00, 466.30s/it]


evaluating with [answer_relevancy]


100%|██████████| 2/2 [01:16<00:00, 38.02s/it]


evaluating with [context_recall]


100%|██████████| 2/2 [13:14<00:00, 397.35s/it]


evaluating with [context_precision]


100%|██████████| 2/2 [00:30<00:00, 15.02s/it]


evaluating with [harmfulness]


100%|██████████| 2/2 [02:13<00:00, 66.84s/it]


{'faithfulness': 0.7240, 'answer_relevancy': 0.9381, 'context_recall': 0.6770, 'context_precision': 0.9000, 'harmfulness': 0.0000}

In [7]:
# Convert the evaluation result to a pandas DataFrame and preview its first five rows.
df = result.to_pandas()
df.head(n=5)

Unnamed: 0,question,contexts,answer,ground_truths,faithfulness,answer_relevancy,context_recall,context_precision,harmfulness
0,How to deposit a cheque issued to an associate...,[Just have the associate sign the back and the...,\nThe best way to deposit a cheque issued to a...,[Have the check reissued to the proper payee.J...,0.4,0.982729,0.75,1.0,0
1,Can I send a money order from USPS as a business?,[Sure you can. You can fill in whatever you w...,"\nYes, you can send a money order from USPS as...",[Sure you can. You can fill in whatever you w...,0.833333,0.928929,1.0,1.0,0
2,1 EIN doing business under multiple business n...,[You're confusing a lot of things here. Compan...,"\nYes, it is possible to have one EIN doing bu...",[You're confusing a lot of things here. Compan...,0.8,0.941605,1.0,1.0,0
3,Applying for and receiving business credit,[Set up a meeting with the bank that handles y...,\nApplying for and receiving business credit c...,"[""I'm afraid the great myth of limited liabili...",1.0,0.919312,1.0,1.0,0
4,401k Transfer After Business Closure,[The time horizon for your 401K/IRA is essenti...,\nIf your employer has closed and you need to ...,[You should probably consult an attorney. Howe...,1.0,0.859501,0.0,1.0,0
