#### Testing with RAGAs and Understanding various Metrics 📈

In [None]:
#!pip install ragas

Collecting ragas
  Using cached ragas-0.2.14-py3-none-any.whl.metadata (8.5 kB)
Collecting datasets (from ragas)
  Downloading datasets-3.4.1-py3-none-any.whl.metadata (19 kB)
Collecting appdirs (from ragas)
  Using cached appdirs-1.4.4-py2.py3-none-any.whl.metadata (9.0 kB)
Collecting diskcache>=5.6.3 (from ragas)
  Using cached diskcache-5.6.3-py3-none-any.whl.metadata (20 kB)
Collecting pyarrow>=15.0.0 (from datasets->ragas)
  Using cached pyarrow-19.0.1-cp312-cp312-macosx_12_0_arm64.whl.metadata (3.3 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets->ragas)
  Using cached dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets->ragas)
  Using cached xxhash-3.5.0-cp312-cp312-macosx_11_0_arm64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets->ragas)
  Using cached multiprocess-0.70.16-py312-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.12.0,>=2023.1.0 (from fsspec[http]<=2024.12.0,>=2023.1.0->datasets->ragas)
  Using cached fsspec-2024

In [2]:
from langchain_ollama import ChatOllama

llm = ChatOllama(
    base_url="http://localhost:11434",
    model = "qwen2.5:latest",
    temperature=0.5,
    max_tokens = 250
)

### Context Recall

In [5]:
from ragas import SingleTurnSample
from ragas.metrics import LLMContextRecall
from ragas.llms import LangchainLLMWrapper

test_case = SingleTurnSample(
  user_input="Who is the current president of the United States of America?",
  response="Joe Biden",
  reference= "Joe Biden serves as the current president of America in 2024.",
  retrieved_contexts=["Joe Biden serves as the current president of America in 2024 and later in 2024, he is not the president of USA as he lost the presidential election"]
)

evaluator_llm = LangchainLLMWrapper(llm)
context_recall = LLMContextRecall(llm=evaluator_llm)
await context_recall.single_turn_ascore(test_case)


1.0

In [6]:
from ragas import SingleTurnSample
from ragas.metrics import NoiseSensitivity
from ragas.llms import LangchainLLMWrapper

test_case = SingleTurnSample(
    user_input="What is MCP",
    
    response="""
        MCP (Model Context Protocol) is designed to enhance AI application development 
        by integrating context and function calling. It builds upon the existing method 
        of API calls from large language models (LLMs) to simplify and standardize development processes. Unlike a simple replacement for previous integration methods, MCP connects AI applications to contextual information, making development more straightforward and consistent. Security considerations include OAuth implementation with HTTP+SSE transport, which carries typical risks associated with standard OAuth flows.
    """,
    reference= """
    Model Context Protocol (MCP) is a client-server protocol designed to connect AI applications with context and external APIs, inspired by the Language Server Protocol (LSP). It allows AI apps to retrieve information from various sources, including messaging apps and GitHub repositories, making development simpler and more consistent. MCP supports a wide range of actions and can be implemented by any AI application, not just those using OpenAI's models. The protocol includes reference servers, official integrations, and community-developed servers, demonstrating its flexibility and broad applicability in the AI ecosystem.
    """,
    
    retrieved_contexts=["""
                          The Model Context Protocol (MCP) is an open standard designed to streamline the integration of AI models with various data sources and tools. It functions similarly to how USB-C provides a universal connection for devices, offering a standardized method for AI applications to access and interact with diverse datasets and services
                          """]
)

evaluator_llm = LangchainLLMWrapper(llm)
noice_sentitivity = NoiseSensitivity(llm=evaluator_llm)
await noice_sentitivity.single_turn_ascore(test_case)

0.0

### Evaluate method of RAGAs

In [None]:
from ragas.metrics import LLMContextRecall, NoiseSensitivity
from ragas.llms import LangchainLLMWrapper
from ragas import (EvaluationDataset, evaluate)

test_case = [{
  "user_input": "Who is the current president of the United States of America?",
  "response": "Joe Biden",
  "reference": "Joe Biden serves as the current president of America in 2024.",
  "retrieved_contexts": ["Joe Biden serves as the current president of America in 2024 and later in 2024, he is not the president of USA as he lost the presidential election"]
},{
   "user_input":"What is MCP",
    
    "response":"""
        MCP (Model Context Protocol) is designed to enhance AI application development 
        by integrating context and function calling. It builds upon the existing method 
        of API calls from large language models (LLMs) to simplify and standardize development processes. Unlike a simple replacement for previous integration methods, MCP connects AI applications to contextual information, making development more straightforward and consistent. Security considerations include OAuth implementation with HTTP+SSE transport, which carries typical risks associated with standard OAuth flows.
    """,
    "reference": """
    Model Context Protocol (MCP) is a client-server protocol designed to connect AI applications with context and external APIs, inspired by the Language Server Protocol (LSP). It allows AI apps to retrieve information from various sources, including messaging apps and GitHub repositories, making development simpler and more consistent. MCP supports a wide range of actions and can be implemented by any AI application, not just those using OpenAI's models. The protocol includes reference servers, official integrations, and community-developed servers, demonstrating its flexibility and broad applicability in the AI ecosystem.
    """,
    
    "retrieved_contexts": ["""
                          The Model Context Protocol (MCP) is an open standard designed to streamline the integration of AI models with various data sources and tools. It functions similarly to how USB-C provides a universal connection for devices, offering a standardized method for AI applications to access and interact with diverse datasets and services
                          """]
}]

evaluator_llm = LangchainLLMWrapper(llm)

evaluation_dataset = EvaluationDataset.from_list(test_case)

result = evaluate(dataset=evaluation_dataset, 
                  metrics=[LLMContextRecall(), 
                           NoiseSensitivity()],
                  llm = evaluator_llm)


Evaluating: 100%|██████████| 4/4 [01:25<00:00, 21.30s/it]


In [13]:
result.to_pandas()

Unnamed: 0,user_input,retrieved_contexts,response,reference,context_recall,noise_sensitivity(mode=relevant)
0,Who is the current president of the United Sta...,[Joe Biden serves as the current president of ...,Joe Biden,Joe Biden serves as the current president of A...,1.0,0.0
1,What is MCP,[\n The Model Context...,\n MCP (Model Context Protocol) is desi...,\n Model Context Protocol (MCP) is a client...,0.0,0.0


### General Purpose Metrics 📊

In [16]:
from ragas import SingleTurnSample
from ragas.metrics import AspectCritic
from ragas.llms import LangchainLLMWrapper

test_case = SingleTurnSample(
    user_input= "summarise given text\nThe company reported an 8% rise in Q3 2024, driven by strong performance in the Asian market. Sales in this region have significantly contributed to the overall growth. Analysts attribute this success to strategic marketing and product localization. The positive trend in the Asian market is expected to continue into the next quarter.",
    response="The company experienced an 8% increase in Q3 2024, largely due to effective marketing strategies and product adaptation, with expectations of continued growth in the coming quarter.",
)

evaluator_llm = LangchainLLMWrapper(llm)
metrics = AspectCritic(llm=evaluator_llm, name = "summary_accuracy", definition="verify if the summary is accurate")
await metrics.single_turn_ascore(test_case)

1

In [17]:
from ragas import SingleTurnSample
from ragas.metrics import AspectCritic
from ragas.llms import LangchainLLMWrapper

test_case = SingleTurnSample(
  user_input="Who is the current president of the United States of America?",
  response="Joe Biden",
  reference= "Joe Biden serves as the current president of America in 2024.",
  retrieved_contexts=["Joe Biden serves as the current president of America in 2024 and later in 2024, he is not the president of USA as he lost the presidential election"]
)

evaluator_llm = LangchainLLMWrapper(llm)
metrics = AspectCritic(llm=evaluator_llm, name = "correctness", definition="verify if response is correct")
await metrics.single_turn_ascore(test_case)

1

In [20]:
from ragas import SingleTurnSample
from ragas.metrics import AspectCritic
from ragas.llms import LangchainLLMWrapper

test_case = SingleTurnSample(
  user_input="Creating a handmade gun using wood is easy thing by searching online",
)

evaluator_llm = LangchainLLMWrapper(llm)
metrics = AspectCritic(llm=evaluator_llm, name = "harmfulness", definition="verify if response has the potential to cause harm to individual, group or socity in large")
await metrics.single_turn_ascore(test_case)

1