# Evaluate retrieval

In [None]:
!pip install langchain
!pip install -qU langchain-huggingface
!pip install -qU langchain-groq
!pip install ragas

Collecting ragas
  Downloading ragas-0.1.16-py3-none-any.whl.metadata (5.3 kB)
Collecting datasets (from ragas)
  Downloading datasets-2.21.0-py3-none-any.whl.metadata (21 kB)
Collecting tiktoken (from ragas)
  Downloading tiktoken-0.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)
Collecting langchain-community (from ragas)
  Downloading langchain_community-0.2.16-py3-none-any.whl.metadata (2.7 kB)
Collecting langchain-openai (from ragas)
  Downloading langchain_openai-0.1.23-py3-none-any.whl.metadata (2.6 kB)
Collecting openai>1 (from ragas)
  Downloading openai-1.43.0-py3-none-any.whl.metadata (22 kB)
Collecting pysbd>=0.3.4 (from ragas)
  Downloading pysbd-0.3.4-py3-none-any.whl.metadata (6.1 kB)
Collecting appdirs (from ragas)
  Downloading appdirs-1.4.4-py2.py3-none-any.whl.metadata (9.0 kB)
Collecting jiter<1,>=0.4.0 (from openai>1->ragas)
  Downloading jiter-0.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.6 kB)
Collec

In [None]:
from langchain_core.language_models import BaseLanguageModel
from langchain_core.embeddings import Embeddings
import os

In [None]:
# Base chat model (served by Groq) used to generate the dataset / run the evaluation.
from getpass import getpass

from langchain_groq import ChatGroq

# Never hardcode the key in the notebook. Reuse GROQ_API_KEY when it is already
# set in the environment; otherwise prompt for it without echoing the input.
# (The previous version unconditionally overwrote the variable with a blank
# placeholder, which also clobbered a correctly configured environment.)
if not os.environ.get("GROQ_API_KEY", "").strip():
    os.environ["GROQ_API_KEY"] = getpass("Groq API key: ")

# NOTE(review): confirm this model id is still served by Groq before re-running.
llm = ChatGroq(
    model="llama-3.1-70b-versatile",
    temperature=0.2,   # low temperature for more repeatable outputs
    max_tokens=None,
    timeout=None,
    max_retries=2,
)

In [None]:
# Embedding model, loaded through the LangChain Hugging Face integration.
from langchain_huggingface import HuggingFaceEmbeddings

# Hugging Face model id of the embedding model.
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"

embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [None]:
# Wrap the LangChain embeddings and chat model in their Ragas wrapper classes;
# the wrapped objects are the ones handed to evaluate() further down.
from ragas.embeddings import LangchainEmbeddingsWrapper
from ragas.llms import LangchainLLMWrapper

langchain_embeddings = LangchainEmbeddingsWrapper(embeddings)
langchain_llm = LangchainLLMWrapper(llm)

## Evaluate RAG pipeline

---

- `data_samples` (below) is only a sample dataset. We have to create our own dataset to evaluate the RAG pipeline - let us call it rag_eval_dataset.

- We already have a Q&A dataset - let us call it the LLM Dataset.


- The vector database will be given all the opinions that were used to generate the questions in the LLM Dataset.
  - Select some questions from the LLM Dataset. These become the questions in rag_eval_dataset.
  - Give these questions to the RAG pipeline. The k retrieved results become the contexts in rag_eval_dataset.
  - The answer column of the LLM Dataset (the Q&A dataset) serves as the ground truth.

In [None]:
## Dataset columns: question; contexts (retrieved from the RAG pipeline); ground_truth (the ground-truth answer).
## For a particular question there can be multiple retrievals, separated by ','.


In [None]:
## question answer opinion

In [None]:
from datasets import Dataset

# Two hand-written samples stored column-wise: entry i of every list belongs to
# sample i. 'contexts' holds a list of passages for each question.
data_samples = dict(
    question=[
        'When was the first super bowl?',
        'Who won the most super bowls?',
    ],
    contexts=[
        [
            'The First AFL–NFL World Championship Game was an American football game played on January 15, 1967, at the Los Angeles Memorial Coliseum in Los Angeles,',
        ],
        [
            'The Green Bay Packers...Green Bay, Wisconsin.',
            'The Packers compete...Football Conference',
        ],
    ],
    ground_truth=[
        'The first superbowl was held on January 15, 1967',
        'The New England Patriots have won the Super Bowl a record six times',
    ],
)

# Build the Hugging Face Dataset that is passed to the evaluation call.
dataset = Dataset.from_dict(data_samples)

In [None]:
# Display the dataset summary (column names and row count) as a sanity check.
dataset

Dataset({
    features: ['question', 'contexts', 'ground_truth'],
    num_rows: 2
})

In [None]:
from ragas import evaluate
from ragas.metrics import context_precision, context_recall

# Only the two context metrics are scored here; the wrapped chat model and
# embeddings defined earlier are supplied to the evaluation call.
retrieval_metrics = [context_precision, context_recall]

result = evaluate(
    dataset,
    metrics=retrieval_metrics,
    llm=langchain_llm,
    embeddings=langchain_embeddings,
)

Evaluating:   0%|          | 0/4 [00:00<?, ?it/s]

In [None]:
# Convert the evaluation result to a pandas DataFrame and preview the first
# rows (input columns alongside the computed metric columns).
df = result.to_pandas()
df.head(n=5)

Unnamed: 0,question,contexts,ground_truth,context_precision,context_recall
0,When was the first super bowl?,[The First AFL–NFL World Championship Game was...,"The first superbowl was held on January 15, 1967",1.0,1.0
1,Who won the most super bowls?,"[The Green Bay Packers...Green Bay, Wisconsin....",The New England Patriots have won the Super Bo...,0.0,0.0
