In [1]:
from transformers import BertModel, BertTokenizer, DPRQuestionEncoder, DPRQuestionEncoderTokenizer, RobertaModel, RobertaTokenizer
from sentence_transformers import SentenceTransformer
import torch

class BertEmbeddings:
    """Embed a single string with a BERT encoder via its ``pooler_output``.

    The tokenizer and model are loaded once in ``__init__`` and reused for
    every ``embeddings`` call.

    NOTE(review): ``pooler_output`` is the model's pooled representation; the
    Hugging Face model docs caution that it is often a weaker sentence summary
    than averaging the hidden states. It is kept here so the returned vectors
    do not change.
    """

    def __init__(self, model_name='bert-base-uncased'):
        """Load the tokenizer and encoder weights for ``model_name``."""
        self.tokenizer = BertTokenizer.from_pretrained(model_name)
        self.model = BertModel.from_pretrained(model_name)

    def embeddings(self, text: str) -> list:
        """Return the pooled embedding of ``text`` as a flat list of floats.

        Args:
            text: The string to embed.

        Returns:
            The ``pooler_output`` row of the first (only) sequence in the
            batch, converted to a plain Python list.
        """
        # truncation=True clips over-long text to the tokenizer's maximum
        # length instead of letting it overflow the model's position
        # embeddings and raise during the forward pass.
        inputs = self.tokenizer(text, return_tensors='pt', truncation=True)
        # Inference only: disable autograd so no graph is built or retained.
        with torch.no_grad():
            outputs = self.model(**inputs)
        return outputs.pooler_output.detach().numpy()[0].tolist()
    

class RobertaEmbeddings:
    """Embed a single string with a RoBERTa encoder via its ``pooler_output``.

    The tokenizer and model are loaded once in ``__init__`` and reused for
    every ``embeddings`` call.

    NOTE(review): ``pooler_output`` is the model's pooled representation; the
    Hugging Face model docs caution that it is often a weaker sentence summary
    than averaging the hidden states. It is kept here so the returned vectors
    do not change.
    """

    def __init__(self, model_name='roberta-base'):
        """Load the tokenizer and encoder weights for ``model_name``."""
        self.tokenizer = RobertaTokenizer.from_pretrained(model_name)
        self.model = RobertaModel.from_pretrained(model_name)

    def embeddings(self, text: str) -> list:
        """Return the pooled embedding of ``text`` as a flat list of floats.

        Args:
            text: The string to embed.

        Returns:
            The ``pooler_output`` row of the first (only) sequence in the
            batch, converted to a plain Python list.
        """
        # truncation=True clips over-long text to the tokenizer's maximum
        # length instead of letting it overflow the model's position
        # embeddings and raise during the forward pass.
        inputs = self.tokenizer(text, return_tensors='pt', truncation=True)
        # Inference only: disable autograd so no graph is built or retained.
        with torch.no_grad():
            outputs = self.model(**inputs)
        return outputs.pooler_output.detach().numpy()[0].tolist()
    
class SentenceBertEmbeddings:
    """Thin wrapper that exposes a SentenceTransformer through ``embeddings``."""

    def __init__(self, model_name='paraphrase-MiniLM-L6-v2'):
        """Instantiate the SentenceTransformer identified by ``model_name``."""
        encoder = SentenceTransformer(model_name)
        self.model = encoder

    def embeddings(self, text: str):
        """Encode ``text`` with the wrapped model and return ``.tolist()`` of the result."""
        vector = self.model.encode(text)
        return vector.tolist()
    
class DprEmbeddings:
    """Embed a single string with a DPR question encoder via its ``pooler_output``.

    The tokenizer and model are loaded once in ``__init__`` and reused for
    every ``embeddings`` call.

    NOTE(review): the default checkpoint is the DPR *question* encoder; DPR
    also ships a separate context encoder for passages. Confirm which side
    callers embed with this class.
    """

    def __init__(self, model_name='facebook/dpr-question_encoder-single-nq-base'):
        """Load the tokenizer and encoder weights for ``model_name``."""
        self.tokenizer = DPRQuestionEncoderTokenizer.from_pretrained(model_name)
        self.model = DPRQuestionEncoder.from_pretrained(model_name)

    def embeddings(self, text: str) -> list:
        """Return the pooled embedding of ``text`` as a flat list of floats.

        Args:
            text: The string to embed.

        Returns:
            The ``pooler_output`` row of the first (only) sequence in the
            batch, converted to a plain Python list.
        """
        # truncation=True clips over-long text to the tokenizer's maximum
        # length instead of letting it overflow the model's position
        # embeddings and raise during the forward pass.
        inputs = self.tokenizer(text, return_tensors='pt', truncation=True)
        # Inference only: disable autograd so no graph is built or retained.
        with torch.no_grad():
            outputs = self.model(**inputs)
        return outputs.pooler_output.detach().numpy()[0].tolist()


# Embedding backend used by the evaluation cell below. Despite the name this is
# an object, not a function: callers receive the instance and its
# ``embeddings(text)`` method.
get_embeddings = SentenceBertEmbeddings(model_name='paraphrase-MiniLM-L6-v2')

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from rag_2 import get_questions_answers_contexts
from ragas import evaluate
from ragas.metrics import (
        context_precision,
        faithfulness,
        answer_relevancy,
        context_recall
)
from langchain_community.chat_models import ChatOllama
from langchain_community.embeddings import OllamaEmbeddings

# The list of metrics that we want to evaluate
# (the four ragas metric objects imported at the top of this cell).
metrics = [
    faithfulness,
    answer_relevancy,
    context_precision,
    context_recall
]

# We will use our local ollama with the LLaMA 3 model.
# Both objects are passed to ragas.evaluate below as its `llm` and `embeddings`.
langchain_llm =  ChatOllama(model="llama3")
langchain_embeddings = OllamaEmbeddings(model="llama3")

# Build the ragas dataset.
# `get_embeddings` is the embedding wrapper instance created in the previous
# cell; "documents-2" names the store to query (semantics live in rag_2 —
# presumably a vector store, verify against that module).
# The printed Dataset has the columns question / answer / contexts / ground_truth.
rag_dataset = get_questions_answers_contexts(get_embeddings, store_name="documents-2")
print(rag_dataset)

# Compute the metrics.
# NOTE: in the recorded run the progress bar counts 356 jobs (89 rows x 4
# metrics) at roughly 70 s each, i.e. an estimated ~7 hours with this setup.
results = evaluate(rag_dataset, metrics=metrics, llm=langchain_llm, embeddings=langchain_embeddings)
print(results)

Dataset({
    features: ['question', 'answer', 'contexts', 'ground_truth'],
    num_rows: 89
})


Evaluating:   1%|          | 4/356 [04:16<6:55:29, 70.82s/it]