In [1]:
from transformers import BertModel, BertTokenizer, DPRQuestionEncoder, DPRQuestionEncoderTokenizer, RobertaModel, RobertaTokenizer
from sentence_transformers import SentenceTransformer
from langchain.embeddings.base import Embeddings
import torch

class BertEmbeddings:
    """Embed text with the pooled output of a Hugging Face BERT model.

    Args:
        model_name: Checkpoint name passed to ``from_pretrained`` for both
            the tokenizer and the model.
    """

    def __init__(self, model_name='bert-base-uncased'):
        self.tokenizer = BertTokenizer.from_pretrained(model_name)
        self.model = BertModel.from_pretrained(model_name)

    def embeddings(self, text: str):
        """Return the model's ``pooler_output`` for ``text`` as a list of floats.

        Args:
            text: The string to embed.

        Returns:
            The first row of ``pooler_output`` converted to a plain Python list.
        """
        # truncation=True caps over-long inputs at the tokenizer's maximum
        # length instead of letting the forward pass fail on them.
        inputs = self.tokenizer(text, return_tensors='pt', truncation=True)
        # Inference only: do not record an autograd graph for the forward pass.
        with torch.no_grad():
            outputs = self.model(**inputs)
        # The batch holds a single sequence, so row 0 is the embedding.
        return outputs.pooler_output.detach().numpy()[0].tolist()
    

class RobertaEmbeddings:
    """Embed text with the pooled output of a Hugging Face RoBERTa model.

    Args:
        model_name: Checkpoint name passed to ``from_pretrained`` for both
            the tokenizer and the model.
    """

    def __init__(self, model_name='roberta-base'):
        self.tokenizer = RobertaTokenizer.from_pretrained(model_name)
        self.model = RobertaModel.from_pretrained(model_name)

    def embeddings(self, text: str):
        """Return the model's ``pooler_output`` for ``text`` as a list of floats.

        Args:
            text: The string to embed.

        Returns:
            The first row of ``pooler_output`` converted to a plain Python list.
        """
        # truncation=True caps over-long inputs at the tokenizer's maximum
        # length instead of letting the forward pass fail on them.
        inputs = self.tokenizer(text, return_tensors='pt', truncation=True)
        # Inference only: do not record an autograd graph for the forward pass.
        with torch.no_grad():
            outputs = self.model(**inputs)
        # The batch holds a single sequence, so row 0 is the embedding.
        return outputs.pooler_output.detach().numpy()[0].tolist()
    
class SentenceBertEmbeddings:
    """Thin wrapper that embeds text through a ``SentenceTransformer`` model.

    Args:
        model_name: Model identifier handed to ``SentenceTransformer``.
    """

    def __init__(self, model_name='paraphrase-MiniLM-L6-v2'):
        self.model = SentenceTransformer(model_name)

    def embeddings(self, text: str):
        """Encode ``text`` with the wrapped model and return ``.tolist()`` of the result."""
        encoded = self.model.encode(text)
        return encoded.tolist()
    
class DprEmbeddings:
    """Embed text with the pooled output of a DPR question encoder.

    Args:
        model_name: Checkpoint name passed to ``from_pretrained`` for both
            the tokenizer and the encoder.
    """

    def __init__(self, model_name='facebook/dpr-question_encoder-single-nq-base'):
        self.tokenizer = DPRQuestionEncoderTokenizer.from_pretrained(model_name)
        self.model = DPRQuestionEncoder.from_pretrained(model_name)

    def embeddings(self, text: str):
        """Return the encoder's ``pooler_output`` for ``text`` as a list of floats.

        Args:
            text: The string to embed.

        Returns:
            The first row of ``pooler_output`` converted to a plain Python list.
        """
        # truncation=True caps over-long inputs at the tokenizer's maximum
        # length instead of letting the forward pass fail on them.
        inputs = self.tokenizer(text, return_tensors='pt', truncation=True)
        # Inference only: do not record an autograd graph for the forward pass.
        with torch.no_grad():
            outputs = self.model(**inputs)
        # The batch holds a single sequence, so row 0 is the embedding.
        return outputs.pooler_output.detach().numpy()[0].tolist()


# Module-level embedder instance (not a function, despite the name), built with
# SentenceBertEmbeddings' default model; the evaluation cell passes it to
# get_questions_answers_contexts.
get_embeddings = SentenceBertEmbeddings() 

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
class DPRQuestionEncoderEmbeddings(Embeddings):
    """LangChain ``Embeddings`` implementation backed by a DPR question encoder.

    Args:
        model_name: Checkpoint name passed to ``from_pretrained`` for both
            the tokenizer and the encoder.
    """

    def __init__(self, model_name: str = 'facebook/dpr-question_encoder-single-nq-base'):
        self.tokenizer = DPRQuestionEncoderTokenizer.from_pretrained(model_name)
        self.model = DPRQuestionEncoder.from_pretrained(model_name)

    def embed(self, texts):
        """Embed one string or an iterable of strings.

        Args:
            texts: A single string (treated as a one-element list) or an
                iterable of strings.

        Returns:
            A list with one embedding (list of floats) per input text, in
            input order. An empty input yields an empty list.
        """
        # Ensure texts is a list
        if isinstance(texts, str):
            texts = [texts]

        embeddings = []
        # Inference only: do not record an autograd graph for any forward pass.
        with torch.no_grad():
            for text in texts:
                # truncation=True caps over-long inputs at the tokenizer's
                # maximum length instead of letting the forward pass fail.
                inputs = self.tokenizer(text, return_tensors='pt', truncation=True)
                outputs = self.model(**inputs)
                # Each text is encoded on its own, so row 0 is its embedding.
                embedding = outputs.pooler_output.detach().numpy()[0]
                embeddings.append(embedding.tolist())

        return embeddings

    def embed_documents(self, documents):
        """Return one embedding per document by delegating to ``embed``."""
        return self.embed(documents)

    def embed_query(self, query):
        """Return the single embedding for ``query``."""
        return self.embed([query])[0]

In [6]:
from rag_4 import get_questions_answers_contexts
from ragas import evaluate
from ragas.metrics import (
        context_precision,
        faithfulness,
        answer_relevancy,
        context_recall
)
from langchain_community.chat_models import ChatOllama

# Metrics that ragas should compute during the evaluation
metrics = [faithfulness, answer_relevancy, context_precision, context_recall]

# Judge LLM: the LLaMA 3 model served by our local ollama
langchain_llm = ChatOllama(model="llama3")
# Embedding model handed to ragas alongside the LLM
langchain_embeddings = DPRQuestionEncoderEmbeddings(
    model_name='facebook/dpr-question_encoder-single-nq-base'
)

# Build the ragas dataset from the "documents-4" store and show it
rag_dataset = get_questions_answers_contexts(
    get_embeddings,
    store_name="documents-4",
)
print(rag_dataset)

# Run the evaluation and show the resulting scores
results = evaluate(
    rag_dataset,
    metrics=metrics,
    llm=langchain_llm,
    embeddings=langchain_embeddings,
)
print(results)

Some weights of the model checkpoint at facebook/dpr-question_encoder-single-nq-base were not used when initializing DPRQuestionEncoder: ['question_encoder.bert_model.pooler.dense.bias', 'question_encoder.bert_model.pooler.dense.weight']
- This IS expected if you are initializing DPRQuestionEncoder from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DPRQuestionEncoder from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


KeyboardInterrupt: 