In [None]:
from transformers import BertModel, BertTokenizer, DPRQuestionEncoder, DPRQuestionEncoderTokenizer, RobertaModel, RobertaTokenizer
from sentence_transformers import SentenceTransformer
from langchain.embeddings.base import Embeddings
import torch

class DPRQuestionEncoderEmbeddings(Embeddings):
    """LangChain ``Embeddings`` adapter around a Hugging Face DPR question encoder.

    Every text is tokenized, run through the encoder, and represented by the
    encoder's ``pooler_output`` vector, returned as a plain list of floats.

    NOTE(review): both documents and queries are embedded with the *question*
    encoder. DPR also provides a separate context encoder for passages --
    confirm that using a single encoder for both sides is intended.
    """

    def __init__(
        self,
        model_name: str = 'facebook/dpr-question_encoder-single-nq-base',
        batch_size: int = 16,
        max_length: int = 512,
    ):
        """Load the tokenizer and encoder weights.

        Args:
            model_name: Hugging Face checkpoint name or local path.
            batch_size: Number of texts encoded per forward pass (at least 1).
            max_length: Inputs are truncated to this many tokens. The default
                of 512 matches the position-embedding limit of the BERT-base
                DPR checkpoints; longer inputs would fail inside the model.
        """
        self.tokenizer = DPRQuestionEncoderTokenizer.from_pretrained(model_name)
        self.model = DPRQuestionEncoder.from_pretrained(model_name)
        # Inference only: make sure dropout is disabled.
        self.model.eval()
        self.batch_size = max(1, int(batch_size))
        self.max_length = max_length

    def embed(self, texts):
        """Embed one string or an iterable of strings.

        Args:
            texts: A single string, or an iterable of strings.

        Returns:
            A list with one embedding (list of floats) per input text, in
            input order. An empty input yields an empty list.
        """
        # Accept a bare string for convenience.
        if isinstance(texts, str):
            texts = [texts]
        # Materialize so generators can be sliced into batches.
        texts = list(texts)

        embeddings = []
        # No gradients are needed for inference; skipping autograd saves memory.
        with torch.no_grad():
            for start in range(0, len(texts), self.batch_size):
                batch = texts[start:start + self.batch_size]
                inputs = self.tokenizer(
                    batch,
                    return_tensors='pt',
                    padding=True,       # pad to the longest text in the batch
                    truncation=True,    # guard against over-long inputs
                    max_length=self.max_length,
                )
                outputs = self.model(**inputs)
                embeddings.extend(outputs.pooler_output.cpu().tolist())

        return embeddings

    def embed_documents(self, documents):
        """Embed a list of documents; returns one vector per document."""
        return self.embed(documents)

    def embed_query(self, query):
        """Embed a single query string; returns a single vector."""
        return self.embed([query])[0]


In [1]:
from rag_1 import get_questions_answers_contexts
from ragas import evaluate
from ragas.metrics import (
        context_precision,
        faithfulness,
        answer_relevancy,
        context_recall
)
from langchain_community.chat_models import ChatOllama


# Ragas metrics computed during the evaluation run
metrics = [faithfulness, answer_relevancy, context_precision, context_recall]

# LLM used by ragas: the LLaMA 3 model served by the local ollama instance
langchain_llm = ChatOllama(model="llama3")

# Embedding model handed to ragas
langchain_embeddings = DPRQuestionEncoderEmbeddings(
    model_name='facebook/dpr-question_encoder-single-nq-base'
)

# Fetch the ragas dataset for the "documents-1" store and show its summary
rag_dataset = get_questions_answers_contexts(store_name="documents-1")
print(rag_dataset)

# Score the dataset on all selected metrics and print the result
results = evaluate(
    rag_dataset,
    metrics=metrics,
    llm=langchain_llm,
    embeddings=langchain_embeddings,
)
print(results)

  from .autonotebook import tqdm as notebook_tqdm
Some weights of the model checkpoint at facebook/dpr-ctx_encoder-single-nq-base were not used when initializing DPRContextEncoder: ['ctx_encoder.bert_model.pooler.dense.bias', 'ctx_encoder.bert_model.pooler.dense.weight']
- This IS expected if you are initializing DPRContextEncoder from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DPRContextEncoder from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'DPRQuestionEncoderTokenizer'. 
The class this fu

Dataset({
    features: ['question', 'answer', 'contexts', 'ground_truth'],
    num_rows: 89
})


Evaluating:  40%|████      | 143/356 [1:12:40<1:08:10, 19.20s/it]Failed to parse output. Returning None.
Evaluating: 100%|██████████| 356/356 [2:43:23<00:00, 27.54s/it]  


{'faithfulness': 0.6108, 'answer_relevancy': 0.2781, 'context_precision': 0.2584, 'context_recall': 0.3764}
