In [1]:
import os

import huggingface_hub
import numpy as np
from datasets import load_dataset
from langchain.embeddings import HuggingFaceEmbeddings
from ragas import evaluate
from ragas.metrics import answer_similarity

In [2]:
# Identifier of the source QA dataset; it is handed to load_dataset further down.
INPUT_DATASET = (
    "dariolopez/justicio-BOE-A-1978-31229-constitucion-by-articles-qa-multilingual-e5-large-groq_llama3_70b_8192"
)

In [3]:
# Embedding checkpoints to compare. Each name is passed as `model_name` to
# HuggingFaceEmbeddings, and the part after the "/" names the score column.
EMBEDDING_MODEL_NAMES = [
    "intfloat/multilingual-e5-large",
    "hiiamsid/sentence_similarity_spanish_es",
    "sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
    "dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn",
]

In [5]:
# Load the "train" split of the source dataset and display its summary.
dataset = load_dataset(path=INPUT_DATASET, split="train")
dataset

Dataset({
    features: ['number', 'context', 'question', 'answer', 'context_qa', 'response_groq_llama3_70b_8192'],
    num_rows: 515
})

In [6]:
# Swap the roles of the text columns: the reference text becomes "ground_truth"
# and the LLM response becomes "answer" (presumably the names the ragas metric
# expects -- confirm against the ragas docs). Order matters: "answer" has to be
# freed before the response column can take that name.
dataset = (
    dataset
    .rename_column("answer", "ground_truth")
    .rename_column("response_groq_llama3_70b_8192", "answer")
)
dataset

Dataset({
    features: ['number', 'context', 'question', 'ground_truth', 'context_qa', 'answer'],
    num_rows: 515
})

In [7]:
# Keep only the rows in which no field is None.
dataset = dataset.filter(
    lambda record: not any(field is None for field in record.values())
)
dataset

Dataset({
    features: ['number', 'context', 'question', 'ground_truth', 'context_qa', 'answer'],
    num_rows: 514
})

In [8]:
# Score the dataset once per embedding model with the ragas answer_similarity
# metric, print the aggregate score, and store the per-row scores in a
# "<model>-sas" column.
for embedding_model_name in EMBEDDING_MODEL_NAMES:
    embeddings = HuggingFaceEmbeddings(
        model_name=embedding_model_name,
        model_kwargs={"device": "cpu"},
    )
    score = evaluate(dataset, metrics=[answer_similarity], embeddings=embeddings)
    print(f"{embedding_model_name} - {score['answer_similarity']}")

    # Use the last path component: identical to [1] for "org/model" ids, but a
    # model id without an "org/" prefix no longer raises IndexError.
    sas_column = f"{embedding_model_name.split('/')[-1]}-sas"

    # Drop a stale column left by a previous run of this cell so that re-running
    # it does not try to add a duplicate column.
    if sas_column in dataset.column_names:
        dataset = dataset.remove_columns(sas_column)
    dataset = dataset.add_column(sas_column, score.scores['answer_similarity'])

Evaluating:   0%|          | 0/514 [00:00<?, ?it/s]

intfloat/multilingual-e5-large - 0.844112698853668


Evaluating:   0%|          | 0/514 [00:00<?, ?it/s]

hiiamsid/sentence_similarity_spanish_es - 0.5382654501902947


Evaluating:   0%|          | 0/514 [00:00<?, ?it/s]

sentence-transformers/paraphrase-multilingual-mpnet-base-v2 - 0.6064446175448456


Evaluating:   0%|          | 0/514 [00:00<?, ?it/s]

dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn - 0.5513332008608809


In [9]:
# Show the dataset summary after the scoring loop; the per-model "-sas" columns
# added there should now be listed.
dataset

Dataset({
    features: ['number', 'context', 'question', 'ground_truth', 'context_qa', 'answer', 'multilingual-e5-large-sas', 'sentence_similarity_spanish_es-sas', 'paraphrase-multilingual-mpnet-base-v2-sas', 'roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn-sas'],
    num_rows: 514
})

In [10]:
def compute_mean(row, model_names=None):
    """Add a ``mean_sas`` entry holding the average of the per-model SAS columns.

    Args:
        row: Mapping for one dataset row. It must contain one
            ``"<model>-sas"`` value per model, where ``<model>`` is the last
            ``/``-separated component of the model name.
        model_names: Model names whose score columns are averaged. Defaults to
            the notebook-level ``EMBEDDING_MODEL_NAMES`` when omitted, which is
            what ``dataset.map(compute_mean)`` relies on.

    Returns:
        The same ``row`` object, updated in place with the ``mean_sas`` key.

    Raises:
        KeyError: If an expected ``"<model>-sas"`` column is missing from ``row``.
        ZeroDivisionError: If ``model_names`` is empty.
    """
    if model_names is None:
        model_names = EMBEDDING_MODEL_NAMES

    # Last path component: same as [1] for "org/model" ids, and also valid for
    # ids that carry no "org/" prefix (where [1] would raise IndexError).
    sas_columns = [f"{name.split('/')[-1]}-sas" for name in model_names]

    row['mean_sas'] = sum(row[column] for column in sas_columns) / len(sas_columns)
    return row

In [11]:
# Run compute_mean over every row so each one gains a "mean_sas" value,
# then display the resulting dataset summary.
dataset = dataset.map(function=compute_mean)
dataset

Dataset({
    features: ['number', 'context', 'question', 'ground_truth', 'context_qa', 'answer', 'multilingual-e5-large-sas', 'sentence_similarity_spanish_es-sas', 'paraphrase-multilingual-mpnet-base-v2-sas', 'roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn-sas', 'mean_sas'],
    num_rows: 514
})

In [12]:
# Log in to the Hugging Face Hub (interactive prompt) ahead of the upload.
# `huggingface_hub` is imported in the notebook's top import cell.
huggingface_hub.login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [13]:
# Upload the dataset, including the per-model and mean SAS columns, to the Hub.
dataset.push_to_hub(
    repo_id="dariolopez/justicio-BOE-A-1978-31229-constitucion-by-articles-qa-multilingual-e5-large-groq_llama3_70b-sas",
)

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/843 [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/datasets/dariolopez/justicio-BOE-A-1978-31229-constitucion-by-articles-qa-multilingual-e5-large-groq_llama3_70b-sas/commit/8e920e1b0868cd60db8f0386f9c166cc0e31ebb6', commit_message='Upload dataset', commit_description='', oid='8e920e1b0868cd60db8f0386f9c166cc0e31ebb6', pr_url=None, pr_revision=None, pr_num=None)

In [14]:
# Overall score: average of the per-row "mean_sas" values.
# `np` is imported in the notebook's top import cell.
np.mean(dataset['mean_sas'])

0.6350389918624224