In [1]:
import os

from datasets import load_dataset
from ragas.metrics import answer_similarity
from ragas import evaluate
from langchain.embeddings import HuggingFaceEmbeddings

In [2]:
# Hugging Face Hub id of the dataset that is loaded and scored in this notebook.
# The same id, with a "-sas" suffix, is used as the destination repo when pushing results.
INPUT_DATASET = "dariolopez/justicio-BOE-A-1978-31229-constitucion-by-articles-qa-qa-groq_llama3_70b_8192"

In [3]:
# Embedding models (Hugging Face Hub ids) compared in this notebook.
# The segment after the "/" is reused further down to name each model's
# per-row score column ("<model>-sas").
EMBEDDING_MODEL_NAMES = [
    "dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn",
    "intfloat/multilingual-e5-large",
    "BAAI/bge-m3",
    "hiiamsid/sentence_similarity_spanish_es",
    "sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
]

In [4]:
# Fetch the "train" split of the input dataset from the Hugging Face Hub.
dataset = load_dataset(path=INPUT_DATASET, split="train")
dataset  # show schema and row count

Dataset({
    features: ['number', 'context', 'question', 'answer', 'context_qa', 'response_groq_llama3_70b_8192'],
    num_rows: 515
})

In [5]:
# Rename so the original 'answer' column becomes 'ground_truth' and the LLM
# response column becomes 'answer' (presumably the column names the ragas
# metric reads -- confirm against the installed ragas version).
# Order matters: 'answer' has to be freed before the response column can take it.
for old_name, new_name in (
    ("answer", "ground_truth"),
    ("response_groq_llama3_70b_8192", "answer"),
):
    dataset = dataset.rename_column(old_name, new_name)
dataset

Dataset({
    features: ['number', 'context', 'question', 'ground_truth', 'context_qa', 'answer'],
    num_rows: 515
})

In [6]:
def _row_is_complete(example):
    """Return True when none of the row's fields is None."""
    return not any(value is None for value in example.values())


# Keep only the rows in which every column has a value.
dataset = dataset.filter(_row_is_complete)
dataset

Dataset({
    features: ['number', 'context', 'question', 'ground_truth', 'context_qa', 'answer'],
    num_rows: 515
})

In [7]:
# Never store API keys in the notebook: they leak through version control and
# shared copies. Reuse OPENAI_API_KEY when the environment already provides it;
# otherwise ask for it interactively (input is not echoed and is not saved in
# the notebook file).
# NOTE: the key that used to be hard-coded in this cell has been exposed and
# must be revoked.
if not os.environ.get('OPENAI_API_KEY'):
    from getpass import getpass

    os.environ['OPENAI_API_KEY'] = getpass("OpenAI API key: ")

In [8]:
# Score the dataset once per embedding model, print the aggregate score and
# store the per-row similarities in a "<model>-sas" column.
for embedding_model_name in EMBEDDING_MODEL_NAMES:
    embeddings = HuggingFaceEmbeddings(
        model_name=embedding_model_name,
        model_kwargs={"device": "cpu"},
    )
    score = evaluate(dataset, metrics=[answer_similarity], embeddings=embeddings)
    print(f"{embedding_model_name} - {score['answer_similarity']}")

    # Take the last path segment so that model ids without an "<org>/" prefix
    # do not raise IndexError.
    sas_column = f"{embedding_model_name.split('/')[-1]}-sas"
    # Duplicate column names are rejected when adding a column, so drop a stale
    # copy first; this keeps the cell re-runnable.
    if sas_column in dataset.column_names:
        dataset = dataset.remove_columns(sas_column)
    dataset = dataset.add_column(sas_column, score.scores['answer_similarity'])

Evaluating:   0%|          | 0/515 [00:00<?, ?it/s]

dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn - 0.5347896565528095


Evaluating:   0%|          | 0/515 [00:00<?, ?it/s]

intfloat/multilingual-e5-large - 0.8416537173510169


Evaluating:   0%|          | 0/515 [00:00<?, ?it/s]

BAAI/bge-m3 - 0.5986226227828518


Evaluating:   0%|          | 0/515 [00:00<?, ?it/s]

hiiamsid/sentence_similarity_spanish_es - 0.517687558325827


Evaluating:   0%|          | 0/515 [00:00<?, ?it/s]

sentence-transformers/paraphrase-multilingual-mpnet-base-v2 - 0.5885015029280471


In [9]:
# Inspect the dataset after the per-model similarity columns were added.
dataset

Dataset({
    features: ['number', 'context', 'question', 'ground_truth', 'context_qa', 'answer', 'roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn-sas', 'multilingual-e5-large-sas', 'bge-m3-sas', 'sentence_similarity_spanish_es-sas', 'paraphrase-multilingual-mpnet-base-v2-sas'],
    num_rows: 515
})

In [10]:
def compute_mean(row, model_names=None):
    """Add the mean of the per-model similarity scores to a dataset row.

    Args:
        row: Mapping holding one ``"<model>-sas"`` value per embedding model,
            where ``<model>`` is the last ``/``-separated segment of the
            model id.
        model_names: Model ids whose scores are averaged. Defaults to the
            notebook-level ``EMBEDDING_MODEL_NAMES``.

    Returns:
        The same ``row`` object, updated in place with a ``'mean_sas'`` key.

    Raises:
        KeyError: If one of the expected score columns is missing from ``row``.
        ZeroDivisionError: If ``model_names`` is empty.
    """
    if model_names is None:
        model_names = EMBEDDING_MODEL_NAMES
    # Last segment rather than index 1: ids without an "<org>/" prefix must not
    # raise IndexError.
    sas_columns = [f"{name.split('/')[-1]}-sas" for name in model_names]
    row['mean_sas'] = sum(row[column] for column in sas_columns) / len(sas_columns)
    return row

In [11]:
# Add the per-row average across all embedding models as the 'mean_sas' column.
dataset = dataset.map(function=compute_mean)
dataset

Dataset({
    features: ['number', 'context', 'question', 'ground_truth', 'context_qa', 'answer', 'roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn-sas', 'multilingual-e5-large-sas', 'bge-m3-sas', 'sentence_similarity_spanish_es-sas', 'paraphrase-multilingual-mpnet-base-v2-sas', 'mean_sas'],
    num_rows: 515
})

In [13]:
import huggingface_hub

# Log in to the Hugging Face Hub; in a notebook this renders an interactive widget.
huggingface_hub.login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [14]:
# Publish the scored dataset under the input dataset's id plus a "-sas" suffix.
output_repo_id = f"{INPUT_DATASET}-sas"
dataset.push_to_hub(output_repo_id)

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/2.83k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/datasets/dariolopez/justicio-BOE-A-1978-31229-constitucion-by-articles-qa-qa-groq_llama3_70b_8192-sas/commit/53e5e3e8dc8df21bbac92202e44e26f3a5c8e274', commit_message='Upload dataset', commit_description='', oid='53e5e3e8dc8df21bbac92202e44e26f3a5c8e274', pr_url=None, pr_revision=None, pr_num=None)

In [12]:
import numpy as np

# Overall score: average of the per-row 'mean_sas' values.
mean_sas_values = dataset['mean_sas']
np.mean(mean_sas_values)

0.6162510115881105