# Quickstart

In [1]:
# Only run this cell if you have an editable install: autoreload picks up
# edits to the library's source live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

## Load the Data

For this quickstart we are going to use a dataset that we prepared from the [eli5](https://huggingface.co/datasets/eli5) dataset, together with the model's responses.

Each row of the dataset has the following fields:

- `prompt`: `str`
- `context`: `str`
- `references`: `list[str]`
- `ground_truth`: `list[str]`
- `generated_text`: `str`

In [2]:
from datasets import load_dataset, concatenate_datasets

# Load the `test_eli5` split of the prepared eli5 dataset.
ds = load_dataset("explodinggradients/eli5-test", split="test_eli5")
# Bare last expression so the notebook displays the dataset summary
# (feature names and number of rows).
ds

Found cached dataset parquet (/home/jjmachan/.cache/huggingface/datasets/explodinggradients___parquet/explodinggradients--eli5-test-217d92ce20e19249/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)


Dataset({
    features: ['context', 'prompt', 'ground_truth', 'references', 'generated_text'],
    num_rows: 500
})

In [24]:
from belar.metrics import (
    Rouge1,
    Evaluation,
    Rouge2,
    RougeL,
    SBERTScore,
    EntailmentScore,
    EditRatio,
    EditDistance,
)

In [28]:
# Metrics that take options are instantiated first; the remaining metrics are
# passed to Evaluation exactly as imported from belar.metrics.
sbert_score = SBERTScore(similarity_metric="cosine")
entail = EntailmentScore(max_length=512)

# NOTE(review): batch_size is supplied although batched=False — presumably it
# only takes effect when batched=True; confirm against Evaluation.
e = Evaluation(
    metrics=[Rouge1, Rouge2, RougeL, sbert_score, EditDistance, EditRatio, entail],
    batched=False,
    batch_size=30,
)

The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.


In [29]:
# Run all configured metrics; the ground-truth column is passed first and the
# generated text second.
r = e.eval(ds["ground_truth"], ds["generated_text"])

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

In [20]:
# Display the evaluation result: one aggregate score per metric.
r

{'rouge1_score': 0.27777314683149845, 'rouge2_score': 0.05593454553750915, 'rougeL_score': 0.16365190027294899, 'SBERT_cosine_score': 0.37552570906095206, 'edit_distance_score': 735.114, 'edit_ratio_score': 0.41482407945510713}

In [21]:
# A single metric's score can be looked up by its name.
r["rouge1_score"]

0.27777314683149845

In [22]:
# Summary statistics per metric (mean, quartiles, min, max, std).
r.describe()

{'rouge1_score': {'mean': 0.27777314683149845,
  '25%': 0.22222222222222224,
  '50%': 0.28116554054054055,
  '75%': 0.33333333333333337,
  'min': 0.03333333333333333,
  'max': 0.49498327759197325,
  'std': 0.07709937733409833},
 'rouge2_score': {'mean': 0.05593454553750915,
  '25%': 0.029795467108899944,
  '50%': 0.05203595980962454,
  '75%': 0.07713675213675214,
  'min': 0.0,
  'max': 0.22499999999999998,
  'std': 0.03659179594928787},
 'rougeL_score': {'mean': 0.16365190027294899,
  '25%': 0.13122438524590163,
  '50%': 0.1639344262295082,
  '75%': 0.19366875300914782,
  'min': 0.03333333333333333,
  'max': 0.3087248322147651,
  'std': 0.04582111082128693},
 'SBERT_cosine_score': {'mean': 0.37552570906095206,
  '25%': 0.2123386301100254,
  '50%': 0.33269713819026947,
  '75%': 0.5326416194438934,
  'min': 0.007017173804342747,
  'max': 0.9106802940368652,
  'std': 0.2075585785391846},
 'edit_distance_score': {'mean': 735.114,
  '25%': 311.5,
  '50%': 476.5,
  '75%': 864.25,
  'min': 10

In [None]:
# NOTE(review): `ds_eval` is not defined in any cell shown above this one —
# presumably the dataset produced by an evaluation run; define it before use
# so the notebook survives Restart & Run All.
t_not_batched = ds_eval["rouge1_score"]

In [None]:
# Difference between the batched and non-batched values (presumably expected
# to be all zeros if both modes agree — confirm).
# NOTE(review): neither `np` (numpy) nor `t_batched` is imported/defined in any
# cell shown above — add them before this cell or it fails on a fresh kernel.
np.array(t_batched) - np.array(t_not_batched)

In [5]:
# Columns present on the evaluated dataset.
ds_eval.column_names

['ground_truth', 'generated_text', 'SBERT_cosine_score']

In [6]:
# Per-row SBERT cosine scores.
# NOTE(review): this displays the entire column; consider slicing (e.g. `[:5]`)
# to keep the saved output short.
ds_eval["SBERT_cosine_score"]

[0.3033774197101593,
 0.016349632292985916,
 0.4478442072868347,
 0.1860141158103943,
 0.03600190579891205,
 0.6023079752922058,
 0.289838969707489,
 0.08502114564180374,
 0.17191164195537567,
 0.3593299984931946,
 0.1715232878923416,
 0.3805505037307739,
 0.5519564151763916,
 0.2677731215953827,
 0.6183438301086426,
 0.10611602663993835,
 0.19605034589767456,
 0.08165217190980911,
 0.29304254055023193,
 0.35943326354026794,
 0.38164564967155457,
 0.03771442547440529,
 0.11554502695798874,
 0.47948333621025085,
 0.23276342451572418,
 0.4236215353012085,
 0.1943129450082779,
 0.1942053735256195,
 0.12668733298778534,
 0.2597537338733673,
 0.33301281929016113,
 0.3094521462917328,
 0.3279588520526886,
 0.32722654938697815,
 0.38284799456596375,
 0.2851578891277313,
 0.23893719911575317,
 0.6166086196899414,
 0.2423057109117508,
 0.7267876267433167,
 0.08813111484050751,
 0.48606470227241516,
 0.6568448543548584,
 0.1358499825000763,
 0.4515664577484131,
 0.23441915214061737,
 0.474116057