In [31]:
import glob
import json
import os

import evaluate
import numpy as np
from datasets import load_dataset

In [15]:
def load_pred(pred_path):
    """Load prediction lines from the numbered ``.txt`` files in a directory.

    Files are read in ascending numeric order of their base name (``2.txt``
    before ``10.txt``) and their lines are concatenated into one list.

    Args:
        pred_path: Directory containing the ``<number>.txt`` files, given
            with or without a trailing path separator.

    Returns:
        list[str]: Every line of every file, without line terminators.

    Raises:
        ValueError: If a matched file's base name is not an integer.
    """
    # os.path.join makes the trailing separator on pred_path optional.
    files = glob.glob(os.path.join(pred_path, "*.txt"))
    # Sort on the integer file stem only; deriving it from the base name keeps
    # directory components (and the platform's separator) out of the key.
    files.sort(key=lambda path: int(os.path.splitext(os.path.basename(path))[0]))

    data = []
    for file in files:
        # Decode explicitly as UTF-8 so accented text does not depend on the
        # platform's default encoding.
        with open(file, "r", encoding="utf-8") as f:
            data.extend(f.read().splitlines())
    return data

In [23]:
def load_flores_dataset(source_lang: str, target_lang: str) -> tuple:
    """Load the ``devtest`` sentences of a FLORES language pair.

    Args:
        source_lang: Source language code. A bare code such as ``"eng"`` gets
            the ``_Latn`` script suffix appended; a code that already contains
            an underscore (e.g. ``"zho_Hans"``) is used unchanged.
        target_lang: Target language code, handled the same way.

    Returns:
        A 2-tuple ``(target_sentences, source_sentences)`` taken from the
        ``devtest`` split. Note the order: target first, source second.
    """
    def _with_script(lang: str) -> str:
        # Default to Latin script only when the caller did not name one.
        return lang if "_" in lang else f"{lang}_Latn"

    source_code = _with_script(source_lang)
    target_code = _with_script(target_lang)

    dataset = load_dataset("facebook/flores", f"{source_code}-{target_code}")
    devtest = dataset["devtest"]
    return devtest[f"sentence_{target_code}"], devtest[f"sentence_{source_code}"]

In [33]:
# load_flores_dataset returns (target sentences, source sentences), so the
# target-language references come first and the source sentences second.
y_true, sources = load_flores_dataset("eng", "fra")
y_pred = load_pred("out/gemini-flores-fr/")

# Every reference needs exactly one prediction; report both sizes on mismatch.
assert len(y_true) == len(y_pred), (
    f"reference/prediction count mismatch: {len(y_true)} references, "
    f"{len(y_pred)} predictions"
)

In [25]:
# Load the four evaluation metrics by name; the targets on the left are bound
# in the same order as the names listed on the right.
sacrablue, chrf, comet, bertscore = [
    evaluate.load(metric_name)
    for metric_name in ("sacrebleu", "chrf", "comet", "bertscore")
]

Fetching 5 files: 100%|██████████| 5/5 [00:00<00:00, 53773.13it/s]
Lightning automatically upgraded your loaded checkpoint from v1.8.3.post1 to v2.4.0. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint ../../.cache/huggingface/hub/models--Unbabel--wmt22-comet-da/snapshots/371e9839ca4e213dde891b066cf3080f75ec7e72/checkpoints/model.ckpt`
Encoder model frozen.
/Users/dominykas.seputis/github/dl4nlp/.venv/lib/python3.11/site-packages/pytorch_lightning/core/saving.py:195: Found keys that are not in the model state dict but in the checkpoint: ['encoder.model.embeddings.position_ids']


In [37]:
# Score the predictions against the references with each loaded metric.
# sacrebleu and chrf expose a single "score" entry; comet ("scores") and
# bertscore ("f1") return a sequence of values that is averaged here.
# The averages are cast to built-in float so the dict holds no NumPy scalars
# (keeps the displayed repr and the JSON dump independent of NumPy version).
metrics = {
    "sacrebleu": sacrablue.compute(predictions=y_pred, references=y_true)["score"],
    "chrf": chrf.compute(predictions=y_pred, references=y_true)["score"],
    "comet": float(
        np.mean(comet.compute(predictions=y_pred, references=y_true, sources=sources)["scores"])
    ),
    "bertscore": float(
        np.mean(bertscore.compute(predictions=y_pred, references=y_true, lang="fr")["f1"])
    ),
}

GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/Users/dominykas.seputis/github/dl4nlp/.venv/lib/python3.11/site-packages/pytorch_lightning/trainer/setup.py:177: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.


In [39]:
# Show the computed scores as this cell's output.
metrics

{'sacrebleu': 50.834582654973005,
 'chrf': 72.08335230820562,
 'comet': 0.8904172878844936,
 'bertscore': 0.9221388308898263}

In [38]:
# Persist the scores next to the predictions as JSON with a 4-space indent.
with open("out/gemini-flores-fr/metrics.json", "w") as metrics_file:
    metrics_file.write(json.dumps(metrics, indent=4))