In [None]:
# Resolve the project root (two directories above the current working
# directory) and place it at the front of the module search path.
import os
import sys

project_root = os.path.abspath(
    os.path.join(os.getcwd(), os.pardir, os.pardir)
)
sys.path.insert(0, project_root)

In [None]:
import comet
import sacrebleu
import pandas as pd

In [None]:
# --- Run configuration ---------------------------------------------------

# Label stored in the "model_name" column of the evaluations table.
eval_name = "mistral7b_v2"

# Input CSV holding the system translations, and the CSV that collects scores.
translations_path = os.path.join(
    project_root, "translations/mistral7b_v2_translations.csv"
)
evaluations_path = "evaluations.csv"

# COMET model to score with, and the directory its checkpoint is kept in.
comet_model_name = "wmt20-comet-da"
comet_dir = os.path.join(project_root, "./comet")

In [None]:
# Load the translation table as plain text.
# dtype=str together with keep_default_na=False keeps every cell a Python
# string: an empty hypothesis stays "" and literal text such as "NA", "null"
# or "N/A" is not converted to NaN (a float), which is not valid input for
# the text-based metrics computed from these lists.
df = pd.read_csv(translations_path, dtype=str, keep_default_na=False)

# One list per column, aligned by row.
sources = df["sources"].tolist()
references = df["references"].tolist()
translations = df["translations"].tolist()

In [None]:
# Spot-check a few rows: source, reference and system output on consecutive
# lines, with a blank line after each triple.
sample_indices = [3, 8, 98, 66, 165, 876]
for idx in sample_indices:
    print(sources[idx], references[idx], translations[idx], sep="\n", end="\n\n")

In [None]:
# Corpus-level string metrics from sacrebleu, each rounded to two decimals.
# The single reference list is wrapped in a list because the API takes a
# list of reference streams.

# BLEU (pass tokenize='flores200' to corpus_bleu for spBLEU)
bleu = round(sacrebleu.corpus_bleu(translations, [references]).score, 2)
print("BLEU:", bleu)

# chrF++ (word_order=2 selects chrF++)
chrf = round(
    sacrebleu.corpus_chrf(translations, [references], word_order=2).score, 2
)
print("chrF++:", chrf)

# TER
ter_metric = sacrebleu.metrics.TER()
ter = round(ter_metric.corpus_score(translations, [references]).score, 2)
print("TER:", ter)

In [None]:
# Calculate COMET
# Build one {"src", "mt", "ref"} record per segment for the COMET model.
# A dedicated name is used so the translations frame in `df` is not clobbered.
comet_inputs = pd.DataFrame({"src": sources, "mt": translations, "ref": references})
data = comet_inputs.to_dict("records")

# The last line of this cell rebinds the name `comet` to the numeric score
# (the results cell reads it under that name). Access the package through a
# private alias so that re-running this cell in the same kernel does not fail
# with "'float' object has no attribute 'load_from_checkpoint'".
import comet as comet_lib

# Reuse a previously downloaded checkpoint when present; otherwise download it.
model_path = os.path.join(comet_dir, comet_model_name, "checkpoints/model.ckpt")
if not os.path.exists(model_path):
    model_path = comet_lib.download_model(
        model=comet_model_name, saving_directory=comet_dir
    )
model = comet_lib.load_from_checkpoint(model_path)

# Segment-level scores and the system-level score; the latter is reported
# scaled by 100 and rounded to two decimals.
seg_scores, sys_score = model.predict(data, batch_size=128, gpus=1).values()
comet = round(sys_score * 100, 2)  # NOTE: `comet` now holds the score, not the package
print("COMET:", comet)

In [None]:
# Record this run's scores in the evaluations CSV (created on first use).
if os.path.exists(evaluations_path):
    evaluations = pd.read_csv(evaluations_path)
else:
    evaluations = pd.DataFrame()

new_row = {
    "model_name": eval_name,
    "BLEU": bleu,
    "chrF++": chrf,
    "TER": ter,
    "COMET": comet,
}

# Drop any earlier row for this model so that re-running the cell replaces
# the entry instead of appending a duplicate each time.
if "model_name" in evaluations.columns:
    evaluations = evaluations[evaluations["model_name"] != eval_name]

df = pd.concat([evaluations, pd.DataFrame([new_row])], ignore_index=True)

df.to_csv(evaluations_path, index=False)
df.head()