In [1]:
pip install unbabel-comet torch pandas numpy

In [2]:
!wget https://raw.githubusercontent.com/VladWero08/mt-pattern-preserve/refs/heads/main/data/articles_en.csv -O articles_en.csv
!wget https://raw.githubusercontent.com/VladWero08/mt-pattern-preserve/refs/heads/main/data/articles_fr.csv -O articles_fr.csv
!wget https://raw.githubusercontent.com/VladWero08/mt-pattern-preserve/refs/heads/main/data/articles_de.csv -O articles_de.csv
!wget https://raw.githubusercontent.com/VladWero08/mt-pattern-preserve/refs/heads/main/data/articles_es.csv -O articles_es.csv
!wget https://raw.githubusercontent.com/VladWero08/mt-pattern-preserve/refs/heads/main/data/articles_pl.csv -O articles_pl.csv
!wget https://raw.githubusercontent.com/VladWero08/mt-pattern-preserve/refs/heads/main/data/articles_ru.csv -O articles_ru.csv

In [3]:
import torch
import pandas as pd
from comet import download_model, load_from_checkpoint

In [5]:
# COMET checkpoint used throughout the notebook. The samples passed to it below carry only
# "src" and "mt" (no reference translation), i.e. it is used as a quality-estimation model.
COMET_MODEL_NAME = "Unbabel/wmt20-comet-qe-da"

model_path = download_model(COMET_MODEL_NAME)
model = load_from_checkpoint(model_path)

# Only move the model to the GPU when CUDA is actually available, so this cell
# does not raise on CPU-only runtimes.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

In [6]:
# Smoke test: score two hand-written German -> English pairs to check that the
# model loads and predicts before running it on the article dataset.
sample_pairs = [
    ("Dem Feuer konnte Einhalt geboten werden", "The fire could be stopped"),
    ("Schulen und Kindergärten wurden eröffnet.", "Schools and kindergartens were open"),
]

# COMET expects one dict per sample with the source under "src" and the translation under "mt".
data = [{"src": source, "mt": translation} for source, translation in sample_pairs]

model_output = model.predict(data, batch_size=8, gpus=1)
print(model_output)

## **Dataset**

In [7]:
# Read the per-language article CSVs (downloaded earlier) into one DataFrame each.
# The unpacking order must match the language order in the tuple below.
(
    articles_en,
    articles_fr,
    articles_de,
    articles_es,
    articles_pl,
    articles_ru,
) = (
    pd.read_csv(f"articles_{lang_code}.csv")
    for lang_code in ("en", "fr", "de", "es", "pl", "ru")
)

In [8]:
# Score the first English article against the first article of each translated
# frame, in the order fr, de, es, pl, ru.
first_source = articles_en["full_articles"].iloc[0]
translated_frames = (articles_fr, articles_de, articles_es, articles_pl, articles_ru)

data = [
    {"src": first_source, "mt": frame["full_articles"].iloc[0]}
    for frame in translated_frames
]

model_output = model.predict(data, batch_size=8, gpus=1)
print(model_output)

In [None]:
# Mean COMET score per target language over the whole corpus.
#
# Each English article is paired by row position with the article at the same
# position in every translated frame.
# NOTE(review): this assumes row i of each translated frame is the translation
# of row i of articles_en — confirm against the dataset.
translated_articles = {
    "fr": articles_fr,
    "de": articles_de,
    "es": articles_es,
    "pl": articles_pl,
    "ru": articles_ru,
}

source_articles = articles_en["full_articles"].tolist()
n_articles = len(source_articles)

if n_articles == 0:
    raise ValueError("articles_en contains no articles to score")

# Positional pairing (and the mean below) is only meaningful when every frame
# has exactly as many rows as the English source frame.
for lang, frame in translated_articles.items():
    if len(frame) != n_articles:
        raise ValueError(
            f"articles_{lang} has {len(frame)} rows but articles_en has {n_articles}; "
            "cannot pair articles by position"
        )

# Flat sample list: for each article, one sample per language in the fixed
# order of `translated_articles` (fr, de, es, pl, ru).
samples = [
    {"src": source, "mt": frame["full_articles"].iloc[i]}
    for i, source in enumerate(source_articles)
    for frame in translated_articles.values()
]

# Single predict call for the whole corpus instead of one call per article;
# COMET's own progress bar replaces the manual progress prints.
output = model.predict(samples, batch_size=8, gpus=1, accelerator="gpu")

# Scores come back in sample order, so every len(translated_articles)-th score
# starting at `offset` belongs to the language at that offset.
n_langs = len(translated_articles)
comet_scores = {
    lang: sum(output.scores[offset::n_langs]) / n_articles
    for offset, lang in enumerate(translated_articles)
}

## **Scores**

In [18]:
# Squash the mean per-language scores through a sigmoid; the resulting tensor
# is ordered fr, de, es, pl, ru.
mean_scores = [comet_scores[lang_code] for lang_code in ("fr", "de", "es", "pl", "ru")]
comet_scores_sigm = torch.sigmoid(torch.Tensor(mean_scores))

In [21]:
# Report the sigmoid-squashed score of each language, four decimals each.
# Labels follow the order in which the scores were packed into the tensor.
print("COMET Scores:")
print("-------------")
for label, score in zip(("FR", "DE", "ES", "PL", "RU"), comet_scores_sigm):
    print(f"{label}: {score:.4f}")