<a href="https://colab.research.google.com/github/VDG-code/NMT-Master-Thesis/blob/main/mbart50_large_evaluation_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd

In [None]:
# Mount Google Drive so the evaluation CSV stored there can be read.
from google.colab import drive
drive.mount('/content/drive')
# Load the evaluation set; later cells read its 'Dutch' and 'Croatian' columns.
evaluation_df = pd.read_csv('/content/drive/MyDrive/evaluation_df.csv')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!pip install sentencepiece



In [None]:
!pip install sacrebleu



In [None]:
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
import pandas as pd
from sacrebleu.metrics import BLEU

In [None]:
# Checkpoint identifier passed to both from_pretrained calls below.
model_name = "facebook/mbart-large-50-many-to-many-mmt"
# `tokenizer` and `model` are module-level globals used by the translate_* helpers.
tokenizer = MBart50TokenizerFast.from_pretrained(model_name)
model = MBartForConditionalGeneration.from_pretrained(model_name)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [None]:
def translate_dutch_to_croatian_batch(texts):
    """Translate a batch of Dutch sentences to Croatian with the global mBART model.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        texts: list of Dutch source strings.

    Returns:
        A list of decoded Croatian strings, one per generated sequence, with
        special tokens stripped. An empty input yields an empty list.
    """
    if len(texts) == 0:
        # Nothing to translate; skip the tokenizer and the model entirely.
        return []
    # Source language code for Dutch is 'nl_XX'; the target code 'hr_HR' is forced below.
    tokenizer.src_lang = "nl_XX"
    # Inputs longer than 512 tokens are truncated. Send the tensors to whatever
    # device the model currently lives on instead of assuming CUDA.
    encoded_nl = tokenizer(texts, return_tensors="pt", padding=True, truncation=True, max_length=512).to(model.device)
    # Forcing the first generated token to the Croatian language code selects the target language.
    generated_tokens = model.generate(**encoded_nl, forced_bos_token_id=tokenizer.lang_code_to_id["hr_HR"])
    # Decode the whole batch in one call.
    return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)

def translate_croatian_to_dutch_batch(texts):
    """Translate a batch of Croatian sentences to Dutch with the global mBART model.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        texts: list of Croatian source strings.

    Returns:
        A list of decoded Dutch strings, one per generated sequence, with
        special tokens stripped. An empty input yields an empty list.
    """
    if len(texts) == 0:
        # Nothing to translate; skip the tokenizer and the model entirely.
        return []
    # Source language code for Croatian is 'hr_HR'; the target code 'nl_XX' is forced below.
    tokenizer.src_lang = "hr_HR"
    # Inputs longer than 512 tokens are truncated. Send the tensors to whatever
    # device the model currently lives on instead of assuming CUDA.
    encoded_hr = tokenizer(texts, return_tensors="pt", padding=True, truncation=True, max_length=512).to(model.device)
    # Forcing the first generated token to the Dutch language code selects the target language.
    generated_tokens = model.generate(**encoded_hr, forced_bos_token_id=tokenizer.lang_code_to_id["nl_XX"])
    # Decode the whole batch in one call.
    return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)

In [None]:
# Requires a CUDA-enabled runtime (e.g. a Colab GPU session); .to('cuda') fails otherwise.
model = model.to('cuda')  # Move model to GPU

In [None]:
from tqdm.auto import tqdm
import torch

# Sentence-level BLEU needs the effective n-gram order: with the default settings a
# hypothesis shorter than four tokens has no 4-grams and scores close to zero even
# when it matches the reference exactly. Note that this changes the scores of very
# short segments compared with runs that used the default BLEU().
bleu = BLEU(effective_order=True)

# Prepare lists for translations and scores
croatian_to_dutch_translations = []
dutch_to_croatian_translations = []
croatian_to_dutch_bleu_scores = []
dutch_to_croatian_bleu_scores = []

batch_size = 8  # number of sentences sent to each translate_* call
for i in tqdm(range(0, len(evaluation_df), batch_size)):
    batch = evaluation_df.iloc[i:i+batch_size]
    # Extract each column once; the same lists serve as model inputs and as references.
    dutch_references = batch['Dutch'].tolist()
    croatian_references = batch['Croatian'].tolist()

    # Translate the batch in both directions
    translated_dutch_batch = translate_croatian_to_dutch_batch(croatian_references)
    translated_croatian_batch = translate_dutch_to_croatian_batch(dutch_references)

    croatian_to_dutch_translations.extend(translated_dutch_batch)
    dutch_to_croatian_translations.extend(translated_croatian_batch)

    # Score every hypothesis against its single reference
    for translated_dutch, translated_croatian, original_dutch, original_croatian in zip(
        translated_dutch_batch, translated_croatian_batch, dutch_references, croatian_references
    ):
        dutch_score = bleu.sentence_score(translated_dutch, [original_dutch])
        croatian_score = bleu.sentence_score(translated_croatian, [original_croatian])

        croatian_to_dutch_bleu_scores.append(dutch_score.score)
        dutch_to_croatian_bleu_scores.append(croatian_score.score)

# Update DataFrame (one value per row, in row order)
evaluation_df['Translated Dutch'] = croatian_to_dutch_translations
evaluation_df['Translated Croatian'] = dutch_to_croatian_translations
evaluation_df['Croatian to Dutch BLEU'] = croatian_to_dutch_bleu_scores
evaluation_df['Dutch to Croatian BLEU'] = dutch_to_croatian_bleu_scores

  0%|          | 0/279 [00:00<?, ?it/s]

In [None]:
# Average of the per-sentence BLEU scores for Croatian -> Dutch.
# This is a mean of sentence-level scores, not a single corpus-level BLEU.
evaluation_df["Croatian to Dutch BLEU"].mean()

9.912537643364082

In [None]:
# Average of the per-sentence BLEU scores for Dutch -> Croatian.
# This is a mean of sentence-level scores, not a single corpus-level BLEU.
evaluation_df["Dutch to Croatian BLEU"].mean()

5.108418677483671

In [None]:
# Inspect the first rows: source texts, model outputs and per-sentence BLEU side by side.
evaluation_df.head(50)

Unnamed: 0.1,Unnamed: 0,Dutch,Croatian,Translated Dutch,Translated Croatian,Croatian to Dutch BLEU,Dutch to Croatian BLEU
0,0,Op maandag kondigden wetenschappers van de Sta...,U ponedjeljak su znanstvenici s Medicinskog fa...,"On Monday, scientists at Stanford Medical Scho...","Monday, scientists from the Stanford Universit...",2.137018,0.961745
1,1,Hoofdonderzoekers zeggen dat dit kan leiden to...,Voditelji istraživanja izjavili su da bi ovo o...,Onderzoekers hebben aangetoond dat deze ontdek...,Znanstvenici say this can lead to early detect...,11.305237,1.175609
2,2,De JAS 39C Gripen stortte rond 09.30 uur lokal...,JAS 39C Gripen srušio se na pistu i eksplodira...,JAS 39C Gripen crashed in de pis en exploded r...,JAS 39C Gripen jeoparm jeoparm jeoparm jeoparm...,5.913213,3.32151
3,3,De piloot werd geïdentificeerd als majoor Dilo...,Pilot je identificiran kao zapovjednik eskadri...,De pilot is identified as Commander Dilokrit P...,A pilot was identified as Major Dilokrit Patta...,19.070828,17.747405
4,4,De lokale media meldt dat er tijdens een actie...,Lokalni mediji izvješćuju da je došlo do prevr...,De local media reports that there was een vrac...,I've heard from the local media that there was...,4.780204,2.033127
5,5,De 28-jaar oude Vidal is drie seizoenen gelede...,28-godišnji Vidal došao je iz Seville u Barçu ...,"Vidal, 28 jaar oud, kwam uit Seville, to Bars,...",28-ogodišnjak Vidal je prešao s Sevilla na Bar...,3.234245,6.608974
6,6,Sinds hij verhuisde naar de Catalaanse hoofdst...,Vidal je za klub odigrao 49 utakmica otkako se...,Vidal had 49 spells voor het club sinds hij ve...,I've played 49 matches for the club,18.751145,2.094347
7,7,Het protest begon rond 11.00 lokale tijd (UTC+...,Prosvjed je započeo oko 11:00 po lokalnom vrem...,De bewaking begon rond 11:00 local time in Whi...,Demonstracija je započela oko 11: 00 local tim...,6.667754,24.297792
8,8,Iets na 11.00 uur begonnen betogers het verkee...,Netom iza 11:00 h prosvjednici su blokirali pr...,Het was net 11: 00 p.m. dat protesters aan de ...,"Otprilike 11: 00, protesters began to block tr...",2.519759,10.704546
9,9,Om 11:20 vroeg de politie aan de demonstranten...,Policija je u 11:20 zatražila od prosvjednika ...,"Om 11:20 p.m., de politie riep de protesters o...","U 11:20, the police demanded that the demonstr...",9.748573,4.595432


In [None]:
from nltk.translate.meteor_score import single_meteor_score
from nltk import word_tokenize
from tqdm import tqdm
import nltk

# Fetch the NLTK resources this cell downloads before scoring
nltk.download('punkt')
nltk.download('wordnet')

# NOTE(review): word_tokenize and single_meteor_score are called with their defaults;
# presumably their tokenizer/stem/synonym resources target English, so Dutch and
# Croatian scores may mostly reflect exact token matches — confirm before reporting.

# Per-row METEOR scores, one list per translation direction
dutch_to_croatian_meteor_scores = []
croatian_to_dutch_meteor_scores = []

# Walk the four text columns in lockstep, one tuple per DataFrame row
row_texts = zip(
    evaluation_df['Dutch'],
    evaluation_df['Translated Dutch'],
    evaluation_df['Croatian'],
    evaluation_df['Translated Croatian'],
)
for dutch_text, dutch_output, croatian_text, croatian_output in tqdm(row_texts, total=len(evaluation_df)):
    # Tokenize references and hypotheses
    dutch_ref_tokens = word_tokenize(dutch_text)
    dutch_hyp_tokens = word_tokenize(dutch_output)
    croatian_ref_tokens = word_tokenize(croatian_text)
    croatian_hyp_tokens = word_tokenize(croatian_output)

    # Dutch -> Croatian: Croatian reference vs. the model's Croatian output
    dutch_to_croatian_meteor_scores.append(
        single_meteor_score(croatian_ref_tokens, croatian_hyp_tokens)
    )

    # Croatian -> Dutch: Dutch reference vs. the model's Dutch output
    croatian_to_dutch_meteor_scores.append(
        single_meteor_score(dutch_ref_tokens, dutch_hyp_tokens)
    )

# Store the scores as new columns
evaluation_df['Croatian to Dutch METEOR'] = croatian_to_dutch_meteor_scores
evaluation_df['Dutch to Croatian METEOR'] = dutch_to_croatian_meteor_scores

# Show the first rows of the updated DataFrame
print(evaluation_df.head())

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
100%|██████████| 2226/2226 [00:10<00:00, 203.39it/s]

   Unnamed: 0                                              Dutch  \
0           0  Op maandag kondigden wetenschappers van de Sta...   
1           1  Hoofdonderzoekers zeggen dat dit kan leiden to...   
2           2  De JAS 39C Gripen stortte rond 09.30 uur lokal...   
3           3  De piloot werd geïdentificeerd als majoor Dilo...   
4           4  De lokale media meldt dat er tijdens een actie...   

                                            Croatian  \
0  U ponedjeljak su znanstvenici s Medicinskog fa...   
1  Voditelji istraživanja izjavili su da bi ovo o...   
2  JAS 39C Gripen srušio se na pistu i eksplodira...   
3  Pilot je identificiran kao zapovjednik eskadri...   
4  Lokalni mediji izvješćuju da je došlo do prevr...   

                                    Translated Dutch  \
0  On Monday, scientists at Stanford Medical Scho...   
1  Onderzoekers hebben aangetoond dat deze ontdek...   
2  JAS 39C Gripen crashed in de pis en exploded r...   
3  De pilot is identified as C




In [None]:
# Mean of the per-row METEOR scores for Croatian -> Dutch.
print(evaluation_df["Croatian to Dutch METEOR"].mean())

0.2795983714525936


In [None]:
# Mean of the per-row METEOR scores for Dutch -> Croatian.
print(evaluation_df["Dutch to Croatian METEOR"].mean())

0.12428641344287146


In [None]:
!pip install evaluate



In [None]:
!pip install unbabel-comet



In [None]:
from evaluate import load
# Load the COMET metric through the `evaluate` library; it is used as a global by
# compute_comet_in_batches. NOTE(review): the download log of this cell points at the
# Unbabel/wmt22-comet-da checkpoint — confirm that is the intended model.
comet_metric = load('comet')

Downloading builder script:   0%|          | 0.00/6.97k [00:00<?, ?B/s]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

hparams.yaml:   0%|          | 0.00/567 [00:00<?, ?B/s]

LICENSE:   0%|          | 0.00/9.69k [00:00<?, ?B/s]

.gitattributes:   0%|          | 0.00/1.48k [00:00<?, ?B/s]

README.md:   0%|          | 0.00/3.53k [00:00<?, ?B/s]

model.ckpt:   0%|          | 0.00/2.32G [00:00<?, ?B/s]

INFO:pytorch_lightning.utilities.migration.utils:Lightning automatically upgraded your loaded checkpoint from v1.8.3.post1 to v2.2.0.post0. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint ../root/.cache/huggingface/hub/models--Unbabel--wmt22-comet-da/snapshots/371e9839ca4e213dde891b066cf3080f75ec7e72/checkpoints/model.ckpt`


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.10M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/616 [00:00<?, ?B/s]

/usr/local/lib/python3.10/dist-packages/pytorch_lightning/core/saving.py:188: Found keys that are not in the model state dict but in the checkpoint: ['encoder.model.embeddings.position_ids']


In [None]:
import numpy as np
from tqdm import tqdm

def compute_comet_in_batches(data, batch_size=100):
    """Score translations with the global ``comet_metric`` in fixed-size batches.

    Args:
        data: mapping with three parallel lists under the keys 'sources',
            'predictions' and 'references'.
        batch_size: number of segments passed to ``comet_metric.compute`` per call.

    Returns:
        A flat list of the values returned under the 'scores' key of each
        batch result, concatenated in input order.

    Raises:
        ValueError: if ``batch_size`` is not positive, or if the three lists
            do not all have the same length.
    """
    if batch_size <= 0:
        raise ValueError(f"batch_size must be positive, got {batch_size}")

    sources = data['sources']
    predictions = data['predictions']
    references = data['references']
    # Batches are cut by position, so unequal lengths would silently misalign segments.
    if not (len(sources) == len(predictions) == len(references)):
        raise ValueError(
            "sources, predictions and references must have the same length, got "
            f"{len(sources)}, {len(predictions)} and {len(references)}"
        )

    scores = []
    for start in tqdm(range(0, len(sources), batch_size)):
        end = start + batch_size
        batch_results = comet_metric.compute(
            predictions=predictions[start:end],
            references=references[start:end],
            sources=sources[start:end],
        )
        scores.extend(batch_results["scores"])
    return scores

# --- Croatian -> Dutch: Croatian is the source, Dutch the reference ---
data_cro_to_dut = dict(
    sources=evaluation_df['Croatian'].tolist(),
    predictions=evaluation_df['Translated Dutch'].tolist(),
    references=evaluation_df['Dutch'].tolist(),
)
cro_to_dut_scores = compute_comet_in_batches(data_cro_to_dut)
# Stored values are rounded to five decimals
evaluation_df['Croatian to Dutch COMET'] = list(map(lambda score: round(score, 5), cro_to_dut_scores))

# --- Dutch -> Croatian: Dutch is the source, Croatian the reference ---
data_dut_to_cro = dict(
    sources=evaluation_df['Dutch'].tolist(),
    predictions=evaluation_df['Translated Croatian'].tolist(),
    references=evaluation_df['Croatian'].tolist(),
)
dut_to_cro_scores = compute_comet_in_batches(data_dut_to_cro)
# Stored values are rounded to five decimals
evaluation_df['Dutch to Croatian COMET'] = list(map(lambda score: round(score, 5), dut_to_cro_scores))

# Show the first rows of the updated DataFrame
print(evaluation_df.head())

  0%|          | 0/23 [00:00<?, ?it/s]INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  4%|▍         | 1/23 [00:07<02:38,  7.22s/it]INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  9%|▊         | 2/23 [00:15<02:44,  7.82s/it]INFO:pytorch_lightning.utilities.rank_zero:GPU availabl

   Unnamed: 0                                              Dutch  \
0           0  Op maandag kondigden wetenschappers van de Sta...   
1           1  Hoofdonderzoekers zeggen dat dit kan leiden to...   
2           2  De JAS 39C Gripen stortte rond 09.30 uur lokal...   
3           3  De piloot werd geïdentificeerd als majoor Dilo...   
4           4  De lokale media meldt dat er tijdens een actie...   

                                            Croatian  \
0  U ponedjeljak su znanstvenici s Medicinskog fa...   
1  Voditelji istraživanja izjavili su da bi ovo o...   
2  JAS 39C Gripen srušio se na pistu i eksplodira...   
3  Pilot je identificiran kao zapovjednik eskadri...   
4  Lokalni mediji izvješćuju da je došlo do prevr...   

                                    Translated Dutch  \
0  On Monday, scientists at Stanford Medical Scho...   
1  Onderzoekers hebben aangetoond dat deze ontdek...   
2  JAS 39C Gripen crashed in de pis en exploded r...   
3  De pilot is identified as C




In [None]:
# Display the frame with all BLEU, METEOR and COMET columns added by the cells above.
evaluation_df

Unnamed: 0.1,Unnamed: 0,Dutch,Croatian,Translated Dutch,Translated Croatian,Croatian to Dutch BLEU,Dutch to Croatian BLEU,Croatian to Dutch METEOR,Dutch to Croatian METEOR,Croatian to Dutch COMET,Dutch to Croatian COMET
0,0,Op maandag kondigden wetenschappers van de Sta...,U ponedjeljak su znanstvenici s Medicinskog fa...,"On Monday, scientists at Stanford Medical Scho...","Monday, scientists from the Stanford Universit...",2.137018,0.961745,0.133229,0.022523,0.64900,0.44438
1,1,Hoofdonderzoekers zeggen dat dit kan leiden to...,Voditelji istraživanja izjavili su da bi ovo o...,Onderzoekers hebben aangetoond dat deze ontdek...,Znanstvenici say this can lead to early detect...,11.305237,1.175609,0.374371,0.045872,0.64009,0.52624
2,2,De JAS 39C Gripen stortte rond 09.30 uur lokal...,JAS 39C Gripen srušio se na pistu i eksplodira...,JAS 39C Gripen crashed in de pis en exploded r...,JAS 39C Gripen jeoparm jeoparm jeoparm jeoparm...,5.913213,3.321510,0.194711,0.099474,0.53131,0.23477
3,3,De piloot werd geïdentificeerd als majoor Dilo...,Pilot je identificiran kao zapovjednik eskadri...,De pilot is identified as Commander Dilokrit P...,A pilot was identified as Major Dilokrit Patta...,19.070828,17.747405,0.416667,0.416667,0.76856,0.69039
4,4,De lokale media meldt dat er tijdens een actie...,Lokalni mediji izvješćuju da je došlo do prevr...,De local media reports that there was een vrac...,I've heard from the local media that there was...,4.780204,2.033127,0.245051,0.026455,0.49873,0.30317
...,...,...,...,...,...,...,...,...,...,...,...
2221,212,Mijn naam is Andrea.,Moje ime je Andrea.,Mijn naam is Andrea.,Moje ime je Andrea.,100.000000,100.000000,0.996000,0.996000,0.99040,0.99322
2222,213,Jupiter is een planeet.,Jupiter je planet.,Jupiter is a planet.,Jupiter je planet.,23.643540,100.000000,0.511111,0.992188,0.90708,0.98727
2223,214,Venus is een planeet.,Venera je planet.,Venus is a planet.,Venus je planet.,23.643540,59.460356,0.511111,0.736111,0.88377,0.93232
2224,215,God is een olifant.,Bog je slon.,God is an elephant.,God is an elephant.,23.643540,10.682175,0.511111,0.121951,0.74346,0.55488


In [None]:
# Mean of the per-row COMET scores for Croatian -> Dutch.
print(evaluation_df["Croatian to Dutch COMET"].mean())

0.6467908221024259


In [None]:
# Mean of the per-row COMET scores for Dutch -> Croatian.
print(evaluation_df["Dutch to Croatian COMET"].mean())

0.5592741464510332


In [None]:
# Persist the translations and all metric columns. index=False keeps the row index out
# of the file, so reloading it does not add another 'Unnamed: 0' column like the one
# already present in the loaded data.
evaluation_df.to_csv('/content/drive/MyDrive/mbart50_large_evaluation_1.csv', index=False)