In [1]:
!pip install -U evaluate sacrebleu unbabel-comet

Collecting evaluate
  Downloading evaluate-0.4.1-py3-none-any.whl (84 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting sacrebleu
  Downloading sacrebleu-2.4.1-py3-none-any.whl (106 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m106.6/106.6 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting unbabel-comet
  Downloading unbabel_comet-2.2.2-py3-none-any.whl (93 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m93.1/93.1 kB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting datasets>=2.0.0 (from evaluate)
  Downloading datasets-2.18.0-py3-none-any.whl (510 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m510.5/510.5 kB[0m [31m16.8 MB/s[0m eta [36m0:00:00[0m
Collecting dill (from evaluate)
  Downloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB

In [2]:
from google.colab import drive
import pandas as pd

# Attach Google Drive to the Colab filesystem so the evaluation CSV is reachable.
drive.mount('/content/drive')

# Read the evaluation set; later cells use its 'Dutch' and 'Croatian' columns.
evaluation_df = pd.read_csv(filepath_or_buffer='/content/drive/MyDrive/evaluation_df.csv')

Mounted at /content/drive


In [3]:
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast

# Checkpoint directory on Google Drive. Both the model weights and the tokenizer
# files are read from this one location, so it has to contain both.
model_save_path = '/content/drive/MyDrive/models-finetuning-1/mbart50_large_nl_hr/finetuned-mbart-large-50-nl-to-hr-mmt/checkpoint-46095'

# Restore the tokenizer and the fine-tuned model from the checkpoint.
tokenizer = MBart50TokenizerFast.from_pretrained(model_save_path)
model = MBartForConditionalGeneration.from_pretrained(model_save_path)

print('Model and tokenizer have been loaded successfully.')

Model and tokenizer have been loaded successfully.


In [4]:
import torch
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Move the weights to that device and switch to inference mode. The call chain
# is the cell's last expression, so the model summary is still displayed.
model.to(device).eval()

MBartForConditionalGeneration(
  (model): MBartModel(
    (shared): Embedding(250054, 1024, padding_idx=1)
    (encoder): MBartEncoder(
      (embed_tokens): Embedding(250054, 1024, padding_idx=1)
      (embed_positions): MBartLearnedPositionalEmbedding(1026, 1024)
      (layers): ModuleList(
        (0-11): 12 x MBartEncoderLayer(
          (self_attn): MBartAttention(
            (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
          )
          (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
          (activation_fn): ReLU()
          (fc1): Linear(in_features=1024, out_features=4096, bias=True)
          (fc2): Linear(in_features=4096, out_features=1024, bias=True)
          (final_layer_norm): LayerNor

In [5]:
def translate_dutch_to_croatian(texts, max_length=64):
    """Translate a batch of Dutch sentences to Croatian with the loaded mBART model.

    Args:
        texts: List of Dutch sentences forming one batch.
        max_length: Maximum number of source tokens kept per sentence; longer
            inputs are truncated by the tokenizer. Defaults to 64, the value
            previously hard-coded here.

    Returns:
        List of Croatian translations, one per input sentence, in input order.
        An empty batch returns an empty list without touching the model.
    """
    # Nothing to translate: skip the tokenizer/model round trip entirely.
    if len(texts) == 0:
        return []

    # mBART-50 uses 'nl_XX' as the language code for Dutch source text.
    tokenizer.src_lang = "nl_XX"

    # Put the encoded batch on whatever device the model lives on, instead of
    # assuming CUDA, so the function also works on a CPU-only runtime.
    encoded_nl = tokenizer(
        texts,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=max_length,
    ).to(model.device)

    # Force the decoder to start with the Croatian language token.
    generated_tokens = model.generate(
        **encoded_nl,
        forced_bos_token_id=tokenizer.lang_code_to_id["hr_HR"],
    )

    # Decode the whole batch at once, dropping padding and language tokens.
    return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)

In [6]:
from tqdm.auto import tqdm
from sacrebleu.metrics import BLEU

# Sentence-level BLEU needs `effective_order=True`: with the default setting a
# segment that has fewer than four tokens contains no 4-grams and scores ~0
# even when it matches the reference exactly.
bleu = BLEU(effective_order=True)

# Collected across all batches, in DataFrame row order.
dutch_to_croatian_translations = []
dutch_to_croatian_bleu_scores = []

batch_size = 8  # Number of sentences translated per model call
for i in tqdm(range(0, len(evaluation_df), batch_size)):
    batch = evaluation_df.iloc[i:i+batch_size]

    # Translate this batch from Dutch to Croatian
    translated_croatian_batch = translate_dutch_to_croatian(batch['Dutch'].tolist())
    dutch_to_croatian_translations.extend(translated_croatian_batch)

    # Score every hypothesis against its single Croatian reference
    for translated_croatian, original_croatian in zip(translated_croatian_batch, batch['Croatian'].tolist()):
        croatian_score = bleu.sentence_score(translated_croatian, [original_croatian])
        dutch_to_croatian_bleu_scores.append(croatian_score.score)

# Store the translations and their per-sentence BLEU scores next to the inputs
evaluation_df['Translated Croatian'] = dutch_to_croatian_translations
evaluation_df['Dutch to Croatian BLEU'] = dutch_to_croatian_bleu_scores

  0%|          | 0/279 [00:00<?, ?it/s]



In [7]:
# Display the frame to eyeball the translations and their per-sentence BLEU
# scores; pandas elides the middle rows of a long frame.
evaluation_df

Unnamed: 0.1,Unnamed: 0,Dutch,Croatian,Translated Croatian,Dutch to Croatian BLEU
0,0,Op maandag kondigden wetenschappers van de Sta...,U ponedjeljak su znanstvenici s Medicinskog fa...,U ponedjeljak su znanstvenici iz Medicinskog f...,21.088685
1,1,Hoofdonderzoekers zeggen dat dit kan leiden to...,Voditelji istraživanja izjavili su da bi ovo o...,Glavni znanstvenici kažu da to može dovesti do...,14.456852
2,2,De JAS 39C Gripen stortte rond 09.30 uur lokal...,JAS 39C Gripen srušio se na pistu i eksplodira...,JAS 39C Gripen srušio se na piste oko 09 30 sa...,39.328881
3,3,De piloot werd geïdentificeerd als majoor Dilo...,Pilot je identificiran kao zapovjednik eskadri...,Pilot je identificiran kao majstor Dilokrit Pa...,44.124845
4,4,De lokale media meldt dat er tijdens een actie...,Lokalni mediji izvješćuju da je došlo do prevr...,Lokalna medija izvješćuju da je vatrogasna kol...,7.947829
...,...,...,...,...,...
2221,212,Mijn naam is Andrea.,Moje ime je Andrea.,Moje ime je Andrea.,100.000000
2222,213,Jupiter is een planeet.,Jupiter je planet.,Jupiter je planet.,100.000000
2223,214,Venus is een planeet.,Venera je planet.,Venera je planet.,100.000000
2224,215,God is een olifant.,Bog je slon.,Bog je slon.,100.000000


In [8]:
import pandas as pd
from nltk.translate.meteor_score import single_meteor_score
from nltk import word_tokenize
import nltk
from tqdm import tqdm

# Fetch the NLTK data used below: 'punkt' for word_tokenize, 'wordnet' for METEOR.
for nltk_resource in ('punkt', 'wordnet'):
    nltk.download(nltk_resource)

# Pair each Croatian reference with the model's Croatian translation of the
# same row. Both sides are Croatian; the Dutch source is not used by METEOR.
reference_hypothesis_pairs = zip(
    evaluation_df['Croatian'],
    evaluation_df['Translated Croatian'],
)

# Tokenize both sides and score them, passing the reference tokens first and
# the hypothesis tokens second.
dutch_to_croatian_meteor_scores = [
    single_meteor_score(word_tokenize(reference_text), word_tokenize(hypothesis_text))
    for reference_text, hypothesis_text in tqdm(reference_hypothesis_pairs, total=evaluation_df.shape[0])
]

# Keep the per-sentence METEOR scores alongside the other metrics.
evaluation_df['Dutch to Croatian METEOR'] = dutch_to_croatian_meteor_scores

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
100%|██████████| 2226/2226 [00:03<00:00, 606.51it/s] 


In [9]:
from evaluate import load
from tqdm import tqdm

# Load the COMET metric through the `evaluate` library. In the run recorded
# below, this step downloaded the Unbabel/wmt22-comet-da checkpoint (~2.3 GB).
comet_metric = load('comet')

# Function to compute COMET scores in batches
def compute_comet_in_batches(sources, predictions, references, batch_size=100):
    """Score translations with COMET, feeding the metric one slice at a time.

    Args:
        sources: Source-language sentences.
        predictions: System translations, aligned with ``sources``.
        references: Reference translations, aligned with ``sources``.
        batch_size: Number of segments passed to each ``comet_metric.compute``
            call. Must be positive.

    Returns:
        List with one COMET score per segment, in input order. Empty inputs
        return an empty list without calling the metric.

    Raises:
        ValueError: If the three inputs differ in length or ``batch_size`` is
            not positive.
    """
    n_segments = len(sources)

    # The loop below is driven by len(sources); without this check, unequal
    # inputs would be sliced out of step or partly ignored.
    if len(predictions) != n_segments or len(references) != n_segments:
        raise ValueError(
            "sources, predictions and references must have the same length, got "
            f"{n_segments}, {len(predictions)} and {len(references)}"
        )
    if batch_size <= 0:
        raise ValueError(f"batch_size must be positive, got {batch_size}")
    if n_segments == 0:
        return []

    scores = []
    for start in tqdm(range(0, n_segments, batch_size)):
        stop = start + batch_size
        batch_results = comet_metric.compute(
            predictions=predictions[start:stop],
            references=references[start:stop],
            sources=sources[start:stop],
        )
        # "scores" holds the per-segment values for this slice.
        scores.extend(batch_results["scores"])
    return scores

# Collect the three parallel text lists COMET needs from evaluation_df, keyed
# by the argument names of compute_comet_in_batches.
data_dutch_to_cro = dict(
    sources=evaluation_df['Dutch'].tolist(),                  # Dutch source sentences
    predictions=evaluation_df['Translated Croatian'].tolist(),  # model output in Croatian
    references=evaluation_df['Croatian'].tolist(),            # Croatian reference sentences
)

# Score every segment, then store the values rounded to five decimals.
dutch_to_cro_scores = compute_comet_in_batches(**data_dutch_to_cro)
evaluation_df['Dutch to Croatian COMET'] = list(
    map(lambda comet_value: round(comet_value, 5), dutch_to_cro_scores)
)

# Show the first rows with the new column.
print(evaluation_df.head())

Downloading builder script:   0%|          | 0.00/6.97k [00:00<?, ?B/s]

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

README.md:   0%|          | 0.00/3.53k [00:00<?, ?B/s]

LICENSE:   0%|          | 0.00/9.69k [00:00<?, ?B/s]

hparams.yaml:   0%|          | 0.00/567 [00:00<?, ?B/s]

.gitattributes:   0%|          | 0.00/1.48k [00:00<?, ?B/s]

model.ckpt:   0%|          | 0.00/2.32G [00:00<?, ?B/s]

INFO:pytorch_lightning.utilities.migration.utils:Lightning automatically upgraded your loaded checkpoint from v1.8.3.post1 to v2.2.1. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint ../root/.cache/huggingface/hub/models--Unbabel--wmt22-comet-da/snapshots/371e9839ca4e213dde891b066cf3080f75ec7e72/checkpoints/model.ckpt`


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.10M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/616 [00:00<?, ?B/s]

/usr/local/lib/python3.10/dist-packages/pytorch_lightning/core/saving.py:188: Found keys that are not in the model state dict but in the checkpoint: ['encoder.model.embeddings.position_ids']
  0%|          | 0/23 [00:00<?, ?it/s]INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  4%|▍         | 1/23 [00:04<01:40,  4.56s/it]INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0

   Unnamed: 0                                              Dutch  \
0           0  Op maandag kondigden wetenschappers van de Sta...   
1           1  Hoofdonderzoekers zeggen dat dit kan leiden to...   
2           2  De JAS 39C Gripen stortte rond 09.30 uur lokal...   
3           3  De piloot werd geïdentificeerd als majoor Dilo...   
4           4  De lokale media meldt dat er tijdens een actie...   

                                            Croatian  \
0  U ponedjeljak su znanstvenici s Medicinskog fa...   
1  Voditelji istraživanja izjavili su da bi ovo o...   
2  JAS 39C Gripen srušio se na pistu i eksplodira...   
3  Pilot je identificiran kao zapovjednik eskadri...   
4  Lokalni mediji izvješćuju da je došlo do prevr...   

                                 Translated Croatian  Dutch to Croatian BLEU  \
0  U ponedjeljak su znanstvenici iz Medicinskog f...               21.088685   
1  Glavni znanstvenici kažu da to može dovesti do...               14.456852   
2  JAS 39C Gri




In [10]:
# Average the per-sentence scores of each metric for the Dutch -> Croatian
# direction. The BLEU figure is the mean of sentence-level scores, not a
# corpus-level BLEU.
average_dutch_to_croatian_bleu = evaluation_df['Dutch to Croatian BLEU'].mean()
average_dutch_to_croatian_meteor = evaluation_df['Dutch to Croatian METEOR'].mean()
average_dutch_to_croatian_comet = evaluation_df['Dutch to Croatian COMET'].mean()

# Print the averages
print("Translation scores from Dutch to Croatian:")
print(f"Average BLEU: {average_dutch_to_croatian_bleu}")
print(f"Average METEOR: {average_dutch_to_croatian_meteor}")
print(f"Average COMET: {average_dutch_to_croatian_comet}")

Translation scores from Dutch to Croatian:
Average BLEU: 18.11643381992551
Average METEOR: 0.4258536570063589
Average COMET: 0.852143459119497


In [11]:
# Save the translations and scores. `index=False` keeps the row index out of
# the file, so reading it back does not add another "Unnamed: 0" column like
# the one already present in the input CSV.
evaluation_df.to_csv('/content/drive/MyDrive/evaluation_df_mbart50_1_nl_hr.csv', index=False)