In [1]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

In [2]:
# Load your model from Hugging Face Hub
model_name = "Eshan210352R/mt5-small-instruct-ft-lora"

In [3]:
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [4]:
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

In [5]:
def translate(text: str, source_lang: str, target_lang: str, max_length: int = 128) -> str:
    input_text = f"translate {source_lang} to {target_lang}: {text}"
    inputs = tokenizer(input_text, return_tensors="pt", truncation=True, max_length=max_length).to(device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_length,
            num_beams=4,
            early_stopping=True
        )

    translated = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return translated




In [6]:
# English → French
en_text = "How are you today?"
fr_translation = translate(en_text, "English", "French")
print("EN → FR:", fr_translation)

EN → FR: Vous êtes aujourd'hui?


In [7]:
from datasets import load_dataset

In [8]:
# ===============================
# Load OPUS-100 en-fr dataset
# ===============================
dataset = load_dataset("opus100", "en-fr")["train"].select(range(100))

In [9]:
# ===============================
# Translate and collect outputs
# ===============================
source_texts = dataset["translation"]
english_sentences = [item["en"] for item in source_texts]
reference_french_sentences = [item["fr"] for item in source_texts]

In [10]:
predicted_french_sentences = []

In [11]:
print("Translating 1000 English sentences using fine-tuned mT5...\n")

Translating 1000 English sentences using fine-tuned mT5...



In [12]:
from tqdm import tqdm

In [13]:
for sent in tqdm(english_sentences):
    translated = translate(sent, "English", "French")
    predicted_french_sentences.append(translated)

100%|██████████| 100/100 [00:46<00:00,  2.16it/s]


In [14]:
predicted_french_sentences


['Le temps est 05:08.',
 "Cette Regulation doit être en force sur le sixième jour de la publication en l'Journal de la Union.",
 "Bonjour, ce n'est pas ça?",
 'Je vous apprends tout ce que tu connais.',
 'Vous avez trouver un chose?',
 'Article 6',
 "Oh, honey, il n'est pas vous fault.",
 'Je suis à lui maintenant.',
 "DG XVI, Internet site (electronic address: http://www.cec.lu/en/comm/dg16/dg16home.html) a été publié dans toutes langues de l'Union Européenne.",
 "Je n'est pas.",
 'Steven, quand vous avez écrit ?',
 'Afrique et Other States Group (monthly meeting)',
 "Je suis à l'aide de leurs enfants.",
 'Oui!',
 'Oh, sorry.',
 "Je n'ai pas pas arrêter.",
 "Quand vous avez l'accord?",
 "Where's Prue?",
 'Mais mère, elle peut être un important.',
 "Irisa n'est pas le type de coddling.",
 'ANNEX',
 'Vous êtes à Talley?',
 "Je n'ai l'air.",
 'Quand a Matt expliqué?',
 'Tout de Part One',
 '- Quel est ce?',
 "Je suis à l'hôpital.",
 "'Intérêt public peut être à l'usage d'une clause d'une

In [15]:
!pip install sacrebleu




In [16]:
import sacrebleu

In [17]:
# ===============================
# Compute BLEU score using SacreBLEU
# ===============================
# SacreBLEU expects a list of references for each sentence
references = [[ref] for ref in reference_french_sentences]  # nested list
bleu = sacrebleu.corpus_bleu(predicted_french_sentences, references)

In [18]:
print("\n===== BLEU Score =====")
print(f"BLEU: {bleu.score:.2f}")


===== BLEU Score =====
BLEU: 16.52
