In [1]:
!pip install datasets

Collecting datasets
  Downloading datasets-2.20.0-py3-none-any.whl (547 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/547.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.9/547.8 kB[0m [31m2.7 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m542.7/547.8 kB[0m [31m9.1 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m547.8/547.8 kB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
Collecting pyarrow>=15.0.0 (from datasets)
  Downloading pyarrow-16.1.0-cp310-cp310-manylinux_2_28_x86_64.whl (40.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.8/40.8 MB[0m [31m14.2 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB

In [2]:
!pip install sacrebleu

Collecting sacrebleu
  Downloading sacrebleu-2.4.2-py3-none-any.whl (106 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/106.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m106.7/106.7 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting portalocker (from sacrebleu)
  Downloading portalocker-2.10.0-py3-none-any.whl (18 kB)
Collecting colorama (from sacrebleu)
  Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)
Installing collected packages: portalocker, colorama, sacrebleu
Successfully installed colorama-0.4.6 portalocker-2.10.0 sacrebleu-2.4.2


In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
from torch.optim import AdamW
from transformers import MarianMTModel, MarianTokenizer
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm
from sacrebleu import corpus_bleu

In [5]:
# Define custom dataset class
class TranslationDataset(Dataset):
    def __init__(self, source_texts, target_texts, tokenizer, max_length=256):
        self.source_texts = source_texts
        self.target_texts = target_texts
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.source_texts)

    def __getitem__(self, idx):
        source_text = self.source_texts[idx]
        target_text = self.target_texts[idx]
        source_inputs = self.tokenizer(source_text, return_tensors='pt', padding='max_length', truncation=True, max_length=self.max_length)
        target_inputs = self.tokenizer(target_text, return_tensors='pt', padding='max_length', truncation=True, max_length=self.max_length)
        source_inputs = {key: val.squeeze() for key, val in source_inputs.items()}
        target_inputs = {key: val.squeeze() for key, val in target_inputs.items()}
        return {'source_inputs': source_inputs, 'target_inputs': target_inputs}

def chunks(lst, n):
    for i in range(0, len(lst), n):
        yield lst[i:i + n]

def train_model(model, dataloader, optimizer, device):
    model.train()
    total_loss = 0
    for batch in tqdm(dataloader, desc="Training"):
        optimizer.zero_grad()
        input_ids = batch['source_inputs']['input_ids'].to(device)
        attention_mask = batch['source_inputs']['attention_mask'].to(device)
        labels = batch['target_inputs']['input_ids'].to(device)
        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(dataloader)

def evaluate_model(model, dataloader, device):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Evaluating"):
            input_ids = batch['source_inputs']['input_ids'].to(device)
            attention_mask = batch['source_inputs']['attention_mask'].to(device)
            labels = batch['target_inputs']['input_ids'].to(device)
            outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            total_loss += loss.item()
    return total_loss / len(dataloader)

def translate_texts(model, tokenizer, texts, device, batch_size=32):
    model.eval()
    translations = []
    for batch_texts in chunks(texts, batch_size):
        inputs = tokenizer(batch_texts, return_tensors='pt', padding=True, truncation=True, max_length=256)
        input_ids = inputs['input_ids'].to(device)
        attention_mask = inputs['attention_mask'].to(device)
        with torch.no_grad():
            translated = model.generate(input_ids=input_ids, attention_mask=attention_mask)
        translations.extend([tokenizer.decode(t, skip_special_tokens=True) for t in translated])
    return translations

In [6]:
# Load dataset
ds_fname = 'data.csv'
df = pd.read_csv(ds_fname)

In [7]:
# Split dataset
train_df, temp_df = train_test_split(df, test_size=0.2, random_state=42)
valid_df, test_df = train_test_split(temp_df, test_size=0.5, random_state=42)

In [8]:
# Prepare texts
train_texts_src = list(train_df['en'])
train_texts_tgt = list(train_df['vi'])
valid_texts_src = list(valid_df['en'])
valid_texts_tgt = list(valid_df['vi'])
test_texts_src = list(test_df['en'])
test_texts_tgt = list(test_df['vi'])  # Ground truth translations

In [None]:
# Define constants
BATCH_SIZE = 16
EPOCHS = 5
LEARNING_RATE = 5e-5
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

In [None]:
# Load model and tokenizer
model_name = 'Helsinki-NLP/opus-mt-en-vi'
tokenizer = MarianTokenizer.from_pretrained(model_name)
model = MarianMTModel.from_pretrained(model_name).to(DEVICE)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/44.0 [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/809k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/756k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.19M [00:00<?, ?B/s]



config.json:   0%|          | 0.00/1.39k [00:00<?, ?B/s]



pytorch_model.bin:   0%|          | 0.00/289M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

In [None]:
# Create datasets and dataloaders
train_dataset = TranslationDataset(train_texts_src, train_texts_tgt, tokenizer)
valid_dataset = TranslationDataset(valid_texts_src, valid_texts_tgt, tokenizer)
test_dataset = TranslationDataset(test_texts_src, test_texts_tgt, tokenizer)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=BATCH_SIZE)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE)

In [None]:
# Define optimizer
optimizer = AdamW(model.parameters(), lr=LEARNING_RATE)

# Training loop
for epoch in range(EPOCHS):
    print(f'Epoch {epoch+1}/{EPOCHS}')
    train_loss = train_model(model, train_loader, optimizer, DEVICE)
    valid_loss = evaluate_model(model, valid_loader, DEVICE)
    print(f'Training Loss: {train_loss:.4f}, Validation Loss: {valid_loss:.4f}')

Epoch 1/5


Training: 100%|██████████| 566/566 [05:44<00:00,  1.64it/s]
Evaluating: 100%|██████████| 71/71 [00:15<00:00,  4.46it/s]


Training Loss: 0.6745, Validation Loss: 0.4787
Epoch 2/5


Training: 100%|██████████| 566/566 [06:01<00:00,  1.57it/s]
Evaluating: 100%|██████████| 71/71 [00:16<00:00,  4.33it/s]


Training Loss: 0.4604, Validation Loss: 0.4044
Epoch 3/5


Training: 100%|██████████| 566/566 [05:59<00:00,  1.57it/s]
Evaluating: 100%|██████████| 71/71 [00:16<00:00,  4.29it/s]


Training Loss: 0.3884, Validation Loss: 0.3623
Epoch 4/5


Training: 100%|██████████| 566/566 [06:01<00:00,  1.57it/s]
Evaluating: 100%|██████████| 71/71 [00:16<00:00,  4.33it/s]


Training Loss: 0.3355, Validation Loss: 0.3356
Epoch 5/5


Training: 100%|██████████| 566/566 [06:00<00:00,  1.57it/s]
Evaluating: 100%|██████████| 71/71 [00:16<00:00,  4.33it/s]

Training Loss: 0.2947, Validation Loss: 0.3186





In [None]:
# Evaluate on test set
test_translations = translate_texts(model, tokenizer, test_texts_src, DEVICE)
references = [[text] for text in test_texts_tgt]
bleu_score = corpus_bleu(test_translations, references)
print(f'Test BLEU score: {bleu_score.score:.2f}')

Test BLEU score: 47.49


In [None]:
# Define constants
BATCH_SIZE = 8
EPOCHS = 5
LEARNING_RATE = 3e-5
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

In [None]:
# Load model and tokenizer
model_name = 'Helsinki-NLP/opus-mt-en-vi'
tokenizer = MarianTokenizer.from_pretrained(model_name)
model = MarianMTModel.from_pretrained(model_name).to(DEVICE)

In [None]:
# Create datasets and dataloaders
train_dataset = TranslationDataset(train_texts_src, train_texts_tgt, tokenizer)
valid_dataset = TranslationDataset(valid_texts_src, valid_texts_tgt, tokenizer)
test_dataset = TranslationDataset(test_texts_src, test_texts_tgt, tokenizer)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=BATCH_SIZE)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE)

In [None]:
# Define optimizer
optimizer = AdamW(model.parameters(), lr=LEARNING_RATE)

# Training loop
for epoch in range(EPOCHS):
    print(f'Epoch {epoch+1}/{EPOCHS}')
    train_loss = train_model(model, train_loader, optimizer, DEVICE)
    valid_loss = evaluate_model(model, valid_loader, DEVICE)
    print(f'Training Loss: {train_loss:.4f}, Validation Loss: {valid_loss:.4f}')

Epoch 1/5


Training: 100%|██████████| 1132/1132 [06:25<00:00,  2.94it/s]
Evaluating: 100%|██████████| 142/142 [00:16<00:00,  8.38it/s]


Training Loss: 0.6627, Validation Loss: 0.4798
Epoch 2/5


Training: 100%|██████████| 1132/1132 [06:26<00:00,  2.93it/s]
Evaluating: 100%|██████████| 142/142 [00:16<00:00,  8.36it/s]


Training Loss: 0.4622, Validation Loss: 0.4073
Epoch 3/5


Training: 100%|██████████| 1132/1132 [06:25<00:00,  2.94it/s]
Evaluating: 100%|██████████| 142/142 [00:16<00:00,  8.41it/s]


Training Loss: 0.3924, Validation Loss: 0.3662
Epoch 4/5


Training: 100%|██████████| 1132/1132 [06:27<00:00,  2.92it/s]
Evaluating: 100%|██████████| 142/142 [00:16<00:00,  8.39it/s]


Training Loss: 0.3410, Validation Loss: 0.3394
Epoch 5/5


Training: 100%|██████████| 1132/1132 [06:26<00:00,  2.93it/s]
Evaluating: 100%|██████████| 142/142 [00:16<00:00,  8.35it/s]

Training Loss: 0.3007, Validation Loss: 0.3222





In [None]:
# Evaluate on test set
test_translations = translate_texts(model, tokenizer, test_texts_src, DEVICE)
references = [[text] for text in test_texts_tgt]
bleu_score = corpus_bleu(test_translations, references)
print(f'Test BLEU score: {bleu_score.score:.2f}')

Test BLEU score: 34.78


In [9]:
# Define constants
BATCH_SIZE = 8
EPOCHS = 5
LEARNING_RATE = 3e-5
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

In [26]:
# Load model and tokenizer
model_name = 'Helsinki-NLP/opus-mt-en-vi'
tokenizer = MarianTokenizer.from_pretrained(model_name)
model = MarianMTModel.from_pretrained(model_name).to(DEVICE)

In [11]:
# Create datasets and dataloaders
train_dataset = TranslationDataset(train_texts_src, train_texts_tgt, tokenizer)
valid_dataset = TranslationDataset(valid_texts_src, valid_texts_tgt, tokenizer)
test_dataset = TranslationDataset(test_texts_src, test_texts_tgt, tokenizer)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=BATCH_SIZE)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE)

In [12]:
# Import SGD optimizer
from torch.optim import SGD

# Define SGD optimizer
optimizer = SGD(model.parameters(), lr=LEARNING_RATE, momentum=0.9)

# Training loop
for epoch in range(EPOCHS):
    print(f'Epoch {epoch+1}/{EPOCHS}')
    train_loss = train_model(model, train_loader, optimizer, DEVICE)
    valid_loss = evaluate_model(model, valid_loader, DEVICE)
    print(f'Training Loss: {train_loss:.4f}, Validation Loss: {valid_loss:.4f}')

Epoch 1/5


Training: 100%|██████████| 1132/1132 [06:06<00:00,  3.08it/s]
Evaluating: 100%|██████████| 142/142 [00:17<00:00,  8.11it/s]


Training Loss: 1.2432, Validation Loss: 1.0118
Epoch 2/5


Training: 100%|██████████| 1132/1132 [06:07<00:00,  3.08it/s]
Evaluating: 100%|██████████| 142/142 [00:16<00:00,  8.43it/s]


Training Loss: 1.0059, Validation Loss: 0.9294
Epoch 3/5


Training: 100%|██████████| 1132/1132 [06:07<00:00,  3.08it/s]
Evaluating: 100%|██████████| 142/142 [00:16<00:00,  8.43it/s]


Training Loss: 0.9450, Validation Loss: 0.8833
Epoch 4/5


Training: 100%|██████████| 1132/1132 [06:07<00:00,  3.08it/s]
Evaluating: 100%|██████████| 142/142 [00:16<00:00,  8.42it/s]


Training Loss: 0.9066, Validation Loss: 0.8529
Epoch 5/5


Training: 100%|██████████| 1132/1132 [06:07<00:00,  3.08it/s]
Evaluating: 100%|██████████| 142/142 [00:16<00:00,  8.41it/s]

Training Loss: 0.8795, Validation Loss: 0.8303





In [13]:
# Evaluate on test set
test_translations = translate_texts(model, tokenizer, test_texts_src, DEVICE)
references = [[text] for text in test_texts_tgt]
bleu_score = corpus_bleu(test_translations, references)
print(f'Test BLEU score: {bleu_score.score:.2f}')

Test BLEU score: 74.45


In [14]:
# Save the model and tokenizer
model_save_path = 'trained_translation_model'
model.save_pretrained(model_save_path)
tokenizer.save_pretrained(model_save_path)
print(f'Saved model and tokenizer to {model_save_path}')

Non-default generation parameters: {'max_length': 512, 'num_beams': 4, 'bad_words_ids': [[53684]], 'forced_eos_token_id': 0}


Saved model and tokenizer to trained_translation_model


In [15]:
# Define constants
BATCH_SIZE = 16
EPOCHS = 5
LEARNING_RATE = 3e-5
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

In [16]:
# Load model and tokenizer
model_name = 'Helsinki-NLP/opus-mt-en-vi'
tokenizer = MarianTokenizer.from_pretrained(model_name)
model = MarianMTModel.from_pretrained(model_name).to(DEVICE)



In [17]:
# Create datasets and dataloaders
train_dataset = TranslationDataset(train_texts_src, train_texts_tgt, tokenizer)
valid_dataset = TranslationDataset(valid_texts_src, valid_texts_tgt, tokenizer)
test_dataset = TranslationDataset(test_texts_src, test_texts_tgt, tokenizer)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=BATCH_SIZE)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE)

In [18]:
# Import Adagrad optimizer
from torch.optim import Adagrad

# Define Adagrad optimizer
optimizer = Adagrad(model.parameters(), lr=LEARNING_RATE)

# Training loop
for epoch in range(EPOCHS):
    print(f'Epoch {epoch+1}/{EPOCHS}')
    train_loss = train_model(model, train_loader, optimizer, DEVICE)
    valid_loss = evaluate_model(model, valid_loader, DEVICE)
    print(f'Training Loss: {train_loss:.4f}, Validation Loss: {valid_loss:.4f}')

Epoch 1/5


Training: 100%|██████████| 566/566 [05:55<00:00,  1.59it/s]
Evaluating: 100%|██████████| 71/71 [00:16<00:00,  4.33it/s]


Training Loss: 1.0755, Validation Loss: 0.8618
Epoch 2/5


Training: 100%|██████████| 566/566 [05:55<00:00,  1.59it/s]
Evaluating: 100%|██████████| 71/71 [00:16<00:00,  4.33it/s]


Training Loss: 0.8720, Validation Loss: 0.8011
Epoch 3/5


Training: 100%|██████████| 566/566 [05:56<00:00,  1.59it/s]
Evaluating: 100%|██████████| 71/71 [00:16<00:00,  4.31it/s]


Training Loss: 0.8241, Validation Loss: 0.7641
Epoch 4/5


Training: 100%|██████████| 566/566 [05:55<00:00,  1.59it/s]
Evaluating: 100%|██████████| 71/71 [00:16<00:00,  4.31it/s]


Training Loss: 0.7917, Validation Loss: 0.7377
Epoch 5/5


Training: 100%|██████████| 566/566 [05:55<00:00,  1.59it/s]
Evaluating: 100%|██████████| 71/71 [00:16<00:00,  4.32it/s]

Training Loss: 0.7680, Validation Loss: 0.7176





In [19]:
# Evaluate on test set
test_translations = translate_texts(model, tokenizer, test_texts_src, DEVICE)
references = [[text] for text in test_texts_tgt]
bleu_score = corpus_bleu(test_translations, references)
print(f'Test BLEU score: {bleu_score.score:.2f}')

Test BLEU score: 64.48


In [20]:
# Define constants
BATCH_SIZE = 32
EPOCHS = 5
LEARNING_RATE = 1e-5
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

In [21]:
# Load model and tokenizer
model_name = 'Helsinki-NLP/opus-mt-en-vi'
tokenizer = MarianTokenizer.from_pretrained(model_name)
model = MarianMTModel.from_pretrained(model_name).to(DEVICE)

In [22]:
# Create datasets and dataloaders
train_dataset = TranslationDataset(train_texts_src, train_texts_tgt, tokenizer)
valid_dataset = TranslationDataset(valid_texts_src, valid_texts_tgt, tokenizer)
test_dataset = TranslationDataset(test_texts_src, test_texts_tgt, tokenizer)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=BATCH_SIZE)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE)

In [23]:
# Define optimizer
optimizer = AdamW(model.parameters(), lr=LEARNING_RATE)

# Training loop
for epoch in range(EPOCHS):
    print(f'Epoch {epoch+1}/{EPOCHS}')
    train_loss = train_model(model, train_loader, optimizer, DEVICE)
    valid_loss = evaluate_model(model, valid_loader, DEVICE)
    print(f'Training Loss: {train_loss:.4f}, Validation Loss: {valid_loss:.4f}')

Epoch 1/5


Training: 100%|██████████| 283/283 [05:50<00:00,  1.24s/it]
Evaluating: 100%|██████████| 36/36 [00:15<00:00,  2.26it/s]


Training Loss: 1.1661, Validation Loss: 0.7566
Epoch 2/5


Training: 100%|██████████| 283/283 [05:50<00:00,  1.24s/it]
Evaluating: 100%|██████████| 36/36 [00:15<00:00,  2.27it/s]


Training Loss: 0.7292, Validation Loss: 0.6424
Epoch 3/5


Training: 100%|██████████| 283/283 [05:49<00:00,  1.24s/it]
Evaluating: 100%|██████████| 36/36 [00:15<00:00,  2.27it/s]


Training Loss: 0.6431, Validation Loss: 0.5812
Epoch 4/5


Training: 100%|██████████| 283/283 [05:50<00:00,  1.24s/it]
Evaluating: 100%|██████████| 36/36 [00:15<00:00,  2.26it/s]


Training Loss: 0.5909, Validation Loss: 0.5412
Epoch 5/5


Training: 100%|██████████| 283/283 [05:49<00:00,  1.24s/it]
Evaluating: 100%|██████████| 36/36 [00:15<00:00,  2.25it/s]

Training Loss: 0.5550, Validation Loss: 0.5142





In [24]:
# Evaluate on test set
test_translations = translate_texts(model, tokenizer, test_texts_src, DEVICE)
references = [[text] for text in test_texts_tgt]
bleu_score = corpus_bleu(test_translations, references)
print(f'Test BLEU score: {bleu_score.score:.2f}')

Test BLEU score: 8.25


In [25]:
# Display sample inputs, generated translations, and actual translations
sample_count = min(10, len(test_texts_src))  # Show up to 10 samples
samples = list(zip(test_texts_src[:sample_count], test_translations[:sample_count], test_texts_tgt[:sample_count]))
for i, (src, gen, act) in enumerate(samples):
    print(f'Sample {i + 1}')
    print(f'Input: {src}')
    print(f'Generated Translation: {gen}')
    print(f'Actual Translation: {act}')
    print('*' * 50)

Sample 1
Input: But that is how things are nowadays: when a man wishes to be clever he must . . . invent something special, and the way he does it must needs be the best!
Generated Translation: Nhưng người này là những người không này: một không không có thể không không không không không không không không không không không không không không nghiện nhười!
Actual Translation: Nhưng đó là cách mọi thứ đang diễn ra ngày nay: khi một người muốn trở nên thông minh, anh ta sẽ cần phải phát minh ra một thứ gì đó đặc biệt, và cách thức thực hiện điều đó cũng phải là tốt nhất!
**************************************************
Sample 2
Input: I've waited my entire life for the opportunity to help you.
Generated Translation: Tôi được chúng tôi được tôi của tôi được tôi được tôi có thể thể giúi của ông.
Actual Translation: Tôi đã đợi cả đời để có cơ hội giúp đỡ anh.
**************************************************
Sample 3
Input: From the Moulin Rouge to Boulogne and Notre Dame
Generated Transla