In [1]:
%%writefile config.py
import torch

class Config:
    TRAIN_PATH = '/content/small-train.json'
    DEV_PATH = '/content/small-dev.json'
    TEST_PATH = '/content/small-test.json'

    # JSON Keys
    SRC_KEY = 'english'
    TGT_KEY = 'vietnamese'

    # Model Params (Ch·ªânh s·ª≠a tho·∫£i m√°i t·∫°i ƒë√¢y)
    D_MODEL = 256
    N_ENC_LAYERS = 3
    N_DEC_LAYERS = 3
    DROPOUT = 0.5

    # Training Params
    BATCH_SIZE = 64
    LEARNING_RATE = 0.001
    NUM_EPOCHS = 15 # TƒÉng l√™n v√¨ ch·∫°y GPU nhanh h∆°n
    CLIP = 1.0

    # Device
    DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Special Tokens
    PAD_IDX = 0
    BOS_IDX = 1
    EOS_IDX = 2
    UNK_IDX = 3

Writing config.py


In [2]:
%%writefile vocab.py
import json
from collections import Counter
from config import Config

class Vocab:
    def __init__(self):
        self.stoi = {
            "<pad>": Config.PAD_IDX,
            "<bos>": Config.BOS_IDX,
            "<eos>": Config.EOS_IDX,
            "<unk>": Config.UNK_IDX,
        }
        self.itos = {v: k for k, v in self.stoi.items()}
        self.total_src_tokens = 4
        self.total_tgt_tokens = 4

    def build_vocab(self, json_path, src_key, tgt_key, min_freq=2):
        print(f"ƒêang x√¢y d·ª±ng Vocab t·ª´ {json_path}...")
        with open(json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        src_counter = Counter()
        tgt_counter = Counter()

        for item in data:
            # Tokenize ƒë∆°n gi·∫£n b·∫±ng split(), c√≥ th·ªÉ d√πng spacy n·∫øu mu·ªën x·ªãn h∆°n
            src_tokens = item[src_key].lower().split()
            tgt_tokens = item[tgt_key].lower().split()

            src_counter.update(src_tokens)
            tgt_counter.update(tgt_tokens)

        # Add Source Tokens
        for word, freq in src_counter.items():
            if freq >= min_freq and word not in self.stoi:
                self.stoi[word] = len(self.stoi)

        self.total_src_tokens = len(self.stoi)

        # Add Target Tokens (L∆∞u √Ω: Th∆∞·ªùng ng∆∞·ªùi ta t√°ch 2 vocab ri√™ng,
        # nh∆∞ng ƒë·ªÉ ƒë∆°n gi·∫£n cho lab n√†y ta g·ªôp chung ho·∫∑c ch·ªâ add th√™m t·ª´ m·ªõi)
        for word, freq in tgt_counter.items():
            if freq >= min_freq and word not in self.stoi:
                self.stoi[word] = len(self.stoi)

        self.total_tgt_tokens = len(self.stoi) # T·ªïng size vocab chung
        self.itos = {v: k for k, v in self.stoi.items()}
        print(f"Vocab size: {len(self.stoi)}")

    def encode(self, text):
        # Text string -> List of Indices
        tokens = text.lower().split()
        return [Config.BOS_IDX] + [self.stoi.get(token, Config.UNK_IDX) for token in tokens] + [Config.EOS_IDX]

    def decode(self, indices):
        # List of Indices -> Text String
        tokens = []
        for idx in indices:
            if idx == Config.EOS_IDX: break
            if idx in [Config.BOS_IDX, Config.PAD_IDX]: continue
            tokens.append(self.itos.get(idx, "<unk>"))
        return " ".join(tokens)

Writing vocab.py


In [3]:
%%writefile models.py
import torch
import torch.nn as nn
import torch.nn.functional as F
from config import Config

# ==========================================
# 1. VANILLA SEQ2SEQ
# ==========================================
class VanillaSeq2Seq(nn.Module):
    def __init__(self, vocab):
        super().__init__()
        self.vocab = vocab
        self.src_embedding = nn.Embedding(vocab.total_tgt_tokens, Config.D_MODEL, padding_idx=Config.PAD_IDX)
        self.tgt_embedding = nn.Embedding(vocab.total_tgt_tokens, 2*Config.D_MODEL, padding_idx=Config.PAD_IDX)

        self.encoder = nn.LSTM(Config.D_MODEL, Config.D_MODEL, Config.N_ENC_LAYERS,
                               batch_first=True, dropout=Config.DROPOUT, bidirectional=True)

        self.decoder = nn.LSTM(2*Config.D_MODEL, 2*Config.D_MODEL, Config.N_DEC_LAYERS,
                               batch_first=True, dropout=Config.DROPOUT, bidirectional=False)

        self.output_head = nn.Linear(2*Config.D_MODEL, vocab.total_tgt_tokens)

    def forward(self, x, y):
        embedded_x = self.src_embedding(x)
        _, (hidden, cell) = self.encoder(embedded_x)

        # Merge bidirectional hidden states
        hidden = hidden.view(Config.N_ENC_LAYERS, 2, x.size(0), -1).permute(0, 2, 1, 3).reshape(Config.N_ENC_LAYERS, x.size(0), -1)
        cell = cell.view(Config.N_ENC_LAYERS, 2, x.size(0), -1).permute(0, 2, 1, 3).reshape(Config.N_ENC_LAYERS, x.size(0), -1)

        dec_input = y[:, 0].unsqueeze(1)
        dec_hidden, dec_cell = hidden, cell
        outputs = []

        for t in range(1, y.shape[1]):
            embedded_input = self.tgt_embedding(dec_input)
            output, (dec_hidden, dec_cell) = self.decoder(embedded_input, (dec_hidden, dec_cell))
            prediction = self.output_head(output.squeeze(1))
            outputs.append(prediction.unsqueeze(1))
            dec_input = y[:, t].unsqueeze(1) # Teacher forcing

        return torch.cat(outputs, dim=1)

    def predict(self, x):
        self.eval()
        with torch.no_grad():
            embedded_x = self.src_embedding(x)
            _, (hidden, cell) = self.encoder(embedded_x)
            hidden = hidden.view(Config.N_ENC_LAYERS, 2, x.size(0), -1).permute(0, 2, 1, 3).reshape(Config.N_ENC_LAYERS, x.size(0), -1)
            cell = cell.view(Config.N_ENC_LAYERS, 2, x.size(0), -1).permute(0, 2, 1, 3).reshape(Config.N_ENC_LAYERS, x.size(0), -1)

            dec_input = torch.full((x.size(0), 1), Config.BOS_IDX, device=x.device)
            dec_hidden, dec_cell = hidden, cell
            preds = []

            for _ in range(50):
                embedded_input = self.tgt_embedding(dec_input)
                output, (dec_hidden, dec_cell) = self.decoder(embedded_input, (dec_hidden, dec_cell))
                top1 = self.output_head(output.squeeze(1)).argmax(1).unsqueeze(1)
                preds.append(top1)
                dec_input = top1
                if (top1 == Config.EOS_IDX).all(): break
            return torch.cat(preds, dim=1)

# ==========================================
# 2. BAHDANAU (ADDITIVE ATTENTION)
# ==========================================
class BahdanauSeq2Seq(nn.Module):
    def __init__(self, vocab):
        super().__init__()
        self.vocab = vocab
        self.src_embedding = nn.Embedding(vocab.total_tgt_tokens, Config.D_MODEL, padding_idx=Config.PAD_IDX)
        self.tgt_embedding = nn.Embedding(vocab.total_tgt_tokens, 2*Config.D_MODEL, padding_idx=Config.PAD_IDX)

        self.encoder = nn.LSTM(Config.D_MODEL, Config.D_MODEL, Config.N_ENC_LAYERS, batch_first=True, dropout=Config.DROPOUT, bidirectional=True)
        self.decoder = nn.LSTM(4*Config.D_MODEL, 2*Config.D_MODEL, Config.N_DEC_LAYERS, batch_first=True, dropout=Config.DROPOUT)

        self.attn = nn.Linear(4*Config.D_MODEL, 2*Config.D_MODEL)
        self.v = nn.Linear(2*Config.D_MODEL, 1, bias=False)
        self.output_head = nn.Linear(2*Config.D_MODEL, vocab.total_tgt_tokens)

    def forward(self, x, y):
        embedded_x = self.src_embedding(x)
        encoder_outputs, (hidden, cell) = self.encoder(embedded_x)

        hidden = hidden.view(Config.N_ENC_LAYERS, 2, x.size(0), -1).permute(0, 2, 1, 3).reshape(Config.N_ENC_LAYERS, x.size(0), -1)
        cell = cell.view(Config.N_ENC_LAYERS, 2, x.size(0), -1).permute(0, 2, 1, 3).reshape(Config.N_ENC_LAYERS, x.size(0), -1)

        dec_input = y[:, 0].unsqueeze(1)
        dec_hidden, dec_cell = hidden, cell
        outputs = []

        for t in range(1, y.shape[1]):
            # Attention
            last_hidden = dec_hidden[-1].unsqueeze(1).repeat(1, encoder_outputs.size(1), 1)
            energy = torch.tanh(self.attn(torch.cat((last_hidden, encoder_outputs), dim=2)))
            attention = F.softmax(self.v(energy).squeeze(2), dim=1).unsqueeze(1)
            context = torch.bmm(attention, encoder_outputs)

            embedded_input = self.tgt_embedding(dec_input)
            rnn_input = torch.cat((embedded_input, context), dim=2)
            output, (dec_hidden, dec_cell) = self.decoder(rnn_input, (dec_hidden, dec_cell))

            prediction = self.output_head(output.squeeze(1))
            outputs.append(prediction.unsqueeze(1))
            dec_input = y[:, t].unsqueeze(1)

        return torch.cat(outputs, dim=1)

    def predict(self, x):
        self.eval()
        with torch.no_grad():
            embedded_x = self.src_embedding(x)
            encoder_outputs, (hidden, cell) = self.encoder(embedded_x)

            hidden = hidden.view(Config.N_ENC_LAYERS, 2, x.size(0), -1).permute(0, 2, 1, 3).reshape(Config.N_ENC_LAYERS, x.size(0), -1)
            cell = cell.view(Config.N_ENC_LAYERS, 2, x.size(0), -1).permute(0, 2, 1, 3).reshape(Config.N_ENC_LAYERS, x.size(0), -1)

            dec_input = torch.full((x.size(0), 1), Config.BOS_IDX, device=x.device)
            dec_hidden, dec_cell = hidden, cell
            preds = []

            for _ in range(50):
                last_hidden = dec_hidden[-1].unsqueeze(1).repeat(1, encoder_outputs.size(1), 1)
                energy = torch.tanh(self.attn(torch.cat((last_hidden, encoder_outputs), dim=2)))
                attention = F.softmax(self.v(energy).squeeze(2), dim=1).unsqueeze(1)
                context = torch.bmm(attention, encoder_outputs)

                embedded_input = self.tgt_embedding(dec_input)
                rnn_input = torch.cat((embedded_input, context), dim=2)
                output, (dec_hidden, dec_cell) = self.decoder(rnn_input, (dec_hidden, dec_cell))

                top1 = self.output_head(output.squeeze(1)).argmax(1).unsqueeze(1)
                preds.append(top1)
                dec_input = top1
                if (top1 == Config.EOS_IDX).all(): break

            return torch.cat(preds, dim=1)

# ==========================================
# 3. LUONG (DOT ATTENTION)
# ==========================================
class LuongSeq2Seq(nn.Module):
    def __init__(self, vocab):
        super().__init__()
        self.vocab = vocab
        self.src_embedding = nn.Embedding(vocab.total_tgt_tokens, Config.D_MODEL, padding_idx=Config.PAD_IDX)
        self.tgt_embedding = nn.Embedding(vocab.total_tgt_tokens, 2*Config.D_MODEL, padding_idx=Config.PAD_IDX)
        self.encoder = nn.LSTM(Config.D_MODEL, Config.D_MODEL, Config.N_ENC_LAYERS, batch_first=True, dropout=Config.DROPOUT, bidirectional=True)
        self.decoder = nn.LSTM(2*Config.D_MODEL, 2*Config.D_MODEL, Config.N_DEC_LAYERS, batch_first=True, dropout=Config.DROPOUT)

        self.concat = nn.Linear(4*Config.D_MODEL, 2*Config.D_MODEL)
        self.output_head = nn.Linear(2*Config.D_MODEL, vocab.total_tgt_tokens)

    def forward(self, x, y):
        embedded_x = self.src_embedding(x)
        encoder_outputs, (hidden, cell) = self.encoder(embedded_x)

        hidden = hidden.view(Config.N_ENC_LAYERS, 2, x.size(0), -1).permute(0, 2, 1, 3).reshape(Config.N_ENC_LAYERS, x.size(0), -1)
        cell = cell.view(Config.N_ENC_LAYERS, 2, x.size(0), -1).permute(0, 2, 1, 3).reshape(Config.N_ENC_LAYERS, x.size(0), -1)

        dec_input = y[:, 0].unsqueeze(1)
        dec_hidden, dec_cell = hidden, cell
        outputs = []

        for t in range(1, y.shape[1]):
            embedded_input = self.tgt_embedding(dec_input)
            rnn_output, (dec_hidden, dec_cell) = self.decoder(embedded_input, (dec_hidden, dec_cell))

            # Dot Attention
            scores = torch.bmm(rnn_output, encoder_outputs.permute(0, 2, 1))
            attn_weights = F.softmax(scores, dim=2)
            context = torch.bmm(attn_weights, encoder_outputs)

            concat_input = torch.cat((context, rnn_output), dim=2)
            h_tilde = torch.tanh(self.concat(concat_input))

            prediction = self.output_head(h_tilde.squeeze(1))
            outputs.append(prediction.unsqueeze(1))
            dec_input = y[:, t].unsqueeze(1)

        return torch.cat(outputs, dim=1)

    def predict(self, x):
        self.eval()
        with torch.no_grad():
            embedded_x = self.src_embedding(x)
            encoder_outputs, (hidden, cell) = self.encoder(embedded_x)

            hidden = hidden.view(Config.N_ENC_LAYERS, 2, x.size(0), -1).permute(0, 2, 1, 3).reshape(Config.N_ENC_LAYERS, x.size(0), -1)
            cell = cell.view(Config.N_ENC_LAYERS, 2, x.size(0), -1).permute(0, 2, 1, 3).reshape(Config.N_ENC_LAYERS, x.size(0), -1)

            dec_input = torch.full((x.size(0), 1), Config.BOS_IDX, device=x.device)
            dec_hidden, dec_cell = hidden, cell
            preds = []

            for _ in range(50):
                embedded_input = self.tgt_embedding(dec_input)
                rnn_output, (dec_hidden, dec_cell) = self.decoder(embedded_input, (dec_hidden, dec_cell))

                scores = torch.bmm(rnn_output, encoder_outputs.permute(0, 2, 1))
                attn_weights = F.softmax(scores, dim=2)
                context = torch.bmm(attn_weights, encoder_outputs)

                concat_input = torch.cat((context, rnn_output), dim=2)
                h_tilde = torch.tanh(self.concat(concat_input))

                top1 = self.output_head(h_tilde.squeeze(1)).argmax(1).unsqueeze(1)
                preds.append(top1)
                dec_input = top1
                if (top1 == Config.EOS_IDX).all(): break

            return torch.cat(preds, dim=1)

Writing models.py


In [4]:
%%writefile trainer.py
import torch
from config import Config
from torchmetrics.text.rouge import ROUGEScore

def train_epoch(model, iterator, optimizer, criterion):
    model.train()
    epoch_loss = 0
    for src, tgt in iterator:
        src, tgt = src.to(Config.DEVICE), tgt.to(Config.DEVICE)
        optimizer.zero_grad()
        output = model(src, tgt)

        # Output: [bs, trg_len, vocab_size], Tgt: [bs, trg_len]
        # Flatten ƒë·ªÉ t√≠nh loss, b·ªè qua c·ªôt ƒë·∫ßu ti√™n c·ªßa output (t∆∞∆°ng ·ª©ng v·ªõi d·ª± ƒëo√°n t·ª´ ƒë·∫ßu v√†o <bos>)
        output_dim = output.shape[-1]
        loss = criterion(output.reshape(-1, output_dim), tgt[:, 1:].reshape(-1))

        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), Config.CLIP)
        optimizer.step()
        epoch_loss += loss.item()
    return epoch_loss / len(iterator)

def calculate_rouge(model, iterator, vocab):
    model.eval()
    rouge = ROUGEScore()
    preds_text = []
    targets_text = []

    with torch.no_grad():
        for src, tgt in iterator:
            src = src.to(Config.DEVICE)
            # G·ªçi h√†m predict (l∆∞u √Ω b·∫°n c·∫ßn implement predict trong models.py)
            # ·ªû ƒë√¢y d√πng logic gi·∫£ ƒë·ªãnh n·∫øu ch∆∞a implement
            pred_indices = model.predict(src)
            if pred_indices is None: continue

            for i in range(src.size(0)):
                p = vocab.decode(pred_indices[i].tolist())
                t = vocab.decode(tgt[i].tolist())
                preds_text.append(p)
                targets_text.append(t)

    if not preds_text: return 0.0
    return rouge(preds_text, targets_text)

Writing trainer.py


In [5]:
%%writefile dataset.py
import json
import torch
from torch.utils.data import Dataset
from torch.nn.utils.rnn import pad_sequence
from config import Config
from vocab import Vocab

class PhoMTDataset(Dataset):
    def __init__(self, json_path, vocab, src_key, tgt_key):
        self.vocab = vocab
        self.data = []

        with open(json_path, 'r', encoding='utf-8') as f:
            raw_data = json.load(f)

        for item in raw_data:
            # L∆∞u l·∫°i token d·∫°ng s·ªë lu√¥n ƒë·ªÉ train cho nhanh
            src_text = item[src_key]
            tgt_text = item[tgt_key]

            src_indices = self.vocab.encode(src_text)
            tgt_indices = self.vocab.encode(tgt_text)

            self.data.append((torch.tensor(src_indices), torch.tensor(tgt_indices)))

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

def collate_fn(batch):
    src_batch, tgt_batch = zip(*batch)
    src_pad = pad_sequence(src_batch, padding_value=Config.PAD_IDX, batch_first=True)
    tgt_pad = pad_sequence(tgt_batch, padding_value=Config.PAD_IDX, batch_first=True)
    return src_pad, tgt_pad

Writing dataset.py


In [6]:
!pip install torchmetrics

Collecting torchmetrics
  Downloading torchmetrics-1.8.2-py3-none-any.whl.metadata (22 kB)
Collecting lightning-utilities>=0.8.0 (from torchmetrics)
  Downloading lightning_utilities-0.15.2-py3-none-any.whl.metadata (5.7 kB)
Downloading torchmetrics-1.8.2-py3-none-any.whl (983 kB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m983.2/983.2 kB[0m [31m19.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading lightning_utilities-0.15.2-py3-none-any.whl (29 kB)
Installing collected packages: lightning-utilities, torchmetrics
Successfully installed lightning-utilities-0.15.2 torchmetrics-1.8.2


In [7]:
import torch
import torch.optim as optim
import torch.nn as nn
from torch.utils.data import DataLoader
import pandas as pd
from tqdm.auto import tqdm

# Import modules
from config import Config
from vocab import Vocab
from dataset import PhoMTDataset, collate_fn
from models import VanillaSeq2Seq, BahdanauSeq2Seq, LuongSeq2Seq
from trainer import train_epoch, calculate_rouge

# 1. SETUP & LOAD DATA
print("üöÄ ƒêang kh·ªüi t·∫°o d·ªØ li·ªáu...")
vocab = Vocab()
# X√¢y d·ª±ng vocab t·ª´ t·∫≠p train (Gi·∫£ s·ª≠ file t·ªìn t·∫°i)
vocab.build_vocab(Config.TRAIN_PATH, Config.SRC_KEY, Config.TGT_KEY)

train_ds = PhoMTDataset(Config.TRAIN_PATH, vocab, Config.SRC_KEY, Config.TGT_KEY)
test_ds = PhoMTDataset(Config.TEST_PATH, vocab, Config.SRC_KEY, Config.TGT_KEY)

train_loader = DataLoader(train_ds, batch_size=Config.BATCH_SIZE, shuffle=True, collate_fn=collate_fn)
test_loader = DataLoader(test_ds, batch_size=1, shuffle=False, collate_fn=collate_fn) # BS=1 for inference

print(f"‚úÖ Data Loaded. Train size: {len(train_ds)}, Test size: {len(test_ds)}")

# 2. LIST C√ÅC MODEL C·∫¶N TRAIN
model_classes = [
    ("Vanilla Seq2Seq", VanillaSeq2Seq),
    ("Bahdanau Attention", BahdanauSeq2Seq),
    ("Luong Attention", LuongSeq2Seq)
]

results = []

# 3. TRAINING LOOP
for name, ModelClass in model_classes:
    print(f"\n{'='*50}")
    print(f"üîÑ ƒêang train model: {name}")
    print(f"{'='*50}")

    # Init Model
    model = ModelClass(vocab).to(Config.DEVICE)
    optimizer = optim.Adam(model.parameters(), lr=Config.LEARNING_RATE)
    criterion = nn.CrossEntropyLoss(ignore_index=Config.PAD_IDX)

    # Train
    model.train()
    for epoch in range(Config.NUM_EPOCHS):
        loss = train_epoch(model, train_loader, optimizer, criterion)
        if (epoch+1) % 5 == 0:
            print(f"   Epoch {epoch+1}/{Config.NUM_EPOCHS} | Loss: {loss:.4f}")

    # Evaluate ROUGE
    print(f"üìä ƒêang t√≠nh ROUGE-L cho {name}...")
    try:
        rouge_score = calculate_rouge(model, test_loader, vocab)
        # L·∫•y Rouge-L Fmeasure
        r_l = rouge_score['rougeL_fmeasure'].item()
        print(f"   üî• ROUGE-L F1: {r_l:.4f}")
        results.append({"Model": name, "Rouge-L": r_l, "Loss": loss})
    except Exception as e:
        print(f"‚ùå L·ªói t√≠nh Rouge: {e}")
        results.append({"Model": name, "Rouge-L": 0.0, "Loss": loss})

# 4. SHOW COMPARISON TABLE
print("\n" + "#"*50)
print("üèÜ K·∫æT QU·∫¢ SO S√ÅNH")
print("#"*50)
df = pd.DataFrame(results)
print(df)

üöÄ ƒêang kh·ªüi t·∫°o d·ªØ li·ªáu...
ƒêang x√¢y d·ª±ng Vocab t·ª´ /content/small-train.json...
Vocab size: 13190
‚úÖ Data Loaded. Train size: 20000, Test size: 2000

üîÑ ƒêang train model: Vanilla Seq2Seq
   Epoch 5/15 | Loss: 4.3683
   Epoch 10/15 | Loss: 3.6961
   Epoch 15/15 | Loss: 3.2994
üìä ƒêang t√≠nh ROUGE-L cho Vanilla Seq2Seq...
   üî• ROUGE-L F1: 0.3450

üîÑ ƒêang train model: Bahdanau Attention
   Epoch 5/15 | Loss: 4.4078
   Epoch 10/15 | Loss: 3.6943
   Epoch 15/15 | Loss: 3.2833
üìä ƒêang t√≠nh ROUGE-L cho Bahdanau Attention...
   üî• ROUGE-L F1: 0.3486

üîÑ ƒêang train model: Luong Attention
   Epoch 5/15 | Loss: 4.1112
   Epoch 10/15 | Loss: 2.9502
   Epoch 15/15 | Loss: 2.3587
üìä ƒêang t√≠nh ROUGE-L cho Luong Attention...
   üî• ROUGE-L F1: 0.4576

##################################################
üèÜ K·∫æT QU·∫¢ SO S√ÅNH
##################################################
                Model   Rouge-L      Loss
0     Vanilla Seq2Seq  0.345024  3.299417