# Xây dựng vòng lặp huấn luyện

In [2]:
import time
import torch

def evaluate(model, data_loader, criterion, device):
    """Return the mean per-batch loss of ``model`` over ``data_loader``.

    The model is switched to evaluation mode and the whole pass runs under
    ``torch.no_grad()``. The model is left in evaluation mode on return.

    Args:
        model: Module invoked as ``model(src_input, tgt_input)``. Its output
            is flattened to ``(-1, last_dim)`` before the loss is computed.
        data_loader: Iterable yielding ``(src_input, tgt_input)`` tensor pairs.
        criterion: Loss callable taking ``(flat_logits, flat_targets)`` and
            returning a scalar tensor.
        device: Device both tensors are moved to before the forward pass.

    Returns:
        The arithmetic mean of the per-batch loss values as a float, or
        ``float("nan")`` when ``data_loader`` yields no batches.
    """
    # NOTE(review): the same tgt_input is fed to the model and used as the
    # loss target; any teacher-forcing shift would have to happen inside the
    # model — confirm against the model implementation.
    model.eval()
    losses = []
    with torch.no_grad():
        for src_input, tgt_input in data_loader:
            src_input, tgt_input = src_input.to(device), tgt_input.to(device)
            pred_logits = model(src_input, tgt_input)

            # reshape (unlike view) also accepts non-contiguous tensors, and
            # size(-1) keeps this in line with train_epoch.
            flat_targets = tgt_input.reshape(-1)
            flat_logits = pred_logits.reshape(-1, pred_logits.size(-1))
            losses.append(criterion(flat_logits, flat_targets).item())

    if not losses:
        # No batches: report "undefined" instead of dividing by zero.
        return float("nan")
    return sum(losses) / len(losses)


def train_epoch(model, train_loader, criterion, optimizer, device):
    """Run one optimisation pass over ``train_loader`` and return the mean loss.

    For every batch the gradients are cleared, the model is run as
    ``model(src_input, tgt_input)``, the loss is back-propagated and the
    optimizer takes one step. The model is put in training mode first.

    Args:
        model: Module invoked as ``model(src_input, tgt_input)``. Its output
            is flattened to ``(-1, last_dim)`` before the loss is computed.
        train_loader: Iterable yielding ``(src_input, tgt_input)`` tensor pairs.
        criterion: Loss callable taking ``(flat_logits, flat_targets)`` and
            returning a scalar tensor.
        optimizer: Optimizer exposing ``zero_grad()`` and ``step()``.
        device: Device both tensors are moved to before the forward pass.

    Returns:
        The arithmetic mean of the per-batch loss values as a float, or
        ``float("nan")`` when ``train_loader`` yields no batches.
    """
    # NOTE(review): the same tgt_input is fed to the model and used as the
    # loss target; any teacher-forcing shift would have to happen inside the
    # model — confirm against the model implementation.
    model.train()
    epoch_losses = []

    for src_input, tgt_input in train_loader:
        optimizer.zero_grad()

        src_input, tgt_input = src_input.to(device), tgt_input.to(device)
        pred_logits = model(src_input, tgt_input)

        # reshape (unlike view) also accepts non-contiguous tensors.
        flat_targets = tgt_input.reshape(-1)
        flat_logits = pred_logits.reshape(-1, pred_logits.size(-1))
        loss = criterion(flat_logits, flat_targets)

        loss.backward()
        optimizer.step()

        # Record each batch loss exactly once.
        epoch_losses.append(loss.item())

    if not epoch_losses:
        # No batches: report "undefined" instead of dividing by zero.
        return float("nan")
    return sum(epoch_losses) / len(epoch_losses)

def fit(model, train_loader, val_loader, criterion, optimizer, epochs, device):
    """Train ``model`` for ``epochs`` epochs, validating after each one.

    Each epoch calls ``train_epoch`` on ``train_loader`` followed by
    ``evaluate`` on ``val_loader``, then prints a one-line summary with both
    losses and the wall-clock time of the epoch.

    Args:
        model: Model to optimise; forwarded to ``train_epoch``/``evaluate``.
        train_loader: Batches used for training.
        val_loader: Batches used for validation.
        criterion: Loss callable forwarded to both helpers.
        optimizer: Optimizer forwarded to ``train_epoch``.
        epochs: Number of epochs to run.
        device: Device forwarded to both helpers.

    Returns:
        A ``(train_losses, val_losses)`` tuple of lists holding one value
        per completed epoch.
    """
    model.train()
    train_history = []
    val_history = []

    for epoch_idx in range(epochs):
        started_at = time.time()
        epoch_train_loss = train_epoch(
            model, train_loader, criterion, optimizer, device
        )
        epoch_val_loss = evaluate(model, val_loader, criterion, device)
        elapsed = time.time() - started_at

        train_history.append(epoch_train_loss)
        val_history.append(epoch_val_loss)

        print(
            f"Epoch: {epoch_idx + 1}, Train loss: {epoch_train_loss:.3f}, "
            f"Val loss: {epoch_val_loss:.3f}, Epoch time = {elapsed:.3f}s"
        )

    return train_history, val_history
        

# Huấn luyện mô hình RNN cơ bản

In [12]:
import torch.nn as nn
import import_ipynb
from preprocessing import train_loader, val_loader, test_loader, src_vocab, tgt_vocab
from rnn_based_model import RnnDecoder, RnnEncoder, RnnMachineTranslate

# Sizes passed to the encoder/decoder constructors below.
embed_dim = 512
hidden_dim = 1024
n_layers = 2
# Special-token indices; in this cell only PAD_IDX is used (as the loss ignore_index).
# NOTE(review): presumably these must match the indices used in preprocessing — confirm.
UNK_IDX, PAD_IDX, SOS_IDX, EOS_IDX = 0, 1, 2 ,3
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device(
    'cuda' if torch.cuda.is_available() 
    else 'cpu'
    )

# NOTE(review): the positional 0.2 is presumably a dropout rate, and the meaning of
# the decoder's extra positional 2 is not visible here — confirm against the
# constructors in rnn_based_model.
encoder = RnnEncoder(src_vocab, embed_dim, hidden_dim, n_layers, 0.2, DEVICE).to(DEVICE)
decoder = RnnDecoder(tgt_vocab, hidden_dim, n_layers, 0.2, 2, DEVICE).to(DEVICE)
rnn_based_model = RnnMachineTranslate(encoder, decoder).to(DEVICE)

# Targets equal to PAD_IDX are ignored by the loss.
criterion = nn.CrossEntropyLoss(ignore_index= PAD_IDX)
optimizer = torch.optim.Adam(rnn_based_model.parameters(), lr= 1e-4, eps= 1e-9)
epochs = 1

# Run training; fit returns the per-epoch training and validation losses.
train_losses, valid_losses = fit(rnn_based_model, train_loader, val_loader, criterion, optimizer, epochs, device=DEVICE)


Epoch: 1, Train loss: 4.384, Val loss: 3.755, Epoch time = 39.755s
