In [1]:
import torch 
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader, RandomSampler

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import glob
from sklearn.model_selection import train_test_split
import unicodedata
import time
from tqdm import tqdm 


## Processing data


In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [4]:
en, vi = [], []
for file in glob.glob("./data/*.txt"): 
    with open(file, 'r') as f:
        text = f.read() 
        list_text = text.split("\n") 
        for pair in list_text:
            pair = pair.split("\t")
            en.append(pair[0])
            vi.append(pair[1])

print(f"Len en : {len(en)}") 
print(f"Len vi : {len(vi)}") 

Len en : 10002
Len vi : 10002


In [5]:
import re
SOS_token = 0
EOS_token = 1
PAD_token = 2 
UNK_token = 3 

class Lang: 
    def __init__(self, name):
        self.name = name
        self.word2index = {"SOS": 0, "EOS": 1, "PAD": 2, "UNK": 3}
        self.word2count = {}
        self.index2word = {0: "SOS", 1: "EOS", 2: "PAD", 3: "UNK"}
        self.n_words = 4  # Count SOS, EOS, PAD, UNK

    def addSentence(self, sentence):
        for word in sentence.split(' '):
            self.addWord(word)

    def addWord(self, word):
        if word not in self.word2index:
            self.word2index[word] = self.n_words
            self.word2count[word] = 1
            self.index2word[self.n_words] = word
            self.n_words += 1
        else:
            self.word2count[word] += 1
    
    def get_idx(self, word):
        if word not in self.word2index:
            return self.word2index['UNK']
        return self.word2index[word]
            
# Turn a Unicode string to plain ASCII, thanks to
def unicodeToAscii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
    )

# Lowercase, trim, and remove non-letter characters
def normalizeString(s):
    s = s.lower().strip()
    # s = unicodeToAscii(s)
    # s = re.sub(r"([.!?])", r" \1", s)
    # s = re.sub(r"[^a-zA-Z.!?]+", r" ", s)
    return s


def readLangs(lang1, lang2, en, vi, reverse=False):
    pairs = []
    for i in range(len(en)):
        pairs.append([normalizeString(en[i]), normalizeString(vi[i])])
    if reverse:
        pairs = [list(reversed(p)) for p in pairs]
        input_lang = Lang(lang2)
        output_lang = Lang(lang1)
    else:
        input_lang = Lang(lang1)
        output_lang = Lang(lang2)
    
    for pair in pairs:
        input_lang.addSentence(pair[0])
        output_lang.addSentence(pair[1])
    
    print("Count words in language:")
    print(input_lang.name, input_lang.n_words)
    print(output_lang.name, output_lang.n_words)   
    
    return input_lang, output_lang, pairs

en_lang, vi_lang, pairs = readLangs('en', 'vi', en, vi, False)
MAX_LENGTH = 10

# filter pair for max lenght 
pairs = [pair for pair in pairs if len(pair[0].split(" ")) < MAX_LENGTH and len(pair[1].split(" ")) < MAX_LENGTH] 
print(f"Len pairs: {len(pairs)}")

Count words in language:
en 8231
vi 4824
Len pairs: 6710


In [6]:
def word2index(lang: Lang, sentence: str): 
    sentence = normalizeString(sentence)
    res = [SOS_token] 
    for word in sentence.split(" "):
        if len(word) == 0: 
            continue
        res.append(lang.get_idx(word))
    res.append(EOS_token)
    return np.array(res)


def get_dataloader(pairs, input_lang: Lang, output_lang: Lang, batch_size, max_lenght) -> DataLoader:
    n = len(pairs)
    input_pad = np.full((n, max_lenght), PAD_token, dtype=np.int32)
    target_pad = np.full((n, max_lenght), PAD_token, dtype=np.int32)
    for idx, pair in enumerate(pairs):
        input = word2index(input_lang, pair[0])
        target = word2index(output_lang, pair[1])
        input_pad[idx, :len(input)] = input
        target_pad[idx, :len(target)] = target
    input_tensor = torch.tensor(input_pad, dtype=torch.long, device=device)
    target_tensor = torch.tensor(target_pad, dtype=torch.long, device=device)
    
    dataset = TensorDataset(input_tensor, target_tensor) 
    sampler = RandomSampler(dataset) 
    dataloader = DataLoader(dataset, sampler=sampler, batch_size=batch_size)
    return dataloader



In [7]:
input_size = en_lang.n_words 
output_size = vi_lang.n_words 
hidden_size = 512
num_layers = 4
dropout = 0.2
batch_size = 128
print(f"Input size: {input_size}")
print(f"Output size: {output_size}")

MAX_LENGTH = MAX_LENGTH + 2 # add 2 for SOS and EOS token
train_pairs, test_pairs = train_test_split(pairs, test_size=0.2, random_state=42) 
print(f"Len train: {len(train_pairs)}") 
print(f"Len test: {len(test_pairs)}")
train_dataloader = get_dataloader(train_pairs, en_lang, vi_lang, batch_size=batch_size, max_lenght=MAX_LENGTH) 
test_dataloader = get_dataloader(test_pairs, en_lang, vi_lang, batch_size=batch_size, max_lenght=MAX_LENGTH)


Input size: 8231
Output size: 4824
Len train: 5368
Len test: 1342


In [8]:
class Encoder(nn.Module): 
    def __init__(self, input_size, hidden_size, num_layers, dropout):
        super(Encoder, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size, num_layers, dropout=dropout, batch_first=True) 
        
    def forward(self, input): 
        """ forward encoder 

        Args:
            input (tensor): shape (batch_size, max_length)

        Returns:
            outputs: shape (batch_size, max_length, hidden_size)
            hidden: shape (num_layers, batch_size, hidden_size) 
            cell: shape (num_layers, batch_size, hidden_size)
        """
        embedded = self.embedding(input)
        outputs, (hidden, cell) = self.lstm(embedded)
        return outputs, (hidden, cell) 

class DecoderSimple(nn.Module): 
    def __init__(self, output_size, hidden_size, num_layers, dropout):
        super(DecoderSimple, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size, num_layers, dropout=dropout, batch_first=True) 
        self.fc = nn.Linear(hidden_size, output_size)
    
    def forward(self, encoder_outputs, encoder_hidden, encoder_cell, target_tensor = None):
        """ forward in decoder 

        Args:
            encoder_outputs (tensor): shape (batch_size, max_length, hidden_size)
            encoder_hidden (tensor): shape (num_layers, batch_size, hidden_size)
            encoder_cell (tensor): shape (num_layers, batch_size, hidden_size)
            target_tensor (tensor, optional): shape (batch_size, max_seq). Defaults to None.

        Returns:
            _type_: _description_
        """
        batch_size = encoder_outputs.shape[0] 
        decoder_input = torch.empty(batch_size, 1, dtype=torch.long, device=device).fill_(SOS_token)
        decoder_hidden = encoder_hidden
        decoder_cell = encoder_cell
        decoder_outputs = [] 
        for i in range(MAX_LENGTH): 
            decoder_output, (decoder_hidden, decoder_cell) = self.forward_step(decoder_input, decoder_hidden, decoder_cell) 
            decoder_outputs.append(decoder_output) 
            if target_tensor is not None: 
                # teacher forcing 
                decoder_input = target_tensor[:, i].unsqueeze(1) 
            else: 
                # without teacher forcing 
                _, topi = decoder_output.topk(1) 
                decoder_input = topi.squeeze(1).detach()    # detach from history as input

        decoder_outputs = torch.cat(decoder_outputs, dim=1) 
        decoder_outputs = F.log_softmax(decoder_outputs, dim=-1) 
        
        return decoder_outputs, decoder_hidden, None
        
        
    def forward_step(self, input, hidden, cell): 
        """ forward_step

        Args:
            input (tensor): input tensor has shape (batch_size, 1)
            hidden (tensor): hidden tensor has shape (num_layers, batch_size, hidden_size)
            cell (tensor): cell tensor has shape (num_layers, batch_size, hidden_size)

        Returns:
            output: shape (batch_size, 1, output_size)
            hidden: shape (num_layers, batch_size, hidden_size)
        """
        embedded = self.embedding(input)    
        output, (hidden, cell) = self.lstm(embedded, (hidden, cell))
        output = self.fc(output)
        return output, (hidden, cell)


In [9]:

class DecoderScaleDotAttn(nn.Module): 
    def __init__(self, output_size, hidden_size, num_layers, dropout): 
        super(DecoderScaleDotAttn, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size, num_layers, dropout=dropout, batch_first=True) 
        self.fc = nn.Linear(hidden_size, output_size)
    
    def forward(self, encoder_outputs, encoder_hidden, encoder_cell, target_tensor = None): 
        batch_size = encoder_outputs.shape[0] 
        decoder_input = torch.empty(batch_size, 1, dtype=torch.long, device=device).fill_(SOS_token) 
        decoder_hidden = encoder_hidden 
        decoder_cell = encoder_cell 
        decoder_outputs = []
        attn_weights = []
        for i in range(MAX_LENGTH): 
            decoder_output, (decoder_hidden, decoder_cell), attn_w = self.forward_step(decoder_input, decoder_hidden, decoder_cell, encoder_outputs) 
            decoder_outputs.append(decoder_output)
            attn_weights.append(attn_w)
            if target_tensor is not None: 
                decoder_input = target_tensor[:, i].unsqueeze(1) 
            else: 
                _, topi = decoder_output.topk(1) 
                decoder_input = topi.squeeze(1).detach()
        
        decoder_outputs = torch.cat(decoder_outputs, dim=1) 
        decoder_outputs = F.log_softmax(decoder_outputs, dim=-1) 
        return decoder_outputs, decoder_hidden, attn_weights
        
        

    def forward_step(self, decoder_input, decoder_hidden, decoder_cell, encoder_outputs): 
        # use scale dot product attention
        embedded = self.embedding(decoder_input)
        """
            calculate attention weights 
            encoder_outputs: key, value 
            embedded: query 
            attention_weights = score(Q, K) 
            attention_score = softmax(attention_weights)
            context = attention_score * V 
            output = bmm (context, q) 
            -------------------------------
            attn_w = query * key 
            attn_score = softmax(attn_w) 
            context = attn_score * value
            output = context * query | mabey we can use linear layer to get output
            
            
        """
        attn_weights = torch.bmm(encoder_outputs, embedded.transpose(1, 2))
        attn_score = F.softmax(attn_weights, dim=1)
        context = torch.bmm(attn_score.transpose(1, 2), encoder_outputs)  # shape (batch_size, 1, hidden_size)
        output, (decoder_hidden, decoder_cell) = self.lstm(context, (decoder_hidden, decoder_cell))
        output = self.fc(output)
        return output, (decoder_hidden, decoder_cell), attn_weights
    

In [10]:
class DecoderAttn(nn.Module): 
    def __init__(self, output_size, hidden_size, num_layers, dropout): 
        super(DecoderAttn, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size, num_layers, dropout=dropout, batch_first=True) 
        self.fc = nn.Linear(hidden_size, output_size)
        self.attn = nn.Linear(hidden_size * 2, MAX_LENGTH)
        self.attn_combine = nn.Linear(hidden_size * 2, hidden_size) 
    
    def forward(self, encoder_outputs, encoder_hidden, encoder_cell, target_tensor = None): 
        batch_size = encoder_outputs.shape[0] 
        decoder_input = torch.empty(batch_size, 1, dtype=torch.long, device=device).fill_(SOS_token) 
        decoder_hidden = encoder_hidden 
        decoder_cell = encoder_cell 
        decoder_outputs = []
        attn_weights = []
        for i in range(MAX_LENGTH): 
            decoder_output, (decoder_hidden, decoder_cell), attn_w = self.forward_step(decoder_input, decoder_hidden, decoder_cell, encoder_outputs) 
            decoder_outputs.append(decoder_output)
            attn_weights.append(attn_w)
            if target_tensor is not None: 
                decoder_input = target_tensor[:, i].unsqueeze(1) 
            else: 
                _, topi = decoder_output.topk(1) 
                decoder_input = topi.squeeze(1).detach()
        
        decoder_outputs = torch.cat(decoder_outputs, dim=1) 
        decoder_outputs = F.log_softmax(decoder_outputs, dim=-1) 
        return decoder_outputs, decoder_hidden, attn_weights
    
    def forward_step(self, decoder_input, decoder_hidden, decoder_cell, encoder_outputs): 
        """
            calculate attention weights 
            embedded: query shape (batch_size, 1, hidden_size) 
            encoder_outputs: key, value shape (batch_size, max_length, hidden_size) 
            attn_weights = score(Q, K) shape (batch_size, 1, max_length) 
            attn_score = softmax(attn_weights) shape (batch_size, 1, max_length) 
            context = attn_score * V shape (batch_size, 1, hidden_size) 
            rnn_input = [embedded, context] shape (batch_size, 1, hidden_size * 2)
            rnn_input = linear(rnn_input) shape (batch_size, 1, hidden_size)
            output = rnn(rnn_input) shape (batch_size, 1, hidden_size)
            output = fc(output) shape (batch_size, 1, output_size)
        """
        embedded = self.embedding(decoder_input)
        attn_weights = torch.bmm(encoder_outputs, embedded.transpose(1, 2))
        attn_score = F.softmax(attn_weights, dim=1)
        context = torch.bmm(attn_score.transpose(1, 2), encoder_outputs)
        rnn_input = torch.cat((embedded, context), dim=-1)
        rnn_input = self.attn_combine(rnn_input)
        output, (decoder_hidden, decoder_cell) = self.lstm(rnn_input, (decoder_hidden, decoder_cell))
        output = self.fc(output)
        return output, (decoder_hidden, decoder_cell), attn_weights
    

In [18]:
encoder = Encoder(input_size, hidden_size, num_layers, dropout).to(device) 
decoder = DecoderAttn(output_size, hidden_size, num_layers, dropout).to(device)
learning_rate = 1e-4
criterion = nn.NLLLoss() 
encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate) 
decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate) 
n_epochs = 500

previous_score = 0 
count = 0
for epoch in tqdm(range(n_epochs)): 
    # train phase 
    encoder.train()
    decoder.train()
    time_epoch = time.time()
    averge_loss = [] 
    for input_tensor, target_tensor in train_dataloader: 
        # zero the gradient
        encoder_optimizer.zero_grad() 
        decoder_optimizer.zero_grad() 
        
        # forward
        encoder_outputs, (encoder_hidden, encoder_cell) = encoder(input_tensor) 
        decoder_outputs, _, _ = decoder(encoder_outputs, encoder_hidden, encoder_cell, target_tensor) 

        # calculate loss
        loss = criterion(decoder_outputs.view(-1, output_size), target_tensor.view(-1)) 
        loss.backward() 
        
        # update weight
        encoder_optimizer.step() 
        decoder_optimizer.step() 
        
        averge_loss.append(loss.item()) 

    if epoch % 50 == 0: 
        print(f"Epoch {epoch} - Time: {time.time() - time_epoch} - Loss: {np.mean(averge_loss)}") 
    # eval phase
    encoder.eval()
    decoder.eval()
    time_epoch = time.time() 
    averge_loss = [] 
    averge_score = [] 
    for input_tensor, target_tensor in test_dataloader: 
        with torch.no_grad(): 
            encoder_outputs, (encoder_hidden, encoder_cell) = encoder(input_tensor) 
            decoder_outputs, _, _ = decoder(encoder_outputs, encoder_hidden, encoder_cell, target_tensor) 
            loss = criterion(decoder_outputs.view(-1, output_size), target_tensor.view(-1)) 
            averge_loss.append(loss.item())
            averge_score.append((decoder_outputs.argmax(-1) == target_tensor).sum().item() / (target_tensor != PAD_token).sum().item()) 
    averge_score = np.mean(averge_score)
    if epoch % 10 == 0:
        print(f"Eval - Time: {time.time() - time_epoch} - Loss: {np.mean(averge_loss)} - Score: {averge_score}")
        print("--------------------------------------------------")
    

  0%|          | 1/500 [00:00<04:54,  1.69it/s]

Epoch 0 - Time: 0.5610320568084717 - Loss: 6.755776246388753
Eval - Time: 0.02927541732788086 - Loss: 4.574126113544811 - Score: 0.5044905971211346
--------------------------------------------------


  2%|▏         | 11/500 [00:06<04:43,  1.72it/s]

Eval - Time: 0.02922654151916504 - Loss: 3.5500499551946465 - Score: 0.6421722787538543
--------------------------------------------------


  4%|▍         | 21/500 [00:12<04:37,  1.72it/s]

Eval - Time: 0.029355764389038086 - Loss: 3.4341230392456055 - Score: 0.6792803629615375
--------------------------------------------------


  6%|▌         | 31/500 [00:18<04:32,  1.72it/s]

Eval - Time: 0.029212474822998047 - Loss: 3.3084082169966265 - Score: 0.7131284786225269
--------------------------------------------------


  8%|▊         | 41/500 [00:23<04:26,  1.72it/s]

Eval - Time: 0.029183626174926758 - Loss: 3.27895526452498 - Score: 0.7209075590865944
--------------------------------------------------


 10%|█         | 51/500 [00:29<04:20,  1.72it/s]

Epoch 50 - Time: 0.5500040054321289 - Loss: 2.4977534725552513
Eval - Time: 0.029334306716918945 - Loss: 3.2475086992437188 - Score: 0.7447799198331566
--------------------------------------------------


 12%|█▏        | 61/500 [00:35<04:14,  1.72it/s]

Eval - Time: 0.029139041900634766 - Loss: 3.2499528364701704 - Score: 0.7461783157709317
--------------------------------------------------


 14%|█▍        | 71/500 [00:41<04:09,  1.72it/s]

Eval - Time: 0.02912116050720215 - Loss: 3.2295353412628174 - Score: 0.7557423241613453
--------------------------------------------------


 16%|█▌        | 81/500 [00:47<04:03,  1.72it/s]

Eval - Time: 0.029119253158569336 - Loss: 3.249952944842252 - Score: 0.7599190321790522
--------------------------------------------------


 18%|█▊        | 91/500 [00:52<03:57,  1.72it/s]

Eval - Time: 0.02924633026123047 - Loss: 3.236988804557107 - Score: 0.7709874222503937
--------------------------------------------------


 20%|██        | 101/500 [00:58<03:51,  1.72it/s]

Epoch 100 - Time: 0.550602912902832 - Loss: 1.5777772693406968
Eval - Time: 0.029290199279785156 - Loss: 3.2303108952262183 - Score: 0.7811510312387887
--------------------------------------------------


 22%|██▏       | 111/500 [01:04<03:45,  1.72it/s]

Eval - Time: 0.029109954833984375 - Loss: 3.2631807977503 - Score: 0.7879970128116497
--------------------------------------------------


 24%|██▍       | 121/500 [01:10<03:39,  1.72it/s]

Eval - Time: 0.02923417091369629 - Loss: 3.299551096829501 - Score: 0.7858939166426826
--------------------------------------------------


 26%|██▌       | 131/500 [01:16<03:34,  1.72it/s]

Eval - Time: 0.029120683670043945 - Loss: 3.329428867860274 - Score: 0.7902529280376837
--------------------------------------------------


 28%|██▊       | 141/500 [01:21<03:28,  1.72it/s]

Eval - Time: 0.029155254364013672 - Loss: 3.3955915624445137 - Score: 0.7879013185171114
--------------------------------------------------


 30%|███       | 151/500 [01:27<03:22,  1.72it/s]

Epoch 150 - Time: 0.5503802299499512 - Loss: 0.7877627710501353
Eval - Time: 0.029292821884155273 - Loss: 3.431558435613459 - Score: 0.7876513567729194
--------------------------------------------------


 32%|███▏      | 161/500 [01:33<03:16,  1.72it/s]

Eval - Time: 0.02933025360107422 - Loss: 3.4546300281177866 - Score: 0.7981223989464962
--------------------------------------------------


 34%|███▍      | 171/500 [01:39<03:10,  1.72it/s]

Eval - Time: 0.029227018356323242 - Loss: 3.5320239067077637 - Score: 0.7943936773174541
--------------------------------------------------


 36%|███▌      | 181/500 [01:45<03:05,  1.72it/s]

Eval - Time: 0.029186725616455078 - Loss: 3.5996378768574107 - Score: 0.7963927441560066
--------------------------------------------------


 38%|███▊      | 191/500 [01:50<02:59,  1.72it/s]

Eval - Time: 0.029115676879882812 - Loss: 3.638278982856057 - Score: 0.8020845621732344
--------------------------------------------------


 40%|████      | 201/500 [01:56<02:53,  1.72it/s]

Epoch 200 - Time: 0.5502557754516602 - Loss: 0.2719739984188761
Eval - Time: 0.02926921844482422 - Loss: 3.701694683595137 - Score: 0.7993622880674202
--------------------------------------------------


 42%|████▏     | 211/500 [02:02<02:47,  1.72it/s]

Eval - Time: 0.029144287109375 - Loss: 3.752287604592063 - Score: 0.8065699997630476
--------------------------------------------------


 44%|████▍     | 221/500 [02:08<02:41,  1.72it/s]

Eval - Time: 0.02925848960876465 - Loss: 3.809311801737005 - Score: 0.8047202820674301
--------------------------------------------------


 46%|████▌     | 231/500 [02:14<02:35,  1.73it/s]

Eval - Time: 0.02908492088317871 - Loss: 3.8861407149921763 - Score: 0.7976962465130328
--------------------------------------------------


 48%|████▊     | 241/500 [02:19<02:30,  1.72it/s]

Eval - Time: 0.029207706451416016 - Loss: 3.918868823484941 - Score: 0.8051485122581323
--------------------------------------------------


 50%|█████     | 251/500 [02:25<02:24,  1.72it/s]

Epoch 250 - Time: 0.5504090785980225 - Loss: 0.07219966962223962
Eval - Time: 0.029233932495117188 - Loss: 3.9804199608889492 - Score: 0.8044759855327464
--------------------------------------------------


 52%|█████▏    | 261/500 [02:31<02:18,  1.72it/s]

Eval - Time: 0.029199600219726562 - Loss: 4.0033933466131035 - Score: 0.8103208652971277
--------------------------------------------------


 54%|█████▍    | 271/500 [02:37<02:12,  1.72it/s]

Eval - Time: 0.02914738655090332 - Loss: 4.047618844292381 - Score: 0.8109041922693178
--------------------------------------------------


 56%|█████▌    | 281/500 [02:43<02:07,  1.72it/s]

Eval - Time: 0.029108285903930664 - Loss: 4.0745837471701885 - Score: 0.8159954622999251
--------------------------------------------------


 58%|█████▊    | 291/500 [02:48<02:01,  1.72it/s]

Eval - Time: 0.02924036979675293 - Loss: 4.135209647091952 - Score: 0.8087325385227989
--------------------------------------------------


 60%|██████    | 301/500 [02:54<01:55,  1.72it/s]

Epoch 300 - Time: 0.5505993366241455 - Loss: 0.027163472381376084
Eval - Time: 0.029306888580322266 - Loss: 4.112360239028931 - Score: 0.8096713484180106
--------------------------------------------------


 62%|██████▏   | 311/500 [03:00<01:49,  1.72it/s]

Eval - Time: 0.029118776321411133 - Loss: 4.178733869032427 - Score: 0.81198002310229
--------------------------------------------------


 64%|██████▍   | 321/500 [03:06<01:43,  1.72it/s]

Eval - Time: 0.02921152114868164 - Loss: 4.275933894244107 - Score: 0.8131931340382337
--------------------------------------------------


 66%|██████▌   | 331/500 [03:12<01:38,  1.72it/s]

Eval - Time: 0.0290377140045166 - Loss: 4.282607793807983 - Score: 0.8153396853040245
--------------------------------------------------


 68%|██████▊   | 341/500 [03:17<01:32,  1.72it/s]

Eval - Time: 0.028908252716064453 - Loss: 4.242939298803156 - Score: 0.8188644613210344
--------------------------------------------------


 70%|███████   | 351/500 [03:23<01:26,  1.72it/s]

Epoch 350 - Time: 0.5504281520843506 - Loss: 0.008814381934436304
Eval - Time: 0.02919602394104004 - Loss: 4.280013192783702 - Score: 0.8186866999767205
--------------------------------------------------


 72%|███████▏  | 361/500 [03:29<01:20,  1.72it/s]

Eval - Time: 0.028900623321533203 - Loss: 4.345968593250621 - Score: 0.8182447534662277
--------------------------------------------------


 74%|███████▍  | 371/500 [03:35<01:14,  1.73it/s]

Eval - Time: 0.02899169921875 - Loss: 4.399081446907737 - Score: 0.8164088169161499
--------------------------------------------------


 76%|███████▌  | 381/500 [03:41<01:09,  1.72it/s]

Eval - Time: 0.029068470001220703 - Loss: 4.33463285186074 - Score: 0.8237260328523437
--------------------------------------------------


 78%|███████▊  | 391/500 [03:46<01:03,  1.72it/s]

Eval - Time: 0.029191017150878906 - Loss: 4.419072649695656 - Score: 0.8197535877436587
--------------------------------------------------


 80%|████████  | 401/500 [03:52<00:57,  1.72it/s]

Epoch 400 - Time: 0.550189733505249 - Loss: 0.004331664381814855
Eval - Time: 0.029161930084228516 - Loss: 4.4171504540876905 - Score: 0.8225932206344911
--------------------------------------------------


 82%|████████▏ | 411/500 [03:58<00:51,  1.72it/s]

Eval - Time: 0.02912163734436035 - Loss: 4.471344384280118 - Score: 0.821014935311871
--------------------------------------------------


 84%|████████▍ | 421/500 [04:04<00:45,  1.72it/s]

Eval - Time: 0.029099225997924805 - Loss: 4.321079514243386 - Score: 0.8248322406973181
--------------------------------------------------


 86%|████████▌ | 431/500 [04:10<00:40,  1.72it/s]

Eval - Time: 0.029065370559692383 - Loss: 4.413210522044789 - Score: 0.825713309784208
--------------------------------------------------


 88%|████████▊ | 441/500 [04:15<00:34,  1.72it/s]

Eval - Time: 0.029120683670043945 - Loss: 4.482459913600575 - Score: 0.82476104585552
--------------------------------------------------


 90%|█████████ | 451/500 [04:21<00:28,  1.72it/s]

Epoch 450 - Time: 0.5504803657531738 - Loss: 0.0026413224737292956
Eval - Time: 0.02912163734436035 - Loss: 4.5297346982088955 - Score: 0.8255956926030602
--------------------------------------------------


 92%|█████████▏| 461/500 [04:27<00:22,  1.73it/s]

Eval - Time: 0.029074907302856445 - Loss: 4.514632550152865 - Score: 0.8272364786448564
--------------------------------------------------


 94%|█████████▍| 471/500 [04:33<00:16,  1.72it/s]

Eval - Time: 0.029161691665649414 - Loss: 4.329861424186013 - Score: 0.834950917829254
--------------------------------------------------


 96%|█████████▌| 481/500 [04:39<00:11,  1.72it/s]

Eval - Time: 0.02901768684387207 - Loss: 4.4124267318032 - Score: 0.8383974985594307
--------------------------------------------------


 98%|█████████▊| 491/500 [04:44<00:05,  1.72it/s]

Eval - Time: 0.029080867767333984 - Loss: 4.4653871276161885 - Score: 0.8366608694946103
--------------------------------------------------


100%|██████████| 500/500 [04:50<00:00,  1.72it/s]


In [19]:
torch.save(encoder.state_dict(), '/home/hoang.minh.an/anhalu-data/learning/deep-learning-from-scratch/nlp/seq2seq/save_model/encoder_model_attention_linear.pth')
torch.save(decoder.state_dict(), '/home/hoang.minh.an/anhalu-data/learning/deep-learning-from-scratch/nlp/seq2seq/save_model/decoder_model_attention_linear.pth')

In [20]:
import torch
def evaluate(encoder, decoder, sentence): 
    with torch.no_grad(): 
        encoder.eval()
        decoder.eval()
        input_tensor = torch.tensor(word2index(en_lang, sentence), dtype=torch.long, device=device).unsqueeze(0) 
        encoder_outputs, (encoder_hidden, encoder_cell) = encoder(input_tensor) 
        decoder_outputs, _, _ = decoder(encoder_outputs, encoder_hidden, encoder_cell) 
        decoder_outputs = decoder_outputs.argmax(-1).squeeze(0) 
        for idx in decoder_outputs: 
            token = vi_lang.index2word[idx.item()]
            if token == "EOS": 
                break
            print(token, end=" ")
            
            
encoder = Encoder(input_size, hidden_size, num_layers, dropout).to(device) 
decoder = DecoderAttn(output_size, hidden_size, num_layers, dropout).to(device) 
encoder.load_state_dict(torch.load('/home/hoang.minh.an/anhalu-data/learning/deep-learning-from-scratch/nlp/seq2seq/save_model/encoder_model_attention_linear.pth'))
decoder.load_state_dict(torch.load('/home/hoang.minh.an/anhalu-data/learning/deep-learning-from-scratch/nlp/seq2seq/save_model/decoder_model_attention_linear.pth')) 

evaluate(encoder, decoder, "I was a fool to trust you .")

SOS tôi thật ngu ngốc khi tin tưởng bạn. 