In [44]:
import spacy
import torch
from vncorenlp import VnCoreNLP
annotator = VnCoreNLP("VnCoreNLP-master\VnCoreNLP-1.1.1.jar", annotators="wseg", max_heap_size='-Xmx500m')
spacy_en = spacy.load('en_core_web_sm')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [2]:
from iteration_utilities import deepflatten
def tokenize_en(text):
    return [tok.text for tok in spacy_en.tokenizer(text)]
def tokenize_vi(text):
    return [tok for tok in deepflatten(annotator.tokenize(text), depth=1)]

text_en = 'Please put the dustpan in the broom closet'
text_vi = 'Cuốn sách này là của tôi. Của bạn đâu?'
print(tokenize_en(text_en))
print(tokenize_vi(text_vi))


['Please', 'put', 'the', 'dustpan', 'in', 'the', 'broom', 'closet']
['Cuốn', 'sách', 'này', 'là', 'của', 'tôi', '.', 'Của', 'bạn', 'đâu', '?']


In [3]:
import pandas as pd

def create_raw_dataset():
    data_dir = ""
    en_sents = open(data_dir + 'english.txt', "r",encoding="utf-8" ).read().splitlines()
    vi_sents = open(data_dir + 'vietnamese.txt', "r" ,encoding="utf-8").read().splitlines()
    return {
        "English": [line for line in en_sents[:5000]],
        "Vietnamese": [line for line in vi_sents[:5000]],
    }
raw_data = create_raw_dataset()

from sklearn.model_selection import train_test_split

df = pd.DataFrame(raw_data, columns=["English", "Vietnamese"])
train, test = train_test_split(df, test_size=0.2)
train, val = train_test_split(train, test_size=0.125)

train.to_json("train.json", orient="records", lines=True)
test.to_json("test.json", orient="records", lines=True)
val.to_json("val.json", orient="records", lines=True)


In [57]:
from collections import Counter
from torch.nn.utils.rnn import pad_sequence
import re

class Field:
    def __init__(self, tokenize_func=None, init_token=None, eos_token=None, pad_token='<pad>', lower=True, unk_token='<unk>'):
        self.tokenize_func = tokenize_func or (lambda x: x.split())
        self.init_token = init_token
        self.eos_token = eos_token
        self.pad_token = pad_token
        self.lower = lower
        self.vocab = None
        self.itos = None
        self.unk_token = unk_token
        self.apostrophe_regex = re.compile(r"(\w+)'(\w+)")

    def tokenize(self, texts):
        tokenized_texts = []
        for t in texts:
            tokens = self.tokenize_func(t)
            if self.lower:
                tokens = [token.lower() for token in tokens]
            # Split words with apostrophes into separate tokens
            tokens = [token for word in tokens for token in re.split(self.apostrophe_regex, word) if token]
            tokenized_texts.append(tokens)
        return tokenized_texts

    def build_vocab(self, texts, max_vocab_size=10000, min_freq=3):
        # Tokenize the texts
        tokenized_texts = self.tokenize(texts)

        # Flatten the tokenized texts
        tokens = [token for token_list in tokenized_texts for token in token_list]

        # Replace underscores with spaces in target text
        if '_' in tokens and self.init_token != '<sos>' and self.eos_token != '<eos>':
            tokens = [token.replace('_', ' ') for token in tokens]

        # Split tokens on apostrophes
        if self.apostrophe_regex is not None:
            new_tokens = []
            for token in tokens:
                subtokens = re.split(self.apostrophe_regex, token)
                new_tokens.extend(subtokens)
            tokens = new_tokens

        # Count the tokens
        counter = Counter(tokens)

        # Sort the tokens by frequency
        sorted_tokens = sorted(counter.items(), key=lambda x: x[1], reverse=True)

        # Truncate the sorted tokens by max_vocab_size
        if max_vocab_size is not None:
            sorted_tokens = sorted_tokens[:max_vocab_size]

        # Filter the tokens by min_freq
        filtered_tokens = [(token, freq) for token, freq in sorted_tokens if freq >= min_freq]

        # Add special tokens to the vocabulary
        if self.init_token is not None:
            filtered_tokens.insert(0, (self.init_token, float("inf")))
        if self.eos_token is not None:
            filtered_tokens.append((self.eos_token, float("inf")))
        if self.pad_token is not None:
            filtered_tokens.append((self.pad_token, float("inf")))
        filtered_tokens.append((self.unk_token, float("inf")))

        # Build the token-to-index mapping and index-to-token mapping
        self.vocab = {}
        self.itos = []
        for token, freq in filtered_tokens:
            self.vocab[token] = len(self.itos)
            self.itos.append(token)
            
    def numericalize(self, tokens, device=device):
        flattened_tokens = [token for seq in tokens for token in seq]
        if device is not None:
            return torch.LongTensor([[self.vocab[token] if token in self.vocab else self.vocab[self.unk_token] for token in seq] for seq in tokens]).to(device)
        else:
            return [[self.vocab[token] if token in self.vocab else self.vocab[self.unk_token] for token in seq] for seq in tokens]



In [55]:
from datasets import load_dataset
source_field = Field(tokenize_func=tokenize_en, init_token='<sos>', eos_token='<eos>', lower=True)
target_field = Field(tokenize_func=tokenize_vi, init_token='<sos>', eos_token='<eos>', lower=True)
# Load the JSON files as a dataset
dataset = load_dataset("json", data_files={"train": "train.json", "test": "test.json", "val": "val.json"})

# Build vocabularies for source and target fields
source_text = dataset['train']['English'] + dataset['test']['English'] + dataset['val']['English']
target_text = dataset['train']['Vietnamese'] + dataset['test']['Vietnamese'] + dataset['val']['Vietnamese']
source_field.build_vocab(source_text)
target_field.build_vocab(target_text)

Found cached dataset json (C:/Users/16262/.cache/huggingface/datasets/json/default-ea2dde61eb3c45b9/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4)


  0%|          | 0/3 [00:00<?, ?it/s]

In [32]:

class Example:
    def __init__(self, src, trg):
        self.src = src
        self.trg = trg

train_examples = []
for example in dataset['train']:
    src_tokens = tokenize_en(example['English'])
    trg_tokens = tokenize_vi(example['Vietnamese'])
    train_examples.append(Example([token.lower() for token in src_tokens], [token.lower() for token in trg_tokens]))

test_examples = []
for example in dataset['test']:
    src_tokens = tokenize_en(example['English'])
    trg_tokens = tokenize_vi(example['Vietnamese'])
    test_examples.append(Example([token.lower() for token in src_tokens], [token.lower() for token in trg_tokens]))

val_examples = []
for example in dataset['val']:
    src_tokens = tokenize_en(example['English'])
    trg_tokens = tokenize_vi(example['Vietnamese'])
    val_examples.append(Example([token.lower() for token in src_tokens], [token.lower() for token in trg_tokens]))


Found cached dataset json (C:/Users/16262/.cache/huggingface/datasets/json/default-ea2dde61eb3c45b9/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4)


  0%|          | 0/3 [00:00<?, ?it/s]

In [33]:
print(len(source_field.vocab))
print(len(target_field.vocab))
print(source_field.vocab)
print(target_field.vocab)

1258
1165
{'<sos>': 0, '.': 1, 'i': 2, 'the': 3, 'to': 4, 'tom': 5, 'you': 6, 'a': 7, '?': 8, 'n': 9, 't': 10, 'is': 11, 'do': 12, 'he': 13, 'it': 14, 'that': 15, 'in': 16, 'of': 17, "'s": 18, 'was': 19, ',': 20, 'for': 21, 'have': 22, 'we': 23, 'me': 24, 'this': 25, 'what': 26, 'my': 27, 'his': 28, 'be': 29, 'and': 30, 'not': 31, 'are': 32, 'mary': 33, 'did': 34, 'she': 35, 'on': 36, "'m": 37, 'with': 38, 'want': 39, 'at': 40, 'your': 41, 'can': 42, 'him': 43, 'about': 44, 'there': 45, "'ll": 46, 'know': 47, 'as': 48, 'think': 49, 'go': 50, 'they': 51, 'her': 52, 'all': 53, 'has': 54, 'here': 55, 'like': 56, "'re": 57, 'up': 58, 'how': 59, 'would': 60, 'had': 61, 'time': 62, 'get': 63, 'were': 64, 'will': 65, 'tell': 66, "'ve": 67, 'just': 68, 'ca': 69, 'does': 70, 'out': 71, 'when': 72, 'one': 73, 'very': 74, 'been': 75, 'by': 76, 'could': 77, 'if': 78, 'an': 79, 'why': 80, 'going': 81, 'see': 82, 'should': 83, 'good': 84, 'us': 85, 'from': 86, 'come': 87, 'no': 88, 'never': 89, 'so'

In [11]:
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader, Dataset
import numpy as np
import random
import torch
#self.apostrophe_regex = re.compile(r"(\S+)\s*'(\S+)")
SEED = 2222
random.seed(SEED)
torch.manual_seed(SEED)

class TranslationExample:
    def __init__(self, src, trg):
        self.src = src
        self.trg = trg

class TranslationDataset(Dataset):
    def __init__(self, data, source_vocab, target_vocab):
        self.data = data
        self.source_vocab = source_vocab
        self.target_vocab = target_vocab
        self.apostrophe_regex = re.compile(r"(\S+)\s*'(\S+)")

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        example = self.data[index]
        src_sent = [token for word in example.src for token in re.split(self.apostrophe_regex, word) if token]
        trg_sent = [token.replace('_', ' ') if '_' in token else token for token in example.trg]
        trg_sent = [token for word in trg_sent for token in re.split(self.apostrophe_regex, word) if token]
        src_seq = []
        for token in src_sent:
            if token in self.source_vocab:
                src_seq.append(self.source_vocab[token])
            else:
                print(f"Token '{token}' not in source vocabulary")
        trg_seq = []
        for token in trg_sent:
            if token in self.target_vocab:
                trg_seq.append(self.target_vocab[token])
            else:
                print(f"Token '{token}' not in target vocabulary")
        src_seq = np.array(src_seq)
        trg_seq = np.array(trg_seq)
        return src_seq, trg_seq

def get_iterator(data, source_vocab, target_vocab, device, batch_size=128, shuffle=True):
    dataset = TranslationDataset(data, source_vocab, target_vocab)
    sampler = None
    if shuffle:
        sampler = torch.utils.data.RandomSampler(dataset)
    loader = DataLoader(dataset=dataset, batch_size=batch_size, num_workers=0, sampler=sampler, shuffle=(sampler is None), drop_last=False, collate_fn=lambda x: tuple(zip(*x)))
    for batch in loader:
        src_seqs = pad_sequence([torch.LongTensor(seq) for seq in batch[0]], padding_value=source_vocab['<pad>'], batch_first=True).to(device)
        trg_seqs = pad_sequence([torch.LongTensor(seq) for seq in batch[1]], padding_value=target_vocab['<pad>'], batch_first=True).to(device)
        yield src_seqs.transpose(0, 1), trg_seqs.transpose(0, 1), len(batch[0])


train_batches = get_iterator(train_examples, source_field.vocab, target_field.vocab, device)
test_batches = get_iterator(test_examples, source_field.vocab, target_field.vocab, device)
val_batches = get_iterator(val_examples, source_field.vocab, target_field.vocab, device)


In [10]:
# Convert train_batches to a list and get its length
train_batches_list = list(train_batches)
num_train_batches = len(train_batches_list)

# Convert test_batches to a list and get its length
test_batches_list = list(test_batches)
num_test_batches = len(test_batches_list)

# Print the number of batches
print(f"Number of train batches: {num_train_batches}")
print(f"Number of test batches: {num_test_batches}")


Number of train batches: 0
Number of test batches: 0


In [12]:
batch = next(iter(test_batches))
print(batch[0])

Token 'nguy hiểm' not in target vocabulary
Token 'sore' not in source vocabulary
Token 'có vẻ' not in target vocabulary
Token 'anh trai' not in target vocabulary
Token 'làm việc' not in target vocabulary
Token 'smiled' not in source vocabulary
Token 'delighted' not in source vocabulary
Token 'vui mừng' not in target vocabulary
Token 'shook' not in source vocabulary
Token 'bắt tay' not in target vocabulary
Token 'thực sự' not in target vocabulary
Token 'đồng ý' not in target vocabulary
Token 'chúng tôi' not in target vocabulary
Token 'vấn đề' not in target vocabulary
Token 'flute' not in source vocabulary
Token 'sáo' not in target vocabulary
Token 'added' not in source vocabulary
Token 'danh sách' not in target vocabulary
Token 'tham gia' not in target vocabulary
Token 'điệu' not in target vocabulary
Token 'join' not in source vocabulary
Token 'bao giờ' not in target vocabulary
Token 'tham gia' not in target vocabulary
Token 'chúng tôi' not in target vocabulary
Token 'có vẻ' not in targ

In [13]:
import torch
from torch import nn, optim

# adjustable parameters
INPUT_DIM = len(source_field.vocab)
OUTPUT_DIM = len(target_field.vocab)
ENC_EMB_DIM = 256
DEC_EMB_DIM = 256
ENC_HID_DIM = 512
DEC_HID_DIM = 512
N_LAYERS = 1
ENC_DROPOUT = 0.5
DEC_DROPOUT = 0.5

class Encoder(nn.Module):
    def __init__(self, input_dim, emb_dim, enc_hid_dim, dec_hid_dim, n_layers, dropout):
        super().__init__()
        self.emb_dim = emb_dim
        self.enc_hid_dim = enc_hid_dim
        self.dec_hid_dim = dec_hid_dim
        self.input_dim = input_dim
        self.n_layers = n_layers
        self.dropout = dropout

        self.embedding = nn.Embedding(input_dim, emb_dim)
        self.rnn = nn.GRU(emb_dim, enc_hid_dim, n_layers, dropout=dropout,
                          bidirectional=True)
        self.fc = nn.Linear(enc_hid_dim * 2, dec_hid_dim)

    def forward(self, src_batch):
        # src [sent len, batch size]

        # [sent len, batch size, emb dim]
        embedded = self.embedding(src_batch)
        outputs, hidden = self.rnn(embedded)
        # outputs -> [sent len, batch size, hidden dim * n directions]
        # hidden -> [n layers * n directions, batch size, hidden dim]

        # initial decoder hidden is final hidden state of the forwards and
        # backwards encoder RNNs fed through a linear layer
        concated = torch.cat((hidden[-2, :, :], hidden[-1, :, :]), dim=1)
        hidden = torch.tanh(self.fc(concated))
        return outputs, hidden

encoder = Encoder(INPUT_DIM, ENC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, N_LAYERS, ENC_DROPOUT).to(device)
outputs, hidden = encoder(batch[0])

print(outputs.shape, hidden.shape)



torch.Size([20, 128, 1024]) torch.Size([128, 512])




In [14]:
class Attention(nn.Module):

    def __init__(self, enc_hid_dim, dec_hid_dim):
        super().__init__()
        self.enc_hid_dim = enc_hid_dim
        self.dec_hid_dim = dec_hid_dim

        # enc_hid_dim multiply by 2 due to bidirectional
        self.fc1 = nn.Linear(enc_hid_dim * 2 + dec_hid_dim, dec_hid_dim)
        self.fc2 = nn.Linear(dec_hid_dim, 1, bias=False)

    def forward(self, encoder_outputs, hidden):
        src_len = encoder_outputs.shape[0]
        batch_size = encoder_outputs.shape[1]
        
        # repeat encoder hidden state src_len times [batch size, sent len, dec hid dim]
        hidden = hidden.unsqueeze(1).repeat(1, src_len, 1)
        # reshape/permute the encoder output, so that the batch size comes first
        # [batch size, sent len, enc hid dim * 2], times 2 because of bidirectional
        outputs = encoder_outputs.permute(1, 0, 2)

        # the attention mechanism receives a concatenation of the hidden state
        # and the encoder output
        concat = torch.cat((hidden, outputs), dim=2)
        
        # fully connected layer and softmax layer to compute the attention weight
        # [batch size, sent len, dec hid dim]
        energy = torch.tanh(self.fc1(concat))
        # attention weight should be of [batch size, sent len]
        attention = self.fc2(energy).squeeze(dim=2)  
        attention_weight = torch.softmax(attention, dim=1)
        return attention_weight

    
attention = Attention(ENC_HID_DIM, DEC_HID_DIM).to(device)
attention_weight = attention(outputs, hidden)
attention_weight.shape

torch.Size([128, 20])

In [15]:
class Decoder(nn.Module):

    def __init__(self, output_dim, emb_dim, enc_hid_dim, dec_hid_dim, n_layers,
                 dropout, attention):
        super().__init__()
        self.emb_dim = emb_dim
        self.enc_hid_dim = enc_hid_dim
        self.dec_hid_dim = dec_hid_dim
        self.output_dim = output_dim
        self.n_layers = n_layers
        self.dropout = dropout
        self.attention = attention

        self.embedding = nn.Embedding(output_dim, emb_dim)
        self.rnn = nn.GRU(enc_hid_dim * 2 + emb_dim, dec_hid_dim, n_layers, dropout=dropout)
        self.linear = nn.Linear(dec_hid_dim, output_dim)

    def forward(self, trg, encoder_outputs, hidden):
        # trg [batch size]
        # outputs [src sen len, batch size, enc hid dim * 2], times 2 due to bidirectional
        # hidden [batch size, dec hid dim]

        # [batch size, 1, sent len] 
        attention = self.attention(encoder_outputs, hidden).unsqueeze(1)

        # [batch size, sent len, enc hid dim * 2]
        outputs = encoder_outputs.permute(1, 0, 2)

        # [1, batch size, enc hid dim * 2]
        context = torch.bmm(attention, outputs).permute(1, 0, 2)

        # input sentence -> embedding
        # [1, batch size, emb dim]
        embedded = self.embedding(trg.unsqueeze(0))
        rnn_input = torch.cat((embedded, context), dim=2)

        outputs, hidden = self.rnn(rnn_input, hidden.unsqueeze(0))
        prediction = self.linear(outputs.squeeze(0))
        return prediction, hidden.squeeze(0)

decoder = Decoder(OUTPUT_DIM, DEC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, N_LAYERS, DEC_DROPOUT, attention).to(device)
prediction, decoder_hidden = decoder(batch[1][0], outputs, hidden)

# notice the decoder_hidden's shape should match the shape that's generated by
# the encoder
prediction.shape, decoder_hidden.shape

(torch.Size([128, 1165]), torch.Size([128, 512]))

In [16]:
class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder, device):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device

    def forward(self, src_batch, trg_batch, teacher_forcing_ratio=0.5):
        max_len, batch_size = trg_batch.shape
        trg_vocab_size = self.decoder.output_dim

        # tensor to store decoder's output
        outputs = torch.zeros(max_len, batch_size, trg_vocab_size).to(self.device)

        # encoder_outputs : all hidden states of the input sequence (forward and backward)
        # hidden : final forward and backward hidden states, passed through a linear layer
        encoder_outputs, hidden = self.encoder(src_batch)

        trg = trg_batch[0]
        for i in range(1, max_len):
            prediction, hidden = self.decoder(trg, encoder_outputs, hidden)
            outputs[i] = prediction

            if random.random() < teacher_forcing_ratio:
                trg = trg_batch[i]
            else:
                trg = prediction.argmax(1)

        return outputs

attention = Attention(ENC_HID_DIM, DEC_HID_DIM)
encoder = Encoder(INPUT_DIM, ENC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, N_LAYERS, ENC_DROPOUT)
decoder = Decoder(OUTPUT_DIM, DEC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, N_LAYERS, DEC_DROPOUT, attention)
seq2seq = Seq2Seq(encoder, decoder, device).to(device)
seq2seq

Seq2Seq(
  (encoder): Encoder(
    (embedding): Embedding(1258, 256)
    (rnn): GRU(256, 512, dropout=0.5, bidirectional=True)
    (fc): Linear(in_features=1024, out_features=512, bias=True)
  )
  (decoder): Decoder(
    (attention): Attention(
      (fc1): Linear(in_features=1536, out_features=512, bias=True)
      (fc2): Linear(in_features=512, out_features=1, bias=False)
    )
    (embedding): Embedding(1165, 256)
    (rnn): GRU(1280, 512, dropout=0.5)
    (linear): Linear(in_features=512, out_features=1165, bias=True)
  )
)

In [17]:
outputs = seq2seq(batch[0],batch[1])
outputs.shape

torch.Size([19, 128, 1165])

In [18]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'The model has {count_parameters(seq2seq):,} trainable parameters')

The model has 7,651,213 trainable parameters


In [19]:
optimizer = optim.Adam(seq2seq.parameters())

# ignore the padding index when calculating the loss
PAD_IDX = target_field.vocab['<pad>']
criterion = nn.CrossEntropyLoss(ignore_index=PAD_IDX)

In [20]:
from tqdm import tqdm
import math
import time

def train(seq2seq, iterator, optimizer, criterion):
    seq2seq.train()
    
    epoch_loss = 0
    
    for batch in tqdm(iterator):
        optimizer.zero_grad()
        outputs = seq2seq(batch[0], batch[1])

        # the loss function only works on 2d inputs
        # and 1d targets we need to flatten each of them
        outputs_flatten = outputs[1:].view(-1, outputs.shape[-1])
        trg_flatten = batch[1][1:].reshape(-1)
        loss = criterion(outputs_flatten, trg_flatten)
        
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()

    return epoch_loss / 28


def evaluate(seq2seq, iterator, criterion):
    seq2seq.eval()

    epoch_loss = 0
    with torch.no_grad():
        for batch in tqdm(iterator):
            # turn off teacher forcing
            outputs = seq2seq(batch[0], batch[1], teacher_forcing_ratio=0) 

            # trg = [trg sent len, batch size]
            # output = [trg sent len, batch size, output dim]
            outputs_flatten = outputs[1:].view(-1, outputs.shape[-1])
            trg_flatten = batch[1][1:].reshape(-1)
            loss = criterion(outputs_flatten, trg_flatten)
            epoch_loss += loss.item()
        
    return epoch_loss / 8
  

def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs

N_EPOCHS = 30
best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):
    start_time = time.time()
    train_loss = train(seq2seq, train_batches, optimizer, criterion)
    valid_loss = evaluate(seq2seq, val_batches, criterion)
    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(seq2seq.state_dict(), 'tut2-model.pt')

    print(f'Epoch: {epoch+1} | Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. PPL: {math.exp(valid_loss):7.3f}')

0it [00:00, ?it/s]

Token 'careless' not in source vocabulary
Token 'bất cẩn' not in target vocabulary
Token 'sai lầm' not in target vocabulary
Token 'như vậy' not in target vocabulary
Token 'attempt' not in source vocabulary
Token 'thành công' not in target vocabulary
Token 'tất cả' not in target vocabulary
Token 'nỗ lực' not in target vocabulary
Token 'đầu tiên' not in target vocabulary
Token 'construction' not in source vocabulary
Token 'xây dựng' not in target vocabulary
Token 'anh chàng' not in target vocabulary
Token 'theo dõi' not in target vocabulary
Token 'trở thành' not in target vocabulary
Token 'golf' not in source vocabulary
Token 'bao giờ' not in target vocabulary
Token 'gôn' not in target vocabulary
Token 'bao giờ' not in target vocabulary
Token 'có thể' not in target vocabulary
Token 'tại sao' not in target vocabulary
Token 'absence' not in source vocabulary
Token 'thời gian' not in target vocabulary
Token 'vắng mặt' not in target vocabulary
Token 'battle' not in source vocabulary
Token 'c

1it [00:00,  1.04it/s]

Token 'không thể' not in target vocabulary
Token 'chờ đợi' not in target vocabulary
Token 'excused' not in source vocabulary
Token 'xin lỗi' not in target vocabulary
Token 'pack' not in source vocabulary
Token 'bags' not in source vocabulary
Token 'đóng gói' not in target vocabulary
Token 'chúng tôi' not in target vocabulary
Token 'trở lại' not in target vocabulary
Token 'nghiêm trọng' not in target vocabulary
Token 'glutted' not in source vocabulary
Token 'ourselves' not in source vocabulary
Token 'lobsters' not in source vocabulary
Token 'seafood' not in source vocabulary
Token 'chúng tôi' not in target vocabulary
Token 'lấp lánh' not in target vocabulary
Token 'tôm hùm' not in target vocabulary
Token 'hải sản' not in target vocabulary
Token 'trừng phạt' not in target vocabulary
Token 'investigate' not in source vocabulary
Token 'điều tra' not in target vocabulary
Token 'trường hợp' not in target vocabulary
Token 'có thể' not in target vocabulary
Token 'xe buýt' not in target vocabul

2it [00:01,  1.04it/s]

Token 'recovered' not in source vocabulary
Token 'hồi phục' not in target vocabulary
Token 'thường xuyên' not in target vocabulary
Token 'payday' not in source vocabulary
Token 'hạnh phúc' not in target vocabulary
Token 'bắt đầu' not in target vocabulary
Token 'thực sự' not in target vocabulary
Token 'đánh thức' not in target vocabulary
Token 'hy vọng' not in target vocabulary
Token 'kinh doanh' not in target vocabulary
Token 'thành công' not in target vocabulary
Token 'chờ đợi' not in target vocabulary
Token 'champion' not in source vocabulary
Token 'vô địch' not in target vocabulary
Token 'bơi lội' not in target vocabulary
Token 'tuyệt vời' not in target vocabulary
Token 'realized' not in source vocabulary
Token 'laughing' not in source vocabulary
Token 'vấn đề' not in target vocabulary
Token 'distracted' not in source vocabulary
Token 'phân tâm' not in target vocabulary
Token 'hy vọng' not in target vocabulary
Token 'có thể' not in target vocabulary
Token 'một chút' not in target vo

3it [00:02,  1.01it/s]

Token 'advertise' not in source vocabulary
Token 'có thể' not in target vocabulary
Token 'quảng cáo' not in target vocabulary
Token 'sử dụng' not in target vocabulary
Token 'horseback' not in source vocabulary
Token 'traffic' not in source vocabulary
Token 'đám cưới' not in target vocabulary
Token 'tai nạn' not in target vocabulary
Token 'giao thông' not in target vocabulary
Token 'công việc' not in target vocabulary
Token 'chúng tôi' not in target vocabulary
Token 'kết thúc' not in target vocabulary
Token 'quần áo' not in target vocabulary
Token 'liên quan' not in target vocabulary
Token 'chắc chắn' not in target vocabulary
Token 'tears' not in source vocabulary
Token 'không thể' not in target vocabulary
Token 'giọt' not in target vocabulary
Token 'nước mắt' not in target vocabulary
Token 'trung học' not in target vocabulary
Token 'quy tắc' not in target vocabulary
Token 'áp dụng' not in target vocabulary
Token 'tất cả' not in target vocabulary
Token 'bao nhiêu' not in target vocabula

4it [00:03,  1.00it/s]

Token 'công ty' not in target vocabulary
Token 'enemies' not in source vocabulary
Token 'kẻ thù' not in target vocabulary
Token 'thời gian' not in target vocabulary
Token 'concerns' not in source vocabulary
Token 'nhanh chóng' not in target vocabulary
Token 'lo lắng' not in target vocabulary
Token 'mushroom' not in source vocabulary
Token 'bất kỳ' not in target vocabulary
Token 'nấm' not in target vocabulary
Token 'có thể' not in target vocabulary
Token 'một số' not in target vocabulary
Token 'stays' not in source vocabulary
Token 'thỉnh thoảng' not in target vocabulary
Token 'khuya' not in target vocabulary
Token 'gentleman' not in source vocabulary
Token 'quý' not in target vocabulary
Token 'trước đây' not in target vocabulary
Token 'tốt bụng' not in target vocabulary
Token 'cá nhân' not in target vocabulary
Token 'chắc chắn' not in target vocabulary
Token 'rear' not in source vocabulary
Token 'speaker' not in source vocabulary
Token 'phòng không thể' not in target vocabulary
Token '

5it [00:04,  1.02it/s]

Token 'tại sao' not in target vocabulary
Token 'dragged' not in source vocabulary
Token 'kéo dài' not in target vocabulary
Token 'international' not in source vocabulary
Token 'trade' not in source vocabulary
Token 'ban' not in source vocabulary
Token 'straw' not in source vocabulary
Token 'lệnh' not in target vocabulary
Token 'cấm' not in target vocabulary
Token 'thương mại' not in target vocabulary
Token 'quốc tế' not in target vocabulary
Token 'có thể' not in target vocabulary
Token 'rơm' not in target vocabulary
Token 'cuối cùng' not in target vocabulary
Token 'kinh tế' not in target vocabulary
Token 'đất nước' not in target vocabulary
Token 'nhà vệ sinh' not in target vocabulary
Token 'bóng chày' not in target vocabulary
Token 'bánh mì' not in target vocabulary
Token 'mở cửa' not in target vocabulary
Token 'crooked' not in source vocabulary
Token 'cop' not in source vocabulary
Token 'cảnh sát' not in target vocabulary
Token 'quanh co' not in target vocabulary
Token 'chúng tôi' not

6it [00:06,  1.04s/it]

Token 'tại sao' not in target vocabulary
Token 'cha mẹ' not in target vocabulary
Token 'bowed' not in source vocabulary
Token 'cúi' not in target vocabulary
Token 'thầy' not in target vocabulary
Token 'fill' not in source vocabulary
Token 'rỗng' not in target vocabulary
Token 'truyện ngắn' not in target vocabulary
Token 'chúng tôi' not in target vocabulary
Token 'roses' not in source vocabulary
Token 'một số' not in target vocabulary
Token 'hoa hồng' not in target vocabulary
Token 'một số' not in target vocabulary
Token 'weekend' not in source vocabulary
Token 'windsurfing' not in source vocabulary
Token 'tất cả' not in target vocabulary
Token 'lướt ván' not in target vocabulary
Token 'thú vị' not in target vocabulary
Token 'yards' not in source vocabulary
Token 'nhật bản' not in target vocabulary
Token 'thành viên' not in target vocabulary
Token 'bóng chày' not in target vocabulary
Token 'lovely' not in source vocabulary
Token 'doll' not in source vocabulary
Token 'búp bê' not in targ

7it [00:07,  1.05s/it]

Token 'có thể' not in target vocabulary
Token 'hết sức' not in target vocabulary
Token 'lý do' not in target vocabulary
Token 'consult' not in source vocabulary
Token 'tham khảo' not in target vocabulary
Token 'từ điển' not in target vocabulary
Token 'pull' not in source vocabulary
Token 'painting' not in source vocabulary
Token 'mặc dù' not in target vocabulary
Token 'chúng tôi' not in target vocabulary
Token 'wives' not in source vocabulary
Token 'outlive' not in source vocabulary
Token 'husbands' not in source vocabulary
Token 'chúng ta' not in target vocabulary
Token 'giải quyết' not in target vocabulary
Token 'bao giờ' not in target vocabulary
Token 'tail' not in source vocabulary
Token 'đuôi' not in target vocabulary
Token 'bao giờ' not in target vocabulary
Token 'có vẻ' not in target vocabulary
Token 'giải toả' not in target vocabulary
Token 'eater' not in source vocabulary
Token 'row' not in source vocabulary
Token 'chúng tôi' not in target vocabulary
Token 'chèo' not in target

8it [00:08,  1.11s/it]

Token 'jack' not in source vocabulary
Token 'jack' not in target vocabulary
Token 'resolve' not in source vocabulary
Token 'conflicts' not in source vocabulary
Token 'tại sao' not in target vocabulary
Token 'giải quyết' not in target vocabulary
Token 'xung đột' not in target vocabulary
Token 'email' not in source vocabulary
Token 'chúng tôi' not in target vocabulary
Token 'không thể' not in target vocabulary
Token 'liên lạc' not in target vocabulary
Token 'điện thoại' not in target vocabulary
Token 'vì vậy' not in target vocabulary
Token 'email' not in target vocabulary
Token 'consists' not in source vocabulary
Token 'mutual' not in source vocabulary
Token 'understanding' not in source vocabulary
Token 'bao gồm' not in target vocabulary
Token 'hiểu biết' not in target vocabulary
Token 'lẫn' not in target vocabulary
Token 'tương lai' not in target vocabulary
Token 'công việc' not in target vocabulary
Token 'dine' not in source vocabulary
Token 'chúng tôi' not in target vocabulary
Token 

9it [00:09,  1.16s/it]

Token 'mountains' not in source vocabulary
Token 'nicer' not in source vocabulary
Token 'distance' not in source vocabulary
Token 'quyết định' not in target vocabulary
Token 'tương tự' not in target vocabulary
Token 'chúng ta' not in target vocabulary
Token 'dù sao' not in target vocabulary
Token 'partly' not in source vocabulary
Token 'spiral' not in source vocabulary
Token 'notebooks' not in source vocabulary
Token 'máy tính xách tay' not in target vocabulary
Token 'ràng buộc' not in target vocabulary
Token 'xoắn ốc' not in target vocabulary
Token 'thực sự' not in target vocabulary
Token 'thắt' not in target vocabulary
Token 'cà vạt' not in target vocabulary
Token 'gained' not in source vocabulary
Token 'khôn ngoan' not in target vocabulary
Token 'như vậy' not in target vocabulary
Token 'sàn' not in target vocabulary
Token 'shaved' not in source vocabulary
Token 'mustache' not in source vocabulary
Token 'cạo' not in target vocabulary
Token 'fascinating' not in source vocabulary
Token

10it [00:10,  1.12s/it]

Token 'trả lời' not in target vocabulary
Token 'gia đình' not in target vocabulary
Token 'embarrass' not in source vocabulary
Token 'xấu hổ' not in target vocabulary
Token 'restroom' not in source vocabulary
Token 'nhà vệ sinh' not in target vocabulary
Token 'stolen' not in source vocabulary
Token 'subway' not in source vocabulary
Token 'thẻ tín dụng' not in target vocabulary
Token 'đánh cắp' not in target vocabulary
Token 'tàu điện ngầm' not in target vocabulary
Token 'sale' not in source vocabulary
Token 'xin lỗi' not in target vocabulary
Token 'suggesting' not in source vocabulary
Token 'chính xác' not in target vocabulary
Token 'đề xuất' not in target vocabulary
Token 'cỡ' not in target vocabulary
Token 'disease' not in source vocabulary
Token 'căn bệnh' not in target vocabulary
Token 'tại sao' not in target vocabulary
Token 'cảm thấy' not in target vocabulary
Token 'thế nào' not in target vocabulary
Token 'vui lòng' not in target vocabulary
Token 'giấy tờ' not in target vocabulary

11it [00:11,  1.05s/it]

Token 'bây giờ' not in target vocabulary
Token 'tại sao' not in target vocabulary
Token 'xuất hiện' not in target vocabulary
Token 'hôm nay' not in target vocabulary
Token 'đất nước' not in target vocabulary
Token 'nội' not in target vocabulary
Token 'hoan nghênh' not in target vocabulary
Token 'sentences' not in source vocabulary
Token 'chúng tôi' not in target vocabulary
Token 'hoàn chỉnh' not in target vocabulary
Token 'một mình' not in target vocabulary
Token 'chủ nhật' not in target vocabulary
Token 'tây ban nha' not in target vocabulary
Token 'sa thải' not in target vocabulary
Token 'confirmed' not in source vocabulary
Token 'bachelor' not in source vocabulary
Token 'cử nhân' not in target vocabulary
Token 'xác nhận' not in target vocabulary
Token 'occurred' not in source vocabulary
Token 'tai nạn' not in target vocabulary
Token 'hôm qua' not in target vocabulary
Token 'ăn ở' not in target vocabulary
Token 'tại sao' not in target vocabulary
Token 'reluctant' not in source vocabul

12it [00:13,  1.19s/it]

Token 'barely' not in source vocabulary
Token 'contain' not in source vocabulary
Token 'hầu như' not in target vocabulary
Token 'không thể' not in target vocabulary
Token 'kiềm chế' not in target vocabulary
Token 'tức giận' not in target vocabulary
Token 'contradicts' not in source vocabulary
Token 'mâu thuẫn' not in target vocabulary
Token 'tất cả' not in target vocabulary
Token 'thừa nhận' not in target vocabulary
Token 'regret' not in source vocabulary
Token 'hối hận' not in target vocabulary
Token 'đại học' not in target vocabulary
Token 'có thể' not in target vocabulary
Token 'tất cả' not in target vocabulary
Token 'climb' not in source vocabulary
Token 'trèo' not in target vocabulary
Token 'mixed' not in source vocabulary
Token 'cảm xúc' not in target vocabulary
Token 'lẫn lộn' not in target vocabulary
Token 'sabotaged' not in source vocabulary
Token 'railroad' not in source vocabulary
Token 'phiến quân' not in target vocabulary
Token 'phá hoại' not in target vocabulary
Token 'đư

13it [00:14,  1.21s/it]

Token 'vui lòng' not in target vocabulary
Token 'báo cáo' not in target vocabulary
Token 'bao giờ' not in target vocabulary
Token 'thực sự' not in target vocabulary
Token 'photo' not in source vocabulary
Token 'xin lỗi' not in target vocabulary
Token 'có thể' not in target vocabulary
Token 'tiếp tục' not in target vocabulary
Token 'buồn cười' not in target vocabulary
Token 'taro' not in source vocabulary
Token 'khoai môn' not in target vocabulary
Token 'tinh thần' not in target vocabulary
Token 'trách nhiệm' not in target vocabulary
Token 'thực sự' not in target vocabulary
Token 'mạnh mẽ' not in target vocabulary
Token 'purse' not in source vocabulary
Token 'dường như' not in target vocabulary
Token 'thế nào' not in target vocabulary
Token 'quyết định' not in target vocabulary
Token 'footprints' not in source vocabulary
Token 'dấu' not in target vocabulary
Token 'tiếp tục' not in target vocabulary
Token 'chúng tôi' not in target vocabulary
Token 'sort' not in source vocabulary
Token 'ý

14it [00:15,  1.20s/it]

Token 'có vẻ' not in target vocabulary
Token 'ngạc nhiên' not in target vocabulary
Token 'tiếp tục' not in target vocabulary
Token 'experiences' not in source vocabulary
Token 'trải nghiệm' not in target vocabulary
Token 'tồi tệ' not in target vocabulary
Token 'làm việc' not in target vocabulary
Token 'lãng phí' not in target vocabulary
Token 'thời gian' not in target vocabulary
Token 'judging' not in source vocabulary
Token 'appearance' not in source vocabulary
Token 'đánh giá' not in target vocabulary
Token 'ngoại hình' not in target vocabulary
Token 'giàu có' not in target vocabulary
Token 'buồn cười' not in target vocabulary
Token 'chúng tôi' not in target vocabulary
Token 'warn' not in source vocabulary
Token 'cảnh báo' not in target vocabulary
Token 'bao giờ' not in target vocabulary
Token 'fit' not in source vocabulary
Token 'phù hợp' not in target vocabulary
Token 'công việc' not in target vocabulary
Token 'regrets' not in source vocabulary
Token 'hối tiếc' not in target vocabu

15it [00:16,  1.19s/it]

Token 'market' not in source vocabulary
Token 'thị trường' not in target vocabulary
Token 'câu chuyện' not in target vocabulary
Token 'awake' not in source vocabulary
Token 'thao thức' not in target vocabulary
Token 'tương lai' not in target vocabulary
Token 'lớn tuổi' not in target vocabulary
Token 'anh trai' not in target vocabulary
Token 'view' not in source vocabulary
Token 'thời gian' not in target vocabulary
Token 'xung quanh' not in target vocabulary
Token 'quan điểm' not in target vocabulary
Token 'chúng tôi' not in target vocabulary
Token 'tray' not in source vocabulary
Token 'khay' not in target vocabulary
Token 'chính xác' not in target vocabulary
Token 'chúng ta' not in target vocabulary
Token 'có thể' not in target vocabulary
Token 'cô giáo' not in target vocabulary
Token 'khen ngợi' not in target vocabulary
Token 'security' not in source vocabulary
Token 'officer' not in source vocabulary
Token 'nhân viên' not in target vocabulary
Token 'an ninh' not in target vocabulary


16it [00:17,  1.17s/it]

Token 'nghỉ ngơi' not in target vocabulary
Token 'tại sao' not in target vocabulary
Token 'chip' not in source vocabulary
Token 'block' not in source vocabulary
Token 'chip' not in target vocabulary
Token 'khối' not in target vocabulary
Token 'changes' not in source vocabulary
Token 'thay đổi' not in target vocabulary
Token 'nhanh chóng' not in target vocabulary
Token 'tất cả' not in target vocabulary
Token 'một mình' not in target vocabulary
Token 'chúng tôi' not in target vocabulary
Token 'sent' not in source vocabulary
Token 'postcard' not in source vocabulary
Token 'bưu thiếp' not in target vocabulary
Token 'hình ảnh' not in target vocabulary
Token 'bookstore' not in source vocabulary
Token 'tình cờ' not in target vocabulary
Token 'hiệu' not in target vocabulary
Token 'clowning' not in source vocabulary
Token 'luôn luôn' not in target vocabulary
Token 'quyết định' not in target vocabulary
Token 'kết hôn' not in target vocabulary
Token 'mặc dù' not in target vocabulary
Token 'bố mẹ'

17it [00:19,  1.28s/it]

Token 'math' not in source vocabulary
Token 'toán' not in target vocabulary
Token 'burying' not in source vocabulary
Token 'sand' not in source vocabulary
Token 'chôn' not in target vocabulary
Token 'thời gian' not in target vocabulary
Token 'chúng tôi' not in target vocabulary
Token 'chờ đợi' not in target vocabulary
Token 'chờ đợi' not in target vocabulary
Token 'bao giờ' not in target vocabulary
Token 'giải thích' not in target vocabulary
Token 'hidden' not in source vocabulary
Token 'thế nào' not in target vocabulary
Token 'có thể' not in target vocabulary
Token 'brief' not in source vocabulary
Token 'human' not in source vocabulary
Token 'chúng tôi' not in target vocabulary
Token 'thảo luận' not in target vocabulary
Token 'con người' not in target vocabulary
Token 'mood' not in source vocabulary
Token 'tâm trạng' not in target vocabulary
Token 'như thế nào' not in target vocabulary
Token 'lãng phí' not in target vocabulary
Token 'thời gian' not in target vocabulary
Token 'ngu ngốc

18it [00:20,  1.33s/it]

Token 'có thể' not in target vocabulary
Token 'simply' not in source vocabulary
Token 'đơn giản' not in target vocabulary
Token 'lắng nghe' not in target vocabulary
Token 'gợi ý' not in target vocabulary
Token 'thực hiện' not in target vocabulary
Token 'đối với' not in target vocabulary
Token 'đối với' not in target vocabulary
Token 'thời gian' not in target vocabulary
Token 'shorter' not in source vocabulary
Token 'anh trai' not in target vocabulary
Token 'có thể' not in target vocabulary
Token 'information' not in source vocabulary
Token 'nhu cầu' not in target vocabulary
Token 'thông tin' not in target vocabulary
Token 'suspicious' not in source vocabulary
Token 'chúng tôi' not in target vocabulary
Token 'ngờ' not in target vocabulary
Token 'hokkaido' not in source vocabulary
Token 'hokkaido' not in target vocabulary
Token 'chúng ta' not in target vocabulary
Token 'tại sao' not in target vocabulary
Token 'lắng nghe' not in target vocabulary
Token 'smaller' not in source vocabulary
T

19it [00:22,  1.59s/it]

Token 'ridiculous' not in source vocabulary
Token 'buồn cười' not in target vocabulary
Token 'weirdo' not in source vocabulary
Token 'kỳ quặc' not in target vocabulary
Token 'bob' not in source vocabulary
Token 'bob' not in target vocabulary
Token 'đến nỗi' not in target vocabulary
Token 'pond' not in source vocabulary
Token 'ao' not in target vocabulary
Token 'chúng tôi' not in target vocabulary
Token 'shaken' not in source vocabulary
Token 'run' not in target vocabulary
Token 'hạnh phúc' not in target vocabulary
Token 'chúng ta' not in target vocabulary
Token 'lo lắng' not in target vocabulary
Token 'tốt bụng' not in target vocabulary
Token 'trả lời' not in target vocabulary
Token 'cuộc sống' not in target vocabulary
Token 'nguy hiểm' not in target vocabulary
Token 'festival' not in source vocabulary
Token 'ăn mặc' not in target vocabulary
Token 'lễ hội' not in target vocabulary
Token 'buồn cười' not in target vocabulary
Token 'chi tiết' not in target vocabulary
Token 'expectations' 

20it [00:24,  1.58s/it]

Token 'cream' not in source vocabulary
Token 'crop' not in source vocabulary
Token 'cây trồng' not in target vocabulary
Token 'năm ngoái' not in target vocabulary
Token 'chào đón' not in target vocabulary
Token 'chúng tôi' not in target vocabulary
Token 'dự kiến' not in target vocabulary
Token 'board' not in source vocabulary
Token 'directors' not in source vocabulary
Token 'giám đốc' not in target vocabulary
Token 'hầu hết' not in target vocabulary
Token 'punched' not in source vocabulary
Token 'đấm' not in target vocabulary
Token 'tiếp tục' not in target vocabulary
Token 'tìm kiếm' not in target vocabulary
Token 'serve' not in source vocabulary
Token 'monitored' not in source vocabulary
Token 'phục vụ' not in target vocabulary
Token 'cuộc gọi' not in target vocabulary
Token 'có thể' not in target vocabulary
Token 'theo dõi' not in target vocabulary
Token 'thừa nhận' not in target vocabulary
Token 'bất cứ' not in target vocabulary
Token 'manna' not in source vocabulary
Token 'heaven' 

21it [00:25,  1.44s/it]

Token 'lái xe' not in target vocabulary
Token 'khả năng' not in target vocabulary
Token 'chúng tôi' not in target vocabulary
Token 'occupied' not in source vocabulary
Token 'chiếm đóng' not in target vocabulary
Token 'quiz' not in source vocabulary
Token 'kiểm tra' not in target vocabulary
Token 'bao giờ' not in target vocabulary
Token 'vấn đề' not in target vocabulary
Token 'resume' not in source vocabulary
Token 'bạn đọc' not in target vocabulary
Token 'sơ yếu' not in target vocabulary
Token 'lý lịch' not in target vocabulary
Token 'teeth' not in source vocabulary
Token 'răng' not in target vocabulary
Token 'luôn luôn' not in target vocabulary
Token 'tìm kiếm' not in target vocabulary
Token 'tương tự' not in target vocabulary
Token 'từ điển' not in target vocabulary
Token 'chúng tôi' not in target vocabulary
Token 'công viên' not in target vocabulary
Token 'chúng tôi' not in target vocabulary
Token 'có thể' not in target vocabulary
Token 'carried' not in source vocabulary
Token 'arm'

22it [00:27,  1.49s/it]

Token 'excused' not in source vocabulary
Token 'giáo viên' not in target vocabulary
Token 'xin lỗi' not in target vocabulary
Token 'piano' not in source vocabulary
Token 'stool' not in source vocabulary
Token 'piano' not in target vocabulary
Token 'di chuyển' not in target vocabulary
Token 'socks' not in source vocabulary
Token 'tất' not in target vocabulary
Token 'nhặt' not in target vocabulary
Token 'chén' not in target vocabulary
Token 'tại sao' not in target vocabulary
Token 'tức giận' not in target vocabulary
Token 'rõ ràng' not in target vocabulary
Token 'knocking' not in source vocabulary
Token 'gõ cửa' not in target vocabulary
Token 'chăm chỉ' not in target vocabulary
Token 'có thể' not in target vocabulary
Token 'leak' not in source vocabulary
Token 'fixed' not in source vocabulary
Token 'rò rỉ' not in target vocabulary
Token 'chúng tôi' not in target vocabulary
Token 'nhật bản' not in target vocabulary
Token 'bất cứ' not in target vocabulary
Token 'nói chung' not in target vo

23it [00:28,  1.38s/it]

Token 'bikini' not in source vocabulary
Token 'xinh đẹp' not in target vocabulary
Token 'bikini' not in target vocabulary
Token 'em gái' not in target vocabulary
Token 'hôm nay' not in target vocabulary
Token 'làm việc' not in target vocabulary
Token 'osaka' not in source vocabulary
Token 'osaka' not in target vocabulary
Token 'bây giờ' not in target vocabulary
Token 'đề cập' not in target vocabulary
Token 'chúng tôi' not in target vocabulary
Token 'lo lắng' not in target vocabulary
Token 'có vẻ' not in target vocabulary
Token 'ngu ngốc' not in target vocabulary
Token 'hoàn thành' not in target vocabulary
Token 'công việc' not in target vocabulary
Token 'goose' not in source vocabulary
Token 'lays' not in source vocabulary
Token 'golden' not in source vocabulary
Token 'ngỗng' not in target vocabulary
Token 'đẻ' not in target vocabulary
Token 'announced' not in source vocabulary
Token 'engagement' not in source vocabulary
Token 'tuyên bố' not in target vocabulary
Token 'đính hôn' not in

24it [00:29,  1.29s/it]

Token 'cleaning' not in source vocabulary
Token 'dọn dẹp' not in target vocabulary
Token 'cố gắng' not in target vocabulary
Token 'restaurants' not in source vocabulary
Token 'nhà hàng' not in target vocabulary
Token 'nhật bản' not in target vocabulary
Token 'tiny' not in source vocabulary
Token 'universe' not in source vocabulary
Token 'thế giới' not in target vocabulary
Token 'chúng ta' not in target vocabulary
Token 'nhỏ bé' not in target vocabulary
Token 'vũ trụ' not in target vocabulary
Token 'sew' not in source vocabulary
Token 'buttons' not in source vocabulary
Token 'có thể' not in target vocabulary
Token 'yumi' not in source vocabulary
Token 'hobby' not in source vocabulary
Token 'sở thích' not in target vocabulary
Token 'yumi' not in target vocabulary
Token 'nổi tiếng' not in target vocabulary
Token 'làm gì' not in target vocabulary
Token 'chúng tôi' not in target vocabulary
Token 'nhé' not in target vocabulary
Token 'thỉnh thoảng' not in target vocabulary
Token 'không thể' n

25it [00:31,  1.39s/it]

Token 'xin lỗi' not in target vocabulary
Token 'chúng tôi' not in target vocabulary
Token 'chắc chắn' not in target vocabulary
Token 'thông thường' not in target vocabulary
Token 'chúng tôi' not in target vocabulary
Token 'income' not in source vocabulary
Token 'thuế thu nhập' not in target vocabulary
Token 'có thể' not in target vocabulary
Token 'prepared' not in source vocabulary
Token 'worst' not in source vocabulary
Token 'chuẩn bị' not in target vocabulary
Token 'tồi tệ' not in target vocabulary
Token 'đề nghị' not in target vocabulary
Token 'josh' not in source vocabulary
Token 'josh' not in target vocabulary
Token 'yêu cầu' not in target vocabulary
Token 'mom' not in source vocabulary
Token 'tại sao' not in target vocabulary
Token 'washington' not in source vocabulary
Token 'cherry' not in source vocabulary
Token 'blossoms' not in source vocabulary
Token 'chúng tôi' not in target vocabulary
Token 'washington' not in target vocabulary
Token 'kịp thời' not in target vocabulary
Tok

26it [00:31,  1.23s/it]

Token 'bài tập' not in target vocabulary
Token 'taker' not in source vocabulary
Token 'mạo hiểm' not in target vocabulary
Token 'tiếng anh' not in target vocabulary
Token 'metal' not in source vocabulary
Token 'contracts' not in source vocabulary
Token 'cooled' not in source vocabulary
Token 'hợp đồng' not in target vocabulary
Token 'kim loại' not in target vocabulary
Token 'mát' not in target vocabulary
Token 'officer' not in source vocabulary
Token 'nhân viên' not in target vocabulary
Token 'cảnh sát' not in target vocabulary
Token 'tiếp theo' not in target vocabulary
Token 'lạc quan' not in target vocabulary
Token 'head' not in source vocabulary
Token 'marketing' not in source vocabulary
Token 'department' not in source vocabulary
Token 'trưởng' not in target vocabulary
Token 'marketing' not in target vocabulary
Token 'đô la' not in target vocabulary
Token 'có thể' not in target vocabulary
Token 'chúng tôi' not in target vocabulary
Token 'chúng tôi' not in target vocabulary
Token 't

27it [00:33,  1.23s/it]

Token 'cảm thấy' not in target vocabulary
Token 'em gái' not in target vocabulary
Token 'elderly' not in source vocabulary
Token 'thậm chí' not in target vocabulary
Token 'có thể' not in target vocabulary
Token 'weird' not in source vocabulary
Token 'kỳ lạ' not in target vocabulary
Token 'xin lỗi' not in target vocabulary
Token 'tối qua' not in target vocabulary
Token 'forty' not in source vocabulary
Token 'bốn mươi' not in target vocabulary
Token 'có thể' not in target vocabulary
Token 'lý do' not in target vocabulary
Token 'tại sao' not in target vocabulary
Token 'bất kỳ' not in target vocabulary
Token 'khoản' not in target vocabulary
Token 'jobs' not in source vocabulary
Token 'công việc' not in target vocabulary
Token 'tiếp tục' not in target vocabulary
Token 'tương tự' not in target vocabulary
Token 'chắc chắn' not in target vocabulary
Token 'kết thúc' not in target vocabulary
Token 'tốt đẹp' not in target vocabulary
Token 'jealous' not in source vocabulary
Token 'ghen tị' not in 

28it [00:33,  1.20s/it]
0it [00:00, ?it/s]

Token 'reckon' not in source vocabulary
Token 'sạch sẽ' not in target vocabulary
Token 'none' not in source vocabulary
Token 'chúng tôi' not in target vocabulary
Token 'có thể' not in target vocabulary
Token 'trả lời' not in target vocabulary
Token 'chắc chắn' not in target vocabulary
Token 'filling' not in source vocabulary
Token 'temporarily' not in source vocabulary
Token 'điền' not in target vocabulary
Token 'tạm thời' not in target vocabulary
Token 'abolish' not in source vocabulary
Token 'penalty' not in source vocabulary
Token 'chúng ta' not in target vocabulary
Token 'xoá bỏ' not in target vocabulary
Token 'án' not in target vocabulary
Token 'tử hình' not in target vocabulary
Token 'sẵn sàng' not in target vocabulary
Token 'gia đình' not in target vocabulary
Token 'paranoid' not in source vocabulary
Token 'hoang tưởng' not in target vocabulary
Token 'chính xác' not in target vocabulary
Token 'đối xử' not in target vocabulary
Token 'ngạc nhiên' not in target vocabulary
Token 'ra

1it [00:00,  2.58it/s]

Token 'champion' not in source vocabulary
Token 'vô địch' not in target vocabulary
Token 'clients' not in source vocabulary
Token 'millionaires' not in source vocabulary
Token 'khách hàng' not in target vocabulary
Token 'triệu phú' not in target vocabulary
Token 'color' not in source vocabulary
Token 'drained' not in source vocabulary
Token 'tất cả' not in target vocabulary
Token 'khuôn mặt' not in target vocabulary
Token 'tại sao' not in target vocabulary
Token 'comments' not in source vocabulary
Token 'favorable' not in source vocabulary
Token 'ý kiến' not in target vocabulary
Token 'thuận lợi' not in target vocabulary
Token 'downloading' not in source vocabulary
Token 'pictures' not in source vocabulary
Token 'tải' not in target vocabulary
Token 'hình ảnh' not in target vocabulary
Token 'bây giờ' not in target vocabulary
Token 'consist' not in source vocabulary
Token 'knowing' not in source vocabulary
Token 'khôn ngoan' not in target vocabulary
Token 'không chỉ' not in target vocabu

2it [00:00,  2.70it/s]

Token 'period' not in source vocabulary
Token 'fifty' not in source vocabulary
Token 'thời gian' not in target vocabulary
Token 'năm mươi' not in target vocabulary
Token 'recent' not in source vocabulary
Token 'shortage' not in source vocabulary
Token 'rise' not in source vocabulary
Token 'thiếu hụt' not in target vocabulary
Token 'cà phê' not in target vocabulary
Token 'vấn đề' not in target vocabulary
Token 'sentenced' not in source vocabulary
Token 'prison' not in source vocabulary
Token 'thẩm phán' not in target vocabulary
Token 'kết án' not in target vocabulary
Token 'enormous' not in source vocabulary
Token 'thảo luận' not in target vocabulary
Token 'sôi nổi' not in target vocabulary
Token 'vấn đề' not in target vocabulary
Token 'italy' not in source vocabulary
Token 'tất cả' not in target vocabulary
Token 'nhường' not in target vocabulary
Token 'normal' not in source vocabulary
Token 'không thể' not in target vocabulary
Token 'chờ đợi' not in target vocabulary
Token 'trở lại' no

3it [00:01,  2.79it/s]

Token 'answering' not in source vocabulary
Token 'trả lời' not in target vocabulary
Token 'dù sao' not in target vocabulary
Token 'flat' not in source vocabulary
Token 'tire' not in source vocabulary
Token 'lốp' not in target vocabulary
Token 'bằng phẳng' not in target vocabulary
Token 'trí nhớ' not in target vocabulary
Token 'có thể' not in target vocabulary
Token 'làm gì' not in target vocabulary
Token 'bất kể' not in target vocabulary
Token 'đàn ông' not in target vocabulary
Token 'trách nhiệm' not in target vocabulary
Token 'microphone' not in source vocabulary
Token 'micro' not in target vocabulary
Token 'flinging' not in source vocabulary
Token 'stones' not in source vocabulary
Token 'thời gian' not in target vocabulary
Token 'mow' not in source vocabulary
Token 'lawn' not in source vocabulary
Token 'permitting' not in source vocabulary
Token 'cỏ' not in target vocabulary
Token 'ngày mai' not in target vocabulary
Token 'thời tiết' not in target vocabulary
Token 'cho phép' not in 

4it [00:01,  2.55it/s]


Epoch: 1 | Time: 0m 35s
	Train Loss: 5.317 | Train PPL: 203.824
	 Val. Loss: 2.514 |  Val. PPL:  12.354


0it [00:00, ?it/s]
0it [00:00, ?it/s]


Epoch: 2 | Time: 0m 0s
	Train Loss: 0.000 | Train PPL:   1.000
	 Val. Loss: 0.000 |  Val. PPL:   1.000


0it [00:00, ?it/s]
0it [00:00, ?it/s]


Epoch: 3 | Time: 0m 0s
	Train Loss: 0.000 | Train PPL:   1.000
	 Val. Loss: 0.000 |  Val. PPL:   1.000


0it [00:00, ?it/s]
0it [00:00, ?it/s]


Epoch: 4 | Time: 0m 0s
	Train Loss: 0.000 | Train PPL:   1.000
	 Val. Loss: 0.000 |  Val. PPL:   1.000


0it [00:00, ?it/s]
0it [00:00, ?it/s]


Epoch: 5 | Time: 0m 0s
	Train Loss: 0.000 | Train PPL:   1.000
	 Val. Loss: 0.000 |  Val. PPL:   1.000


0it [00:00, ?it/s]
0it [00:00, ?it/s]


Epoch: 6 | Time: 0m 0s
	Train Loss: 0.000 | Train PPL:   1.000
	 Val. Loss: 0.000 |  Val. PPL:   1.000


0it [00:00, ?it/s]
0it [00:00, ?it/s]


Epoch: 7 | Time: 0m 0s
	Train Loss: 0.000 | Train PPL:   1.000
	 Val. Loss: 0.000 |  Val. PPL:   1.000


0it [00:00, ?it/s]
0it [00:00, ?it/s]


Epoch: 8 | Time: 0m 0s
	Train Loss: 0.000 | Train PPL:   1.000
	 Val. Loss: 0.000 |  Val. PPL:   1.000


0it [00:00, ?it/s]
0it [00:00, ?it/s]


Epoch: 9 | Time: 0m 0s
	Train Loss: 0.000 | Train PPL:   1.000
	 Val. Loss: 0.000 |  Val. PPL:   1.000


0it [00:00, ?it/s]
0it [00:00, ?it/s]


Epoch: 10 | Time: 0m 0s
	Train Loss: 0.000 | Train PPL:   1.000
	 Val. Loss: 0.000 |  Val. PPL:   1.000


0it [00:00, ?it/s]
0it [00:00, ?it/s]


Epoch: 11 | Time: 0m 0s
	Train Loss: 0.000 | Train PPL:   1.000
	 Val. Loss: 0.000 |  Val. PPL:   1.000


0it [00:00, ?it/s]
0it [00:00, ?it/s]


Epoch: 12 | Time: 0m 0s
	Train Loss: 0.000 | Train PPL:   1.000
	 Val. Loss: 0.000 |  Val. PPL:   1.000


0it [00:00, ?it/s]
0it [00:00, ?it/s]


Epoch: 13 | Time: 0m 0s
	Train Loss: 0.000 | Train PPL:   1.000
	 Val. Loss: 0.000 |  Val. PPL:   1.000


0it [00:00, ?it/s]
0it [00:00, ?it/s]


Epoch: 14 | Time: 0m 0s
	Train Loss: 0.000 | Train PPL:   1.000
	 Val. Loss: 0.000 |  Val. PPL:   1.000


0it [00:00, ?it/s]
0it [00:00, ?it/s]


Epoch: 15 | Time: 0m 0s
	Train Loss: 0.000 | Train PPL:   1.000
	 Val. Loss: 0.000 |  Val. PPL:   1.000


0it [00:00, ?it/s]
0it [00:00, ?it/s]


Epoch: 16 | Time: 0m 0s
	Train Loss: 0.000 | Train PPL:   1.000
	 Val. Loss: 0.000 |  Val. PPL:   1.000


0it [00:00, ?it/s]
0it [00:00, ?it/s]


Epoch: 17 | Time: 0m 0s
	Train Loss: 0.000 | Train PPL:   1.000
	 Val. Loss: 0.000 |  Val. PPL:   1.000


0it [00:00, ?it/s]
0it [00:00, ?it/s]


Epoch: 18 | Time: 0m 0s
	Train Loss: 0.000 | Train PPL:   1.000
	 Val. Loss: 0.000 |  Val. PPL:   1.000


0it [00:00, ?it/s]
0it [00:00, ?it/s]


Epoch: 19 | Time: 0m 0s
	Train Loss: 0.000 | Train PPL:   1.000
	 Val. Loss: 0.000 |  Val. PPL:   1.000


0it [00:00, ?it/s]
0it [00:00, ?it/s]


Epoch: 20 | Time: 0m 0s
	Train Loss: 0.000 | Train PPL:   1.000
	 Val. Loss: 0.000 |  Val. PPL:   1.000


0it [00:00, ?it/s]
0it [00:00, ?it/s]


Epoch: 21 | Time: 0m 0s
	Train Loss: 0.000 | Train PPL:   1.000
	 Val. Loss: 0.000 |  Val. PPL:   1.000


0it [00:00, ?it/s]
0it [00:00, ?it/s]


Epoch: 22 | Time: 0m 0s
	Train Loss: 0.000 | Train PPL:   1.000
	 Val. Loss: 0.000 |  Val. PPL:   1.000


0it [00:00, ?it/s]
0it [00:00, ?it/s]


Epoch: 23 | Time: 0m 0s
	Train Loss: 0.000 | Train PPL:   1.000
	 Val. Loss: 0.000 |  Val. PPL:   1.000


0it [00:00, ?it/s]
0it [00:00, ?it/s]


Epoch: 24 | Time: 0m 0s
	Train Loss: 0.000 | Train PPL:   1.000
	 Val. Loss: 0.000 |  Val. PPL:   1.000


0it [00:00, ?it/s]
0it [00:00, ?it/s]


Epoch: 25 | Time: 0m 0s
	Train Loss: 0.000 | Train PPL:   1.000
	 Val. Loss: 0.000 |  Val. PPL:   1.000


0it [00:00, ?it/s]
0it [00:00, ?it/s]


Epoch: 26 | Time: 0m 0s
	Train Loss: 0.000 | Train PPL:   1.000
	 Val. Loss: 0.000 |  Val. PPL:   1.000


0it [00:00, ?it/s]
0it [00:00, ?it/s]


Epoch: 27 | Time: 0m 0s
	Train Loss: 0.000 | Train PPL:   1.000
	 Val. Loss: 0.000 |  Val. PPL:   1.000


0it [00:00, ?it/s]
0it [00:00, ?it/s]


Epoch: 28 | Time: 0m 0s
	Train Loss: 0.000 | Train PPL:   1.000
	 Val. Loss: 0.000 |  Val. PPL:   1.000


0it [00:00, ?it/s]
0it [00:00, ?it/s]


Epoch: 29 | Time: 0m 0s
	Train Loss: 0.000 | Train PPL:   1.000
	 Val. Loss: 0.000 |  Val. PPL:   1.000


0it [00:00, ?it/s]
0it [00:00, ?it/s]

Epoch: 30 | Time: 0m 0s
	Train Loss: 0.000 | Train PPL:   1.000
	 Val. Loss: 0.000 |  Val. PPL:   1.000





In [21]:
seq2seq.load_state_dict(torch.load('tut2-model.pt'))

test_loss = evaluate(seq2seq, test_batches, criterion)
print(f'| Test Loss: {test_loss:.3f} | Test PPL: {math.exp(test_loss):7.3f} |')

0it [00:00, ?it/s]

Token 'killer' not in source vocabulary
Token 'twirled' not in source vocabulary
Token 'basketball' not in source vocabulary
Token 'finger' not in source vocabulary
Token 'xoay' not in target vocabulary
Token 'tròn' not in target vocabulary
Token 'bóng rổ' not in target vocabulary
Token 'ngón' not in target vocabulary
Token 'none' not in source vocabulary
Token 'proofread' not in source vocabulary
Token 'thời gian' not in target vocabulary
Token 'có thể' not in target vocabulary
Token 'attributed' not in source vocabulary
Token 'rate' not in source vocabulary
Token 'infants' not in source vocabulary
Token 'progress' not in source vocabulary
Token 'tỷ lệ' not in target vocabulary
Token 'tử vong' not in target vocabulary
Token 'sơ sinh' not in target vocabulary
Token 'do' not in target vocabulary
Token 'tiến bộ' not in target vocabulary
Token 'y học' not in target vocabulary
Token 'preparing' not in source vocabulary
Token 'entrance' not in source vocabulary
Token 'chuẩn bị' not in targe

1it [00:00,  1.95it/s]

Token 'chúng tôi' not in target vocabulary
Token 'có thể' not in target vocabulary
Token 'formal' not in source vocabulary
Token 'chuẩn bị' not in target vocabulary
Token 'phát biểu' not in target vocabulary
Token 'chính thức' not in target vocabulary
Token 'bắt đầu' not in target vocabulary
Token 'alternatives' not in source vocabulary
Token 'chúng tôi' not in target vocabulary
Token 'lựa chọn' not in target vocabulary
Token 'thay thế' not in target vocabulary
Token 'giáo viên' not in target vocabulary
Token 'yêu cầu' not in target vocabulary
Token 'chúng tôi' not in target vocabulary
Token 'dọn dẹp' not in target vocabulary
Token 'lớp học' not in target vocabulary
Token 'có thể' not in target vocabulary
Token 'folks' not in source vocabulary
Token 'folks' not in target vocabulary
Token 'hôm nay' not in target vocabulary
Token 'cảm thấy' not in target vocabulary
Token 'giếng' not in target vocabulary
Token 'lý do' not in target vocabulary
Token 'chúng tôi' not in target vocabulary
Tok

2it [00:00,  2.94it/s]

Token 'cảm thấy' not in target vocabulary
Token 'singer' not in source vocabulary
Token 'ca sĩ' not in target vocabulary
Token 'có thể' not in target vocabulary
Token 'hôm qua' not in target vocabulary
Token 'smile' not in source vocabulary
Token 'che' not in target vocabulary
Token 'congress' not in source vocabulary
Token 'rejected' not in source vocabulary
Token 'đại hội' not in target vocabulary
Token 'từ chối' not in target vocabulary
Token 'yêu cầu' not in target vocabulary
Token 'trả lời' not in target vocabulary
Token 'chúng ta' not in target vocabulary
Token 'thường lệ' not in target vocabulary
Token 'không thể' not in target vocabulary
Token 'bây giờ' not in target vocabulary
Token 'có thể' not in target vocabulary
Token 'một chút' not in target vocabulary
Token 'giúp đỡ' not in target vocabulary
Token 'birds' not in source vocabulary
Token 'thế nào' not in target vocabulary
Token 'có thể' not in target vocabulary
Token 'khả năng' not in target vocabulary
Token 'thế này' not 

3it [00:01,  2.27it/s]

Token 'wasted' not in source vocabulary
Token 'chúng ta' not in target vocabulary
Token 'lãng phí' not in target vocabulary
Token 'thời gian' not in target vocabulary
Token 'filed' not in source vocabulary
Token 'divorce' not in source vocabulary
Token 'đệ' not in target vocabulary
Token 'ly hôn' not in target vocabulary
Token 'vấn đề' not in target vocabulary
Token 'thời gian' not in target vocabulary
Token 'rắc rối' not in target vocabulary
Token 'spider' not in source vocabulary
Token 'produce' not in source vocabulary
Token 'silky' not in source vocabulary
Token 'substance' not in source vocabulary
Token 'tiny' not in source vocabulary
Token 'openings' not in source vocabulary
Token 'underside' not in source vocabulary
Token 'nhện' not in target vocabulary
Token 'có thể' not in target vocabulary
Token 'tạo' not in target vocabulary
Token 'chất' not in target vocabulary
Token 'mượt' not in target vocabulary
Token 'lỗ' not in target vocabulary
Token 'encouragement' not in source voca

4it [00:01,  2.42it/s]

Token 'valuable' not in source vocabulary
Token 'có thể' not in target vocabulary
Token 'thời gian' not in target vocabulary
Token 'recession' not in source vocabulary
Token 'suy thoái' not in target vocabulary
Token 'chắc chắn' not in target vocabulary
Token 'trung thực' not in target vocabulary
Token 'officially' not in source vocabulary
Token 'secretary' not in source vocabulary
Token 'chính thức' not in target vocabulary
Token 'trách nhiệm' not in target vocabulary
Token 'thực tế' not in target vocabulary
Token 'thư ký' not in target vocabulary
Token 'tất cả' not in target vocabulary
Token 'công việc' not in target vocabulary
Token 'bất cứ' not in target vocabulary
Token 'có thể' not in target vocabulary
Token 'thay đổi' not in target vocabulary
Token 'ý kiến' not in target vocabulary
Token 'quần jean' not in target vocabulary
Token 'mặt hàng' not in target vocabulary
Token 'quần áo' not in target vocabulary
Token 'xuất khẩu' not in target vocabulary
Token 'phổ biến' not in target 

5it [00:02,  1.53it/s]

Token 'tại sao' not in target vocabulary
Token 'nguy hiểm' not in target vocabulary
Token 'bác sĩ' not in target vocabulary
Token 'jittery' not in source vocabulary
Token 'bồn chồn' not in target vocabulary
Token 'thay vì' not in target vocabulary
Token 'nghỉ ngơi' not in target vocabulary
Token 'làm việc' not in target vocabulary
Token 'chăm chỉ' not in target vocabulary
Token 'bình thường' not in target vocabulary
Token 'politicians' not in source vocabulary
Token 'raising' not in source vocabulary
Token 'chính trị gia' not in target vocabulary
Token 'quỹ' not in target vocabulary
Token 'ngọt ngào' not in target vocabulary
Token 'tại sao' not in target vocabulary
Token 'chúng tôi' not in target vocabulary
Token 'cats' not in source vocabulary
Token 'chúng tôi' not in target vocabulary
Token 'bao giờ' not in target vocabulary
Token 'tha thứ' not in target vocabulary
Token 'oboe' not in source vocabulary
Token 'repaired' not in source vocabulary
Token 'sửa chữa' not in target vocabular

6it [00:03,  1.78it/s]

Token 'irish' not in source vocabulary
Token 'horn' not in source vocabulary
Token 'irish' not in target vocabulary
Token 'sừng' not in target vocabulary
Token 'adults' not in source vocabulary
Token 'humid' not in source vocabulary
Token 'increases' not in source vocabulary
Token 'crimes' not in source vocabulary
Token 'thời tiết' not in target vocabulary
Token 'ấm áp' not in target vocabulary
Token 'ẩm ướt' not in target vocabulary
Token 'số lượng' not in target vocabulary
Token 'tội phạm' not in target vocabulary
Token 'hiccup' not in source vocabulary
Token 'nấc' not in target vocabulary
Token 'attacked' not in source vocabulary
Token 'tấn công' not in target vocabulary
Token 'quần áo' not in target vocabulary
Token 'hẹn hò' not in target vocabulary
Token 'ngày mai' not in target vocabulary
Token 'anxious' not in source vocabulary
Token 'nóng lòng' not in target vocabulary
Token 'thành công' not in target vocabulary
Token 'quan tâm' not in target vocabulary
Token 'rủi ro' not in ta

7it [00:03,  2.06it/s]

| Test Loss: 4.392 | Test PPL:  80.778 |





In [53]:
example_idx = 0
example = train_examples[example_idx]
print('source sentence: ', ' '.join(example.src))
print('target sentence: ', ' '.join(example.trg))

source sentence:  it rained for days
target sentence:  trời mưa nhiều ngày


In [58]:

src_tensor = source_field.numericalize([example.src]).to(device)
trg_tensor = target_field.numericalize([example.trg]).to(device)

print(trg_tensor.shape)

seq2seq.eval()
with torch.no_grad():
    outputs = seq2seq(src_tensor, trg_tensor, teacher_forcing_ratio=0)

print(outputs.shape)


torch.Size([1, 4])
torch.Size([1, 4, 1165])


In [59]:
output_idx = outputs[1:].squeeze(1).argmax(1).tolist()
print(output_idx)
' '.join([target_field.itos[idx] for idx in output_idx])


[]


''